xref: /openbmc/qemu/tcg/tcg.c (revision 2c6fe2e2)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 #include "accel/tcg/perf.h"
65 
66 /* Forward declarations for functions declared in tcg-target.c.inc and
67    used here. */
68 static void tcg_target_init(TCGContext *s);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 * These mirror the DWARF .debug_frame layout; presumably they are
 * consumed by the JIT/GDB registration path (tcg_register_jit_int,
 * declared above) -- confirm against tcg-target.c.inc usage.
 */
typedef struct {
    /* Initial length field; aligned so the record can start a buffer. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Frame Description Entry header; packed so the fields follow the
   CIE with no padding inserted by the compiler. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A complete debug-frame blob: one CIE immediately followed by one FDE. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
95 
96 static void tcg_register_jit_int(const void *buf, size_t size,
97                                  const void *debug_frame,
98                                  size_t debug_frame_size)
99     __attribute__((unused));
100 
101 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
108                        const TCGArg args[TCG_MAX_OP_ARGS],
109                        const int const_args[TCG_MAX_OP_ARGS]);
110 #if TCG_TARGET_MAYBE_vec
111 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
112                             TCGReg dst, TCGReg src);
113 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
114                              TCGReg dst, TCGReg base, intptr_t offset);
115 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, int64_t arg);
117 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
118                            unsigned vecl, unsigned vece,
119                            const TCGArg args[TCG_MAX_OP_ARGS],
120                            const int const_args[TCG_MAX_OP_ARGS]);
121 #else
/*
 * Stubs used when the backend does not support vector operations
 * (TCG_TARGET_MAYBE_vec is 0).  They exist only so common code can
 * compile unconditionally; reaching any of them is a bug.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
144 #endif
145 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
146                        intptr_t arg2);
147 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
148                         TCGReg base, intptr_t ofs);
149 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
150                          const TCGHelperInfo *info);
151 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
152 #ifdef TCG_TARGET_NEED_LDST_LABELS
153 static int tcg_out_ldst_finalize(TCGContext *s);
154 #endif
155 
156 TCGContext tcg_init_ctx;
157 __thread TCGContext *tcg_ctx;
158 
159 TCGContext **tcg_ctxs;
160 unsigned int tcg_cur_ctxs;
161 unsigned int tcg_max_ctxs;
162 TCGv_env cpu_env = 0;
163 const void *tcg_code_gen_epilogue;
164 uintptr_t tcg_splitwx_diff;
165 
166 #ifndef CONFIG_TCG_INTERPRETER
167 tcg_prologue_fn *tcg_qemu_tb_exec;
168 #endif
169 
170 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
171 static TCGRegSet tcg_target_call_clobber_regs;
172 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte of code, advancing the output pointer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one byte of already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/*
 * Emit a 16-bit value.  When the insn unit is exactly 16 bits this is
 * a direct store; otherwise the bytes are written via memcpy (safe for
 * any alignment) and code_ptr advances by 16 bits' worth of units.
 */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Patch a previously emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* As tcg_out16, for a 32-bit value. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* As tcg_patch16, for a 32-bit value. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* As tcg_out16, for a 64-bit value. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* As tcg_patch16, for a 64-bit value. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
254 
255 /* label relocation processing */
256 
257 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
258                           TCGLabel *l, intptr_t addend)
259 {
260     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
261 
262     r->type = type;
263     r->ptr = code_ptr;
264     r->addend = addend;
265     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
266 }
267 
/*
 * Resolve label @l to the current output position, translated to the
 * read-execute view of the split-w^x code buffer.  A label may be
 * resolved at most once.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
274 
275 TCGLabel *gen_new_label(void)
276 {
277     TCGContext *s = tcg_ctx;
278     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
279 
280     memset(l, 0, sizeof(TCGLabel));
281     l->id = s->nb_labels++;
282     QSIMPLEQ_INIT(&l->relocs);
283 
284     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
285 
286     return l;
287 }
288 
289 static bool tcg_resolve_relocs(TCGContext *s)
290 {
291     TCGLabel *l;
292 
293     QSIMPLEQ_FOREACH(l, &s->labels, next) {
294         TCGRelocation *r;
295         uintptr_t value = l->u.value;
296 
297         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
298             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
299                 return false;
300             }
301         }
302     }
303     return true;
304 }
305 
/* Record the current code offset as the jump-reset point for goto_tb
   slot @which of the TB under translation. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
314 
/* Signal overflow, starting over with fewer guest insns.  Unwinds
   non-locally to the sigsetjmp in the translation loop. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
321 
322 #define C_PFX1(P, A)                    P##A
323 #define C_PFX2(P, A, B)                 P##A##_##B
324 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
325 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
326 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
327 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
328 
329 /* Define an enumeration for the various combinations. */
330 
331 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
332 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
333 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
334 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
335 
336 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
337 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
338 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
339 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
340 
341 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
342 
343 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
344 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
345 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
346 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
347 
348 typedef enum {
349 #include "tcg-target-con-set.h"
350 } TCGConstraintSetIndex;
351 
352 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
353 
354 #undef C_O0_I1
355 #undef C_O0_I2
356 #undef C_O0_I3
357 #undef C_O0_I4
358 #undef C_O1_I1
359 #undef C_O1_I2
360 #undef C_O1_I3
361 #undef C_O1_I4
362 #undef C_N1_I2
363 #undef C_O2_I1
364 #undef C_O2_I2
365 #undef C_O2_I3
366 #undef C_O2_I4
367 
368 /* Put all of the constraint sets into an array, indexed by the enum. */
369 
370 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
371 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
372 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
373 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
374 
375 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
376 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
377 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
378 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
379 
380 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
381 
382 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
383 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
384 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
385 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
386 
387 static const TCGTargetOpDef constraint_sets[] = {
388 #include "tcg-target-con-set.h"
389 };
390 
391 
392 #undef C_O0_I1
393 #undef C_O0_I2
394 #undef C_O0_I3
395 #undef C_O0_I4
396 #undef C_O1_I1
397 #undef C_O1_I2
398 #undef C_O1_I3
399 #undef C_O1_I4
400 #undef C_N1_I2
401 #undef C_O2_I1
402 #undef C_O2_I2
403 #undef C_O2_I3
404 #undef C_O2_I4
405 
406 /* Expand the enumerator to be returned from tcg_target_op_def(). */
407 
408 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
409 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
410 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
411 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
412 
413 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
414 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
415 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
416 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
417 
418 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
419 
420 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
421 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
422 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
423 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
424 
425 #include "tcg-target.c.inc"
426 
/*
 * Allocate the per-context TCG-plugin state: a qemu_plugin_tb plus a
 * pointer array for its instructions, with a per-element cleanup
 * function.  No-op unless CONFIG_PLUGIN is enabled.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
435 
436 /*
437  * All TCG threads except the parent (i.e. the one that called tcg_context_init
438  * and registered the target's TCG globals) must register with this function
439  * before initiating translation.
440  *
441  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
442  * of tcg_region_init() for the reasoning behind this.
443  *
444  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
445  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
446  * is not used anymore for translation once this function is called.
447  *
448  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
449  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
450  */
#ifdef CONFIG_USER_ONLY
/* User-mode: all threads share the single init context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/*
 * Softmmu: give the calling thread its own TCGContext, seeded from a
 * copy of tcg_init_ctx, and publish it in tcg_ctxs[].
 */
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  The struct copy above left each temp's mem_base
       pointing into tcg_init_ctx.temps[]; redirect into our own array. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Slot 0 is the parent context, which was already fully set up by
       tcg_context_init; only secondary contexts need these. */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
486 
/*
 * Pool based memory allocation: slow path of the pool allocator.
 * Oversized requests get a dedicated chunk on the pool_first_large
 * list; otherwise advance to (or create) the next fixed-size pool
 * chunk and carve @size bytes from its start.  Returned memory lives
 * until tcg_pool_reset().
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            /* No next chunk: allocate one and append it to the list.
               NB: the label is also entered from the empty-list case. */
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    /* Make @p the active chunk, with @size bytes already consumed. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
528 
529 void tcg_pool_reset(TCGContext *s)
530 {
531     TCGPool *p, *t;
532     for (p = s->pool_first_large; p; p = t) {
533         t = p->next;
534         g_free(p);
535     }
536     s->pool_first_large = NULL;
537     s->pool_cur = s->pool_end = NULL;
538     s->pool_current = NULL;
539 }
540 
541 #include "exec/helper-proto.h"
542 
543 static TCGHelperInfo all_helpers[] = {
544 #include "exec/helper-tcg.h"
545 };
546 static GHashTable *helper_table;
547 
548 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one dh_typecode_* value to the corresponding libffi type
 * descriptor.  Any typecode outside the helper set is a bug.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    }
    g_assert_not_reached();
}
567 
/*
 * Build a libffi call interface (ffi_cif) for every helper, deduplicated
 * by typemask: helpers sharing a signature share one cif.  The temporary
 * hash table is destroyed at the end, but the cif allocations it pointed
 * to remain live, referenced from each info->cif.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif plus its trailing arg-type array in one allocation. */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            /* Signature already seen: reuse the cached cif. */
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Each argument is a 3-bit typecode, starting at bit 3. */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
618 #endif /* CONFIG_TCG_INTERPRETER */
619 
620 typedef struct TCGCumulativeArgs {
621     int arg_idx;                /* tcg_gen_callN args[] */
622     int info_in_idx;            /* TCGHelperInfo in[] */
623     int arg_slot;               /* regs+stack slot */
624     int ref_slot;               /* stack slots for references */
625 } TCGCumulativeArgs;
626 
627 static void layout_arg_even(TCGCumulativeArgs *cum)
628 {
629     cum->arg_slot += cum->arg_slot & 1;
630 }
631 
/*
 * Lay out one argument of the given @kind in a single slot:
 * fill the next info->in[] entry and consume one reg/stack slot.
 */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
645 
/*
 * Lay out one argument across @n consecutive slots (e.g. a 64-bit
 * value on a 32-bit host).  All pieces share the caller's arg_idx;
 * tmp_subindex distinguishes the individual parts.
 */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
663 
/*
 * Decode a helper's packed typemask (3 bits per type, return type in
 * bits 0-2, then one field per argument) and compute the call layout:
 * how the return value comes back (info->nr_out / out_kind) and where
 * each input lands (info->in[], filled via the layout_arg_* helpers)
 * according to the target's calling-convention macros.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One 64-bit register, or a pair of 32-bit registers. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    default:
        g_assert_not_reached();
    }
    assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Typecode's low bit distinguishes signed from unsigned,
                   selecting EXTEND_U or EXTEND_S accordingly. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    /* 64-bit value occupies two consecutive 32-bit slots. */
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
    assert(cum.ref_slot <= max_stk_slots);
}
767 
768 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
769 static void process_op_defs(TCGContext *s);
770 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
771                                             TCGReg reg, const char *name);
772 
/*
 * One-time initialization of the parent TCG context (tcg_init_ctx):
 * allocate per-opcode constraint storage, register all helpers in a
 * hash table keyed by function pointer, run target-specific setup,
 * precompute the indirect register allocation order, publish the
 * context array, and create the "env" global (cpu_env).
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers: reverse those,
       then copy the remainder (call-clobbered) unchanged. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Create the global "env" temp in TCG_AREG0 and expose it as cpu_env. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
856 
/*
 * Public entry point: initialize the parent TCG context and then the
 * code-generation region(s), sized by @tb_size, honouring the @splitwx
 * request, for up to @max_cpus translation threads.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
862 
863 /*
864  * Allocate TBs right before their corresponding translated code, making
865  * sure that TBs and code are on different cache lines.
866  */
867 TranslationBlock *tcg_tb_alloc(TCGContext *s)
868 {
869     uintptr_t align = qemu_icache_linesize;
870     TranslationBlock *tb;
871     void *next;
872 
873  retry:
874     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
875     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
876 
877     if (unlikely(next > s->code_gen_highwater)) {
878         if (tcg_region_alloc(s)) {
879             return NULL;
880         }
881         goto retry;
882     }
883     qatomic_set(&s->code_gen_ptr, next);
884     s->data_gen_ptr = NULL;
885     return tb;
886 }
887 
/*
 * Generate the host prologue/epilogue at the start of the code buffer,
 * flush the instruction cache over it, optionally disassemble it to the
 * log, and mark the region so subsequent TBs are placed after it.
 * Also publishes the prologue entry point as tcg_qemu_tb_exec.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The execution entry point is the RX view of the buffer start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant pool: disassemble the
                   code part, then dump the pool as raw words. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
969 
970 void tcg_func_start(TCGContext *s)
971 {
972     tcg_pool_reset(s);
973     s->nb_temps = s->nb_globals;
974 
975     /* No temps have been previously allocated for size or locality.  */
976     memset(s->free_temps, 0, sizeof(s->free_temps));
977 
978     /* No constant temps have been previously allocated. */
979     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
980         if (s->const_table[i]) {
981             g_hash_table_remove_all(s->const_table[i]);
982         }
983     }
984 
985     s->nb_ops = 0;
986     s->nb_labels = 0;
987     s->current_frame_offset = s->frame_start;
988 
989 #ifdef CONFIG_DEBUG_TCG
990     s->goto_tb_issue_mask = 0;
991 #endif
992 
993     QTAILQ_INIT(&s->ops);
994     QTAILQ_INIT(&s->free_ops);
995     QSIMPLEQ_INIT(&s->labels);
996 }
997 
998 static TCGTemp *tcg_temp_alloc(TCGContext *s)
999 {
1000     int n = s->nb_temps++;
1001 
1002     if (n >= TCG_MAX_TEMPS) {
1003         tcg_raise_tb_overflow(s);
1004     }
1005     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1006 }
1007 
1008 static TCGTemp *tcg_global_alloc(TCGContext *s)
1009 {
1010     TCGTemp *ts;
1011 
1012     tcg_debug_assert(s->nb_globals == s->nb_temps);
1013     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1014     s->nb_globals++;
1015     ts = tcg_temp_alloc(s);
1016     ts->kind = TEMP_GLOBAL;
1017 
1018     return ts;
1019 }
1020 
1021 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1022                                             TCGReg reg, const char *name)
1023 {
1024     TCGTemp *ts;
1025 
1026     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1027         tcg_abort();
1028     }
1029 
1030     ts = tcg_global_alloc(s);
1031     ts->base_type = type;
1032     ts->type = type;
1033     ts->kind = TEMP_FIXED;
1034     ts->reg = reg;
1035     ts->name = name;
1036     tcg_regset_set_reg(s->reserved_regs, reg);
1037 
1038     return ts;
1039 }
1040 
1041 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1042 {
1043     s->frame_start = start;
1044     s->frame_end = start + size;
1045     s->frame_temp
1046         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1047 }
1048 
/*
 * Create a global temp of TYPE that lives in memory at BASE + OFFSET,
 * labelled NAME for dumps.  If BASE is itself a global (rather than a
 * fixed register), the new temp is "indirect": the base pointer must be
 * loaded before the value can be reached.  On 32-bit hosts a 64-bit
 * global becomes a pair of adjacent 32-bit temps named NAME_0/NAME_1.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit value accounts for two indirect accesses. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The second half must occupy the very next temp slot. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1108 
/*
 * Allocate a translation-time temporary of TYPE; TEMP_LOCAL temps
 * survive across branches, TEMP_NORMAL temps do not.  A previously
 * freed temp of the same (type, locality) is reused when available.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* free_temps is indexed by type, with locals offset by TCG_TYPE_COUNT. */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* On 32-bit hosts, an i64 is a pair of adjacent i32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1155 
1156 TCGv_vec tcg_temp_new_vec(TCGType type)
1157 {
1158     TCGTemp *t;
1159 
1160 #ifdef CONFIG_DEBUG_TCG
1161     switch (type) {
1162     case TCG_TYPE_V64:
1163         assert(TCG_TARGET_HAS_v64);
1164         break;
1165     case TCG_TYPE_V128:
1166         assert(TCG_TARGET_HAS_v128);
1167         break;
1168     case TCG_TYPE_V256:
1169         assert(TCG_TARGET_HAS_v256);
1170         break;
1171     default:
1172         g_assert_not_reached();
1173     }
1174 #endif
1175 
1176     t = tcg_temp_new_internal(type, 0);
1177     return temp_tcgv_vec(t);
1178 }
1179 
1180 /* Create a new temp of the same type as an existing temp.  */
1181 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1182 {
1183     TCGTemp *t = tcgv_vec_temp(match);
1184 
1185     tcg_debug_assert(t->temp_allocated != 0);
1186 
1187     t = tcg_temp_new_internal(t->base_type, 0);
1188     return temp_tcgv_vec(t);
1189 }
1190 
/*
 * Return a temporary to the free pool so a later allocation of the
 * same (type, locality) can reuse its slot.  Constants are interned
 * and never freed; globals/fixed temps must never reach here.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        g_assert_not_reached();
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    /* Mark the slot free in the matching (type, locality) bitmap;
       index math mirrors tcg_temp_new_internal. */
    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
1224 
/*
 * Return the interned constant temp for (TYPE, VAL), creating it on
 * first use.  Constants are cached per type in s->const_table so a
 * given value is only ever allocated once; they are never freed.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Keys are pointers to the int64 value stored inside the temp. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* On 32-bit hosts an i64 constant is a pair of i32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1279 
1280 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1281 {
1282     val = dup_const(vece, val);
1283     return temp_tcgv_vec(tcg_constant_internal(type, val));
1284 }
1285 
1286 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1287 {
1288     TCGTemp *t = tcgv_vec_temp(match);
1289 
1290     tcg_debug_assert(t->temp_allocated != 0);
1291     return tcg_constant_vec(t->base_type, vece, val);
1292 }
1293 
1294 TCGv_i32 tcg_const_i32(int32_t val)
1295 {
1296     TCGv_i32 t0;
1297     t0 = tcg_temp_new_i32();
1298     tcg_gen_movi_i32(t0, val);
1299     return t0;
1300 }
1301 
1302 TCGv_i64 tcg_const_i64(int64_t val)
1303 {
1304     TCGv_i64 t0;
1305     t0 = tcg_temp_new_i64();
1306     tcg_gen_movi_i64(t0, val);
1307     return t0;
1308 }
1309 
1310 TCGv_i32 tcg_const_local_i32(int32_t val)
1311 {
1312     TCGv_i32 t0;
1313     t0 = tcg_temp_local_new_i32();
1314     tcg_gen_movi_i32(t0, val);
1315     return t0;
1316 }
1317 
1318 TCGv_i64 tcg_const_local_i64(int64_t val)
1319 {
1320     TCGv_i64 t0;
1321     t0 = tcg_temp_local_new_i64();
1322     tcg_gen_movi_i64(t0, val);
1323     return t0;
1324 }
1325 
1326 #if defined(CONFIG_DEBUG_TCG)
1327 void tcg_clear_temp_count(void)
1328 {
1329     TCGContext *s = tcg_ctx;
1330     s->temps_in_use = 0;
1331 }
1332 
1333 int tcg_check_temp_count(void)
1334 {
1335     TCGContext *s = tcg_ctx;
1336     if (s->temps_in_use) {
1337         /* Clear the count so that we don't give another
1338          * warning immediately next time around.
1339          */
1340         s->temps_in_use = 0;
1341         return 1;
1342     }
1343     return 0;
1344 }
1345 #endif
1346 
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Core opcodes that every backend provides unconditionally. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer opcodes. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit opcodes, gated on per-backend TCG_TARGET_HAS_*. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit opcodes require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes (64-bit hosts define these knobs). */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes additionally require some vector width support. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything else must be a backend-specific opcode in range. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1641 
1642 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1643 
/*
 * Queue an INDEX_op_call op invoking helper FUNC, returning into RET
 * (NULL for a void helper), with ARGS supplying the input temps.
 * The helper's signature comes from its TCGHelperInfo in helper_table;
 * note the NARGS parameter is not consulted here -- counts are taken
 * from info->nr_out/nr_in.  Arguments the ABI wants widened are
 * extended into scratch i64 temps, freed after the op is queued.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + the trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Emit the output arguments: none, one temp, or a split pair. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
        /* Two-part return: RET must be the first of an adjacent pair. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + 1);
        tcg_debug_assert(ret->temp_subindex == 0);
        op->args[pi++] = temp_arg(ret);
        op->args[pi++] = temp_arg(ret + 1);
        break;
    default:
        g_assert_not_reached();
    }

    /* Emit the input arguments per the precomputed call-arg layout. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants a 64-bit slot: widen the i32 into a scratch. */
                TCGv_i64 temp = tcg_temp_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The scratch extension temps are dead once the call is queued. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1724 
/*
 * Initialize the value-location of every temp before register
 * allocation for a TB: constants live as constants, fixed temps in
 * their registers, globals in memory, and scratch temps start dead.
 * All register-to-temp bindings are cleared.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Globals default to "value is in memory". */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            /* Local temps keep TEMP_VAL_MEM but need a frame slot later. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
1757 
/*
 * Format temp TS into BUF for opcode dumps: globals by name, locals
 * as loc%d, EBB temps as ebb%d, plain temps as tmp%d (numbered past
 * the globals), and constants by value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Prefix vector constants with their width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1800 
1801 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1802                              int buf_size, TCGArg arg)
1803 {
1804     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1805 }
1806 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1822 
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1838 
/*
 * Printable prefixes for MemOp alignment bits.  The implicit default
 * (empty string) flips with TARGET_ALIGNED_ONLY, so only the
 * non-default case is spelled out in dumps.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1854 
/* Printable names for the valid TCG_BSWAP_* flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1862 
1863 static inline bool tcg_regset_single(TCGRegSet d)
1864 {
1865     return (d & (d - 1)) == 0;
1866 }
1867 
1868 static inline TCGReg tcg_regset_first(TCGRegSet d)
1869 {
1870     if (TCG_TARGET_NB_REGS <= 32) {
1871         return ctz32(d);
1872     } else {
1873         return ctz64(d);
1874     }
1875 }
1876 
/*
 * fprintf wrapper that folds errors into "0 characters written", so
 * callers accumulating an output column never go negative.
 * Return only the number of characters output -- no error return.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1880 
1881 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1882 {
1883     char buf[128];
1884     TCGOp *op;
1885 
1886     QTAILQ_FOREACH(op, &s->ops, link) {
1887         int i, k, nb_oargs, nb_iargs, nb_cargs;
1888         const TCGOpDef *def;
1889         TCGOpcode c;
1890         int col = 0;
1891 
1892         c = op->opc;
1893         def = &tcg_op_defs[c];
1894 
1895         if (c == INDEX_op_insn_start) {
1896             nb_oargs = 0;
1897             col += ne_fprintf(f, "\n ----");
1898 
1899             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1900                 target_ulong a;
1901 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1902                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1903 #else
1904                 a = op->args[i];
1905 #endif
1906                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1907             }
1908         } else if (c == INDEX_op_call) {
1909             const TCGHelperInfo *info = tcg_call_info(op);
1910             void *func = tcg_call_func(op);
1911 
1912             /* variable number of arguments */
1913             nb_oargs = TCGOP_CALLO(op);
1914             nb_iargs = TCGOP_CALLI(op);
1915             nb_cargs = def->nb_cargs;
1916 
1917             col += ne_fprintf(f, " %s ", def->name);
1918 
1919             /*
1920              * Print the function name from TCGHelperInfo, if available.
1921              * Note that plugins have a template function for the info,
1922              * but the actual function pointer comes from the plugin.
1923              */
1924             if (func == info->func) {
1925                 col += ne_fprintf(f, "%s", info->name);
1926             } else {
1927                 col += ne_fprintf(f, "plugin(%p)", func);
1928             }
1929 
1930             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1931             for (i = 0; i < nb_oargs; i++) {
1932                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1933                                                             op->args[i]));
1934             }
1935             for (i = 0; i < nb_iargs; i++) {
1936                 TCGArg arg = op->args[nb_oargs + i];
1937                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1938                 col += ne_fprintf(f, ",%s", t);
1939             }
1940         } else {
1941             col += ne_fprintf(f, " %s ", def->name);
1942 
1943             nb_oargs = def->nb_oargs;
1944             nb_iargs = def->nb_iargs;
1945             nb_cargs = def->nb_cargs;
1946 
1947             if (def->flags & TCG_OPF_VECTOR) {
1948                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1949                                   8 << TCGOP_VECE(op));
1950             }
1951 
1952             k = 0;
1953             for (i = 0; i < nb_oargs; i++) {
1954                 const char *sep =  k ? "," : "";
1955                 col += ne_fprintf(f, "%s%s", sep,
1956                                   tcg_get_arg_str(s, buf, sizeof(buf),
1957                                                   op->args[k++]));
1958             }
1959             for (i = 0; i < nb_iargs; i++) {
1960                 const char *sep =  k ? "," : "";
1961                 col += ne_fprintf(f, "%s%s", sep,
1962                                   tcg_get_arg_str(s, buf, sizeof(buf),
1963                                                   op->args[k++]));
1964             }
1965             switch (c) {
1966             case INDEX_op_brcond_i32:
1967             case INDEX_op_setcond_i32:
1968             case INDEX_op_movcond_i32:
1969             case INDEX_op_brcond2_i32:
1970             case INDEX_op_setcond2_i32:
1971             case INDEX_op_brcond_i64:
1972             case INDEX_op_setcond_i64:
1973             case INDEX_op_movcond_i64:
1974             case INDEX_op_cmp_vec:
1975             case INDEX_op_cmpsel_vec:
1976                 if (op->args[k] < ARRAY_SIZE(cond_name)
1977                     && cond_name[op->args[k]]) {
1978                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1979                 } else {
1980                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1981                 }
1982                 i = 1;
1983                 break;
1984             case INDEX_op_qemu_ld_i32:
1985             case INDEX_op_qemu_st_i32:
1986             case INDEX_op_qemu_st8_i32:
1987             case INDEX_op_qemu_ld_i64:
1988             case INDEX_op_qemu_st_i64:
1989                 {
1990                     MemOpIdx oi = op->args[k++];
1991                     MemOp op = get_memop(oi);
1992                     unsigned ix = get_mmuidx(oi);
1993 
1994                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1995                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1996                     } else {
1997                         const char *s_al, *s_op;
1998                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1999                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2000                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2001                     }
2002                     i = 1;
2003                 }
2004                 break;
2005             case INDEX_op_bswap16_i32:
2006             case INDEX_op_bswap16_i64:
2007             case INDEX_op_bswap32_i32:
2008             case INDEX_op_bswap32_i64:
2009             case INDEX_op_bswap64_i64:
2010                 {
2011                     TCGArg flags = op->args[k];
2012                     const char *name = NULL;
2013 
2014                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2015                         name = bswap_flag_name[flags];
2016                     }
2017                     if (name) {
2018                         col += ne_fprintf(f, ",%s", name);
2019                     } else {
2020                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2021                     }
2022                     i = k = 1;
2023                 }
2024                 break;
2025             default:
2026                 i = 0;
2027                 break;
2028             }
2029             switch (c) {
2030             case INDEX_op_set_label:
2031             case INDEX_op_br:
2032             case INDEX_op_brcond_i32:
2033             case INDEX_op_brcond_i64:
2034             case INDEX_op_brcond2_i32:
2035                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2036                                   arg_label(op->args[k])->id);
2037                 i++, k++;
2038                 break;
2039             default:
2040                 break;
2041             }
2042             for (; i < nb_cargs; i++, k++) {
2043                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2044                                   op->args[k]);
2045             }
2046         }
2047 
2048         if (have_prefs || op->life) {
2049             for (; col < 40; ++col) {
2050                 putc(' ', f);
2051             }
2052         }
2053 
2054         if (op->life) {
2055             unsigned life = op->life;
2056 
2057             if (life & (SYNC_ARG * 3)) {
2058                 ne_fprintf(f, "  sync:");
2059                 for (i = 0; i < 2; ++i) {
2060                     if (life & (SYNC_ARG << i)) {
2061                         ne_fprintf(f, " %d", i);
2062                     }
2063                 }
2064             }
2065             life /= DEAD_ARG;
2066             if (life) {
2067                 ne_fprintf(f, "  dead:");
2068                 for (i = 0; life; ++i, life >>= 1) {
2069                     if (life & 1) {
2070                         ne_fprintf(f, " %d", i);
2071                     }
2072                 }
2073             }
2074         }
2075 
2076         if (have_prefs) {
2077             for (i = 0; i < nb_oargs; ++i) {
2078                 TCGRegSet set = output_pref(op, i);
2079 
2080                 if (i == 0) {
2081                     ne_fprintf(f, "  pref=");
2082                 } else {
2083                     ne_fprintf(f, ",");
2084                 }
2085                 if (set == 0) {
2086                     ne_fprintf(f, "none");
2087                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2088                     ne_fprintf(f, "all");
2089 #ifdef CONFIG_DEBUG_TCG
2090                 } else if (tcg_regset_single(set)) {
2091                     TCGReg reg = tcg_regset_first(set);
2092                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2093 #endif
2094                 } else if (TCG_TARGET_NB_REGS <= 32) {
2095                     ne_fprintf(f, "0x%x", (uint32_t)set);
2096                 } else {
2097                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2098                 }
2099             }
2100         }
2101 
2102         putc('\n', f);
2103     }
2104 }
2105 
2106 /* we give more priority to constraints with less registers */
2107 static int get_constraint_priority(const TCGOpDef *def, int k)
2108 {
2109     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2110     int n = ctpop64(arg_ct->regs);
2111 
2112     /*
2113      * Sort constraints of a single register first, which includes output
2114      * aliases (which must exactly match the input already allocated).
2115      */
2116     if (n == 1 || arg_ct->oalias) {
2117         return INT_MAX;
2118     }
2119 
2120     /*
2121      * Sort register pairs next, first then second immediately after.
2122      * Arbitrarily sort multiple pairs by the index of the first reg;
2123      * there shouldn't be many pairs.
2124      */
2125     switch (arg_ct->pair) {
2126     case 1:
2127     case 3:
2128         return (k + 1) * 2;
2129     case 2:
2130         return (arg_ct->pair_index + 1) * 2 - 1;
2131     }
2132 
2133     /* Finally, sort by decreasing register count. */
2134     assert(n > 1);
2135     return -n;
2136 }
2137 
2138 /* sort from highest priority to lowest */
2139 static void sort_constraints(TCGOpDef *def, int start, int n)
2140 {
2141     int i, j;
2142     TCGArgConstraint *a = def->args_ct;
2143 
2144     for (i = 0; i < n; i++) {
2145         a[start + i].sort_index = start + i;
2146     }
2147     if (n <= 1) {
2148         return;
2149     }
2150     for (i = 0; i < n - 1; i++) {
2151         for (j = i + 1; j < n; j++) {
2152             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2153             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2154             if (p1 < p2) {
2155                 int tmp = a[start + i].sort_index;
2156                 a[start + i].sort_index = a[start + j].sort_index;
2157                 a[start + j].sort_index = tmp;
2158             }
2159         }
2160     }
2161 }
2162 
/*
 * Translate each backend constraint-set string into the structured
 * TCGArgConstraint data in tcg_op_defs[], resolving output aliases and
 * register pairs, then sort the constraints for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Skip opcodes the backend does not implement. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first; everything at or past nb_oargs is input. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Alias: input 'i' must use the same register as output 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* '&': mark the output as requiring a new register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters accumulate register/constant masks. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2367 
2368 void tcg_op_remove(TCGContext *s, TCGOp *op)
2369 {
2370     TCGLabel *label;
2371 
2372     switch (op->opc) {
2373     case INDEX_op_br:
2374         label = arg_label(op->args[0]);
2375         label->refs--;
2376         break;
2377     case INDEX_op_brcond_i32:
2378     case INDEX_op_brcond_i64:
2379         label = arg_label(op->args[3]);
2380         label->refs--;
2381         break;
2382     case INDEX_op_brcond2_i32:
2383         label = arg_label(op->args[5]);
2384         label->refs--;
2385         break;
2386     default:
2387         break;
2388     }
2389 
2390     QTAILQ_REMOVE(&s->ops, op, link);
2391     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2392     s->nb_ops--;
2393 
2394 #ifdef CONFIG_PROFILER
2395     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2396 #endif
2397 }
2398 
2399 void tcg_remove_ops_after(TCGOp *op)
2400 {
2401     TCGContext *s = tcg_ctx;
2402 
2403     while (true) {
2404         TCGOp *last = tcg_last_op();
2405         if (last == op) {
2406             return;
2407         }
2408         tcg_op_remove(s, last);
2409     }
2410 }
2411 
2412 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2413 {
2414     TCGContext *s = tcg_ctx;
2415     TCGOp *op = NULL;
2416 
2417     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2418         QTAILQ_FOREACH(op, &s->free_ops, link) {
2419             if (nargs <= op->nargs) {
2420                 QTAILQ_REMOVE(&s->free_ops, op, link);
2421                 nargs = op->nargs;
2422                 goto found;
2423             }
2424         }
2425     }
2426 
2427     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2428     nargs = MAX(4, nargs);
2429     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2430 
2431  found:
2432     memset(op, 0, offsetof(TCGOp, link));
2433     op->opc = opc;
2434     op->nargs = nargs;
2435 
2436     /* Check for bitfield overflow. */
2437     tcg_debug_assert(op->nargs == nargs);
2438 
2439     s->nb_ops++;
2440     return op;
2441 }
2442 
2443 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2444 {
2445     TCGOp *op = tcg_op_alloc(opc, nargs);
2446     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2447     return op;
2448 }
2449 
2450 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2451                             TCGOpcode opc, unsigned nargs)
2452 {
2453     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2454     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2455     return new_op;
2456 }
2457 
2458 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2459                            TCGOpcode opc, unsigned nargs)
2460 {
2461     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2462     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2463     return new_op;
2464 }
2465 
/* Reachable analysis : remove unreachable code.  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    /* True while scanning ops that follow an unconditional control transfer. */
    bool dead = false;

    /* _SAFE variant: ops may be removed from the list while iterating. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2538 
/* Liveness state bits: the temp's value is dead / must be synced to memory. */
#define TS_DEAD  1
#define TS_MEM   2

/* Test the per-argument DEAD/SYNC bit N within a local 'arg_life' value. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2544 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* state_ptr is pointed at the prefs[] array set up by liveness_pass_1. */
    return ts->state_ptr;
}
2550 
2551 /* For liveness_pass_1, reset the preferences for a given temp to the
2552  * maximal regset for its type.
2553  */
2554 static inline void la_reset_pref(TCGTemp *ts)
2555 {
2556     *la_temp_pref(ts)
2557         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2558 }
2559 
2560 /* liveness analysis: end of function: all temps are dead, and globals
2561    should be in memory. */
2562 static void la_func_end(TCGContext *s, int ng, int nt)
2563 {
2564     int i;
2565 
2566     for (i = 0; i < ng; ++i) {
2567         s->temps[i].state = TS_DEAD | TS_MEM;
2568         la_reset_pref(&s->temps[i]);
2569     }
2570     for (i = ng; i < nt; ++i) {
2571         s->temps[i].state = TS_DEAD;
2572         la_reset_pref(&s->temps[i]);
2573     }
2574 }
2575 
2576 /* liveness analysis: end of basic block: all temps are dead, globals
2577    and local temps should be in memory. */
2578 static void la_bb_end(TCGContext *s, int ng, int nt)
2579 {
2580     int i;
2581 
2582     for (i = 0; i < nt; ++i) {
2583         TCGTemp *ts = &s->temps[i];
2584         int state;
2585 
2586         switch (ts->kind) {
2587         case TEMP_FIXED:
2588         case TEMP_GLOBAL:
2589         case TEMP_LOCAL:
2590             state = TS_DEAD | TS_MEM;
2591             break;
2592         case TEMP_NORMAL:
2593         case TEMP_EBB:
2594         case TEMP_CONST:
2595             state = TS_DEAD;
2596             break;
2597         default:
2598             g_assert_not_reached();
2599         }
2600         ts->state = state;
2601         la_reset_pref(ts);
2602     }
2603 }
2604 
2605 /* liveness analysis: sync globals back to memory.  */
2606 static void la_global_sync(TCGContext *s, int ng)
2607 {
2608     int i;
2609 
2610     for (i = 0; i < ng; ++i) {
2611         int state = s->temps[i].state;
2612         s->temps[i].state = state | TS_MEM;
2613         if (state == TS_DEAD) {
2614             /* If the global was previously dead, reset prefs.  */
2615             la_reset_pref(&s->temps[i]);
2616         }
2617     }
2618 }
2619 
/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    /* Globals: sync to memory; prefs reset only for newly-dead ones. */
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: keep its current register preferences. */
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            /* Live across the branch: state and prefs unchanged. */
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}
2653 
2654 /* liveness analysis: sync globals back to memory and kill.  */
2655 static void la_global_kill(TCGContext *s, int ng)
2656 {
2657     int i;
2658 
2659     for (i = 0; i < ng; i++) {
2660         s->temps[i].state = TS_DEAD | TS_MEM;
2661         la_reset_pref(&s->temps[i]);
2662     }
2663 }
2664 
2665 /* liveness analysis: note live globals crossing calls.  */
2666 static void la_cross_call(TCGContext *s, int nt)
2667 {
2668     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2669     int i;
2670 
2671     for (i = 0; i < nt; i++) {
2672         TCGTemp *ts = &s->temps[i];
2673         if (!(ts->state & TS_DEAD)) {
2674             TCGRegSet *pset = la_temp_pref(ts);
2675             TCGRegSet set = *pset;
2676 
2677             set &= mask;
2678             /* If the combination is not possible, restart.  */
2679             if (set == 0) {
2680                 set = tcg_target_available_regs[ts->type] & mask;
2681             }
2682             *pset = set;
2683         }
2684     }
2685 }
2686 
2687 /* Liveness analysis : update the opc_arg_life array to tell if a
2688    given input arguments is dead. Instructions updating dead
2689    temporaries are removed. */
2690 static void liveness_pass_1(TCGContext *s)
2691 {
2692     int nb_globals = s->nb_globals;
2693     int nb_temps = s->nb_temps;
2694     TCGOp *op, *op_prev;
2695     TCGRegSet *prefs;
2696     int i;
2697 
2698     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2699     for (i = 0; i < nb_temps; ++i) {
2700         s->temps[i].state_ptr = prefs + i;
2701     }
2702 
2703     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2704     la_func_end(s, nb_globals, nb_temps);
2705 
2706     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2707         int nb_iargs, nb_oargs;
2708         TCGOpcode opc_new, opc_new2;
2709         bool have_opc_new2;
2710         TCGLifeData arg_life = 0;
2711         TCGTemp *ts;
2712         TCGOpcode opc = op->opc;
2713         const TCGOpDef *def = &tcg_op_defs[opc];
2714 
2715         switch (opc) {
2716         case INDEX_op_call:
2717             {
2718                 const TCGHelperInfo *info = tcg_call_info(op);
2719                 int call_flags = tcg_call_flags(op);
2720 
2721                 nb_oargs = TCGOP_CALLO(op);
2722                 nb_iargs = TCGOP_CALLI(op);
2723 
2724                 /* pure functions can be removed if their result is unused */
2725                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2726                     for (i = 0; i < nb_oargs; i++) {
2727                         ts = arg_temp(op->args[i]);
2728                         if (ts->state != TS_DEAD) {
2729                             goto do_not_remove_call;
2730                         }
2731                     }
2732                     goto do_remove;
2733                 }
2734             do_not_remove_call:
2735 
2736                 /* Output args are dead.  */
2737                 for (i = 0; i < nb_oargs; i++) {
2738                     ts = arg_temp(op->args[i]);
2739                     if (ts->state & TS_DEAD) {
2740                         arg_life |= DEAD_ARG << i;
2741                     }
2742                     if (ts->state & TS_MEM) {
2743                         arg_life |= SYNC_ARG << i;
2744                     }
2745                     ts->state = TS_DEAD;
2746                     la_reset_pref(ts);
2747                 }
2748 
2749                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
2750                 memset(op->output_pref, 0, sizeof(op->output_pref));
2751 
2752                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2753                                     TCG_CALL_NO_READ_GLOBALS))) {
2754                     la_global_kill(s, nb_globals);
2755                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2756                     la_global_sync(s, nb_globals);
2757                 }
2758 
2759                 /* Record arguments that die in this helper.  */
2760                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2761                     ts = arg_temp(op->args[i]);
2762                     if (ts->state & TS_DEAD) {
2763                         arg_life |= DEAD_ARG << i;
2764                     }
2765                 }
2766 
2767                 /* For all live registers, remove call-clobbered prefs.  */
2768                 la_cross_call(s, nb_temps);
2769 
2770                 /*
2771                  * Input arguments are live for preceding opcodes.
2772                  *
2773                  * For those arguments that die, and will be allocated in
2774                  * registers, clear the register set for that arg, to be
2775                  * filled in below.  For args that will be on the stack,
2776                  * reset to any available reg.  Process arguments in reverse
2777                  * order so that if a temp is used more than once, the stack
2778                  * reset to max happens before the register reset to 0.
2779                  */
2780                 for (i = nb_iargs - 1; i >= 0; i--) {
2781                     const TCGCallArgumentLoc *loc = &info->in[i];
2782                     ts = arg_temp(op->args[nb_oargs + i]);
2783 
2784                     if (ts->state & TS_DEAD) {
2785                         switch (loc->kind) {
2786                         case TCG_CALL_ARG_NORMAL:
2787                         case TCG_CALL_ARG_EXTEND_U:
2788                         case TCG_CALL_ARG_EXTEND_S:
2789                             if (REG_P(loc)) {
2790                                 *la_temp_pref(ts) = 0;
2791                                 break;
2792                             }
2793                             /* fall through */
2794                         default:
2795                             *la_temp_pref(ts) =
2796                                 tcg_target_available_regs[ts->type];
2797                             break;
2798                         }
2799                         ts->state &= ~TS_DEAD;
2800                     }
2801                 }
2802 
2803                 /*
2804                  * For each input argument, add its input register to prefs.
2805                  * If a temp is used once, this produces a single set bit;
2806                  * if a temp is used multiple times, this produces a set.
2807                  */
2808                 for (i = 0; i < nb_iargs; i++) {
2809                     const TCGCallArgumentLoc *loc = &info->in[i];
2810                     ts = arg_temp(op->args[nb_oargs + i]);
2811 
2812                     switch (loc->kind) {
2813                     case TCG_CALL_ARG_NORMAL:
2814                     case TCG_CALL_ARG_EXTEND_U:
2815                     case TCG_CALL_ARG_EXTEND_S:
2816                         if (REG_P(loc)) {
2817                             tcg_regset_set_reg(*la_temp_pref(ts),
2818                                 tcg_target_call_iarg_regs[loc->arg_slot]);
2819                         }
2820                         break;
2821                     default:
2822                         break;
2823                     }
2824                 }
2825             }
2826             break;
2827         case INDEX_op_insn_start:
2828             break;
2829         case INDEX_op_discard:
2830             /* mark the temporary as dead */
2831             ts = arg_temp(op->args[0]);
2832             ts->state = TS_DEAD;
2833             la_reset_pref(ts);
2834             break;
2835 
2836         case INDEX_op_add2_i32:
2837             opc_new = INDEX_op_add_i32;
2838             goto do_addsub2;
2839         case INDEX_op_sub2_i32:
2840             opc_new = INDEX_op_sub_i32;
2841             goto do_addsub2;
2842         case INDEX_op_add2_i64:
2843             opc_new = INDEX_op_add_i64;
2844             goto do_addsub2;
2845         case INDEX_op_sub2_i64:
2846             opc_new = INDEX_op_sub_i64;
2847         do_addsub2:
2848             nb_iargs = 4;
2849             nb_oargs = 2;
2850             /* Test if the high part of the operation is dead, but not
2851                the low part.  The result can be optimized to a simple
2852                add or sub.  This happens often for x86_64 guest when the
2853                cpu mode is set to 32 bit.  */
2854             if (arg_temp(op->args[1])->state == TS_DEAD) {
2855                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2856                     goto do_remove;
2857                 }
2858                 /* Replace the opcode and adjust the args in place,
2859                    leaving 3 unused args at the end.  */
2860                 op->opc = opc = opc_new;
2861                 op->args[1] = op->args[2];
2862                 op->args[2] = op->args[4];
2863                 /* Fall through and mark the single-word operation live.  */
2864                 nb_iargs = 2;
2865                 nb_oargs = 1;
2866             }
2867             goto do_not_remove;
2868 
2869         case INDEX_op_mulu2_i32:
2870             opc_new = INDEX_op_mul_i32;
2871             opc_new2 = INDEX_op_muluh_i32;
2872             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2873             goto do_mul2;
2874         case INDEX_op_muls2_i32:
2875             opc_new = INDEX_op_mul_i32;
2876             opc_new2 = INDEX_op_mulsh_i32;
2877             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2878             goto do_mul2;
2879         case INDEX_op_mulu2_i64:
2880             opc_new = INDEX_op_mul_i64;
2881             opc_new2 = INDEX_op_muluh_i64;
2882             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2883             goto do_mul2;
2884         case INDEX_op_muls2_i64:
2885             opc_new = INDEX_op_mul_i64;
2886             opc_new2 = INDEX_op_mulsh_i64;
2887             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2888             goto do_mul2;
2889         do_mul2:
2890             nb_iargs = 2;
2891             nb_oargs = 2;
2892             if (arg_temp(op->args[1])->state == TS_DEAD) {
2893                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2894                     /* Both parts of the operation are dead.  */
2895                     goto do_remove;
2896                 }
2897                 /* The high part of the operation is dead; generate the low. */
2898                 op->opc = opc = opc_new;
2899                 op->args[1] = op->args[2];
2900                 op->args[2] = op->args[3];
2901             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2902                 /* The low part of the operation is dead; generate the high. */
2903                 op->opc = opc = opc_new2;
2904                 op->args[0] = op->args[1];
2905                 op->args[1] = op->args[2];
2906                 op->args[2] = op->args[3];
2907             } else {
2908                 goto do_not_remove;
2909             }
2910             /* Mark the single-word operation live.  */
2911             nb_oargs = 1;
2912             goto do_not_remove;
2913 
2914         default:
2915             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2916             nb_iargs = def->nb_iargs;
2917             nb_oargs = def->nb_oargs;
2918 
2919             /* Test if the operation can be removed because all
2920                its outputs are dead. We assume that nb_oargs == 0
2921                implies side effects */
2922             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2923                 for (i = 0; i < nb_oargs; i++) {
2924                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2925                         goto do_not_remove;
2926                     }
2927                 }
2928                 goto do_remove;
2929             }
2930             goto do_not_remove;
2931 
2932         do_remove:
2933             tcg_op_remove(s, op);
2934             break;
2935 
2936         do_not_remove:
2937             for (i = 0; i < nb_oargs; i++) {
2938                 ts = arg_temp(op->args[i]);
2939 
2940                 /* Remember the preference of the uses that followed.  */
2941                 if (i < ARRAY_SIZE(op->output_pref)) {
2942                     op->output_pref[i] = *la_temp_pref(ts);
2943                 }
2944 
2945                 /* Output args are dead.  */
2946                 if (ts->state & TS_DEAD) {
2947                     arg_life |= DEAD_ARG << i;
2948                 }
2949                 if (ts->state & TS_MEM) {
2950                     arg_life |= SYNC_ARG << i;
2951                 }
2952                 ts->state = TS_DEAD;
2953                 la_reset_pref(ts);
2954             }
2955 
2956             /* If end of basic block, update.  */
2957             if (def->flags & TCG_OPF_BB_EXIT) {
2958                 la_func_end(s, nb_globals, nb_temps);
2959             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2960                 la_bb_sync(s, nb_globals, nb_temps);
2961             } else if (def->flags & TCG_OPF_BB_END) {
2962                 la_bb_end(s, nb_globals, nb_temps);
2963             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2964                 la_global_sync(s, nb_globals);
2965                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2966                     la_cross_call(s, nb_temps);
2967                 }
2968             }
2969 
2970             /* Record arguments that die in this opcode.  */
2971             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2972                 ts = arg_temp(op->args[i]);
2973                 if (ts->state & TS_DEAD) {
2974                     arg_life |= DEAD_ARG << i;
2975                 }
2976             }
2977 
2978             /* Input arguments are live for preceding opcodes.  */
2979             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2980                 ts = arg_temp(op->args[i]);
2981                 if (ts->state & TS_DEAD) {
2982                     /* For operands that were dead, initially allow
2983                        all regs for the type.  */
2984                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2985                     ts->state &= ~TS_DEAD;
2986                 }
2987             }
2988 
2989             /* Incorporate constraints for this operand.  */
2990             switch (opc) {
2991             case INDEX_op_mov_i32:
2992             case INDEX_op_mov_i64:
2993                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2994                    have proper constraints.  That said, special case
2995                    moves to propagate preferences backward.  */
2996                 if (IS_DEAD_ARG(1)) {
2997                     *la_temp_pref(arg_temp(op->args[0]))
2998                         = *la_temp_pref(arg_temp(op->args[1]));
2999                 }
3000                 break;
3001 
3002             default:
3003                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3004                     const TCGArgConstraint *ct = &def->args_ct[i];
3005                     TCGRegSet set, *pset;
3006 
3007                     ts = arg_temp(op->args[i]);
3008                     pset = la_temp_pref(ts);
3009                     set = *pset;
3010 
3011                     set &= ct->regs;
3012                     if (ct->ialias) {
3013                         set &= output_pref(op, ct->alias_index);
3014                     }
3015                     /* If the combination is not possible, restart.  */
3016                     if (set == 0) {
3017                         set = ct->regs;
3018                     }
3019                     *pset = set;
3020                 }
3021                 break;
3022             }
3023             break;
3024         }
3025         op->life = arg_life;
3026     }
3027 }
3028 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /*
     * Create a direct temporary for each indirect global.  While this
     * pass runs, state_ptr links an indirect global to its direct temp
     * (NULL for everything else) and state tracks the direct copy:
     * TS_DEAD = must reload from memory, TS_MEM = loaded and coherent
     * with memory, 0 = modified (dirty, needs a store before sync).
     */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps are never indirect; they also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a reload from the canonical memory slot.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after the store: the mov itself is
                           unnecessary; store the mov source directly.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3215 
3216 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3217 {
3218     int size = tcg_type_size(ts->type);
3219     int align;
3220     intptr_t off;
3221 
3222     switch (ts->type) {
3223     case TCG_TYPE_I32:
3224         align = 4;
3225         break;
3226     case TCG_TYPE_I64:
3227     case TCG_TYPE_V64:
3228         align = 8;
3229         break;
3230     case TCG_TYPE_V128:
3231     case TCG_TYPE_V256:
3232         /* Note that we do not require aligned storage for V256. */
3233         align = 16;
3234         break;
3235     default:
3236         g_assert_not_reached();
3237     }
3238 
3239     /*
3240      * Assume the stack is sufficiently aligned.
3241      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3242      * and do not require 16 byte vector alignment.  This seems slightly
3243      * easier than fully parameterizing the above switch statement.
3244      */
3245     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3246     off = ROUND_UP(s->current_frame_offset, align);
3247 
3248     /* If we've exhausted the stack frame, restart with a smaller TB. */
3249     if (off + size > s->frame_end) {
3250         tcg_raise_tb_overflow(s);
3251     }
3252     s->current_frame_offset = off + size;
3253 
3254     ts->mem_offset = off;
3255 #if defined(__sparc__)
3256     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3257 #endif
3258     ts->mem_base = s->frame_temp;
3259     ts->mem_allocated = 1;
3260 }
3261 
3262 /* Assign @reg to @ts, and update reg_to_temp[]. */
3263 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3264 {
3265     if (ts->val_type == TEMP_VAL_REG) {
3266         TCGReg old = ts->reg;
3267         tcg_debug_assert(s->reg_to_temp[old] == ts);
3268         if (old == reg) {
3269             return;
3270         }
3271         s->reg_to_temp[old] = NULL;
3272     }
3273     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3274     s->reg_to_temp[reg] = ts;
3275     ts->val_type = TEMP_VAL_REG;
3276     ts->reg = reg;
3277 }
3278 
3279 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3280 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3281 {
3282     tcg_debug_assert(type != TEMP_VAL_REG);
3283     if (ts->val_type == TEMP_VAL_REG) {
3284         TCGReg reg = ts->reg;
3285         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3286         s->reg_to_temp[reg] = NULL;
3287     }
3288     ts->val_type = type;
3289 }
3290 
3291 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3292 
3293 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3294    mark it free; otherwise mark it dead.  */
3295 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3296 {
3297     TCGTempVal new_type;
3298 
3299     switch (ts->kind) {
3300     case TEMP_FIXED:
3301         return;
3302     case TEMP_GLOBAL:
3303     case TEMP_LOCAL:
3304         new_type = TEMP_VAL_MEM;
3305         break;
3306     case TEMP_NORMAL:
3307     case TEMP_EBB:
3308         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3309         break;
3310     case TEMP_CONST:
3311         new_type = TEMP_VAL_CONST;
3312         break;
3313     default:
3314         g_assert_not_reached();
3315     }
3316     set_temp_val_nonreg(s, ts, new_type);
3317 }
3318 
/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead means "dead" (vs negative for "free"). */
    temp_free_or_dead(s, ts, 1);
}
3324 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent temps already
       match memory.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize the constant in a
               register and fall through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3368 
3369 /* free register 'reg' by spilling the corresponding temporary if necessary */
3370 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3371 {
3372     TCGTemp *ts = s->reg_to_temp[reg];
3373     if (ts != NULL) {
3374         temp_sync(s, ts, allocated_regs, 0, -1);
3375     }
3376 }
3377 
3378 /**
3379  * tcg_reg_alloc:
3380  * @required_regs: Set of registers in which we must allocate.
3381  * @allocated_regs: Set of registers which must be avoided.
3382  * @preferred_regs: Set of registers we should prefer.
3383  * @rev: True if we search the registers in "indirect" order.
3384  *
3385  * The allocated register must be in @required_regs & ~@allocated_regs,
3386  * but if we can put it in @preferred_regs we may save a move later.
3387  */
3388 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3389                             TCGRegSet allocated_regs,
3390                             TCGRegSet preferred_regs, bool rev)
3391 {
3392     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3393     TCGRegSet reg_ct[2];
3394     const int *order;
3395 
3396     reg_ct[1] = required_regs & ~allocated_regs;
3397     tcg_debug_assert(reg_ct[1] != 0);
3398     reg_ct[0] = reg_ct[1] & preferred_regs;
3399 
3400     /* Skip the preferred_regs option if it cannot be satisfied,
3401        or if the preference made no difference.  */
3402     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3403 
3404     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3405 
3406     /* Try free registers, preferences first.  */
3407     for (j = f; j < 2; j++) {
3408         TCGRegSet set = reg_ct[j];
3409 
3410         if (tcg_regset_single(set)) {
3411             /* One register in the set.  */
3412             TCGReg reg = tcg_regset_first(set);
3413             if (s->reg_to_temp[reg] == NULL) {
3414                 return reg;
3415             }
3416         } else {
3417             for (i = 0; i < n; i++) {
3418                 TCGReg reg = order[i];
3419                 if (s->reg_to_temp[reg] == NULL &&
3420                     tcg_regset_test_reg(set, reg)) {
3421                     return reg;
3422                 }
3423             }
3424         }
3425     }
3426 
3427     /* We must spill something.  */
3428     for (j = f; j < 2; j++) {
3429         TCGRegSet set = reg_ct[j];
3430 
3431         if (tcg_regset_single(set)) {
3432             /* One register in the set.  */
3433             TCGReg reg = tcg_regset_first(set);
3434             tcg_reg_free(s, reg, allocated_regs);
3435             return reg;
3436         } else {
3437             for (i = 0; i < n; i++) {
3438                 TCGReg reg = order[i];
3439                 if (tcg_regset_test_reg(set, reg)) {
3440                     tcg_reg_free(s, reg, allocated_regs);
3441                     return reg;
3442                 }
3443             }
3444         }
3445     }
3446 
3447     tcg_abort();
3448 }
3449 
/*
 * Allocate a consecutive register pair; the lower register is returned
 * and the caller uses reg and reg+1.  Constraints mirror tcg_reg_alloc:
 * the pair must lie in @required_regs, avoid @allocated_regs, and a
 * choice within @preferred_regs may save a move later.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* Count how many of the pair are currently free (0-2);
                       only accept if it meets this round's threshold.  */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}
3495 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        /* Materialize the constant into a freshly allocated register.  */
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* Freshly materialized value has not been stored back.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        /* Reload from the canonical memory slot.  */
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}
3544 
3545 /* Save a temporary to memory. 'allocated_regs' is used in case a
3546    temporary registers needs to be allocated to store a constant.  */
3547 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3548 {
3549     /* The liveness analysis already ensures that globals are back
3550        in memory. Keep an tcg_debug_assert for safety. */
3551     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3552 }
3553 
3554 /* save globals to their canonical location and assume they can be
3555    modified be the following code. 'allocated_regs' is used in case a
3556    temporary registers needs to be allocated to store a constant. */
3557 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3558 {
3559     int i, n;
3560 
3561     for (i = 0, n = s->nb_globals; i < n; i++) {
3562         temp_save(s, &s->temps[i], allocated_regs);
3563     }
3564 }
3565 
3566 /* sync globals to their canonical location and assume they can be
3567    read by the following code. 'allocated_regs' is used in case a
3568    temporary registers needs to be allocated to store a constant. */
3569 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3570 {
3571     int i, n;
3572 
3573     for (i = 0, n = s->nb_globals; i < n; i++) {
3574         TCGTemp *ts = &s->temps[i];
3575         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3576                          || ts->kind == TEMP_FIXED
3577                          || ts->mem_coherent);
3578     }
3579 }
3580 
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    /* Walk only the non-global temps; globals are handled below.  */
    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_LOCAL:
            /* Local temps survive the end of the block: spill them.  */
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep an tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}
3611 
3612 /*
3613  * At a conditional branch, we assume all temporaries are dead unless
3614  * explicitly live-across-conditional-branch; all globals and local
3615  * temps are synced to their location.
3616  */
3617 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3618 {
3619     sync_globals(s, allocated_regs);
3620 
3621     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3622         TCGTemp *ts = &s->temps[i];
3623         /*
3624          * The liveness analysis already ensures that temps are dead.
3625          * Keep tcg_debug_asserts for safety.
3626          */
3627         switch (ts->kind) {
3628         case TEMP_LOCAL:
3629             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3630             break;
3631         case TEMP_NORMAL:
3632             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3633             break;
3634         case TEMP_EBB:
3635         case TEMP_CONST:
3636             break;
3637         default:
3638             g_assert_not_reached();
3639         }
3640     }
3641 }
3642 
3643 /*
3644  * Specialized code generation for INDEX_op_mov_* with a constant.
3645  */
3646 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3647                                   tcg_target_ulong val, TCGLifeData arg_life,
3648                                   TCGRegSet preferred_regs)
3649 {
3650     /* ENV should not be modified.  */
3651     tcg_debug_assert(!temp_readonly(ots));
3652 
3653     /* The movi is not explicitly generated here.  */
3654     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3655     ots->val = val;
3656     ots->mem_coherent = 0;
3657     if (NEED_SYNC_ARG(0)) {
3658         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3659     } else if (IS_DEAD_ARG(0)) {
3660         temp_dead(s, ots);
3661     }
3662 }
3663 
3664 /*
3665  * Specialized code generation for INDEX_op_mov_*.
3666  */
3667 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3668 {
3669     const TCGLifeData arg_life = op->life;
3670     TCGRegSet allocated_regs, preferred_regs;
3671     TCGTemp *ts, *ots;
3672     TCGType otype, itype;
3673     TCGReg oreg, ireg;
3674 
3675     allocated_regs = s->reserved_regs;
3676     preferred_regs = output_pref(op, 0);
3677     ots = arg_temp(op->args[0]);
3678     ts = arg_temp(op->args[1]);
3679 
3680     /* ENV should not be modified.  */
3681     tcg_debug_assert(!temp_readonly(ots));
3682 
3683     /* Note that otype != itype for no-op truncation.  */
3684     otype = ots->type;
3685     itype = ts->type;
3686 
3687     if (ts->val_type == TEMP_VAL_CONST) {
3688         /* propagate constant or generate sti */
3689         tcg_target_ulong val = ts->val;
3690         if (IS_DEAD_ARG(1)) {
3691             temp_dead(s, ts);
3692         }
3693         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3694         return;
3695     }
3696 
3697     /* If the source value is in memory we're going to be forced
3698        to have it in a register in order to perform the copy.  Copy
3699        the SOURCE value into its own register first, that way we
3700        don't have to reload SOURCE the next time it is used. */
3701     if (ts->val_type == TEMP_VAL_MEM) {
3702         temp_load(s, ts, tcg_target_available_regs[itype],
3703                   allocated_regs, preferred_regs);
3704     }
3705     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3706     ireg = ts->reg;
3707 
3708     if (IS_DEAD_ARG(0)) {
3709         /* mov to a non-saved dead register makes no sense (even with
3710            liveness analysis disabled). */
3711         tcg_debug_assert(NEED_SYNC_ARG(0));
3712         if (!ots->mem_allocated) {
3713             temp_allocate_frame(s, ots);
3714         }
3715         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3716         if (IS_DEAD_ARG(1)) {
3717             temp_dead(s, ts);
3718         }
3719         temp_dead(s, ots);
3720         return;
3721     }
3722 
3723     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3724         /*
3725          * The mov can be suppressed.  Kill input first, so that it
3726          * is unlinked from reg_to_temp, then set the output to the
3727          * reg that we saved from the input.
3728          */
3729         temp_dead(s, ts);
3730         oreg = ireg;
3731     } else {
3732         if (ots->val_type == TEMP_VAL_REG) {
3733             oreg = ots->reg;
3734         } else {
3735             /* Make sure to not spill the input register during allocation. */
3736             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3737                                  allocated_regs | ((TCGRegSet)1 << ireg),
3738                                  preferred_regs, ots->indirect_base);
3739         }
3740         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3741             /*
3742              * Cross register class move not supported.
3743              * Store the source register into the destination slot
3744              * and leave the destination temp as TEMP_VAL_MEM.
3745              */
3746             assert(!temp_readonly(ots));
3747             if (!ts->mem_allocated) {
3748                 temp_allocate_frame(s, ots);
3749             }
3750             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3751             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3752             ots->mem_coherent = 1;
3753             return;
3754         }
3755     }
3756     set_temp_val_reg(s, ots, oreg);
3757     ots->mem_coherent = 0;
3758 
3759     if (NEED_SYNC_ARG(0)) {
3760         temp_sync(s, ots, allocated_regs, 0, 0);
3761     }
3762 }
3763 
3764 /*
3765  * Specialized code generation for INDEX_op_dup_vec.
3766  */
3767 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3768 {
3769     const TCGLifeData arg_life = op->life;
3770     TCGRegSet dup_out_regs, dup_in_regs;
3771     TCGTemp *its, *ots;
3772     TCGType itype, vtype;
3773     unsigned vece;
3774     int lowpart_ofs;
3775     bool ok;
3776 
3777     ots = arg_temp(op->args[0]);
3778     its = arg_temp(op->args[1]);
3779 
3780     /* ENV should not be modified.  */
3781     tcg_debug_assert(!temp_readonly(ots));
3782 
3783     itype = its->type;
3784     vece = TCGOP_VECE(op);
3785     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3786 
3787     if (its->val_type == TEMP_VAL_CONST) {
3788         /* Propagate constant via movi -> dupi.  */
3789         tcg_target_ulong val = its->val;
3790         if (IS_DEAD_ARG(1)) {
3791             temp_dead(s, its);
3792         }
3793         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
3794         return;
3795     }
3796 
3797     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3798     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3799 
3800     /* Allocate the output register now.  */
3801     if (ots->val_type != TEMP_VAL_REG) {
3802         TCGRegSet allocated_regs = s->reserved_regs;
3803         TCGReg oreg;
3804 
3805         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3806             /* Make sure to not spill the input register. */
3807             tcg_regset_set_reg(allocated_regs, its->reg);
3808         }
3809         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3810                              output_pref(op, 0), ots->indirect_base);
3811         set_temp_val_reg(s, ots, oreg);
3812     }
3813 
3814     switch (its->val_type) {
3815     case TEMP_VAL_REG:
3816         /*
3817          * The dup constriaints must be broad, covering all possible VECE.
3818          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3819          * to fail, indicating that extra moves are required for that case.
3820          */
3821         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3822             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3823                 goto done;
3824             }
3825             /* Try again from memory or a vector input register.  */
3826         }
3827         if (!its->mem_coherent) {
3828             /*
3829              * The input register is not synced, and so an extra store
3830              * would be required to use memory.  Attempt an integer-vector
3831              * register move first.  We do not have a TCGRegSet for this.
3832              */
3833             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3834                 break;
3835             }
3836             /* Sync the temp back to its slot and load from there.  */
3837             temp_sync(s, its, s->reserved_regs, 0, 0);
3838         }
3839         /* fall through */
3840 
3841     case TEMP_VAL_MEM:
3842         lowpart_ofs = 0;
3843         if (HOST_BIG_ENDIAN) {
3844             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
3845         }
3846         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3847                              its->mem_offset + lowpart_ofs)) {
3848             goto done;
3849         }
3850         /* Load the input into the destination vector register. */
3851         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3852         break;
3853 
3854     default:
3855         g_assert_not_reached();
3856     }
3857 
3858     /* We now have a vector input register, so dup must succeed. */
3859     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3860     tcg_debug_assert(ok);
3861 
3862  done:
3863     ots->mem_coherent = 0;
3864     if (IS_DEAD_ARG(1)) {
3865         temp_dead(s, its);
3866     }
3867     if (NEED_SYNC_ARG(0)) {
3868         temp_sync(s, ots, s->reserved_regs, 0, 0);
3869     }
3870     if (IS_DEAD_ARG(0)) {
3871         temp_dead(s, ots);
3872     }
3873 }
3874 
/*
 * Generic register allocation for one TCG op: satisfy the input register
 * constraints, free dead temps, handle call-clobber/side-effect flags,
 * satisfy the output constraints, emit the instruction, then sync or
 * free the outputs per liveness.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-difficulty order, not arg order.  */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The loaded register may still miss the constraint.  */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /* Reuse the existing pair only when both halves die here,
                   are writable, already sit in consecutive free registers,
                   and the low register satisfies the constraint.  */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half already fixed our register to its +1.  */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /* Reuse the current register only if it dies here, is
               writable, and reg-1 is free to become the pair's low half. */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Allocate a pair whose low half can be reg-1, then take
               the high half for this input.  */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of its aliased input.  */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Output must not overlap any input register.  */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4201 
/*
 * Register allocation for INDEX_op_dup2_vec: build a vector by
 * replicating a 64-bit value given as two 32-bit halves.  Returns
 * false if the caller must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the constant.  */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* 'its' is the half that lives at the lower address.  */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4289 
4290 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4291                          TCGRegSet allocated_regs)
4292 {
4293     if (ts->val_type == TEMP_VAL_REG) {
4294         if (ts->reg != reg) {
4295             tcg_reg_free(s, reg, allocated_regs);
4296             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4297                 /*
4298                  * Cross register class move not supported.  Sync the
4299                  * temp back to its slot and load from there.
4300                  */
4301                 temp_sync(s, ts, allocated_regs, 0, 0);
4302                 tcg_out_ld(s, ts->type, reg,
4303                            ts->mem_base->reg, ts->mem_offset);
4304             }
4305         }
4306     } else {
4307         TCGRegSet arg_set = 0;
4308 
4309         tcg_reg_free(s, reg, allocated_regs);
4310         tcg_regset_set_reg(arg_set, reg);
4311         temp_load(s, ts, arg_set, allocated_regs, 0);
4312     }
4313 }
4314 
4315 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4316                          TCGRegSet allocated_regs)
4317 {
4318     /*
4319      * When the destination is on the stack, load up the temp and store.
4320      * If there are many call-saved registers, the temp might live to
4321      * see another use; otherwise it'll be discarded.
4322      */
4323     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4324     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4325                TCG_TARGET_CALL_STACK_OFFSET +
4326                stk_slot * sizeof(tcg_target_long));
4327 }
4328 
4329 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4330                             TCGTemp *ts, TCGRegSet *allocated_regs)
4331 {
4332     if (REG_P(l)) {
4333         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4334         load_arg_reg(s, reg, ts, *allocated_regs);
4335         tcg_regset_set_reg(*allocated_regs, reg);
4336     } else {
4337         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4338                      ts, *allocated_regs);
4339     }
4340 }
4341 
/*
 * Register-allocate and emit one helper call op.
 *
 * Ordering matters: (1) inputs are moved into their ABI locations,
 * (2) dead input temps release their registers, (3) all call-clobbered
 * registers are flushed, (4) globals are saved or synced according to
 * the helper's flags, (5) the call is emitted, (6) outputs are bound
 * to the ABI return registers and then synced/discarded per liveness.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_regs[i];

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            /* The output now lives in the return register only;
               its memory slot is stale until synced.  */
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4426 
4427 #ifdef CONFIG_PROFILER
4428 
/*
 * Accumulate one profiling field from a live context into a snapshot.
 * Macros avoid copy/paste errors across the long field list below.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* As PROF_ADD, but keep the maximum value rather than the sum. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4442 
4443 /* Pass in a zero'ed @prof */
4444 static inline
4445 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4446 {
4447     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4448     unsigned int i;
4449 
4450     for (i = 0; i < n_ctxs; i++) {
4451         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4452         const TCGProfile *orig = &s->prof;
4453 
4454         if (counters) {
4455             PROF_ADD(prof, orig, cpu_exec_time);
4456             PROF_ADD(prof, orig, tb_count1);
4457             PROF_ADD(prof, orig, tb_count);
4458             PROF_ADD(prof, orig, op_count);
4459             PROF_MAX(prof, orig, op_count_max);
4460             PROF_ADD(prof, orig, temp_count);
4461             PROF_MAX(prof, orig, temp_count_max);
4462             PROF_ADD(prof, orig, del_op_count);
4463             PROF_ADD(prof, orig, code_in_len);
4464             PROF_ADD(prof, orig, code_out_len);
4465             PROF_ADD(prof, orig, search_out_len);
4466             PROF_ADD(prof, orig, interm_time);
4467             PROF_ADD(prof, orig, code_time);
4468             PROF_ADD(prof, orig, la_time);
4469             PROF_ADD(prof, orig, opt_time);
4470             PROF_ADD(prof, orig, restore_count);
4471             PROF_ADD(prof, orig, restore_time);
4472         }
4473         if (table) {
4474             int i;
4475 
4476             for (i = 0; i < NB_OPS; i++) {
4477                 PROF_ADD(prof, orig, table_op_count[i]);
4478             }
4479         }
4480     }
4481 }
4482 
4483 #undef PROF_ADD
4484 #undef PROF_MAX
4485 
/* Snapshot only the scalar counters (not the per-opcode table). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4490 
/* Snapshot only the per-opcode table (not the scalar counters). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4495 
4496 void tcg_dump_op_count(GString *buf)
4497 {
4498     TCGProfile prof = {};
4499     int i;
4500 
4501     tcg_profile_snapshot_table(&prof);
4502     for (i = 0; i < NB_OPS; i++) {
4503         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4504                                prof.table_op_count[i]);
4505     }
4506 }
4507 
4508 int64_t tcg_cpu_exec_time(void)
4509 {
4510     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4511     unsigned int i;
4512     int64_t ret = 0;
4513 
4514     for (i = 0; i < n_ctxs; i++) {
4515         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4516         const TCGProfile *prof = &s->prof;
4517 
4518         ret += qatomic_read(&prof->cpu_exec_time);
4519     }
4520     return ret;
4521 }
4522 #else
/* Stub when CONFIG_PROFILER is disabled: report unavailability. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4527 
/* Stub when CONFIG_PROFILER is disabled; reports and exits, never returns. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4533 #endif
4534 
4535 
/*
 * Generate host code for the ops accumulated in @s->ops into @tb's
 * code buffer.
 *
 * Returns the size in bytes of the generated code on success.
 * Returns -1 if the code buffer (pending) overflowed its high-water
 * mark, or -2 if the TB grew past what gen_insn_end_off (16 bits) or
 * relocation resolution can represent; in either case the caller is
 * expected to restart translation with a smaller/fresh buffer.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Account op and temp counts for this TB before codegen.  */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization, if requested.  */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    /* Liveness analysis; a second pass follows if indirects changed.  */
    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Main codegen loop: allocate registers and emit host code per op.  */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn's code range, then record
               the start data for the new one.  */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Close the final guest insn's code range.  */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4776 
4777 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the aggregated JIT profiling
 * counters to @buf.  Divisors are clamped to 1 to avoid division by
 * zero when no TBs have been translated yet.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    /* NOTE(review): the "2.4 GHz" conversion assumes a fixed clock rate
       for the profile tick source — purely informational.  */
    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Avoid division by zero in the percentage lines below.  */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
4841 #else
/* Stub when CONFIG_PROFILER is disabled: report unavailability. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4846 #endif
4847 
4848 #ifdef ELF_HOST_MACHINE
4849 /* In order to use this feature, the backend needs to do three things:
4850 
4851    (1) Define ELF_HOST_MACHINE to indicate both what value to
4852        put into the ELF image and to indicate support for the feature.
4853 
4854    (2) Define tcg_register_jit.  This should create a buffer containing
4855        the contents of a .debug_frame section that describes the post-
4856        prologue unwind info for the tcg machine.
4857 
4858    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4859 */
4860 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* These declarations mirror GDB's JIT compilation interface; the
   names, layouts, and values are part of that contract and must not
   be changed.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One node of the doubly-linked list of registered symbol files.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The well-known descriptor GDB reads after its breakpoint fires.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint on this function; calling it notifies the
   debugger that __jit_debug_descriptor changed.  The empty asm keeps
   the call from being optimized away.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4891 
4892 /* End GDB interface.  */
4893 
static int find_string(const char *strtab, const char *str)
{
    /*
     * Return the byte offset of @str within string table @strtab.
     * The table starts with a NUL entry, so the scan begins at +1.
     * The caller guarantees @str is present; there is no "not found"
     * result and the loop does not otherwise terminate.
     */
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
4905 
4906 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4907                                  const void *debug_frame,
4908                                  size_t debug_frame_size)
4909 {
4910     struct __attribute__((packed)) DebugInfo {
4911         uint32_t  len;
4912         uint16_t  version;
4913         uint32_t  abbrev;
4914         uint8_t   ptr_size;
4915         uint8_t   cu_die;
4916         uint16_t  cu_lang;
4917         uintptr_t cu_low_pc;
4918         uintptr_t cu_high_pc;
4919         uint8_t   fn_die;
4920         char      fn_name[16];
4921         uintptr_t fn_low_pc;
4922         uintptr_t fn_high_pc;
4923         uint8_t   cu_eoc;
4924     };
4925 
4926     struct ElfImage {
4927         ElfW(Ehdr) ehdr;
4928         ElfW(Phdr) phdr;
4929         ElfW(Shdr) shdr[7];
4930         ElfW(Sym)  sym[2];
4931         struct DebugInfo di;
4932         uint8_t    da[24];
4933         char       str[80];
4934     };
4935 
4936     struct ElfImage *img;
4937 
4938     static const struct ElfImage img_template = {
4939         .ehdr = {
4940             .e_ident[EI_MAG0] = ELFMAG0,
4941             .e_ident[EI_MAG1] = ELFMAG1,
4942             .e_ident[EI_MAG2] = ELFMAG2,
4943             .e_ident[EI_MAG3] = ELFMAG3,
4944             .e_ident[EI_CLASS] = ELF_CLASS,
4945             .e_ident[EI_DATA] = ELF_DATA,
4946             .e_ident[EI_VERSION] = EV_CURRENT,
4947             .e_type = ET_EXEC,
4948             .e_machine = ELF_HOST_MACHINE,
4949             .e_version = EV_CURRENT,
4950             .e_phoff = offsetof(struct ElfImage, phdr),
4951             .e_shoff = offsetof(struct ElfImage, shdr),
4952             .e_ehsize = sizeof(ElfW(Shdr)),
4953             .e_phentsize = sizeof(ElfW(Phdr)),
4954             .e_phnum = 1,
4955             .e_shentsize = sizeof(ElfW(Shdr)),
4956             .e_shnum = ARRAY_SIZE(img->shdr),
4957             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4958 #ifdef ELF_HOST_FLAGS
4959             .e_flags = ELF_HOST_FLAGS,
4960 #endif
4961 #ifdef ELF_OSABI
4962             .e_ident[EI_OSABI] = ELF_OSABI,
4963 #endif
4964         },
4965         .phdr = {
4966             .p_type = PT_LOAD,
4967             .p_flags = PF_X,
4968         },
4969         .shdr = {
4970             [0] = { .sh_type = SHT_NULL },
4971             /* Trick: The contents of code_gen_buffer are not present in
4972                this fake ELF file; that got allocated elsewhere.  Therefore
4973                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4974                will not look for contents.  We can record any address.  */
4975             [1] = { /* .text */
4976                 .sh_type = SHT_NOBITS,
4977                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4978             },
4979             [2] = { /* .debug_info */
4980                 .sh_type = SHT_PROGBITS,
4981                 .sh_offset = offsetof(struct ElfImage, di),
4982                 .sh_size = sizeof(struct DebugInfo),
4983             },
4984             [3] = { /* .debug_abbrev */
4985                 .sh_type = SHT_PROGBITS,
4986                 .sh_offset = offsetof(struct ElfImage, da),
4987                 .sh_size = sizeof(img->da),
4988             },
4989             [4] = { /* .debug_frame */
4990                 .sh_type = SHT_PROGBITS,
4991                 .sh_offset = sizeof(struct ElfImage),
4992             },
4993             [5] = { /* .symtab */
4994                 .sh_type = SHT_SYMTAB,
4995                 .sh_offset = offsetof(struct ElfImage, sym),
4996                 .sh_size = sizeof(img->sym),
4997                 .sh_info = 1,
4998                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4999                 .sh_entsize = sizeof(ElfW(Sym)),
5000             },
5001             [6] = { /* .strtab */
5002                 .sh_type = SHT_STRTAB,
5003                 .sh_offset = offsetof(struct ElfImage, str),
5004                 .sh_size = sizeof(img->str),
5005             }
5006         },
5007         .sym = {
5008             [1] = { /* code_gen_buffer */
5009                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5010                 .st_shndx = 1,
5011             }
5012         },
5013         .di = {
5014             .len = sizeof(struct DebugInfo) - 4,
5015             .version = 2,
5016             .ptr_size = sizeof(void *),
5017             .cu_die = 1,
5018             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5019             .fn_die = 2,
5020             .fn_name = "code_gen_buffer"
5021         },
5022         .da = {
5023             1,          /* abbrev number (the cu) */
5024             0x11, 1,    /* DW_TAG_compile_unit, has children */
5025             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5026             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5027             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5028             0, 0,       /* end of abbrev */
5029             2,          /* abbrev number (the fn) */
5030             0x2e, 0,    /* DW_TAG_subprogram, no children */
5031             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5032             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5033             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5034             0, 0,       /* end of abbrev */
5035             0           /* no more abbrev */
5036         },
5037         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5038                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5039     };
5040 
5041     /* We only need a single jit entry; statically allocate it.  */
5042     static struct jit_code_entry one_entry;
5043 
5044     uintptr_t buf = (uintptr_t)buf_ptr;
5045     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5046     DebugFrameHeader *dfh;
5047 
5048     img = g_malloc(img_size);
5049     *img = img_template;
5050 
5051     img->phdr.p_vaddr = buf;
5052     img->phdr.p_paddr = buf;
5053     img->phdr.p_memsz = buf_size;
5054 
5055     img->shdr[1].sh_name = find_string(img->str, ".text");
5056     img->shdr[1].sh_addr = buf;
5057     img->shdr[1].sh_size = buf_size;
5058 
5059     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5060     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5061 
5062     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5063     img->shdr[4].sh_size = debug_frame_size;
5064 
5065     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5066     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5067 
5068     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5069     img->sym[1].st_value = buf;
5070     img->sym[1].st_size = buf_size;
5071 
5072     img->di.cu_low_pc = buf;
5073     img->di.cu_high_pc = buf + buf_size;
5074     img->di.fn_low_pc = buf;
5075     img->di.fn_high_pc = buf + buf_size;
5076 
5077     dfh = (DebugFrameHeader *)(img + 1);
5078     memcpy(dfh, debug_frame, debug_frame_size);
5079     dfh->fde.func_start = buf;
5080     dfh->fde.func_len = buf_size;
5081 
5082 #ifdef DEBUG_JIT
5083     /* Enable this block to be able to debug the ELF image file creation.
5084        One can use readelf, objdump, or other inspection utilities.  */
5085     {
5086         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5087         FILE *f = fopen(jit, "w+b");
5088         if (f) {
5089             if (fwrite(img, img_size, 1, f) != img_size) {
5090                 /* Avoid stupid unused return value warning for fwrite.  */
5091             }
5092             fclose(f);
5093         }
5094     }
5095 #endif
5096 
5097     one_entry.symfile_addr = img;
5098     one_entry.symfile_size = img_size;
5099 
5100     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5101     __jit_debug_descriptor.relevant_entry = &one_entry;
5102     __jit_debug_descriptor.first_entry = &one_entry;
5103     __jit_debug_register_code();
5104 }
5105 #else
5106 /* No support for the feature.  Provide the entry point expected by exec.c,
5107    and implement the internal function we declared earlier.  */
5108 
/* No ELF_HOST_MACHINE support: internal registration is a no-op. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5114 
/* No ELF_HOST_MACHINE support: public entry point is a no-op. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5118 #endif /* ELF_HOST_MACHINE */
5119 
5120 #if !TCG_TARGET_MAYBE_vec
/* Backends without vector support must never reach vector expansion. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5125 #endif
5126