xref: /openbmc/qemu/tcg/tcg.c (revision bb18151d8bd9bedc497ee9d4e8d81b39a4e5bbf6)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 #include "accel/tcg/perf.h"
65 
66 /* Forward declarations for functions declared in tcg-target.c.inc and
67    used here. */
68 static void tcg_target_init(TCGContext *s);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
73 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * DWARF .debug_frame Common Information Entry, shared by the
 * frame images built for the GDB JIT interface.  'len' is aligned
 * so the structure can be emitted directly into that image.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));  /* entry length, excluding this field */
    uint32_t id;                /* CIE identifier */
    uint8_t version;            /* .debug_frame format version */
    char augmentation[1];       /* NUL-terminated augmentation string */
    uint8_t code_align;         /* code alignment factor (ULEB128, single byte here) */
    uint8_t data_align;         /* data alignment factor (SLEB128, single byte here) */
    uint8_t return_column;      /* column holding the return address */
} DebugFrameCIE;
83 
/*
 * Fixed prefix of a DWARF Frame Description Entry: which CIE it
 * belongs to and the code range it covers.  QEMU_PACKED keeps the
 * on-disk DWARF layout exact despite the mixed 32-bit/pointer fields.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));  /* entry length, excluding this field */
    uint32_t cie_offset;        /* offset of the owning CIE */
    uintptr_t func_start;       /* start address of the covered code */
    uintptr_t func_len;         /* length of the covered code */
} DebugFrameFDEHeader;
90 
/* A CIE immediately followed by an FDE header: the minimal complete
   .debug_frame image describing one contiguous code region. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
95 
96 static void tcg_register_jit_int(const void *buf, size_t size,
97                                  const void *debug_frame,
98                                  size_t debug_frame_size)
99     __attribute__((unused));
100 
101 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
108 static void tcg_out_goto_tb(TCGContext *s, int which);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114                             TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120                            unsigned vecl, unsigned vece,
121                            const TCGArg args[TCG_MAX_OP_ARGS],
122                            const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Stubs for hosts without vector support (TCG_TARGET_MAYBE_vec unset).
 * Vector opcodes are never generated in that configuration, so
 * reaching any of these indicates a front-end/back-end mismatch.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          const TCGHelperInfo *info);
153 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
158 TCGContext tcg_init_ctx;
159 __thread TCGContext *tcg_ctx;
160 
161 TCGContext **tcg_ctxs;
162 unsigned int tcg_cur_ctxs;
163 unsigned int tcg_max_ctxs;
164 TCGv_env cpu_env = 0;
165 const void *tcg_code_gen_epilogue;
166 uintptr_t tcg_splitwx_diff;
167 
168 #ifndef CONFIG_TCG_INTERPRETER
169 tcg_prologue_fn *tcg_qemu_tb_exec;
170 #endif
171 
172 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
173 static TCGRegSet tcg_target_call_clobber_regs;
174 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the generated-code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at *p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
187 
188 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
189 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
190 {
191     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
192         *s->code_ptr++ = v;
193     } else {
194         tcg_insn_unit *p = s->code_ptr;
195         memcpy(p, &v, sizeof(v));
196         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
197     }
198 }
199 
200 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
201                                                        uint16_t v)
202 {
203     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
204         *p = v;
205     } else {
206         memcpy(p, &v, sizeof(v));
207     }
208 }
209 #endif
210 
211 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
212 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
213 {
214     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
215         *s->code_ptr++ = v;
216     } else {
217         tcg_insn_unit *p = s->code_ptr;
218         memcpy(p, &v, sizeof(v));
219         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
220     }
221 }
222 
223 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
224                                                        uint32_t v)
225 {
226     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
227         *p = v;
228     } else {
229         memcpy(p, &v, sizeof(v));
230     }
231 }
232 #endif
233 
234 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
235 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
236 {
237     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
238         *s->code_ptr++ = v;
239     } else {
240         tcg_insn_unit *p = s->code_ptr;
241         memcpy(p, &v, sizeof(v));
242         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
243     }
244 }
245 
246 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
247                                                        uint64_t v)
248 {
249     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
250         *p = v;
251     } else {
252         memcpy(p, &v, sizeof(v));
253     }
254 }
255 #endif
256 
257 /* label relocation processing */
258 
259 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
260                           TCGLabel *l, intptr_t addend)
261 {
262     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
263 
264     r->type = type;
265     r->ptr = code_ptr;
266     r->addend = addend;
267     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
268 }
269 
/*
 * Bind label @l to the current output position.  The stored value is
 * the read-execute alias of the code pointer (tcg_splitwx_to_rx), so
 * later relocations patch in addresses valid at execution time.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);  /* a label may be bound only once */
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
276 
277 TCGLabel *gen_new_label(void)
278 {
279     TCGContext *s = tcg_ctx;
280     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
281 
282     memset(l, 0, sizeof(TCGLabel));
283     l->id = s->nb_labels++;
284     QSIMPLEQ_INIT(&l->relocs);
285 
286     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
287 
288     return l;
289 }
290 
291 static bool tcg_resolve_relocs(TCGContext *s)
292 {
293     TCGLabel *l;
294 
295     QSIMPLEQ_FOREACH(l, &s->labels, next) {
296         TCGRelocation *r;
297         uintptr_t value = l->u.value;
298 
299         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
300             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
301                 return false;
302             }
303         }
304     }
305     return true;
306 }
307 
/* Record the current code offset as jmp_reset_offset[@which] in the
   TB under construction. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
316 
/* Record the current code offset as jmp_insn_offset[@which] in the
   TB under construction.  Unused by backends that do not patch the
   jump instruction itself, hence G_GNUC_UNUSED. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
325 
/* Return the address of jmp_target_addr[@which] in the TB under
   construction, translated to the read-execute mapping. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
334 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit to the sigsetjmp site that owns s->jmp_trans;
       the -2 status asks it to retry the translation. */
    siglongjmp(s->jmp_trans, -2);
}
341 
342 #define C_PFX1(P, A)                    P##A
343 #define C_PFX2(P, A, B)                 P##A##_##B
344 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
345 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
346 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
347 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
348 
349 /* Define an enumeration for the various combinations. */
350 
351 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
352 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
353 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
354 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
355 
356 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
357 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
358 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
359 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
360 
361 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
362 
363 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
364 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
365 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
366 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
367 
368 typedef enum {
369 #include "tcg-target-con-set.h"
370 } TCGConstraintSetIndex;
371 
372 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
373 
374 #undef C_O0_I1
375 #undef C_O0_I2
376 #undef C_O0_I3
377 #undef C_O0_I4
378 #undef C_O1_I1
379 #undef C_O1_I2
380 #undef C_O1_I3
381 #undef C_O1_I4
382 #undef C_N1_I2
383 #undef C_O2_I1
384 #undef C_O2_I2
385 #undef C_O2_I3
386 #undef C_O2_I4
387 
388 /* Put all of the constraint sets into an array, indexed by the enum. */
389 
390 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
391 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
392 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
393 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
394 
395 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
396 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
397 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
398 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
399 
400 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
401 
402 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
403 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
404 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
405 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
406 
407 static const TCGTargetOpDef constraint_sets[] = {
408 #include "tcg-target-con-set.h"
409 };
410 
411 
412 #undef C_O0_I1
413 #undef C_O0_I2
414 #undef C_O0_I3
415 #undef C_O0_I4
416 #undef C_O1_I1
417 #undef C_O1_I2
418 #undef C_O1_I3
419 #undef C_O1_I4
420 #undef C_N1_I2
421 #undef C_O2_I1
422 #undef C_O2_I2
423 #undef C_O2_I3
424 #undef C_O2_I4
425 
426 /* Expand the enumerator to be returned from tcg_target_op_def(). */
427 
428 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
429 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
430 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
431 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
432 
433 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
434 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
435 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
436 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
437 
438 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
439 
440 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
441 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
442 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
443 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
444 
445 #include "tcg-target.c.inc"
446 
/* Allocate per-context plugin state: a zeroed TB descriptor and its
   instruction array (with per-insn cleanup hook).  No-op unless
   CONFIG_PLUGIN is enabled. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
455 
456 /*
457  * All TCG threads except the parent (i.e. the one that called tcg_context_init
458  * and registered the target's TCG globals) must register with this function
459  * before initiating translation.
460  *
461  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
462  * of tcg_region_init() for the reasoning behind this.
463  *
464  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
465  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
466  * is not used anymore for translation once this function is called.
467  *
468  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
469  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
470  */
471 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode uses a single region, so every thread simply shares
       the init context (see the comment above). */
    tcg_ctx = &tcg_init_ctx;
}
476 #else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a full copy of the initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base in the copy still points into the parent's
               temps[]; redirect it to this context's copy of the
               same temp, found by index. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary contexts need their own plugin state and an
           initial region slice; the first slot presumably inherits
           what tcg_context_init set up via the struct copy above. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    /* Publish as this thread's context. */
    tcg_ctx = s;
}
505 #endif /* !CONFIG_USER_ONLY */
506 
507 /* pool based memory allocation */
508 void *tcg_malloc_internal(TCGContext *s, int size)
509 {
510     TCGPool *p;
511     int pool_size;
512 
513     if (size > TCG_POOL_CHUNK_SIZE) {
514         /* big malloc: insert a new pool (XXX: could optimize) */
515         p = g_malloc(sizeof(TCGPool) + size);
516         p->size = size;
517         p->next = s->pool_first_large;
518         s->pool_first_large = p;
519         return p->data;
520     } else {
521         p = s->pool_current;
522         if (!p) {
523             p = s->pool_first;
524             if (!p)
525                 goto new_pool;
526         } else {
527             if (!p->next) {
528             new_pool:
529                 pool_size = TCG_POOL_CHUNK_SIZE;
530                 p = g_malloc(sizeof(TCGPool) + pool_size);
531                 p->size = pool_size;
532                 p->next = NULL;
533                 if (s->pool_current) {
534                     s->pool_current->next = p;
535                 } else {
536                     s->pool_first = p;
537                 }
538             } else {
539                 p = p->next;
540             }
541         }
542     }
543     s->pool_current = p;
544     s->pool_cur = p->data + size;
545     s->pool_end = p->data + p->size;
546     return p->data;
547 }
548 
549 void tcg_pool_reset(TCGContext *s)
550 {
551     TCGPool *p, *t;
552     for (p = s->pool_first_large; p; p = t) {
553         t = p->next;
554         g_free(p);
555     }
556     s->pool_first_large = NULL;
557     s->pool_cur = s->pool_end = NULL;
558     s->pool_current = NULL;
559 }
560 
561 #include "exec/helper-proto.h"
562 
563 static TCGHelperInfo all_helpers[] = {
564 #include "exec/helper-tcg.h"
565 };
566 static GHashTable *helper_table;
567 
568 #ifdef CONFIG_TCG_INTERPRETER
/* Map a dh_typecode_* value to the matching libffi type descriptor,
   used to marshal helper calls in the TCG interpreter. */
static ffi_type *typecode_to_ffi(int argmask)
{
    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    }
    /* All typecodes produced by the helper machinery are covered. */
    g_assert_not_reached();
}
587 
/*
 * Build a libffi call descriptor (ffi_cif) for every helper,
 * de-duplicating by typemask so helpers with identical signatures
 * share one cif.  The cif and its argument-type array are allocated
 * as a single chunk and stay live for the process lifetime via
 * all_helpers[].cif; only the dedup table is temporary.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];   /* trailing arg-type pointers, one chunk */
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a cif already built for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);   /* each argument is a 3-bit typecode */

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);  /* low 3 bits: return type */
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
638 #endif /* CONFIG_TCG_INTERPRETER */
639 
/* Running counters used while laying out a helper call's arguments
   (see init_call_layout and the layout_arg_* helpers). */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
646 
647 static void layout_arg_even(TCGCumulativeArgs *cum)
648 {
649     cum->arg_slot += cum->arg_slot & 1;
650 }
651 
/*
 * Lay out one argument of kind @kind into the next single slot,
 * advancing both the input-location index and the slot counter.
 * The compound literal zeroes all unnamed fields of the location.
 */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
665 
/*
 * Lay out one argument spread across @n consecutive slots (e.g. a
 * 64-bit value on a 32-bit host), all sharing the caller-visible
 * argument index and distinguished by tmp_subindex.
 */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
683 
/*
 * Compute the call layout for one helper from its encoded typemask:
 * fill in info->nr_out/out_kind for the return value and
 * info->nr_in/in[] for the arguments, honoring the host ABI's
 * argument-passing convention (TCG_TARGET_CALL_ARG_I32/I64).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;   /* low 3 bits encode the return type */
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One or two host registers, depending on the word size. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    default:
        g_assert_not_reached();
    }
    assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low typecode bit selects _EXTEND_U vs _EXTEND_S —
                   assumes signed typecodes are the odd values. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    /* 64-bit value occupies two consecutive slots. */
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
    assert(cum.ref_slot <= max_stk_slots);
}
787 
788 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
789 static void process_op_defs(TCGContext *s);
790 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
791                                             TCGReg reg, const char *name);
792 
/*
 * One-time initialization of the parent TCG context: allocate the
 * per-opcode constraint storage, register all helpers, initialize
 * the backend, build the indirect register allocation order, set up
 * the tcg_ctxs[] bookkeeping, and create the "env" global.
 * Must run before any translation.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers: reverse
       those, keep the call-clobbered tail in its original order. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Create the global "env" pointer in the reserved TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
876 
/*
 * TCG start-up entry point: initialize the parent context, then hand
 * the buffer-sizing parameters on to tcg_region_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
882 
883 /*
884  * Allocate TBs right before their corresponding translated code, making
885  * sure that TBs and code are on different cache lines.
886  */
887 TranslationBlock *tcg_tb_alloc(TCGContext *s)
888 {
889     uintptr_t align = qemu_icache_linesize;
890     TranslationBlock *tb;
891     void *next;
892 
893  retry:
894     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
895     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
896 
897     if (unlikely(next > s->code_gen_highwater)) {
898         if (tcg_region_alloc(s)) {
899             return NULL;
900         }
901         goto retry;
902     }
903     qatomic_set(&s->code_gen_ptr, next);
904     s->data_gen_ptr = NULL;
905     return tb;
906 }
907 
/*
 * Emit the host prologue/epilogue code at the start of the code buffer
 * and publish it as the TB entry point (tcg_qemu_tb_exec).
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* The prologue is generated at the very start of the region buffer.  */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry point is the executable (RX) alias of the buffer start.  */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch.  */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble the code part, then hex-dump the trailing
                   constant-pool data word by word.  */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
989 
990 void tcg_func_start(TCGContext *s)
991 {
992     tcg_pool_reset(s);
993     s->nb_temps = s->nb_globals;
994 
995     /* No temps have been previously allocated for size or locality.  */
996     memset(s->free_temps, 0, sizeof(s->free_temps));
997 
998     /* No constant temps have been previously allocated. */
999     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1000         if (s->const_table[i]) {
1001             g_hash_table_remove_all(s->const_table[i]);
1002         }
1003     }
1004 
1005     s->nb_ops = 0;
1006     s->nb_labels = 0;
1007     s->current_frame_offset = s->frame_start;
1008 
1009 #ifdef CONFIG_DEBUG_TCG
1010     s->goto_tb_issue_mask = 0;
1011 #endif
1012 
1013     QTAILQ_INIT(&s->ops);
1014     QTAILQ_INIT(&s->free_ops);
1015     QSIMPLEQ_INIT(&s->labels);
1016 }
1017 
1018 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1019 {
1020     int n = s->nb_temps++;
1021 
1022     if (n >= TCG_MAX_TEMPS) {
1023         tcg_raise_tb_overflow(s);
1024     }
1025     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1026 }
1027 
1028 static TCGTemp *tcg_global_alloc(TCGContext *s)
1029 {
1030     TCGTemp *ts;
1031 
1032     tcg_debug_assert(s->nb_globals == s->nb_temps);
1033     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1034     s->nb_globals++;
1035     ts = tcg_temp_alloc(s);
1036     ts->kind = TEMP_GLOBAL;
1037 
1038     return ts;
1039 }
1040 
1041 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1042                                             TCGReg reg, const char *name)
1043 {
1044     TCGTemp *ts;
1045 
1046     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1047         tcg_abort();
1048     }
1049 
1050     ts = tcg_global_alloc(s);
1051     ts->base_type = type;
1052     ts->type = type;
1053     ts->kind = TEMP_FIXED;
1054     ts->reg = reg;
1055     ts->name = name;
1056     tcg_regset_set_reg(s->reserved_regs, reg);
1057 
1058     return ts;
1059 }
1060 
/*
 * Record the frame area [start, start + size) addressed relative to
 * host register REG, and register REG itself as the fixed global
 * "_frame" so generated code can address the frame.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1068 
/*
 * Create a global temp backed by memory at BASE + OFFSET.
 * On 32-bit hosts a 64-bit global is represented by two consecutive
 * 32-bit temps named NAME_0 / NAME_1; the first is returned.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit global on a 32-bit host counts as two indirects.  */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split the 64-bit global into low/high 32-bit halves at
           offset and offset + 4.  */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The pair must be adjacent so ts + 1 addressing works.  */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1128 
/*
 * Allocate a temporary of TYPE, reusing a previously freed temp of the
 * same type/kind when one is available.  temp_local selects TEMP_LOCAL
 * over TEMP_NORMAL.  On 32-bit hosts a 64-bit temp occupies two
 * consecutive TCGTemp slots; the first is returned.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* Free-list buckets: type, offset by TCG_TYPE_COUNT for locals.  */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* Allocate the adjacent high half of the 64-bit pair.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1175 
1176 TCGv_vec tcg_temp_new_vec(TCGType type)
1177 {
1178     TCGTemp *t;
1179 
1180 #ifdef CONFIG_DEBUG_TCG
1181     switch (type) {
1182     case TCG_TYPE_V64:
1183         assert(TCG_TARGET_HAS_v64);
1184         break;
1185     case TCG_TYPE_V128:
1186         assert(TCG_TARGET_HAS_v128);
1187         break;
1188     case TCG_TYPE_V256:
1189         assert(TCG_TARGET_HAS_v256);
1190         break;
1191     default:
1192         g_assert_not_reached();
1193     }
1194 #endif
1195 
1196     t = tcg_temp_new_internal(type, 0);
1197     return temp_tcgv_vec(t);
1198 }
1199 
1200 /* Create a new temp of the same type as an existing temp.  */
1201 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1202 {
1203     TCGTemp *t = tcgv_vec_temp(match);
1204 
1205     tcg_debug_assert(t->temp_allocated != 0);
1206 
1207     t = tcg_temp_new_internal(t->base_type, 0);
1208     return temp_tcgv_vec(t);
1209 }
1210 
/*
 * Return TS to the free pool so a later tcg_temp_new_internal of the
 * same base type and kind can reuse it.  Constants are never freed.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        /* Globals and fixed temps are never freed.  */
        g_assert_not_reached();
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    /* Bucket index matches tcg_temp_new_internal: base type, offset by
       TCG_TYPE_COUNT for non-NORMAL (i.e. local) temps.  */
    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
1244 
/*
 * Return the canonical TEMP_CONST temp holding VAL with the given TYPE,
 * creating it and interning it in the per-type hash table on first use.
 * Callers must not free or modify the returned temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Keys are pointers to the int64_t value stored in the temp.  */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: adjacent pair of
               32-bit TEMP_CONST temps.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp itself, which is never freed.  */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1299 
1300 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1301 {
1302     val = dup_const(vece, val);
1303     return temp_tcgv_vec(tcg_constant_internal(type, val));
1304 }
1305 
1306 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1307 {
1308     TCGTemp *t = tcgv_vec_temp(match);
1309 
1310     tcg_debug_assert(t->temp_allocated != 0);
1311     return tcg_constant_vec(t->base_type, vece, val);
1312 }
1313 
1314 TCGv_i32 tcg_const_i32(int32_t val)
1315 {
1316     TCGv_i32 t0;
1317     t0 = tcg_temp_new_i32();
1318     tcg_gen_movi_i32(t0, val);
1319     return t0;
1320 }
1321 
1322 TCGv_i64 tcg_const_i64(int64_t val)
1323 {
1324     TCGv_i64 t0;
1325     t0 = tcg_temp_new_i64();
1326     tcg_gen_movi_i64(t0, val);
1327     return t0;
1328 }
1329 
1330 TCGv_i32 tcg_const_local_i32(int32_t val)
1331 {
1332     TCGv_i32 t0;
1333     t0 = tcg_temp_local_new_i32();
1334     tcg_gen_movi_i32(t0, val);
1335     return t0;
1336 }
1337 
1338 TCGv_i64 tcg_const_local_i64(int64_t val)
1339 {
1340     TCGv_i64 t0;
1341     t0 = tcg_temp_local_new_i64();
1342     tcg_gen_movi_i64(t0, val);
1343     return t0;
1344 }
1345 
1346 #if defined(CONFIG_DEBUG_TCG)
1347 void tcg_clear_temp_count(void)
1348 {
1349     TCGContext *s = tcg_ctx;
1350     s->temps_in_use = 0;
1351 }
1352 
1353 int tcg_check_temp_count(void)
1354 {
1355     TCGContext *s = tcg_ctx;
1356     if (s->temps_in_use) {
1357         /* Clear the count so that we don't give another
1358          * warning immediately next time around.
1359          */
1360         s->temps_in_use = 0;
1361         return 1;
1362     }
1363     return 0;
1364 }
1365 #endif
1366 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and guest memory access: always available.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops: every backend implements these.  */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated by per-backend TCG_TARGET_HAS_*.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares only exist on 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops require 64-bit host registers.  */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops (note: TCG_TARGET_HAS_*_i64 macros are
       defined as 0 on 32-bit hosts).  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: need some host vector support, then per-op gating.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Backend-specific opcodes are assumed supported if defined.  */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1661 
1662 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1663 
/*
 * Emit an INDEX_op_call to helper FUNC with NARGS guest-level arguments
 * in ARGS and (optionally) result temp RET.  The call layout (how many
 * output/input slots, and how each argument is passed) comes from the
 * TCGHelperInfo registered for FUNC in helper_table.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* +2 for the trailing func and info words appended below.  */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Fill in the output slots: 0, 1, or 2 (a split double-word).  */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
        /* Double-word return: RET must be the first of an adjacent
           temp pair.  */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + 1);
        tcg_debug_assert(ret->temp_subindex == 0);
        op->args[pi++] = temp_arg(ret);
        op->args[pi++] = temp_arg(ret + 1);
        break;
    default:
        g_assert_not_reached();
    }

    /* Fill in the input slots according to each argument's location.  */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            /* ABI requires the 32-bit value widened to 64 bits; do it
               in a scratch temp freed after the call is emitted.  */
            {
                TCGv_i64 temp = tcg_temp_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* Release the widening scratch temps now the op references them.  */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1746 
/*
 * Initialize register-allocator state at the start of a translation:
 * assign every temp its starting value location and clear the
 * register-to-temp map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default location: in memory (globals, locals).  */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps permanently live in their register.  */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            /* Non-fixed, non-global temps need a fresh frame slot.  */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
1779 
/*
 * Format a human-readable name for temp TS into BUF (for debug dumps):
 * globals by name, temps as loc%d/ebb%d/tmp%d, constants as $value.
 * Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants print their width in bits as a prefix.  */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1822 
1823 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1824                              int buf_size, TCGArg arg)
1825 {
1826     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1827 }
1828 
/* Printable names indexed by TCGCond, for diagnostic output.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1844 
/* Printable names for MemOp size/sign/endianness combinations,
   indexed by the MO_* value, for diagnostic output.  */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1860 
/* Printable prefixes for MemOp alignment bits (MO_AMASK field).
   The default alignment prints as "" so common cases stay terse.  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1876 
/* Printable names for the TCG_BSWAP_* flag combinations carried by
   bswap ops; unnamed combinations print as empty strings.  */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1884 
1885 static inline bool tcg_regset_single(TCGRegSet d)
1886 {
1887     return (d & (d - 1)) == 0;
1888 }
1889 
1890 static inline TCGReg tcg_regset_first(TCGRegSet d)
1891 {
1892     if (TCG_TARGET_NB_REGS <= 32) {
1893         return ctz32(d);
1894     } else {
1895         return ctz64(d);
1896     }
1897 }
1898 
/*
 * Like fprintf, but return only the number of characters output,
 * clamping a negative (error) return to 0 so callers can accumulate
 * column counts without error checking.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1902 
1903 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1904 {
1905     char buf[128];
1906     TCGOp *op;
1907 
1908     QTAILQ_FOREACH(op, &s->ops, link) {
1909         int i, k, nb_oargs, nb_iargs, nb_cargs;
1910         const TCGOpDef *def;
1911         TCGOpcode c;
1912         int col = 0;
1913 
1914         c = op->opc;
1915         def = &tcg_op_defs[c];
1916 
1917         if (c == INDEX_op_insn_start) {
1918             nb_oargs = 0;
1919             col += ne_fprintf(f, "\n ----");
1920 
1921             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1922                 target_ulong a;
1923 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1924                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1925 #else
1926                 a = op->args[i];
1927 #endif
1928                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1929             }
1930         } else if (c == INDEX_op_call) {
1931             const TCGHelperInfo *info = tcg_call_info(op);
1932             void *func = tcg_call_func(op);
1933 
1934             /* variable number of arguments */
1935             nb_oargs = TCGOP_CALLO(op);
1936             nb_iargs = TCGOP_CALLI(op);
1937             nb_cargs = def->nb_cargs;
1938 
1939             col += ne_fprintf(f, " %s ", def->name);
1940 
1941             /*
1942              * Print the function name from TCGHelperInfo, if available.
1943              * Note that plugins have a template function for the info,
1944              * but the actual function pointer comes from the plugin.
1945              */
1946             if (func == info->func) {
1947                 col += ne_fprintf(f, "%s", info->name);
1948             } else {
1949                 col += ne_fprintf(f, "plugin(%p)", func);
1950             }
1951 
1952             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1953             for (i = 0; i < nb_oargs; i++) {
1954                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1955                                                             op->args[i]));
1956             }
1957             for (i = 0; i < nb_iargs; i++) {
1958                 TCGArg arg = op->args[nb_oargs + i];
1959                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1960                 col += ne_fprintf(f, ",%s", t);
1961             }
1962         } else {
1963             col += ne_fprintf(f, " %s ", def->name);
1964 
1965             nb_oargs = def->nb_oargs;
1966             nb_iargs = def->nb_iargs;
1967             nb_cargs = def->nb_cargs;
1968 
1969             if (def->flags & TCG_OPF_VECTOR) {
1970                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1971                                   8 << TCGOP_VECE(op));
1972             }
1973 
1974             k = 0;
1975             for (i = 0; i < nb_oargs; i++) {
1976                 const char *sep =  k ? "," : "";
1977                 col += ne_fprintf(f, "%s%s", sep,
1978                                   tcg_get_arg_str(s, buf, sizeof(buf),
1979                                                   op->args[k++]));
1980             }
1981             for (i = 0; i < nb_iargs; i++) {
1982                 const char *sep =  k ? "," : "";
1983                 col += ne_fprintf(f, "%s%s", sep,
1984                                   tcg_get_arg_str(s, buf, sizeof(buf),
1985                                                   op->args[k++]));
1986             }
1987             switch (c) {
1988             case INDEX_op_brcond_i32:
1989             case INDEX_op_setcond_i32:
1990             case INDEX_op_movcond_i32:
1991             case INDEX_op_brcond2_i32:
1992             case INDEX_op_setcond2_i32:
1993             case INDEX_op_brcond_i64:
1994             case INDEX_op_setcond_i64:
1995             case INDEX_op_movcond_i64:
1996             case INDEX_op_cmp_vec:
1997             case INDEX_op_cmpsel_vec:
1998                 if (op->args[k] < ARRAY_SIZE(cond_name)
1999                     && cond_name[op->args[k]]) {
2000                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2001                 } else {
2002                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2003                 }
2004                 i = 1;
2005                 break;
2006             case INDEX_op_qemu_ld_i32:
2007             case INDEX_op_qemu_st_i32:
2008             case INDEX_op_qemu_st8_i32:
2009             case INDEX_op_qemu_ld_i64:
2010             case INDEX_op_qemu_st_i64:
2011                 {
2012                     MemOpIdx oi = op->args[k++];
2013                     MemOp op = get_memop(oi);
2014                     unsigned ix = get_mmuidx(oi);
2015 
2016                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2017                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2018                     } else {
2019                         const char *s_al, *s_op;
2020                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2021                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2022                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2023                     }
2024                     i = 1;
2025                 }
2026                 break;
2027             case INDEX_op_bswap16_i32:
2028             case INDEX_op_bswap16_i64:
2029             case INDEX_op_bswap32_i32:
2030             case INDEX_op_bswap32_i64:
2031             case INDEX_op_bswap64_i64:
2032                 {
2033                     TCGArg flags = op->args[k];
2034                     const char *name = NULL;
2035 
2036                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2037                         name = bswap_flag_name[flags];
2038                     }
2039                     if (name) {
2040                         col += ne_fprintf(f, ",%s", name);
2041                     } else {
2042                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2043                     }
2044                     i = k = 1;
2045                 }
2046                 break;
2047             default:
2048                 i = 0;
2049                 break;
2050             }
2051             switch (c) {
2052             case INDEX_op_set_label:
2053             case INDEX_op_br:
2054             case INDEX_op_brcond_i32:
2055             case INDEX_op_brcond_i64:
2056             case INDEX_op_brcond2_i32:
2057                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2058                                   arg_label(op->args[k])->id);
2059                 i++, k++;
2060                 break;
2061             default:
2062                 break;
2063             }
2064             for (; i < nb_cargs; i++, k++) {
2065                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2066                                   op->args[k]);
2067             }
2068         }
2069 
2070         if (have_prefs || op->life) {
2071             for (; col < 40; ++col) {
2072                 putc(' ', f);
2073             }
2074         }
2075 
2076         if (op->life) {
2077             unsigned life = op->life;
2078 
2079             if (life & (SYNC_ARG * 3)) {
2080                 ne_fprintf(f, "  sync:");
2081                 for (i = 0; i < 2; ++i) {
2082                     if (life & (SYNC_ARG << i)) {
2083                         ne_fprintf(f, " %d", i);
2084                     }
2085                 }
2086             }
2087             life /= DEAD_ARG;
2088             if (life) {
2089                 ne_fprintf(f, "  dead:");
2090                 for (i = 0; life; ++i, life >>= 1) {
2091                     if (life & 1) {
2092                         ne_fprintf(f, " %d", i);
2093                     }
2094                 }
2095             }
2096         }
2097 
2098         if (have_prefs) {
2099             for (i = 0; i < nb_oargs; ++i) {
2100                 TCGRegSet set = output_pref(op, i);
2101 
2102                 if (i == 0) {
2103                     ne_fprintf(f, "  pref=");
2104                 } else {
2105                     ne_fprintf(f, ",");
2106                 }
2107                 if (set == 0) {
2108                     ne_fprintf(f, "none");
2109                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2110                     ne_fprintf(f, "all");
2111 #ifdef CONFIG_DEBUG_TCG
2112                 } else if (tcg_regset_single(set)) {
2113                     TCGReg reg = tcg_regset_first(set);
2114                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2115 #endif
2116                 } else if (TCG_TARGET_NB_REGS <= 32) {
2117                     ne_fprintf(f, "0x%x", (uint32_t)set);
2118                 } else {
2119                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2120                 }
2121             }
2122         }
2123 
2124         putc('\n', f);
2125     }
2126 }
2127 
2128 /* we give more priority to constraints with less registers */
2129 static int get_constraint_priority(const TCGOpDef *def, int k)
2130 {
2131     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2132     int n = ctpop64(arg_ct->regs);
2133 
2134     /*
2135      * Sort constraints of a single register first, which includes output
2136      * aliases (which must exactly match the input already allocated).
2137      */
2138     if (n == 1 || arg_ct->oalias) {
2139         return INT_MAX;
2140     }
2141 
2142     /*
2143      * Sort register pairs next, first then second immediately after.
2144      * Arbitrarily sort multiple pairs by the index of the first reg;
2145      * there shouldn't be many pairs.
2146      */
2147     switch (arg_ct->pair) {
2148     case 1:
2149     case 3:
2150         return (k + 1) * 2;
2151     case 2:
2152         return (arg_ct->pair_index + 1) * 2 - 1;
2153     }
2154 
2155     /* Finally, sort by decreasing register count. */
2156     assert(n > 1);
2157     return -n;
2158 }
2159 
2160 /* sort from highest priority to lowest */
2161 static void sort_constraints(TCGOpDef *def, int start, int n)
2162 {
2163     int i, j;
2164     TCGArgConstraint *a = def->args_ct;
2165 
2166     for (i = 0; i < n; i++) {
2167         a[start + i].sort_index = start + i;
2168     }
2169     if (n <= 1) {
2170         return;
2171     }
2172     for (i = 0; i < n - 1; i++) {
2173         for (j = i + 1; j < n; j++) {
2174             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2175             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2176             if (p1 < p2) {
2177                 int tmp = a[start + i].sort_index;
2178                 a[start + i].sort_index = a[start + j].sort_index;
2179                 a[start + j].sort_index = tmp;
2180             }
2181         }
2182     }
2183 }
2184 
/*
 * Parse the backend's constraint strings for every opcode into the
 * per-argument args_ct[] structures (register sets, constants,
 * input/output aliases, register pairs), then sort the constraints
 * of each opcode by allocation priority.  Called once at startup.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct_str; the rest are inputs. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output number *ct_str - '0'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Early-clobber output: allocated before inputs are read. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters accumulate register/constant sets. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2389 
2390 void tcg_op_remove(TCGContext *s, TCGOp *op)
2391 {
2392     TCGLabel *label;
2393 
2394     switch (op->opc) {
2395     case INDEX_op_br:
2396         label = arg_label(op->args[0]);
2397         label->refs--;
2398         break;
2399     case INDEX_op_brcond_i32:
2400     case INDEX_op_brcond_i64:
2401         label = arg_label(op->args[3]);
2402         label->refs--;
2403         break;
2404     case INDEX_op_brcond2_i32:
2405         label = arg_label(op->args[5]);
2406         label->refs--;
2407         break;
2408     default:
2409         break;
2410     }
2411 
2412     QTAILQ_REMOVE(&s->ops, op, link);
2413     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2414     s->nb_ops--;
2415 
2416 #ifdef CONFIG_PROFILER
2417     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2418 #endif
2419 }
2420 
2421 void tcg_remove_ops_after(TCGOp *op)
2422 {
2423     TCGContext *s = tcg_ctx;
2424 
2425     while (true) {
2426         TCGOp *last = tcg_last_op();
2427         if (last == op) {
2428             return;
2429         }
2430         tcg_op_remove(s, last);
2431     }
2432 }
2433 
2434 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2435 {
2436     TCGContext *s = tcg_ctx;
2437     TCGOp *op = NULL;
2438 
2439     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2440         QTAILQ_FOREACH(op, &s->free_ops, link) {
2441             if (nargs <= op->nargs) {
2442                 QTAILQ_REMOVE(&s->free_ops, op, link);
2443                 nargs = op->nargs;
2444                 goto found;
2445             }
2446         }
2447     }
2448 
2449     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2450     nargs = MAX(4, nargs);
2451     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2452 
2453  found:
2454     memset(op, 0, offsetof(TCGOp, link));
2455     op->opc = opc;
2456     op->nargs = nargs;
2457 
2458     /* Check for bitfield overflow. */
2459     tcg_debug_assert(op->nargs == nargs);
2460 
2461     s->nb_ops++;
2462     return op;
2463 }
2464 
2465 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2466 {
2467     TCGOp *op = tcg_op_alloc(opc, nargs);
2468     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2469     return op;
2470 }
2471 
2472 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2473                             TCGOpcode opc, unsigned nargs)
2474 {
2475     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2476     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2477     return new_op;
2478 }
2479 
2480 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2481                            TCGOpcode opc, unsigned nargs)
2482 {
2483     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2484     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2485     return new_op;
2486 }
2487 
/*
 * Reachability analysis: remove unreachable code.
 *
 * Walk the ops list forward, tracking whether the current op is
 * reachable ("dead" flips true after an unconditional control
 * transfer and false at a referenced label), and remove any op
 * that cannot execute.
 */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2560 
/* Liveness state bits kept in TCGTemp.state during liveness analysis. */
#define TS_DEAD  1   /* the value is dead: no later op reads it */
#define TS_MEM   2   /* the value must be synced back to its memory slot */

/* Test the per-argument life bits recorded in a TCGLifeData word. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2566 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* state_ptr points into the prefs array set up by liveness_pass_1. */
    return ts->state_ptr;
}
2572 
2573 /* For liveness_pass_1, reset the preferences for a given temp to the
2574  * maximal regset for its type.
2575  */
2576 static inline void la_reset_pref(TCGTemp *ts)
2577 {
2578     *la_temp_pref(ts)
2579         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2580 }
2581 
2582 /* liveness analysis: end of function: all temps are dead, and globals
2583    should be in memory. */
2584 static void la_func_end(TCGContext *s, int ng, int nt)
2585 {
2586     int i;
2587 
2588     for (i = 0; i < ng; ++i) {
2589         s->temps[i].state = TS_DEAD | TS_MEM;
2590         la_reset_pref(&s->temps[i]);
2591     }
2592     for (i = ng; i < nt; ++i) {
2593         s->temps[i].state = TS_DEAD;
2594         la_reset_pref(&s->temps[i]);
2595     }
2596 }
2597 
2598 /* liveness analysis: end of basic block: all temps are dead, globals
2599    and local temps should be in memory. */
2600 static void la_bb_end(TCGContext *s, int ng, int nt)
2601 {
2602     int i;
2603 
2604     for (i = 0; i < nt; ++i) {
2605         TCGTemp *ts = &s->temps[i];
2606         int state;
2607 
2608         switch (ts->kind) {
2609         case TEMP_FIXED:
2610         case TEMP_GLOBAL:
2611         case TEMP_LOCAL:
2612             state = TS_DEAD | TS_MEM;
2613             break;
2614         case TEMP_NORMAL:
2615         case TEMP_EBB:
2616         case TEMP_CONST:
2617             state = TS_DEAD;
2618             break;
2619         default:
2620             g_assert_not_reached();
2621         }
2622         ts->state = state;
2623         la_reset_pref(ts);
2624     }
2625 }
2626 
2627 /* liveness analysis: sync globals back to memory.  */
2628 static void la_global_sync(TCGContext *s, int ng)
2629 {
2630     int i;
2631 
2632     for (i = 0; i < ng; ++i) {
2633         int state = s->temps[i].state;
2634         s->temps[i].state = state | TS_MEM;
2635         if (state == TS_DEAD) {
2636             /* If the global was previously dead, reset prefs.  */
2637             la_reset_pref(&s->temps[i]);
2638         }
2639     }
2640 }
2641 
2642 /*
2643  * liveness analysis: conditional branch: all temps are dead unless
2644  * explicitly live-across-conditional-branch, globals and local temps
2645  * should be synced.
2646  */
2647 static void la_bb_sync(TCGContext *s, int ng, int nt)
2648 {
2649     la_global_sync(s, ng);
2650 
2651     for (int i = ng; i < nt; ++i) {
2652         TCGTemp *ts = &s->temps[i];
2653         int state;
2654 
2655         switch (ts->kind) {
2656         case TEMP_LOCAL:
2657             state = ts->state;
2658             ts->state = state | TS_MEM;
2659             if (state != TS_DEAD) {
2660                 continue;
2661             }
2662             break;
2663         case TEMP_NORMAL:
2664             s->temps[i].state = TS_DEAD;
2665             break;
2666         case TEMP_EBB:
2667         case TEMP_CONST:
2668             continue;
2669         default:
2670             g_assert_not_reached();
2671         }
2672         la_reset_pref(&s->temps[i]);
2673     }
2674 }
2675 
2676 /* liveness analysis: sync globals back to memory and kill.  */
2677 static void la_global_kill(TCGContext *s, int ng)
2678 {
2679     int i;
2680 
2681     for (i = 0; i < ng; i++) {
2682         s->temps[i].state = TS_DEAD | TS_MEM;
2683         la_reset_pref(&s->temps[i]);
2684     }
2685 }
2686 
2687 /* liveness analysis: note live globals crossing calls.  */
2688 static void la_cross_call(TCGContext *s, int nt)
2689 {
2690     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2691     int i;
2692 
2693     for (i = 0; i < nt; i++) {
2694         TCGTemp *ts = &s->temps[i];
2695         if (!(ts->state & TS_DEAD)) {
2696             TCGRegSet *pset = la_temp_pref(ts);
2697             TCGRegSet set = *pset;
2698 
2699             set &= mask;
2700             /* If the combination is not possible, restart.  */
2701             if (set == 0) {
2702                 set = tcg_target_available_regs[ts->type] & mask;
2703             }
2704             *pset = set;
2705         }
2706     }
2707 }
2708 
/* Liveness analysis: update the opc_arg_life array to tell whether each
   input argument of an op is dead after the op.  Ops whose outputs are
   all dead (and which have no side effects) are removed.  As a side
   effect, per-temp register preference sets are computed and stored in
   each op's output_pref[].  */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reachable via state_ptr.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the op list backward, so liveness flows from each use back
       toward the corresponding definition.  */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            /* The op is kept.  Record output life bits, then propagate
               liveness and preferences to the inputs.  */
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                /* Narrow each input's preference by its constraint set,
                   and by the following op's output preference when the
                   input aliases an output.  */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3050 
/* Liveness analysis: Convert indirect regs to direct temporaries.
   Each indirect global gets a shadow direct temp; explicit load ops are
   inserted before uses when the shadow is dead, and store ops after
   definitions when a sync is required.  Returns true if any op was
   changed, so the caller knows to re-run liveness.  */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp is dead: reload it from the global's
                   canonical memory slot before this op.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /* Special-case mov: when the destination must be synced and
               is itself dead, the mov can be replaced entirely by a
               store of the source.  */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3237 
3238 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3239 {
3240     int size = tcg_type_size(ts->type);
3241     int align;
3242     intptr_t off;
3243 
3244     switch (ts->type) {
3245     case TCG_TYPE_I32:
3246         align = 4;
3247         break;
3248     case TCG_TYPE_I64:
3249     case TCG_TYPE_V64:
3250         align = 8;
3251         break;
3252     case TCG_TYPE_V128:
3253     case TCG_TYPE_V256:
3254         /* Note that we do not require aligned storage for V256. */
3255         align = 16;
3256         break;
3257     default:
3258         g_assert_not_reached();
3259     }
3260 
3261     /*
3262      * Assume the stack is sufficiently aligned.
3263      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3264      * and do not require 16 byte vector alignment.  This seems slightly
3265      * easier than fully parameterizing the above switch statement.
3266      */
3267     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3268     off = ROUND_UP(s->current_frame_offset, align);
3269 
3270     /* If we've exhausted the stack frame, restart with a smaller TB. */
3271     if (off + size > s->frame_end) {
3272         tcg_raise_tb_overflow(s);
3273     }
3274     s->current_frame_offset = off + size;
3275 
3276     ts->mem_offset = off;
3277 #if defined(__sparc__)
3278     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3279 #endif
3280     ts->mem_base = s->frame_temp;
3281     ts->mem_allocated = 1;
3282 }
3283 
3284 /* Assign @reg to @ts, and update reg_to_temp[]. */
3285 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3286 {
3287     if (ts->val_type == TEMP_VAL_REG) {
3288         TCGReg old = ts->reg;
3289         tcg_debug_assert(s->reg_to_temp[old] == ts);
3290         if (old == reg) {
3291             return;
3292         }
3293         s->reg_to_temp[old] = NULL;
3294     }
3295     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3296     s->reg_to_temp[reg] = ts;
3297     ts->val_type = TEMP_VAL_REG;
3298     ts->reg = reg;
3299 }
3300 
3301 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3302 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3303 {
3304     tcg_debug_assert(type != TEMP_VAL_REG);
3305     if (ts->val_type == TEMP_VAL_REG) {
3306         TCGReg reg = ts->reg;
3307         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3308         s->reg_to_temp[reg] = NULL;
3309     }
3310     ts->val_type = type;
3311 }
3312 
3313 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3314 
3315 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3316    mark it free; otherwise mark it dead.  */
3317 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3318 {
3319     TCGTempVal new_type;
3320 
3321     switch (ts->kind) {
3322     case TEMP_FIXED:
3323         return;
3324     case TEMP_GLOBAL:
3325     case TEMP_LOCAL:
3326         new_type = TEMP_VAL_MEM;
3327         break;
3328     case TEMP_NORMAL:
3329     case TEMP_EBB:
3330         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3331         break;
3332     case TEMP_CONST:
3333         new_type = TEMP_VAL_CONST;
3334         break;
3335     default:
3336         g_assert_not_reached();
3337     }
3338     set_temp_val_nonreg(s, ts, new_type);
3339 }
3340 
/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead: the value is discarded, not spilled.  */
    temp_free_or_dead(s, ts, 1);
}
3346 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent temps already
       match their memory slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register, then
               fall through to store it like a register value.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3390 
3391 /* free register 'reg' by spilling the corresponding temporary if necessary */
3392 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3393 {
3394     TCGTemp *ts = s->reg_to_temp[reg];
3395     if (ts != NULL) {
3396         temp_sync(s, ts, allocated_regs, 0, -1);
3397     }
3398 }
3399 
3400 /**
3401  * tcg_reg_alloc:
3402  * @required_regs: Set of registers in which we must allocate.
3403  * @allocated_regs: Set of registers which must be avoided.
3404  * @preferred_regs: Set of registers we should prefer.
3405  * @rev: True if we search the registers in "indirect" order.
3406  *
3407  * The allocated register must be in @required_regs & ~@allocated_regs,
3408  * but if we can put it in @preferred_regs we may save a move later.
3409  */
3410 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3411                             TCGRegSet allocated_regs,
3412                             TCGRegSet preferred_regs, bool rev)
3413 {
3414     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3415     TCGRegSet reg_ct[2];
3416     const int *order;
3417 
3418     reg_ct[1] = required_regs & ~allocated_regs;
3419     tcg_debug_assert(reg_ct[1] != 0);
3420     reg_ct[0] = reg_ct[1] & preferred_regs;
3421 
3422     /* Skip the preferred_regs option if it cannot be satisfied,
3423        or if the preference made no difference.  */
3424     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3425 
3426     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3427 
3428     /* Try free registers, preferences first.  */
3429     for (j = f; j < 2; j++) {
3430         TCGRegSet set = reg_ct[j];
3431 
3432         if (tcg_regset_single(set)) {
3433             /* One register in the set.  */
3434             TCGReg reg = tcg_regset_first(set);
3435             if (s->reg_to_temp[reg] == NULL) {
3436                 return reg;
3437             }
3438         } else {
3439             for (i = 0; i < n; i++) {
3440                 TCGReg reg = order[i];
3441                 if (s->reg_to_temp[reg] == NULL &&
3442                     tcg_regset_test_reg(set, reg)) {
3443                     return reg;
3444                 }
3445             }
3446         }
3447     }
3448 
3449     /* We must spill something.  */
3450     for (j = f; j < 2; j++) {
3451         TCGRegSet set = reg_ct[j];
3452 
3453         if (tcg_regset_single(set)) {
3454             /* One register in the set.  */
3455             TCGReg reg = tcg_regset_first(set);
3456             tcg_reg_free(s, reg, allocated_regs);
3457             return reg;
3458         } else {
3459             for (i = 0; i < n; i++) {
3460                 TCGReg reg = order[i];
3461                 if (tcg_regset_test_reg(set, reg)) {
3462                     tcg_reg_free(s, reg, allocated_regs);
3463                     return reg;
3464                 }
3465             }
3466         }
3467     }
3468 
3469     tcg_abort();
3470 }
3471 
/*
 * Allocate a pair of adjacent registers (reg, reg + 1) from
 * @required_regs & ~@allocated_regs, preferring @preferred_regs.
 * Returns the even/base register of the pair.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of reg/reg+1 are already free;
                       accept the pair only if it needs at most the
                       number of flushes this round allows.  */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}
3517 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I32) {
            /* Scalar constant: a plain move-immediate suffices.  */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register copy is newer than any memory slot.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded, so register and memory agree.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}
3566 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   NOTE(review): the body has been reduced to an assertion, so
   'allocated_regs' is currently unused; kept for interface stability. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3575 
3576 /* save globals to their canonical location and assume they can be
3577    modified be the following code. 'allocated_regs' is used in case a
3578    temporary registers needs to be allocated to store a constant. */
3579 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3580 {
3581     int i, n;
3582 
3583     for (i = 0, n = s->nb_globals; i < n; i++) {
3584         temp_save(s, &s->temps[i], allocated_regs);
3585     }
3586 }
3587 
3588 /* sync globals to their canonical location and assume they can be
3589    read by the following code. 'allocated_regs' is used in case a
3590    temporary registers needs to be allocated to store a constant. */
3591 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3592 {
3593     int i, n;
3594 
3595     for (i = 0, n = s->nb_globals; i < n; i++) {
3596         TCGTemp *ts = &s->temps[i];
3597         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3598                          || ts->kind == TEMP_FIXED
3599                          || ts->mem_coherent);
3600     }
3601 }
3602 
3603 /* at the end of a basic block, we assume all temporaries are dead and
3604    all globals are stored at their canonical location. */
3605 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3606 {
3607     int i;
3608 
3609     for (i = s->nb_globals; i < s->nb_temps; i++) {
3610         TCGTemp *ts = &s->temps[i];
3611 
3612         switch (ts->kind) {
3613         case TEMP_LOCAL:
3614             temp_save(s, ts, allocated_regs);
3615             break;
3616         case TEMP_NORMAL:
3617         case TEMP_EBB:
3618             /* The liveness analysis already ensures that temps are dead.
3619                Keep an tcg_debug_assert for safety. */
3620             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3621             break;
3622         case TEMP_CONST:
3623             /* Similarly, we should have freed any allocated register. */
3624             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3625             break;
3626         default:
3627             g_assert_not_reached();
3628         }
3629     }
3630 
3631     save_globals(s, allocated_regs);
3632 }
3633 
3634 /*
3635  * At a conditional branch, we assume all temporaries are dead unless
3636  * explicitly live-across-conditional-branch; all globals and local
3637  * temps are synced to their location.
3638  */
3639 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3640 {
3641     sync_globals(s, allocated_regs);
3642 
3643     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3644         TCGTemp *ts = &s->temps[i];
3645         /*
3646          * The liveness analysis already ensures that temps are dead.
3647          * Keep tcg_debug_asserts for safety.
3648          */
3649         switch (ts->kind) {
3650         case TEMP_LOCAL:
3651             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3652             break;
3653         case TEMP_NORMAL:
3654             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3655             break;
3656         case TEMP_EBB:
3657         case TEMP_CONST:
3658             break;
3659         default:
3660             g_assert_not_reached();
3661         }
3662     }
3663 }
3664 
3665 /*
3666  * Specialized code generation for INDEX_op_mov_* with a constant.
3667  */
3668 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3669                                   tcg_target_ulong val, TCGLifeData arg_life,
3670                                   TCGRegSet preferred_regs)
3671 {
3672     /* ENV should not be modified.  */
3673     tcg_debug_assert(!temp_readonly(ots));
3674 
3675     /* The movi is not explicitly generated here.  */
3676     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3677     ots->val = val;
3678     ots->mem_coherent = 0;
3679     if (NEED_SYNC_ARG(0)) {
3680         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3681     } else if (IS_DEAD_ARG(0)) {
3682         temp_dead(s, ots);
3683     }
3684 }
3685 
3686 /*
3687  * Specialized code generation for INDEX_op_mov_*.
3688  */
3689 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3690 {
3691     const TCGLifeData arg_life = op->life;
3692     TCGRegSet allocated_regs, preferred_regs;
3693     TCGTemp *ts, *ots;
3694     TCGType otype, itype;
3695     TCGReg oreg, ireg;
3696 
3697     allocated_regs = s->reserved_regs;
3698     preferred_regs = output_pref(op, 0);
3699     ots = arg_temp(op->args[0]);
3700     ts = arg_temp(op->args[1]);
3701 
3702     /* ENV should not be modified.  */
3703     tcg_debug_assert(!temp_readonly(ots));
3704 
3705     /* Note that otype != itype for no-op truncation.  */
3706     otype = ots->type;
3707     itype = ts->type;
3708 
3709     if (ts->val_type == TEMP_VAL_CONST) {
3710         /* propagate constant or generate sti */
3711         tcg_target_ulong val = ts->val;
3712         if (IS_DEAD_ARG(1)) {
3713             temp_dead(s, ts);
3714         }
3715         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3716         return;
3717     }
3718 
3719     /* If the source value is in memory we're going to be forced
3720        to have it in a register in order to perform the copy.  Copy
3721        the SOURCE value into its own register first, that way we
3722        don't have to reload SOURCE the next time it is used. */
3723     if (ts->val_type == TEMP_VAL_MEM) {
3724         temp_load(s, ts, tcg_target_available_regs[itype],
3725                   allocated_regs, preferred_regs);
3726     }
3727     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3728     ireg = ts->reg;
3729 
3730     if (IS_DEAD_ARG(0)) {
3731         /* mov to a non-saved dead register makes no sense (even with
3732            liveness analysis disabled). */
3733         tcg_debug_assert(NEED_SYNC_ARG(0));
3734         if (!ots->mem_allocated) {
3735             temp_allocate_frame(s, ots);
3736         }
3737         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3738         if (IS_DEAD_ARG(1)) {
3739             temp_dead(s, ts);
3740         }
3741         temp_dead(s, ots);
3742         return;
3743     }
3744 
3745     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3746         /*
3747          * The mov can be suppressed.  Kill input first, so that it
3748          * is unlinked from reg_to_temp, then set the output to the
3749          * reg that we saved from the input.
3750          */
3751         temp_dead(s, ts);
3752         oreg = ireg;
3753     } else {
3754         if (ots->val_type == TEMP_VAL_REG) {
3755             oreg = ots->reg;
3756         } else {
3757             /* Make sure to not spill the input register during allocation. */
3758             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3759                                  allocated_regs | ((TCGRegSet)1 << ireg),
3760                                  preferred_regs, ots->indirect_base);
3761         }
3762         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3763             /*
3764              * Cross register class move not supported.
3765              * Store the source register into the destination slot
3766              * and leave the destination temp as TEMP_VAL_MEM.
3767              */
3768             assert(!temp_readonly(ots));
3769             if (!ts->mem_allocated) {
3770                 temp_allocate_frame(s, ots);
3771             }
3772             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3773             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3774             ots->mem_coherent = 1;
3775             return;
3776         }
3777     }
3778     set_temp_val_reg(s, ots, oreg);
3779     ots->mem_coherent = 0;
3780 
3781     if (NEED_SYNC_ARG(0)) {
3782         temp_sync(s, ots, allocated_regs, 0, 0);
3783     }
3784 }
3785 
3786 /*
3787  * Specialized code generation for INDEX_op_dup_vec.
3788  */
3789 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3790 {
3791     const TCGLifeData arg_life = op->life;
3792     TCGRegSet dup_out_regs, dup_in_regs;
3793     TCGTemp *its, *ots;
3794     TCGType itype, vtype;
3795     unsigned vece;
3796     int lowpart_ofs;
3797     bool ok;
3798 
3799     ots = arg_temp(op->args[0]);
3800     its = arg_temp(op->args[1]);
3801 
3802     /* ENV should not be modified.  */
3803     tcg_debug_assert(!temp_readonly(ots));
3804 
3805     itype = its->type;
3806     vece = TCGOP_VECE(op);
3807     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3808 
3809     if (its->val_type == TEMP_VAL_CONST) {
3810         /* Propagate constant via movi -> dupi.  */
3811         tcg_target_ulong val = its->val;
3812         if (IS_DEAD_ARG(1)) {
3813             temp_dead(s, its);
3814         }
3815         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
3816         return;
3817     }
3818 
3819     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3820     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3821 
3822     /* Allocate the output register now.  */
3823     if (ots->val_type != TEMP_VAL_REG) {
3824         TCGRegSet allocated_regs = s->reserved_regs;
3825         TCGReg oreg;
3826 
3827         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3828             /* Make sure to not spill the input register. */
3829             tcg_regset_set_reg(allocated_regs, its->reg);
3830         }
3831         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3832                              output_pref(op, 0), ots->indirect_base);
3833         set_temp_val_reg(s, ots, oreg);
3834     }
3835 
3836     switch (its->val_type) {
3837     case TEMP_VAL_REG:
3838         /*
3839          * The dup constriaints must be broad, covering all possible VECE.
3840          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3841          * to fail, indicating that extra moves are required for that case.
3842          */
3843         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3844             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3845                 goto done;
3846             }
3847             /* Try again from memory or a vector input register.  */
3848         }
3849         if (!its->mem_coherent) {
3850             /*
3851              * The input register is not synced, and so an extra store
3852              * would be required to use memory.  Attempt an integer-vector
3853              * register move first.  We do not have a TCGRegSet for this.
3854              */
3855             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3856                 break;
3857             }
3858             /* Sync the temp back to its slot and load from there.  */
3859             temp_sync(s, its, s->reserved_regs, 0, 0);
3860         }
3861         /* fall through */
3862 
3863     case TEMP_VAL_MEM:
3864         lowpart_ofs = 0;
3865         if (HOST_BIG_ENDIAN) {
3866             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
3867         }
3868         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3869                              its->mem_offset + lowpart_ofs)) {
3870             goto done;
3871         }
3872         /* Load the input into the destination vector register. */
3873         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3874         break;
3875 
3876     default:
3877         g_assert_not_reached();
3878     }
3879 
3880     /* We now have a vector input register, so dup must succeed. */
3881     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3882     tcg_debug_assert(ok);
3883 
3884  done:
3885     ots->mem_coherent = 0;
3886     if (IS_DEAD_ARG(1)) {
3887         temp_dead(s, its);
3888     }
3889     if (NEED_SYNC_ARG(0)) {
3890         temp_sync(s, ots, s->reserved_regs, 0, 0);
3891     }
3892     if (IS_DEAD_ARG(0)) {
3893         temp_dead(s, ots);
3894     }
3895 }
3896 
/*
 * Generic register allocation and code emission for one TCG op.
 *
 * Phases, in order: copy constant args; satisfy input constraints
 * (including paired-register constraints); kill dead inputs; handle
 * branch/clobber/side-effect flags; satisfy output constraints; emit
 * the instruction; then sync/kill outputs per the liveness data.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-sorted order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        /* NOTE(review): reg is only meaningful when ts->val_type is
           TEMP_VAL_REG; all uses below are guarded by that check. */
        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /* The existing pair can be reused only if both halves
                   die here, are writable, already sit in adjacent
                   registers matching the constraint, and neither
                   register was claimed by another input. */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half fixed the pair; this half is reg + 1. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /* Reuse reg in place when it dies here, matches the
               constraint, and reg - 1 is free for the first output. */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Allocate a fresh pair; the input goes in the high half
               (note the constraint set shifted down by one). */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A "new" output must not overlap any input. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4223 
/*
 * Specialized allocation for INDEX_op_dup2_vec: duplicate a 64-bit
 * element assembled from two 32-bit inputs (32-bit hosts only).
 * Returns false when neither the immediate nor the in-memory fast
 * path applies and the caller must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Low input forms bits [31:0], high input bits [63:32]. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Pick the narrowest element size that reproduces the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* 'its' is the half that owns the lower memory address. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4311 
/*
 * Place the value of TS into the fixed call-argument register REG,
 * evicting whatever currently occupies REG (unless it is TS itself).
 */
static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            /* Free the target first so the move cannot clobber a live value. */
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        /* Constrain temp_load to exactly REG. */
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}
4336 
/*
 * Store the value of TS into call stack slot STK_SLOT (0-based index
 * above TCG_TARGET_CALL_STACK_OFFSET).
 */
static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               TCG_TARGET_CALL_STACK_OFFSET +
               stk_slot * sizeof(tcg_target_long));
}
4350 
4351 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4352                             TCGTemp *ts, TCGRegSet *allocated_regs)
4353 {
4354     if (REG_P(l)) {
4355         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4356         load_arg_reg(s, reg, ts, *allocated_regs);
4357         tcg_regset_set_reg(*allocated_regs, reg);
4358     } else {
4359         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4360                      ts, *allocated_regs);
4361     }
4362 }
4363 
/*
 * Allocate registers and emit host code for a TCG "call" op: move the
 * inputs into their ABI argument locations, free dead temps, clobber
 * the call-clobbered registers, save/sync globals according to the
 * helper's flags, emit the call itself, then bind the outputs to the
 * ABI return registers and flush/discard them as liveness requires.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;  /* read by IS_DEAD_ARG/NEED_SYNC_ARG */
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_regs[i];

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4448 
4449 #ifdef CONFIG_PROFILER
4450 
/* avoid copy/paste errors */
/* Accumulate @field of @from into @to, reading the source atomically. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Keep the maximum of @field seen so far in @to. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4464 
4465 /* Pass in a zero'ed @prof */
4466 static inline
4467 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4468 {
4469     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4470     unsigned int i;
4471 
4472     for (i = 0; i < n_ctxs; i++) {
4473         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4474         const TCGProfile *orig = &s->prof;
4475 
4476         if (counters) {
4477             PROF_ADD(prof, orig, cpu_exec_time);
4478             PROF_ADD(prof, orig, tb_count1);
4479             PROF_ADD(prof, orig, tb_count);
4480             PROF_ADD(prof, orig, op_count);
4481             PROF_MAX(prof, orig, op_count_max);
4482             PROF_ADD(prof, orig, temp_count);
4483             PROF_MAX(prof, orig, temp_count_max);
4484             PROF_ADD(prof, orig, del_op_count);
4485             PROF_ADD(prof, orig, code_in_len);
4486             PROF_ADD(prof, orig, code_out_len);
4487             PROF_ADD(prof, orig, search_out_len);
4488             PROF_ADD(prof, orig, interm_time);
4489             PROF_ADD(prof, orig, code_time);
4490             PROF_ADD(prof, orig, la_time);
4491             PROF_ADD(prof, orig, opt_time);
4492             PROF_ADD(prof, orig, restore_count);
4493             PROF_ADD(prof, orig, restore_time);
4494         }
4495         if (table) {
4496             int i;
4497 
4498             for (i = 0; i < NB_OPS; i++) {
4499                 PROF_ADD(prof, orig, table_op_count[i]);
4500             }
4501         }
4502     }
4503 }
4504 
4505 #undef PROF_ADD
4506 #undef PROF_MAX
4507 
/* Snapshot only the scalar counters, not the per-opcode table. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4512 
/* Snapshot only the per-opcode table, not the scalar counters. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4517 
4518 void tcg_dump_op_count(GString *buf)
4519 {
4520     TCGProfile prof = {};
4521     int i;
4522 
4523     tcg_profile_snapshot_table(&prof);
4524     for (i = 0; i < NB_OPS; i++) {
4525         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4526                                prof.table_op_count[i]);
4527     }
4528 }
4529 
4530 int64_t tcg_cpu_exec_time(void)
4531 {
4532     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4533     unsigned int i;
4534     int64_t ret = 0;
4535 
4536     for (i = 0; i < n_ctxs; i++) {
4537         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4538         const TCGProfile *prof = &s->prof;
4539 
4540         ret += qatomic_read(&prof->cpu_exec_time);
4541     }
4542     return ret;
4543 }
4544 #else
/* Profiler not compiled in: emit a placeholder message instead. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4549 
/* Profiler not compiled in: querying exec time is a hard error.
   Never returns (exits the process). */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4555 #endif
4556 
4557 
/*
 * Translate the op stream in @s->ops into host code for @tb.
 * Returns the number of host-code bytes emitted, or a negative value
 * on overflow: -1 when the code buffer high-water mark was crossed,
 * -2 when the TB size overflowed what gen_insn_end_off can record
 * (UINT16_MAX) or relocations could not be resolved.  @pc_start is
 * used only for log filtering.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op/temp counts for this TB before any transformation. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* -1 until the first insn_start op has been seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous guest insn's code range, then record
               the insn-start data words for the new one. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4798 
4799 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the aggregated TCG profiling
 * counters (JIT cycle counts, per-TB averages, phase percentages)
 * to @buf.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero in the per-TB averages below. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Guard the percentage computations below against tot == 0. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
4863 #else
/* Profiler not compiled in: emit a placeholder message instead. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4868 #endif
4869 
4870 #ifdef ELF_HOST_MACHINE
4871 /* In order to use this feature, the backend needs to do three things:
4872 
4873    (1) Define ELF_HOST_MACHINE to indicate both what value to
4874        put into the ELF image and to indicate support for the feature.
4875 
4876    (2) Define tcg_register_jit.  This should create a buffer containing
4877        the contents of a .debug_frame section that describes the post-
4878        prologue unwind info for the tcg machine.
4879 
4880    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4881 */
4882 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file (a doubly-linked list node). */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* Per the GDB JIT interface docs, the debugger breakpoints this hook;
   the noinline attribute and empty asm keep it from being elided. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
4915 
/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab.  The caller guarantees @str is present; the scan starts
 * past the leading NUL at offset 0.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
4927 
/*
 * Build a minimal in-memory ELF image describing the generated code:
 * a SHT_NOBITS .text covering @buf_ptr/@buf_size, a one-symbol symtab,
 * hand-rolled DWARF .debug_info/.debug_abbrev, and the caller-supplied
 * .debug_frame appended after the template.  The image is then handed
 * to GDB via the JIT registration interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Fixed-layout DWARF compile-unit + subprogram DIEs for .debug_info. */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): sized from ElfW(Shdr); presumably ElfW(Ehdr)
               was intended.  The two coincide for 64-bit ELF but differ
               for 32-bit -- confirm before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Fill in the runtime addresses/sizes of the generated-code region. */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Patch the caller's .debug_frame copy with the real code range. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (here at most 1),
               not bytes, so comparing against img_size is always "true" for
               img_size > 1; harmless since the body is empty.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the image through the GDB JIT descriptor and fire the hook. */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
5127 #else
5128 /* No support for the feature.  Provide the entry point expected by exec.c,
5129    and implement the internal function we declared earlier.  */
5130 
/* ELF_HOST_MACHINE is not defined: GDB JIT registration is a no-op. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5136 
/* Entry point expected by exec.c; no-op without ELF_HOST_MACHINE. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5140 #endif /* ELF_HOST_MACHINE */
5141 
5142 #if !TCG_TARGET_MAYBE_vec
/* Backend has no vector support: this must never be reached. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5147 #endif
5148