xref: /openbmc/qemu/tcg/tcg.c (revision 78817d3b)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #if HOST_BIG_ENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
64 
65 /* Forward declarations for functions declared in tcg-target.c.inc and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Pointer-aligned so a host-specific debug_frame image can follow
       this header directly in memory.  */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;   /* presumably start of generated code — see tcg_register_jit_int */
    uintptr_t func_len;     /* presumably size of generated code region */
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header, emitted as one image.  */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
95 static void tcg_register_jit_int(const void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
102                        intptr_t arg2);
103 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105                          TCGReg ret, tcg_target_long arg);
106 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
107 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
108 static void tcg_out_goto_tb(TCGContext *s, int which);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114                             TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120                            unsigned vecl, unsigned vece,
121                            const TCGArg args[TCG_MAX_OP_ARGS],
122                            const int const_args[TCG_MAX_OP_ARGS]);
123 #else
124 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
125                                    TCGReg dst, TCGReg src)
126 {
127     g_assert_not_reached();
128 }
129 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
130                                     TCGReg dst, TCGReg base, intptr_t offset)
131 {
132     g_assert_not_reached();
133 }
134 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
135                                     TCGReg dst, int64_t arg)
136 {
137     g_assert_not_reached();
138 }
139 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                                   unsigned vecl, unsigned vece,
141                                   const TCGArg args[TCG_MAX_OP_ARGS],
142                                   const int const_args[TCG_MAX_OP_ARGS])
143 {
144     g_assert_not_reached();
145 }
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          const TCGHelperInfo *info);
153 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
154 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
/* The initial context, from which all other contexts are cloned.  */
TCGContext tcg_init_ctx;
/* Per-thread translation context; see tcg_register_thread().  */
__thread TCGContext *tcg_ctx;

/* Registered contexts; tcg_cur_ctxs in use, tcg_max_ctxs is capacity.  */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* Offset between the writable and executable views of the code buffer.  */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
175 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte of host code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *out = s->code_ptr;

    *out = v;
    s->code_ptr = out + 1;
}

/* Overwrite a previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
188 
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
190 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
191 {
192     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
193         *s->code_ptr++ = v;
194     } else {
195         tcg_insn_unit *p = s->code_ptr;
196         memcpy(p, &v, sizeof(v));
197         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
198     }
199 }
200 
201 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
202                                                        uint16_t v)
203 {
204     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
205         *p = v;
206     } else {
207         memcpy(p, &v, sizeof(v));
208     }
209 }
210 #endif
211 
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
213 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
214 {
215     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
216         *s->code_ptr++ = v;
217     } else {
218         tcg_insn_unit *p = s->code_ptr;
219         memcpy(p, &v, sizeof(v));
220         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
221     }
222 }
223 
224 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
225                                                        uint32_t v)
226 {
227     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
228         *p = v;
229     } else {
230         memcpy(p, &v, sizeof(v));
231     }
232 }
233 #endif
234 
235 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
236 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
237 {
238     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
239         *s->code_ptr++ = v;
240     } else {
241         tcg_insn_unit *p = s->code_ptr;
242         memcpy(p, &v, sizeof(v));
243         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
244     }
245 }
246 
247 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
248                                                        uint64_t v)
249 {
250     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
251         *p = v;
252     } else {
253         memcpy(p, &v, sizeof(v));
254     }
255 }
256 #endif
257 
258 /* label relocation processing */
259 
260 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
261                           TCGLabel *l, intptr_t addend)
262 {
263     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
264 
265     r->type = type;
266     r->ptr = code_ptr;
267     r->addend = addend;
268     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
269 }
270 
271 static void tcg_out_label(TCGContext *s, TCGLabel *l)
272 {
273     tcg_debug_assert(!l->has_value);
274     l->has_value = 1;
275     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
276 }
277 
278 TCGLabel *gen_new_label(void)
279 {
280     TCGContext *s = tcg_ctx;
281     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
282 
283     memset(l, 0, sizeof(TCGLabel));
284     l->id = s->nb_labels++;
285     QSIMPLEQ_INIT(&l->relocs);
286 
287     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
288 
289     return l;
290 }
291 
292 static bool tcg_resolve_relocs(TCGContext *s)
293 {
294     TCGLabel *l;
295 
296     QSIMPLEQ_FOREACH(l, &s->labels, next) {
297         TCGRelocation *r;
298         uintptr_t value = l->u.value;
299 
300         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
301             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
302                 return false;
303             }
304         }
305     }
306     return true;
307 }
308 
/* Record the current code offset as the reset point for goto_tb slot @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
317 
/* Record the current code offset as the jump-insn location for slot @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
326 
/* Address of the jump-target word for goto_tb slot @which. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
335 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit back to the sigsetjmp in the translation loop. */
    siglongjmp(s->jmp_trans, -2);
}
342 
343 #define C_PFX1(P, A)                    P##A
344 #define C_PFX2(P, A, B)                 P##A##_##B
345 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
346 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
347 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
348 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
349 
350 /* Define an enumeration for the various combinations. */
351 
352 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
353 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
354 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
355 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
356 
357 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
358 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
359 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
360 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
361 
362 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
363 
364 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
365 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
366 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
367 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
368 
369 typedef enum {
370 #include "tcg-target-con-set.h"
371 } TCGConstraintSetIndex;
372 
373 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
374 
375 #undef C_O0_I1
376 #undef C_O0_I2
377 #undef C_O0_I3
378 #undef C_O0_I4
379 #undef C_O1_I1
380 #undef C_O1_I2
381 #undef C_O1_I3
382 #undef C_O1_I4
383 #undef C_N1_I2
384 #undef C_O2_I1
385 #undef C_O2_I2
386 #undef C_O2_I3
387 #undef C_O2_I4
388 
389 /* Put all of the constraint sets into an array, indexed by the enum. */
390 
391 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
392 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
393 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
394 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
395 
396 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
397 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
398 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
399 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
400 
401 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
402 
403 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
404 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
405 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
406 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
407 
408 static const TCGTargetOpDef constraint_sets[] = {
409 #include "tcg-target-con-set.h"
410 };
411 
412 
413 #undef C_O0_I1
414 #undef C_O0_I2
415 #undef C_O0_I3
416 #undef C_O0_I4
417 #undef C_O1_I1
418 #undef C_O1_I2
419 #undef C_O1_I3
420 #undef C_O1_I4
421 #undef C_N1_I2
422 #undef C_O2_I1
423 #undef C_O2_I2
424 #undef C_O2_I3
425 #undef C_O2_I4
426 
427 /* Expand the enumerator to be returned from tcg_target_op_def(). */
428 
429 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
430 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
431 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
432 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
433 
434 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
435 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
436 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
437 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
438 
439 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
440 
441 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
442 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
443 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
444 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
445 
446 #include "tcg-target.c.inc"
447 
/* Allocate per-context plugin TB state; no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    /* insns owns its elements: freed via qemu_plugin_insn_cleanup_fn. */
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
456 
457 /*
458  * All TCG threads except the parent (i.e. the one that called tcg_context_init
459  * and registered the target's TCG globals) must register with this function
460  * before initiating translation.
461  *
462  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
463  * of tcg_region_init() for the reasoning behind this.
464  *
465  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
466  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
467  * is not used anymore for translation once this function is called.
468  *
469  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
471  */
#ifdef CONFIG_USER_ONLY
/* User-mode: all threads share the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/* Softmmu: clone the initial context for this thread and register it. */
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a byte-wise copy of the parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Translate the parent's intra-array pointer to our copy. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* NOTE(review): slot 0 appears to be set up elsewhere (first
       registrant skips this) — confirm against tcg_context_init. */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
507 
508 /* pool based memory allocation */
509 void *tcg_malloc_internal(TCGContext *s, int size)
510 {
511     TCGPool *p;
512     int pool_size;
513 
514     if (size > TCG_POOL_CHUNK_SIZE) {
515         /* big malloc: insert a new pool (XXX: could optimize) */
516         p = g_malloc(sizeof(TCGPool) + size);
517         p->size = size;
518         p->next = s->pool_first_large;
519         s->pool_first_large = p;
520         return p->data;
521     } else {
522         p = s->pool_current;
523         if (!p) {
524             p = s->pool_first;
525             if (!p)
526                 goto new_pool;
527         } else {
528             if (!p->next) {
529             new_pool:
530                 pool_size = TCG_POOL_CHUNK_SIZE;
531                 p = g_malloc(sizeof(TCGPool) + pool_size);
532                 p->size = pool_size;
533                 p->next = NULL;
534                 if (s->pool_current) {
535                     s->pool_current->next = p;
536                 } else {
537                     s->pool_first = p;
538                 }
539             } else {
540                 p = p->next;
541             }
542         }
543     }
544     s->pool_current = p;
545     s->pool_cur = p->data + size;
546     s->pool_end = p->data + p->size;
547     return p->data;
548 }
549 
550 void tcg_pool_reset(TCGContext *s)
551 {
552     TCGPool *p, *t;
553     for (p = s->pool_first_large; p; p = t) {
554         t = p->next;
555         g_free(p);
556     }
557     s->pool_first_large = NULL;
558     s->pool_cur = s->pool_end = NULL;
559     s->pool_current = NULL;
560 }
561 
562 #include "exec/helper-proto.h"
563 
564 static TCGHelperInfo all_helpers[] = {
565 #include "exec/helper-tcg.h"
566 };
567 static GHashTable *helper_table;
568 
569 #ifdef CONFIG_TCG_INTERPRETER
/* Map a 3-bit dh_typecode value to the corresponding libffi type. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    /* Any other typecode is a bug in the helper declarations. */
    g_assert_not_reached();
}
606 
/*
 * Build (and deduplicate by typemask) a libffi call descriptor for
 * every registered helper, storing it in info->cif.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif plus its trailing arg-type array, allocated as one block. */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Helpers with identical signatures share one cif. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        /* Low 3 bits of the typemask encode the return type. */
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                /* Argument j occupies bits [3*(j+1), 3*(j+1)+2]. */
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* The cif blocks themselves stay live; only the index is dropped. */
    g_hash_table_destroy(ffi_table);
}
658 #endif /* CONFIG_TCG_INTERPRETER */
659 
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

/* Round the current argument slot up to an even index (pair alignment). */
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    if (cum->arg_slot % 2 != 0) {
        cum->arg_slot++;
    }
}
671 
672 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
673                          TCGCallArgumentKind kind)
674 {
675     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
676 
677     *loc = (TCGCallArgumentLoc){
678         .kind = kind,
679         .arg_idx = cum->arg_idx,
680         .arg_slot = cum->arg_slot,
681     };
682     cum->info_in_idx++;
683     cum->arg_slot++;
684 }
685 
686 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
687                                 TCGHelperInfo *info, int n)
688 {
689     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
690 
691     for (int i = 0; i < n; ++i) {
692         /* Layout all using the same arg_idx, adjusting the subindex. */
693         loc[i] = (TCGCallArgumentLoc){
694             .kind = TCG_CALL_ARG_NORMAL,
695             .arg_idx = cum->arg_idx,
696             .tmp_subindex = i,
697             .arg_slot = cum->arg_slot + i,
698         };
699     }
700     cum->info_in_idx += n;
701     cum->arg_slot += n;
702 }
703 
/*
 * Place a 128-bit argument passed by reference: one normal slot for the
 * pointer, plus 128/REG_BITS stack words in the "ref_slot" area for the
 * callee-clobberable copy of the value.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
735 
/*
 * Compute the complete call layout for the helper described by
 * info->typemask: how the return value comes back (nr_out/out_kind)
 * and where each input argument lives (in[]/nr_in), honoring the
 * target's calling-convention knobs (TCG_TARGET_CALL_ARG_* et al).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* Two registers on 32-bit hosts, one on 64-bit hosts. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Decode the 3-bit typecode into a TCG type. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Place the argument per the target's convention for its type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Bit 0 of the typecode distinguishes signed typecodes. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
915 
916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
917 static void process_op_defs(TCGContext *s);
918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
919                                             TCGReg reg, const char *name);
920 
/*
 * One-time initialization of the global TCG context (tcg_init_ctx).
 *
 * Allocates constraint storage for every opcode, registers all helpers
 * in helper_table, lets the backend initialize itself, computes the
 * indirect register allocation order, and finally creates the fixed
 * "env" global (cpu_env) in TCG_AREG0.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers.  */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1004 
/*
 * Public TCG entry point: initialize the shared context, then set up
 * the translation-buffer regions (size, split-w^x mode, CPU count).
 * Context init must precede region init.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1010 
1011 /*
1012  * Allocate TBs right before their corresponding translated code, making
1013  * sure that TBs and code are on different cache lines.
1014  */
1015 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1016 {
1017     uintptr_t align = qemu_icache_linesize;
1018     TranslationBlock *tb;
1019     void *next;
1020 
1021  retry:
1022     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1023     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1024 
1025     if (unlikely(next > s->code_gen_highwater)) {
1026         if (tcg_region_alloc(s)) {
1027             return NULL;
1028         }
1029         goto retry;
1030     }
1031     qatomic_set(&s->code_gen_ptr, next);
1032     s->data_gen_ptr = NULL;
1033     return tb;
1034 }
1035 
/*
 * Emit the host prologue/epilogue at the start of the code buffer and
 * publish it as tcg_qemu_tb_exec.  Must be called once per context
 * before translating any TB.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry point is the executable alias of the buffer start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble up to the data pool, then dump the pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1117 
/*
 * Reset per-translation state of @s before translating a new TB.
 * Globals survive; all other temps, ops and labels are discarded.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop every non-global temp from the previous translation. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    /* Start with empty op, free-op and label lists. */
    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}
1145 
1146 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1147 {
1148     int n = s->nb_temps++;
1149 
1150     if (n >= TCG_MAX_TEMPS) {
1151         tcg_raise_tb_overflow(s);
1152     }
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1162     s->nb_globals++;
1163     ts = tcg_temp_alloc(s);
1164     ts->kind = TEMP_GLOBAL;
1165 
1166     return ts;
1167 }
1168 
1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1170                                             TCGReg reg, const char *name)
1171 {
1172     TCGTemp *ts;
1173 
1174     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1175         tcg_abort();
1176     }
1177 
1178     ts = tcg_global_alloc(s);
1179     ts->base_type = type;
1180     ts->type = type;
1181     ts->kind = TEMP_FIXED;
1182     ts->reg = reg;
1183     ts->name = name;
1184     tcg_regset_set_reg(s->reserved_regs, reg);
1185 
1186     return ts;
1187 }
1188 
1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1190 {
1191     s->frame_start = start;
1192     s->frame_end = start + size;
1193     s->frame_temp
1194         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1195 }
1196 
/*
 * Create a global temp of @type whose home is memory at @base + @offset,
 * shown in dumps as @name.
 *
 * If @base is itself a TEMP_GLOBAL (rather than a fixed register), the
 * new temp is marked indirect and must be reached by first reloading
 * @base.  On 32-bit hosts a 64-bit global becomes a pair of adjacent
 * 32-bit temps named "<name>_0"/"<name>_1" at @offset and @offset + 4.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host occupies two indirect slots. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into two consecutive 32-bit halves. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The second half must directly follow the first. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1256 
/*
 * Allocate a translation temp of @type and @kind (TEMP_EBB or TEMP_TB).
 *
 * TEMP_EBB temps are recycled through the per-type free_temps bitmap.
 * Types wider than a host register (i64 on 32-bit hosts, i128) are
 * built from several adjacent TCG_TYPE_REG temps distinguished by
 * temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces immediately after the first. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1325 
1326 TCGv_vec tcg_temp_new_vec(TCGType type)
1327 {
1328     TCGTemp *t;
1329 
1330 #ifdef CONFIG_DEBUG_TCG
1331     switch (type) {
1332     case TCG_TYPE_V64:
1333         assert(TCG_TARGET_HAS_v64);
1334         break;
1335     case TCG_TYPE_V128:
1336         assert(TCG_TARGET_HAS_v128);
1337         break;
1338     case TCG_TYPE_V256:
1339         assert(TCG_TARGET_HAS_v256);
1340         break;
1341     default:
1342         g_assert_not_reached();
1343     }
1344 #endif
1345 
1346     t = tcg_temp_new_internal(type, TEMP_EBB);
1347     return temp_tcgv_vec(t);
1348 }
1349 
1350 /* Create a new temp of the same type as an existing temp.  */
1351 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1352 {
1353     TCGTemp *t = tcgv_vec_temp(match);
1354 
1355     tcg_debug_assert(t->temp_allocated != 0);
1356 
1357     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1358     return temp_tcgv_vec(t);
1359 }
1360 
1361 void tcg_temp_free_internal(TCGTemp *ts)
1362 {
1363     TCGContext *s = tcg_ctx;
1364 
1365     switch (ts->kind) {
1366     case TEMP_CONST:
1367         /*
1368          * In order to simplify users of tcg_constant_*,
1369          * silently ignore free.
1370          */
1371         return;
1372     case TEMP_EBB:
1373     case TEMP_TB:
1374         break;
1375     default:
1376         g_assert_not_reached();
1377     }
1378 
1379     tcg_debug_assert(ts->temp_allocated != 0);
1380     ts->temp_allocated = 0;
1381 
1382 #if defined(CONFIG_DEBUG_TCG)
1383     assert(s->temps_in_use > 0);
1384     s->temps_in_use--;
1385 #endif
1386 
1387     if (ts->kind == TEMP_EBB) {
1388         int idx = temp_idx(ts);
1389         set_bit(idx, s->free_temps[ts->base_type].l);
1390     }
1391 }
1392 
/*
 * Return the shared TEMP_CONST temp for (@type, @val), creating it and
 * caching it in s->const_table[type] on first use.  The result is never
 * freed (see tcg_temp_free_internal).
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table, keyed by the 64-bit value. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* Represent a 64-bit constant as two adjacent 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp itself, which lives forever. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1447 
1448 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1449 {
1450     val = dup_const(vece, val);
1451     return temp_tcgv_vec(tcg_constant_internal(type, val));
1452 }
1453 
1454 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1455 {
1456     TCGTemp *t = tcgv_vec_temp(match);
1457 
1458     tcg_debug_assert(t->temp_allocated != 0);
1459     return tcg_constant_vec(t->base_type, vece, val);
1460 }
1461 
1462 TCGv_i32 tcg_const_i32(int32_t val)
1463 {
1464     TCGv_i32 t0;
1465     t0 = tcg_temp_new_i32();
1466     tcg_gen_movi_i32(t0, val);
1467     return t0;
1468 }
1469 
1470 TCGv_i64 tcg_const_i64(int64_t val)
1471 {
1472     TCGv_i64 t0;
1473     t0 = tcg_temp_new_i64();
1474     tcg_gen_movi_i64(t0, val);
1475     return t0;
1476 }
1477 
1478 TCGv_i32 tcg_const_local_i32(int32_t val)
1479 {
1480     TCGv_i32 t0;
1481     t0 = tcg_temp_local_new_i32();
1482     tcg_gen_movi_i32(t0, val);
1483     return t0;
1484 }
1485 
1486 TCGv_i64 tcg_const_local_i64(int64_t val)
1487 {
1488     TCGv_i64 t0;
1489     t0 = tcg_temp_local_new_i64();
1490     tcg_gen_movi_i64(t0, val);
1491     return t0;
1492 }
1493 
1494 #if defined(CONFIG_DEBUG_TCG)
1495 void tcg_clear_temp_count(void)
1496 {
1497     TCGContext *s = tcg_ctx;
1498     s->temps_in_use = 0;
1499 }
1500 
1501 int tcg_check_temp_count(void)
1502 {
1503     TCGContext *s = tcg_ctx;
1504     if (s->temps_in_use) {
1505         /* Clear the count so that we don't give another
1506          * warning immediately next time around.
1507          */
1508         s->temps_in_use = 0;
1509         return 1;
1510     }
1511     return 0;
1512 }
1513 #endif
1514 
1515 /* Return true if OP may appear in the opcode stream.
1516    Test the runtime variable that controls each opcode.  */
/*
 * Map each opcode to the runtime/compile-time capability flag of the
 * current backend.  Ops in the "always" groups are mandatory for every
 * backend; the rest depend on TCG_TARGET_HAS_* or the host register
 * width.  Target-specific ops beyond INDEX_op_last_generic are assumed
 * supported, since only the backend itself emits them.
 */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and memory ops every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer ops, gated by backend capability flags. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word ops only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops require some vector width to be enabled at all. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1809 
1810 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1811 
/*
 * Emit an INDEX_op_call op invoking helper @func (which must have been
 * registered in helper_table), storing its result in @ret (NULL when
 * the helper returns void) and taking its inputs from @args.
 *
 * Argument placement follows the layout precomputed by
 * init_call_layout().  32-bit inputs that the ABI requires widened to
 * 64 bits are copied through transient i64 temps, freed after the op
 * is queued.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Op args: outputs + inputs + (func, info) trailer. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word result: @ret must be the first of n adjacent temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants this i32 widened to i64; use a scratch temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The scratch widening temps are dead once the op is queued. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1898 
1899 static void tcg_reg_alloc_start(TCGContext *s)
1900 {
1901     int i, n;
1902 
1903     for (i = 0, n = s->nb_temps; i < n; i++) {
1904         TCGTemp *ts = &s->temps[i];
1905         TCGTempVal val = TEMP_VAL_MEM;
1906 
1907         switch (ts->kind) {
1908         case TEMP_CONST:
1909             val = TEMP_VAL_CONST;
1910             break;
1911         case TEMP_FIXED:
1912             val = TEMP_VAL_REG;
1913             break;
1914         case TEMP_GLOBAL:
1915             break;
1916         case TEMP_EBB:
1917             val = TEMP_VAL_DEAD;
1918             /* fall through */
1919         case TEMP_TB:
1920             ts->mem_allocated = 0;
1921             break;
1922         default:
1923             g_assert_not_reached();
1924         }
1925         ts->val_type = val;
1926     }
1927 
1928     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1929 }
1930 
1931 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1932                                  TCGTemp *ts)
1933 {
1934     int idx = temp_idx(ts);
1935 
1936     switch (ts->kind) {
1937     case TEMP_FIXED:
1938     case TEMP_GLOBAL:
1939         pstrcpy(buf, buf_size, ts->name);
1940         break;
1941     case TEMP_TB:
1942         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1943         break;
1944     case TEMP_EBB:
1945         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1946         break;
1947     case TEMP_CONST:
1948         switch (ts->type) {
1949         case TCG_TYPE_I32:
1950             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1951             break;
1952 #if TCG_TARGET_REG_BITS > 32
1953         case TCG_TYPE_I64:
1954             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1955             break;
1956 #endif
1957         case TCG_TYPE_V64:
1958         case TCG_TYPE_V128:
1959         case TCG_TYPE_V256:
1960             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1961                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1962             break;
1963         default:
1964             g_assert_not_reached();
1965         }
1966         break;
1967     }
1968     return buf;
1969 }
1970 
1971 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1972                              int buf_size, TCGArg arg)
1973 {
1974     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1975 }
1976 
/* Printable names for TCGCond values, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for the MO_BSWAP|MO_SSIZE part of a MemOp. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

/*
 * Printable prefixes for the MO_AMASK part of a MemOp.  The "default"
 * alignment (whichever of unaligned/aligned the target implies) prints
 * as the empty string; the other prints explicitly.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/*
 * Printable names for valid combinations of bswap flags; other
 * combinations are left NULL and printed numerically by the caller.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2032 
2033 static inline bool tcg_regset_single(TCGRegSet d)
2034 {
2035     return (d & (d - 1)) == 0;
2036 }
2037 
2038 static inline TCGReg tcg_regset_first(TCGRegSet d)
2039 {
2040     if (TCG_TARGET_NB_REGS <= 32) {
2041         return ctz32(d);
2042     } else {
2043         return ctz64(d);
2044     }
2045 }
2046 
/*
 * Return only the number of characters output -- no error return.
 * fprintf reports errors as a negative result; mapping that to 0 lets
 * callers accumulate a column count without checking each call.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2050 
/*
 * Dump the current op stream of S to F, one op per line ("-d op" logging).
 * If HAVE_PREFS, also print each output's register-allocation preference
 * set.  COL tracks characters printed so the life/pref annotations can be
 * aligned at column 40.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two host-sized args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* K indexes op->args across both loops; I counts per category. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * Decode the first constant argument symbolically for the
             * opcodes whose first carg has known meaning; afterwards I is
             * the number of cargs already printed.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    /* NOTE: this MemOp shadows the outer TCGOp *op. */
                    MemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets print as "$Lnn" rather than raw values. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Remaining constant arguments print in hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before the liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Low bits: which outputs must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Remaining bits: which arguments die at this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2275 
2276 /* we give more priority to constraints with less registers */
2277 static int get_constraint_priority(const TCGOpDef *def, int k)
2278 {
2279     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2280     int n = ctpop64(arg_ct->regs);
2281 
2282     /*
2283      * Sort constraints of a single register first, which includes output
2284      * aliases (which must exactly match the input already allocated).
2285      */
2286     if (n == 1 || arg_ct->oalias) {
2287         return INT_MAX;
2288     }
2289 
2290     /*
2291      * Sort register pairs next, first then second immediately after.
2292      * Arbitrarily sort multiple pairs by the index of the first reg;
2293      * there shouldn't be many pairs.
2294      */
2295     switch (arg_ct->pair) {
2296     case 1:
2297     case 3:
2298         return (k + 1) * 2;
2299     case 2:
2300         return (arg_ct->pair_index + 1) * 2 - 1;
2301     }
2302 
2303     /* Finally, sort by decreasing register count. */
2304     assert(n > 1);
2305     return -n;
2306 }
2307 
2308 /* sort from highest priority to lowest */
2309 static void sort_constraints(TCGOpDef *def, int start, int n)
2310 {
2311     int i, j;
2312     TCGArgConstraint *a = def->args_ct;
2313 
2314     for (i = 0; i < n; i++) {
2315         a[start + i].sort_index = start + i;
2316     }
2317     if (n <= 1) {
2318         return;
2319     }
2320     for (i = 0; i < n - 1; i++) {
2321         for (j = i + 1; j < n; j++) {
2322             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2323             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2324             if (p1 < p2) {
2325                 int tmp = a[start + i].sort_index;
2326                 a[start + i].sort_index = a[start + j].sort_index;
2327                 a[start + j].sort_index = tmp;
2328             }
2329         }
2330     }
2331 }
2332 
/*
 * One-time initialization of tcg_op_defs[]: parse each opcode's
 * backend-provided constraint strings into args_ct[], resolve aliases
 * and register pairs, then sort the constraints for allocation order.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Outputs come first in args_ct; inputs follow at nb_oargs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* A digit, 'p' or 'm' must be the whole constraint string. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Early-clobber: output gets a fresh register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters accumulate const/register masks. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2537 
2538 void tcg_op_remove(TCGContext *s, TCGOp *op)
2539 {
2540     TCGLabel *label;
2541 
2542     switch (op->opc) {
2543     case INDEX_op_br:
2544         label = arg_label(op->args[0]);
2545         label->refs--;
2546         break;
2547     case INDEX_op_brcond_i32:
2548     case INDEX_op_brcond_i64:
2549         label = arg_label(op->args[3]);
2550         label->refs--;
2551         break;
2552     case INDEX_op_brcond2_i32:
2553         label = arg_label(op->args[5]);
2554         label->refs--;
2555         break;
2556     default:
2557         break;
2558     }
2559 
2560     QTAILQ_REMOVE(&s->ops, op, link);
2561     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2562     s->nb_ops--;
2563 
2564 #ifdef CONFIG_PROFILER
2565     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2566 #endif
2567 }
2568 
2569 void tcg_remove_ops_after(TCGOp *op)
2570 {
2571     TCGContext *s = tcg_ctx;
2572 
2573     while (true) {
2574         TCGOp *last = tcg_last_op();
2575         if (last == op) {
2576             return;
2577         }
2578         tcg_op_remove(s, last);
2579     }
2580 }
2581 
2582 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2583 {
2584     TCGContext *s = tcg_ctx;
2585     TCGOp *op = NULL;
2586 
2587     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2588         QTAILQ_FOREACH(op, &s->free_ops, link) {
2589             if (nargs <= op->nargs) {
2590                 QTAILQ_REMOVE(&s->free_ops, op, link);
2591                 nargs = op->nargs;
2592                 goto found;
2593             }
2594         }
2595     }
2596 
2597     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2598     nargs = MAX(4, nargs);
2599     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2600 
2601  found:
2602     memset(op, 0, offsetof(TCGOp, link));
2603     op->opc = opc;
2604     op->nargs = nargs;
2605 
2606     /* Check for bitfield overflow. */
2607     tcg_debug_assert(op->nargs == nargs);
2608 
2609     s->nb_ops++;
2610     return op;
2611 }
2612 
2613 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2614 {
2615     TCGOp *op = tcg_op_alloc(opc, nargs);
2616     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2617     return op;
2618 }
2619 
2620 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2621                             TCGOpcode opc, unsigned nargs)
2622 {
2623     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2624     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2625     return new_op;
2626 }
2627 
2628 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2629                            TCGOpcode opc, unsigned nargs)
2630 {
2631     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2632     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2633     return new_op;
2634 }
2635 
/*
 * Reachable analysis : remove unreachable code.
 * Walk the op stream once, tracking whether the current op can be
 * reached; everything between an unconditional control transfer and the
 * next referenced label is removed.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            /*
             * NOTE(review): QTAILQ_PREV is dereferenced unconditionally,
             * which assumes a set_label is never the very first op --
             * confirm against how translators begin a TB.
             */
            op_prev = QTAILQ_PREV(op, link);
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2710 
/* Liveness state bits for TCGTemp.state: the value is dead / the memory
   copy is up to date.  Both set means dead with memory valid. */
#define TS_DEAD  1
#define TS_MEM   2

/* Test the per-argument dead/sync bits packed into an op's arg_life. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2716 
2717 /* For liveness_pass_1, the register preferences for a given temp.  */
2718 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2719 {
2720     return ts->state_ptr;
2721 }
2722 
2723 /* For liveness_pass_1, reset the preferences for a given temp to the
2724  * maximal regset for its type.
2725  */
2726 static inline void la_reset_pref(TCGTemp *ts)
2727 {
2728     *la_temp_pref(ts)
2729         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2730 }
2731 
2732 /* liveness analysis: end of function: all temps are dead, and globals
2733    should be in memory. */
2734 static void la_func_end(TCGContext *s, int ng, int nt)
2735 {
2736     int i;
2737 
2738     for (i = 0; i < ng; ++i) {
2739         s->temps[i].state = TS_DEAD | TS_MEM;
2740         la_reset_pref(&s->temps[i]);
2741     }
2742     for (i = ng; i < nt; ++i) {
2743         s->temps[i].state = TS_DEAD;
2744         la_reset_pref(&s->temps[i]);
2745     }
2746 }
2747 
2748 /* liveness analysis: end of basic block: all temps are dead, globals
2749    and local temps should be in memory. */
2750 static void la_bb_end(TCGContext *s, int ng, int nt)
2751 {
2752     int i;
2753 
2754     for (i = 0; i < nt; ++i) {
2755         TCGTemp *ts = &s->temps[i];
2756         int state;
2757 
2758         switch (ts->kind) {
2759         case TEMP_FIXED:
2760         case TEMP_GLOBAL:
2761         case TEMP_TB:
2762             state = TS_DEAD | TS_MEM;
2763             break;
2764         case TEMP_EBB:
2765         case TEMP_CONST:
2766             state = TS_DEAD;
2767             break;
2768         default:
2769             g_assert_not_reached();
2770         }
2771         ts->state = state;
2772         la_reset_pref(ts);
2773     }
2774 }
2775 
2776 /* liveness analysis: sync globals back to memory.  */
2777 static void la_global_sync(TCGContext *s, int ng)
2778 {
2779     int i;
2780 
2781     for (i = 0; i < ng; ++i) {
2782         int state = s->temps[i].state;
2783         s->temps[i].state = state | TS_MEM;
2784         if (state == TS_DEAD) {
2785             /* If the global was previously dead, reset prefs.  */
2786             la_reset_pref(&s->temps[i]);
2787         }
2788     }
2789 }
2790 
2791 /*
2792  * liveness analysis: conditional branch: all temps are dead unless
2793  * explicitly live-across-conditional-branch, globals and local temps
2794  * should be synced.
2795  */
2796 static void la_bb_sync(TCGContext *s, int ng, int nt)
2797 {
2798     la_global_sync(s, ng);
2799 
2800     for (int i = ng; i < nt; ++i) {
2801         TCGTemp *ts = &s->temps[i];
2802         int state;
2803 
2804         switch (ts->kind) {
2805         case TEMP_TB:
2806             state = ts->state;
2807             ts->state = state | TS_MEM;
2808             if (state != TS_DEAD) {
2809                 continue;
2810             }
2811             break;
2812         case TEMP_EBB:
2813         case TEMP_CONST:
2814             continue;
2815         default:
2816             g_assert_not_reached();
2817         }
2818         la_reset_pref(&s->temps[i]);
2819     }
2820 }
2821 
2822 /* liveness analysis: sync globals back to memory and kill.  */
2823 static void la_global_kill(TCGContext *s, int ng)
2824 {
2825     int i;
2826 
2827     for (i = 0; i < ng; i++) {
2828         s->temps[i].state = TS_DEAD | TS_MEM;
2829         la_reset_pref(&s->temps[i]);
2830     }
2831 }
2832 
2833 /* liveness analysis: note live globals crossing calls.  */
2834 static void la_cross_call(TCGContext *s, int nt)
2835 {
2836     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2837     int i;
2838 
2839     for (i = 0; i < nt; i++) {
2840         TCGTemp *ts = &s->temps[i];
2841         if (!(ts->state & TS_DEAD)) {
2842             TCGRegSet *pset = la_temp_pref(ts);
2843             TCGRegSet set = *pset;
2844 
2845             set &= mask;
2846             /* If the combination is not possible, restart.  */
2847             if (set == 0) {
2848                 set = tcg_target_available_regs[ts->type] & mask;
2849             }
2850             *pset = set;
2851         }
2852     }
2853 }
2854 
2855 /*
2856  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
2857  * to TEMP_EBB, if possible.
2858  */
2859 static void __attribute__((noinline))
2860 liveness_pass_0(TCGContext *s)
2861 {
2862     void * const multiple_ebb = (void *)(uintptr_t)-1;
2863     int nb_temps = s->nb_temps;
2864     TCGOp *op, *ebb;
2865 
2866     for (int i = s->nb_globals; i < nb_temps; ++i) {
2867         s->temps[i].state_ptr = NULL;
2868     }
2869 
2870     /*
2871      * Represent each EBB by the op at which it begins.  In the case of
2872      * the first EBB, this is the first op, otherwise it is a label.
2873      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
2874      * within a single EBB, else MULTIPLE_EBB.
2875      */
2876     ebb = QTAILQ_FIRST(&s->ops);
2877     QTAILQ_FOREACH(op, &s->ops, link) {
2878         const TCGOpDef *def;
2879         int nb_oargs, nb_iargs;
2880 
2881         switch (op->opc) {
2882         case INDEX_op_set_label:
2883             ebb = op;
2884             continue;
2885         case INDEX_op_discard:
2886             continue;
2887         case INDEX_op_call:
2888             nb_oargs = TCGOP_CALLO(op);
2889             nb_iargs = TCGOP_CALLI(op);
2890             break;
2891         default:
2892             def = &tcg_op_defs[op->opc];
2893             nb_oargs = def->nb_oargs;
2894             nb_iargs = def->nb_iargs;
2895             break;
2896         }
2897 
2898         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
2899             TCGTemp *ts = arg_temp(op->args[i]);
2900 
2901             if (ts->kind != TEMP_TB) {
2902                 continue;
2903             }
2904             if (ts->state_ptr == NULL) {
2905                 ts->state_ptr = ebb;
2906             } else if (ts->state_ptr != ebb) {
2907                 ts->state_ptr = multiple_ebb;
2908             }
2909         }
2910     }
2911 
2912     /*
2913      * For TEMP_TB that turned out not to be used beyond one EBB,
2914      * reduce the liveness to TEMP_EBB.
2915      */
2916     for (int i = s->nb_globals; i < nb_temps; ++i) {
2917         TCGTemp *ts = &s->temps[i];
2918         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
2919             ts->kind = TEMP_EBB;
2920         }
2921     }
2922 }
2923 
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.

   This walks the op list BACKWARD, maintaining per-temp liveness in
   TCGTemp.state (TS_DEAD / TS_MEM bits) and per-temp register
   preferences via la_temp_pref().  The resulting life bits are stored
   in op->life for use by the register allocator.  Some multi-word ops
   (add2/sub2/mul2) are rewritten in place to single-word forms when
   only part of the result is live.  */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp; state_ptr indexes into it.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    /* Helper may read AND write globals: kill them all.  */
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    /* Helper may read globals: they must be synced.  */
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Kept for unwind info; no liveness effect.  */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3266 
/* Liveness analysis: Convert indirect regs to direct temporaries.

   For each global marked indirect_reg, allocate a shadow TEMP_EBB temp
   (stashed in state_ptr) and rewrite ops to use it, inserting explicit
   loads before uses and stores after final writes, guided by the life
   bits computed in liveness_pass_1.  Returns true if any op changed,
   in which case the caller should re-run liveness_pass_1.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Continue from i == nb_globals: plain temps have no shadow.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp not loaded: insert an explicit load.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the mov source directly
                           and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3455 
/* Allocate a stack-frame slot for @ts (and, for a subdivided base type,
   for all of its sibling parts).  Raises a TB-overflow restart if the
   frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC frame addressing applies a fixed bias to the frame pointer.  */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3524 
3525 /* Assign @reg to @ts, and update reg_to_temp[]. */
3526 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3527 {
3528     if (ts->val_type == TEMP_VAL_REG) {
3529         TCGReg old = ts->reg;
3530         tcg_debug_assert(s->reg_to_temp[old] == ts);
3531         if (old == reg) {
3532             return;
3533         }
3534         s->reg_to_temp[old] = NULL;
3535     }
3536     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3537     s->reg_to_temp[reg] = ts;
3538     ts->val_type = TEMP_VAL_REG;
3539     ts->reg = reg;
3540 }
3541 
3542 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3543 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3544 {
3545     tcg_debug_assert(type != TEMP_VAL_REG);
3546     if (ts->val_type == TEMP_VAL_REG) {
3547         TCGReg reg = ts->reg;
3548         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3549         s->reg_to_temp[reg] = NULL;
3550     }
3551     ts->val_type = type;
3552 }
3553 
3554 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3555 
3556 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3557    mark it free; otherwise mark it dead.  */
3558 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3559 {
3560     TCGTempVal new_type;
3561 
3562     switch (ts->kind) {
3563     case TEMP_FIXED:
3564         return;
3565     case TEMP_GLOBAL:
3566     case TEMP_TB:
3567         new_type = TEMP_VAL_MEM;
3568         break;
3569     case TEMP_EBB:
3570         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3571         break;
3572     case TEMP_CONST:
3573         new_type = TEMP_VAL_CONST;
3574         break;
3575     default:
3576         g_assert_not_reached();
3577     }
3578     set_temp_val_nonreg(s, ts, new_type);
3579 }
3580 
3581 /* Mark a temporary as dead.  */
3582 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3583 {
3584     temp_free_or_dead(s, ts, 1);
3585 }
3586 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps are never dirty; coherent temps need no store. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register, then
               store via the TEMP_VAL_REG path below. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Value already lives (only) in memory; nothing to store. */
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3630 
3631 /* free register 'reg' by spilling the corresponding temporary if necessary */
3632 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3633 {
3634     TCGTemp *ts = s->reg_to_temp[reg];
3635     if (ts != NULL) {
3636         temp_sync(s, ts, allocated_regs, 0, -1);
3637     }
3638 }
3639 
3640 /**
3641  * tcg_reg_alloc:
3642  * @required_regs: Set of registers in which we must allocate.
3643  * @allocated_regs: Set of registers which must be avoided.
3644  * @preferred_regs: Set of registers we should prefer.
3645  * @rev: True if we search the registers in "indirect" order.
3646  *
3647  * The allocated register must be in @required_regs & ~@allocated_regs,
3648  * but if we can put it in @preferred_regs we may save a move later.
3649  */
3650 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3651                             TCGRegSet allocated_regs,
3652                             TCGRegSet preferred_regs, bool rev)
3653 {
3654     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3655     TCGRegSet reg_ct[2];
3656     const int *order;
3657 
3658     reg_ct[1] = required_regs & ~allocated_regs;
3659     tcg_debug_assert(reg_ct[1] != 0);
3660     reg_ct[0] = reg_ct[1] & preferred_regs;
3661 
3662     /* Skip the preferred_regs option if it cannot be satisfied,
3663        or if the preference made no difference.  */
3664     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3665 
3666     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3667 
3668     /* Try free registers, preferences first.  */
3669     for (j = f; j < 2; j++) {
3670         TCGRegSet set = reg_ct[j];
3671 
3672         if (tcg_regset_single(set)) {
3673             /* One register in the set.  */
3674             TCGReg reg = tcg_regset_first(set);
3675             if (s->reg_to_temp[reg] == NULL) {
3676                 return reg;
3677             }
3678         } else {
3679             for (i = 0; i < n; i++) {
3680                 TCGReg reg = order[i];
3681                 if (s->reg_to_temp[reg] == NULL &&
3682                     tcg_regset_test_reg(set, reg)) {
3683                     return reg;
3684                 }
3685             }
3686         }
3687     }
3688 
3689     /* We must spill something.  */
3690     for (j = f; j < 2; j++) {
3691         TCGRegSet set = reg_ct[j];
3692 
3693         if (tcg_regset_single(set)) {
3694             /* One register in the set.  */
3695             TCGReg reg = tcg_regset_first(set);
3696             tcg_reg_free(s, reg, allocated_regs);
3697             return reg;
3698         } else {
3699             for (i = 0; i < n; i++) {
3700                 TCGReg reg = order[i];
3701                 if (tcg_regset_test_reg(set, reg)) {
3702                     tcg_reg_free(s, reg, allocated_regs);
3703                     return reg;
3704                 }
3705             }
3706         }
3707     }
3708 
3709     tcg_abort();
3710 }
3711 
/*
 * Allocate a pair of adjacent registers (reg, reg + 1) from
 * @required_regs & ~@allocated_regs, spilling occupants as needed.
 * Returns the lower register of the pair; the caller owns both.
 * Parameters are as for tcg_reg_alloc.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of free registers in the pair (0..2). */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* The fmin == 0 pass accepts any pair in the set, so this is
       unreachable given the non-empty assertion above. */
    tcg_abort();
}
3757 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar type: a plain move-immediate suffices. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The constant now lives only in the register, not in memory. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory hold the same value after the load. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}
3806 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   NOTE: with the current liveness scheme this reduces to an assertion,
   so 'allocated_regs' is presently unused; it is kept for interface
   symmetry with temp_sync. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3815 
3816 /* save globals to their canonical location and assume they can be
3817    modified be the following code. 'allocated_regs' is used in case a
3818    temporary registers needs to be allocated to store a constant. */
3819 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3820 {
3821     int i, n;
3822 
3823     for (i = 0, n = s->nb_globals; i < n; i++) {
3824         temp_save(s, &s->temps[i], allocated_regs);
3825     }
3826 }
3827 
3828 /* sync globals to their canonical location and assume they can be
3829    read by the following code. 'allocated_regs' is used in case a
3830    temporary registers needs to be allocated to store a constant. */
3831 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3832 {
3833     int i, n;
3834 
3835     for (i = 0, n = s->nb_globals; i < n; i++) {
3836         TCGTemp *ts = &s->temps[i];
3837         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3838                          || ts->kind == TEMP_FIXED
3839                          || ts->mem_coherent);
3840     }
3841 }
3842 
3843 /* at the end of a basic block, we assume all temporaries are dead and
3844    all globals are stored at their canonical location. */
3845 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3846 {
3847     int i;
3848 
3849     for (i = s->nb_globals; i < s->nb_temps; i++) {
3850         TCGTemp *ts = &s->temps[i];
3851 
3852         switch (ts->kind) {
3853         case TEMP_TB:
3854             temp_save(s, ts, allocated_regs);
3855             break;
3856         case TEMP_EBB:
3857             /* The liveness analysis already ensures that temps are dead.
3858                Keep an tcg_debug_assert for safety. */
3859             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3860             break;
3861         case TEMP_CONST:
3862             /* Similarly, we should have freed any allocated register. */
3863             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3864             break;
3865         default:
3866             g_assert_not_reached();
3867         }
3868     }
3869 
3870     save_globals(s, allocated_regs);
3871 }
3872 
3873 /*
3874  * At a conditional branch, we assume all temporaries are dead unless
3875  * explicitly live-across-conditional-branch; all globals and local
3876  * temps are synced to their location.
3877  */
3878 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3879 {
3880     sync_globals(s, allocated_regs);
3881 
3882     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3883         TCGTemp *ts = &s->temps[i];
3884         /*
3885          * The liveness analysis already ensures that temps are dead.
3886          * Keep tcg_debug_asserts for safety.
3887          */
3888         switch (ts->kind) {
3889         case TEMP_TB:
3890             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3891             break;
3892         case TEMP_EBB:
3893         case TEMP_CONST:
3894             break;
3895         default:
3896             g_assert_not_reached();
3897         }
3898     }
3899 }
3900 
3901 /*
3902  * Specialized code generation for INDEX_op_mov_* with a constant.
3903  */
3904 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3905                                   tcg_target_ulong val, TCGLifeData arg_life,
3906                                   TCGRegSet preferred_regs)
3907 {
3908     /* ENV should not be modified.  */
3909     tcg_debug_assert(!temp_readonly(ots));
3910 
3911     /* The movi is not explicitly generated here.  */
3912     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3913     ots->val = val;
3914     ots->mem_coherent = 0;
3915     if (NEED_SYNC_ARG(0)) {
3916         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3917     } else if (IS_DEAD_ARG(0)) {
3918         temp_dead(s, ots);
3919     }
3920 }
3921 
3922 /*
3923  * Specialized code generation for INDEX_op_mov_*.
3924  */
3925 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3926 {
3927     const TCGLifeData arg_life = op->life;
3928     TCGRegSet allocated_regs, preferred_regs;
3929     TCGTemp *ts, *ots;
3930     TCGType otype, itype;
3931     TCGReg oreg, ireg;
3932 
3933     allocated_regs = s->reserved_regs;
3934     preferred_regs = output_pref(op, 0);
3935     ots = arg_temp(op->args[0]);
3936     ts = arg_temp(op->args[1]);
3937 
3938     /* ENV should not be modified.  */
3939     tcg_debug_assert(!temp_readonly(ots));
3940 
3941     /* Note that otype != itype for no-op truncation.  */
3942     otype = ots->type;
3943     itype = ts->type;
3944 
3945     if (ts->val_type == TEMP_VAL_CONST) {
3946         /* propagate constant or generate sti */
3947         tcg_target_ulong val = ts->val;
3948         if (IS_DEAD_ARG(1)) {
3949             temp_dead(s, ts);
3950         }
3951         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3952         return;
3953     }
3954 
3955     /* If the source value is in memory we're going to be forced
3956        to have it in a register in order to perform the copy.  Copy
3957        the SOURCE value into its own register first, that way we
3958        don't have to reload SOURCE the next time it is used. */
3959     if (ts->val_type == TEMP_VAL_MEM) {
3960         temp_load(s, ts, tcg_target_available_regs[itype],
3961                   allocated_regs, preferred_regs);
3962     }
3963     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3964     ireg = ts->reg;
3965 
3966     if (IS_DEAD_ARG(0)) {
3967         /* mov to a non-saved dead register makes no sense (even with
3968            liveness analysis disabled). */
3969         tcg_debug_assert(NEED_SYNC_ARG(0));
3970         if (!ots->mem_allocated) {
3971             temp_allocate_frame(s, ots);
3972         }
3973         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3974         if (IS_DEAD_ARG(1)) {
3975             temp_dead(s, ts);
3976         }
3977         temp_dead(s, ots);
3978         return;
3979     }
3980 
3981     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3982         /*
3983          * The mov can be suppressed.  Kill input first, so that it
3984          * is unlinked from reg_to_temp, then set the output to the
3985          * reg that we saved from the input.
3986          */
3987         temp_dead(s, ts);
3988         oreg = ireg;
3989     } else {
3990         if (ots->val_type == TEMP_VAL_REG) {
3991             oreg = ots->reg;
3992         } else {
3993             /* Make sure to not spill the input register during allocation. */
3994             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3995                                  allocated_regs | ((TCGRegSet)1 << ireg),
3996                                  preferred_regs, ots->indirect_base);
3997         }
3998         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3999             /*
4000              * Cross register class move not supported.
4001              * Store the source register into the destination slot
4002              * and leave the destination temp as TEMP_VAL_MEM.
4003              */
4004             assert(!temp_readonly(ots));
4005             if (!ts->mem_allocated) {
4006                 temp_allocate_frame(s, ots);
4007             }
4008             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4009             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4010             ots->mem_coherent = 1;
4011             return;
4012         }
4013     }
4014     set_temp_val_reg(s, ots, oreg);
4015     ots->mem_coherent = 0;
4016 
4017     if (NEED_SYNC_ARG(0)) {
4018         temp_sync(s, ots, allocated_regs, 0, 0);
4019     }
4020 }
4021 
4022 /*
4023  * Specialized code generation for INDEX_op_dup_vec.
4024  */
4025 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4026 {
4027     const TCGLifeData arg_life = op->life;
4028     TCGRegSet dup_out_regs, dup_in_regs;
4029     TCGTemp *its, *ots;
4030     TCGType itype, vtype;
4031     unsigned vece;
4032     int lowpart_ofs;
4033     bool ok;
4034 
4035     ots = arg_temp(op->args[0]);
4036     its = arg_temp(op->args[1]);
4037 
4038     /* ENV should not be modified.  */
4039     tcg_debug_assert(!temp_readonly(ots));
4040 
4041     itype = its->type;
4042     vece = TCGOP_VECE(op);
4043     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4044 
4045     if (its->val_type == TEMP_VAL_CONST) {
4046         /* Propagate constant via movi -> dupi.  */
4047         tcg_target_ulong val = its->val;
4048         if (IS_DEAD_ARG(1)) {
4049             temp_dead(s, its);
4050         }
4051         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4052         return;
4053     }
4054 
4055     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4056     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4057 
4058     /* Allocate the output register now.  */
4059     if (ots->val_type != TEMP_VAL_REG) {
4060         TCGRegSet allocated_regs = s->reserved_regs;
4061         TCGReg oreg;
4062 
4063         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4064             /* Make sure to not spill the input register. */
4065             tcg_regset_set_reg(allocated_regs, its->reg);
4066         }
4067         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4068                              output_pref(op, 0), ots->indirect_base);
4069         set_temp_val_reg(s, ots, oreg);
4070     }
4071 
4072     switch (its->val_type) {
4073     case TEMP_VAL_REG:
4074         /*
4075          * The dup constriaints must be broad, covering all possible VECE.
4076          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4077          * to fail, indicating that extra moves are required for that case.
4078          */
4079         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4080             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4081                 goto done;
4082             }
4083             /* Try again from memory or a vector input register.  */
4084         }
4085         if (!its->mem_coherent) {
4086             /*
4087              * The input register is not synced, and so an extra store
4088              * would be required to use memory.  Attempt an integer-vector
4089              * register move first.  We do not have a TCGRegSet for this.
4090              */
4091             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4092                 break;
4093             }
4094             /* Sync the temp back to its slot and load from there.  */
4095             temp_sync(s, its, s->reserved_regs, 0, 0);
4096         }
4097         /* fall through */
4098 
4099     case TEMP_VAL_MEM:
4100         lowpart_ofs = 0;
4101         if (HOST_BIG_ENDIAN) {
4102             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4103         }
4104         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4105                              its->mem_offset + lowpart_ofs)) {
4106             goto done;
4107         }
4108         /* Load the input into the destination vector register. */
4109         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4110         break;
4111 
4112     default:
4113         g_assert_not_reached();
4114     }
4115 
4116     /* We now have a vector input register, so dup must succeed. */
4117     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4118     tcg_debug_assert(ok);
4119 
4120  done:
4121     ots->mem_coherent = 0;
4122     if (IS_DEAD_ARG(1)) {
4123         temp_dead(s, its);
4124     }
4125     if (NEED_SYNC_ARG(0)) {
4126         temp_sync(s, ots, s->reserved_regs, 0, 0);
4127     }
4128     if (IS_DEAD_ARG(0)) {
4129         temp_dead(s, ots);
4130     }
4131 }
4132 
4133 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4134 {
4135     const TCGLifeData arg_life = op->life;
4136     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4137     TCGRegSet i_allocated_regs;
4138     TCGRegSet o_allocated_regs;
4139     int i, k, nb_iargs, nb_oargs;
4140     TCGReg reg;
4141     TCGArg arg;
4142     const TCGArgConstraint *arg_ct;
4143     TCGTemp *ts;
4144     TCGArg new_args[TCG_MAX_OP_ARGS];
4145     int const_args[TCG_MAX_OP_ARGS];
4146 
4147     nb_oargs = def->nb_oargs;
4148     nb_iargs = def->nb_iargs;
4149 
4150     /* copy constants */
4151     memcpy(new_args + nb_oargs + nb_iargs,
4152            op->args + nb_oargs + nb_iargs,
4153            sizeof(TCGArg) * def->nb_cargs);
4154 
4155     i_allocated_regs = s->reserved_regs;
4156     o_allocated_regs = s->reserved_regs;
4157 
4158     /* satisfy input constraints */
4159     for (k = 0; k < nb_iargs; k++) {
4160         TCGRegSet i_preferred_regs, i_required_regs;
4161         bool allocate_new_reg, copyto_new_reg;
4162         TCGTemp *ts2;
4163         int i1, i2;
4164 
4165         i = def->args_ct[nb_oargs + k].sort_index;
4166         arg = op->args[i];
4167         arg_ct = &def->args_ct[i];
4168         ts = arg_temp(arg);
4169 
4170         if (ts->val_type == TEMP_VAL_CONST
4171             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4172             /* constant is OK for instruction */
4173             const_args[i] = 1;
4174             new_args[i] = ts->val;
4175             continue;
4176         }
4177 
4178         reg = ts->reg;
4179         i_preferred_regs = 0;
4180         i_required_regs = arg_ct->regs;
4181         allocate_new_reg = false;
4182         copyto_new_reg = false;
4183 
4184         switch (arg_ct->pair) {
4185         case 0: /* not paired */
4186             if (arg_ct->ialias) {
4187                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4188 
4189                 /*
4190                  * If the input is readonly, then it cannot also be an
4191                  * output and aliased to itself.  If the input is not
4192                  * dead after the instruction, we must allocate a new
4193                  * register and move it.
4194                  */
4195                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4196                     allocate_new_reg = true;
4197                 } else if (ts->val_type == TEMP_VAL_REG) {
4198                     /*
4199                      * Check if the current register has already been
4200                      * allocated for another input.
4201                      */
4202                     allocate_new_reg =
4203                         tcg_regset_test_reg(i_allocated_regs, reg);
4204                 }
4205             }
4206             if (!allocate_new_reg) {
4207                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4208                           i_preferred_regs);
4209                 reg = ts->reg;
4210                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4211             }
4212             if (allocate_new_reg) {
4213                 /*
4214                  * Allocate a new register matching the constraint
4215                  * and move the temporary register into it.
4216                  */
4217                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4218                           i_allocated_regs, 0);
4219                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4220                                     i_preferred_regs, ts->indirect_base);
4221                 copyto_new_reg = true;
4222             }
4223             break;
4224 
4225         case 1:
4226             /* First of an input pair; if i1 == i2, the second is an output. */
4227             i1 = i;
4228             i2 = arg_ct->pair_index;
4229             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4230 
4231             /*
4232              * It is easier to default to allocating a new pair
4233              * and to identify a few cases where it's not required.
4234              */
4235             if (arg_ct->ialias) {
4236                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4237                 if (IS_DEAD_ARG(i1) &&
4238                     IS_DEAD_ARG(i2) &&
4239                     !temp_readonly(ts) &&
4240                     ts->val_type == TEMP_VAL_REG &&
4241                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4242                     tcg_regset_test_reg(i_required_regs, reg) &&
4243                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4244                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4245                     (ts2
4246                      ? ts2->val_type == TEMP_VAL_REG &&
4247                        ts2->reg == reg + 1 &&
4248                        !temp_readonly(ts2)
4249                      : s->reg_to_temp[reg + 1] == NULL)) {
4250                     break;
4251                 }
4252             } else {
4253                 /* Without aliasing, the pair must also be an input. */
4254                 tcg_debug_assert(ts2);
4255                 if (ts->val_type == TEMP_VAL_REG &&
4256                     ts2->val_type == TEMP_VAL_REG &&
4257                     ts2->reg == reg + 1 &&
4258                     tcg_regset_test_reg(i_required_regs, reg)) {
4259                     break;
4260                 }
4261             }
4262             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4263                                      0, ts->indirect_base);
4264             goto do_pair;
4265 
4266         case 2: /* pair second */
4267             reg = new_args[arg_ct->pair_index] + 1;
4268             goto do_pair;
4269 
4270         case 3: /* ialias with second output, no first input */
4271             tcg_debug_assert(arg_ct->ialias);
4272             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4273 
4274             if (IS_DEAD_ARG(i) &&
4275                 !temp_readonly(ts) &&
4276                 ts->val_type == TEMP_VAL_REG &&
4277                 reg > 0 &&
4278                 s->reg_to_temp[reg - 1] == NULL &&
4279                 tcg_regset_test_reg(i_required_regs, reg) &&
4280                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4281                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4282                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4283                 break;
4284             }
4285             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4286                                      i_allocated_regs, 0,
4287                                      ts->indirect_base);
4288             tcg_regset_set_reg(i_allocated_regs, reg);
4289             reg += 1;
4290             goto do_pair;
4291 
4292         do_pair:
4293             /*
4294              * If an aliased input is not dead after the instruction,
4295              * we must allocate a new register and move it.
4296              */
4297             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4298                 TCGRegSet t_allocated_regs = i_allocated_regs;
4299 
4300                 /*
4301                  * Because of the alias, and the continued life, make sure
4302                  * that the temp is somewhere *other* than the reg pair,
4303                  * and we get a copy in reg.
4304                  */
4305                 tcg_regset_set_reg(t_allocated_regs, reg);
4306                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4307                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4308                     /* If ts was already in reg, copy it somewhere else. */
4309                     TCGReg nr;
4310                     bool ok;
4311 
4312                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4313                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4314                                        t_allocated_regs, 0, ts->indirect_base);
4315                     ok = tcg_out_mov(s, ts->type, nr, reg);
4316                     tcg_debug_assert(ok);
4317 
4318                     set_temp_val_reg(s, ts, nr);
4319                 } else {
4320                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4321                               t_allocated_regs, 0);
4322                     copyto_new_reg = true;
4323                 }
4324             } else {
4325                 /* Preferably allocate to reg, otherwise copy. */
4326                 i_required_regs = (TCGRegSet)1 << reg;
4327                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4328                           i_preferred_regs);
4329                 copyto_new_reg = ts->reg != reg;
4330             }
4331             break;
4332 
4333         default:
4334             g_assert_not_reached();
4335         }
4336 
4337         if (copyto_new_reg) {
4338             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4339                 /*
4340                  * Cross register class move not supported.  Sync the
4341                  * temp back to its slot and load from there.
4342                  */
4343                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4344                 tcg_out_ld(s, ts->type, reg,
4345                            ts->mem_base->reg, ts->mem_offset);
4346             }
4347         }
4348         new_args[i] = reg;
4349         const_args[i] = 0;
4350         tcg_regset_set_reg(i_allocated_regs, reg);
4351     }
4352 
4353     /* mark dead temporaries and free the associated registers */
4354     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4355         if (IS_DEAD_ARG(i)) {
4356             temp_dead(s, arg_temp(op->args[i]));
4357         }
4358     }
4359 
4360     if (def->flags & TCG_OPF_COND_BRANCH) {
4361         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4362     } else if (def->flags & TCG_OPF_BB_END) {
4363         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4364     } else {
4365         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4366             /* XXX: permit generic clobber register list ? */
4367             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4368                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4369                     tcg_reg_free(s, i, i_allocated_regs);
4370                 }
4371             }
4372         }
4373         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4374             /* sync globals if the op has side effects and might trigger
4375                an exception. */
4376             sync_globals(s, i_allocated_regs);
4377         }
4378 
4379         /* satisfy the output constraints */
4380         for(k = 0; k < nb_oargs; k++) {
4381             i = def->args_ct[k].sort_index;
4382             arg = op->args[i];
4383             arg_ct = &def->args_ct[i];
4384             ts = arg_temp(arg);
4385 
4386             /* ENV should not be modified.  */
4387             tcg_debug_assert(!temp_readonly(ts));
4388 
4389             switch (arg_ct->pair) {
4390             case 0: /* not paired */
4391                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4392                     reg = new_args[arg_ct->alias_index];
4393                 } else if (arg_ct->newreg) {
4394                     reg = tcg_reg_alloc(s, arg_ct->regs,
4395                                         i_allocated_regs | o_allocated_regs,
4396                                         output_pref(op, k), ts->indirect_base);
4397                 } else {
4398                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4399                                         output_pref(op, k), ts->indirect_base);
4400                 }
4401                 break;
4402 
4403             case 1: /* first of pair */
4404                 tcg_debug_assert(!arg_ct->newreg);
4405                 if (arg_ct->oalias) {
4406                     reg = new_args[arg_ct->alias_index];
4407                     break;
4408                 }
4409                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4410                                          output_pref(op, k), ts->indirect_base);
4411                 break;
4412 
4413             case 2: /* second of pair */
4414                 tcg_debug_assert(!arg_ct->newreg);
4415                 if (arg_ct->oalias) {
4416                     reg = new_args[arg_ct->alias_index];
4417                 } else {
4418                     reg = new_args[arg_ct->pair_index] + 1;
4419                 }
4420                 break;
4421 
4422             case 3: /* first of pair, aliasing with a second input */
4423                 tcg_debug_assert(!arg_ct->newreg);
4424                 reg = new_args[arg_ct->pair_index] - 1;
4425                 break;
4426 
4427             default:
4428                 g_assert_not_reached();
4429             }
4430             tcg_regset_set_reg(o_allocated_regs, reg);
4431             set_temp_val_reg(s, ts, reg);
4432             ts->mem_coherent = 0;
4433             new_args[i] = reg;
4434         }
4435     }
4436 
4437     /* emit instruction */
4438     if (def->flags & TCG_OPF_VECTOR) {
4439         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4440                        new_args, const_args);
4441     } else {
4442         tcg_out_op(s, op->opc, new_args, const_args);
4443     }
4444 
4445     /* move the outputs in the correct register if needed */
4446     for(i = 0; i < nb_oargs; i++) {
4447         ts = arg_temp(op->args[i]);
4448 
4449         /* ENV should not be modified.  */
4450         tcg_debug_assert(!temp_readonly(ts));
4451 
4452         if (NEED_SYNC_ARG(i)) {
4453             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4454         } else if (IS_DEAD_ARG(i)) {
4455             temp_dead(s, ts);
4456         }
4457     }
4458 }
4459 
/*
 * Allocate registers and emit code for INDEX_op_dup2_vec: broadcast a
 * 64-bit value, supplied as two 32-bit halves (args[1] = low,
 * args[2] = high, per the deposit64 below), into vector output args[0].
 *
 * Returns true if the dup was emitted here; false to tell the caller
 * to fall back to the generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    /* arg_life feeds the IS_DEAD_ARG/NEED_SYNC_ARG macros below. */
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size whose replication yields the
           same constant, giving the backend the most encoding options. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points at the lower-addressed half of the pair. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to memory so they can be loaded as one. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4547 
4548 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4549                          TCGRegSet allocated_regs)
4550 {
4551     if (ts->val_type == TEMP_VAL_REG) {
4552         if (ts->reg != reg) {
4553             tcg_reg_free(s, reg, allocated_regs);
4554             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4555                 /*
4556                  * Cross register class move not supported.  Sync the
4557                  * temp back to its slot and load from there.
4558                  */
4559                 temp_sync(s, ts, allocated_regs, 0, 0);
4560                 tcg_out_ld(s, ts->type, reg,
4561                            ts->mem_base->reg, ts->mem_offset);
4562             }
4563         }
4564     } else {
4565         TCGRegSet arg_set = 0;
4566 
4567         tcg_reg_free(s, reg, allocated_regs);
4568         tcg_regset_set_reg(arg_set, reg);
4569         temp_load(s, ts, arg_set, allocated_regs, 0);
4570     }
4571 }
4572 
4573 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4574                          TCGRegSet allocated_regs)
4575 {
4576     /*
4577      * When the destination is on the stack, load up the temp and store.
4578      * If there are many call-saved registers, the temp might live to
4579      * see another use; otherwise it'll be discarded.
4580      */
4581     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4582     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4583                TCG_TARGET_CALL_STACK_OFFSET +
4584                stk_slot * sizeof(tcg_target_long));
4585 }
4586 
4587 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4588                             TCGTemp *ts, TCGRegSet *allocated_regs)
4589 {
4590     if (REG_P(l)) {
4591         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4592         load_arg_reg(s, reg, ts, *allocated_regs);
4593         tcg_regset_set_reg(*allocated_regs, reg);
4594     } else {
4595         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4596                      ts, *allocated_regs);
4597     }
4598 }
4599 
4600 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4601                          intptr_t ref_off, TCGRegSet *allocated_regs)
4602 {
4603     TCGReg reg;
4604     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4605 
4606     if (stk_slot < 0) {
4607         reg = tcg_target_call_iarg_regs[arg_slot];
4608         tcg_reg_free(s, reg, *allocated_regs);
4609         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4610         tcg_regset_set_reg(*allocated_regs, reg);
4611     } else {
4612         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4613                             *allocated_regs, 0, false);
4614         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4615         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4616                    TCG_TARGET_CALL_STACK_OFFSET
4617                    + stk_slot * sizeof(tcg_target_long));
4618     }
4619 }
4620 
/*
 * Allocate registers/stack slots for a helper call op, emit the call,
 * and bind the return value(s) per the helper's TCGHelperInfo.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /* arg_life feeds the IS_DEAD_ARG/NEED_SYNC_ARG macros below. */
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass the
               address of that slot as the actual argument. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: only the
               store is needed; the address was passed with the head. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output word arrives in its ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The 128-bit value is returned in a vector register; spill it
           to the temp's home slot before marking the parts in memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4752 
4753 #ifdef CONFIG_PROFILER
4754 
/* avoid copy/paste errors */
/* Atomically read (from)->field and accumulate it into (to)->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read (from)->field and keep the maximum in (to)->field. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4768 
4769 /* Pass in a zero'ed @prof */
4770 static inline
4771 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4772 {
4773     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4774     unsigned int i;
4775 
4776     for (i = 0; i < n_ctxs; i++) {
4777         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4778         const TCGProfile *orig = &s->prof;
4779 
4780         if (counters) {
4781             PROF_ADD(prof, orig, cpu_exec_time);
4782             PROF_ADD(prof, orig, tb_count1);
4783             PROF_ADD(prof, orig, tb_count);
4784             PROF_ADD(prof, orig, op_count);
4785             PROF_MAX(prof, orig, op_count_max);
4786             PROF_ADD(prof, orig, temp_count);
4787             PROF_MAX(prof, orig, temp_count_max);
4788             PROF_ADD(prof, orig, del_op_count);
4789             PROF_ADD(prof, orig, code_in_len);
4790             PROF_ADD(prof, orig, code_out_len);
4791             PROF_ADD(prof, orig, search_out_len);
4792             PROF_ADD(prof, orig, interm_time);
4793             PROF_ADD(prof, orig, code_time);
4794             PROF_ADD(prof, orig, la_time);
4795             PROF_ADD(prof, orig, opt_time);
4796             PROF_ADD(prof, orig, restore_count);
4797             PROF_ADD(prof, orig, restore_time);
4798         }
4799         if (table) {
4800             int i;
4801 
4802             for (i = 0; i < NB_OPS; i++) {
4803                 PROF_ADD(prof, orig, table_op_count[i]);
4804             }
4805         }
4806     }
4807 }
4808 
4809 #undef PROF_ADD
4810 #undef PROF_MAX
4811 
/* Snapshot only the scalar counters (not the per-opcode table). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4816 
/* Snapshot only the per-opcode count table (not the scalar counters). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4821 
4822 void tcg_dump_op_count(GString *buf)
4823 {
4824     TCGProfile prof = {};
4825     int i;
4826 
4827     tcg_profile_snapshot_table(&prof);
4828     for (i = 0; i < NB_OPS; i++) {
4829         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4830                                prof.table_op_count[i]);
4831     }
4832 }
4833 
4834 int64_t tcg_cpu_exec_time(void)
4835 {
4836     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4837     unsigned int i;
4838     int64_t ret = 0;
4839 
4840     for (i = 0; i < n_ctxs; i++) {
4841         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4842         const TCGProfile *prof = &s->prof;
4843 
4844         ret += qatomic_read(&prof->cpu_exec_time);
4845     }
4846     return ret;
4847 }
4848 #else
/* Stub used when CONFIG_PROFILER is disabled. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4853 
/* Stub used when CONFIG_PROFILER is disabled; reports and exits. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4859 #endif
4860 
4861 
/*
 * Generate host code for @tb from the op list accumulated in @s:
 * optimize, run liveness, then allocate registers and emit code for
 * each op in order.
 *
 * Returns the size in bytes of the generated code on success, or a
 * negative value asking the caller to retry with a fresh buffer:
 * -1 when the code buffer high-water mark was exceeded, -2 when the
 * TB grew past the 16-bit offsets in gen_insn_end_off or relocations
 * could not be resolved.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Account op and temp counts for this TB before any passes run. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization, if requested. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* num_insns counts guest insns; -1 until the first insn_start. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous guest insn's end offset. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two 32-bit op args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5103 
5104 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the accumulated JIT profiling
 * statistics to @buf (CONFIG_PROFILER builds only).
 * NOTE(review): the wall-clock conversion assumes a 2.4 GHz host, as
 * stated in the output itself.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when nothing has been translated yet. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Clamp the divisor for the percentage breakdowns below. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5168 #else
/* Stub used when CONFIG_PROFILER is disabled. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5173 #endif
5174 
5175 #ifdef ELF_HOST_MACHINE
5176 /* In order to use this feature, the backend needs to do three things:
5177 
5178    (1) Define ELF_HOST_MACHINE to indicate both what value to
5179        put into the ELF image and to indicate support for the feature.
5180 
5181    (2) Define tcg_register_jit.  This should create a buffer containing
5182        the contents of a .debug_frame section that describes the post-
5183        prologue unwind info for the tcg machine.
5184 
5185    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5186 */
5187 
5188 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action pending in __jit_debug_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* Node in the doubly-linked list of in-memory symbol files (one ELF
   image per registered JIT region). */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Root descriptor that the debugger reads to find the entry list and
   the pending action. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
5208 
/* Per the GDB JIT interface, the debugger sets a breakpoint on this
   function; calling it notifies the debugger that the descriptor has
   changed.  The noinline attribute and the empty asm keep the call
   from being optimized away. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5218 
5219 /* End GDB interface.  */
5220 
/*
 * Return the byte offset of @str within the nul-separated string table
 * @strtab.  The table begins with an empty string, so real entries start
 * at offset 1.  @str must be present: there is no end-of-table sentinel,
 * and a missing string would walk past the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        /* advance to the next nul-separated entry */
    }
    return p - strtab;
}
5232 
/*
 * Build an in-memory ELF image describing the generated-code buffer and
 * register it with GDB through the JIT interface above.
 *
 * @buf_ptr/@buf_size: the host code_gen_buffer to describe.
 * @debug_frame/@debug_frame_size: backend-constructed .debug_frame
 * contents, starting with a DebugFrameHeader whose FDE address range is
 * patched in below.  The frame data is appended after the ElfImage.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Minimal DWARF2 .debug_info section: one compile unit DIE holding a
       single subprogram DIE that covers the whole code buffer.  Packed so
       the field offsets match the serialized DWARF byte stream.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;              /* CU length, excluding this field */
        uint16_t  version;          /* DWARF version (2) */
        uint32_t  abbrev;           /* offset into .debug_abbrev */
        uint8_t   ptr_size;         /* host pointer size */
        uint8_t   cu_die;           /* abbrev code 1: compile_unit */
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;           /* abbrev code 2: subprogram */
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;           /* end of children marker */
    };

    /* Layout of the complete fake ELF file.  The .debug_frame data is
       copied immediately after this structure (see sh_offset of shdr[4]).  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];          /* .debug_abbrev contents */
        char       str[80];         /* .strtab/.shstrtab contents */
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): sizeof(ElfW(Ehdr)) would be expected here;
               presumably GDB ignores e_ehsize — confirm before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                /* data lives immediately past the ElfImage struct */
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        /* Two DWARF abbreviations matching the DebugInfo DIEs above.  */
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Patch the template with the actual buffer address and size.  */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Append the backend's .debug_frame and patch its FDE to cover the
       whole code buffer.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (1 here), not a
               byte count, so comparing against img_size is always true;
               the empty body makes this harmless.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the image and notify the attached debugger, if any.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
5432 #else
5433 /* No support for the feature.  Provide the entry point expected by exec.c,
5434    and implement the internal function we declared earlier.  */
5435 
/* Without ELF_HOST_MACHINE there is no unwind info to publish, so
   registering a JIT symbol file with the debugger is a no-op.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5441 
/* Public entry point expected by the rest of QEMU; nothing to do when
   ELF_HOST_MACHINE support is absent.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5445 #endif /* ELF_HOST_MACHINE */
5446 
5447 #if !TCG_TARGET_MAYBE_vec
/* Stub for hosts whose TCG backend has no vector support: generic code
   should only emit vector opcodes when TCG_TARGET_MAYBE_vec is set, so
   reaching this is a programming error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5452 #endif
5453