xref: /openbmc/qemu/tcg/tcg.c (revision de6cd7599b518f0c832cc85980196ec02c129a86)
1  /*
2   * Tiny Code Generator for QEMU
3   *
4   * Copyright (c) 2008 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  
27  /* Define to dump the ELF file used to communicate with GDB.  */
28  #undef DEBUG_JIT
29  
30  #include "qemu/error-report.h"
31  #include "qemu/cutils.h"
32  #include "qemu/host-utils.h"
33  #include "qemu/qemu-print.h"
34  #include "qemu/cacheflush.h"
35  #include "qemu/cacheinfo.h"
36  #include "qemu/timer.h"
37  #include "exec/translation-block.h"
38  #include "exec/tlb-common.h"
39  #include "tcg/tcg-op-common.h"
40  
41  #if UINTPTR_MAX == UINT32_MAX
42  # define ELF_CLASS  ELFCLASS32
43  #else
44  # define ELF_CLASS  ELFCLASS64
45  #endif
46  #if HOST_BIG_ENDIAN
47  # define ELF_DATA   ELFDATA2MSB
48  #else
49  # define ELF_DATA   ELFDATA2LSB
50  #endif
51  
52  #include "elf.h"
53  #include "exec/log.h"
54  #include "tcg/tcg-ldst.h"
55  #include "tcg/tcg-temp-internal.h"
56  #include "tcg-internal.h"
57  #include "accel/tcg/perf.h"
58  #ifdef CONFIG_USER_ONLY
59  #include "exec/user/guest-base.h"
60  #endif
61  
62  /* Forward declarations for functions defined in tcg-target.c.inc and
63     used here. */
64  static void tcg_target_init(TCGContext *s);
65  static void tcg_target_qemu_prologue(TCGContext *s);
66  static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
67                          intptr_t value, intptr_t addend);
68  
69  /* The CIE and FDE header definitions will be common to all hosts.  */
70  typedef struct {
71      uint32_t len __attribute__((aligned((sizeof(void *)))));
72      uint32_t id;
73      uint8_t version;
74      char augmentation[1];
75      uint8_t code_align;
76      uint8_t data_align;
77      uint8_t return_column;
78  } DebugFrameCIE;
79  
80  typedef struct QEMU_PACKED {
81      uint32_t len __attribute__((aligned((sizeof(void *)))));
82      uint32_t cie_offset;
83      uintptr_t func_start;
84      uintptr_t func_len;
85  } DebugFrameFDEHeader;
86  
87  typedef struct QEMU_PACKED {
88      DebugFrameCIE cie;
89      DebugFrameFDEHeader fde;
90  } DebugFrameHeader;
91  
92  typedef struct TCGLabelQemuLdst {
93      bool is_ld;             /* qemu_ld: true, qemu_st: false */
94      MemOpIdx oi;
95      TCGType type;           /* result type of a load */
96      TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
97      TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
98      TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
99      TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
100      const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
101      tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
102      QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
103  } TCGLabelQemuLdst;
104  
105  static void tcg_register_jit_int(const void *buf, size_t size,
106                                   const void *debug_frame,
107                                   size_t debug_frame_size)
108      __attribute__((unused));
109  
110  /* Forward declarations for functions defined and used in tcg-target.c.inc. */
111  static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
112                         intptr_t arg2);
113  static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
114  static void tcg_out_movi(TCGContext *s, TCGType type,
115                           TCGReg ret, tcg_target_long arg);
116  static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
117  static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
118  static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
119  static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
120  static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
121  static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
122  static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
123  static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
124  static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
125  static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
126  static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
127  static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
128  static void tcg_out_goto_tb(TCGContext *s, int which);
129  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
130                         const TCGArg args[TCG_MAX_OP_ARGS],
131                         const int const_args[TCG_MAX_OP_ARGS]);
132  #if TCG_TARGET_MAYBE_vec
133  static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134                              TCGReg dst, TCGReg src);
135  static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
136                               TCGReg dst, TCGReg base, intptr_t offset);
137  static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
138                               TCGReg dst, int64_t arg);
139  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                             unsigned vecl, unsigned vece,
141                             const TCGArg args[TCG_MAX_OP_ARGS],
142                             const int const_args[TCG_MAX_OP_ARGS]);
143  #else
144  static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
145                                     TCGReg dst, TCGReg src)
146  {
147      g_assert_not_reached();
148  }
149  static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
150                                      TCGReg dst, TCGReg base, intptr_t offset)
151  {
152      g_assert_not_reached();
153  }
154  static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
155                                      TCGReg dst, int64_t arg)
156  {
157      g_assert_not_reached();
158  }
159  static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
160                                    unsigned vecl, unsigned vece,
161                                    const TCGArg args[TCG_MAX_OP_ARGS],
162                                    const int const_args[TCG_MAX_OP_ARGS])
163  {
164      g_assert_not_reached();
165  }
166  #endif
167  static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
168                         intptr_t arg2);
169  static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
170                          TCGReg base, intptr_t ofs);
171  static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
172                           const TCGHelperInfo *info);
173  static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
174  static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
175  #ifdef TCG_TARGET_NEED_LDST_LABELS
176  static int tcg_out_ldst_finalize(TCGContext *s);
177  #endif
178  
179  typedef struct TCGLdstHelperParam {
180      TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
181      unsigned ntmp;
182      int tmp[3];
183  } TCGLdstHelperParam;
184  
185  static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
186                                     const TCGLdstHelperParam *p)
187      __attribute__((unused));
188  static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
189                                    bool load_sign, const TCGLdstHelperParam *p)
190      __attribute__((unused));
191  static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                     const TCGLdstHelperParam *p)
193      __attribute__((unused));
194  
195  static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
196      [MO_UB] = helper_ldub_mmu,
197      [MO_SB] = helper_ldsb_mmu,
198      [MO_UW] = helper_lduw_mmu,
199      [MO_SW] = helper_ldsw_mmu,
200      [MO_UL] = helper_ldul_mmu,
201      [MO_UQ] = helper_ldq_mmu,
202  #if TCG_TARGET_REG_BITS == 64
203      [MO_SL] = helper_ldsl_mmu,
204      [MO_128] = helper_ld16_mmu,
205  #endif
206  };
207  
208  static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
209      [MO_8]  = helper_stb_mmu,
210      [MO_16] = helper_stw_mmu,
211      [MO_32] = helper_stl_mmu,
212      [MO_64] = helper_stq_mmu,
213  #if TCG_TARGET_REG_BITS == 64
214      [MO_128] = helper_st16_mmu,
215  #endif
216  };
217  
218  typedef struct {
219      MemOp atom;   /* lg2 bits of atomicity required */
220      MemOp align;  /* lg2 bits of alignment to use */
221  } TCGAtomAlign;
222  
223  static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
224                                             MemOp host_atom, bool allow_two_ops)
225      __attribute__((unused));
226  
227  TCGContext tcg_init_ctx;
228  __thread TCGContext *tcg_ctx;
229  
230  TCGContext **tcg_ctxs;
231  unsigned int tcg_cur_ctxs;
232  unsigned int tcg_max_ctxs;
233  TCGv_env cpu_env = 0;
234  const void *tcg_code_gen_epilogue;
235  uintptr_t tcg_splitwx_diff;
236  
237  #ifndef CONFIG_TCG_INTERPRETER
238  tcg_prologue_fn *tcg_qemu_tb_exec;
239  #endif
240  
241  static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
242  static TCGRegSet tcg_target_call_clobber_regs;
243  
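/*
 * The tcg_outN/tcg_patchN helpers below emit or patch raw values in the
 * code buffer, advancing s->code_ptr in units of tcg_insn_unit: e.g. on
 * a host with a 4-byte insn unit, tcg_out32 advances by one unit and
 * tcg_out64 by two.
 */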
244  #if TCG_TARGET_INSN_UNIT_SIZE == 1
245  static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
246  {
247      *s->code_ptr++ = v;
248  }
249  
250  static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
251                                                        uint8_t v)
252  {
253      *p = v;
254  }
255  #endif
256  
257  #if TCG_TARGET_INSN_UNIT_SIZE <= 2
258  static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
259  {
260      if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
261          *s->code_ptr++ = v;
262      } else {
263          tcg_insn_unit *p = s->code_ptr;
264          memcpy(p, &v, sizeof(v));
265          s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
266      }
267  }
268  
269  static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
270                                                         uint16_t v)
271  {
272      if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
273          *p = v;
274      } else {
275          memcpy(p, &v, sizeof(v));
276      }
277  }
278  #endif
279  
280  #if TCG_TARGET_INSN_UNIT_SIZE <= 4
281  static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
282  {
283      if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
284          *s->code_ptr++ = v;
285      } else {
286          tcg_insn_unit *p = s->code_ptr;
287          memcpy(p, &v, sizeof(v));
288          s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
289      }
290  }
291  
292  static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
293                                                         uint32_t v)
294  {
295      if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
296          *p = v;
297      } else {
298          memcpy(p, &v, sizeof(v));
299      }
300  }
301  #endif
302  
303  #if TCG_TARGET_INSN_UNIT_SIZE <= 8
304  static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
305  {
306      if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
307          *s->code_ptr++ = v;
308      } else {
309          tcg_insn_unit *p = s->code_ptr;
310          memcpy(p, &v, sizeof(v));
311          s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
312      }
313  }
314  
315  static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
316                                                         uint64_t v)
317  {
318      if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
319          *p = v;
320      } else {
321          memcpy(p, &v, sizeof(v));
322      }
323  }
324  #endif
325  
326  /* label relocation processing */
327  
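/*
 * A branch to a label that has not been emitted yet is recorded against
 * the label by tcg_out_reloc below; once tcg_out_label has assigned the
 * label an address, tcg_resolve_relocs walks each label's relocation
 * list and patches the recorded locations via the backend's patch_reloc.
 */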
328  static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
329                            TCGLabel *l, intptr_t addend)
330  {
331      TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
332  
333      r->type = type;
334      r->ptr = code_ptr;
335      r->addend = addend;
336      QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
337  }
338  
339  static void tcg_out_label(TCGContext *s, TCGLabel *l)
340  {
341      tcg_debug_assert(!l->has_value);
342      l->has_value = 1;
343      l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
344  }
345  
346  TCGLabel *gen_new_label(void)
347  {
348      TCGContext *s = tcg_ctx;
349      TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
350  
351      memset(l, 0, sizeof(TCGLabel));
352      l->id = s->nb_labels++;
353      QSIMPLEQ_INIT(&l->branches);
354      QSIMPLEQ_INIT(&l->relocs);
355  
356      QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
357  
358      return l;
359  }
360  
361  static bool tcg_resolve_relocs(TCGContext *s)
362  {
363      TCGLabel *l;
364  
365      QSIMPLEQ_FOREACH(l, &s->labels, next) {
366          TCGRelocation *r;
367          uintptr_t value = l->u.value;
368  
369          QSIMPLEQ_FOREACH(r, &l->relocs, next) {
370              if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
371                  return false;
372              }
373          }
374      }
375      return true;
376  }
377  
378  static void set_jmp_reset_offset(TCGContext *s, int which)
379  {
380      /*
381       * We will check for overflow at the end of the opcode loop in
382       * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
383       */
384      s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
385  }
386  
387  static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
388  {
389      /*
390       * We will check for overflow at the end of the opcode loop in
391       * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
392       */
393      s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
394  }
395  
396  static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
397  {
398      /*
399       * Return the read-execute version of the pointer, for the benefit
400       * of any pc-relative addressing mode.
401       */
402      return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
403  }
404  
405  #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
406  static int tlb_mask_table_ofs(TCGContext *s, int which)
407  {
408      return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
409  }
410  #endif
411  
412  /* Signal overflow, starting over with fewer guest insns. */
413  static G_NORETURN
414  void tcg_raise_tb_overflow(TCGContext *s)
415  {
416      siglongjmp(s->jmp_trans, -2);
417  }
418  
419  /*
420   * Used by tcg_out_movext{1,2,3} to hold the arguments for tcg_out_movext.
421   * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
422   *
423   * However, tcg_out_helper_load_slots reuses this field to hold an
424   * argument slot number (which may designate an argument register or an
425   * argument stack slot), converting to TCGReg once all arguments that
426   * are destined for the stack are processed.
427   */
428  typedef struct TCGMovExtend {
429      unsigned dst;
430      TCGReg src;
431      TCGType dst_type;
432      TCGType src_type;
433      MemOp src_ext;
434  } TCGMovExtend;
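/*
 * An illustrative (hypothetical) instance: { .dst = R, .src = S,
 * .dst_type = TCG_TYPE_I64, .src_type = TCG_TYPE_I32, .src_ext = MO_SW }
 * asks tcg_out_movext below to sign-extend the 16-bit value in S into
 * the 64-bit register R.
 */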
435  
436  /**
437   * tcg_out_movext -- move and extend
438   * @s: tcg context
439   * @dst_type: integral type for destination
440   * @dst: destination register
441   * @src_type: integral type for source
442   * @src_ext: extension to apply to source
443   * @src: source register
444   *
445   * Move or extend @src into @dst, depending on @src_ext and the types.
446   */
447  static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
448                             TCGType src_type, MemOp src_ext, TCGReg src)
449  {
450      switch (src_ext) {
451      case MO_UB:
452          tcg_out_ext8u(s, dst, src);
453          break;
454      case MO_SB:
455          tcg_out_ext8s(s, dst_type, dst, src);
456          break;
457      case MO_UW:
458          tcg_out_ext16u(s, dst, src);
459          break;
460      case MO_SW:
461          tcg_out_ext16s(s, dst_type, dst, src);
462          break;
463      case MO_UL:
464      case MO_SL:
465          if (dst_type == TCG_TYPE_I32) {
466              if (src_type == TCG_TYPE_I32) {
467                  tcg_out_mov(s, TCG_TYPE_I32, dst, src);
468              } else {
469                  tcg_out_extrl_i64_i32(s, dst, src);
470              }
471          } else if (src_type == TCG_TYPE_I32) {
472              if (src_ext & MO_SIGN) {
473                  tcg_out_exts_i32_i64(s, dst, src);
474              } else {
475                  tcg_out_extu_i32_i64(s, dst, src);
476              }
477          } else {
478              if (src_ext & MO_SIGN) {
479                  tcg_out_ext32s(s, dst, src);
480              } else {
481                  tcg_out_ext32u(s, dst, src);
482              }
483          }
484          break;
485      case MO_UQ:
486          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
487          if (dst_type == TCG_TYPE_I32) {
488              tcg_out_extrl_i64_i32(s, dst, src);
489          } else {
490              tcg_out_mov(s, TCG_TYPE_I64, dst, src);
491          }
492          break;
493      default:
494          g_assert_not_reached();
495      }
496  }
497  
498  /* Minor variations on a theme, using a structure. */
499  static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
500                                      TCGReg src)
501  {
502      tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
503  }
504  
505  static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
506  {
507      tcg_out_movext1_new_src(s, i, i->src);
508  }
509  
510  /**
511   * tcg_out_movext2 -- move and extend two pairs
512   * @s: tcg context
513   * @i1: first move description
514   * @i2: second move description
515   * @scratch: temporary register, or -1 for none
516   *
517   * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
518   * between the sources and destinations.
519   */
520  
521  static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
522                              const TCGMovExtend *i2, int scratch)
523  {
524      TCGReg src1 = i1->src;
525      TCGReg src2 = i2->src;
526  
527      if (i1->dst != src2) {
528          tcg_out_movext1(s, i1);
529          tcg_out_movext1(s, i2);
530          return;
531      }
532      if (i2->dst == src1) {
533          TCGType src1_type = i1->src_type;
534          TCGType src2_type = i2->src_type;
535  
536          if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
537              /* The data is now in the correct registers; now extend. */
538              src1 = i2->src;
539              src2 = i1->src;
540          } else {
541              tcg_debug_assert(scratch >= 0);
542              tcg_out_mov(s, src1_type, scratch, src1);
543              src1 = scratch;
544          }
545      }
546      tcg_out_movext1_new_src(s, i2, src2);
547      tcg_out_movext1_new_src(s, i1, src1);
548  }
549  
550  /**
551   * tcg_out_movext3 -- move and extend three pairs
552   * @s: tcg context
553   * @i1: first move description
554   * @i2: second move description
555   * @i3: third move description
556   * @scratch: temporary register, or -1 for none
557   *
558   * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
559   * between the sources and destinations.
560   */
561  
562  static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
563                              const TCGMovExtend *i2, const TCGMovExtend *i3,
564                              int scratch)
565  {
566      TCGReg src1 = i1->src;
567      TCGReg src2 = i2->src;
568      TCGReg src3 = i3->src;
569  
570      if (i1->dst != src2 && i1->dst != src3) {
571          tcg_out_movext1(s, i1);
572          tcg_out_movext2(s, i2, i3, scratch);
573          return;
574      }
575      if (i2->dst != src1 && i2->dst != src3) {
576          tcg_out_movext1(s, i2);
577          tcg_out_movext2(s, i1, i3, scratch);
578          return;
579      }
580      if (i3->dst != src1 && i3->dst != src2) {
581          tcg_out_movext1(s, i3);
582          tcg_out_movext2(s, i1, i2, scratch);
583          return;
584      }
585  
586      /*
587       * There is a cycle.  Since there are only 3 nodes, the cycle is
588       * either "clockwise" or "anti-clockwise", and can be solved with
589       * a single scratch or two xchg.
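     * For example, with registers A, B and C, the moves i1: A->B,
     * i2: B->C, i3: C->A form the "clockwise" case handled first.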
590       */
591      if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
592          /* "Clockwise" */
593          if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
594              tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
595              /* The data is now in the correct registers; now extend. */
596              tcg_out_movext1_new_src(s, i1, i1->dst);
597              tcg_out_movext1_new_src(s, i2, i2->dst);
598              tcg_out_movext1_new_src(s, i3, i3->dst);
599          } else {
600              tcg_debug_assert(scratch >= 0);
601              tcg_out_mov(s, i1->src_type, scratch, src1);
602              tcg_out_movext1(s, i3);
603              tcg_out_movext1(s, i2);
604              tcg_out_movext1_new_src(s, i1, scratch);
605          }
606      } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
607          /* "Anti-clockwise" */
608          if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
609              tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
610              /* The data is now in the correct registers; now extend. */
611              tcg_out_movext1_new_src(s, i1, i1->dst);
612              tcg_out_movext1_new_src(s, i2, i2->dst);
613              tcg_out_movext1_new_src(s, i3, i3->dst);
614          } else {
615              tcg_debug_assert(scratch >= 0);
616              tcg_out_mov(s, i1->src_type, scratch, src1);
617              tcg_out_movext1(s, i2);
618              tcg_out_movext1(s, i3);
619              tcg_out_movext1_new_src(s, i1, scratch);
620          }
621      } else {
622          g_assert_not_reached();
623      }
624  }
625  
626  #define C_PFX1(P, A)                    P##A
627  #define C_PFX2(P, A, B)                 P##A##_##B
628  #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
629  #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
630  #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
631  #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
632  
633  /* Define an enumeration for the various combinations. */
634  
635  #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
636  #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
637  #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
638  #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
639  
640  #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
641  #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
642  #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
643  #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
644  
645  #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
646  
647  #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
648  #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
649  #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
650  #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
651  
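/*
 * For example, a line "C_O1_I2(r, r, ri)" in tcg-target-con-set.h
 * expands through C_PFX3 into the enumerator "c_o1_i2_r_r_ri," below.
 */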
652  typedef enum {
653  #include "tcg-target-con-set.h"
654  } TCGConstraintSetIndex;
655  
656  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
657  
658  #undef C_O0_I1
659  #undef C_O0_I2
660  #undef C_O0_I3
661  #undef C_O0_I4
662  #undef C_O1_I1
663  #undef C_O1_I2
664  #undef C_O1_I3
665  #undef C_O1_I4
666  #undef C_N1_I2
667  #undef C_O2_I1
668  #undef C_O2_I2
669  #undef C_O2_I3
670  #undef C_O2_I4
671  
672  /* Put all of the constraint sets into an array, indexed by the enum. */
673  
674  #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
675  #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
676  #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
677  #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
678  
679  #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
680  #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
681  #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
682  #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
683  
684  #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
685  
686  #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
687  #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
688  #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
689  #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
690  
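/*
 * Under these definitions, the same "C_O1_I2(r, r, ri)" line expands to
 * "{ .args_ct_str = { "r", "r", "ri" } },", so constraint_sets[] is
 * indexed one-for-one by the enumeration defined above.
 */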
691  static const TCGTargetOpDef constraint_sets[] = {
692  #include "tcg-target-con-set.h"
693  };
694  
695  
696  #undef C_O0_I1
697  #undef C_O0_I2
698  #undef C_O0_I3
699  #undef C_O0_I4
700  #undef C_O1_I1
701  #undef C_O1_I2
702  #undef C_O1_I3
703  #undef C_O1_I4
704  #undef C_N1_I2
705  #undef C_O2_I1
706  #undef C_O2_I2
707  #undef C_O2_I3
708  #undef C_O2_I4
709  
710  /* Expand the enumerator to be returned from tcg_target_op_def(). */
711  
712  #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
713  #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
714  #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
715  #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
716  
717  #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
718  #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
719  #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
720  #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
721  
722  #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
723  
724  #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
725  #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
726  #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
727  #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
728  
729  #include "tcg-target.c.inc"
730  
731  static void alloc_tcg_plugin_context(TCGContext *s)
732  {
733  #ifdef CONFIG_PLUGIN
734      s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
735      s->plugin_tb->insns =
736          g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
737  #endif
738  }
739  
740  /*
741   * All TCG threads except the parent (i.e. the one that called tcg_context_init
742   * and registered the target's TCG globals) must register with this function
743   * before initiating translation.
744   *
745   * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
746   * of tcg_region_init() for the reasoning behind this.
747   *
748   * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
749   * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
750   * is not used anymore for translation once this function is called.
751   *
752   * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
753   * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
754   */
755  #ifdef CONFIG_USER_ONLY
756  void tcg_register_thread(void)
757  {
758      tcg_ctx = &tcg_init_ctx;
759  }
760  #else
761  void tcg_register_thread(void)
762  {
763      TCGContext *s = g_malloc(sizeof(*s));
764      unsigned int i, n;
765  
766      *s = tcg_init_ctx;
767  
768      /* Relink mem_base.  */
769      for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
770          if (tcg_init_ctx.temps[i].mem_base) {
771              ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
772              tcg_debug_assert(b >= 0 && b < n);
773              s->temps[i].mem_base = &s->temps[b];
774          }
775      }
776  
777      /* Claim an entry in tcg_ctxs */
778      n = qatomic_fetch_inc(&tcg_cur_ctxs);
779      g_assert(n < tcg_max_ctxs);
780      qatomic_set(&tcg_ctxs[n], s);
781  
782      if (n > 0) {
783          alloc_tcg_plugin_context(s);
784          tcg_region_initial_alloc(s);
785      }
786  
787      tcg_ctx = s;
788  }
789  #endif /* !CONFIG_USER_ONLY */
790  
791  /* pool based memory allocation */
792  void *tcg_malloc_internal(TCGContext *s, int size)
793  {
794      TCGPool *p;
795      int pool_size;
796  
797      if (size > TCG_POOL_CHUNK_SIZE) {
798          /* big malloc: insert a new pool (XXX: could optimize) */
799          p = g_malloc(sizeof(TCGPool) + size);
800          p->size = size;
801          p->next = s->pool_first_large;
802          s->pool_first_large = p;
803          return p->data;
804      } else {
805          p = s->pool_current;
806          if (!p) {
807              p = s->pool_first;
808              if (!p)
809                  goto new_pool;
810          } else {
811              if (!p->next) {
812              new_pool:
813                  pool_size = TCG_POOL_CHUNK_SIZE;
814                  p = g_malloc(sizeof(TCGPool) + pool_size);
815                  p->size = pool_size;
816                  p->next = NULL;
817                  if (s->pool_current) {
818                      s->pool_current->next = p;
819                  } else {
820                      s->pool_first = p;
821                  }
822              } else {
823                  p = p->next;
824              }
825          }
826      }
827      s->pool_current = p;
828      s->pool_cur = p->data + size;
829      s->pool_end = p->data + p->size;
830      return p->data;
831  }
832  
833  void tcg_pool_reset(TCGContext *s)
834  {
835      TCGPool *p, *t;
836      for (p = s->pool_first_large; p; p = t) {
837          t = p->next;
838          g_free(p);
839      }
840      s->pool_first_large = NULL;
841      s->pool_cur = s->pool_end = NULL;
842      s->pool_current = NULL;
843  }
844  
845  /*
846   * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
847   * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
848   * We only use these for layout in tcg_out_ld_helper_ret and
849   * tcg_out_st_helper_args, and share them between several of
850   * the helpers, with the end result that they are easier to build by hand.
851   */
852  
853  #if TCG_TARGET_REG_BITS == 32
854  # define dh_typecode_ttl  dh_typecode_i32
855  #else
856  # define dh_typecode_ttl  dh_typecode_i64
857  #endif
858  
859  static TCGHelperInfo info_helper_ld32_mmu = {
860      .flags = TCG_CALL_NO_WG,
861      .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
862                | dh_typemask(env, 1)
863                | dh_typemask(i64, 2)  /* uint64_t addr */
864                | dh_typemask(i32, 3)  /* unsigned oi */
865                | dh_typemask(ptr, 4)  /* uintptr_t ra */
866  };
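/*
 * I.e. the layout of: tcg_target_ulong helper(CPUArchState *env,
 * uint64_t addr, unsigned oi, uintptr_t ra).  As noted above, this
 * one layout is shared by the narrower (8/16/32-bit) load helpers.
 */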
867  
868  static TCGHelperInfo info_helper_ld64_mmu = {
869      .flags = TCG_CALL_NO_WG,
870      .typemask = dh_typemask(i64, 0)  /* return uint64_t */
871                | dh_typemask(env, 1)
872                | dh_typemask(i64, 2)  /* uint64_t addr */
873                | dh_typemask(i32, 3)  /* unsigned oi */
874                | dh_typemask(ptr, 4)  /* uintptr_t ra */
875  };
876  
877  static TCGHelperInfo info_helper_ld128_mmu = {
878      .flags = TCG_CALL_NO_WG,
879      .typemask = dh_typemask(i128, 0) /* return Int128 */
880                | dh_typemask(env, 1)
881                | dh_typemask(i64, 2)  /* uint64_t addr */
882                | dh_typemask(i32, 3)  /* unsigned oi */
883                | dh_typemask(ptr, 4)  /* uintptr_t ra */
884  };
885  
886  static TCGHelperInfo info_helper_st32_mmu = {
887      .flags = TCG_CALL_NO_WG,
888      .typemask = dh_typemask(void, 0)
889                | dh_typemask(env, 1)
890                | dh_typemask(i64, 2)  /* uint64_t addr */
891                | dh_typemask(i32, 3)  /* uint32_t data */
892                | dh_typemask(i32, 4)  /* unsigned oi */
893                | dh_typemask(ptr, 5)  /* uintptr_t ra */
894  };
895  
896  static TCGHelperInfo info_helper_st64_mmu = {
897      .flags = TCG_CALL_NO_WG,
898      .typemask = dh_typemask(void, 0)
899                | dh_typemask(env, 1)
900                | dh_typemask(i64, 2)  /* uint64_t addr */
901                | dh_typemask(i64, 3)  /* uint64_t data */
902                | dh_typemask(i32, 4)  /* unsigned oi */
903                | dh_typemask(ptr, 5)  /* uintptr_t ra */
904  };
905  
906  static TCGHelperInfo info_helper_st128_mmu = {
907      .flags = TCG_CALL_NO_WG,
908      .typemask = dh_typemask(void, 0)
909                | dh_typemask(env, 1)
910                | dh_typemask(i64, 2)  /* uint64_t addr */
911                | dh_typemask(i128, 3) /* Int128 data */
912                | dh_typemask(i32, 4)  /* unsigned oi */
913                | dh_typemask(ptr, 5)  /* uintptr_t ra */
914  };
915  
916  #ifdef CONFIG_TCG_INTERPRETER
917  static ffi_type *typecode_to_ffi(int argmask)
918  {
919      /*
920       * libffi does not support __int128_t, so we have forced Int128
921       * to use the structure definition instead of the builtin type.
922       */
923      static ffi_type *ffi_type_i128_elements[3] = {
924          &ffi_type_uint64,
925          &ffi_type_uint64,
926          NULL
927      };
928      static ffi_type ffi_type_i128 = {
929          .size = 16,
930          .alignment = __alignof__(Int128),
931          .type = FFI_TYPE_STRUCT,
932          .elements = ffi_type_i128_elements,
933      };
934  
935      switch (argmask) {
936      case dh_typecode_void:
937          return &ffi_type_void;
938      case dh_typecode_i32:
939          return &ffi_type_uint32;
940      case dh_typecode_s32:
941          return &ffi_type_sint32;
942      case dh_typecode_i64:
943          return &ffi_type_uint64;
944      case dh_typecode_s64:
945          return &ffi_type_sint64;
946      case dh_typecode_ptr:
947          return &ffi_type_pointer;
948      case dh_typecode_i128:
949          return &ffi_type_i128;
950      }
951      g_assert_not_reached();
952  }
953  
954  static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
955  {
956      unsigned typemask = info->typemask;
957      struct {
958          ffi_cif cif;
959          ffi_type *args[];
960      } *ca;
961      ffi_status status;
962      int nargs;
963  
964      /* Ignoring the return type, find the last non-zero field. */
965      nargs = 32 - clz32(typemask >> 3);
966      nargs = DIV_ROUND_UP(nargs, 3);
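    /*
     * E.g. if the last argument occupies 3-bit field j (0-based) of the
     * shifted mask, its set bits lie in [3*j, 3*j+2], so the clz32
     * expression yields a value in [3*j+1, 3*j+3], each of which the
     * division rounds up to j + 1 arguments.
     */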
967      assert(nargs <= MAX_CALL_IARGS);
968  
969      ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
970      ca->cif.rtype = typecode_to_ffi(typemask & 7);
971      ca->cif.nargs = nargs;
972  
973      if (nargs != 0) {
974          ca->cif.arg_types = ca->args;
975          for (int j = 0; j < nargs; ++j) {
976              int typecode = extract32(typemask, (j + 1) * 3, 3);
977              ca->args[j] = typecode_to_ffi(typecode);
978          }
979      }
980  
981      status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
982                            ca->cif.rtype, ca->cif.arg_types);
983      assert(status == FFI_OK);
984  
985      return &ca->cif;
986  }
987  
988  #define HELPER_INFO_INIT(I)      (&(I)->cif)
989  #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
990  #else
991  #define HELPER_INFO_INIT(I)      (&(I)->init)
992  #define HELPER_INFO_INIT_VAL(I)  1
993  #endif /* CONFIG_TCG_INTERPRETER */
994  
995  static inline bool arg_slot_reg_p(unsigned arg_slot)
996  {
997      /*
998       * Split the sizeof away from the comparison to avoid a -Werror
999       * "unsigned < 0 is always false" warning when iarg_regs is empty.
1000       */
1001      unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1002      return arg_slot < nreg;
1003  }
1004  
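/*
 * Map an argument slot beyond the register slots to its byte offset on
 * the stack: e.g. with 8 argument registers and an 8-byte
 * tcg_target_long, slot 9 maps to TCG_TARGET_CALL_STACK_OFFSET + 8.
 */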
1005  static inline int arg_slot_stk_ofs(unsigned arg_slot)
1006  {
1007      unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1008      unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1009  
1010      tcg_debug_assert(stk_slot < max);
1011      return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1012  }
1013  
1014  typedef struct TCGCumulativeArgs {
1015      int arg_idx;                /* tcg_gen_callN args[] */
1016      int info_in_idx;            /* TCGHelperInfo in[] */
1017      int arg_slot;               /* regs+stack slot */
1018      int ref_slot;               /* stack slots for references */
1019  } TCGCumulativeArgs;
1020  
1021  static void layout_arg_even(TCGCumulativeArgs *cum)
1022  {
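    /* Round an odd slot index up to the next even one: 3 -> 4, 4 -> 4. */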
1023      cum->arg_slot += cum->arg_slot & 1;
1024  }
1025  
1026  static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1027                           TCGCallArgumentKind kind)
1028  {
1029      TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1030  
1031      *loc = (TCGCallArgumentLoc){
1032          .kind = kind,
1033          .arg_idx = cum->arg_idx,
1034          .arg_slot = cum->arg_slot,
1035      };
1036      cum->info_in_idx++;
1037      cum->arg_slot++;
1038  }
1039  
1040  static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1041                                  TCGHelperInfo *info, int n)
1042  {
1043      TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1044  
1045      for (int i = 0; i < n; ++i) {
1046          /* Layout all using the same arg_idx, adjusting the subindex. */
1047          loc[i] = (TCGCallArgumentLoc){
1048              .kind = TCG_CALL_ARG_NORMAL,
1049              .arg_idx = cum->arg_idx,
1050              .tmp_subindex = i,
1051              .arg_slot = cum->arg_slot + i,
1052          };
1053      }
1054      cum->info_in_idx += n;
1055      cum->arg_slot += n;
1056  }
1057  
1058  static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1059  {
1060      TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
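    /* Number of host words needed to hold an Int128: 2 on 64-bit, 4 on 32-bit. */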
1061      int n = 128 / TCG_TARGET_REG_BITS;
1062  
1063      /* The first subindex carries the pointer. */
1064      layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1065  
1066      /*
1067       * The callee is allowed to clobber memory associated with
1068       * structures passed by reference.  Therefore we must make copies.
1069       * Allocate space from "ref_slot", which will be adjusted to
1070       * follow the parameters on the stack.
1071       */
1072      loc[0].ref_slot = cum->ref_slot;
1073  
1074      /*
1075       * Subsequent words also go into the reference slot, but
1076       * do not accumulate into the regular arguments.
1077       */
1078      for (int i = 1; i < n; ++i) {
1079          loc[i] = (TCGCallArgumentLoc){
1080              .kind = TCG_CALL_ARG_BY_REF_N,
1081              .arg_idx = cum->arg_idx,
1082              .tmp_subindex = i,
1083              .ref_slot = cum->ref_slot + i,
1084          };
1085      }
1086      cum->info_in_idx += n;
1087      cum->ref_slot += n;
1088  }
1089  
1090  static void init_call_layout(TCGHelperInfo *info)
1091  {
1092      int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1093      int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1094      unsigned typemask = info->typemask;
1095      unsigned typecode;
1096      TCGCumulativeArgs cum = { };
1097  
1098      /*
1099       * Parse and place any function return value.
1100       */
1101      typecode = typemask & 7;
1102      switch (typecode) {
1103      case dh_typecode_void:
1104          info->nr_out = 0;
1105          break;
1106      case dh_typecode_i32:
1107      case dh_typecode_s32:
1108      case dh_typecode_ptr:
1109          info->nr_out = 1;
1110          info->out_kind = TCG_CALL_RET_NORMAL;
1111          break;
1112      case dh_typecode_i64:
1113      case dh_typecode_s64:
1114          info->nr_out = 64 / TCG_TARGET_REG_BITS;
1115          info->out_kind = TCG_CALL_RET_NORMAL;
1116          /* Query the last register now to trigger any assert early. */
1117          tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1118          break;
1119      case dh_typecode_i128:
1120          info->nr_out = 128 / TCG_TARGET_REG_BITS;
1121          info->out_kind = TCG_TARGET_CALL_RET_I128;
1122          switch (TCG_TARGET_CALL_RET_I128) {
1123          case TCG_CALL_RET_NORMAL:
1124              /* Query the last register now to trigger any assert early. */
1125              tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1126              break;
1127          case TCG_CALL_RET_BY_VEC:
1128              /* Query the single register now to trigger any assert early. */
1129              tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1130              break;
1131          case TCG_CALL_RET_BY_REF:
1132              /*
1133               * Allocate the first argument to the output.
1134               * We don't need to store this anywhere, just make it
1135               * unavailable for use in the input loop below.
1136               */
1137              cum.arg_slot = 1;
1138              break;
1139          default:
1140              qemu_build_not_reached();
1141          }
1142          break;
1143      default:
1144          g_assert_not_reached();
1145      }
1146  
1147      /*
1148       * Parse and place function arguments.
1149       */
1150      for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1151          TCGCallArgumentKind kind;
1152          TCGType type;
1153  
1154          typecode = typemask & 7;
1155          switch (typecode) {
1156          case dh_typecode_i32:
1157          case dh_typecode_s32:
1158              type = TCG_TYPE_I32;
1159              break;
1160          case dh_typecode_i64:
1161          case dh_typecode_s64:
1162              type = TCG_TYPE_I64;
1163              break;
1164          case dh_typecode_ptr:
1165              type = TCG_TYPE_PTR;
1166              break;
1167          case dh_typecode_i128:
1168              type = TCG_TYPE_I128;
1169              break;
1170          default:
1171              g_assert_not_reached();
1172          }
1173  
1174          switch (type) {
1175          case TCG_TYPE_I32:
1176              switch (TCG_TARGET_CALL_ARG_I32) {
1177              case TCG_CALL_ARG_EVEN:
1178                  layout_arg_even(&cum);
1179                  /* fall through */
1180              case TCG_CALL_ARG_NORMAL:
1181                  layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1182                  break;
1183              case TCG_CALL_ARG_EXTEND:
1184                  kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1185                  layout_arg_1(&cum, info, kind);
1186                  break;
1187              default:
1188                  qemu_build_not_reached();
1189              }
1190              break;
1191  
1192          case TCG_TYPE_I64:
1193              switch (TCG_TARGET_CALL_ARG_I64) {
1194              case TCG_CALL_ARG_EVEN:
1195                  layout_arg_even(&cum);
1196                  /* fall through */
1197              case TCG_CALL_ARG_NORMAL:
1198                  if (TCG_TARGET_REG_BITS == 32) {
1199                      layout_arg_normal_n(&cum, info, 2);
1200                  } else {
1201                      layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1202                  }
1203                  break;
1204              default:
1205                  qemu_build_not_reached();
1206              }
1207              break;
1208  
1209          case TCG_TYPE_I128:
1210              switch (TCG_TARGET_CALL_ARG_I128) {
1211              case TCG_CALL_ARG_EVEN:
1212                  layout_arg_even(&cum);
1213                  /* fall through */
1214              case TCG_CALL_ARG_NORMAL:
1215                  layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1216                  break;
1217              case TCG_CALL_ARG_BY_REF:
1218                  layout_arg_by_ref(&cum, info);
1219                  break;
1220              default:
1221                  qemu_build_not_reached();
1222              }
1223              break;
1224  
1225          default:
1226              g_assert_not_reached();
1227          }
1228      }
1229      info->nr_in = cum.info_in_idx;
1230  
1231      /* Validate that we didn't overrun the input array. */
1232      assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1233      /* Validate the backend has enough argument space. */
1234      assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1235  
1236      /*
1237       * Relocate the "ref_slot" area to the end of the parameters.
1238       * Minimizing this stack offset helps code size for x86,
1239       * which has a signed 8-bit offset encoding.
1240       */
1241      if (cum.ref_slot != 0) {
1242          int ref_base = 0;
1243  
1244          if (cum.arg_slot > max_reg_slots) {
1245              int align = __alignof(Int128) / sizeof(tcg_target_long);
1246  
1247              ref_base = cum.arg_slot - max_reg_slots;
1248              if (align > 1) {
1249                  ref_base = ROUND_UP(ref_base, align);
1250              }
1251          }
1252          assert(ref_base + cum.ref_slot <= max_stk_slots);
1253          ref_base += max_reg_slots;
1254  
1255          if (ref_base != 0) {
1256              for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1257                  TCGCallArgumentLoc *loc = &info->in[i];
1258                  switch (loc->kind) {
1259                  case TCG_CALL_ARG_BY_REF:
1260                  case TCG_CALL_ARG_BY_REF_N:
1261                      loc->ref_slot += ref_base;
1262                      break;
1263                  default:
1264                      break;
1265                  }
1266              }
1267          }
1268      }
1269  }
1270  
1271  static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1272  static void process_op_defs(TCGContext *s);
1273  static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1274                                              TCGReg reg, const char *name);
1275  
1276  static void tcg_context_init(unsigned max_cpus)
1277  {
1278      TCGContext *s = &tcg_init_ctx;
1279      int op, total_args, n, i;
1280      TCGOpDef *def;
1281      TCGArgConstraint *args_ct;
1282      TCGTemp *ts;
1283  
1284      memset(s, 0, sizeof(*s));
1285      s->nb_globals = 0;
1286  
1287      /* Count total number of arguments and allocate the corresponding
1288         space */
1289      total_args = 0;
1290      for (op = 0; op < NB_OPS; op++) {
1291          def = &tcg_op_defs[op];
1292          n = def->nb_iargs + def->nb_oargs;
1293          total_args += n;
1294      }
1295  
1296      args_ct = g_new0(TCGArgConstraint, total_args);
1297  
1298      for (op = 0; op < NB_OPS; op++) {
1299          def = &tcg_op_defs[op];
1300          def->args_ct = args_ct;
1301          n = def->nb_iargs + def->nb_oargs;
1302          args_ct += n;
1303      }
1304  
1305      init_call_layout(&info_helper_ld32_mmu);
1306      init_call_layout(&info_helper_ld64_mmu);
1307      init_call_layout(&info_helper_ld128_mmu);
1308      init_call_layout(&info_helper_st32_mmu);
1309      init_call_layout(&info_helper_st64_mmu);
1310      init_call_layout(&info_helper_st128_mmu);
1311  
1312      tcg_target_init(s);
1313      process_op_defs(s);
1314  
1315      /* Reverse the order of the saved registers, assuming they're all at
1316         the start of tcg_target_reg_alloc_order.  */
1317      for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1318          int r = tcg_target_reg_alloc_order[n];
1319          if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1320              break;
1321          }
1322      }
1323      for (i = 0; i < n; ++i) {
1324          indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1325      }
1326      for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1327          indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1328      }
1329  
1330      alloc_tcg_plugin_context(s);
1331  
1332      tcg_ctx = s;
1333      /*
1334       * In user-mode we simply share the init context among threads, since we
1335   * use a single region. See the documentation of tcg_region_init() for the
1336       * reasoning behind this.
1337       * In softmmu we will have at most max_cpus TCG threads.
1338       */
1339  #ifdef CONFIG_USER_ONLY
1340      tcg_ctxs = &tcg_ctx;
1341      tcg_cur_ctxs = 1;
1342      tcg_max_ctxs = 1;
1343  #else
1344      tcg_max_ctxs = max_cpus;
1345      tcg_ctxs = g_new0(TCGContext *, max_cpus);
1346  #endif
1347  
1348      tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1349      ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1350      cpu_env = temp_tcgv_ptr(ts);
1351  }
1352  
1353  void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
1354  {
1355      tcg_context_init(max_cpus);
1356      tcg_region_init(tb_size, splitwx, max_cpus);
1357  }
1358  
1359  /*
1360   * Allocate TBs right before their corresponding translated code, making
1361   * sure that TBs and code are on different cache lines.
1362   */
1363  TranslationBlock *tcg_tb_alloc(TCGContext *s)
1364  {
1365      uintptr_t align = qemu_icache_linesize;
1366      TranslationBlock *tb;
1367      void *next;
1368  
1369   retry:
1370      tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1371      next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1372  
1373      if (unlikely(next > s->code_gen_highwater)) {
1374          if (tcg_region_alloc(s)) {
1375              return NULL;
1376          }
1377          goto retry;
1378      }
1379      qatomic_set(&s->code_gen_ptr, next);
1380      s->data_gen_ptr = NULL;
1381      return tb;
1382  }
1383  
1384  void tcg_prologue_init(TCGContext *s)
1385  {
1386      size_t prologue_size;
1387  
1388      s->code_ptr = s->code_gen_ptr;
1389      s->code_buf = s->code_gen_ptr;
1390      s->data_gen_ptr = NULL;
1391  
1392  #ifndef CONFIG_TCG_INTERPRETER
1393      tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1394  #endif
1395  
1396  #ifdef TCG_TARGET_NEED_POOL_LABELS
1397      s->pool_labels = NULL;
1398  #endif
1399  
1400      qemu_thread_jit_write();
1401      /* Generate the prologue.  */
1402      tcg_target_qemu_prologue(s);
1403  
1404  #ifdef TCG_TARGET_NEED_POOL_LABELS
1405      /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1406      {
1407          int result = tcg_out_pool_finalize(s);
1408          tcg_debug_assert(result == 0);
1409      }
1410  #endif
1411  
1412      prologue_size = tcg_current_code_size(s);
1413      perf_report_prologue(s->code_gen_ptr, prologue_size);
1414  
1415  #ifndef CONFIG_TCG_INTERPRETER
1416      flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1417                          (uintptr_t)s->code_buf, prologue_size);
1418  #endif
1419  
1420      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1421          FILE *logfile = qemu_log_trylock();
1422          if (logfile) {
1423              fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1424              if (s->data_gen_ptr) {
1425                  size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1426                  size_t data_size = prologue_size - code_size;
1427                  size_t i;
1428  
1429                  disas(logfile, s->code_gen_ptr, code_size);
1430  
1431                  for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1432                      if (sizeof(tcg_target_ulong) == 8) {
1433                          fprintf(logfile,
1434                                  "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1435                                  (uintptr_t)s->data_gen_ptr + i,
1436                                  *(uint64_t *)(s->data_gen_ptr + i));
1437                      } else {
1438                          fprintf(logfile,
1439                                  "0x%08" PRIxPTR ":  .long  0x%08x\n",
1440                                  (uintptr_t)s->data_gen_ptr + i,
1441                                  *(uint32_t *)(s->data_gen_ptr + i));
1442                      }
1443                  }
1444              } else {
1445                  disas(logfile, s->code_gen_ptr, prologue_size);
1446              }
1447              fprintf(logfile, "\n");
1448              qemu_log_unlock(logfile);
1449          }
1450      }
1451  
1452  #ifndef CONFIG_TCG_INTERPRETER
1453      /*
1454       * Assert that goto_ptr is implemented completely, setting an epilogue.
1455       * For tci, we use NULL as the signal to return from the interpreter,
1456       * so skip this check.
1457       */
1458      tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1459  #endif
1460  
1461      tcg_region_prologue_set(s);
1462  }
1463  
1464  void tcg_func_start(TCGContext *s)
1465  {
1466      tcg_pool_reset(s);
1467      s->nb_temps = s->nb_globals;
1468  
1469      /* No temps have been previously allocated for size or locality.  */
1470      memset(s->free_temps, 0, sizeof(s->free_temps));
1471  
1472      /* No constant temps have been previously allocated. */
1473      for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1474          if (s->const_table[i]) {
1475              g_hash_table_remove_all(s->const_table[i]);
1476          }
1477      }
1478  
1479      s->nb_ops = 0;
1480      s->nb_labels = 0;
1481      s->current_frame_offset = s->frame_start;
1482  
1483  #ifdef CONFIG_DEBUG_TCG
1484      s->goto_tb_issue_mask = 0;
1485  #endif
1486  
1487      QTAILQ_INIT(&s->ops);
1488      QTAILQ_INIT(&s->free_ops);
1489      QSIMPLEQ_INIT(&s->labels);
1490  
1491      tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1492                       s->addr_type == TCG_TYPE_I64);
1493  
1494  #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1495      tcg_debug_assert(s->tlb_fast_offset < 0);
1496      tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1497  #endif
1498  
1499      tcg_debug_assert(s->insn_start_words > 0);
1500  }
1501  
1502  static TCGTemp *tcg_temp_alloc(TCGContext *s)
1503  {
1504      int n = s->nb_temps++;
1505  
1506      if (n >= TCG_MAX_TEMPS) {
1507          tcg_raise_tb_overflow(s);
1508      }
1509      return memset(&s->temps[n], 0, sizeof(TCGTemp));
1510  }
1511  
1512  static TCGTemp *tcg_global_alloc(TCGContext *s)
1513  {
1514      TCGTemp *ts;
1515  
1516      tcg_debug_assert(s->nb_globals == s->nb_temps);
1517      tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1518      s->nb_globals++;
1519      ts = tcg_temp_alloc(s);
1520      ts->kind = TEMP_GLOBAL;
1521  
1522      return ts;
1523  }
1524  
1525  static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1526                                              TCGReg reg, const char *name)
1527  {
1528      TCGTemp *ts;
1529  
1530      tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1531  
1532      ts = tcg_global_alloc(s);
1533      ts->base_type = type;
1534      ts->type = type;
1535      ts->kind = TEMP_FIXED;
1536      ts->reg = reg;
1537      ts->name = name;
1538      tcg_regset_set_reg(s->reserved_regs, reg);
1539  
1540      return ts;
1541  }
1542  
1543  void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1544  {
1545      s->frame_start = start;
1546      s->frame_end = start + size;
1547      s->frame_temp
1548          = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1549  }
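
      /*
       * Example (hedged): a backend typically reserves a slice of the C
       * stack for spills from its tcg_target_qemu_prologue(), along the
       * lines of
       *
       *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
       *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
       *
       * The register and offsets are per-backend; this sketches the
       * common pattern rather than quoting any one target.
       */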
1550  
1551  TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1552                                       intptr_t offset, const char *name)
1553  {
1554      TCGContext *s = tcg_ctx;
1555      TCGTemp *base_ts = tcgv_ptr_temp(base);
1556      TCGTemp *ts = tcg_global_alloc(s);
1557      int indirect_reg = 0;
1558  
1559      switch (base_ts->kind) {
1560      case TEMP_FIXED:
1561          break;
1562      case TEMP_GLOBAL:
1563          /* We do not support double-indirect registers.  */
1564          tcg_debug_assert(!base_ts->indirect_reg);
1565          base_ts->indirect_base = 1;
1566          s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1567                              ? 2 : 1);
1568          indirect_reg = 1;
1569          break;
1570      default:
1571          g_assert_not_reached();
1572      }
1573  
1574      if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1575          TCGTemp *ts2 = tcg_global_alloc(s);
1576          char buf[64];
1577  
1578          ts->base_type = TCG_TYPE_I64;
1579          ts->type = TCG_TYPE_I32;
1580          ts->indirect_reg = indirect_reg;
1581          ts->mem_allocated = 1;
1582          ts->mem_base = base_ts;
1583          ts->mem_offset = offset;
1584          pstrcpy(buf, sizeof(buf), name);
1585          pstrcat(buf, sizeof(buf), "_0");
1586          ts->name = strdup(buf);
1587  
1588          tcg_debug_assert(ts2 == ts + 1);
1589          ts2->base_type = TCG_TYPE_I64;
1590          ts2->type = TCG_TYPE_I32;
1591          ts2->indirect_reg = indirect_reg;
1592          ts2->mem_allocated = 1;
1593          ts2->mem_base = base_ts;
1594          ts2->mem_offset = offset + 4;
1595          ts2->temp_subindex = 1;
1596          pstrcpy(buf, sizeof(buf), name);
1597          pstrcat(buf, sizeof(buf), "_1");
1598          ts2->name = strdup(buf);
1599      } else {
1600          ts->base_type = type;
1601          ts->type = type;
1602          ts->indirect_reg = indirect_reg;
1603          ts->mem_allocated = 1;
1604          ts->mem_base = base_ts;
1605          ts->mem_offset = offset;
1606          ts->name = name;
1607      }
1608      return ts;
1609  }
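
      /*
       * Example (hedged sketch): frontends reach this through the typed
       * wrappers to create globals backed by CPU state, e.g.
       *
       *     TCGv_i32 r = tcg_global_mem_new_i32(base,
       *                      offsetof(CPUFooState, reg), "reg");
       *
       * where CPUFooState/"reg" are illustrative.  On a 32-bit host a
       * TCG_TYPE_I64 global is split as above into "reg_0" at offset
       * and "reg_1" at offset + 4.
       */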
1610  
1611  TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1612  {
1613      TCGContext *s = tcg_ctx;
1614      TCGTemp *ts;
1615      int n;
1616  
1617      if (kind == TEMP_EBB) {
1618          int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1619  
1620          if (idx < TCG_MAX_TEMPS) {
1621              /* There is already an available temp with the right type.  */
1622              clear_bit(idx, s->free_temps[type].l);
1623  
1624              ts = &s->temps[idx];
1625              ts->temp_allocated = 1;
1626              tcg_debug_assert(ts->base_type == type);
1627              tcg_debug_assert(ts->kind == kind);
1628              return ts;
1629          }
1630      } else {
1631          tcg_debug_assert(kind == TEMP_TB);
1632      }
1633  
1634      switch (type) {
1635      case TCG_TYPE_I32:
1636      case TCG_TYPE_V64:
1637      case TCG_TYPE_V128:
1638      case TCG_TYPE_V256:
1639          n = 1;
1640          break;
1641      case TCG_TYPE_I64:
1642          n = 64 / TCG_TARGET_REG_BITS;
1643          break;
1644      case TCG_TYPE_I128:
1645          n = 128 / TCG_TARGET_REG_BITS;
1646          break;
1647      default:
1648          g_assert_not_reached();
1649      }
1650  
1651      ts = tcg_temp_alloc(s);
1652      ts->base_type = type;
1653      ts->temp_allocated = 1;
1654      ts->kind = kind;
1655  
1656      if (n == 1) {
1657          ts->type = type;
1658      } else {
1659          ts->type = TCG_TYPE_REG;
1660  
1661          for (int i = 1; i < n; ++i) {
1662              TCGTemp *ts2 = tcg_temp_alloc(s);
1663  
1664              tcg_debug_assert(ts2 == ts + i);
1665              ts2->base_type = type;
1666              ts2->type = TCG_TYPE_REG;
1667              ts2->temp_allocated = 1;
1668              ts2->temp_subindex = i;
1669              ts2->kind = kind;
1670          }
1671      }
1672      return ts;
1673  }
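
      /*
       * For example, a TCG_TYPE_I128 request on a 32-bit host allocates
       * n = 4 adjacent TCGTemps of TCG_TYPE_REG (temp_subindex 0..3); on
       * a 64-bit host, n = 2.  Only the first temp is returned; callers
       * reach the rest as ts + i, which the adjacency assert above
       * guarantees is valid.
       */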
1674  
1675  TCGv_vec tcg_temp_new_vec(TCGType type)
1676  {
1677      TCGTemp *t;
1678  
1679  #ifdef CONFIG_DEBUG_TCG
1680      switch (type) {
1681      case TCG_TYPE_V64:
1682          assert(TCG_TARGET_HAS_v64);
1683          break;
1684      case TCG_TYPE_V128:
1685          assert(TCG_TARGET_HAS_v128);
1686          break;
1687      case TCG_TYPE_V256:
1688          assert(TCG_TARGET_HAS_v256);
1689          break;
1690      default:
1691          g_assert_not_reached();
1692      }
1693  #endif
1694  
1695      t = tcg_temp_new_internal(type, TEMP_EBB);
1696      return temp_tcgv_vec(t);
1697  }
1698  
1699  /* Create a new temp of the same type as an existing temp.  */
1700  TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1701  {
1702      TCGTemp *t = tcgv_vec_temp(match);
1703  
1704      tcg_debug_assert(t->temp_allocated != 0);
1705  
1706      t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1707      return temp_tcgv_vec(t);
1708  }
1709  
1710  void tcg_temp_free_internal(TCGTemp *ts)
1711  {
1712      TCGContext *s = tcg_ctx;
1713  
1714      switch (ts->kind) {
1715      case TEMP_CONST:
1716      case TEMP_TB:
1717          /* Silently ignore free. */
1718          break;
1719      case TEMP_EBB:
1720          tcg_debug_assert(ts->temp_allocated != 0);
1721          ts->temp_allocated = 0;
1722          set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1723          break;
1724      default:
1725          /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1726          g_assert_not_reached();
1727      }
1728  }
1729  
1730  TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1731  {
1732      TCGContext *s = tcg_ctx;
1733      GHashTable *h = s->const_table[type];
1734      TCGTemp *ts;
1735  
1736      if (h == NULL) {
1737          h = g_hash_table_new(g_int64_hash, g_int64_equal);
1738          s->const_table[type] = h;
1739      }
1740  
1741      ts = g_hash_table_lookup(h, &val);
1742      if (ts == NULL) {
1743          int64_t *val_ptr;
1744  
1745          ts = tcg_temp_alloc(s);
1746  
1747          if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1748              TCGTemp *ts2 = tcg_temp_alloc(s);
1749  
1750              tcg_debug_assert(ts2 == ts + 1);
1751  
1752              ts->base_type = TCG_TYPE_I64;
1753              ts->type = TCG_TYPE_I32;
1754              ts->kind = TEMP_CONST;
1755              ts->temp_allocated = 1;
1756  
1757              ts2->base_type = TCG_TYPE_I64;
1758              ts2->type = TCG_TYPE_I32;
1759              ts2->kind = TEMP_CONST;
1760              ts2->temp_allocated = 1;
1761              ts2->temp_subindex = 1;
1762  
1763              /*
1764               * Retain the full value of the 64-bit constant in the low
1765               * part, so that the hash table works.  Actual uses will
1766               * truncate the value to the low part.
1767               */
1768              ts[HOST_BIG_ENDIAN].val = val;
1769              ts[!HOST_BIG_ENDIAN].val = val >> 32;
1770              val_ptr = &ts[HOST_BIG_ENDIAN].val;
1771          } else {
1772              ts->base_type = type;
1773              ts->type = type;
1774              ts->kind = TEMP_CONST;
1775              ts->temp_allocated = 1;
1776              ts->val = val;
1777              val_ptr = &ts->val;
1778          }
1779          g_hash_table_insert(h, val_ptr, ts);
1780      }
1781  
1782      return ts;
1783  }
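
      /*
       * Usage sketch (hedged): constants are interned per (type, value),
       * so repeated requests return the same temp, and TEMP_CONST temps
       * are never freed:
       *
       *     TCGTemp *a = tcg_constant_internal(TCG_TYPE_I32, 42);
       *     TCGTemp *b = tcg_constant_internal(TCG_TYPE_I32, 42);
       *     assert(a == b);    /* same hash-table entry */
       */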
1784  
1785  TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1786  {
1787      val = dup_const(vece, val);
1788      return temp_tcgv_vec(tcg_constant_internal(type, val));
1789  }
1790  
1791  TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1792  {
1793      TCGTemp *t = tcgv_vec_temp(match);
1794  
1795      tcg_debug_assert(t->temp_allocated != 0);
1796      return tcg_constant_vec(t->base_type, vece, val);
1797  }
1798  
1799  #ifdef CONFIG_DEBUG_TCG
1800  size_t temp_idx(TCGTemp *ts)
1801  {
1802      ptrdiff_t n = ts - tcg_ctx->temps;
1803      assert(n >= 0 && n < tcg_ctx->nb_temps);
1804      return n;
1805  }
1806  
1807  TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1808  {
1809      uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1810  
1811      assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1812      assert(o % sizeof(TCGTemp) == 0);
1813  
1814      return (void *)tcg_ctx + (uintptr_t)v;
1815  }
1816  #endif /* CONFIG_DEBUG_TCG */
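
      /*
       * Note on the encoding validated above: a TCGv_i32 is not a pointer
       * but the byte offset of its TCGTemp from the start of tcg_ctx, so
       * after subtracting offsetof(TCGContext, temps) it must satisfy
       *
       *     o % sizeof(TCGTemp) == 0  &&  o / sizeof(TCGTemp) < nb_temps
       *
       * which is exactly what the debug-only accessor asserts.
       */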
1817  
1818  /* Return true if OP may appear in the opcode stream.
1819     Test the runtime variable that controls each opcode.  */
1820  bool tcg_op_supported(TCGOpcode op)
1821  {
1822      const bool have_vec
1823          = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1824  
1825      switch (op) {
1826      case INDEX_op_discard:
1827      case INDEX_op_set_label:
1828      case INDEX_op_call:
1829      case INDEX_op_br:
1830      case INDEX_op_mb:
1831      case INDEX_op_insn_start:
1832      case INDEX_op_exit_tb:
1833      case INDEX_op_goto_tb:
1834      case INDEX_op_goto_ptr:
1835      case INDEX_op_qemu_ld_a32_i32:
1836      case INDEX_op_qemu_ld_a64_i32:
1837      case INDEX_op_qemu_st_a32_i32:
1838      case INDEX_op_qemu_st_a64_i32:
1839      case INDEX_op_qemu_ld_a32_i64:
1840      case INDEX_op_qemu_ld_a64_i64:
1841      case INDEX_op_qemu_st_a32_i64:
1842      case INDEX_op_qemu_st_a64_i64:
1843          return true;
1844  
1845      case INDEX_op_qemu_st8_a32_i32:
1846      case INDEX_op_qemu_st8_a64_i32:
1847          return TCG_TARGET_HAS_qemu_st8_i32;
1848  
1849      case INDEX_op_qemu_ld_a32_i128:
1850      case INDEX_op_qemu_ld_a64_i128:
1851      case INDEX_op_qemu_st_a32_i128:
1852      case INDEX_op_qemu_st_a64_i128:
1853          return TCG_TARGET_HAS_qemu_ldst_i128;
1854  
1855      case INDEX_op_mov_i32:
1856      case INDEX_op_setcond_i32:
1857      case INDEX_op_brcond_i32:
1858      case INDEX_op_ld8u_i32:
1859      case INDEX_op_ld8s_i32:
1860      case INDEX_op_ld16u_i32:
1861      case INDEX_op_ld16s_i32:
1862      case INDEX_op_ld_i32:
1863      case INDEX_op_st8_i32:
1864      case INDEX_op_st16_i32:
1865      case INDEX_op_st_i32:
1866      case INDEX_op_add_i32:
1867      case INDEX_op_sub_i32:
1868      case INDEX_op_mul_i32:
1869      case INDEX_op_and_i32:
1870      case INDEX_op_or_i32:
1871      case INDEX_op_xor_i32:
1872      case INDEX_op_shl_i32:
1873      case INDEX_op_shr_i32:
1874      case INDEX_op_sar_i32:
1875          return true;
1876  
1877      case INDEX_op_movcond_i32:
1878          return TCG_TARGET_HAS_movcond_i32;
1879      case INDEX_op_div_i32:
1880      case INDEX_op_divu_i32:
1881          return TCG_TARGET_HAS_div_i32;
1882      case INDEX_op_rem_i32:
1883      case INDEX_op_remu_i32:
1884          return TCG_TARGET_HAS_rem_i32;
1885      case INDEX_op_div2_i32:
1886      case INDEX_op_divu2_i32:
1887          return TCG_TARGET_HAS_div2_i32;
1888      case INDEX_op_rotl_i32:
1889      case INDEX_op_rotr_i32:
1890          return TCG_TARGET_HAS_rot_i32;
1891      case INDEX_op_deposit_i32:
1892          return TCG_TARGET_HAS_deposit_i32;
1893      case INDEX_op_extract_i32:
1894          return TCG_TARGET_HAS_extract_i32;
1895      case INDEX_op_sextract_i32:
1896          return TCG_TARGET_HAS_sextract_i32;
1897      case INDEX_op_extract2_i32:
1898          return TCG_TARGET_HAS_extract2_i32;
1899      case INDEX_op_add2_i32:
1900          return TCG_TARGET_HAS_add2_i32;
1901      case INDEX_op_sub2_i32:
1902          return TCG_TARGET_HAS_sub2_i32;
1903      case INDEX_op_mulu2_i32:
1904          return TCG_TARGET_HAS_mulu2_i32;
1905      case INDEX_op_muls2_i32:
1906          return TCG_TARGET_HAS_muls2_i32;
1907      case INDEX_op_muluh_i32:
1908          return TCG_TARGET_HAS_muluh_i32;
1909      case INDEX_op_mulsh_i32:
1910          return TCG_TARGET_HAS_mulsh_i32;
1911      case INDEX_op_ext8s_i32:
1912          return TCG_TARGET_HAS_ext8s_i32;
1913      case INDEX_op_ext16s_i32:
1914          return TCG_TARGET_HAS_ext16s_i32;
1915      case INDEX_op_ext8u_i32:
1916          return TCG_TARGET_HAS_ext8u_i32;
1917      case INDEX_op_ext16u_i32:
1918          return TCG_TARGET_HAS_ext16u_i32;
1919      case INDEX_op_bswap16_i32:
1920          return TCG_TARGET_HAS_bswap16_i32;
1921      case INDEX_op_bswap32_i32:
1922          return TCG_TARGET_HAS_bswap32_i32;
1923      case INDEX_op_not_i32:
1924          return TCG_TARGET_HAS_not_i32;
1925      case INDEX_op_neg_i32:
1926          return TCG_TARGET_HAS_neg_i32;
1927      case INDEX_op_andc_i32:
1928          return TCG_TARGET_HAS_andc_i32;
1929      case INDEX_op_orc_i32:
1930          return TCG_TARGET_HAS_orc_i32;
1931      case INDEX_op_eqv_i32:
1932          return TCG_TARGET_HAS_eqv_i32;
1933      case INDEX_op_nand_i32:
1934          return TCG_TARGET_HAS_nand_i32;
1935      case INDEX_op_nor_i32:
1936          return TCG_TARGET_HAS_nor_i32;
1937      case INDEX_op_clz_i32:
1938          return TCG_TARGET_HAS_clz_i32;
1939      case INDEX_op_ctz_i32:
1940          return TCG_TARGET_HAS_ctz_i32;
1941      case INDEX_op_ctpop_i32:
1942          return TCG_TARGET_HAS_ctpop_i32;
1943  
1944      case INDEX_op_brcond2_i32:
1945      case INDEX_op_setcond2_i32:
1946          return TCG_TARGET_REG_BITS == 32;
1947  
1948      case INDEX_op_mov_i64:
1949      case INDEX_op_setcond_i64:
1950      case INDEX_op_brcond_i64:
1951      case INDEX_op_ld8u_i64:
1952      case INDEX_op_ld8s_i64:
1953      case INDEX_op_ld16u_i64:
1954      case INDEX_op_ld16s_i64:
1955      case INDEX_op_ld32u_i64:
1956      case INDEX_op_ld32s_i64:
1957      case INDEX_op_ld_i64:
1958      case INDEX_op_st8_i64:
1959      case INDEX_op_st16_i64:
1960      case INDEX_op_st32_i64:
1961      case INDEX_op_st_i64:
1962      case INDEX_op_add_i64:
1963      case INDEX_op_sub_i64:
1964      case INDEX_op_mul_i64:
1965      case INDEX_op_and_i64:
1966      case INDEX_op_or_i64:
1967      case INDEX_op_xor_i64:
1968      case INDEX_op_shl_i64:
1969      case INDEX_op_shr_i64:
1970      case INDEX_op_sar_i64:
1971      case INDEX_op_ext_i32_i64:
1972      case INDEX_op_extu_i32_i64:
1973          return TCG_TARGET_REG_BITS == 64;
1974  
1975      case INDEX_op_movcond_i64:
1976          return TCG_TARGET_HAS_movcond_i64;
1977      case INDEX_op_div_i64:
1978      case INDEX_op_divu_i64:
1979          return TCG_TARGET_HAS_div_i64;
1980      case INDEX_op_rem_i64:
1981      case INDEX_op_remu_i64:
1982          return TCG_TARGET_HAS_rem_i64;
1983      case INDEX_op_div2_i64:
1984      case INDEX_op_divu2_i64:
1985          return TCG_TARGET_HAS_div2_i64;
1986      case INDEX_op_rotl_i64:
1987      case INDEX_op_rotr_i64:
1988          return TCG_TARGET_HAS_rot_i64;
1989      case INDEX_op_deposit_i64:
1990          return TCG_TARGET_HAS_deposit_i64;
1991      case INDEX_op_extract_i64:
1992          return TCG_TARGET_HAS_extract_i64;
1993      case INDEX_op_sextract_i64:
1994          return TCG_TARGET_HAS_sextract_i64;
1995      case INDEX_op_extract2_i64:
1996          return TCG_TARGET_HAS_extract2_i64;
1997      case INDEX_op_extrl_i64_i32:
1998          return TCG_TARGET_HAS_extrl_i64_i32;
1999      case INDEX_op_extrh_i64_i32:
2000          return TCG_TARGET_HAS_extrh_i64_i32;
2001      case INDEX_op_ext8s_i64:
2002          return TCG_TARGET_HAS_ext8s_i64;
2003      case INDEX_op_ext16s_i64:
2004          return TCG_TARGET_HAS_ext16s_i64;
2005      case INDEX_op_ext32s_i64:
2006          return TCG_TARGET_HAS_ext32s_i64;
2007      case INDEX_op_ext8u_i64:
2008          return TCG_TARGET_HAS_ext8u_i64;
2009      case INDEX_op_ext16u_i64:
2010          return TCG_TARGET_HAS_ext16u_i64;
2011      case INDEX_op_ext32u_i64:
2012          return TCG_TARGET_HAS_ext32u_i64;
2013      case INDEX_op_bswap16_i64:
2014          return TCG_TARGET_HAS_bswap16_i64;
2015      case INDEX_op_bswap32_i64:
2016          return TCG_TARGET_HAS_bswap32_i64;
2017      case INDEX_op_bswap64_i64:
2018          return TCG_TARGET_HAS_bswap64_i64;
2019      case INDEX_op_not_i64:
2020          return TCG_TARGET_HAS_not_i64;
2021      case INDEX_op_neg_i64:
2022          return TCG_TARGET_HAS_neg_i64;
2023      case INDEX_op_andc_i64:
2024          return TCG_TARGET_HAS_andc_i64;
2025      case INDEX_op_orc_i64:
2026          return TCG_TARGET_HAS_orc_i64;
2027      case INDEX_op_eqv_i64:
2028          return TCG_TARGET_HAS_eqv_i64;
2029      case INDEX_op_nand_i64:
2030          return TCG_TARGET_HAS_nand_i64;
2031      case INDEX_op_nor_i64:
2032          return TCG_TARGET_HAS_nor_i64;
2033      case INDEX_op_clz_i64:
2034          return TCG_TARGET_HAS_clz_i64;
2035      case INDEX_op_ctz_i64:
2036          return TCG_TARGET_HAS_ctz_i64;
2037      case INDEX_op_ctpop_i64:
2038          return TCG_TARGET_HAS_ctpop_i64;
2039      case INDEX_op_add2_i64:
2040          return TCG_TARGET_HAS_add2_i64;
2041      case INDEX_op_sub2_i64:
2042          return TCG_TARGET_HAS_sub2_i64;
2043      case INDEX_op_mulu2_i64:
2044          return TCG_TARGET_HAS_mulu2_i64;
2045      case INDEX_op_muls2_i64:
2046          return TCG_TARGET_HAS_muls2_i64;
2047      case INDEX_op_muluh_i64:
2048          return TCG_TARGET_HAS_muluh_i64;
2049      case INDEX_op_mulsh_i64:
2050          return TCG_TARGET_HAS_mulsh_i64;
2051  
2052      case INDEX_op_mov_vec:
2053      case INDEX_op_dup_vec:
2054      case INDEX_op_dupm_vec:
2055      case INDEX_op_ld_vec:
2056      case INDEX_op_st_vec:
2057      case INDEX_op_add_vec:
2058      case INDEX_op_sub_vec:
2059      case INDEX_op_and_vec:
2060      case INDEX_op_or_vec:
2061      case INDEX_op_xor_vec:
2062      case INDEX_op_cmp_vec:
2063          return have_vec;
2064      case INDEX_op_dup2_vec:
2065          return have_vec && TCG_TARGET_REG_BITS == 32;
2066      case INDEX_op_not_vec:
2067          return have_vec && TCG_TARGET_HAS_not_vec;
2068      case INDEX_op_neg_vec:
2069          return have_vec && TCG_TARGET_HAS_neg_vec;
2070      case INDEX_op_abs_vec:
2071          return have_vec && TCG_TARGET_HAS_abs_vec;
2072      case INDEX_op_andc_vec:
2073          return have_vec && TCG_TARGET_HAS_andc_vec;
2074      case INDEX_op_orc_vec:
2075          return have_vec && TCG_TARGET_HAS_orc_vec;
2076      case INDEX_op_nand_vec:
2077          return have_vec && TCG_TARGET_HAS_nand_vec;
2078      case INDEX_op_nor_vec:
2079          return have_vec && TCG_TARGET_HAS_nor_vec;
2080      case INDEX_op_eqv_vec:
2081          return have_vec && TCG_TARGET_HAS_eqv_vec;
2082      case INDEX_op_mul_vec:
2083          return have_vec && TCG_TARGET_HAS_mul_vec;
2084      case INDEX_op_shli_vec:
2085      case INDEX_op_shri_vec:
2086      case INDEX_op_sari_vec:
2087          return have_vec && TCG_TARGET_HAS_shi_vec;
2088      case INDEX_op_shls_vec:
2089      case INDEX_op_shrs_vec:
2090      case INDEX_op_sars_vec:
2091          return have_vec && TCG_TARGET_HAS_shs_vec;
2092      case INDEX_op_shlv_vec:
2093      case INDEX_op_shrv_vec:
2094      case INDEX_op_sarv_vec:
2095          return have_vec && TCG_TARGET_HAS_shv_vec;
2096      case INDEX_op_rotli_vec:
2097          return have_vec && TCG_TARGET_HAS_roti_vec;
2098      case INDEX_op_rotls_vec:
2099          return have_vec && TCG_TARGET_HAS_rots_vec;
2100      case INDEX_op_rotlv_vec:
2101      case INDEX_op_rotrv_vec:
2102          return have_vec && TCG_TARGET_HAS_rotv_vec;
2103      case INDEX_op_ssadd_vec:
2104      case INDEX_op_usadd_vec:
2105      case INDEX_op_sssub_vec:
2106      case INDEX_op_ussub_vec:
2107          return have_vec && TCG_TARGET_HAS_sat_vec;
2108      case INDEX_op_smin_vec:
2109      case INDEX_op_umin_vec:
2110      case INDEX_op_smax_vec:
2111      case INDEX_op_umax_vec:
2112          return have_vec && TCG_TARGET_HAS_minmax_vec;
2113      case INDEX_op_bitsel_vec:
2114          return have_vec && TCG_TARGET_HAS_bitsel_vec;
2115      case INDEX_op_cmpsel_vec:
2116          return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2117  
2118      default:
2119          tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2120          return true;
2121      }
2122  }
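
      /*
       * Usage sketch (hedged): callers guard optional opcodes with this
       * predicate and otherwise fall back to a generic expansion:
       *
       *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
       *         ... emit ctpop_i32 directly ...
       *     } else {
       *         ... expand via shifts/masks or a helper call ...
       *     }
       */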
2123  
2124  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2125  
2126  static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2127  {
2128      TCGv_i64 extend_free[MAX_CALL_IARGS];
2129      int n_extend = 0;
2130      TCGOp *op;
2131      int i, n, pi = 0, total_args;
2132  
2133      if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2134          init_call_layout(info);
2135          g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2136      }
2137  
2138      total_args = info->nr_out + info->nr_in + 2;
2139      op = tcg_op_alloc(INDEX_op_call, total_args);
2140  
2141  #ifdef CONFIG_PLUGIN
2142      /* Flag helpers that may affect guest state */
2143      if (tcg_ctx->plugin_insn &&
2144          !(info->flags & TCG_CALL_PLUGIN) &&
2145          !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2146          tcg_ctx->plugin_insn->calls_helpers = true;
2147      }
2148  #endif
2149  
2150      TCGOP_CALLO(op) = n = info->nr_out;
2151      switch (n) {
2152      case 0:
2153          tcg_debug_assert(ret == NULL);
2154          break;
2155      case 1:
2156          tcg_debug_assert(ret != NULL);
2157          op->args[pi++] = temp_arg(ret);
2158          break;
2159      case 2:
2160      case 4:
2161          tcg_debug_assert(ret != NULL);
2162          tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2163          tcg_debug_assert(ret->temp_subindex == 0);
2164          for (i = 0; i < n; ++i) {
2165              op->args[pi++] = temp_arg(ret + i);
2166          }
2167          break;
2168      default:
2169          g_assert_not_reached();
2170      }
2171  
2172      TCGOP_CALLI(op) = n = info->nr_in;
2173      for (i = 0; i < n; i++) {
2174          const TCGCallArgumentLoc *loc = &info->in[i];
2175          TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2176  
2177          switch (loc->kind) {
2178          case TCG_CALL_ARG_NORMAL:
2179          case TCG_CALL_ARG_BY_REF:
2180          case TCG_CALL_ARG_BY_REF_N:
2181              op->args[pi++] = temp_arg(ts);
2182              break;
2183  
2184          case TCG_CALL_ARG_EXTEND_U:
2185          case TCG_CALL_ARG_EXTEND_S:
2186              {
2187                  TCGv_i64 temp = tcg_temp_ebb_new_i64();
2188                  TCGv_i32 orig = temp_tcgv_i32(ts);
2189  
2190                  if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2191                      tcg_gen_ext_i32_i64(temp, orig);
2192                  } else {
2193                      tcg_gen_extu_i32_i64(temp, orig);
2194                  }
2195                  op->args[pi++] = tcgv_i64_arg(temp);
2196                  extend_free[n_extend++] = temp;
2197              }
2198              break;
2199  
2200          default:
2201              g_assert_not_reached();
2202          }
2203      }
2204      op->args[pi++] = (uintptr_t)info->func;
2205      op->args[pi++] = (uintptr_t)info;
2206      tcg_debug_assert(pi == total_args);
2207  
2208      QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2209  
2210      tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2211      for (i = 0; i < n_extend; ++i) {
2212          tcg_temp_free_i64(extend_free[i]);
2213      }
2214  }
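
      /*
       * The TCG_CALL_ARG_EXTEND_{U,S} path above serves 64-bit hosts
       * whose C ABI requires 32-bit arguments to be widened to a full
       * register: a scratch EBB i64 temp receives the zero- or
       * sign-extended value, stands in for the original i32 argument,
       * and is freed once the call op has been queued.
       */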
2215  
2216  void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2217  {
2218      tcg_gen_callN(info, ret, NULL);
2219  }
2220  
2221  void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2222  {
2223      tcg_gen_callN(info, ret, &t1);
2224  }
2225  
2226  void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2227  {
2228      TCGTemp *args[2] = { t1, t2 };
2229      tcg_gen_callN(info, ret, args);
2230  }
2231  
2232  void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2233                     TCGTemp *t2, TCGTemp *t3)
2234  {
2235      TCGTemp *args[3] = { t1, t2, t3 };
2236      tcg_gen_callN(info, ret, args);
2237  }
2238  
2239  void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2240                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2241  {
2242      TCGTemp *args[4] = { t1, t2, t3, t4 };
2243      tcg_gen_callN(info, ret, args);
2244  }
2245  
2246  void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2247                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2248  {
2249      TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2250      tcg_gen_callN(info, ret, args);
2251  }
2252  
2253  void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2254                     TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2255  {
2256      TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2257      tcg_gen_callN(info, ret, args);
2258  }
2259  
2260  void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2261                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2262                     TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2263  {
2264      TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2265      tcg_gen_callN(info, ret, args);
2266  }
2267  
2268  static void tcg_reg_alloc_start(TCGContext *s)
2269  {
2270      int i, n;
2271  
2272      for (i = 0, n = s->nb_temps; i < n; i++) {
2273          TCGTemp *ts = &s->temps[i];
2274          TCGTempVal val = TEMP_VAL_MEM;
2275  
2276          switch (ts->kind) {
2277          case TEMP_CONST:
2278              val = TEMP_VAL_CONST;
2279              break;
2280          case TEMP_FIXED:
2281              val = TEMP_VAL_REG;
2282              break;
2283          case TEMP_GLOBAL:
2284              break;
2285          case TEMP_EBB:
2286              val = TEMP_VAL_DEAD;
2287              /* fall through */
2288          case TEMP_TB:
2289              ts->mem_allocated = 0;
2290              break;
2291          default:
2292              g_assert_not_reached();
2293          }
2294          ts->val_type = val;
2295      }
2296  
2297      memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2298  }
2299  
2300  static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2301                                   TCGTemp *ts)
2302  {
2303      int idx = temp_idx(ts);
2304  
2305      switch (ts->kind) {
2306      case TEMP_FIXED:
2307      case TEMP_GLOBAL:
2308          pstrcpy(buf, buf_size, ts->name);
2309          break;
2310      case TEMP_TB:
2311          snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2312          break;
2313      case TEMP_EBB:
2314          snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2315          break;
2316      case TEMP_CONST:
2317          switch (ts->type) {
2318          case TCG_TYPE_I32:
2319              snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2320              break;
2321  #if TCG_TARGET_REG_BITS > 32
2322          case TCG_TYPE_I64:
2323              snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2324              break;
2325  #endif
2326          case TCG_TYPE_V64:
2327          case TCG_TYPE_V128:
2328          case TCG_TYPE_V256:
2329              snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2330                       64 << (ts->type - TCG_TYPE_V64), ts->val);
2331              break;
2332          default:
2333              g_assert_not_reached();
2334          }
2335          break;
2336      }
2337      return buf;
2338  }
2339  
2340  static char *tcg_get_arg_str(TCGContext *s, char *buf,
2341                               int buf_size, TCGArg arg)
2342  {
2343      return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2344  }
2345  
2346  static const char * const cond_name[] =
2347  {
2348      [TCG_COND_NEVER] = "never",
2349      [TCG_COND_ALWAYS] = "always",
2350      [TCG_COND_EQ] = "eq",
2351      [TCG_COND_NE] = "ne",
2352      [TCG_COND_LT] = "lt",
2353      [TCG_COND_GE] = "ge",
2354      [TCG_COND_LE] = "le",
2355      [TCG_COND_GT] = "gt",
2356      [TCG_COND_LTU] = "ltu",
2357      [TCG_COND_GEU] = "geu",
2358      [TCG_COND_LEU] = "leu",
2359      [TCG_COND_GTU] = "gtu"
2360  };
2361  
2362  static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2363  {
2364      [MO_UB]   = "ub",
2365      [MO_SB]   = "sb",
2366      [MO_LEUW] = "leuw",
2367      [MO_LESW] = "lesw",
2368      [MO_LEUL] = "leul",
2369      [MO_LESL] = "lesl",
2370      [MO_LEUQ] = "leq",
2371      [MO_BEUW] = "beuw",
2372      [MO_BESW] = "besw",
2373      [MO_BEUL] = "beul",
2374      [MO_BESL] = "besl",
2375      [MO_BEUQ] = "beq",
2376      [MO_128 + MO_BE] = "beo",
2377      [MO_128 + MO_LE] = "leo",
2378  };
2379  
2380  static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2381      [MO_UNALN >> MO_ASHIFT]    = "un+",
2382      [MO_ALIGN >> MO_ASHIFT]    = "al+",
2383      [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2384      [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2385      [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2386      [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2387      [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2388      [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2389  };
2390  
2391  static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2392      [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2393      [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2394      [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2395      [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2396      [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2397      [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2398  };
2399  
2400  static const char bswap_flag_name[][6] = {
2401      [TCG_BSWAP_IZ] = "iz",
2402      [TCG_BSWAP_OZ] = "oz",
2403      [TCG_BSWAP_OS] = "os",
2404      [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2405      [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2406  };
2407  
2408  static inline bool tcg_regset_single(TCGRegSet d)
2409  {
2410      return (d & (d - 1)) == 0;
2411  }
2412  
2413  static inline TCGReg tcg_regset_first(TCGRegSet d)
2414  {
2415      if (TCG_TARGET_NB_REGS <= 32) {
2416          return ctz32(d);
2417      } else {
2418          return ctz64(d);
2419      }
2420  }
2421  
2422  /* Return only the number of characters output -- no error return. */
2423  #define ne_fprintf(...) \
2424      ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2425  
2426  static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2427  {
2428      char buf[128];
2429      TCGOp *op;
2430  
2431      QTAILQ_FOREACH(op, &s->ops, link) {
2432          int i, k, nb_oargs, nb_iargs, nb_cargs;
2433          const TCGOpDef *def;
2434          TCGOpcode c;
2435          int col = 0;
2436  
2437          c = op->opc;
2438          def = &tcg_op_defs[c];
2439  
2440          if (c == INDEX_op_insn_start) {
2441              nb_oargs = 0;
2442              col += ne_fprintf(f, "\n ----");
2443  
2444              for (i = 0, k = s->insn_start_words; i < k; ++i) {
2445                  col += ne_fprintf(f, " %016" PRIx64,
2446                                    tcg_get_insn_start_param(op, i));
2447              }
2448          } else if (c == INDEX_op_call) {
2449              const TCGHelperInfo *info = tcg_call_info(op);
2450              void *func = tcg_call_func(op);
2451  
2452              /* variable number of arguments */
2453              nb_oargs = TCGOP_CALLO(op);
2454              nb_iargs = TCGOP_CALLI(op);
2455              nb_cargs = def->nb_cargs;
2456  
2457              col += ne_fprintf(f, " %s ", def->name);
2458  
2459              /*
2460               * Print the function name from TCGHelperInfo, if available.
2461               * Note that plugins have a template function for the info,
2462               * but the actual function pointer comes from the plugin.
2463               */
2464              if (func == info->func) {
2465                  col += ne_fprintf(f, "%s", info->name);
2466              } else {
2467                  col += ne_fprintf(f, "plugin(%p)", func);
2468              }
2469  
2470              col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2471              for (i = 0; i < nb_oargs; i++) {
2472                  col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2473                                                              op->args[i]));
2474              }
2475              for (i = 0; i < nb_iargs; i++) {
2476                  TCGArg arg = op->args[nb_oargs + i];
2477                  const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2478                  col += ne_fprintf(f, ",%s", t);
2479              }
2480          } else {
2481              col += ne_fprintf(f, " %s ", def->name);
2482  
2483              nb_oargs = def->nb_oargs;
2484              nb_iargs = def->nb_iargs;
2485              nb_cargs = def->nb_cargs;
2486  
2487              if (def->flags & TCG_OPF_VECTOR) {
2488                  col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2489                                    8 << TCGOP_VECE(op));
2490              }
2491  
2492              k = 0;
2493              for (i = 0; i < nb_oargs; i++) {
2494                  const char *sep =  k ? "," : "";
2495                  col += ne_fprintf(f, "%s%s", sep,
2496                                    tcg_get_arg_str(s, buf, sizeof(buf),
2497                                                    op->args[k++]));
2498              }
2499              for (i = 0; i < nb_iargs; i++) {
2500                  const char *sep =  k ? "," : "";
2501                  col += ne_fprintf(f, "%s%s", sep,
2502                                    tcg_get_arg_str(s, buf, sizeof(buf),
2503                                                    op->args[k++]));
2504              }
2505              switch (c) {
2506              case INDEX_op_brcond_i32:
2507              case INDEX_op_setcond_i32:
2508              case INDEX_op_movcond_i32:
2509              case INDEX_op_brcond2_i32:
2510              case INDEX_op_setcond2_i32:
2511              case INDEX_op_brcond_i64:
2512              case INDEX_op_setcond_i64:
2513              case INDEX_op_movcond_i64:
2514              case INDEX_op_cmp_vec:
2515              case INDEX_op_cmpsel_vec:
2516                  if (op->args[k] < ARRAY_SIZE(cond_name)
2517                      && cond_name[op->args[k]]) {
2518                      col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2519                  } else {
2520                      col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2521                  }
2522                  i = 1;
2523                  break;
2524              case INDEX_op_qemu_ld_a32_i32:
2525              case INDEX_op_qemu_ld_a64_i32:
2526              case INDEX_op_qemu_st_a32_i32:
2527              case INDEX_op_qemu_st_a64_i32:
2528              case INDEX_op_qemu_st8_a32_i32:
2529              case INDEX_op_qemu_st8_a64_i32:
2530              case INDEX_op_qemu_ld_a32_i64:
2531              case INDEX_op_qemu_ld_a64_i64:
2532              case INDEX_op_qemu_st_a32_i64:
2533              case INDEX_op_qemu_st_a64_i64:
2534              case INDEX_op_qemu_ld_a32_i128:
2535              case INDEX_op_qemu_ld_a64_i128:
2536              case INDEX_op_qemu_st_a32_i128:
2537              case INDEX_op_qemu_st_a64_i128:
2538                  {
2539                      const char *s_al, *s_op, *s_at;
2540                      MemOpIdx oi = op->args[k++];
2541                      MemOp mop = get_memop(oi);  /* don't shadow TCGOp *op */
2542                      unsigned ix = get_mmuidx(oi);
2543  
2544                      s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2545                      s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2546                      s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2547                      mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2548  
2549                      /* If all fields are accounted for, print symbolically. */
2550                      if (!mop && s_al && s_op && s_at) {
2551                          col += ne_fprintf(f, ",%s%s%s,%u",
2552                                            s_at, s_al, s_op, ix);
2553                      } else {
2554                          mop = get_memop(oi);
2555                          col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2556                      }
2557                      i = 1;
2558                  }
2559                  break;
2560              case INDEX_op_bswap16_i32:
2561              case INDEX_op_bswap16_i64:
2562              case INDEX_op_bswap32_i32:
2563              case INDEX_op_bswap32_i64:
2564              case INDEX_op_bswap64_i64:
2565                  {
2566                      TCGArg flags = op->args[k];
2567                      const char *name = NULL;
2568  
2569                      if (flags < ARRAY_SIZE(bswap_flag_name)) {
2570                          name = bswap_flag_name[flags];
2571                      }
2572                      if (name) {
2573                          col += ne_fprintf(f, ",%s", name);
2574                      } else {
2575                          col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2576                      }
2577                      i = k = 1;
2578                  }
2579                  break;
2580              default:
2581                  i = 0;
2582                  break;
2583              }
2584              switch (c) {
2585              case INDEX_op_set_label:
2586              case INDEX_op_br:
2587              case INDEX_op_brcond_i32:
2588              case INDEX_op_brcond_i64:
2589              case INDEX_op_brcond2_i32:
2590                  col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2591                                    arg_label(op->args[k])->id);
2592                  i++, k++;
2593                  break;
2594              case INDEX_op_mb:
2595                  {
2596                      TCGBar membar = op->args[k];
2597                      const char *b_op, *m_op;
2598  
2599                      switch (membar & TCG_BAR_SC) {
2600                      case 0:
2601                          b_op = "none";
2602                          break;
2603                      case TCG_BAR_LDAQ:
2604                          b_op = "acq";
2605                          break;
2606                      case TCG_BAR_STRL:
2607                          b_op = "rel";
2608                          break;
2609                      case TCG_BAR_SC:
2610                          b_op = "seq";
2611                          break;
2612                      default:
2613                          g_assert_not_reached();
2614                      }
2615  
2616                      switch (membar & TCG_MO_ALL) {
2617                      case 0:
2618                          m_op = "none";
2619                          break;
2620                      case TCG_MO_LD_LD:
2621                          m_op = "rr";
2622                          break;
2623                      case TCG_MO_LD_ST:
2624                          m_op = "rw";
2625                          break;
2626                      case TCG_MO_ST_LD:
2627                          m_op = "wr";
2628                          break;
2629                      case TCG_MO_ST_ST:
2630                          m_op = "ww";
2631                          break;
2632                      case TCG_MO_LD_LD | TCG_MO_LD_ST:
2633                          m_op = "rr+rw";
2634                          break;
2635                      case TCG_MO_LD_LD | TCG_MO_ST_LD:
2636                          m_op = "rr+wr";
2637                          break;
2638                      case TCG_MO_LD_LD | TCG_MO_ST_ST:
2639                          m_op = "rr+ww";
2640                          break;
2641                      case TCG_MO_LD_ST | TCG_MO_ST_LD:
2642                          m_op = "rw+wr";
2643                          break;
2644                      case TCG_MO_LD_ST | TCG_MO_ST_ST:
2645                          m_op = "rw+ww";
2646                          break;
2647                      case TCG_MO_ST_LD | TCG_MO_ST_ST:
2648                          m_op = "wr+ww";
2649                          break;
2650                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2651                          m_op = "rr+rw+wr";
2652                          break;
2653                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2654                          m_op = "rr+rw+ww";
2655                          break;
2656                      case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2657                          m_op = "rr+wr+ww";
2658                          break;
2659                      case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2660                          m_op = "rw+wr+ww";
2661                          break;
2662                      case TCG_MO_ALL:
2663                          m_op = "all";
2664                          break;
2665                      default:
2666                          g_assert_not_reached();
2667                      }
2668  
2669                      col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2670                      i++, k++;
2671                  }
2672                  break;
2673              default:
2674                  break;
2675              }
2676              for (; i < nb_cargs; i++, k++) {
2677                  col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2678                                    op->args[k]);
2679              }
2680          }
2681  
2682          if (have_prefs || op->life) {
2683              for (; col < 40; ++col) {
2684                  putc(' ', f);
2685              }
2686          }
2687  
2688          if (op->life) {
2689              unsigned life = op->life;
2690  
2691              if (life & (SYNC_ARG * 3)) {
2692                  ne_fprintf(f, "  sync:");
2693                  for (i = 0; i < 2; ++i) {
2694                      if (life & (SYNC_ARG << i)) {
2695                          ne_fprintf(f, " %d", i);
2696                      }
2697                  }
2698              }
2699              life /= DEAD_ARG;
2700              if (life) {
2701                  ne_fprintf(f, "  dead:");
2702                  for (i = 0; life; ++i, life >>= 1) {
2703                      if (life & 1) {
2704                          ne_fprintf(f, " %d", i);
2705                      }
2706                  }
2707              }
2708          }
2709  
2710          if (have_prefs) {
2711              for (i = 0; i < nb_oargs; ++i) {
2712                  TCGRegSet set = output_pref(op, i);
2713  
2714                  if (i == 0) {
2715                      ne_fprintf(f, "  pref=");
2716                  } else {
2717                      ne_fprintf(f, ",");
2718                  }
2719                  if (set == 0) {
2720                      ne_fprintf(f, "none");
2721                  } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2722                      ne_fprintf(f, "all");
2723  #ifdef CONFIG_DEBUG_TCG
2724                  } else if (tcg_regset_single(set)) {
2725                      TCGReg reg = tcg_regset_first(set);
2726                      ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2727  #endif
2728                  } else if (TCG_TARGET_NB_REGS <= 32) {
2729                      ne_fprintf(f, "0x%x", (uint32_t)set);
2730                  } else {
2731                      ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2732                  }
2733              }
2734          }
2735  
2736          putc('\n', f);
2737      }
2738  }
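
      /*
       * Illustrative output (hedged; exact temps, flags and spacing vary
       * by target and by which annotations are enabled):
       *
       *   ---- 000000000040116a 0000000000000000
       *
       *   mov_i32 tmp0,eax
       *   add_i32 tmp0,tmp0,$0x1                  dead: 1
       *   brcond_i32 tmp0,$0x0,eq,$L1
       */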
2739  
2740  /* We give more priority to constraints with fewer registers. */
2741  static int get_constraint_priority(const TCGOpDef *def, int k)
2742  {
2743      const TCGArgConstraint *arg_ct = &def->args_ct[k];
2744      int n = ctpop64(arg_ct->regs);
2745  
2746      /*
2747       * Sort constraints of a single register first, which includes output
2748       * aliases (which must exactly match the input already allocated).
2749       */
2750      if (n == 1 || arg_ct->oalias) {
2751          return INT_MAX;
2752      }
2753  
2754      /*
2755       * Sort register pairs next, first then second immediately after.
2756       * Arbitrarily sort multiple pairs by the index of the first reg;
2757       * there shouldn't be many pairs.
2758       */
2759      switch (arg_ct->pair) {
2760      case 1:
2761      case 3:
2762          return (k + 1) * 2;
2763      case 2:
2764          return (arg_ct->pair_index + 1) * 2 - 1;
2765      }
2766  
2767      /* Finally, sort by decreasing register count. */
2768      assert(n > 1);
2769      return -n;
2770  }
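
      /*
       * Worked example (hedged): an output aliased by an input ("0" in
       * the input's constraint string) sorts first at INT_MAX, as does
       * any single-register class; paired args come next at small
       * positive values; plain classes sort last by -ctpop(regs), so
       * the widest register class is tried last.
       */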
2771  
2772  /* sort from highest priority to lowest */
2773  static void sort_constraints(TCGOpDef *def, int start, int n)
2774  {
2775      int i, j;
2776      TCGArgConstraint *a = def->args_ct;
2777  
2778      for (i = 0; i < n; i++) {
2779          a[start + i].sort_index = start + i;
2780      }
2781      if (n <= 1) {
2782          return;
2783      }
2784      for (i = 0; i < n - 1; i++) {
2785          for (j = i + 1; j < n; j++) {
2786              int p1 = get_constraint_priority(def, a[start + i].sort_index);
2787              int p2 = get_constraint_priority(def, a[start + j].sort_index);
2788              if (p1 < p2) {
2789                  int tmp = a[start + i].sort_index;
2790                  a[start + i].sort_index = a[start + j].sort_index;
2791                  a[start + j].sort_index = tmp;
2792              }
2793          }
2794      }
2795  }
2796  
2797  static void process_op_defs(TCGContext *s)
2798  {
2799      TCGOpcode op;
2800  
2801      for (op = 0; op < NB_OPS; op++) {
2802          TCGOpDef *def = &tcg_op_defs[op];
2803          const TCGTargetOpDef *tdefs;
2804          bool saw_alias_pair = false;
2805          int i, o, i2, o2, nb_args;
2806  
2807          if (def->flags & TCG_OPF_NOT_PRESENT) {
2808              continue;
2809          }
2810  
2811          nb_args = def->nb_iargs + def->nb_oargs;
2812          if (nb_args == 0) {
2813              continue;
2814          }
2815  
2816          /*
2817           * Macro magic should make it impossible, but double-check that
2818           * the array index is in range.  Since the signedness of an enum
2819           * is implementation-defined, force the result to unsigned.
2820           */
2821          unsigned con_set = tcg_target_op_def(op);
2822          tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2823          tdefs = &constraint_sets[con_set];
2824  
2825          for (i = 0; i < nb_args; i++) {
2826              const char *ct_str = tdefs->args_ct_str[i];
2827              bool input_p = i >= def->nb_oargs;
2828  
2829              /* Incomplete TCGTargetOpDef entry. */
2830              tcg_debug_assert(ct_str != NULL);
2831  
2832              switch (*ct_str) {
2833              case '0' ... '9':
2834                  o = *ct_str - '0';
2835                  tcg_debug_assert(input_p);
2836                  tcg_debug_assert(o < def->nb_oargs);
2837                  tcg_debug_assert(def->args_ct[o].regs != 0);
2838                  tcg_debug_assert(!def->args_ct[o].oalias);
2839                  def->args_ct[i] = def->args_ct[o];
2840                  /* The output sets oalias.  */
2841                  def->args_ct[o].oalias = 1;
2842                  def->args_ct[o].alias_index = i;
2843                  /* The input sets ialias. */
2844                  def->args_ct[i].ialias = 1;
2845                  def->args_ct[i].alias_index = o;
2846                  if (def->args_ct[i].pair) {
2847                      saw_alias_pair = true;
2848                  }
2849                  tcg_debug_assert(ct_str[1] == '\0');
2850                  continue;
2851  
2852              case '&':
2853                  tcg_debug_assert(!input_p);
2854                  def->args_ct[i].newreg = true;
2855                  ct_str++;
2856                  break;
2857  
2858              case 'p': /* plus */
2859                  /* Allocate to the register after the previous. */
2860                  tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2861                  o = i - 1;
2862                  tcg_debug_assert(!def->args_ct[o].pair);
2863                  tcg_debug_assert(!def->args_ct[o].ct);
2864                  def->args_ct[i] = (TCGArgConstraint){
2865                      .pair = 2,
2866                      .pair_index = o,
2867                      .regs = def->args_ct[o].regs << 1,
2868                  };
2869                  def->args_ct[o].pair = 1;
2870                  def->args_ct[o].pair_index = i;
2871                  tcg_debug_assert(ct_str[1] == '\0');
2872                  continue;
2873  
2874              case 'm': /* minus */
2875                  /* Allocate to the register before the previous. */
2876                  tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2877                  o = i - 1;
2878                  tcg_debug_assert(!def->args_ct[o].pair);
2879                  tcg_debug_assert(!def->args_ct[o].ct);
2880                  def->args_ct[i] = (TCGArgConstraint){
2881                      .pair = 1,
2882                      .pair_index = o,
2883                      .regs = def->args_ct[o].regs >> 1,
2884                  };
2885                  def->args_ct[o].pair = 2;
2886                  def->args_ct[o].pair_index = i;
2887                  tcg_debug_assert(ct_str[1] == '\0');
2888                  continue;
2889              }
2890  
2891              do {
2892                  switch (*ct_str) {
2893                  case 'i':
2894                      def->args_ct[i].ct |= TCG_CT_CONST;
2895                      break;
2896  
2897                  /* Include all of the target-specific constraints. */
2898  
2899  #undef CONST
2900  #define CONST(CASE, MASK) \
2901      case CASE: def->args_ct[i].ct |= MASK; break;
2902  #define REGS(CASE, MASK) \
2903      case CASE: def->args_ct[i].regs |= MASK; break;
2904  
2905  #include "tcg-target-con-str.h"
2906  
2907  #undef REGS
2908  #undef CONST
2909                  default:
2910                  case '0' ... '9':
2911                  case '&':
2912                  case 'p':
2913                  case 'm':
2914                      /* Typo in TCGTargetOpDef constraint. */
2915                      g_assert_not_reached();
2916                  }
2917              } while (*++ct_str != '\0');
2918          }
2919  
2920          /* TCGTargetOpDef entry with too much information? */
2921          tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2922  
2923          /*
2924           * Fix up output pairs that are aliased with inputs.
2925           * When we created the alias, we copied pair from the output.
2926           * There are three cases:
2927           *    (1a) Pairs of inputs alias pairs of outputs.
2928           *    (1b) One input aliases the first of a pair of outputs.
2929           *    (2)  One input aliases the second of a pair of outputs.
2930           *
2931           * Case 1a is handled by making sure that the pair_index'es are
2932           * properly updated so that they appear the same as a pair of inputs.
2933           *
2934           * Case 1b is handled by setting the pair_index of the input to
2935           * itself, simply so it doesn't point to an unrelated argument.
2936           * Since we don't encounter the "second" during the input allocation
2937           * phase, nothing happens with the second half of the input pair.
2938           *
2939           * Case 2 is handled by setting the second input to pair=3, the
2940           * first output to pair=3, and the pair_index'es to match.
2941           */
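              /*
               * Schematic example (hedged): with outputs {0,1} forming a
               * pair ("p" on output 1) and an input constrained "1", the
               * input aliases the second half of the output pair -- case
               * 2 -- so below that input and output 0 both become pair=3
               * with mutual pair_index, and the allocator treats them as
               * one unit.
               */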
2942          if (saw_alias_pair) {
2943              for (i = def->nb_oargs; i < nb_args; i++) {
2944                  /*
2945                   * Since [0-9pm] must be alone in the constraint string,
2946                   * the only way they can both be set is if the pair comes
2947                   * from the output alias.
2948                   */
2949                  if (!def->args_ct[i].ialias) {
2950                      continue;
2951                  }
2952                  switch (def->args_ct[i].pair) {
2953                  case 0:
2954                      break;
2955                  case 1:
2956                      o = def->args_ct[i].alias_index;
2957                      o2 = def->args_ct[o].pair_index;
2958                      tcg_debug_assert(def->args_ct[o].pair == 1);
2959                      tcg_debug_assert(def->args_ct[o2].pair == 2);
2960                      if (def->args_ct[o2].oalias) {
2961                          /* Case 1a */
2962                          i2 = def->args_ct[o2].alias_index;
2963                          tcg_debug_assert(def->args_ct[i2].pair == 2);
2964                          def->args_ct[i2].pair_index = i;
2965                          def->args_ct[i].pair_index = i2;
2966                      } else {
2967                          /* Case 1b */
2968                          def->args_ct[i].pair_index = i;
2969                      }
2970                      break;
2971                  case 2:
2972                      o = def->args_ct[i].alias_index;
2973                      o2 = def->args_ct[o].pair_index;
2974                      tcg_debug_assert(def->args_ct[o].pair == 2);
2975                      tcg_debug_assert(def->args_ct[o2].pair == 1);
2976                      if (def->args_ct[o2].oalias) {
2977                          /* Case 1a */
2978                          i2 = def->args_ct[o2].alias_index;
2979                          tcg_debug_assert(def->args_ct[i2].pair == 1);
2980                          def->args_ct[i2].pair_index = i;
2981                          def->args_ct[i].pair_index = i2;
2982                      } else {
2983                          /* Case 2 */
2984                          def->args_ct[i].pair = 3;
2985                          def->args_ct[o2].pair = 3;
2986                          def->args_ct[i].pair_index = o2;
2987                          def->args_ct[o2].pair_index = i;
2988                      }
2989                      break;
2990                  default:
2991                      g_assert_not_reached();
2992                  }
2993              }
2994          }
2995  
2996          /* Sort the constraints (XXX: this is just a heuristic). */
2997          sort_constraints(def, 0, def->nb_oargs);
2998          sort_constraints(def, def->nb_oargs, def->nb_iargs);
2999      }
3000  }
3001  
3002  static void remove_label_use(TCGOp *op, int idx)
3003  {
3004      TCGLabel *label = arg_label(op->args[idx]);
3005      TCGLabelUse *use;
3006  
3007      QSIMPLEQ_FOREACH(use, &label->branches, next) {
3008          if (use->op == op) {
3009              QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3010              return;
3011          }
3012      }
3013      g_assert_not_reached();
3014  }
3015  
3016  void tcg_op_remove(TCGContext *s, TCGOp *op)
3017  {
3018      switch (op->opc) {
3019      case INDEX_op_br:
3020          remove_label_use(op, 0);
3021          break;
3022      case INDEX_op_brcond_i32:
3023      case INDEX_op_brcond_i64:
3024          remove_label_use(op, 3);
3025          break;
3026      case INDEX_op_brcond2_i32:
3027          remove_label_use(op, 5);
3028          break;
3029      default:
3030          break;
3031      }
3032  
3033      QTAILQ_REMOVE(&s->ops, op, link);
3034      QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3035      s->nb_ops--;
3036  
3037  #ifdef CONFIG_PROFILER
3038      qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
3039  #endif
3040  }
3041  
3042  void tcg_remove_ops_after(TCGOp *op)
3043  {
3044      TCGContext *s = tcg_ctx;
3045  
3046      while (true) {
3047          TCGOp *last = tcg_last_op();
3048          if (last == op) {
3049              return;
3050          }
3051          tcg_op_remove(s, last);
3052      }
3053  }
3054  
3055  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3056  {
3057      TCGContext *s = tcg_ctx;
3058      TCGOp *op = NULL;
3059  
3060      if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3061          QTAILQ_FOREACH(op, &s->free_ops, link) {
3062              if (nargs <= op->nargs) {
3063                  QTAILQ_REMOVE(&s->free_ops, op, link);
3064                  nargs = op->nargs;
3065                  goto found;
3066              }
3067          }
3068      }
3069  
3070      /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3071      nargs = MAX(4, nargs);
3072      op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3073  
3074   found:
3075      memset(op, 0, offsetof(TCGOp, link));
3076      op->opc = opc;
3077      op->nargs = nargs;
3078  
3079      /* Check for bitfield overflow. */
3080      tcg_debug_assert(op->nargs == nargs);
3081  
3082      s->nb_ops++;
3083      return op;
3084  }
3085  
3086  TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3087  {
3088      TCGOp *op = tcg_op_alloc(opc, nargs);
3089      QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3090      return op;
3091  }
3092  
3093  TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3094                              TCGOpcode opc, unsigned nargs)
3095  {
3096      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3097      QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3098      return new_op;
3099  }
3100  
3101  TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3102                             TCGOpcode opc, unsigned nargs)
3103  {
3104      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3105      QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3106      return new_op;
3107  }
3108  
3109  static void move_label_uses(TCGLabel *to, TCGLabel *from)
3110  {
3111      TCGLabelUse *u;
3112  
3113      QSIMPLEQ_FOREACH(u, &from->branches, next) {
3114          TCGOp *op = u->op;
3115          switch (op->opc) {
3116          case INDEX_op_br:
3117              op->args[0] = label_arg(to);
3118              break;
3119          case INDEX_op_brcond_i32:
3120          case INDEX_op_brcond_i64:
3121              op->args[3] = label_arg(to);
3122              break;
3123          case INDEX_op_brcond2_i32:
3124              op->args[5] = label_arg(to);
3125              break;
3126          default:
3127              g_assert_not_reached();
3128          }
3129      }
3130  
3131      QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3132  }
3133  
3134  /* Reachability analysis: remove unreachable code.  */
3135  static void __attribute__((noinline))
3136  reachable_code_pass(TCGContext *s)
3137  {
3138      TCGOp *op, *op_next, *op_prev;
3139      bool dead = false;
3140  
3141      QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3142          bool remove = dead;
3143          TCGLabel *label;
3144  
3145          switch (op->opc) {
3146          case INDEX_op_set_label:
3147              label = arg_label(op->args[0]);
3148  
3149              /*
3150               * Note that the first op in the TB is always a load,
3151               * so there is always something before a label.
3152               */
3153              op_prev = QTAILQ_PREV(op, link);
3154  
3155              /*
3156               * If we find two sequential labels, move all branches to
3157               * reference the second label and remove the first label.
3158               * Do this before branch to next optimization, so that the
3159               * middle label is out of the way.
3160               */
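            /*
             * For example (illustrative):
             *     brcond_i32 ..., $L0
             *     set_label $L0
             *     set_label $L1
             * leaves a single "set_label $L1", with the brcond
             * retargeted from $L0 to $L1.
             */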
3161              if (op_prev->opc == INDEX_op_set_label) {
3162                  move_label_uses(label, arg_label(op_prev->args[0]));
3163                  tcg_op_remove(s, op_prev);
3164                  op_prev = QTAILQ_PREV(op, link);
3165              }
3166  
3167              /*
3168               * Optimization can fold conditional branches to unconditional.
3169               * If we find a label which is preceded by an unconditional
3170               * branch to next, remove the branch.  We couldn't do this when
3171               * processing the branch because any dead code between the branch
3172               * and label had not yet been removed.
3173               */
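            /*
             * For example (illustrative):
             *     br $L2
             *     set_label $L2
             * drops the "br", since it merely branches to the next insn.
             */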
3174              if (op_prev->opc == INDEX_op_br &&
3175                  label == arg_label(op_prev->args[0])) {
3176                  tcg_op_remove(s, op_prev);
3177                  /* Fall through means insns become live again.  */
3178                  dead = false;
3179              }
3180  
3181              if (QSIMPLEQ_EMPTY(&label->branches)) {
3182                  /*
3183                   * While there is an occasional backward branch, virtually
3184                   * all branches generated by the translators are forward.
3185                   * Which means that, generally, we will have already
3186                   * removed all references to this label by the time we
3187                   * reach it, and there is little to be gained by iterating.
3188                   */
3189                  remove = true;
3190              } else {
3191                  /* Once we see a label, insns become live again.  */
3192                  dead = false;
3193                  remove = false;
3194              }
3195              break;
3196  
3197          case INDEX_op_br:
3198          case INDEX_op_exit_tb:
3199          case INDEX_op_goto_ptr:
3200              /* Unconditional branches; everything following is dead.  */
3201              dead = true;
3202              break;
3203  
3204          case INDEX_op_call:
3205              /* Notice noreturn helper calls, raising exceptions.  */
3206              if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3207                  dead = true;
3208              }
3209              break;
3210  
3211          case INDEX_op_insn_start:
3212              /* Never remove -- we need to keep these for unwind.  */
3213              remove = false;
3214              break;
3215  
3216          default:
3217              break;
3218          }
3219  
3220          if (remove) {
3221              tcg_op_remove(s, op);
3222          }
3223      }
3224  }
3225  
3226  #define TS_DEAD  1
3227  #define TS_MEM   2
3228  
3229  #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3230  #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
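
      /*
       * Note (illustrative): a temp's state is a bitmask of the values
       * above: state == (TS_DEAD | TS_MEM) means the value is dead but
       * its canonical memory slot is up to date, while state == 0 means
       * the value is live and not yet synced back to memory.
       */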
3231  
3232  /* For liveness_pass_1, the register preferences for a given temp.  */
3233  static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3234  {
3235      return ts->state_ptr;
3236  }
3237  
3238  /* For liveness_pass_1, reset the preferences for a given temp to the
3239   * maximal regset for its type.
3240   */
3241  static inline void la_reset_pref(TCGTemp *ts)
3242  {
3243      *la_temp_pref(ts)
3244          = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3245  }
3246  
3247  /* liveness analysis: end of function: all temps are dead, and globals
3248     should be in memory. */
3249  static void la_func_end(TCGContext *s, int ng, int nt)
3250  {
3251      int i;
3252  
3253      for (i = 0; i < ng; ++i) {
3254          s->temps[i].state = TS_DEAD | TS_MEM;
3255          la_reset_pref(&s->temps[i]);
3256      }
3257      for (i = ng; i < nt; ++i) {
3258          s->temps[i].state = TS_DEAD;
3259          la_reset_pref(&s->temps[i]);
3260      }
3261  }
3262  
3263  /* liveness analysis: end of basic block: all temps are dead, globals
3264     and local temps should be in memory. */
3265  static void la_bb_end(TCGContext *s, int ng, int nt)
3266  {
3267      int i;
3268  
3269      for (i = 0; i < nt; ++i) {
3270          TCGTemp *ts = &s->temps[i];
3271          int state;
3272  
3273          switch (ts->kind) {
3274          case TEMP_FIXED:
3275          case TEMP_GLOBAL:
3276          case TEMP_TB:
3277              state = TS_DEAD | TS_MEM;
3278              break;
3279          case TEMP_EBB:
3280          case TEMP_CONST:
3281              state = TS_DEAD;
3282              break;
3283          default:
3284              g_assert_not_reached();
3285          }
3286          ts->state = state;
3287          la_reset_pref(ts);
3288      }
3289  }
3290  
3291  /* liveness analysis: sync globals back to memory.  */
3292  static void la_global_sync(TCGContext *s, int ng)
3293  {
3294      int i;
3295  
3296      for (i = 0; i < ng; ++i) {
3297          int state = s->temps[i].state;
3298          s->temps[i].state = state | TS_MEM;
3299          if (state == TS_DEAD) {
3300              /* If the global was previously dead, reset prefs.  */
3301              la_reset_pref(&s->temps[i]);
3302          }
3303      }
3304  }
3305  
3306  /*
3307   * liveness analysis: conditional branch: all temps are dead unless
3308   * explicitly live-across-conditional-branch, globals and local temps
3309   * should be synced.
3310   */
3311  static void la_bb_sync(TCGContext *s, int ng, int nt)
3312  {
3313      la_global_sync(s, ng);
3314  
3315      for (int i = ng; i < nt; ++i) {
3316          TCGTemp *ts = &s->temps[i];
3317          int state;
3318  
3319          switch (ts->kind) {
3320          case TEMP_TB:
3321              state = ts->state;
3322              ts->state = state | TS_MEM;
3323              if (state != TS_DEAD) {
3324                  continue;
3325              }
3326              break;
3327          case TEMP_EBB:
3328          case TEMP_CONST:
3329              continue;
3330          default:
3331              g_assert_not_reached();
3332          }
3333          la_reset_pref(&s->temps[i]);
3334      }
3335  }
3336  
3337  /* liveness analysis: sync globals back to memory and kill.  */
3338  static void la_global_kill(TCGContext *s, int ng)
3339  {
3340      int i;
3341  
3342      for (i = 0; i < ng; i++) {
3343          s->temps[i].state = TS_DEAD | TS_MEM;
3344          la_reset_pref(&s->temps[i]);
3345      }
3346  }
3347  
3348  /* liveness analysis: note live temps crossing calls.  */
3349  static void la_cross_call(TCGContext *s, int nt)
3350  {
3351      TCGRegSet mask = ~tcg_target_call_clobber_regs;
3352      int i;
3353  
3354      for (i = 0; i < nt; i++) {
3355          TCGTemp *ts = &s->temps[i];
3356          if (!(ts->state & TS_DEAD)) {
3357              TCGRegSet *pset = la_temp_pref(ts);
3358              TCGRegSet set = *pset;
3359  
3360              set &= mask;
3361              /* If the combination is not possible, restart.  */
3362              if (set == 0) {
3363                  set = tcg_target_available_regs[ts->type] & mask;
3364              }
3365              *pset = set;
3366          }
3367      }
3368  }
3369  
3370  /*
3371   * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3372   * to TEMP_EBB, if possible.
3373   */
3374  static void __attribute__((noinline))
3375  liveness_pass_0(TCGContext *s)
3376  {
3377      void * const multiple_ebb = (void *)(uintptr_t)-1;
3378      int nb_temps = s->nb_temps;
3379      TCGOp *op, *ebb;
3380  
3381      for (int i = s->nb_globals; i < nb_temps; ++i) {
3382          s->temps[i].state_ptr = NULL;
3383      }
3384  
3385      /*
3386       * Represent each EBB by the op at which it begins.  In the case of
3387       * the first EBB, this is the first op, otherwise it is a label.
3388       * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3389       * within a single EBB, else MULTIPLE_EBB.
3390       */
3391      ebb = QTAILQ_FIRST(&s->ops);
3392      QTAILQ_FOREACH(op, &s->ops, link) {
3393          const TCGOpDef *def;
3394          int nb_oargs, nb_iargs;
3395  
3396          switch (op->opc) {
3397          case INDEX_op_set_label:
3398              ebb = op;
3399              continue;
3400          case INDEX_op_discard:
3401              continue;
3402          case INDEX_op_call:
3403              nb_oargs = TCGOP_CALLO(op);
3404              nb_iargs = TCGOP_CALLI(op);
3405              break;
3406          default:
3407              def = &tcg_op_defs[op->opc];
3408              nb_oargs = def->nb_oargs;
3409              nb_iargs = def->nb_iargs;
3410              break;
3411          }
3412  
3413          for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3414              TCGTemp *ts = arg_temp(op->args[i]);
3415  
3416              if (ts->kind != TEMP_TB) {
3417                  continue;
3418              }
3419              if (ts->state_ptr == NULL) {
3420                  ts->state_ptr = ebb;
3421              } else if (ts->state_ptr != ebb) {
3422                  ts->state_ptr = multiple_ebb;
3423              }
3424          }
3425      }
3426  
3427      /*
3428       * For TEMP_TB that turned out not to be used beyond one EBB,
3429       * reduce the liveness to TEMP_EBB.
3430       */
3431      for (int i = s->nb_globals; i < nb_temps; ++i) {
3432          TCGTemp *ts = &s->temps[i];
3433          if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3434              ts->kind = TEMP_EBB;
3435          }
3436      }
3437  }
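
      /*
       * For example (illustrative): a TEMP_TB temp whose every use falls
       * between one label and the next is attributed to a single EBB, so
       * its state_ptr never reaches MULTIPLE_EBB and the loop above
       * demotes it to TEMP_EBB, which later passes may treat less
       * conservatively at basic-block boundaries.
       */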
3438  
3439  /* Liveness analysis: update the opc_arg_life array to tell whether a
3440     given input argument is dead. Instructions updating dead
3441     temporaries are removed. */
3442  static void __attribute__((noinline))
3443  liveness_pass_1(TCGContext *s)
3444  {
3445      int nb_globals = s->nb_globals;
3446      int nb_temps = s->nb_temps;
3447      TCGOp *op, *op_prev;
3448      TCGRegSet *prefs;
3449      int i;
3450  
3451      prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3452      for (i = 0; i < nb_temps; ++i) {
3453          s->temps[i].state_ptr = prefs + i;
3454      }
3455  
3456      /* ??? Should be redundant with the exit_tb that ends the TB.  */
3457      la_func_end(s, nb_globals, nb_temps);
3458  
3459      QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3460          int nb_iargs, nb_oargs;
3461          TCGOpcode opc_new, opc_new2;
3462          bool have_opc_new2;
3463          TCGLifeData arg_life = 0;
3464          TCGTemp *ts;
3465          TCGOpcode opc = op->opc;
3466          const TCGOpDef *def = &tcg_op_defs[opc];
3467  
3468          switch (opc) {
3469          case INDEX_op_call:
3470              {
3471                  const TCGHelperInfo *info = tcg_call_info(op);
3472                  int call_flags = tcg_call_flags(op);
3473  
3474                  nb_oargs = TCGOP_CALLO(op);
3475                  nb_iargs = TCGOP_CALLI(op);
3476  
3477                  /* pure functions can be removed if their result is unused */
3478                  if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3479                      for (i = 0; i < nb_oargs; i++) {
3480                          ts = arg_temp(op->args[i]);
3481                          if (ts->state != TS_DEAD) {
3482                              goto do_not_remove_call;
3483                          }
3484                      }
3485                      goto do_remove;
3486                  }
3487              do_not_remove_call:
3488  
3489                  /* Output args are dead.  */
3490                  for (i = 0; i < nb_oargs; i++) {
3491                      ts = arg_temp(op->args[i]);
3492                      if (ts->state & TS_DEAD) {
3493                          arg_life |= DEAD_ARG << i;
3494                      }
3495                      if (ts->state & TS_MEM) {
3496                          arg_life |= SYNC_ARG << i;
3497                      }
3498                      ts->state = TS_DEAD;
3499                      la_reset_pref(ts);
3500                  }
3501  
3502                  /* Not used -- it will be tcg_target_call_oarg_reg().  */
3503                  memset(op->output_pref, 0, sizeof(op->output_pref));
3504  
3505                  if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3506                                      TCG_CALL_NO_READ_GLOBALS))) {
3507                      la_global_kill(s, nb_globals);
3508                  } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3509                      la_global_sync(s, nb_globals);
3510                  }
3511  
3512                  /* Record arguments that die in this helper.  */
3513                  for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3514                      ts = arg_temp(op->args[i]);
3515                      if (ts->state & TS_DEAD) {
3516                          arg_life |= DEAD_ARG << i;
3517                      }
3518                  }
3519  
3520                  /* For all live registers, remove call-clobbered prefs.  */
3521                  la_cross_call(s, nb_temps);
3522  
3523                  /*
3524                   * Input arguments are live for preceding opcodes.
3525                   *
3526                   * For those arguments that die, and will be allocated in
3527                   * registers, clear the register set for that arg, to be
3528                   * filled in below.  For args that will be on the stack,
3529                   * reset to any available reg.  Process arguments in reverse
3530                   * order so that if a temp is used more than once, the stack
3531                   * reset to max happens before the register reset to 0.
3532                   */
3533                  for (i = nb_iargs - 1; i >= 0; i--) {
3534                      const TCGCallArgumentLoc *loc = &info->in[i];
3535                      ts = arg_temp(op->args[nb_oargs + i]);
3536  
3537                      if (ts->state & TS_DEAD) {
3538                          switch (loc->kind) {
3539                          case TCG_CALL_ARG_NORMAL:
3540                          case TCG_CALL_ARG_EXTEND_U:
3541                          case TCG_CALL_ARG_EXTEND_S:
3542                              if (arg_slot_reg_p(loc->arg_slot)) {
3543                                  *la_temp_pref(ts) = 0;
3544                                  break;
3545                              }
3546                              /* fall through */
3547                          default:
3548                              *la_temp_pref(ts) =
3549                                  tcg_target_available_regs[ts->type];
3550                              break;
3551                          }
3552                          ts->state &= ~TS_DEAD;
3553                      }
3554                  }
3555  
3556                  /*
3557                   * For each input argument, add its input register to prefs.
3558                   * If a temp is used once, this produces a single set bit;
3559                   * if a temp is used multiple times, this produces a set.
3560                   */
3561                  for (i = 0; i < nb_iargs; i++) {
3562                      const TCGCallArgumentLoc *loc = &info->in[i];
3563                      ts = arg_temp(op->args[nb_oargs + i]);
3564  
3565                      switch (loc->kind) {
3566                      case TCG_CALL_ARG_NORMAL:
3567                      case TCG_CALL_ARG_EXTEND_U:
3568                      case TCG_CALL_ARG_EXTEND_S:
3569                          if (arg_slot_reg_p(loc->arg_slot)) {
3570                              tcg_regset_set_reg(*la_temp_pref(ts),
3571                                  tcg_target_call_iarg_regs[loc->arg_slot]);
3572                          }
3573                          break;
3574                      default:
3575                          break;
3576                      }
3577                  }
3578              }
3579              break;
3580          case INDEX_op_insn_start:
3581              break;
3582          case INDEX_op_discard:
3583              /* mark the temporary as dead */
3584              ts = arg_temp(op->args[0]);
3585              ts->state = TS_DEAD;
3586              la_reset_pref(ts);
3587              break;
3588  
3589          case INDEX_op_add2_i32:
3590              opc_new = INDEX_op_add_i32;
3591              goto do_addsub2;
3592          case INDEX_op_sub2_i32:
3593              opc_new = INDEX_op_sub_i32;
3594              goto do_addsub2;
3595          case INDEX_op_add2_i64:
3596              opc_new = INDEX_op_add_i64;
3597              goto do_addsub2;
3598          case INDEX_op_sub2_i64:
3599              opc_new = INDEX_op_sub_i64;
3600          do_addsub2:
3601              nb_iargs = 4;
3602              nb_oargs = 2;
3603              /* Test if the high part of the operation is dead, but not
3604                 the low part.  The result can be optimized to a simple
3605                 add or sub.  This often happens for an x86_64 guest
3606                 when the CPU mode is set to 32 bit.  */
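            /*
             * For example (illustrative):
             *     add2_i32 rl, rh, al, ah, bl, bh
             * with rh dead but rl live is rewritten below to
             *     add_i32 rl, al, bl
             */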
3607              if (arg_temp(op->args[1])->state == TS_DEAD) {
3608                  if (arg_temp(op->args[0])->state == TS_DEAD) {
3609                      goto do_remove;
3610                  }
3611                  /* Replace the opcode and adjust the args in place,
3612                     leaving 3 unused args at the end.  */
3613                  op->opc = opc = opc_new;
3614                  op->args[1] = op->args[2];
3615                  op->args[2] = op->args[4];
3616                  /* Fall through and mark the single-word operation live.  */
3617                  nb_iargs = 2;
3618                  nb_oargs = 1;
3619              }
3620              goto do_not_remove;
3621  
3622          case INDEX_op_mulu2_i32:
3623              opc_new = INDEX_op_mul_i32;
3624              opc_new2 = INDEX_op_muluh_i32;
3625              have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3626              goto do_mul2;
3627          case INDEX_op_muls2_i32:
3628              opc_new = INDEX_op_mul_i32;
3629              opc_new2 = INDEX_op_mulsh_i32;
3630              have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3631              goto do_mul2;
3632          case INDEX_op_mulu2_i64:
3633              opc_new = INDEX_op_mul_i64;
3634              opc_new2 = INDEX_op_muluh_i64;
3635              have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3636              goto do_mul2;
3637          case INDEX_op_muls2_i64:
3638              opc_new = INDEX_op_mul_i64;
3639              opc_new2 = INDEX_op_mulsh_i64;
3640              have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3641              goto do_mul2;
3642          do_mul2:
3643              nb_iargs = 2;
3644              nb_oargs = 2;
3645              if (arg_temp(op->args[1])->state == TS_DEAD) {
3646                  if (arg_temp(op->args[0])->state == TS_DEAD) {
3647                      /* Both parts of the operation are dead.  */
3648                      goto do_remove;
3649                  }
3650                  /* The high part of the operation is dead; generate the low. */
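                    /* e.g. (illustrative): mulu2_i32 rl, rh, a, b -> mul_i32 rl, a, b */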
3651                  op->opc = opc = opc_new;
3652                  op->args[1] = op->args[2];
3653                  op->args[2] = op->args[3];
3654              } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3655                  /* The low part of the operation is dead; generate the high. */
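                    /* e.g. (illustrative): mulu2_i32 rl, rh, a, b -> muluh_i32 rh, a, b */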
3656                  op->opc = opc = opc_new2;
3657                  op->args[0] = op->args[1];
3658                  op->args[1] = op->args[2];
3659                  op->args[2] = op->args[3];
3660              } else {
3661                  goto do_not_remove;
3662              }
3663              /* Mark the single-word operation live.  */
3664              nb_oargs = 1;
3665              goto do_not_remove;
3666  
3667          default:
3668              /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3669              nb_iargs = def->nb_iargs;
3670              nb_oargs = def->nb_oargs;
3671  
3672              /* Test if the operation can be removed because all
3673                 its outputs are dead. We assume that nb_oargs == 0
3674                 implies side effects.  */
3675              if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3676                  for (i = 0; i < nb_oargs; i++) {
3677                      if (arg_temp(op->args[i])->state != TS_DEAD) {
3678                          goto do_not_remove;
3679                      }
3680                  }
3681                  goto do_remove;
3682              }
3683              goto do_not_remove;
3684  
3685          do_remove:
3686              tcg_op_remove(s, op);
3687              break;
3688  
3689          do_not_remove:
3690              for (i = 0; i < nb_oargs; i++) {
3691                  ts = arg_temp(op->args[i]);
3692  
3693                  /* Remember the preference of the uses that followed.  */
3694                  if (i < ARRAY_SIZE(op->output_pref)) {
3695                      op->output_pref[i] = *la_temp_pref(ts);
3696                  }
3697  
3698                  /* Output args are dead.  */
3699                  if (ts->state & TS_DEAD) {
3700                      arg_life |= DEAD_ARG << i;
3701                  }
3702                  if (ts->state & TS_MEM) {
3703                      arg_life |= SYNC_ARG << i;
3704                  }
3705                  ts->state = TS_DEAD;
3706                  la_reset_pref(ts);
3707              }
3708  
3709              /* If end of basic block, update.  */
3710              if (def->flags & TCG_OPF_BB_EXIT) {
3711                  la_func_end(s, nb_globals, nb_temps);
3712              } else if (def->flags & TCG_OPF_COND_BRANCH) {
3713                  la_bb_sync(s, nb_globals, nb_temps);
3714              } else if (def->flags & TCG_OPF_BB_END) {
3715                  la_bb_end(s, nb_globals, nb_temps);
3716              } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3717                  la_global_sync(s, nb_globals);
3718                  if (def->flags & TCG_OPF_CALL_CLOBBER) {
3719                      la_cross_call(s, nb_temps);
3720                  }
3721              }
3722  
3723              /* Record arguments that die in this opcode.  */
3724              for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3725                  ts = arg_temp(op->args[i]);
3726                  if (ts->state & TS_DEAD) {
3727                      arg_life |= DEAD_ARG << i;
3728                  }
3729              }
3730  
3731              /* Input arguments are live for preceding opcodes.  */
3732              for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3733                  ts = arg_temp(op->args[i]);
3734                  if (ts->state & TS_DEAD) {
3735                      /* For operands that were dead, initially allow
3736                         all regs for the type.  */
3737                      *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3738                      ts->state &= ~TS_DEAD;
3739                  }
3740              }
3741  
3742              /* Incorporate constraints for this operand.  */
3743              switch (opc) {
3744              case INDEX_op_mov_i32:
3745              case INDEX_op_mov_i64:
3746                  /* Note that these are TCG_OPF_NOT_PRESENT and do not
3747                     have proper constraints.  That said, special case
3748                     moves to propagate preferences backward.  */
3749                  if (IS_DEAD_ARG(1)) {
3750                      *la_temp_pref(arg_temp(op->args[0]))
3751                          = *la_temp_pref(arg_temp(op->args[1]));
3752                  }
3753                  break;
3754  
3755              default:
3756                  for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3757                      const TCGArgConstraint *ct = &def->args_ct[i];
3758                      TCGRegSet set, *pset;
3759  
3760                      ts = arg_temp(op->args[i]);
3761                      pset = la_temp_pref(ts);
3762                      set = *pset;
3763  
3764                      set &= ct->regs;
3765                      if (ct->ialias) {
3766                          set &= output_pref(op, ct->alias_index);
3767                      }
3768                      /* If the combination is not possible, restart.  */
3769                      if (set == 0) {
3770                          set = ct->regs;
3771                      }
3772                      *pset = set;
3773                  }
3774                  break;
3775              }
3776              break;
3777          }
3778          op->life = arg_life;
3779      }
3780  }
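
      /*
       * For example (illustrative): after this pass the register
       * allocator can test IS_DEAD_ARG(1) on op->life to learn that
       * input 1 dies at this op, and may therefore reuse its register
       * for the output without an extra copy.
       */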
3781  
3782  /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3783  static bool __attribute__((noinline))
3784  liveness_pass_2(TCGContext *s)
3785  {
3786      int nb_globals = s->nb_globals;
3787      int nb_temps, i;
3788      bool changes = false;
3789      TCGOp *op, *op_next;
3790  
3791      /* Create a temporary for each indirect global.  */
3792      for (i = 0; i < nb_globals; ++i) {
3793          TCGTemp *its = &s->temps[i];
3794          if (its->indirect_reg) {
3795              TCGTemp *dts = tcg_temp_alloc(s);
3796              dts->type = its->type;
3797              dts->base_type = its->base_type;
3798              dts->temp_subindex = its->temp_subindex;
3799              dts->kind = TEMP_EBB;
3800              its->state_ptr = dts;
3801          } else {
3802              its->state_ptr = NULL;
3803          }
3804          /* All globals begin dead.  */
3805          its->state = TS_DEAD;
3806      }
3807      for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3808          TCGTemp *its = &s->temps[i];
3809          its->state_ptr = NULL;
3810          its->state = TS_DEAD;
3811      }
3812  
3813      QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3814          TCGOpcode opc = op->opc;
3815          const TCGOpDef *def = &tcg_op_defs[opc];
3816          TCGLifeData arg_life = op->life;
3817          int nb_iargs, nb_oargs, call_flags;
3818          TCGTemp *arg_ts, *dir_ts;
3819  
3820          if (opc == INDEX_op_call) {
3821              nb_oargs = TCGOP_CALLO(op);
3822              nb_iargs = TCGOP_CALLI(op);
3823              call_flags = tcg_call_flags(op);
3824          } else {
3825              nb_iargs = def->nb_iargs;
3826              nb_oargs = def->nb_oargs;
3827  
3828              /* Set flags similar to how calls require.  */
3829              if (def->flags & TCG_OPF_COND_BRANCH) {
3830                  /* Like reading globals: sync_globals */
3831                  call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3832              } else if (def->flags & TCG_OPF_BB_END) {
3833                  /* Like writing globals: save_globals */
3834                  call_flags = 0;
3835              } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3836                  /* Like reading globals: sync_globals */
3837                  call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3838              } else {
3839                  /* No effect on globals.  */
3840                  call_flags = (TCG_CALL_NO_READ_GLOBALS |
3841                                TCG_CALL_NO_WRITE_GLOBALS);
3842              }
3843          }
3844  
3845          /* Make sure that input arguments are available.  */
3846          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3847              arg_ts = arg_temp(op->args[i]);
3848              dir_ts = arg_ts->state_ptr;
3849              if (dir_ts && arg_ts->state == TS_DEAD) {
3850                  TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3851                                    ? INDEX_op_ld_i32
3852                                    : INDEX_op_ld_i64);
3853                  TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3854  
3855                  lop->args[0] = temp_arg(dir_ts);
3856                  lop->args[1] = temp_arg(arg_ts->mem_base);
3857                  lop->args[2] = arg_ts->mem_offset;
3858  
3859                  /* Loaded, but synced with memory.  */
3860                  arg_ts->state = TS_MEM;
3861              }
3862          }
3863  
3864          /* Perform input replacement, and mark inputs that became dead.
3865             No action is required except keeping temp_state up to date
3866             so that we reload when needed.  */
3867          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3868              arg_ts = arg_temp(op->args[i]);
3869              dir_ts = arg_ts->state_ptr;
3870              if (dir_ts) {
3871                  op->args[i] = temp_arg(dir_ts);
3872                  changes = true;
3873                  if (IS_DEAD_ARG(i)) {
3874                      arg_ts->state = TS_DEAD;
3875                  }
3876              }
3877          }
3878  
3879          /* Liveness analysis should ensure that the following are
3880             all correct, for call sites and basic block end points.  */
3881          if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3882              /* Nothing to do */
3883          } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3884              for (i = 0; i < nb_globals; ++i) {
3885                  /* Liveness should see that globals are synced back,
3886                     that is, either TS_DEAD or TS_MEM.  */
3887                  arg_ts = &s->temps[i];
3888                  tcg_debug_assert(arg_ts->state_ptr == 0
3889                                   || arg_ts->state != 0);
3890              }
3891          } else {
3892              for (i = 0; i < nb_globals; ++i) {
3893                  /* Liveness should see that globals are saved back,
3894                     that is, TS_DEAD, waiting to be reloaded.  */
3895                  arg_ts = &s->temps[i];
3896                  tcg_debug_assert(arg_ts->state_ptr == 0
3897                                   || arg_ts->state == TS_DEAD);
3898              }
3899          }
3900  
3901          /* Outputs become available.  */
3902          if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3903              arg_ts = arg_temp(op->args[0]);
3904              dir_ts = arg_ts->state_ptr;
3905              if (dir_ts) {
3906                  op->args[0] = temp_arg(dir_ts);
3907                  changes = true;
3908  
3909                  /* The output is now live and modified.  */
3910                  arg_ts->state = 0;
3911  
3912                  if (NEED_SYNC_ARG(0)) {
3913                      TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3914                                        ? INDEX_op_st_i32
3915                                        : INDEX_op_st_i64);
3916                      TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3917                      TCGTemp *out_ts = dir_ts;
3918  
3919                      if (IS_DEAD_ARG(0)) {
3920                          out_ts = arg_temp(op->args[1]);
3921                          arg_ts->state = TS_DEAD;
3922                          tcg_op_remove(s, op);
3923                      } else {
3924                          arg_ts->state = TS_MEM;
3925                      }
3926  
3927                      sop->args[0] = temp_arg(out_ts);
3928                      sop->args[1] = temp_arg(arg_ts->mem_base);
3929                      sop->args[2] = arg_ts->mem_offset;
3930                  } else {
3931                      tcg_debug_assert(!IS_DEAD_ARG(0));
3932                  }
3933              }
3934          } else {
3935              for (i = 0; i < nb_oargs; i++) {
3936                  arg_ts = arg_temp(op->args[i]);
3937                  dir_ts = arg_ts->state_ptr;
3938                  if (!dir_ts) {
3939                      continue;
3940                  }
3941                  op->args[i] = temp_arg(dir_ts);
3942                  changes = true;
3943  
3944                  /* The output is now live and modified.  */
3945                  arg_ts->state = 0;
3946  
3947                  /* Sync outputs upon their last write.  */
3948                  if (NEED_SYNC_ARG(i)) {
3949                      TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3950                                        ? INDEX_op_st_i32
3951                                        : INDEX_op_st_i64);
3952                      TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3953  
3954                      sop->args[0] = temp_arg(dir_ts);
3955                      sop->args[1] = temp_arg(arg_ts->mem_base);
3956                      sop->args[2] = arg_ts->mem_offset;
3957  
3958                      arg_ts->state = TS_MEM;
3959                  }
3960                  /* Drop outputs that are dead.  */
3961                  if (IS_DEAD_ARG(i)) {
3962                      arg_ts->state = TS_DEAD;
3963                  }
3964              }
3965          }
3966      }
3967  
3968      return changes;
3969  }
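
      /*
       * For example (illustrative): for an indirect global G homed at
       * env + off, an op such as "add_i32 G, G, T" is rewritten to use
       * G's direct temp D, with "ld_i32 D, env, off" inserted before
       * the use while G is unloaded, and "st_i32 D, env, off" inserted
       * after the write when a sync back to memory is required.
       */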
3970  
3971  static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3972  {
3973      intptr_t off;
3974      int size, align;
3975  
3976      /* When allocating an object, look at the full type. */
3977      size = tcg_type_size(ts->base_type);
3978      switch (ts->base_type) {
3979      case TCG_TYPE_I32:
3980          align = 4;
3981          break;
3982      case TCG_TYPE_I64:
3983      case TCG_TYPE_V64:
3984          align = 8;
3985          break;
3986      case TCG_TYPE_I128:
3987      case TCG_TYPE_V128:
3988      case TCG_TYPE_V256:
3989          /*
3990           * Note that we do not require aligned storage for V256,
3991           * and that we provide alignment for I128 to match V128,
3992           * even if that's above what the host ABI requires.
3993           */
3994          align = 16;
3995          break;
3996      default:
3997          g_assert_not_reached();
3998      }
3999  
4000      /*
4001       * Assume the stack is sufficiently aligned.
4002       * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4003       * and do not require 16 byte vector alignment.  This seems slightly
4004       * easier than fully parameterizing the above switch statement.
4005       */
4006      align = MIN(TCG_TARGET_STACK_ALIGN, align);
4007      off = ROUND_UP(s->current_frame_offset, align);
4008  
4009      /* If we've exhausted the stack frame, restart with a smaller TB. */
4010      if (off + size > s->frame_end) {
4011          tcg_raise_tb_overflow(s);
4012      }
4013      s->current_frame_offset = off + size;
4014  #if defined(__sparc__)
4015      off += TCG_TARGET_STACK_BIAS;
4016  #endif
4017  
4018      /* If the object was subdivided, assign memory to all the parts. */
4019      if (ts->base_type != ts->type) {
4020          int part_size = tcg_type_size(ts->type);
4021          int part_count = size / part_size;
4022  
4023          /*
4024           * Each part is allocated sequentially in tcg_temp_new_internal.
4025           * Jump back to the first part by subtracting the current index.
4026           */
4027          ts -= ts->temp_subindex;
4028          for (int i = 0; i < part_count; ++i) {
4029              ts[i].mem_offset = off + i * part_size;
4030              ts[i].mem_base = s->frame_temp;
4031              ts[i].mem_allocated = 1;
4032          }
4033      } else {
4034          ts->mem_offset = off;
4035          ts->mem_base = s->frame_temp;
4036          ts->mem_allocated = 1;
4037      }
4038  }
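
      /*
       * For example (illustrative): on a 32-bit host an I128 temp is
       * subdivided into four I32 parts, so the loop above assigns part
       * i the offset off + i * 4, all sharing the same frame base.
       */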
4039  
4040  /* Assign @reg to @ts, and update reg_to_temp[]. */
4041  static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4042  {
4043      if (ts->val_type == TEMP_VAL_REG) {
4044          TCGReg old = ts->reg;
4045          tcg_debug_assert(s->reg_to_temp[old] == ts);
4046          if (old == reg) {
4047              return;
4048          }
4049          s->reg_to_temp[old] = NULL;
4050      }
4051      tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4052      s->reg_to_temp[reg] = ts;
4053      ts->val_type = TEMP_VAL_REG;
4054      ts->reg = reg;
4055  }
4056  
4057  /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4058  static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4059  {
4060      tcg_debug_assert(type != TEMP_VAL_REG);
4061      if (ts->val_type == TEMP_VAL_REG) {
4062          TCGReg reg = ts->reg;
4063          tcg_debug_assert(s->reg_to_temp[reg] == ts);
4064          s->reg_to_temp[reg] = NULL;
4065      }
4066      ts->val_type = type;
4067  }
4068  
4069  static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4070  
4071  /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4072     mark it free; otherwise mark it dead.  */
4073  static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4074  {
4075      TCGTempVal new_type;
4076  
4077      switch (ts->kind) {
4078      case TEMP_FIXED:
4079          return;
4080      case TEMP_GLOBAL:
4081      case TEMP_TB:
4082          new_type = TEMP_VAL_MEM;
4083          break;
4084      case TEMP_EBB:
4085          new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4086          break;
4087      case TEMP_CONST:
4088          new_type = TEMP_VAL_CONST;
4089          break;
4090      default:
4091          g_assert_not_reached();
4092      }
4093      set_temp_val_nonreg(s, ts, new_type);
4094  }
4095  
4096  /* Mark a temporary as dead.  */
4097  static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4098  {
4099      temp_free_or_dead(s, ts, 1);
4100  }
4101  
4102  /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4103     register needs to be allocated to store a constant.  If 'free_or_dead'
4104     is non-zero, subsequently release the temporary; if it is positive, the
4105     temp is dead; if it is negative, the temp is free.  */
4106  static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4107                        TCGRegSet preferred_regs, int free_or_dead)
4108  {
4109      if (!temp_readonly(ts) && !ts->mem_coherent) {
4110          if (!ts->mem_allocated) {
4111              temp_allocate_frame(s, ts);
4112          }
4113          switch (ts->val_type) {
4114          case TEMP_VAL_CONST:
4115              /* If we're going to free the temp immediately, then we won't
4116                 require it later in a register, so attempt to store the
4117                 constant to memory directly.  */
4118              if (free_or_dead
4119                  && tcg_out_sti(s, ts->type, ts->val,
4120                                 ts->mem_base->reg, ts->mem_offset)) {
4121                  break;
4122              }
4123              temp_load(s, ts, tcg_target_available_regs[ts->type],
4124                        allocated_regs, preferred_regs);
4125              /* fallthrough */
4126  
4127          case TEMP_VAL_REG:
4128              tcg_out_st(s, ts->type, ts->reg,
4129                         ts->mem_base->reg, ts->mem_offset);
4130              break;
4131  
4132          case TEMP_VAL_MEM:
4133              break;
4134  
4135          case TEMP_VAL_DEAD:
4136          default:
4137              g_assert_not_reached();
4138          }
4139          ts->mem_coherent = 1;
4140      }
4141      if (free_or_dead) {
4142          temp_free_or_dead(s, ts, free_or_dead);
4143      }
4144  }
4145  
4146  /* free register 'reg' by spilling the corresponding temporary if necessary */
4147  static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4148  {
4149      TCGTemp *ts = s->reg_to_temp[reg];
4150      if (ts != NULL) {
4151          temp_sync(s, ts, allocated_regs, 0, -1);
4152      }
4153  }
4154  
4155  /**
4156   * tcg_reg_alloc:
4157   * @required_regs: Set of registers in which we must allocate.
4158   * @allocated_regs: Set of registers which must be avoided.
4159   * @preferred_regs: Set of registers we should prefer.
4160   * @rev: True if we search the registers in "indirect" order.
4161   *
4162   * The allocated register must be in @required_regs & ~@allocated_regs,
4163   * but if we can put it in @preferred_regs we may save a move later.
4164   */
4165  static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4166                              TCGRegSet allocated_regs,
4167                              TCGRegSet preferred_regs, bool rev)
4168  {
4169      int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4170      TCGRegSet reg_ct[2];
4171      const int *order;
4172  
4173      reg_ct[1] = required_regs & ~allocated_regs;
4174      tcg_debug_assert(reg_ct[1] != 0);
4175      reg_ct[0] = reg_ct[1] & preferred_regs;
4176  
4177      /* Skip the preferred_regs option if it cannot be satisfied,
4178         or if the preference made no difference.  */
4179      f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4180  
4181      order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4182  
4183      /* Try free registers, preferences first.  */
4184      for (j = f; j < 2; j++) {
4185          TCGRegSet set = reg_ct[j];
4186  
4187          if (tcg_regset_single(set)) {
4188              /* One register in the set.  */
4189              TCGReg reg = tcg_regset_first(set);
4190              if (s->reg_to_temp[reg] == NULL) {
4191                  return reg;
4192              }
4193          } else {
4194              for (i = 0; i < n; i++) {
4195                  TCGReg reg = order[i];
4196                  if (s->reg_to_temp[reg] == NULL &&
4197                      tcg_regset_test_reg(set, reg)) {
4198                      return reg;
4199                  }
4200              }
4201          }
4202      }
4203  
4204      /* We must spill something.  */
4205      for (j = f; j < 2; j++) {
4206          TCGRegSet set = reg_ct[j];
4207  
4208          if (tcg_regset_single(set)) {
4209              /* One register in the set.  */
4210              TCGReg reg = tcg_regset_first(set);
4211              tcg_reg_free(s, reg, allocated_regs);
4212              return reg;
4213          } else {
4214              for (i = 0; i < n; i++) {
4215                  TCGReg reg = order[i];
4216                  if (tcg_regset_test_reg(set, reg)) {
4217                      tcg_reg_free(s, reg, allocated_regs);
4218                      return reg;
4219                  }
4220              }
4221          }
4222      }
4223  
4224      g_assert_not_reached();
4225  }
4226  
4227  static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4228                                   TCGRegSet allocated_regs,
4229                                   TCGRegSet preferred_regs, bool rev)
4230  {
4231      int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4232      TCGRegSet reg_ct[2];
4233      const int *order;
4234  
4235      /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4236      reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4237      tcg_debug_assert(reg_ct[1] != 0);
4238      reg_ct[0] = reg_ct[1] & preferred_regs;
4239  
4240      order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4241  
4242      /*
4243       * Skip the preferred_regs option if it cannot be satisfied,
4244       * or if the preference made no difference.
4245       */
4246      k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4247  
4248      /*
4249       * Minimize the number of flushes by looking for 2 free registers first,
4250       * then a single flush, then two flushes.
4251       */
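    /*
     * Note (illustrative): f below counts how many of reg and reg+1
     * are currently unassigned, so requiring f >= fmin tries
     * zero-spill pairs first, then pairs needing one spill, then two.
     */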
4252      for (fmin = 2; fmin >= 0; fmin--) {
4253          for (j = k; j < 2; j++) {
4254              TCGRegSet set = reg_ct[j];
4255  
4256              for (i = 0; i < n; i++) {
4257                  TCGReg reg = order[i];
4258  
4259                  if (tcg_regset_test_reg(set, reg)) {
4260                      int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4261                      if (f >= fmin) {
4262                          tcg_reg_free(s, reg, allocated_regs);
4263                          tcg_reg_free(s, reg + 1, allocated_regs);
4264                          return reg;
4265                      }
4266                  }
4267              }
4268          }
4269      }
4270      g_assert_not_reached();
4271  }
4272  
4273  /* Make sure the temporary is in a register.  If needed, allocate the register
4274     from DESIRED while avoiding ALLOCATED.  */
4275  static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4276                        TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4277  {
4278      TCGReg reg;
4279  
4280      switch (ts->val_type) {
4281      case TEMP_VAL_REG:
4282          return;
4283      case TEMP_VAL_CONST:
4284          reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4285                              preferred_regs, ts->indirect_base);
4286          if (ts->type <= TCG_TYPE_I64) {
4287              tcg_out_movi(s, ts->type, reg, ts->val);
4288          } else {
4289              uint64_t val = ts->val;
4290              MemOp vece = MO_64;
4291  
4292              /*
4293               * Find the minimal vector element that matches the constant.
4294               * The targets will, in general, have to do this search anyway;
4295               * do it here generically.
4296               */
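            /* e.g. (illustrative): 0x4242424242424242 matches dup_const(MO_8, 0x42). */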
4297              if (val == dup_const(MO_8, val)) {
4298                  vece = MO_8;
4299              } else if (val == dup_const(MO_16, val)) {
4300                  vece = MO_16;
4301              } else if (val == dup_const(MO_32, val)) {
4302                  vece = MO_32;
4303              }
4304  
4305              tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4306          }
4307          ts->mem_coherent = 0;
4308          break;
4309      case TEMP_VAL_MEM:
4310          reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4311                              preferred_regs, ts->indirect_base);
4312          tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4313          ts->mem_coherent = 1;
4314          break;
4315      case TEMP_VAL_DEAD:
4316      default:
4317          g_assert_not_reached();
4318      }
4319      set_temp_val_reg(s, ts, reg);
4320  }
4321  
4322  /* Save a temporary to memory. 'allocated_regs' is used in case a
4323     temporary register needs to be allocated to store a constant.  */
4324  static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4325  {
4326      /* The liveness analysis already ensures that globals are back
4327         in memory. Keep a tcg_debug_assert for safety. */
4328      tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4329  }
4330  
4331  /* save globals to their canonical location and assume they can be
4332     modified by the following code. 'allocated_regs' is used in case a
4333     temporary register needs to be allocated to store a constant. */
4334  static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4335  {
4336      int i, n;
4337  
4338      for (i = 0, n = s->nb_globals; i < n; i++) {
4339          temp_save(s, &s->temps[i], allocated_regs);
4340      }
4341  }
4342  
4343  /* sync globals to their canonical location and assume they can be
4344     read by the following code. 'allocated_regs' is used in case a
4345     temporary register needs to be allocated to store a constant. */
4346  static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4347  {
4348      int i, n;
4349  
4350      for (i = 0, n = s->nb_globals; i < n; i++) {
4351          TCGTemp *ts = &s->temps[i];
4352          tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4353                           || ts->kind == TEMP_FIXED
4354                           || ts->mem_coherent);
4355      }
4356  }
4357  
4358  /* at the end of a basic block, we assume all temporaries are dead and
4359     all globals are stored at their canonical location. */
4360  static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4361  {
4362      int i;
4363  
4364      for (i = s->nb_globals; i < s->nb_temps; i++) {
4365          TCGTemp *ts = &s->temps[i];
4366  
4367          switch (ts->kind) {
4368          case TEMP_TB:
4369              temp_save(s, ts, allocated_regs);
4370              break;
4371          case TEMP_EBB:
4372              /* The liveness analysis already ensures that temps are dead.
4373                 Keep a tcg_debug_assert for safety. */
4374              tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4375              break;
4376          case TEMP_CONST:
4377              /* Similarly, we should have freed any allocated register. */
4378              tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4379              break;
4380          default:
4381              g_assert_not_reached();
4382          }
4383      }
4384  
4385      save_globals(s, allocated_regs);
4386  }
4387  
4388  /*
4389   * At a conditional branch, we assume all temporaries are dead unless
4390   * explicitly live-across-conditional-branch; all globals and local
4391   * temps are synced to their location.
4392   */
4393  static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4394  {
4395      sync_globals(s, allocated_regs);
4396  
4397      for (int i = s->nb_globals; i < s->nb_temps; i++) {
4398          TCGTemp *ts = &s->temps[i];
4399          /*
4400           * The liveness analysis already ensures that temps are dead.
4401           * Keep tcg_debug_asserts for safety.
4402           */
4403          switch (ts->kind) {
4404          case TEMP_TB:
4405              tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4406              break;
4407          case TEMP_EBB:
4408          case TEMP_CONST:
4409              break;
4410          default:
4411              g_assert_not_reached();
4412          }
4413      }
4414  }
4415  
4416  /*
4417   * Specialized code generation for INDEX_op_mov_* with a constant.
4418   */
4419  static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4420                                    tcg_target_ulong val, TCGLifeData arg_life,
4421                                    TCGRegSet preferred_regs)
4422  {
4423      /* ENV should not be modified.  */
4424      tcg_debug_assert(!temp_readonly(ots));
4425  
4426      /* The movi is not explicitly generated here.  */
4427      set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4428      ots->val = val;
4429      ots->mem_coherent = 0;
4430      if (NEED_SYNC_ARG(0)) {
4431          temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4432      } else if (IS_DEAD_ARG(0)) {
4433          temp_dead(s, ots);
4434      }
4435  }
4436  
4437  /*
4438   * Specialized code generation for INDEX_op_mov_*.
4439   */
4440  static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4441  {
4442      const TCGLifeData arg_life = op->life;
4443      TCGRegSet allocated_regs, preferred_regs;
4444      TCGTemp *ts, *ots;
4445      TCGType otype, itype;
4446      TCGReg oreg, ireg;
4447  
4448      allocated_regs = s->reserved_regs;
4449      preferred_regs = output_pref(op, 0);
4450      ots = arg_temp(op->args[0]);
4451      ts = arg_temp(op->args[1]);
4452  
4453      /* ENV should not be modified.  */
4454      tcg_debug_assert(!temp_readonly(ots));
4455  
4456      /* Note that otype != itype for no-op truncation.  */
4457      otype = ots->type;
4458      itype = ts->type;
4459  
4460      if (ts->val_type == TEMP_VAL_CONST) {
4461          /* propagate constant or generate sti */
4462          tcg_target_ulong val = ts->val;
4463          if (IS_DEAD_ARG(1)) {
4464              temp_dead(s, ts);
4465          }
4466          tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4467          return;
4468      }
4469  
4470      /* If the source value is in memory we're going to be forced
4471         to have it in a register in order to perform the copy.  Copy
4472         the SOURCE value into its own register first, that way we
4473         don't have to reload SOURCE the next time it is used. */
4474      if (ts->val_type == TEMP_VAL_MEM) {
4475          temp_load(s, ts, tcg_target_available_regs[itype],
4476                    allocated_regs, preferred_regs);
4477      }
4478      tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4479      ireg = ts->reg;
4480  
4481      if (IS_DEAD_ARG(0)) {
4482          /* mov to a non-saved dead register makes no sense (even with
4483             liveness analysis disabled). */
4484          tcg_debug_assert(NEED_SYNC_ARG(0));
4485          if (!ots->mem_allocated) {
4486              temp_allocate_frame(s, ots);
4487          }
4488          tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4489          if (IS_DEAD_ARG(1)) {
4490              temp_dead(s, ts);
4491          }
4492          temp_dead(s, ots);
4493          return;
4494      }
4495  
4496      if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4497          /*
4498           * The mov can be suppressed.  Kill input first, so that it
4499           * is unlinked from reg_to_temp, then set the output to the
4500           * reg that we saved from the input.
4501           */
4502          temp_dead(s, ts);
4503          oreg = ireg;
4504      } else {
4505          if (ots->val_type == TEMP_VAL_REG) {
4506              oreg = ots->reg;
4507          } else {
4508              /* Make sure to not spill the input register during allocation. */
4509              oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4510                                   allocated_regs | ((TCGRegSet)1 << ireg),
4511                                   preferred_regs, ots->indirect_base);
4512          }
4513          if (!tcg_out_mov(s, otype, oreg, ireg)) {
4514              /*
4515               * Cross register class move not supported.
4516               * Store the source register into the destination slot
4517               * and leave the destination temp as TEMP_VAL_MEM.
4518               */
4519              assert(!temp_readonly(ots));
4520              if (!ots->mem_allocated) {
4521                  temp_allocate_frame(s, ots);
4522              }
4523              tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4524              set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4525              ots->mem_coherent = 1;
4526              return;
4527          }
4528      }
4529      set_temp_val_reg(s, ots, oreg);
4530      ots->mem_coherent = 0;
4531  
4532      if (NEED_SYNC_ARG(0)) {
4533          temp_sync(s, ots, allocated_regs, 0, 0);
4534      }
4535  }
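
/*
 * Illustrative sketch (not part of the build): a minimal model of the
 * mov-suppression decision in tcg_reg_alloc_mov() above.  The copy is
 * elided only when the input dies at this op and is not a fixed
 * register; the output then simply inherits the input's register.
 * The Input type and main() are hypothetical stand-ins, not TCG APIs.
 */
#if 0
#include <assert.h>
#include <stdbool.h>

typedef struct {
    bool dead_after_op;   /* models IS_DEAD_ARG(1) */
    bool fixed_reg;       /* models ts->kind == TEMP_FIXED */
} Input;

static bool can_suppress_mov(Input in)
{
    return in.dead_after_op && !in.fixed_reg;
}

int main(void)
{
    assert(can_suppress_mov((Input){ .dead_after_op = true }));
    assert(!can_suppress_mov((Input){ .dead_after_op = true,
                                      .fixed_reg = true }));
    return 0;
}
#endif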
4536  
4537  /*
4538   * Specialized code generation for INDEX_op_dup_vec.
4539   */
4540  static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4541  {
4542      const TCGLifeData arg_life = op->life;
4543      TCGRegSet dup_out_regs, dup_in_regs;
4544      TCGTemp *its, *ots;
4545      TCGType itype, vtype;
4546      unsigned vece;
4547      int lowpart_ofs;
4548      bool ok;
4549  
4550      ots = arg_temp(op->args[0]);
4551      its = arg_temp(op->args[1]);
4552  
4553      /* ENV should not be modified.  */
4554      tcg_debug_assert(!temp_readonly(ots));
4555  
4556      itype = its->type;
4557      vece = TCGOP_VECE(op);
4558      vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4559  
4560      if (its->val_type == TEMP_VAL_CONST) {
4561          /* Propagate constant via movi -> dupi.  */
4562          tcg_target_ulong val = its->val;
4563          if (IS_DEAD_ARG(1)) {
4564              temp_dead(s, its);
4565          }
4566          tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4567          return;
4568      }
4569  
4570      dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4571      dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4572  
4573      /* Allocate the output register now.  */
4574      if (ots->val_type != TEMP_VAL_REG) {
4575          TCGRegSet allocated_regs = s->reserved_regs;
4576          TCGReg oreg;
4577  
4578          if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4579              /* Make sure to not spill the input register. */
4580              tcg_regset_set_reg(allocated_regs, its->reg);
4581          }
4582          oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4583                               output_pref(op, 0), ots->indirect_base);
4584          set_temp_val_reg(s, ots, oreg);
4585      }
4586  
4587      switch (its->val_type) {
4588      case TEMP_VAL_REG:
4589          /*
4590           * The dup constraints must be broad, covering all possible VECE.
4591           * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4592           * to fail, indicating that extra moves are required for that case.
4593           */
4594          if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4595              if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4596                  goto done;
4597              }
4598              /* Try again from memory or a vector input register.  */
4599          }
4600          if (!its->mem_coherent) {
4601              /*
4602               * The input register is not synced, and so an extra store
4603               * would be required to use memory.  Attempt an integer-vector
4604               * register move first.  We do not have a TCGRegSet for this.
4605               */
4606              if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4607                  break;
4608              }
4609              /* Sync the temp back to its slot and load from there.  */
4610              temp_sync(s, its, s->reserved_regs, 0, 0);
4611          }
4612          /* fall through */
4613  
4614      case TEMP_VAL_MEM:
4615          lowpart_ofs = 0;
4616          if (HOST_BIG_ENDIAN) {
4617              lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4618          }
4619          if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4620                               its->mem_offset + lowpart_ofs)) {
4621              goto done;
4622          }
4623          /* Load the input into the destination vector register. */
4624          tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4625          break;
4626  
4627      default:
4628          g_assert_not_reached();
4629      }
4630  
4631      /* We now have a vector input register, so dup must succeed. */
4632      ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4633      tcg_debug_assert(ok);
4634  
4635   done:
4636      ots->mem_coherent = 0;
4637      if (IS_DEAD_ARG(1)) {
4638          temp_dead(s, its);
4639      }
4640      if (NEED_SYNC_ARG(0)) {
4641          temp_sync(s, ots, s->reserved_regs, 0, 0);
4642      }
4643      if (IS_DEAD_ARG(0)) {
4644          temp_dead(s, ots);
4645      }
4646  }
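
/*
 * Illustrative sketch (not part of the build): the big-endian lowpart
 * offset computed in tcg_reg_alloc_dup() above.  For an 8-byte integer
 * input and a 1-byte element (VECE = MO_8), the least significant
 * element sits at byte 7 on a big-endian host, at byte 0 otherwise.
 * Plain ints stand in for tcg_type_size() and the MemOp values.
 */
#if 0
#include <assert.h>

int main(void)
{
    int type_size = 8;                /* tcg_type_size(TCG_TYPE_I64) */
    int vece = 0;                     /* MO_8: element size 1 << 0 */
    int host_big_endian = 1;
    int lowpart_ofs = host_big_endian ? type_size - (1 << vece) : 0;

    assert(lowpart_ofs == 7);
    return 0;
}
#endif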
4647  
4648  static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4649  {
4650      const TCGLifeData arg_life = op->life;
4651      const TCGOpDef * const def = &tcg_op_defs[op->opc];
4652      TCGRegSet i_allocated_regs;
4653      TCGRegSet o_allocated_regs;
4654      int i, k, nb_iargs, nb_oargs;
4655      TCGReg reg;
4656      TCGArg arg;
4657      const TCGArgConstraint *arg_ct;
4658      TCGTemp *ts;
4659      TCGArg new_args[TCG_MAX_OP_ARGS];
4660      int const_args[TCG_MAX_OP_ARGS];
4661  
4662      nb_oargs = def->nb_oargs;
4663      nb_iargs = def->nb_iargs;
4664  
4665      /* copy constants */
4666      memcpy(new_args + nb_oargs + nb_iargs,
4667             op->args + nb_oargs + nb_iargs,
4668             sizeof(TCGArg) * def->nb_cargs);
4669  
4670      i_allocated_regs = s->reserved_regs;
4671      o_allocated_regs = s->reserved_regs;
4672  
4673      /* satisfy input constraints */
4674      for (k = 0; k < nb_iargs; k++) {
4675          TCGRegSet i_preferred_regs, i_required_regs;
4676          bool allocate_new_reg, copyto_new_reg;
4677          TCGTemp *ts2;
4678          int i1, i2;
4679  
4680          i = def->args_ct[nb_oargs + k].sort_index;
4681          arg = op->args[i];
4682          arg_ct = &def->args_ct[i];
4683          ts = arg_temp(arg);
4684  
4685          if (ts->val_type == TEMP_VAL_CONST
4686              && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4687              /* constant is OK for instruction */
4688              const_args[i] = 1;
4689              new_args[i] = ts->val;
4690              continue;
4691          }
4692  
4693          reg = ts->reg;
4694          i_preferred_regs = 0;
4695          i_required_regs = arg_ct->regs;
4696          allocate_new_reg = false;
4697          copyto_new_reg = false;
4698  
4699          switch (arg_ct->pair) {
4700          case 0: /* not paired */
4701              if (arg_ct->ialias) {
4702                  i_preferred_regs = output_pref(op, arg_ct->alias_index);
4703  
4704                  /*
4705                   * If the input is readonly, then it cannot also be an
4706                   * output and aliased to itself.  If the input is not
4707                   * dead after the instruction, we must allocate a new
4708                   * register and move it.
4709                   */
4710                  if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4711                      allocate_new_reg = true;
4712                  } else if (ts->val_type == TEMP_VAL_REG) {
4713                      /*
4714                       * Check if the current register has already been
4715                       * allocated for another input.
4716                       */
4717                      allocate_new_reg =
4718                          tcg_regset_test_reg(i_allocated_regs, reg);
4719                  }
4720              }
4721              if (!allocate_new_reg) {
4722                  temp_load(s, ts, i_required_regs, i_allocated_regs,
4723                            i_preferred_regs);
4724                  reg = ts->reg;
4725                  allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4726              }
4727              if (allocate_new_reg) {
4728                  /*
4729                   * Allocate a new register matching the constraint
4730                   * and move the temporary register into it.
4731                   */
4732                  temp_load(s, ts, tcg_target_available_regs[ts->type],
4733                            i_allocated_regs, 0);
4734                  reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4735                                      i_preferred_regs, ts->indirect_base);
4736                  copyto_new_reg = true;
4737              }
4738              break;
4739  
4740          case 1:
4741              /* First of an input pair; if i1 == i2, the second is an output. */
4742              i1 = i;
4743              i2 = arg_ct->pair_index;
4744              ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4745  
4746              /*
4747               * It is easier to default to allocating a new pair
4748               * and to identify a few cases where it's not required.
4749               */
4750              if (arg_ct->ialias) {
4751                  i_preferred_regs = output_pref(op, arg_ct->alias_index);
4752                  if (IS_DEAD_ARG(i1) &&
4753                      IS_DEAD_ARG(i2) &&
4754                      !temp_readonly(ts) &&
4755                      ts->val_type == TEMP_VAL_REG &&
4756                      ts->reg < TCG_TARGET_NB_REGS - 1 &&
4757                      tcg_regset_test_reg(i_required_regs, reg) &&
4758                      !tcg_regset_test_reg(i_allocated_regs, reg) &&
4759                      !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4760                      (ts2
4761                       ? ts2->val_type == TEMP_VAL_REG &&
4762                         ts2->reg == reg + 1 &&
4763                         !temp_readonly(ts2)
4764                       : s->reg_to_temp[reg + 1] == NULL)) {
4765                      break;
4766                  }
4767              } else {
4768                  /* Without aliasing, the pair must also be an input. */
4769                  tcg_debug_assert(ts2);
4770                  if (ts->val_type == TEMP_VAL_REG &&
4771                      ts2->val_type == TEMP_VAL_REG &&
4772                      ts2->reg == reg + 1 &&
4773                      tcg_regset_test_reg(i_required_regs, reg)) {
4774                      break;
4775                  }
4776              }
4777              reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4778                                       0, ts->indirect_base);
4779              goto do_pair;
4780  
4781          case 2: /* pair second */
4782              reg = new_args[arg_ct->pair_index] + 1;
4783              goto do_pair;
4784  
4785          case 3: /* ialias with second output, no first input */
4786              tcg_debug_assert(arg_ct->ialias);
4787              i_preferred_regs = output_pref(op, arg_ct->alias_index);
4788  
4789              if (IS_DEAD_ARG(i) &&
4790                  !temp_readonly(ts) &&
4791                  ts->val_type == TEMP_VAL_REG &&
4792                  reg > 0 &&
4793                  s->reg_to_temp[reg - 1] == NULL &&
4794                  tcg_regset_test_reg(i_required_regs, reg) &&
4795                  !tcg_regset_test_reg(i_allocated_regs, reg) &&
4796                  !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4797                  tcg_regset_set_reg(i_allocated_regs, reg - 1);
4798                  break;
4799              }
4800              reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4801                                       i_allocated_regs, 0,
4802                                       ts->indirect_base);
4803              tcg_regset_set_reg(i_allocated_regs, reg);
4804              reg += 1;
4805              goto do_pair;
4806  
4807          do_pair:
4808              /*
4809               * If an aliased input is not dead after the instruction,
4810               * we must allocate a new register and move it.
4811               */
4812              if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4813                  TCGRegSet t_allocated_regs = i_allocated_regs;
4814  
4815                  /*
4816                   * Because of the alias, and the continued life, make sure
4817                   * that the temp is somewhere *other* than the reg pair,
4818                   * and we get a copy in reg.
4819                   */
4820                  tcg_regset_set_reg(t_allocated_regs, reg);
4821                  tcg_regset_set_reg(t_allocated_regs, reg + 1);
4822                  if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4823                      /* If ts was already in reg, copy it somewhere else. */
4824                      TCGReg nr;
4825                      bool ok;
4826  
4827                      tcg_debug_assert(ts->kind != TEMP_FIXED);
4828                      nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4829                                         t_allocated_regs, 0, ts->indirect_base);
4830                      ok = tcg_out_mov(s, ts->type, nr, reg);
4831                      tcg_debug_assert(ok);
4832  
4833                      set_temp_val_reg(s, ts, nr);
4834                  } else {
4835                      temp_load(s, ts, tcg_target_available_regs[ts->type],
4836                                t_allocated_regs, 0);
4837                      copyto_new_reg = true;
4838                  }
4839              } else {
4840                  /* Preferably allocate to reg, otherwise copy. */
4841                  i_required_regs = (TCGRegSet)1 << reg;
4842                  temp_load(s, ts, i_required_regs, i_allocated_regs,
4843                            i_preferred_regs);
4844                  copyto_new_reg = ts->reg != reg;
4845              }
4846              break;
4847  
4848          default:
4849              g_assert_not_reached();
4850          }
4851  
4852          if (copyto_new_reg) {
4853              if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4854                  /*
4855                   * Cross register class move not supported.  Sync the
4856                   * temp back to its slot and load from there.
4857                   */
4858                  temp_sync(s, ts, i_allocated_regs, 0, 0);
4859                  tcg_out_ld(s, ts->type, reg,
4860                             ts->mem_base->reg, ts->mem_offset);
4861              }
4862          }
4863          new_args[i] = reg;
4864          const_args[i] = 0;
4865          tcg_regset_set_reg(i_allocated_regs, reg);
4866      }
4867  
4868      /* mark dead temporaries and free the associated registers */
4869      for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4870          if (IS_DEAD_ARG(i)) {
4871              temp_dead(s, arg_temp(op->args[i]));
4872          }
4873      }
4874  
4875      if (def->flags & TCG_OPF_COND_BRANCH) {
4876          tcg_reg_alloc_cbranch(s, i_allocated_regs);
4877      } else if (def->flags & TCG_OPF_BB_END) {
4878          tcg_reg_alloc_bb_end(s, i_allocated_regs);
4879      } else {
4880          if (def->flags & TCG_OPF_CALL_CLOBBER) {
4881              /* XXX: permit generic clobber register list? */
4882              for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4883                  if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4884                      tcg_reg_free(s, i, i_allocated_regs);
4885                  }
4886              }
4887          }
4888          if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4889              /* sync globals if the op has side effects and might trigger
4890                 an exception. */
4891              sync_globals(s, i_allocated_regs);
4892          }
4893  
4894          /* satisfy the output constraints */
4895          for (k = 0; k < nb_oargs; k++) {
4896              i = def->args_ct[k].sort_index;
4897              arg = op->args[i];
4898              arg_ct = &def->args_ct[i];
4899              ts = arg_temp(arg);
4900  
4901              /* ENV should not be modified.  */
4902              tcg_debug_assert(!temp_readonly(ts));
4903  
4904              switch (arg_ct->pair) {
4905              case 0: /* not paired */
4906                  if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4907                      reg = new_args[arg_ct->alias_index];
4908                  } else if (arg_ct->newreg) {
4909                      reg = tcg_reg_alloc(s, arg_ct->regs,
4910                                          i_allocated_regs | o_allocated_regs,
4911                                          output_pref(op, k), ts->indirect_base);
4912                  } else {
4913                      reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4914                                          output_pref(op, k), ts->indirect_base);
4915                  }
4916                  break;
4917  
4918              case 1: /* first of pair */
4919                  tcg_debug_assert(!arg_ct->newreg);
4920                  if (arg_ct->oalias) {
4921                      reg = new_args[arg_ct->alias_index];
4922                      break;
4923                  }
4924                  reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4925                                           output_pref(op, k), ts->indirect_base);
4926                  break;
4927  
4928              case 2: /* second of pair */
4929                  tcg_debug_assert(!arg_ct->newreg);
4930                  if (arg_ct->oalias) {
4931                      reg = new_args[arg_ct->alias_index];
4932                  } else {
4933                      reg = new_args[arg_ct->pair_index] + 1;
4934                  }
4935                  break;
4936  
4937              case 3: /* first of pair, aliasing with a second input */
4938                  tcg_debug_assert(!arg_ct->newreg);
4939                  reg = new_args[arg_ct->pair_index] - 1;
4940                  break;
4941  
4942              default:
4943                  g_assert_not_reached();
4944              }
4945              tcg_regset_set_reg(o_allocated_regs, reg);
4946              set_temp_val_reg(s, ts, reg);
4947              ts->mem_coherent = 0;
4948              new_args[i] = reg;
4949          }
4950      }
4951  
4952      /* emit instruction */
4953      switch (op->opc) {
4954      case INDEX_op_ext8s_i32:
4955          tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4956          break;
4957      case INDEX_op_ext8s_i64:
4958          tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4959          break;
4960      case INDEX_op_ext8u_i32:
4961      case INDEX_op_ext8u_i64:
4962          tcg_out_ext8u(s, new_args[0], new_args[1]);
4963          break;
4964      case INDEX_op_ext16s_i32:
4965          tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4966          break;
4967      case INDEX_op_ext16s_i64:
4968          tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4969          break;
4970      case INDEX_op_ext16u_i32:
4971      case INDEX_op_ext16u_i64:
4972          tcg_out_ext16u(s, new_args[0], new_args[1]);
4973          break;
4974      case INDEX_op_ext32s_i64:
4975          tcg_out_ext32s(s, new_args[0], new_args[1]);
4976          break;
4977      case INDEX_op_ext32u_i64:
4978          tcg_out_ext32u(s, new_args[0], new_args[1]);
4979          break;
4980      case INDEX_op_ext_i32_i64:
4981          tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4982          break;
4983      case INDEX_op_extu_i32_i64:
4984          tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4985          break;
4986      case INDEX_op_extrl_i64_i32:
4987          tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4988          break;
4989      default:
4990          if (def->flags & TCG_OPF_VECTOR) {
4991              tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4992                             new_args, const_args);
4993          } else {
4994              tcg_out_op(s, op->opc, new_args, const_args);
4995          }
4996          break;
4997      }
4998  
4999      /* move the outputs in the correct register if needed */
5000      for (i = 0; i < nb_oargs; i++) {
5001          ts = arg_temp(op->args[i]);
5002  
5003          /* ENV should not be modified.  */
5004          tcg_debug_assert(!temp_readonly(ts));
5005  
5006          if (NEED_SYNC_ARG(i)) {
5007              temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5008          } else if (IS_DEAD_ARG(i)) {
5009              temp_dead(s, ts);
5010          }
5011      }
5012  }
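
/*
 * Illustrative sketch (not part of the build): how the paired
 * constraints in tcg_reg_alloc_op() above fit together.  Only the
 * first half of a pair is allocated by search; the second half is
 * defined to be the adjacent register, and an aliased output reuses
 * the already-chosen input register.  Register numbers are made up.
 */
#if 0
#include <assert.h>

int main(void)
{
    int new_args[4] = { 0 };

    new_args[2] = 6;                 /* pair first: from tcg_reg_alloc_pair */
    new_args[3] = new_args[2] + 1;   /* pair second: always first + 1 */
    new_args[0] = new_args[2];       /* oalias output: reuse input's reg */

    assert(new_args[3] == 7 && new_args[0] == 6);
    return 0;
}
#endif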
5013  
5014  static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5015  {
5016      const TCGLifeData arg_life = op->life;
5017      TCGTemp *ots, *itsl, *itsh;
5018      TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5019  
5020      /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5021      tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5022      tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5023  
5024      ots = arg_temp(op->args[0]);
5025      itsl = arg_temp(op->args[1]);
5026      itsh = arg_temp(op->args[2]);
5027  
5028      /* ENV should not be modified.  */
5029      tcg_debug_assert(!temp_readonly(ots));
5030  
5031      /* Allocate the output register now.  */
5032      if (ots->val_type != TEMP_VAL_REG) {
5033          TCGRegSet allocated_regs = s->reserved_regs;
5034          TCGRegSet dup_out_regs =
5035              tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5036          TCGReg oreg;
5037  
5038          /* Make sure to not spill the input registers. */
5039          if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5040              tcg_regset_set_reg(allocated_regs, itsl->reg);
5041          }
5042          if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5043              tcg_regset_set_reg(allocated_regs, itsh->reg);
5044          }
5045  
5046          oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5047                               output_pref(op, 0), ots->indirect_base);
5048          set_temp_val_reg(s, ots, oreg);
5049      }
5050  
5051      /* Promote dup2 of immediates to dupi_vec. */
5052      if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5053          uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5054          MemOp vece = MO_64;
5055  
5056          if (val == dup_const(MO_8, val)) {
5057              vece = MO_8;
5058          } else if (val == dup_const(MO_16, val)) {
5059              vece = MO_16;
5060          } else if (val == dup_const(MO_32, val)) {
5061              vece = MO_32;
5062          }
5063  
5064          tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5065          goto done;
5066      }
5067  
5068      /* If the two inputs form one 64-bit value, try dupm_vec. */
5069      if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5070          itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5071          itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5072          TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5073  
5074          temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5075          temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5076  
5077          if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5078                               its->mem_base->reg, its->mem_offset)) {
5079              goto done;
5080          }
5081      }
5082  
5083      /* Fall back to generic expansion. */
5084      return false;
5085  
5086   done:
5087      ots->mem_coherent = 0;
5088      if (IS_DEAD_ARG(1)) {
5089          temp_dead(s, itsl);
5090      }
5091      if (IS_DEAD_ARG(2)) {
5092          temp_dead(s, itsh);
5093      }
5094      if (NEED_SYNC_ARG(0)) {
5095          temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5096      } else if (IS_DEAD_ARG(0)) {
5097          temp_dead(s, ots);
5098      }
5099      return true;
5100  }
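
/*
 * Illustrative sketch (not part of the build): the dupi promotion in
 * tcg_reg_alloc_dup2() above.  A pair of 32-bit immediates that fuse
 * into a byte-replicated 64-bit value is recognized as an MO_8 dup.
 * dup8() is a local re-implementation of dup_const(MO_8, x) for the
 * example only.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t dup8(uint64_t x)
{
    return (x & 0xff) * 0x0101010101010101ull;
}

int main(void)
{
    uint64_t lo = 0x42424242, hi = 0x42424242;
    uint64_t val = lo | (hi << 32);   /* deposit64(lo, 32, 32, hi) */

    assert(val == dup8(val));         /* so vece narrows from MO_64 to MO_8 */
    return 0;
}
#endif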
5101  
5102  static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5103                           TCGRegSet allocated_regs)
5104  {
5105      if (ts->val_type == TEMP_VAL_REG) {
5106          if (ts->reg != reg) {
5107              tcg_reg_free(s, reg, allocated_regs);
5108              if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5109                  /*
5110                   * Cross register class move not supported.  Sync the
5111                   * temp back to its slot and load from there.
5112                   */
5113                  temp_sync(s, ts, allocated_regs, 0, 0);
5114                  tcg_out_ld(s, ts->type, reg,
5115                             ts->mem_base->reg, ts->mem_offset);
5116              }
5117          }
5118      } else {
5119          TCGRegSet arg_set = 0;
5120  
5121          tcg_reg_free(s, reg, allocated_regs);
5122          tcg_regset_set_reg(arg_set, reg);
5123          temp_load(s, ts, arg_set, allocated_regs, 0);
5124      }
5125  }
5126  
5127  static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5128                           TCGRegSet allocated_regs)
5129  {
5130      /*
5131       * When the destination is on the stack, load up the temp and store.
5132       * If there are many call-saved registers, the temp might live to
5133       * see another use; otherwise it'll be discarded.
5134       */
5135      temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5136      tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5137                 arg_slot_stk_ofs(arg_slot));
5138  }
5139  
5140  static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5141                              TCGTemp *ts, TCGRegSet *allocated_regs)
5142  {
5143      if (arg_slot_reg_p(l->arg_slot)) {
5144          TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5145          load_arg_reg(s, reg, ts, *allocated_regs);
5146          tcg_regset_set_reg(*allocated_regs, reg);
5147      } else {
5148          load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5149      }
5150  }
5151  
5152  static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5153                           intptr_t ref_off, TCGRegSet *allocated_regs)
5154  {
5155      TCGReg reg;
5156  
5157      if (arg_slot_reg_p(arg_slot)) {
5158          reg = tcg_target_call_iarg_regs[arg_slot];
5159          tcg_reg_free(s, reg, *allocated_regs);
5160          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5161          tcg_regset_set_reg(*allocated_regs, reg);
5162      } else {
5163          reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5164                              *allocated_regs, 0, false);
5165          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5166          tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5167                     arg_slot_stk_ofs(arg_slot));
5168      }
5169  }
5170  
5171  static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5172  {
5173      const int nb_oargs = TCGOP_CALLO(op);
5174      const int nb_iargs = TCGOP_CALLI(op);
5175      const TCGLifeData arg_life = op->life;
5176      const TCGHelperInfo *info = tcg_call_info(op);
5177      TCGRegSet allocated_regs = s->reserved_regs;
5178      int i;
5179  
5180      /*
5181       * Move inputs into place in reverse order,
5182       * so that we place stacked arguments first.
5183       */
5184      for (i = nb_iargs - 1; i >= 0; --i) {
5185          const TCGCallArgumentLoc *loc = &info->in[i];
5186          TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5187  
5188          switch (loc->kind) {
5189          case TCG_CALL_ARG_NORMAL:
5190          case TCG_CALL_ARG_EXTEND_U:
5191          case TCG_CALL_ARG_EXTEND_S:
5192              load_arg_normal(s, loc, ts, &allocated_regs);
5193              break;
5194          case TCG_CALL_ARG_BY_REF:
5195              load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5196              load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5197                           arg_slot_stk_ofs(loc->ref_slot),
5198                           &allocated_regs);
5199              break;
5200          case TCG_CALL_ARG_BY_REF_N:
5201              load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5202              break;
5203          default:
5204              g_assert_not_reached();
5205          }
5206      }
5207  
5208      /* Mark dead temporaries and free the associated registers.  */
5209      for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5210          if (IS_DEAD_ARG(i)) {
5211              temp_dead(s, arg_temp(op->args[i]));
5212          }
5213      }
5214  
5215      /* Clobber call registers.  */
5216      for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5217          if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5218              tcg_reg_free(s, i, allocated_regs);
5219          }
5220      }
5221  
5222      /*
5223       * Save globals if they might be written by the helper,
5224       * sync them if they might be read.
5225       */
5226      if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5227          /* Nothing to do */
5228      } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5229          sync_globals(s, allocated_regs);
5230      } else {
5231          save_globals(s, allocated_regs);
5232      }
5233  
5234      /*
5235       * If the ABI passes a pointer to the returned struct as the first
5236       * argument, load that now.  Pass a pointer to the output home slot.
5237       */
5238      if (info->out_kind == TCG_CALL_RET_BY_REF) {
5239          TCGTemp *ts = arg_temp(op->args[0]);
5240  
5241          if (!ts->mem_allocated) {
5242              temp_allocate_frame(s, ts);
5243          }
5244          load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5245      }
5246  
5247      tcg_out_call(s, tcg_call_func(op), info);
5248  
5249      /* Assign output registers and emit moves if needed.  */
5250      switch (info->out_kind) {
5251      case TCG_CALL_RET_NORMAL:
5252          for (i = 0; i < nb_oargs; i++) {
5253              TCGTemp *ts = arg_temp(op->args[i]);
5254              TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5255  
5256              /* ENV should not be modified.  */
5257              tcg_debug_assert(!temp_readonly(ts));
5258  
5259              set_temp_val_reg(s, ts, reg);
5260              ts->mem_coherent = 0;
5261          }
5262          break;
5263  
5264      case TCG_CALL_RET_BY_VEC:
5265          {
5266              TCGTemp *ts = arg_temp(op->args[0]);
5267  
5268              tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5269              tcg_debug_assert(ts->temp_subindex == 0);
5270              if (!ts->mem_allocated) {
5271                  temp_allocate_frame(s, ts);
5272              }
5273              tcg_out_st(s, TCG_TYPE_V128,
5274                         tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5275                         ts->mem_base->reg, ts->mem_offset);
5276          }
5277          /* fall through to mark all parts in memory */
5278  
5279      case TCG_CALL_RET_BY_REF:
5280          /* The callee has performed a write through the reference. */
5281          for (i = 0; i < nb_oargs; i++) {
5282              TCGTemp *ts = arg_temp(op->args[i]);
5283              ts->val_type = TEMP_VAL_MEM;
5284          }
5285          break;
5286  
5287      default:
5288          g_assert_not_reached();
5289      }
5290  
5291      /* Flush or discard output registers as needed. */
5292      for (i = 0; i < nb_oargs; i++) {
5293          TCGTemp *ts = arg_temp(op->args[i]);
5294          if (NEED_SYNC_ARG(i)) {
5295              temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5296          } else if (IS_DEAD_ARG(i)) {
5297              temp_dead(s, ts);
5298          }
5299      }
5300  }
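
/*
 * Illustrative sketch (not part of the build): the call-clobber flush
 * in tcg_reg_alloc_call() above, modeled as bitmask arithmetic.  Any
 * temp living in a call-clobbered register must be freed (spilled or
 * discarded) before the call.  The masks are made-up values.
 */
#if 0
#include <assert.h>

int main(void)
{
    unsigned clobbered = 0x0000000f;  /* tcg_target_call_clobber_regs */
    unsigned live      = 0x00000005;  /* temps resident in regs 0 and 2 */
    unsigned to_flush  = live & clobbered;

    assert(to_flush == 0x5);          /* both get tcg_reg_free() */
    return 0;
}
#endif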
5301  
5302  /**
5303   * atom_and_align_for_opc:
5304   * @s: tcg context
5305   * @opc: memory operation code
5306   * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5307   * @allow_two_ops: true if we are prepared to issue two operations
5308   *
5309   * Return the alignment and atomicity to use for the inline fast path
5310   * for the given memory operation.  The alignment may be larger than
5311   * that specified in @opc, and the correct alignment will be diagnosed
5312   * by the slow path helper.
5313   *
5314   * If @allow_two_ops, the host is prepared to test for 2x alignment,
5315   * and issue two loads or stores for subalignment.
5316   */
5317  static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5318                                             MemOp host_atom, bool allow_two_ops)
5319  {
5320      MemOp align = get_alignment_bits(opc);
5321      MemOp size = opc & MO_SIZE;
5322      MemOp half = size ? size - 1 : 0;
5323      MemOp atmax;
5324      MemOp atom;
5325  
5326      /* When serialized, no further atomicity required.  */
5327      if (s->gen_tb->cflags & CF_PARALLEL) {
5328          atom = opc & MO_ATOM_MASK;
5329      } else {
5330          atom = MO_ATOM_NONE;
5331      }
5332  
5333      switch (atom) {
5334      case MO_ATOM_NONE:
5335          /* The operation requires no specific atomicity. */
5336          atmax = MO_8;
5337          break;
5338  
5339      case MO_ATOM_IFALIGN:
5340          atmax = size;
5341          break;
5342  
5343      case MO_ATOM_IFALIGN_PAIR:
5344          atmax = half;
5345          break;
5346  
5347      case MO_ATOM_WITHIN16:
5348          atmax = size;
5349          if (size == MO_128) {
5350              /* Misalignment implies !within16, and therefore no atomicity. */
5351          } else if (host_atom != MO_ATOM_WITHIN16) {
5352              /* The host does not implement within16, so require alignment. */
5353              align = MAX(align, size);
5354          }
5355          break;
5356  
5357      case MO_ATOM_WITHIN16_PAIR:
5358          atmax = size;
5359          /*
5360           * Misalignment implies !within16, and therefore half atomicity.
5361           * Any host prepared for two operations can implement this with
5362           * half alignment.
5363           */
5364          if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5365              align = MAX(align, half);
5366          }
5367          break;
5368  
5369      case MO_ATOM_SUBALIGN:
5370          atmax = size;
5371          if (host_atom != MO_ATOM_SUBALIGN) {
5372              /* If unaligned but not odd, there are subobjects up to half. */
5373              if (allow_two_ops) {
5374                  align = MAX(align, half);
5375              } else {
5376                  align = MAX(align, size);
5377              }
5378          }
5379          break;
5380  
5381      default:
5382          g_assert_not_reached();
5383      }
5384  
5385      return (TCGAtomAlign){ .atom = atmax, .align = align };
5386  }
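
/*
 * Illustrative sketch (not part of the build): the MO_ATOM_SUBALIGN
 * arm of atom_and_align_for_opc() above.  For an 8-byte access
 * (size == MO_64, half == MO_32) on a host without subalign support,
 * a backend willing to issue two operations only needs 4-byte
 * alignment; otherwise full 8-byte alignment is forced.  MemOp sizes
 * are log2 of the byte count.
 */
#if 0
#include <assert.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    int size = 3, half = 2;           /* MO_64, MO_32 */
    int align = 0;                    /* no alignment requested in opc */

    int two_ops_align = MAX(align, half);
    int one_op_align  = MAX(align, size);

    assert(1 << two_ops_align == 4);
    assert(1 << one_op_align == 8);
    return 0;
}
#endif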
5387  
5388  /*
5389   * Similarly for qemu_ld/st slow path helpers.
5390   * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5391   * using only the provided backend tcg_out_* functions.
5392   */
5393  
5394  static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5395  {
5396      int ofs = arg_slot_stk_ofs(slot);
5397  
5398      /*
5399       * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5400       * require extension to uint64_t, adjust the address for uint32_t.
5401       */
5402      if (HOST_BIG_ENDIAN &&
5403          TCG_TARGET_REG_BITS == 64 &&
5404          type == TCG_TYPE_I32) {
5405          ofs += 4;
5406      }
5407      return ofs;
5408  }
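
/*
 * Illustrative sketch (not part of the build): the big-endian
 * adjustment in tcg_out_helper_stk_ofs() above.  On a 64-bit
 * big-endian host a 32-bit value occupies the high-address half of
 * its 8-byte stack slot, so its address is the slot base plus 4.
 * The slot base value is made up.
 */
#if 0
#include <assert.h>

int main(void)
{
    int slot_base = 16;      /* arg_slot_stk_ofs(slot), hypothetical */
    int host_big_endian = 1, reg_bits = 64;
    int is_i32 = 1;
    int ofs = slot_base;

    if (host_big_endian && reg_bits == 64 && is_i32) {
        ofs += 4;
    }
    assert(ofs == 20);
    return 0;
}
#endif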
5409  
5410  static void tcg_out_helper_load_slots(TCGContext *s,
5411                                        unsigned nmov, TCGMovExtend *mov,
5412                                        const TCGLdstHelperParam *parm)
5413  {
5414      unsigned i;
5415      TCGReg dst3;
5416  
5417      /*
5418       * Start from the end, storing to the stack first.
5419       * This frees those registers, so we need not consider overlap.
5420       */
5421      for (i = nmov; i-- > 0; ) {
5422          unsigned slot = mov[i].dst;
5423  
5424          if (arg_slot_reg_p(slot)) {
5425              goto found_reg;
5426          }
5427  
5428          TCGReg src = mov[i].src;
5429          TCGType dst_type = mov[i].dst_type;
5430          MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5431  
5432          /* The argument is going onto the stack; extend into scratch. */
5433          if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5434              tcg_debug_assert(parm->ntmp != 0);
5435              mov[i].dst = src = parm->tmp[0];
5436              tcg_out_movext1(s, &mov[i]);
5437          }
5438  
5439          tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5440                     tcg_out_helper_stk_ofs(dst_type, slot));
5441      }
5442      return;
5443  
5444   found_reg:
5445      /*
5446       * The remaining arguments are in registers.
5447       * Convert slot numbers to argument registers.
5448       */
5449      nmov = i + 1;
5450      for (i = 0; i < nmov; ++i) {
5451          mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5452      }
5453  
5454      switch (nmov) {
5455      case 4:
5456          /* The backend must have provided enough temps for the worst case. */
5457          tcg_debug_assert(parm->ntmp >= 2);
5458  
5459          dst3 = mov[3].dst;
5460          for (unsigned j = 0; j < 3; ++j) {
5461              if (dst3 == mov[j].src) {
5462                  /*
5463                   * Conflict. Copy the source to a temporary, perform the
5464                   * remaining moves, then the extension from our scratch
5465                   * on the way out.
5466                   */
5467                  TCGReg scratch = parm->tmp[1];
5468  
5469                  tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5470                  tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5471                  tcg_out_movext1_new_src(s, &mov[3], scratch);
5472                  return;
5473              }
5474          }
5475  
5476          /* No conflicts: perform this move and continue. */
5477          tcg_out_movext1(s, &mov[3]);
5478          /* fall through */
5479  
5480      case 3:
5481          tcg_out_movext3(s, mov, mov + 1, mov + 2,
5482                          parm->ntmp ? parm->tmp[0] : -1);
5483          break;
5484      case 2:
5485          tcg_out_movext2(s, mov, mov + 1,
5486                          parm->ntmp ? parm->tmp[0] : -1);
5487          break;
5488      case 1:
5489          tcg_out_movext1(s, mov);
5490          break;
5491      default:
5492          g_assert_not_reached();
5493      }
5494  }
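
/*
 * Illustrative sketch (not part of the build): the conflict test in
 * the four-move case of tcg_out_helper_load_slots() above.  If the
 * destination of mov[3] is still needed as a source by an earlier
 * move, mov[3]'s source is parked in a scratch register first.
 * Register numbers are made up.
 */
#if 0
#include <assert.h>
#include <stdbool.h>

int main(void)
{
    int dst[4] = { 0, 1, 2, 3 };
    int src[4] = { 4, 3, 6, 7 };      /* mov[1] reads reg 3... */
    int dst3 = dst[3];                /* ...which mov[3] overwrites */
    bool conflict = false;

    for (int j = 0; j < 3; ++j) {
        conflict |= (dst3 == src[j]);
    }
    assert(conflict);                 /* take the scratch-register path */
    return 0;
}
#endif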
5495  
5496  static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5497                                      TCGType type, tcg_target_long imm,
5498                                      const TCGLdstHelperParam *parm)
5499  {
5500      if (arg_slot_reg_p(slot)) {
5501          tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5502      } else {
5503          int ofs = tcg_out_helper_stk_ofs(type, slot);
5504          if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5505              tcg_debug_assert(parm->ntmp != 0);
5506              tcg_out_movi(s, type, parm->tmp[0], imm);
5507              tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5508          }
5509      }
5510  }
5511  
5512  static void tcg_out_helper_load_common_args(TCGContext *s,
5513                                              const TCGLabelQemuLdst *ldst,
5514                                              const TCGLdstHelperParam *parm,
5515                                              const TCGHelperInfo *info,
5516                                              unsigned next_arg)
5517  {
5518      TCGMovExtend ptr_mov = {
5519          .dst_type = TCG_TYPE_PTR,
5520          .src_type = TCG_TYPE_PTR,
5521          .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5522      };
5523      const TCGCallArgumentLoc *loc = &info->in[0];
5524      TCGType type;
5525      unsigned slot;
5526      tcg_target_ulong imm;
5527  
5528      /*
5529       * Handle env, which is always first.
5530       */
5531      ptr_mov.dst = loc->arg_slot;
5532      ptr_mov.src = TCG_AREG0;
5533      tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5534  
5535      /*
5536       * Handle oi.
5537       */
5538      imm = ldst->oi;
5539      loc = &info->in[next_arg];
5540      type = TCG_TYPE_I32;
5541      switch (loc->kind) {
5542      case TCG_CALL_ARG_NORMAL:
5543          break;
5544      case TCG_CALL_ARG_EXTEND_U:
5545      case TCG_CALL_ARG_EXTEND_S:
5546          /* No extension required for MemOpIdx. */
5547          tcg_debug_assert(imm <= INT32_MAX);
5548          type = TCG_TYPE_REG;
5549          break;
5550      default:
5551          g_assert_not_reached();
5552      }
5553      tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5554      next_arg++;
5555  
5556      /*
5557       * Handle ra.
5558       */
5559      loc = &info->in[next_arg];
5560      slot = loc->arg_slot;
5561      if (parm->ra_gen) {
5562          int arg_reg = -1;
5563          TCGReg ra_reg;
5564  
5565          if (arg_slot_reg_p(slot)) {
5566              arg_reg = tcg_target_call_iarg_regs[slot];
5567          }
5568          ra_reg = parm->ra_gen(s, ldst, arg_reg);
5569  
5570          ptr_mov.dst = slot;
5571          ptr_mov.src = ra_reg;
5572          tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5573      } else {
5574          imm = (uintptr_t)ldst->raddr;
5575          tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5576      }
5577  }
5578  
5579  static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5580                                         const TCGCallArgumentLoc *loc,
5581                                         TCGType dst_type, TCGType src_type,
5582                                         TCGReg lo, TCGReg hi)
5583  {
5584      MemOp reg_mo;
5585  
5586      if (dst_type <= TCG_TYPE_REG) {
5587          MemOp src_ext;
5588  
5589          switch (loc->kind) {
5590          case TCG_CALL_ARG_NORMAL:
5591              src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5592              break;
5593          case TCG_CALL_ARG_EXTEND_U:
5594              dst_type = TCG_TYPE_REG;
5595              src_ext = MO_UL;
5596              break;
5597          case TCG_CALL_ARG_EXTEND_S:
5598              dst_type = TCG_TYPE_REG;
5599              src_ext = MO_SL;
5600              break;
5601          default:
5602              g_assert_not_reached();
5603          }
5604  
5605          mov[0].dst = loc->arg_slot;
5606          mov[0].dst_type = dst_type;
5607          mov[0].src = lo;
5608          mov[0].src_type = src_type;
5609          mov[0].src_ext = src_ext;
5610          return 1;
5611      }
5612  
5613      if (TCG_TARGET_REG_BITS == 32) {
5614          assert(dst_type == TCG_TYPE_I64);
5615          reg_mo = MO_32;
5616      } else {
5617          assert(dst_type == TCG_TYPE_I128);
5618          reg_mo = MO_64;
5619      }
5620  
5621      mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5622      mov[0].src = lo;
5623      mov[0].dst_type = TCG_TYPE_REG;
5624      mov[0].src_type = TCG_TYPE_REG;
5625      mov[0].src_ext = reg_mo;
5626  
5627      mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5628      mov[1].src = hi;
5629      mov[1].dst_type = TCG_TYPE_REG;
5630      mov[1].src_type = TCG_TYPE_REG;
5631      mov[1].src_ext = reg_mo;
5632  
5633      return 2;
5634  }
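
/*
 * Illustrative sketch (not part of the build): the two-slot split at
 * the end of tcg_out_helper_add_mov() above.  A 64-bit value passed
 * in two 32-bit slots puts the low word in loc[0] on a little-endian
 * host and in loc[1] on a big-endian one; the high word takes the
 * other slot.
 */
#if 0
#include <assert.h>

int main(void)
{
    int host_big_endian = 0;
    int lo_slot = host_big_endian;    /* loc[HOST_BIG_ENDIAN].arg_slot */
    int hi_slot = !host_big_endian;   /* loc[!HOST_BIG_ENDIAN].arg_slot */

    assert(lo_slot == 0 && hi_slot == 1);
    return 0;
}
#endif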
5635  
5636  static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5637                                     const TCGLdstHelperParam *parm)
5638  {
5639      const TCGHelperInfo *info;
5640      const TCGCallArgumentLoc *loc;
5641      TCGMovExtend mov[2];
5642      unsigned next_arg, nmov;
5643      MemOp mop = get_memop(ldst->oi);
5644  
5645      switch (mop & MO_SIZE) {
5646      case MO_8:
5647      case MO_16:
5648      case MO_32:
5649          info = &info_helper_ld32_mmu;
5650          break;
5651      case MO_64:
5652          info = &info_helper_ld64_mmu;
5653          break;
5654      case MO_128:
5655          info = &info_helper_ld128_mmu;
5656          break;
5657      default:
5658          g_assert_not_reached();
5659      }
5660  
5661      /* Defer env argument. */
5662      next_arg = 1;
5663  
5664      loc = &info->in[next_arg];
5665      if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5666          /*
5667           * 32-bit host with 32-bit guest: zero-extend the guest address
5668           * to 64-bits for the helper by storing the low part, then
5669           * load a zero for the high part.
5670           */
5671          tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5672                                 TCG_TYPE_I32, TCG_TYPE_I32,
5673                                 ldst->addrlo_reg, -1);
5674          tcg_out_helper_load_slots(s, 1, mov, parm);
5675  
5676          tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5677                                  TCG_TYPE_I32, 0, parm);
5678          next_arg += 2;
5679      } else {
5680          nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5681                                        ldst->addrlo_reg, ldst->addrhi_reg);
5682          tcg_out_helper_load_slots(s, nmov, mov, parm);
5683          next_arg += nmov;
5684      }
5685  
5686      switch (info->out_kind) {
5687      case TCG_CALL_RET_NORMAL:
5688      case TCG_CALL_RET_BY_VEC:
5689          break;
5690      case TCG_CALL_RET_BY_REF:
5691          /*
5692           * The return reference is in the first argument slot.
5693           * We need memory in which to return: re-use the top of stack.
5694           */
5695          {
5696              int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5697  
5698              if (arg_slot_reg_p(0)) {
5699                  tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5700                                   TCG_REG_CALL_STACK, ofs_slot0);
5701              } else {
5702                  tcg_debug_assert(parm->ntmp != 0);
5703                  tcg_out_addi_ptr(s, parm->tmp[0],
5704                                   TCG_REG_CALL_STACK, ofs_slot0);
5705                  tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5706                             TCG_REG_CALL_STACK, ofs_slot0);
5707              }
5708          }
5709          break;
5710      default:
5711          g_assert_not_reached();
5712      }
5713  
5714      tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5715  }
5716  
5717  static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5718                                    bool load_sign,
5719                                    const TCGLdstHelperParam *parm)
5720  {
5721      MemOp mop = get_memop(ldst->oi);
5722      TCGMovExtend mov[2];
5723      int ofs_slot0;
5724  
5725      switch (ldst->type) {
5726      case TCG_TYPE_I64:
5727          if (TCG_TARGET_REG_BITS == 32) {
5728              break;
5729          }
5730          /* fall through */
5731  
5732      case TCG_TYPE_I32:
5733          mov[0].dst = ldst->datalo_reg;
5734          mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5735          mov[0].dst_type = ldst->type;
5736          mov[0].src_type = TCG_TYPE_REG;
5737  
5738          /*
5739           * If load_sign, then we allowed the helper to perform the
5740           * appropriate sign extension to tcg_target_ulong, and all
5741           * we need now is a plain move.
5742           *
5743           * If not, then we expect the relevant extension
5744           * instruction to be no more expensive than a move, and
5745           * we thus save icache space by using only one of the
5746           * two helper functions.
5747           */
5748          if (load_sign || !(mop & MO_SIGN)) {
5749              if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5750                  mov[0].src_ext = MO_32;
5751              } else {
5752                  mov[0].src_ext = MO_64;
5753              }
5754          } else {
5755              mov[0].src_ext = mop & MO_SSIZE;
5756          }
5757          tcg_out_movext1(s, mov);
5758          return;
5759  
5760      case TCG_TYPE_I128:
5761          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5762          ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5763          switch (TCG_TARGET_CALL_RET_I128) {
5764          case TCG_CALL_RET_NORMAL:
5765              break;
5766          case TCG_CALL_RET_BY_VEC:
5767              tcg_out_st(s, TCG_TYPE_V128,
5768                         tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5769                         TCG_REG_CALL_STACK, ofs_slot0);
5770              /* fall through */
5771          case TCG_CALL_RET_BY_REF:
5772              tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5773                         TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5774              tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5775                         TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5776              return;
5777          default:
5778              g_assert_not_reached();
5779          }
5780          break;
5781  
5782      default:
5783          g_assert_not_reached();
5784      }
5785  
5786      mov[0].dst = ldst->datalo_reg;
5787      mov[0].src =
5788          tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5789      mov[0].dst_type = TCG_TYPE_REG;
5790      mov[0].src_type = TCG_TYPE_REG;
5791      mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5792  
5793      mov[1].dst = ldst->datahi_reg;
5794      mov[1].src =
5795          tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5796      mov[1].dst_type = TCG_TYPE_REG;
5797      mov[1].src_type = TCG_TYPE_REG;
5798      mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5799  
5800      tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5801  }
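
/*
 * Illustrative sketch (not part of the build): the extension choice
 * in tcg_out_ld_helper_ret() above, using the real MemOp encodings
 * (MO_SIGN = 8, MO_SSIZE = 0xf).  For a signed 16-bit load whose
 * helper returned a zero-extended value, the reload must sign-extend
 * from 16 bits; with load_sign a plain 64-bit move suffices.
 */
#if 0
#include <assert.h>

int main(void)
{
    int MO_SIGN = 0x08, MO_SSIZE = 0x0f, MO_64 = 3;
    int mop = 0x1 | MO_SIGN;          /* MO_SW: 16-bit, signed */
    int load_sign = 0;
    int src_ext = (load_sign || !(mop & MO_SIGN)) ? MO_64
                                                  : (mop & MO_SSIZE);

    assert(src_ext == 0x9);           /* MO_SW: sign-extend from 16 bits */
    return 0;
}
#endif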
5802  
5803  static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5804                                     const TCGLdstHelperParam *parm)
5805  {
5806      const TCGHelperInfo *info;
5807      const TCGCallArgumentLoc *loc;
5808      TCGMovExtend mov[4];
5809      TCGType data_type;
5810      unsigned next_arg, nmov, n;
5811      MemOp mop = get_memop(ldst->oi);
5812  
5813      switch (mop & MO_SIZE) {
5814      case MO_8:
5815      case MO_16:
5816      case MO_32:
5817          info = &info_helper_st32_mmu;
5818          data_type = TCG_TYPE_I32;
5819          break;
5820      case MO_64:
5821          info = &info_helper_st64_mmu;
5822          data_type = TCG_TYPE_I64;
5823          break;
5824      case MO_128:
5825          info = &info_helper_st128_mmu;
5826          data_type = TCG_TYPE_I128;
5827          break;
5828      default:
5829          g_assert_not_reached();
5830      }
5831  
5832      /* Defer env argument. */
5833      next_arg = 1;
5834      nmov = 0;
5835  
5836      /* Handle addr argument. */
5837      loc = &info->in[next_arg];
5838      if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5839          /*
5840           * 32-bit host with 32-bit guest: zero-extend the guest address
5841           * to 64-bits for the helper by storing the low part.  Later,
5842           * after we have processed the register inputs, we will load a
5843           * zero for the high part.
5844           */
5845          tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5846                                 TCG_TYPE_I32, TCG_TYPE_I32,
5847                                 ldst->addrlo_reg, -1);
5848          next_arg += 2;
5849          nmov += 1;
5850      } else {
5851          n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5852                                     ldst->addrlo_reg, ldst->addrhi_reg);
5853          next_arg += n;
5854          nmov += n;
5855      }
5856  
5857      /* Handle data argument. */
5858      loc = &info->in[next_arg];
5859      switch (loc->kind) {
5860      case TCG_CALL_ARG_NORMAL:
5861      case TCG_CALL_ARG_EXTEND_U:
5862      case TCG_CALL_ARG_EXTEND_S:
5863          n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5864                                     ldst->datalo_reg, ldst->datahi_reg);
5865          next_arg += n;
5866          nmov += n;
5867          tcg_out_helper_load_slots(s, nmov, mov, parm);
5868          break;
5869  
5870      case TCG_CALL_ARG_BY_REF:
5871          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5872          tcg_debug_assert(data_type == TCG_TYPE_I128);
5873          tcg_out_st(s, TCG_TYPE_I64,
5874                     HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5875                     TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5876          tcg_out_st(s, TCG_TYPE_I64,
5877                     HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5878                     TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5879  
5880          tcg_out_helper_load_slots(s, nmov, mov, parm);
5881  
5882          if (arg_slot_reg_p(loc->arg_slot)) {
5883              tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5884                               TCG_REG_CALL_STACK,
5885                               arg_slot_stk_ofs(loc->ref_slot));
5886          } else {
5887              tcg_debug_assert(parm->ntmp != 0);
5888              tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5889                               arg_slot_stk_ofs(loc->ref_slot));
5890              tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5891                         TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5892          }
5893          next_arg += 2;
5894          break;
5895  
5896      default:
5897          g_assert_not_reached();
5898      }
5899  
5900      if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5901          /* Zero extend the address by loading a zero for the high part. */
5902          loc = &info->in[1 + !HOST_BIG_ENDIAN];
5903          tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5904      }
5905  
5906      tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5907  }
5908  
5909  #ifdef CONFIG_PROFILER
5910  
5911  /* avoid copy/paste errors */
5912  #define PROF_ADD(to, from, field)                       \
5913      do {                                                \
5914          (to)->field += qatomic_read(&((from)->field));  \
5915      } while (0)
5916  
5917  #define PROF_MAX(to, from, field)                                       \
5918      do {                                                                \
5919          typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
5920          if (val__ > (to)->field) {                                      \
5921              (to)->field = val__;                                        \
5922          }                                                               \
5923      } while (0)
5924  
5925  /* Pass in a zeroed @prof */
5926  static inline
5927  void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5928  {
5929      unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5930      unsigned int i;
5931  
5932      for (i = 0; i < n_ctxs; i++) {
5933          TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5934          const TCGProfile *orig = &s->prof;
5935  
5936          if (counters) {
5937              PROF_ADD(prof, orig, cpu_exec_time);
5938              PROF_ADD(prof, orig, tb_count1);
5939              PROF_ADD(prof, orig, tb_count);
5940              PROF_ADD(prof, orig, op_count);
5941              PROF_MAX(prof, orig, op_count_max);
5942              PROF_ADD(prof, orig, temp_count);
5943              PROF_MAX(prof, orig, temp_count_max);
5944              PROF_ADD(prof, orig, del_op_count);
5945              PROF_ADD(prof, orig, code_in_len);
5946              PROF_ADD(prof, orig, code_out_len);
5947              PROF_ADD(prof, orig, search_out_len);
5948              PROF_ADD(prof, orig, interm_time);
5949              PROF_ADD(prof, orig, code_time);
5950              PROF_ADD(prof, orig, la_time);
5951              PROF_ADD(prof, orig, opt_time);
5952              PROF_ADD(prof, orig, restore_count);
5953              PROF_ADD(prof, orig, restore_time);
5954          }
5955          if (table) {
5956              int i;
5957  
5958              for (i = 0; i < NB_OPS; i++) {
5959                  PROF_ADD(prof, orig, table_op_count[i]);
5960              }
5961          }
5962      }
5963  }
5964  
5965  #undef PROF_ADD
5966  #undef PROF_MAX
5967  
5968  static void tcg_profile_snapshot_counters(TCGProfile *prof)
5969  {
5970      tcg_profile_snapshot(prof, true, false);
5971  }
5972  
5973  static void tcg_profile_snapshot_table(TCGProfile *prof)
5974  {
5975      tcg_profile_snapshot(prof, false, true);
5976  }
5977  
5978  void tcg_dump_op_count(GString *buf)
5979  {
5980      TCGProfile prof = {};
5981      int i;
5982  
5983      tcg_profile_snapshot_table(&prof);
5984      for (i = 0; i < NB_OPS; i++) {
5985          g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5986                                 prof.table_op_count[i]);
5987      }
5988  }
5989  
5990  int64_t tcg_cpu_exec_time(void)
5991  {
5992      unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5993      unsigned int i;
5994      int64_t ret = 0;
5995  
5996      for (i = 0; i < n_ctxs; i++) {
5997          const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5998          const TCGProfile *prof = &s->prof;
5999  
6000          ret += qatomic_read(&prof->cpu_exec_time);
6001      }
6002      return ret;
6003  }
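
      /*
       * Editor's note (a sketch, not the exact call site): cpu_exec_time
       * is accumulated by the execution loop using profile_getclock()
       * deltas, roughly:
       *
       *     int64_t ti = profile_getclock();
       *     ret = cpu_exec(cpu);
       *     qatomic_set(&prof->cpu_exec_time,
       *                 prof->cpu_exec_time + profile_getclock() - ti);
       */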
6004  #else
6005  void tcg_dump_op_count(GString *buf)
6006  {
6007      g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6008  }
6009  
6010  int64_t tcg_cpu_exec_time(void)
6011  {
6012      error_report("%s: TCG profiler not compiled", __func__);
6013      exit(EXIT_FAILURE);
6014  }
6015  #endif
6016  
6017  
6018  int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6019  {
6020  #ifdef CONFIG_PROFILER
6021      TCGProfile *prof = &s->prof;
6022  #endif
6023      int i, start_words, num_insns;
6024      TCGOp *op;
6025  
6026  #ifdef CONFIG_PROFILER
6027      {
6028          int n = 0;
6029  
6030          QTAILQ_FOREACH(op, &s->ops, link) {
6031              n++;
6032          }
6033          qatomic_set(&prof->op_count, prof->op_count + n);
6034          if (n > prof->op_count_max) {
6035              qatomic_set(&prof->op_count_max, n);
6036          }
6037  
6038          n = s->nb_temps;
6039          qatomic_set(&prof->temp_count, prof->temp_count + n);
6040          if (n > prof->temp_count_max) {
6041              qatomic_set(&prof->temp_count_max, n);
6042          }
6043      }
6044  #endif
6045  
6046      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6047                   && qemu_log_in_addr_range(pc_start))) {
6048          FILE *logfile = qemu_log_trylock();
6049          if (logfile) {
6050              fprintf(logfile, "OP:\n");
6051              tcg_dump_ops(s, logfile, false);
6052              fprintf(logfile, "\n");
6053              qemu_log_unlock(logfile);
6054          }
6055      }
6056  
6057  #ifdef CONFIG_DEBUG_TCG
6058      /* Ensure all labels referenced have been emitted.  */
6059      {
6060          TCGLabel *l;
6061          bool error = false;
6062  
6063          QSIMPLEQ_FOREACH(l, &s->labels, next) {
6064              if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6065                  qemu_log_mask(CPU_LOG_TB_OP,
6066                                "$L%d referenced but not present.\n", l->id);
6067                  error = true;
6068              }
6069          }
6070          assert(!error);
6071      }
6072  #endif
6073  
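          /*
           * Editor's sketch: a label becomes "present" when emitted, so the
           * assert above catches front-end sequences like:
           *
           *     TCGLabel *l = gen_new_label();
           *     tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l);  // branch to l
           *     // bug: gen_set_label(l) never emitted, so !l->present
           */
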
6074  #ifdef CONFIG_PROFILER
6075      qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
6076  #endif
6077  
6078      tcg_optimize(s);
6079  
6080  #ifdef CONFIG_PROFILER
6081      qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
6082      qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
6083  #endif
6084  
6085      reachable_code_pass(s);
6086      liveness_pass_0(s);
6087      liveness_pass_1(s);
6088  
6089      if (s->nb_indirects > 0) {
6090          if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6091                       && qemu_log_in_addr_range(pc_start))) {
6092              FILE *logfile = qemu_log_trylock();
6093              if (logfile) {
6094                  fprintf(logfile, "OP before indirect lowering:\n");
6095                  tcg_dump_ops(s, logfile, false);
6096                  fprintf(logfile, "\n");
6097                  qemu_log_unlock(logfile);
6098              }
6099          }
6100  
6101          /* Replace indirect temps with direct temps.  */
6102          if (liveness_pass_2(s)) {
6103              /* If changes were made, re-run liveness.  */
6104              liveness_pass_1(s);
6105          }
6106      }
6107  
6108  #ifdef CONFIG_PROFILER
6109      qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
6110  #endif
6111  
6112      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6113                   && qemu_log_in_addr_range(pc_start))) {
6114          FILE *logfile = qemu_log_trylock();
6115          if (logfile) {
6116              fprintf(logfile, "OP after optimization and liveness analysis:\n");
6117              tcg_dump_ops(s, logfile, true);
6118              fprintf(logfile, "\n");
6119              qemu_log_unlock(logfile);
6120          }
6121      }
6122  
6123      /* Initialize goto_tb jump offsets. */
6124      tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6125      tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6126      tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6127      tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
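          /*
           * Editor's note: these stay TB_JMP_OFFSET_INVALID unless the
           * block emits a goto_tb, in which case the backend records the
           * real offsets while emitting it.
           */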
6128  
6129      tcg_reg_alloc_start(s);
6130  
6131      /*
6132       * Reset the buffer pointers when restarting after overflow.
6133       * TODO: Move this into translate-all.c with the rest of the
6134       * buffer management.  Having only this done here is confusing.
6135       */
6136      s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6137      s->code_ptr = s->code_buf;
6138  
6139  #ifdef TCG_TARGET_NEED_LDST_LABELS
6140      QSIMPLEQ_INIT(&s->ldst_labels);
6141  #endif
6142  #ifdef TCG_TARGET_NEED_POOL_LABELS
6143      s->pool_labels = NULL;
6144  #endif
6145  
6146      start_words = s->insn_start_words;
6147      s->gen_insn_data =
6148          tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6149  
6150      num_insns = -1;
6151      QTAILQ_FOREACH(op, &s->ops, link) {
6152          TCGOpcode opc = op->opc;
6153  
6154  #ifdef CONFIG_PROFILER
6155          qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
6156  #endif
6157  
6158          switch (opc) {
6159          case INDEX_op_mov_i32:
6160          case INDEX_op_mov_i64:
6161          case INDEX_op_mov_vec:
6162              tcg_reg_alloc_mov(s, op);
6163              break;
6164          case INDEX_op_dup_vec:
6165              tcg_reg_alloc_dup(s, op);
6166              break;
6167          case INDEX_op_insn_start:
6168              if (num_insns >= 0) {
6169                  size_t off = tcg_current_code_size(s);
6170                  s->gen_insn_end_off[num_insns] = off;
6171                  /* Assert that we do not overflow our stored offset.  */
6172                  assert(s->gen_insn_end_off[num_insns] == off);
6173              }
6174              num_insns++;
6175              for (i = 0; i < start_words; ++i) {
6176                  s->gen_insn_data[num_insns * start_words + i] =
6177                      tcg_get_insn_start_param(op, i);
6178              }
6179              break;
6180          case INDEX_op_discard:
6181              temp_dead(s, arg_temp(op->args[0]));
6182              break;
6183          case INDEX_op_set_label:
6184              tcg_reg_alloc_bb_end(s, s->reserved_regs);
6185              tcg_out_label(s, arg_label(op->args[0]));
6186              break;
6187          case INDEX_op_call:
6188              tcg_reg_alloc_call(s, op);
6189              break;
6190          case INDEX_op_exit_tb:
6191              tcg_out_exit_tb(s, op->args[0]);
6192              break;
6193          case INDEX_op_goto_tb:
6194              tcg_out_goto_tb(s, op->args[0]);
6195              break;
6196          case INDEX_op_dup2_vec:
6197              if (tcg_reg_alloc_dup2(s, op)) {
6198                  break;
6199              }
6200              /* fall through */
6201          default:
6202              /* Sanity check that we've not introduced any unhandled opcodes. */
6203              tcg_debug_assert(tcg_op_supported(opc));
6204          /* Note: it would be faster to have specialized register
6205             allocator functions for some common argument patterns. */
6207              tcg_reg_alloc_op(s, op);
6208              break;
6209          }
6210          /* Test for (pending) buffer overflow.  The assumption is that any
6211             one operation beginning below the high water mark cannot overrun
6212             the buffer completely.  Thus we can test for overflow after
6213             generating code without having to check during generation.  */
6214          if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6215              return -1;
6216          }
6217          /* Test for TB overflow, as seen by gen_insn_end_off.  */
6218          if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6219              return -2;
6220          }
6221      }
6222      tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6223      s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6224  
6225      /* Generate TB finalization at the end of block */
6226  #ifdef TCG_TARGET_NEED_LDST_LABELS
6227      i = tcg_out_ldst_finalize(s);
6228      if (i < 0) {
6229          return i;
6230      }
6231  #endif
6232  #ifdef TCG_TARGET_NEED_POOL_LABELS
6233      i = tcg_out_pool_finalize(s);
6234      if (i < 0) {
6235          return i;
6236      }
6237  #endif
6238      if (!tcg_resolve_relocs(s)) {
6239          return -2;
6240      }
6241  
6242  #ifndef CONFIG_TCG_INTERPRETER
6243      /* flush instruction cache */
6244      flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6245                          (uintptr_t)s->code_buf,
6246                          tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6247  #endif
6248  
6249      return tcg_current_code_size(s);
6250  }
6251  
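      /*
       * Editor's sketch of the caller's side (modeled on tb_gen_code();
       * names abbreviated): the negative returns both mean "restart":
       *
       *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
       *     if (unlikely(gen_code_size < 0)) {
       *         // -1: code_gen_buffer high-water mark exceeded
       *         // -2: offsets too large for the 16-bit search data,
       *         //     or unresolvable relocations
       *         ... flush or grow the buffer and translate again ...
       *     }
       */
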
6252  #ifdef CONFIG_PROFILER
6253  void tcg_dump_info(GString *buf)
6254  {
6255      TCGProfile prof = {};
6256      const TCGProfile *s;
6257      int64_t tb_count;
6258      int64_t tb_div_count;
6259      int64_t tot;
6260  
6261      tcg_profile_snapshot_counters(&prof);
6262      s = &prof;
6263      tb_count = s->tb_count;
6264      tb_div_count = tb_count ? tb_count : 1;
6265      tot = s->interm_time + s->code_time;
6266  
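          /* Editor's note: the seconds figure assumes a nominal 2.4 GHz. */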
6267      g_string_append_printf(buf, "JIT cycles          %" PRId64
6268                             " (%0.3f s at 2.4 GHz)\n",
6269                             tot, tot / 2.4e9);
6270      g_string_append_printf(buf, "translated TBs      %" PRId64
6271                             " (aborted=%" PRId64 " %0.1f%%)\n",
6272                             tb_count, s->tb_count1 - tb_count,
6273                             (double)(s->tb_count1 - tb_count)
6274                             / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
6275      g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
6276                             (double)s->op_count / tb_div_count, s->op_count_max);
6277      g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
6278                             (double)s->del_op_count / tb_div_count);
6279      g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
6280                             (double)s->temp_count / tb_div_count,
6281                             s->temp_count_max);
6282      g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
6283                             (double)s->code_out_len / tb_div_count);
6284      g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
6285                             (double)s->search_out_len / tb_div_count);
6286  
6287      g_string_append_printf(buf, "cycles/op           %0.1f\n",
6288                             s->op_count ? (double)tot / s->op_count : 0);
6289      g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
6290                             s->code_in_len ? (double)tot / s->code_in_len : 0);
6291      g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
6292                             s->code_out_len ? (double)tot / s->code_out_len : 0);
6293      g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
6294                             s->search_out_len ?
6295                             (double)tot / s->search_out_len : 0);
6296      if (tot == 0) {
6297          tot = 1;
6298      }
6299      g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
6300                             (double)s->interm_time / tot * 100.0);
6301      g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
6302                             (double)s->code_time / tot * 100.0);
6303      g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
6304                             (double)s->opt_time / (s->code_time ?
6305                                                    s->code_time : 1)
6306                             * 100.0);
6307      g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
6308                             (double)s->la_time / (s->code_time ?
6309                                                   s->code_time : 1) * 100.0);
6310      g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
6311                             s->restore_count);
6312      g_string_append_printf(buf, "  avg cycles        %0.1f\n",
6313                             s->restore_count ?
6314                             (double)s->restore_time / s->restore_count : 0);
6315  }
6316  #else
6317  void tcg_dump_info(GString *buf)
6318  {
6319      g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6320  }
6321  #endif
6322  
6323  #ifdef ELF_HOST_MACHINE
6324  /* To use this feature, the backend must do three things:
6325  
6326     (1) Define ELF_HOST_MACHINE to indicate both what value to
6327         put into the ELF image and to indicate support for the feature.
6328  
6329     (2) Define tcg_register_jit.  This should create a buffer containing
6330         the contents of a .debug_frame section that describes the post-
6331         prologue unwind info for the tcg machine.
6332  
6333     (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6334     A sketch of steps (2) and (3) follows this comment.  */
6335  
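      /*
       * Editor's sketch of steps (2) and (3) for a hypothetical backend,
       * following the pattern of the real tcg-target.c.inc files:
       *
       *     static const DebugFrame debug_frame = {
       *         ... CIE and FDE contents for the host ABI ...
       *     };
       *
       *     void tcg_register_jit(const void *buf, size_t buf_size)
       *     {
       *         tcg_register_jit_int(buf, buf_size,
       *                              &debug_frame, sizeof(debug_frame));
       *     }
       */
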
6336  /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6337  typedef enum {
6338      JIT_NOACTION = 0,
6339      JIT_REGISTER_FN,
6340      JIT_UNREGISTER_FN
6341  } jit_actions_t;
6342  
6343  struct jit_code_entry {
6344      struct jit_code_entry *next_entry;
6345      struct jit_code_entry *prev_entry;
6346      const void *symfile_addr;
6347      uint64_t symfile_size;
6348  };
6349  
6350  struct jit_descriptor {
6351      uint32_t version;
6352      uint32_t action_flag;
6353      struct jit_code_entry *relevant_entry;
6354      struct jit_code_entry *first_entry;
6355  };
6356  
6357  void __jit_debug_register_code(void) __attribute__((noinline));
6358  void __jit_debug_register_code(void)
6359  {
6360      asm("");
6361  }
6362  
6363  /* Must statically initialize the version, because GDB may check
6364     the version before we can set it.  */
6365  struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6366  
6367  /* End GDB interface.  */
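
      /*
       * Editor's note: per the "JIT Compilation Interface" chapter of the
       * GDB manual, GDB sets a breakpoint in __jit_debug_register_code and,
       * when it fires, reads action_flag and relevant_entry from the
       * descriptor to load the in-memory ELF image at symfile_addr.
       */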
6368  
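      /*
       * Return the byte offset of @str within @strtab.  @str must be
       * present, or the loop below never terminates; callers only look up
       * names known to be in img->str, e.g.
       *
       *     find_string(img->str, ".text")        == 1
       *     find_string(img->str, ".debug_info")  == 7
       */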
6369  static int find_string(const char *strtab, const char *str)
6370  {
6371      const char *p = strtab + 1;
6372  
6373      while (1) {
6374          if (strcmp(p, str) == 0) {
6375              return p - strtab;
6376          }
6377          p += strlen(p) + 1;
6378      }
6379  }
6380  
6381  static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6382                                   const void *debug_frame,
6383                                   size_t debug_frame_size)
6384  {
6385      struct __attribute__((packed)) DebugInfo {
6386          uint32_t  len;
6387          uint16_t  version;
6388          uint32_t  abbrev;
6389          uint8_t   ptr_size;
6390          uint8_t   cu_die;
6391          uint16_t  cu_lang;
6392          uintptr_t cu_low_pc;
6393          uintptr_t cu_high_pc;
6394          uint8_t   fn_die;
6395          char      fn_name[16];
6396          uintptr_t fn_low_pc;
6397          uintptr_t fn_high_pc;
6398          uint8_t   cu_eoc;
6399      };
6400  
6401      struct ElfImage {
6402          ElfW(Ehdr) ehdr;
6403          ElfW(Phdr) phdr;
6404          ElfW(Shdr) shdr[7];
6405          ElfW(Sym)  sym[2];
6406          struct DebugInfo di;
6407          uint8_t    da[24];
6408          char       str[80];
6409      };
6410  
6411      struct ElfImage *img;
6412  
6413      static const struct ElfImage img_template = {
6414          .ehdr = {
6415              .e_ident[EI_MAG0] = ELFMAG0,
6416              .e_ident[EI_MAG1] = ELFMAG1,
6417              .e_ident[EI_MAG2] = ELFMAG2,
6418              .e_ident[EI_MAG3] = ELFMAG3,
6419              .e_ident[EI_CLASS] = ELF_CLASS,
6420              .e_ident[EI_DATA] = ELF_DATA,
6421              .e_ident[EI_VERSION] = EV_CURRENT,
6422              .e_type = ET_EXEC,
6423              .e_machine = ELF_HOST_MACHINE,
6424              .e_version = EV_CURRENT,
6425              .e_phoff = offsetof(struct ElfImage, phdr),
6426              .e_shoff = offsetof(struct ElfImage, shdr),
6427              .e_ehsize = sizeof(ElfW(Ehdr)),
6428              .e_phentsize = sizeof(ElfW(Phdr)),
6429              .e_phnum = 1,
6430              .e_shentsize = sizeof(ElfW(Shdr)),
6431              .e_shnum = ARRAY_SIZE(img->shdr),
6432              .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6433  #ifdef ELF_HOST_FLAGS
6434              .e_flags = ELF_HOST_FLAGS,
6435  #endif
6436  #ifdef ELF_OSABI
6437              .e_ident[EI_OSABI] = ELF_OSABI,
6438  #endif
6439          },
6440          .phdr = {
6441              .p_type = PT_LOAD,
6442              .p_flags = PF_X,
6443          },
6444          .shdr = {
6445              [0] = { .sh_type = SHT_NULL },
6446              /* Trick: The contents of code_gen_buffer are not present in
6447                 this fake ELF file; that got allocated elsewhere.  Therefore
6448                 we mark .text as SHT_NOBITS (similar to .bss) so that readers
6449                 will not look for contents.  We can record any address.  */
6450              [1] = { /* .text */
6451                  .sh_type = SHT_NOBITS,
6452                  .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6453              },
6454              [2] = { /* .debug_info */
6455                  .sh_type = SHT_PROGBITS,
6456                  .sh_offset = offsetof(struct ElfImage, di),
6457                  .sh_size = sizeof(struct DebugInfo),
6458              },
6459              [3] = { /* .debug_abbrev */
6460                  .sh_type = SHT_PROGBITS,
6461                  .sh_offset = offsetof(struct ElfImage, da),
6462                  .sh_size = sizeof(img->da),
6463              },
6464              [4] = { /* .debug_frame */
6465                  .sh_type = SHT_PROGBITS,
6466                  .sh_offset = sizeof(struct ElfImage),
6467              },
6468              [5] = { /* .symtab */
6469                  .sh_type = SHT_SYMTAB,
6470                  .sh_offset = offsetof(struct ElfImage, sym),
6471                  .sh_size = sizeof(img->sym),
6472                  .sh_info = 1,
6473                  .sh_link = ARRAY_SIZE(img->shdr) - 1,
6474                  .sh_entsize = sizeof(ElfW(Sym)),
6475              },
6476              [6] = { /* .strtab */
6477                  .sh_type = SHT_STRTAB,
6478                  .sh_offset = offsetof(struct ElfImage, str),
6479                  .sh_size = sizeof(img->str),
6480              }
6481          },
6482          .sym = {
6483              [1] = { /* code_gen_buffer */
6484                  .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6485                  .st_shndx = 1,
6486              }
6487          },
6488          .di = {
6489              .len = sizeof(struct DebugInfo) - 4,
6490              .version = 2,
6491              .ptr_size = sizeof(void *),
6492              .cu_die = 1,
6493              .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6494              .fn_die = 2,
6495              .fn_name = "code_gen_buffer"
6496          },
6497          .da = {
6498              1,          /* abbrev number (the cu) */
6499              0x11, 1,    /* DW_TAG_compile_unit, has children */
6500              0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6501              0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6502              0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6503              0, 0,       /* end of abbrev */
6504              2,          /* abbrev number (the fn) */
6505              0x2e, 0,    /* DW_TAG_subprogram, no children */
6506              0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6507              0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6508              0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6509              0, 0,       /* end of abbrev */
6510              0           /* no more abbrev */
6511          },
6512          .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6513                 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6514      };
6515  
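      /*
       * Editor's note: the template above holds only layout and constants;
       * every position-dependent field (load address, sizes, the DWARF pc
       * bounds, the FDE range) is patched into the copy made below.
       */
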
6516      /* We only need a single jit entry; statically allocate it.  */
6517      static struct jit_code_entry one_entry;
6518  
6519      uintptr_t buf = (uintptr_t)buf_ptr;
6520      size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6521      DebugFrameHeader *dfh;
6522  
6523      img = g_malloc(img_size);
6524      *img = img_template;
6525  
6526      img->phdr.p_vaddr = buf;
6527      img->phdr.p_paddr = buf;
6528      img->phdr.p_memsz = buf_size;
6529  
6530      img->shdr[1].sh_name = find_string(img->str, ".text");
6531      img->shdr[1].sh_addr = buf;
6532      img->shdr[1].sh_size = buf_size;
6533  
6534      img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6535      img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6536  
6537      img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6538      img->shdr[4].sh_size = debug_frame_size;
6539  
6540      img->shdr[5].sh_name = find_string(img->str, ".symtab");
6541      img->shdr[6].sh_name = find_string(img->str, ".strtab");
6542  
6543      img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6544      img->sym[1].st_value = buf;
6545      img->sym[1].st_size = buf_size;
6546  
6547      img->di.cu_low_pc = buf;
6548      img->di.cu_high_pc = buf + buf_size;
6549      img->di.fn_low_pc = buf;
6550      img->di.fn_high_pc = buf + buf_size;
6551  
6552      dfh = (DebugFrameHeader *)(img + 1);
6553      memcpy(dfh, debug_frame, debug_frame_size);
6554      dfh->fde.func_start = buf;
6555      dfh->fde.func_len = buf_size;
6556  
6557  #ifdef DEBUG_JIT
6558      /* Enable this block to debug creation of the ELF image file.
6559         The result can be inspected with readelf, objdump, etc.  */
6560      {
6561          g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6562          FILE *f = fopen(jit, "w+b");
6563          if (f) {
6564              if (fwrite(img, img_size, 1, f) != 1) {
6565                  /* Debug output only; ignore any write error.  */
6566              }
6567              fclose(f);
6568          }
6569      }
6570  #endif
6571  
6572      one_entry.symfile_addr = img;
6573      one_entry.symfile_size = img_size;
6574  
6575      __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6576      __jit_debug_descriptor.relevant_entry = &one_entry;
6577      __jit_debug_descriptor.first_entry = &one_entry;
6578      __jit_debug_register_code();
6579  }
6580  #else
6581  /* No support for the feature.  Provide the entry point expected by exec.c,
6582     and implement the internal function we declared earlier.  */
6583  
6584  static void tcg_register_jit_int(const void *buf, size_t size,
6585                                   const void *debug_frame,
6586                                   size_t debug_frame_size)
6587  {
6588  }
6589  
6590  void tcg_register_jit(const void *buf, size_t buf_size)
6591  {
6592  }
6593  #endif /* ELF_HOST_MACHINE */
6594  
6595  #if !TCG_TARGET_MAYBE_vec
6596  void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6597  {
6598      g_assert_not_reached();
6599  }
6600  #endif
6601