xref: /openbmc/qemu/tcg/optimize.c (revision 4f8f199fa569492bb07efee02489f521629d275d)
1  /*
2   * Optimizations for Tiny Code Generator for QEMU
3   *
4   * Copyright (c) 2010 Samsung Electronics.
5   * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  #include "qemu/int128.h"
28  #include "qemu/interval-tree.h"
29  #include "tcg/tcg-op-common.h"
30  #include "tcg-internal.h"
31  
32  #define CASE_OP_32_64(x)                        \
33          glue(glue(case INDEX_op_, x), _i32):    \
34          glue(glue(case INDEX_op_, x), _i64)
35  
36  #define CASE_OP_32_64_VEC(x)                    \
37          glue(glue(case INDEX_op_, x), _i32):    \
38          glue(glue(case INDEX_op_, x), _i64):    \
39          glue(glue(case INDEX_op_, x), _vec)
40  
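/*
 * For example, CASE_OP_32_64(add) expands to
 * "case INDEX_op_add_i32: case INDEX_op_add_i64", and
 * CASE_OP_32_64_VEC(and) additionally covers INDEX_op_and_vec,
 * so a single switch label handles every width/vector variant.
 */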
41  typedef struct MemCopyInfo {
42      IntervalTreeNode itree;
43      QSIMPLEQ_ENTRY (MemCopyInfo) next;
44      TCGTemp *ts;
45      TCGType type;
46  } MemCopyInfo;
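/*
 * Each MemCopyInfo records that the inclusive byte range
 * [itree.start, itree.last] in memory currently holds a copy of
 * temp @ts, stored with the given type.  A node is linked both into
 * the per-context interval tree (OptContext.mem_copy) and into the
 * owning temp's list (TempOptInfo.mem_copy); see record_mem_copy().
 */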
47  
48  typedef struct TempOptInfo {
49      bool is_const;
50      TCGTemp *prev_copy;
51      TCGTemp *next_copy;
52      QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
53      uint64_t val;
54      uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
55      uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
56  } TempOptInfo;
57  
58  typedef struct OptContext {
59      TCGContext *tcg;
60      TCGOp *prev_mb;
61      TCGTempSet temps_used;
62  
63      IntervalTreeRoot mem_copy;
64      QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
65  
66      /* In-flight values from optimization. */
67      uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
68      uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
69      uint64_t s_mask;  /* mask of clrsb(value) bits */
70      TCGType type;
71  } OptContext;
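/*
 * Illustration of a_mask: fold_and() sets a_mask = z1 & ~z2 when arg2
 * is constant; if every possibly-set bit of arg1 survives the mask,
 * a_mask is 0 and fold_masks() turns the AND into a plain copy of arg1.
 */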
72  
73  /* Calculate the smask for a specific value. */
74  static uint64_t smask_from_value(uint64_t value)
75  {
76      int rep = clrsb64(value);
77      return ~(~0ull >> rep);
78  }
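/*
 * Worked example: for value == 0x00ff, clrsb64() == 55 (bits 62..8
 * repeat the zero sign bit), so the result has the top 55 bits set:
 * 0xfffffffffffffe00.
 */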
79  
80  /*
81   * Calculate the smask for a given set of known-zeros.
82   * If there are lots of zeros on the left, we can consider the remainder
83   * an unsigned field, and thus the corresponding signed field is one bit
84   * larger.
85   */
86  static uint64_t smask_from_zmask(uint64_t zmask)
87  {
88      /*
89       * Only the 0 bits are significant for zmask, thus the msb itself
90       * must be zero, else we have no sign information.
91       */
92      int rep = clz64(zmask);
93      if (rep == 0) {
94          return 0;
95      }
96      rep -= 1;
97      return ~(~0ull >> rep);
98  }
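/*
 * Worked example: zmask == 0x0000ffff says bits 63..16 are known zero;
 * clz64() == 48, rep becomes 47, and the result has the top 47 bits
 * set, 0xfffffffffffe0000: a 16-bit unsigned value fits in a 17-bit
 * signed field.
 */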
99  
100  /*
101   * Recreate a properly left-aligned smask after manipulation.
102   * Some bit-shuffling, particularly shifts and rotates, may
103   * retain sign bits on the left, but may scatter disconnected
104   * sign bits on the right.  Retain only what remains to the left.
105   */
106  static uint64_t smask_from_smask(int64_t smask)
107  {
108      /* Only the 1 bits are significant for smask */
109      return smask_from_zmask(~smask);
110  }
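/*
 * Worked example: smask == 0xff000000000000f0 (sign bits scattered to
 * the right, e.g. by a rotate) gives ~smask == 0x00ffffffffffff0f, and
 * smask_from_zmask() of that keeps only the contiguous left-aligned
 * run: the top 7 bits, 0xfe00000000000000.
 */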
111  
112  static inline TempOptInfo *ts_info(TCGTemp *ts)
113  {
114      return ts->state_ptr;
115  }
116  
117  static inline TempOptInfo *arg_info(TCGArg arg)
118  {
119      return ts_info(arg_temp(arg));
120  }
121  
122  static inline bool ts_is_const(TCGTemp *ts)
123  {
124      return ts_info(ts)->is_const;
125  }
126  
127  static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
128  {
129      TempOptInfo *ti = ts_info(ts);
130      return ti->is_const && ti->val == val;
131  }
132  
133  static inline bool arg_is_const(TCGArg arg)
134  {
135      return ts_is_const(arg_temp(arg));
136  }
137  
138  static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
139  {
140      return ts_is_const_val(arg_temp(arg), val);
141  }
142  
143  static inline bool ts_is_copy(TCGTemp *ts)
144  {
145      return ts_info(ts)->next_copy != ts;
146  }
147  
148  static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
149  {
150      return a->kind < b->kind ? b : a;
151  }
152  
153  /* Initialize and activate a temporary.  */
154  static void init_ts_info(OptContext *ctx, TCGTemp *ts)
155  {
156      size_t idx = temp_idx(ts);
157      TempOptInfo *ti;
158  
159      if (test_bit(idx, ctx->temps_used.l)) {
160          return;
161      }
162      set_bit(idx, ctx->temps_used.l);
163  
164      ti = ts->state_ptr;
165      if (ti == NULL) {
166          ti = tcg_malloc(sizeof(TempOptInfo));
167          ts->state_ptr = ti;
168      }
169  
170      ti->next_copy = ts;
171      ti->prev_copy = ts;
172      QSIMPLEQ_INIT(&ti->mem_copy);
173      if (ts->kind == TEMP_CONST) {
174          ti->is_const = true;
175          ti->val = ts->val;
176          ti->z_mask = ts->val;
177          ti->s_mask = smask_from_value(ts->val);
178      } else {
179          ti->is_const = false;
180          ti->z_mask = -1;
181          ti->s_mask = 0;
182      }
183  }
184  
185  static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
186  {
187      IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
188      return r ? container_of(r, MemCopyInfo, itree) : NULL;
189  }
190  
191  static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
192  {
193      IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
194      return r ? container_of(r, MemCopyInfo, itree) : NULL;
195  }
196  
197  static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
198  {
199      TCGTemp *ts = mc->ts;
200      TempOptInfo *ti = ts_info(ts);
201  
202      interval_tree_remove(&mc->itree, &ctx->mem_copy);
203      QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
204      QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
205  }
206  
207  static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
208  {
209      while (true) {
210          MemCopyInfo *mc = mem_copy_first(ctx, s, l);
211          if (!mc) {
212              break;
213          }
214          remove_mem_copy(ctx, mc);
215      }
216  }
217  
218  static void remove_mem_copy_all(OptContext *ctx)
219  {
220      remove_mem_copy_in(ctx, 0, -1);
221      tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
222  }
223  
224  static TCGTemp *find_better_copy(TCGTemp *ts)
225  {
226      TCGTemp *i, *ret;
227  
228      /* If this is already readonly, we can't do better. */
229      if (temp_readonly(ts)) {
230          return ts;
231      }
232  
233      ret = ts;
234      for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
235          ret = cmp_better_copy(ret, i);
236      }
237      return ret;
238  }
239  
240  static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
241  {
242      TempOptInfo *si = ts_info(src_ts);
243      TempOptInfo *di = ts_info(dst_ts);
244      MemCopyInfo *mc;
245  
246      QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
247          tcg_debug_assert(mc->ts == src_ts);
248          mc->ts = dst_ts;
249      }
250      QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
251  }
252  
253  /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
254  static void reset_ts(OptContext *ctx, TCGTemp *ts)
255  {
256      TempOptInfo *ti = ts_info(ts);
257      TCGTemp *pts = ti->prev_copy;
258      TCGTemp *nts = ti->next_copy;
259      TempOptInfo *pi = ts_info(pts);
260      TempOptInfo *ni = ts_info(nts);
261  
262      ni->prev_copy = ti->prev_copy;
263      pi->next_copy = ti->next_copy;
264      ti->next_copy = ts;
265      ti->prev_copy = ts;
266      ti->is_const = false;
267      ti->z_mask = -1;
268      ti->s_mask = 0;
269  
270      if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
271          if (ts == nts) {
272              /* Last temp copy being removed, the mem copies die. */
273              MemCopyInfo *mc;
274              QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
275                  interval_tree_remove(&mc->itree, &ctx->mem_copy);
276              }
277              QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
278          } else {
279              move_mem_copies(find_better_copy(nts), ts);
280          }
281      }
282  }
283  
284  static void reset_temp(OptContext *ctx, TCGArg arg)
285  {
286      reset_ts(ctx, arg_temp(arg));
287  }
288  
289  static void record_mem_copy(OptContext *ctx, TCGType type,
290                              TCGTemp *ts, intptr_t start, intptr_t last)
291  {
292      MemCopyInfo *mc;
293      TempOptInfo *ti;
294  
295      mc = QSIMPLEQ_FIRST(&ctx->mem_free);
296      if (mc) {
297          QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
298      } else {
299          mc = tcg_malloc(sizeof(*mc));
300      }
301  
302      memset(mc, 0, sizeof(*mc));
303      mc->itree.start = start;
304      mc->itree.last = last;
305      mc->type = type;
306      interval_tree_insert(&mc->itree, &ctx->mem_copy);
307  
308      ts = find_better_copy(ts);
309      ti = ts_info(ts);
310      mc->ts = ts;
311      QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
312  }
313  
314  static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
315  {
316      TCGTemp *i;
317  
318      if (ts1 == ts2) {
319          return true;
320      }
321  
322      if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
323          return false;
324      }
325  
326      for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
327          if (i == ts2) {
328              return true;
329          }
330      }
331  
332      return false;
333  }
334  
335  static bool args_are_copies(TCGArg arg1, TCGArg arg2)
336  {
337      return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
338  }
339  
340  static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
341  {
342      MemCopyInfo *mc;
343  
344      for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
345          if (mc->itree.start == s && mc->type == type) {
346              return find_better_copy(mc->ts);
347          }
348      }
349      return NULL;
350  }
351  
352  static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
353  {
354      TCGType type = ctx->type;
355      TCGTemp *ts;
356  
357      if (type == TCG_TYPE_I32) {
358          val = (int32_t)val;
359      }
360  
361      ts = tcg_constant_internal(type, val);
362      init_ts_info(ctx, ts);
363  
364      return temp_arg(ts);
365  }
366  
367  static TCGArg arg_new_temp(OptContext *ctx)
368  {
369      TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
370      init_ts_info(ctx, ts);
371      return temp_arg(ts);
372  }
373  
374  static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
375  {
376      TCGTemp *dst_ts = arg_temp(dst);
377      TCGTemp *src_ts = arg_temp(src);
378      TempOptInfo *di;
379      TempOptInfo *si;
380      TCGOpcode new_op;
381  
382      if (ts_are_copies(dst_ts, src_ts)) {
383          tcg_op_remove(ctx->tcg, op);
384          return true;
385      }
386  
387      reset_ts(ctx, dst_ts);
388      di = ts_info(dst_ts);
389      si = ts_info(src_ts);
390  
391      switch (ctx->type) {
392      case TCG_TYPE_I32:
393          new_op = INDEX_op_mov_i32;
394          break;
395      case TCG_TYPE_I64:
396          new_op = INDEX_op_mov_i64;
397          break;
398      case TCG_TYPE_V64:
399      case TCG_TYPE_V128:
400      case TCG_TYPE_V256:
401          /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
402          new_op = INDEX_op_mov_vec;
403          break;
404      default:
405          g_assert_not_reached();
406      }
407      op->opc = new_op;
408      op->args[0] = dst;
409      op->args[1] = src;
410  
411      di->z_mask = si->z_mask;
412      di->s_mask = si->s_mask;
413  
414      if (src_ts->type == dst_ts->type) {
415          TempOptInfo *ni = ts_info(si->next_copy);
416  
417          di->next_copy = si->next_copy;
418          di->prev_copy = src_ts;
419          ni->prev_copy = dst_ts;
420          si->next_copy = dst_ts;
421          di->is_const = si->is_const;
422          di->val = si->val;
423  
424          if (!QSIMPLEQ_EMPTY(&si->mem_copy)
425              && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
426              move_mem_copies(dst_ts, src_ts);
427          }
428      }
429      return true;
430  }
431  
432  static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
433                               TCGArg dst, uint64_t val)
434  {
435      /* Convert movi to mov with constant temp. */
436      return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
437  }
438  
439  static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
440  {
441      uint64_t l64, h64;
442  
443      switch (op) {
444      CASE_OP_32_64(add):
445          return x + y;
446  
447      CASE_OP_32_64(sub):
448          return x - y;
449  
450      CASE_OP_32_64(mul):
451          return x * y;
452  
453      CASE_OP_32_64_VEC(and):
454          return x & y;
455  
456      CASE_OP_32_64_VEC(or):
457          return x | y;
458  
459      CASE_OP_32_64_VEC(xor):
460          return x ^ y;
461  
462      case INDEX_op_shl_i32:
463          return (uint32_t)x << (y & 31);
464  
465      case INDEX_op_shl_i64:
466          return (uint64_t)x << (y & 63);
467  
468      case INDEX_op_shr_i32:
469          return (uint32_t)x >> (y & 31);
470  
471      case INDEX_op_shr_i64:
472          return (uint64_t)x >> (y & 63);
473  
474      case INDEX_op_sar_i32:
475          return (int32_t)x >> (y & 31);
476  
477      case INDEX_op_sar_i64:
478          return (int64_t)x >> (y & 63);
479  
480      case INDEX_op_rotr_i32:
481          return ror32(x, y & 31);
482  
483      case INDEX_op_rotr_i64:
484          return ror64(x, y & 63);
485  
486      case INDEX_op_rotl_i32:
487          return rol32(x, y & 31);
488  
489      case INDEX_op_rotl_i64:
490          return rol64(x, y & 63);
491  
492      CASE_OP_32_64_VEC(not):
493          return ~x;
494  
495      CASE_OP_32_64(neg):
496          return -x;
497  
498      CASE_OP_32_64_VEC(andc):
499          return x & ~y;
500  
501      CASE_OP_32_64_VEC(orc):
502          return x | ~y;
503  
504      CASE_OP_32_64_VEC(eqv):
505          return ~(x ^ y);
506  
507      CASE_OP_32_64_VEC(nand):
508          return ~(x & y);
509  
510      CASE_OP_32_64_VEC(nor):
511          return ~(x | y);
512  
513      case INDEX_op_clz_i32:
514          return (uint32_t)x ? clz32(x) : y;
515  
516      case INDEX_op_clz_i64:
517          return x ? clz64(x) : y;
518  
519      case INDEX_op_ctz_i32:
520          return (uint32_t)x ? ctz32(x) : y;
521  
522      case INDEX_op_ctz_i64:
523          return x ? ctz64(x) : y;
524  
525      case INDEX_op_ctpop_i32:
526          return ctpop32(x);
527  
528      case INDEX_op_ctpop_i64:
529          return ctpop64(x);
530  
531      CASE_OP_32_64(ext8s):
532          return (int8_t)x;
533  
534      CASE_OP_32_64(ext16s):
535          return (int16_t)x;
536  
537      CASE_OP_32_64(ext8u):
538          return (uint8_t)x;
539  
540      CASE_OP_32_64(ext16u):
541          return (uint16_t)x;
542  
543      CASE_OP_32_64(bswap16):
544          x = bswap16(x);
545          return y & TCG_BSWAP_OS ? (int16_t)x : x;
546  
547      CASE_OP_32_64(bswap32):
548          x = bswap32(x);
549          return y & TCG_BSWAP_OS ? (int32_t)x : x;
550  
551      case INDEX_op_bswap64_i64:
552          return bswap64(x);
553  
554      case INDEX_op_ext_i32_i64:
555      case INDEX_op_ext32s_i64:
556          return (int32_t)x;
557  
558      case INDEX_op_extu_i32_i64:
559      case INDEX_op_extrl_i64_i32:
560      case INDEX_op_ext32u_i64:
561          return (uint32_t)x;
562  
563      case INDEX_op_extrh_i64_i32:
564          return (uint64_t)x >> 32;
565  
566      case INDEX_op_muluh_i32:
567          return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
568      case INDEX_op_mulsh_i32:
569          return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
570  
571      case INDEX_op_muluh_i64:
572          mulu64(&l64, &h64, x, y);
573          return h64;
574      case INDEX_op_mulsh_i64:
575          muls64(&l64, &h64, x, y);
576          return h64;
577  
578      case INDEX_op_div_i32:
579          /* Avoid crashing on divide by zero, otherwise undefined.  */
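        /*
         * "(int32_t)y ? : 1" uses the GNU C conditional with an omitted
         * middle operand: it evaluates to the divisor when nonzero, or
         * to 1 when the divisor is zero.
         */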
580          return (int32_t)x / ((int32_t)y ? : 1);
581      case INDEX_op_divu_i32:
582          return (uint32_t)x / ((uint32_t)y ? : 1);
583      case INDEX_op_div_i64:
584          return (int64_t)x / ((int64_t)y ? : 1);
585      case INDEX_op_divu_i64:
586          return (uint64_t)x / ((uint64_t)y ? : 1);
587  
588      case INDEX_op_rem_i32:
589          return (int32_t)x % ((int32_t)y ? : 1);
590      case INDEX_op_remu_i32:
591          return (uint32_t)x % ((uint32_t)y ? : 1);
592      case INDEX_op_rem_i64:
593          return (int64_t)x % ((int64_t)y ? : 1);
594      case INDEX_op_remu_i64:
595          return (uint64_t)x % ((uint64_t)y ? : 1);
596  
597      default:
598          g_assert_not_reached();
599      }
600  }
601  
602  static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
603                                      uint64_t x, uint64_t y)
604  {
605      uint64_t res = do_constant_folding_2(op, x, y);
606      if (type == TCG_TYPE_I32) {
607          res = (int32_t)res;
608      }
609      return res;
610  }
611  
612  static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
613  {
614      switch (c) {
615      case TCG_COND_EQ:
616          return x == y;
617      case TCG_COND_NE:
618          return x != y;
619      case TCG_COND_LT:
620          return (int32_t)x < (int32_t)y;
621      case TCG_COND_GE:
622          return (int32_t)x >= (int32_t)y;
623      case TCG_COND_LE:
624          return (int32_t)x <= (int32_t)y;
625      case TCG_COND_GT:
626          return (int32_t)x > (int32_t)y;
627      case TCG_COND_LTU:
628          return x < y;
629      case TCG_COND_GEU:
630          return x >= y;
631      case TCG_COND_LEU:
632          return x <= y;
633      case TCG_COND_GTU:
634          return x > y;
635      case TCG_COND_TSTEQ:
636          return (x & y) == 0;
637      case TCG_COND_TSTNE:
638          return (x & y) != 0;
639      case TCG_COND_ALWAYS:
640      case TCG_COND_NEVER:
641          break;
642      }
643      g_assert_not_reached();
644  }
645  
646  static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
647  {
648      switch (c) {
649      case TCG_COND_EQ:
650          return x == y;
651      case TCG_COND_NE:
652          return x != y;
653      case TCG_COND_LT:
654          return (int64_t)x < (int64_t)y;
655      case TCG_COND_GE:
656          return (int64_t)x >= (int64_t)y;
657      case TCG_COND_LE:
658          return (int64_t)x <= (int64_t)y;
659      case TCG_COND_GT:
660          return (int64_t)x > (int64_t)y;
661      case TCG_COND_LTU:
662          return x < y;
663      case TCG_COND_GEU:
664          return x >= y;
665      case TCG_COND_LEU:
666          return x <= y;
667      case TCG_COND_GTU:
668          return x > y;
669      case TCG_COND_TSTEQ:
670          return (x & y) == 0;
671      case TCG_COND_TSTNE:
672          return (x & y) != 0;
673      case TCG_COND_ALWAYS:
674      case TCG_COND_NEVER:
675          break;
676      }
677      g_assert_not_reached();
678  }
679  
680  static int do_constant_folding_cond_eq(TCGCond c)
681  {
682      switch (c) {
683      case TCG_COND_GT:
684      case TCG_COND_LTU:
685      case TCG_COND_LT:
686      case TCG_COND_GTU:
687      case TCG_COND_NE:
688          return 0;
689      case TCG_COND_GE:
690      case TCG_COND_GEU:
691      case TCG_COND_LE:
692      case TCG_COND_LEU:
693      case TCG_COND_EQ:
694          return 1;
695      case TCG_COND_TSTEQ:
696      case TCG_COND_TSTNE:
697          return -1;
698      case TCG_COND_ALWAYS:
699      case TCG_COND_NEVER:
700          break;
701      }
702      g_assert_not_reached();
703  }
704  
705  /*
706   * Return -1 if the condition can't be simplified,
707   * and the result of the condition (0 or 1) if it can.
708   */
709  static int do_constant_folding_cond(TCGType type, TCGArg x,
710                                      TCGArg y, TCGCond c)
711  {
712      if (arg_is_const(x) && arg_is_const(y)) {
713          uint64_t xv = arg_info(x)->val;
714          uint64_t yv = arg_info(y)->val;
715  
716          switch (type) {
717          case TCG_TYPE_I32:
718              return do_constant_folding_cond_32(xv, yv, c);
719          case TCG_TYPE_I64:
720              return do_constant_folding_cond_64(xv, yv, c);
721          default:
722              /* Only scalar comparisons are optimizable */
723              return -1;
724          }
725      } else if (args_are_copies(x, y)) {
726          return do_constant_folding_cond_eq(c);
727      } else if (arg_is_const_val(y, 0)) {
728          switch (c) {
729          case TCG_COND_LTU:
730          case TCG_COND_TSTNE:
731              return 0;
732          case TCG_COND_GEU:
733          case TCG_COND_TSTEQ:
734              return 1;
735          default:
736              return -1;
737          }
738      }
739      return -1;
740  }
741  
742  /**
743   * swap_commutative:
744   * @dest: TCGArg of the destination argument, or NO_DEST.
745   * @p1: first paired argument
746   * @p2: second paired argument
747   *
748   * If *@p1 is a constant and *@p2 is not, swap.
749   * If *@p2 matches @dest, swap.
750   * Return true if a swap was performed.
751   */
752  
753  #define NO_DEST  temp_arg(NULL)
754  
755  static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
756  {
757      TCGArg a1 = *p1, a2 = *p2;
758      int sum = 0;
759      sum += arg_is_const(a1);
760      sum -= arg_is_const(a2);
761  
762      /* Prefer the constant in second argument, and then the form
763         op a, a, b, which is better handled on non-RISC hosts. */
764      if (sum > 0 || (sum == 0 && dest == a2)) {
765          *p1 = a2;
766          *p2 = a1;
767          return true;
768      }
769      return false;
770  }
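/*
 * Illustrative example: "add_i32 t0, $5, t1" becomes "add_i32 t0, t1, $5"
 * (constant moved to the second operand), and "and_i32 t0, t1, t0"
 * becomes "and_i32 t0, t0, t1" so that the destination matches the
 * first source operand.
 */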
771  
772  static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
773  {
774      int sum = 0;
775      sum += arg_is_const(p1[0]);
776      sum += arg_is_const(p1[1]);
777      sum -= arg_is_const(p2[0]);
778      sum -= arg_is_const(p2[1]);
779      if (sum > 0) {
780          TCGArg t;
781          t = p1[0], p1[0] = p2[0], p2[0] = t;
782          t = p1[1], p1[1] = p2[1], p2[1] = t;
783          return true;
784      }
785      return false;
786  }
787  
788  /*
789   * Return -1 if the condition can't be simplified,
790   * and the result of the condition (0 or 1) if it can.
791   */
792  static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
793                                       TCGArg *p1, TCGArg *p2, TCGArg *pcond)
794  {
795      TCGCond cond;
796      bool swap;
797      int r;
798  
799      swap = swap_commutative(dest, p1, p2);
800      cond = *pcond;
801      if (swap) {
802          *pcond = cond = tcg_swap_cond(cond);
803      }
804  
805      r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
806      if (r >= 0) {
807          return r;
808      }
809      if (!is_tst_cond(cond)) {
810          return -1;
811      }
812  
813      /*
814       * TSTNE x,x -> NE x,0
815       * TSTNE x,-1 -> NE x,0
816       */
817      if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) {
818          *p2 = arg_new_constant(ctx, 0);
819          *pcond = tcg_tst_eqne_cond(cond);
820          return -1;
821      }
822  
823      /* TSTNE x,sign -> LT x,0 */
824      if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
825                                 ? INT32_MIN : INT64_MIN))) {
826          *p2 = arg_new_constant(ctx, 0);
827          *pcond = tcg_tst_ltge_cond(cond);
828          return -1;
829      }
830  
831      /* Expand to AND with a temporary if no backend support. */
832      if (!TCG_TARGET_HAS_tst) {
833          TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32
834                               ? INDEX_op_and_i32 : INDEX_op_and_i64);
835          TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3);
836          TCGArg tmp = arg_new_temp(ctx);
837  
838          op2->args[0] = tmp;
839          op2->args[1] = *p1;
840          op2->args[2] = *p2;
841  
842          *p1 = tmp;
843          *p2 = arg_new_constant(ctx, 0);
844          *pcond = tcg_tst_eqne_cond(cond);
845      }
846      return -1;
847  }
848  
849  static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
850  {
851      TCGArg al, ah, bl, bh;
852      TCGCond c;
853      bool swap;
854      int r;
855  
856      swap = swap_commutative2(args, args + 2);
857      c = args[4];
858      if (swap) {
859          args[4] = c = tcg_swap_cond(c);
860      }
861  
862      al = args[0];
863      ah = args[1];
864      bl = args[2];
865      bh = args[3];
866  
867      if (arg_is_const(bl) && arg_is_const(bh)) {
868          tcg_target_ulong blv = arg_info(bl)->val;
869          tcg_target_ulong bhv = arg_info(bh)->val;
870          uint64_t b = deposit64(blv, 32, 32, bhv);
871  
872          if (arg_is_const(al) && arg_is_const(ah)) {
873              tcg_target_ulong alv = arg_info(al)->val;
874              tcg_target_ulong ahv = arg_info(ah)->val;
875              uint64_t a = deposit64(alv, 32, 32, ahv);
876  
877              r = do_constant_folding_cond_64(a, b, c);
878              if (r >= 0) {
879                  return r;
880              }
881          }
882  
883          if (b == 0) {
884              switch (c) {
885              case TCG_COND_LTU:
886              case TCG_COND_TSTNE:
887                  return 0;
888              case TCG_COND_GEU:
889              case TCG_COND_TSTEQ:
890                  return 1;
891              default:
892                  break;
893              }
894          }
895  
896          /* TSTNE x,-1 -> NE x,0 */
897          if (b == -1 && is_tst_cond(c)) {
898              args[3] = args[2] = arg_new_constant(ctx, 0);
899              args[4] = tcg_tst_eqne_cond(c);
900              return -1;
901          }
902  
903          /* TSTNE x,sign -> LT x,0 */
904          if (b == INT64_MIN && is_tst_cond(c)) {
905              /* bl must be 0, so copy that to bh */
906              args[3] = bl;
907              args[4] = tcg_tst_ltge_cond(c);
908              return -1;
909          }
910      }
911  
912      if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
913          r = do_constant_folding_cond_eq(c);
914          if (r >= 0) {
915              return r;
916          }
917  
918          /* TSTNE x,x -> NE x,0 */
919          if (is_tst_cond(c)) {
920              args[3] = args[2] = arg_new_constant(ctx, 0);
921              args[4] = tcg_tst_eqne_cond(c);
922              return -1;
923          }
924      }
925  
926      /* Expand to AND with a temporary if no backend support. */
927      if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
928          TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
929          TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
930          TCGArg t1 = arg_new_temp(ctx);
931          TCGArg t2 = arg_new_temp(ctx);
932  
933          op1->args[0] = t1;
934          op1->args[1] = al;
935          op1->args[2] = bl;
936          op2->args[0] = t2;
937          op2->args[1] = ah;
938          op2->args[2] = bh;
939  
940          args[0] = t1;
941          args[1] = t2;
942          args[3] = args[2] = arg_new_constant(ctx, 0);
943          args[4] = tcg_tst_eqne_cond(c);
944      }
945      return -1;
946  }
947  
948  static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
949  {
950      for (int i = 0; i < nb_args; i++) {
951          TCGTemp *ts = arg_temp(op->args[i]);
952          init_ts_info(ctx, ts);
953      }
954  }
955  
956  static void copy_propagate(OptContext *ctx, TCGOp *op,
957                             int nb_oargs, int nb_iargs)
958  {
959      for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
960          TCGTemp *ts = arg_temp(op->args[i]);
961          if (ts_is_copy(ts)) {
962              op->args[i] = temp_arg(find_better_copy(ts));
963          }
964      }
965  }
966  
967  static void finish_folding(OptContext *ctx, TCGOp *op)
968  {
969      const TCGOpDef *def = &tcg_op_defs[op->opc];
970      int i, nb_oargs;
971  
972      /*
973       * We only optimize extended basic blocks.  If the opcode ends a BB
974       * and is not a conditional branch, reset all temp data.
975       */
976      if (def->flags & TCG_OPF_BB_END) {
977          ctx->prev_mb = NULL;
978          if (!(def->flags & TCG_OPF_COND_BRANCH)) {
979              memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
980              remove_mem_copy_all(ctx);
981          }
982          return;
983      }
984  
985      nb_oargs = def->nb_oargs;
986      for (i = 0; i < nb_oargs; i++) {
987          TCGTemp *ts = arg_temp(op->args[i]);
988          reset_ts(ctx, ts);
989          /*
990           * Save the corresponding known-zero/sign bits mask for the
991           * first output argument (only one supported so far).
992           */
993          if (i == 0) {
994              ts_info(ts)->z_mask = ctx->z_mask;
995              ts_info(ts)->s_mask = ctx->s_mask;
996          }
997      }
998  }
999  
1000  /*
1001   * The fold_* functions return true when processing is complete,
1002   * usually by folding the operation to a constant or to a copy,
1003   * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
1004   * like collect information about the value produced, for use in
1005   * optimizing a subsequent operation.
1006   *
1007   * These first fold_* functions are all helpers, used by other
1008   * folders for more specific operations.
1009   */
1010  
1011  static bool fold_const1(OptContext *ctx, TCGOp *op)
1012  {
1013      if (arg_is_const(op->args[1])) {
1014          uint64_t t;
1015  
1016          t = arg_info(op->args[1])->val;
1017          t = do_constant_folding(op->opc, ctx->type, t, 0);
1018          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1019      }
1020      return false;
1021  }
1022  
1023  static bool fold_const2(OptContext *ctx, TCGOp *op)
1024  {
1025      if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1026          uint64_t t1 = arg_info(op->args[1])->val;
1027          uint64_t t2 = arg_info(op->args[2])->val;
1028  
1029          t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1030          return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1031      }
1032      return false;
1033  }
1034  
1035  static bool fold_commutative(OptContext *ctx, TCGOp *op)
1036  {
1037      swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038      return false;
1039  }
1040  
1041  static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1042  {
1043      swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1044      return fold_const2(ctx, op);
1045  }
1046  
1047  static bool fold_masks(OptContext *ctx, TCGOp *op)
1048  {
1049      uint64_t a_mask = ctx->a_mask;
1050      uint64_t z_mask = ctx->z_mask;
1051      uint64_t s_mask = ctx->s_mask;
1052  
1053      /*
1054       * 32-bit ops generate 32-bit results, which for the purpose of
1055       * simplifying tcg are sign-extended.  Certainly that's how we
1056       * represent our constants elsewhere.  Note that the bits will
1057       * be reset properly for a 64-bit value when encountering the
1058       * type changing opcodes.
1059       */
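    /*
     * Example: for a 32-bit op whose result may have bit 31 set, the
     * (int32_t) cast propagates that uncertainty into bits 63..32 of
     * z_mask, while s_mask always gains the top 32 bits because the
     * stored 64-bit value is sign-extended.
     */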
1060      if (ctx->type == TCG_TYPE_I32) {
1061          a_mask = (int32_t)a_mask;
1062          z_mask = (int32_t)z_mask;
1063          s_mask |= MAKE_64BIT_MASK(32, 32);
1064          ctx->z_mask = z_mask;
1065          ctx->s_mask = s_mask;
1066      }
1067  
1068      if (z_mask == 0) {
1069          return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1070      }
1071      if (a_mask == 0) {
1072          return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1073      }
1074      return false;
1075  }
1076  
1077  /*
1078   * Convert @op to NOT, if NOT is supported by the host.
1079   * Return true if the conversion is successful, which will still
1080   * indicate that the processing is complete.
1081   */
1082  static bool fold_not(OptContext *ctx, TCGOp *op);
1083  static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1084  {
1085      TCGOpcode not_op;
1086      bool have_not;
1087  
1088      switch (ctx->type) {
1089      case TCG_TYPE_I32:
1090          not_op = INDEX_op_not_i32;
1091          have_not = TCG_TARGET_HAS_not_i32;
1092          break;
1093      case TCG_TYPE_I64:
1094          not_op = INDEX_op_not_i64;
1095          have_not = TCG_TARGET_HAS_not_i64;
1096          break;
1097      case TCG_TYPE_V64:
1098      case TCG_TYPE_V128:
1099      case TCG_TYPE_V256:
1100          not_op = INDEX_op_not_vec;
1101          have_not = TCG_TARGET_HAS_not_vec;
1102          break;
1103      default:
1104          g_assert_not_reached();
1105      }
1106      if (have_not) {
1107          op->opc = not_op;
1108          op->args[1] = op->args[idx];
1109          return fold_not(ctx, op);
1110      }
1111      return false;
1112  }
1113  
1114  /* If the binary operation has first argument @i, fold to @i. */
1115  static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1116  {
1117      if (arg_is_const_val(op->args[1], i)) {
1118          return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1119      }
1120      return false;
1121  }
1122  
1123  /* If the binary operation has first argument @i, fold to NOT. */
1124  static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1125  {
1126      if (arg_is_const_val(op->args[1], i)) {
1127          return fold_to_not(ctx, op, 2);
1128      }
1129      return false;
1130  }
1131  
1132  /* If the binary operation has second argument @i, fold to @i. */
1133  static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1134  {
1135      if (arg_is_const_val(op->args[2], i)) {
1136          return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1137      }
1138      return false;
1139  }
1140  
1141  /* If the binary operation has second argument @i, fold to identity. */
1142  static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1143  {
1144      if (arg_is_const_val(op->args[2], i)) {
1145          return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1146      }
1147      return false;
1148  }
1149  
1150  /* If the binary operation has second argument @i, fold to NOT. */
1151  static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1152  {
1153      if (arg_is_const_val(op->args[2], i)) {
1154          return fold_to_not(ctx, op, 1);
1155      }
1156      return false;
1157  }
1158  
1159  /* If the binary operation has both arguments equal, fold to @i. */
1160  static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1161  {
1162      if (args_are_copies(op->args[1], op->args[2])) {
1163          return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1164      }
1165      return false;
1166  }
1167  
1168  /* If the binary operation has both arguments equal, fold to identity. */
1169  static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1170  {
1171      if (args_are_copies(op->args[1], op->args[2])) {
1172          return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1173      }
1174      return false;
1175  }
1176  
1177  /*
1178   * These outermost fold_<op> functions are sorted alphabetically.
1179   *
1180   * The ordering of the transformations should be:
1181   *   1) those that produce a constant
1182   *   2) those that produce a copy
1183   *   3) those that produce information about the result value.
1184   */
1185  
1186  static bool fold_add(OptContext *ctx, TCGOp *op)
1187  {
1188      if (fold_const2_commutative(ctx, op) ||
1189          fold_xi_to_x(ctx, op, 0)) {
1190          return true;
1191      }
1192      return false;
1193  }
1194  
1195  /* We cannot as yet do_constant_folding with vectors. */
1196  static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1197  {
1198      if (fold_commutative(ctx, op) ||
1199          fold_xi_to_x(ctx, op, 0)) {
1200          return true;
1201      }
1202      return false;
1203  }
1204  
1205  static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
1206  {
1207      bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
1208      bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
1209  
1210      if (a_const && b_const) {
1211          uint64_t al = arg_info(op->args[2])->val;
1212          uint64_t ah = arg_info(op->args[3])->val;
1213          uint64_t bl = arg_info(op->args[4])->val;
1214          uint64_t bh = arg_info(op->args[5])->val;
1215          TCGArg rl, rh;
1216          TCGOp *op2;
1217  
1218          if (ctx->type == TCG_TYPE_I32) {
1219              uint64_t a = deposit64(al, 32, 32, ah);
1220              uint64_t b = deposit64(bl, 32, 32, bh);
1221  
1222              if (add) {
1223                  a += b;
1224              } else {
1225                  a -= b;
1226              }
1227  
1228              al = sextract64(a, 0, 32);
1229              ah = sextract64(a, 32, 32);
1230          } else {
1231              Int128 a = int128_make128(al, ah);
1232              Int128 b = int128_make128(bl, bh);
1233  
1234              if (add) {
1235                  a = int128_add(a, b);
1236              } else {
1237                  a = int128_sub(a, b);
1238              }
1239  
1240              al = int128_getlo(a);
1241              ah = int128_gethi(a);
1242          }
1243  
1244          rl = op->args[0];
1245          rh = op->args[1];
1246  
1247          /* The proper opcode is supplied by tcg_opt_gen_mov. */
1248          op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1249  
1250          tcg_opt_gen_movi(ctx, op, rl, al);
1251          tcg_opt_gen_movi(ctx, op2, rh, ah);
1252          return true;
1253      }
1254  
1255      /* Fold sub2 r,x,i to add2 r,x,-i */
1256      if (!add && b_const) {
1257          uint64_t bl = arg_info(op->args[4])->val;
1258          uint64_t bh = arg_info(op->args[5])->val;
1259  
1260          /* Negate the two parts without assembling and disassembling. */
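        /*
         * That is, -(bh:bl) == ~(bh:bl) + 1: the low part becomes -bl,
         * and the +1 carries into the high part exactly when bl was 0,
         * which after "bl = -bl" is still the case iff bl == 0, hence
         * the "+ !bl" below.
         */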
1261          bl = -bl;
1262          bh = ~bh + !bl;
1263  
1264          op->opc = (ctx->type == TCG_TYPE_I32
1265                     ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
1266          op->args[4] = arg_new_constant(ctx, bl);
1267          op->args[5] = arg_new_constant(ctx, bh);
1268      }
1269      return false;
1270  }
1271  
1272  static bool fold_add2(OptContext *ctx, TCGOp *op)
1273  {
1274      /* Note that the high and low parts may be independently swapped. */
1275      swap_commutative(op->args[0], &op->args[2], &op->args[4]);
1276      swap_commutative(op->args[1], &op->args[3], &op->args[5]);
1277  
1278      return fold_addsub2(ctx, op, true);
1279  }
1280  
1281  static bool fold_and(OptContext *ctx, TCGOp *op)
1282  {
1283      uint64_t z1, z2;
1284  
1285      if (fold_const2_commutative(ctx, op) ||
1286          fold_xi_to_i(ctx, op, 0) ||
1287          fold_xi_to_x(ctx, op, -1) ||
1288          fold_xx_to_x(ctx, op)) {
1289          return true;
1290      }
1291  
1292      z1 = arg_info(op->args[1])->z_mask;
1293      z2 = arg_info(op->args[2])->z_mask;
1294      ctx->z_mask = z1 & z2;
1295  
1296      /*
1297       * Sign repetitions are perforce all identical, whether they are 1 or 0.
1298       * Bitwise operations preserve the relative quantity of the repetitions.
1299       */
1300      ctx->s_mask = arg_info(op->args[1])->s_mask
1301                  & arg_info(op->args[2])->s_mask;
1302  
1303      /*
1304       * Known-zeros does not imply known-ones.  Therefore unless
1305       * arg2 is constant, we can't infer affected bits from it.
1306       */
1307      if (arg_is_const(op->args[2])) {
1308          ctx->a_mask = z1 & ~z2;
1309      }
1310  
1311      return fold_masks(ctx, op);
1312  }
1313  
1314  static bool fold_andc(OptContext *ctx, TCGOp *op)
1315  {
1316      uint64_t z1;
1317  
1318      if (fold_const2(ctx, op) ||
1319          fold_xx_to_i(ctx, op, 0) ||
1320          fold_xi_to_x(ctx, op, 0) ||
1321          fold_ix_to_not(ctx, op, -1)) {
1322          return true;
1323      }
1324  
1325      z1 = arg_info(op->args[1])->z_mask;
1326  
1327      /*
1328       * Known-zeros does not imply known-ones.  Therefore unless
1329       * arg2 is constant, we can't infer anything from it.
1330       */
1331      if (arg_is_const(op->args[2])) {
1332          uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1333          ctx->a_mask = z1 & ~z2;
1334          z1 &= z2;
1335      }
1336      ctx->z_mask = z1;
1337  
1338      ctx->s_mask = arg_info(op->args[1])->s_mask
1339                  & arg_info(op->args[2])->s_mask;
1340      return fold_masks(ctx, op);
1341  }
1342  
1343  static bool fold_brcond(OptContext *ctx, TCGOp *op)
1344  {
1345      int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1346                                        &op->args[1], &op->args[2]);
1347      if (i == 0) {
1348          tcg_op_remove(ctx->tcg, op);
1349          return true;
1350      }
1351      if (i > 0) {
1352          op->opc = INDEX_op_br;
1353          op->args[0] = op->args[3];
1354      }
1355      return false;
1356  }
1357  
1358  static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1359  {
1360      TCGCond cond;
1361      TCGArg label;
1362      int i, inv = 0;
1363  
1364      i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1365      cond = op->args[4];
1366      label = op->args[5];
1367      if (i >= 0) {
1368          goto do_brcond_const;
1369      }
1370  
1371      switch (cond) {
1372      case TCG_COND_LT:
1373      case TCG_COND_GE:
1374          /*
1375           * Simplify LT/GE comparisons vs zero to a single compare
1376           * vs the high word of the input.
1377           */
1378          if (arg_is_const_val(op->args[2], 0) &&
1379              arg_is_const_val(op->args[3], 0)) {
1380              goto do_brcond_high;
1381          }
1382          break;
1383  
1384      case TCG_COND_NE:
1385          inv = 1;
1386          QEMU_FALLTHROUGH;
1387      case TCG_COND_EQ:
1388          /*
1389           * Simplify EQ/NE comparisons where one of the pairs
1390           * can be simplified.
1391           */
1392          i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1393                                       op->args[2], cond);
1394          switch (i ^ inv) {
1395          case 0:
1396              goto do_brcond_const;
1397          case 1:
1398              goto do_brcond_high;
1399          }
1400  
1401          i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1402                                       op->args[3], cond);
1403          switch (i ^ inv) {
1404          case 0:
1405              goto do_brcond_const;
1406          case 1:
1407              goto do_brcond_low;
1408          }
1409          break;
1410  
1411      case TCG_COND_TSTEQ:
1412      case TCG_COND_TSTNE:
1413          if (arg_is_const_val(op->args[2], 0)) {
1414              goto do_brcond_high;
1415          }
1416          if (arg_is_const_val(op->args[3], 0)) {
1417              goto do_brcond_low;
1418          }
1419          break;
1420  
1421      default:
1422          break;
1423  
1424      do_brcond_low:
1425          op->opc = INDEX_op_brcond_i32;
1426          op->args[1] = op->args[2];
1427          op->args[2] = cond;
1428          op->args[3] = label;
1429          return fold_brcond(ctx, op);
1430  
1431      do_brcond_high:
1432          op->opc = INDEX_op_brcond_i32;
1433          op->args[0] = op->args[1];
1434          op->args[1] = op->args[3];
1435          op->args[2] = cond;
1436          op->args[3] = label;
1437          return fold_brcond(ctx, op);
1438  
1439      do_brcond_const:
1440          if (i == 0) {
1441              tcg_op_remove(ctx->tcg, op);
1442              return true;
1443          }
1444          op->opc = INDEX_op_br;
1445          op->args[0] = label;
1446          break;
1447      }
1448      return false;
1449  }
1450  
1451  static bool fold_bswap(OptContext *ctx, TCGOp *op)
1452  {
1453      uint64_t z_mask, s_mask, sign;
1454  
1455      if (arg_is_const(op->args[1])) {
1456          uint64_t t = arg_info(op->args[1])->val;
1457  
1458          t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1459          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1460      }
1461  
1462      z_mask = arg_info(op->args[1])->z_mask;
1463  
1464      switch (op->opc) {
1465      case INDEX_op_bswap16_i32:
1466      case INDEX_op_bswap16_i64:
1467          z_mask = bswap16(z_mask);
1468          sign = INT16_MIN;
1469          break;
1470      case INDEX_op_bswap32_i32:
1471      case INDEX_op_bswap32_i64:
1472          z_mask = bswap32(z_mask);
1473          sign = INT32_MIN;
1474          break;
1475      case INDEX_op_bswap64_i64:
1476          z_mask = bswap64(z_mask);
1477          sign = INT64_MIN;
1478          break;
1479      default:
1480          g_assert_not_reached();
1481      }
1482      s_mask = smask_from_zmask(z_mask);
1483  
1484      switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1485      case TCG_BSWAP_OZ:
1486          break;
1487      case TCG_BSWAP_OS:
1488          /* If the sign bit may be 1, force all the bits above to 1. */
1489          if (z_mask & sign) {
1490              z_mask |= sign;
1491              s_mask = sign << 1;
1492          }
1493          break;
1494      default:
1495          /* The high bits are undefined: force all bits above the sign to 1. */
1496          z_mask |= sign << 1;
1497          s_mask = 0;
1498          break;
1499      }
1500      ctx->z_mask = z_mask;
1501      ctx->s_mask = s_mask;
1502  
1503      return fold_masks(ctx, op);
1504  }
1505  
1506  static bool fold_call(OptContext *ctx, TCGOp *op)
1507  {
1508      TCGContext *s = ctx->tcg;
1509      int nb_oargs = TCGOP_CALLO(op);
1510      int nb_iargs = TCGOP_CALLI(op);
1511      int flags, i;
1512  
1513      init_arguments(ctx, op, nb_oargs + nb_iargs);
1514      copy_propagate(ctx, op, nb_oargs, nb_iargs);
1515  
1516      /* If the function reads or writes globals, reset temp data. */
1517      flags = tcg_call_flags(op);
1518      if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1519          int nb_globals = s->nb_globals;
1520  
1521          for (i = 0; i < nb_globals; i++) {
1522              if (test_bit(i, ctx->temps_used.l)) {
1523                  reset_ts(ctx, &ctx->tcg->temps[i]);
1524              }
1525          }
1526      }
1527  
1528      /* If the function has side effects, reset mem data. */
1529      if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1530          remove_mem_copy_all(ctx);
1531      }
1532  
1533      /* Reset temp data for outputs. */
1534      for (i = 0; i < nb_oargs; i++) {
1535          reset_temp(ctx, op->args[i]);
1536      }
1537  
1538      /* Stop optimizing MB across calls. */
1539      ctx->prev_mb = NULL;
1540      return true;
1541  }
1542  
1543  static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1544  {
1545      uint64_t z_mask;
1546  
1547      if (arg_is_const(op->args[1])) {
1548          uint64_t t = arg_info(op->args[1])->val;
1549  
1550          if (t != 0) {
1551              t = do_constant_folding(op->opc, ctx->type, t, 0);
1552              return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1553          }
1554          return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1555      }
1556  
1557      switch (ctx->type) {
1558      case TCG_TYPE_I32:
1559          z_mask = 31;
1560          break;
1561      case TCG_TYPE_I64:
1562          z_mask = 63;
1563          break;
1564      default:
1565          g_assert_not_reached();
1566      }
1567      ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1568      ctx->s_mask = smask_from_zmask(ctx->z_mask);
1569      return false;
1570  }
1571  
1572  static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1573  {
1574      if (fold_const1(ctx, op)) {
1575          return true;
1576      }
1577  
1578      switch (ctx->type) {
1579      case TCG_TYPE_I32:
1580          ctx->z_mask = 32 | 31;
1581          break;
1582      case TCG_TYPE_I64:
1583          ctx->z_mask = 64 | 63;
1584          break;
1585      default:
1586          g_assert_not_reached();
1587      }
1588      ctx->s_mask = smask_from_zmask(ctx->z_mask);
1589      return false;
1590  }
1591  
1592  static bool fold_deposit(OptContext *ctx, TCGOp *op)
1593  {
1594      TCGOpcode and_opc;
1595  
1596      if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1597          uint64_t t1 = arg_info(op->args[1])->val;
1598          uint64_t t2 = arg_info(op->args[2])->val;
1599  
1600          t1 = deposit64(t1, op->args[3], op->args[4], t2);
1601          return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1602      }
1603  
1604      switch (ctx->type) {
1605      case TCG_TYPE_I32:
1606          and_opc = INDEX_op_and_i32;
1607          break;
1608      case TCG_TYPE_I64:
1609          and_opc = INDEX_op_and_i64;
1610          break;
1611      default:
1612          g_assert_not_reached();
1613      }
1614  
1615      /* Inserting a value into zero at offset 0. */
1616      if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
1617          uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
1618  
1619          op->opc = and_opc;
1620          op->args[1] = op->args[2];
1621          op->args[2] = arg_new_constant(ctx, mask);
1622          ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1623          return false;
1624      }
1625  
1626      /* Inserting zero into a value. */
1627      if (arg_is_const_val(op->args[2], 0)) {
1628          uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1629  
1630          op->opc = and_opc;
1631          op->args[2] = arg_new_constant(ctx, mask);
1632          ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1633          return false;
1634      }
1635  
1636      ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1637                              op->args[3], op->args[4],
1638                              arg_info(op->args[2])->z_mask);
1639      return false;
1640  }
1641  
1642  static bool fold_divide(OptContext *ctx, TCGOp *op)
1643  {
1644      if (fold_const2(ctx, op) ||
1645          fold_xi_to_x(ctx, op, 1)) {
1646          return true;
1647      }
1648      return false;
1649  }
1650  
1651  static bool fold_dup(OptContext *ctx, TCGOp *op)
1652  {
1653      if (arg_is_const(op->args[1])) {
1654          uint64_t t = arg_info(op->args[1])->val;
1655          t = dup_const(TCGOP_VECE(op), t);
1656          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1657      }
1658      return false;
1659  }
1660  
1661  static bool fold_dup2(OptContext *ctx, TCGOp *op)
1662  {
1663      if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1664          uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1665                                 arg_info(op->args[2])->val);
1666          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1667      }
1668  
1669      if (args_are_copies(op->args[1], op->args[2])) {
1670          op->opc = INDEX_op_dup_vec;
1671          TCGOP_VECE(op) = MO_32;
1672      }
1673      return false;
1674  }
1675  
1676  static bool fold_eqv(OptContext *ctx, TCGOp *op)
1677  {
1678      if (fold_const2_commutative(ctx, op) ||
1679          fold_xi_to_x(ctx, op, -1) ||
1680          fold_xi_to_not(ctx, op, 0)) {
1681          return true;
1682      }
1683  
1684      ctx->s_mask = arg_info(op->args[1])->s_mask
1685                  & arg_info(op->args[2])->s_mask;
1686      return false;
1687  }
1688  
1689  static bool fold_extract(OptContext *ctx, TCGOp *op)
1690  {
1691      uint64_t z_mask_old, z_mask;
1692      int pos = op->args[2];
1693      int len = op->args[3];
1694  
1695      if (arg_is_const(op->args[1])) {
1696          uint64_t t;
1697  
1698          t = arg_info(op->args[1])->val;
1699          t = extract64(t, pos, len);
1700          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1701      }
1702  
1703      z_mask_old = arg_info(op->args[1])->z_mask;
1704      z_mask = extract64(z_mask_old, pos, len);
1705      if (pos == 0) {
1706          ctx->a_mask = z_mask_old ^ z_mask;
1707      }
1708      ctx->z_mask = z_mask;
1709      ctx->s_mask = smask_from_zmask(z_mask);
1710  
1711      return fold_masks(ctx, op);
1712  }
1713  
1714  static bool fold_extract2(OptContext *ctx, TCGOp *op)
1715  {
1716      if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1717          uint64_t v1 = arg_info(op->args[1])->val;
1718          uint64_t v2 = arg_info(op->args[2])->val;
1719          int shr = op->args[3];
1720  
1721          if (op->opc == INDEX_op_extract2_i64) {
1722              v1 >>= shr;
1723              v2 <<= 64 - shr;
1724          } else {
1725              v1 = (uint32_t)v1 >> shr;
1726              v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1727          }
1728          return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1729      }
1730      return false;
1731  }
1732  
1733  static bool fold_exts(OptContext *ctx, TCGOp *op)
1734  {
1735      uint64_t s_mask_old, s_mask, z_mask, sign;
1736      bool type_change = false;
1737  
1738      if (fold_const1(ctx, op)) {
1739          return true;
1740      }
1741  
1742      z_mask = arg_info(op->args[1])->z_mask;
1743      s_mask = arg_info(op->args[1])->s_mask;
1744      s_mask_old = s_mask;
1745  
1746      switch (op->opc) {
1747      CASE_OP_32_64(ext8s):
1748          sign = INT8_MIN;
1749          z_mask = (uint8_t)z_mask;
1750          break;
1751      CASE_OP_32_64(ext16s):
1752          sign = INT16_MIN;
1753          z_mask = (uint16_t)z_mask;
1754          break;
1755      case INDEX_op_ext_i32_i64:
1756          type_change = true;
1757          QEMU_FALLTHROUGH;
1758      case INDEX_op_ext32s_i64:
1759          sign = INT32_MIN;
1760          z_mask = (uint32_t)z_mask;
1761          break;
1762      default:
1763          g_assert_not_reached();
1764      }
1765  
1766      if (z_mask & sign) {
1767          z_mask |= sign;
1768      }
1769      s_mask |= sign << 1;
1770  
1771      ctx->z_mask = z_mask;
1772      ctx->s_mask = s_mask;
1773      if (!type_change) {
1774          ctx->a_mask = s_mask & ~s_mask_old;
1775      }
1776  
1777      return fold_masks(ctx, op);
1778  }
1779  
1780  static bool fold_extu(OptContext *ctx, TCGOp *op)
1781  {
1782      uint64_t z_mask_old, z_mask;
1783      bool type_change = false;
1784  
1785      if (fold_const1(ctx, op)) {
1786          return true;
1787      }
1788  
1789      z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1790  
1791      switch (op->opc) {
1792      CASE_OP_32_64(ext8u):
1793          z_mask = (uint8_t)z_mask;
1794          break;
1795      CASE_OP_32_64(ext16u):
1796          z_mask = (uint16_t)z_mask;
1797          break;
1798      case INDEX_op_extrl_i64_i32:
1799      case INDEX_op_extu_i32_i64:
1800          type_change = true;
1801          QEMU_FALLTHROUGH;
1802      case INDEX_op_ext32u_i64:
1803          z_mask = (uint32_t)z_mask;
1804          break;
1805      case INDEX_op_extrh_i64_i32:
1806          type_change = true;
1807          z_mask >>= 32;
1808          break;
1809      default:
1810          g_assert_not_reached();
1811      }
1812  
1813      ctx->z_mask = z_mask;
1814      ctx->s_mask = smask_from_zmask(z_mask);
1815      if (!type_change) {
1816          ctx->a_mask = z_mask_old ^ z_mask;
1817      }
1818      return fold_masks(ctx, op);
1819  }
1820  
1821  static bool fold_mb(OptContext *ctx, TCGOp *op)
1822  {
1823      /* Eliminate duplicate and redundant fence instructions.  */
1824      if (ctx->prev_mb) {
1825          /*
1826           * Merge two barriers of the same type into one,
1827           * or a weaker barrier into a stronger one,
1828           * or two weaker barriers into a stronger one.
1829           *   mb X; mb Y => mb X|Y
1830           *   mb; strl => mb; st
1831           *   ldaq; mb => ld; mb
1832           *   ldaq; strl => ld; mb; st
1833           * Other combinations are also merged into a strong
1834           * barrier.  This is stricter than specified but for
1835           * the purposes of TCG is better than not optimizing.
1836           */
1837          ctx->prev_mb->args[0] |= op->args[0];
1838          tcg_op_remove(ctx->tcg, op);
1839      } else {
1840          ctx->prev_mb = op;
1841      }
1842      return true;
1843  }
1844  
1845  static bool fold_mov(OptContext *ctx, TCGOp *op)
1846  {
1847      return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1848  }
1849  
1850  static bool fold_movcond(OptContext *ctx, TCGOp *op)
1851  {
1852      int i;
1853  
1854      /*
1855       * Canonicalize the "false" input reg to match the destination reg so
1856       * that the tcg backend can implement a "move if true" operation.
1857       */
1858      if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1859          op->args[5] = tcg_invert_cond(op->args[5]);
1860      }
1861  
1862      i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
1863                                    &op->args[2], &op->args[5]);
1864      if (i >= 0) {
1865          return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1866      }
1867  
1868      ctx->z_mask = arg_info(op->args[3])->z_mask
1869                  | arg_info(op->args[4])->z_mask;
1870      ctx->s_mask = arg_info(op->args[3])->s_mask
1871                  & arg_info(op->args[4])->s_mask;
1872  
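          /*
           * A movcond selecting between the constants 1/0 (or -1/0) is
           * just setcond (or negsetcond) with the same, or the inverted,
           * condition.
           */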
1873      if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1874          uint64_t tv = arg_info(op->args[3])->val;
1875          uint64_t fv = arg_info(op->args[4])->val;
1876          TCGOpcode opc, negopc = 0;
1877          TCGCond cond = op->args[5];
1878  
1879          switch (ctx->type) {
1880          case TCG_TYPE_I32:
1881              opc = INDEX_op_setcond_i32;
1882              if (TCG_TARGET_HAS_negsetcond_i32) {
1883                  negopc = INDEX_op_negsetcond_i32;
1884              }
1885              tv = (int32_t)tv;
1886              fv = (int32_t)fv;
1887              break;
1888          case TCG_TYPE_I64:
1889              opc = INDEX_op_setcond_i64;
1890              if (TCG_TARGET_HAS_negsetcond_i64) {
1891                  negopc = INDEX_op_negsetcond_i64;
1892              }
1893              break;
1894          default:
1895              g_assert_not_reached();
1896          }
1897  
1898          if (tv == 1 && fv == 0) {
1899              op->opc = opc;
1900              op->args[3] = cond;
1901          } else if (fv == 1 && tv == 0) {
1902              op->opc = opc;
1903              op->args[3] = tcg_invert_cond(cond);
1904          } else if (negopc) {
1905              if (tv == -1 && fv == 0) {
1906                  op->opc = negopc;
1907                  op->args[3] = cond;
1908              } else if (fv == -1 && tv == 0) {
1909                  op->opc = negopc;
1910                  op->args[3] = tcg_invert_cond(cond);
1911              }
1912          }
1913      }
1914      return false;
1915  }
1916  
1917  static bool fold_mul(OptContext *ctx, TCGOp *op)
1918  {
1919      if (fold_const2(ctx, op) ||
1920          fold_xi_to_i(ctx, op, 0) ||
1921          fold_xi_to_x(ctx, op, 1)) {
1922          return true;
1923      }
1924      return false;
1925  }
1926  
1927  static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1928  {
1929      if (fold_const2_commutative(ctx, op) ||
1930          fold_xi_to_i(ctx, op, 0)) {
1931          return true;
1932      }
1933      return false;
1934  }
1935  
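      /*
       * Fold mulu2/muls2 with two constant inputs into the two constant
       * halves of the double-word product, emitted as a pair of
       * mov-immediate operations.
       */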
1936  static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1937  {
1938      swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1939  
1940      if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1941          uint64_t a = arg_info(op->args[2])->val;
1942          uint64_t b = arg_info(op->args[3])->val;
1943          uint64_t h, l;
1944          TCGArg rl, rh;
1945          TCGOp *op2;
1946  
1947          switch (op->opc) {
1948          case INDEX_op_mulu2_i32:
1949              l = (uint64_t)(uint32_t)a * (uint32_t)b;
1950              h = (int32_t)(l >> 32);
1951              l = (int32_t)l;
1952              break;
1953          case INDEX_op_muls2_i32:
1954              l = (int64_t)(int32_t)a * (int32_t)b;
1955              h = l >> 32;
1956              l = (int32_t)l;
1957              break;
1958          case INDEX_op_mulu2_i64:
1959              mulu64(&l, &h, a, b);
1960              break;
1961          case INDEX_op_muls2_i64:
1962              muls64(&l, &h, a, b);
1963              break;
1964          default:
1965              g_assert_not_reached();
1966          }
1967  
1968          rl = op->args[0];
1969          rh = op->args[1];
1970  
1971          /* The proper opcode is supplied by tcg_opt_gen_mov. */
1972          op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1973  
1974          tcg_opt_gen_movi(ctx, op, rl, l);
1975          tcg_opt_gen_movi(ctx, op2, rh, h);
1976          return true;
1977      }
1978      return false;
1979  }
1980  
1981  static bool fold_nand(OptContext *ctx, TCGOp *op)
1982  {
1983      if (fold_const2_commutative(ctx, op) ||
1984          fold_xi_to_not(ctx, op, -1)) {
1985          return true;
1986      }
1987  
1988      ctx->s_mask = arg_info(op->args[1])->s_mask
1989                  & arg_info(op->args[2])->s_mask;
1990      return false;
1991  }
1992  
1993  static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
1994  {
1995      /* Set to 1 all bits at and to the left of the rightmost set bit.  */
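          /* E.g. an input z_mask of 0x0c yields -(0x04) = 0xfffffffffffffffc. */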
1996      uint64_t z_mask = arg_info(op->args[1])->z_mask;
1997      ctx->z_mask = -(z_mask & -z_mask);
1998  
1999      /*
2000       * Because of fold_sub_to_neg, we want to always return true,
2001       * via finish_folding.
2002       */
2003      finish_folding(ctx, op);
2004      return true;
2005  }
2006  
2007  static bool fold_neg(OptContext *ctx, TCGOp *op)
2008  {
2009      return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2010  }
2011  
2012  static bool fold_nor(OptContext *ctx, TCGOp *op)
2013  {
2014      if (fold_const2_commutative(ctx, op) ||
2015          fold_xi_to_not(ctx, op, 0)) {
2016          return true;
2017      }
2018  
2019      ctx->s_mask = arg_info(op->args[1])->s_mask
2020                  & arg_info(op->args[2])->s_mask;
2021      return false;
2022  }
2023  
2024  static bool fold_not(OptContext *ctx, TCGOp *op)
2025  {
2026      if (fold_const1(ctx, op)) {
2027          return true;
2028      }
2029  
2030      ctx->s_mask = arg_info(op->args[1])->s_mask;
2031  
2032      /* Because of fold_to_not, we want to always return true, via finish. */
2033      finish_folding(ctx, op);
2034      return true;
2035  }
2036  
2037  static bool fold_or(OptContext *ctx, TCGOp *op)
2038  {
2039      if (fold_const2_commutative(ctx, op) ||
2040          fold_xi_to_x(ctx, op, 0) ||
2041          fold_xx_to_x(ctx, op)) {
2042          return true;
2043      }
2044  
2045      ctx->z_mask = arg_info(op->args[1])->z_mask
2046                  | arg_info(op->args[2])->z_mask;
2047      ctx->s_mask = arg_info(op->args[1])->s_mask
2048                  & arg_info(op->args[2])->s_mask;
2049      return fold_masks(ctx, op);
2050  }
2051  
2052  static bool fold_orc(OptContext *ctx, TCGOp *op)
2053  {
2054      if (fold_const2(ctx, op) ||
2055          fold_xx_to_i(ctx, op, -1) ||
2056          fold_xi_to_x(ctx, op, -1) ||
2057          fold_ix_to_not(ctx, op, 0)) {
2058          return true;
2059      }
2060  
2061      ctx->s_mask = arg_info(op->args[1])->s_mask
2062                  & arg_info(op->args[2])->s_mask;
2063      return false;
2064  }
2065  
2066  static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
2067  {
2068      const TCGOpDef *def = &tcg_op_defs[op->opc];
2069      MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2070      MemOp mop = get_memop(oi);
2071      int width = 8 * memop_size(mop);
2072  
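          /*
           * For a narrow load all bits above the access width are known:
           * zero for an unsigned load, sign repetitions for a signed one
           * (e.g. an 8-bit unsigned load yields z_mask 0xff).
           */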
2073      if (width < 64) {
2074          ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
2075          if (!(mop & MO_SIGN)) {
2076              ctx->z_mask = MAKE_64BIT_MASK(0, width);
2077              ctx->s_mask <<= 1;
2078          }
2079      }
2080  
2081      /* Opcodes that touch guest memory stop the mb optimization.  */
2082      ctx->prev_mb = NULL;
2083      return false;
2084  }
2085  
2086  static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2087  {
2088      /* Opcodes that touch guest memory stop the mb optimization.  */
2089      ctx->prev_mb = NULL;
2090      return false;
2091  }
2092  
2093  static bool fold_remainder(OptContext *ctx, TCGOp *op)
2094  {
2095      if (fold_const2(ctx, op) ||
2096          fold_xx_to_i(ctx, op, 0)) {
2097          return true;
2098      }
2099      return false;
2100  }
2101  
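      /*
       * Convert setcond/negsetcond with a TSTNE/TSTEQ condition against
       * a power-of-two constant into a single-bit extract, or a shift
       * plus an "and" with 1, fixing up the result for TSTEQ and for
       * negation.  E.g. "setcond_i32 r, x, 0x10, TSTNE" can become
       * "extract_i32 r, x, 4, 1".
       */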
2102  static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2103  {
2104      TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
2105      TCGOpcode uext_opc = 0, sext_opc = 0;
2106      TCGCond cond = op->args[3];
2107      TCGArg ret, src1, src2;
2108      TCGOp *op2;
2109      uint64_t val;
2110      int sh;
2111      bool inv;
2112  
2113      if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2114          return;
2115      }
2116  
2117      src2 = op->args[2];
2118      val = arg_info(src2)->val;
2119      if (!is_power_of_2(val)) {
2120          return;
2121      }
2122      sh = ctz64(val);
2123  
2124      switch (ctx->type) {
2125      case TCG_TYPE_I32:
2126          and_opc = INDEX_op_and_i32;
2127          sub_opc = INDEX_op_sub_i32;
2128          xor_opc = INDEX_op_xor_i32;
2129          shr_opc = INDEX_op_shr_i32;
2130          neg_opc = INDEX_op_neg_i32;
2131          if (TCG_TARGET_extract_i32_valid(sh, 1)) {
2132              uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
2133              sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
2134          }
2135          break;
2136      case TCG_TYPE_I64:
2137          and_opc = INDEX_op_and_i64;
2138          sub_opc = INDEX_op_sub_i64;
2139          xor_opc = INDEX_op_xor_i64;
2140          shr_opc = INDEX_op_shr_i64;
2141          neg_opc = INDEX_op_neg_i64;
2142          if (TCG_TARGET_extract_i64_valid(sh, 1)) {
2143              uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
2144              sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
2145          }
2146          break;
2147      default:
2148          g_assert_not_reached();
2149      }
2150  
2151      ret = op->args[0];
2152      src1 = op->args[1];
2153      inv = cond == TCG_COND_TSTEQ;
2154  
2155      if (sh && sext_opc && neg && !inv) {
2156          op->opc = sext_opc;
2157          op->args[1] = src1;
2158          op->args[2] = sh;
2159          op->args[3] = 1;
2160          return;
2161      } else if (sh && uext_opc) {
2162          op->opc = uext_opc;
2163          op->args[1] = src1;
2164          op->args[2] = sh;
2165          op->args[3] = 1;
2166      } else {
2167          if (sh) {
2168              op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
2169              op2->args[0] = ret;
2170              op2->args[1] = src1;
2171              op2->args[2] = arg_new_constant(ctx, sh);
2172              src1 = ret;
2173          }
2174          op->opc = and_opc;
2175          op->args[1] = src1;
2176          op->args[2] = arg_new_constant(ctx, 1);
2177      }
2178  
2179      if (neg && inv) {
2180          op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
2181          op2->args[0] = ret;
2182          op2->args[1] = ret;
2183          op2->args[2] = arg_new_constant(ctx, 1);
2184      } else if (inv) {
2185          op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
2186          op2->args[0] = ret;
2187          op2->args[1] = ret;
2188          op2->args[2] = arg_new_constant(ctx, 1);
2189      } else if (neg) {
2190          op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
2191          op2->args[0] = ret;
2192          op2->args[1] = ret;
2193      }
2194  }
2195  
2196  static bool fold_setcond(OptContext *ctx, TCGOp *op)
2197  {
2198      int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2199                                        &op->args[2], &op->args[3]);
2200      if (i >= 0) {
2201          return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2202      }
2203      fold_setcond_tst_pow2(ctx, op, false);
2204  
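          /* Value is 0 or 1, so only bit 0 may be set. */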
2205      ctx->z_mask = 1;
2206      ctx->s_mask = smask_from_zmask(1);
2207      return false;
2208  }
2209  
2210  static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2211  {
2212      int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2213                                        &op->args[2], &op->args[3]);
2214      if (i >= 0) {
2215          return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2216      }
2217      fold_setcond_tst_pow2(ctx, op, true);
2218  
2219      /* Value is {0,-1} so all bits are repetitions of the sign. */
2220      ctx->s_mask = -1;
2221      return false;
2222  }
2223  
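      /*
       * Fold a double-word setcond2 into a single-word setcond when one
       * half of the comparison can be resolved, e.g. for EQ with the
       * high halves known equal, "setcond2_i32 r, al, ah, bl, bh, EQ"
       * only depends on the low halves and can become
       * "setcond_i32 r, al, bl, EQ".
       */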
2224  static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2225  {
2226      TCGCond cond;
2227      int i, inv = 0;
2228  
2229      i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2230      cond = op->args[5];
2231      if (i >= 0) {
2232          goto do_setcond_const;
2233      }
2234  
2235      switch (cond) {
2236      case TCG_COND_LT:
2237      case TCG_COND_GE:
2238          /*
2239           * Simplify LT/GE comparisons vs zero to a single compare
2240           * vs the high word of the input.
2241           */
2242          if (arg_is_const_val(op->args[3], 0) &&
2243              arg_is_const_val(op->args[4], 0)) {
2244              goto do_setcond_high;
2245          }
2246          break;
2247  
2248      case TCG_COND_NE:
2249          inv = 1;
2250          QEMU_FALLTHROUGH;
2251      case TCG_COND_EQ:
2252          /*
2253           * Simplify EQ/NE comparisons where one of the pairs
2254           * can be resolved.
2255           */
2256          i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2257                                       op->args[3], cond);
2258          switch (i ^ inv) {
2259          case 0:
2260              goto do_setcond_const;
2261          case 1:
2262              goto do_setcond_high;
2263          }
2264  
2265          i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2266                                       op->args[4], cond);
2267          switch (i ^ inv) {
2268          case 0:
2269              goto do_setcond_const;
2270          case 1:
2271              goto do_setcond_low;
2272          }
2273          break;
2274  
2275      case TCG_COND_TSTEQ:
2276      case TCG_COND_TSTNE:
2277          if (arg_is_const_val(op->args[2], 0)) {
2278              goto do_setcond_high;
2279          }
2280          if (arg_is_const_val(op->args[4], 0)) {
2281              goto do_setcond_low;
2282          }
2283          break;
2284  
2285      default:
2286          break;
2287  
2288      do_setcond_low:
2289          op->args[2] = op->args[3];
2290          op->args[3] = cond;
2291          op->opc = INDEX_op_setcond_i32;
2292          return fold_setcond(ctx, op);
2293  
2294      do_setcond_high:
2295          op->args[1] = op->args[2];
2296          op->args[2] = op->args[4];
2297          op->args[3] = cond;
2298          op->opc = INDEX_op_setcond_i32;
2299          return fold_setcond(ctx, op);
2300      }
2301  
2302      ctx->z_mask = 1;
2303      ctx->s_mask = smask_from_zmask(1);
2304      return false;
2305  
2306   do_setcond_const:
2307      return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2308  }
2309  
2310  static bool fold_sextract(OptContext *ctx, TCGOp *op)
2311  {
2312      uint64_t z_mask, s_mask, s_mask_old;
2313      int pos = op->args[2];
2314      int len = op->args[3];
2315  
2316      if (arg_is_const(op->args[1])) {
2317          uint64_t t;
2318  
2319          t = arg_info(op->args[1])->val;
2320          t = sextract64(t, pos, len);
2321          return tcg_opt_gen_movi(ctx, op, op->args[0], t);
2322      }
2323  
2324      z_mask = arg_info(op->args[1])->z_mask;
2325      z_mask = sextract64(z_mask, pos, len);
2326      ctx->z_mask = z_mask;
2327  
2328      s_mask_old = arg_info(op->args[1])->s_mask;
2329      s_mask = sextract64(s_mask_old, pos, len);
2330      s_mask |= MAKE_64BIT_MASK(len, 64 - len);
2331      ctx->s_mask = s_mask;
2332  
2333      if (pos == 0) {
2334          ctx->a_mask = s_mask & ~s_mask_old;
2335      }
2336  
2337      return fold_masks(ctx, op);
2338  }
2339  
2340  static bool fold_shift(OptContext *ctx, TCGOp *op)
2341  {
2342      uint64_t s_mask, z_mask, sign;
2343  
2344      if (fold_const2(ctx, op) ||
2345          fold_ix_to_i(ctx, op, 0) ||
2346          fold_xi_to_x(ctx, op, 0)) {
2347          return true;
2348      }
2349  
2350      s_mask = arg_info(op->args[1])->s_mask;
2351      z_mask = arg_info(op->args[1])->z_mask;
2352  
2353      if (arg_is_const(op->args[2])) {
2354          int sh = arg_info(op->args[2])->val;
2355  
2356          ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2357  
2358          s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2359          ctx->s_mask = smask_from_smask(s_mask);
2360  
2361          return fold_masks(ctx, op);
2362      }
2363  
2364      switch (op->opc) {
2365      CASE_OP_32_64(sar):
2366          /*
2367           * Arithmetic right shift will not reduce the number of
2368           * input sign repetitions.
2369           */
2370          ctx->s_mask = s_mask;
2371          break;
2372      CASE_OP_32_64(shr):
2373          /*
2374           * If the sign bit is known zero, then logical right shift
2375           * will not reduce the number of input sign repetitions.
2376           */
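              /* The lowest bit set in s_mask is one above the sign bit. */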
2377          sign = (s_mask & -s_mask) >> 1;
2378          if (sign && !(z_mask & sign)) {
2379              ctx->s_mask = s_mask;
2380          }
2381          break;
2382      default:
2383          break;
2384      }
2385  
2386      return false;
2387  }
2388  
2389  static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2390  {
2391      TCGOpcode neg_op;
2392      bool have_neg;
2393  
2394      if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2395          return false;
2396      }
2397  
2398      switch (ctx->type) {
2399      case TCG_TYPE_I32:
2400          neg_op = INDEX_op_neg_i32;
2401          have_neg = true;
2402          break;
2403      case TCG_TYPE_I64:
2404          neg_op = INDEX_op_neg_i64;
2405          have_neg = true;
2406          break;
2407      case TCG_TYPE_V64:
2408      case TCG_TYPE_V128:
2409      case TCG_TYPE_V256:
2410          neg_op = INDEX_op_neg_vec;
2411          have_neg = (TCG_TARGET_HAS_neg_vec &&
2412                      tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2413          break;
2414      default:
2415          g_assert_not_reached();
2416      }
2417      if (have_neg) {
2418          op->opc = neg_op;
2419          op->args[1] = op->args[2];
2420          return fold_neg_no_const(ctx, op);
2421      }
2422      return false;
2423  }
2424  
2425  /* We cannot as yet do_constant_folding with vectors. */
2426  static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2427  {
2428      if (fold_xx_to_i(ctx, op, 0) ||
2429          fold_xi_to_x(ctx, op, 0) ||
2430          fold_sub_to_neg(ctx, op)) {
2431          return true;
2432      }
2433      return false;
2434  }
2435  
2436  static bool fold_sub(OptContext *ctx, TCGOp *op)
2437  {
2438      if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
2439          return true;
2440      }
2441  
2442      /* Fold sub r,x,i to add r,x,-i */
2443      if (arg_is_const(op->args[2])) {
2444          uint64_t val = arg_info(op->args[2])->val;
2445  
2446          op->opc = (ctx->type == TCG_TYPE_I32
2447                     ? INDEX_op_add_i32 : INDEX_op_add_i64);
2448          op->args[2] = arg_new_constant(ctx, -val);
2449      }
2450      return false;
2451  }
2452  
2453  static bool fold_sub2(OptContext *ctx, TCGOp *op)
2454  {
2455      return fold_addsub2(ctx, op, false);
2456  }
2457  
2458  static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2459  {
2460      /* We can't do any folding with a load, but we can record bits. */
2461      switch (op->opc) {
2462      CASE_OP_32_64(ld8s):
2463          ctx->s_mask = MAKE_64BIT_MASK(8, 56);
2464          break;
2465      CASE_OP_32_64(ld8u):
2466          ctx->z_mask = MAKE_64BIT_MASK(0, 8);
2467          ctx->s_mask = MAKE_64BIT_MASK(9, 55);
2468          break;
2469      CASE_OP_32_64(ld16s):
2470          ctx->s_mask = MAKE_64BIT_MASK(16, 48);
2471          break;
2472      CASE_OP_32_64(ld16u):
2473          ctx->z_mask = MAKE_64BIT_MASK(0, 16);
2474          ctx->s_mask = MAKE_64BIT_MASK(17, 47);
2475          break;
2476      case INDEX_op_ld32s_i64:
2477          ctx->s_mask = MAKE_64BIT_MASK(32, 32);
2478          break;
2479      case INDEX_op_ld32u_i64:
2480          ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2481          ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2482          break;
2483      default:
2484          g_assert_not_reached();
2485      }
2486      return false;
2487  }
2488  
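      /*
       * A load from a fixed offset within env may be replaced by a copy
       * of the temp last known to hold that memory (see ctx->mem_copy);
       * otherwise the loaded temp is recorded as a new copy of that
       * memory.
       */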
2489  static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2490  {
2491      TCGTemp *dst, *src;
2492      intptr_t ofs;
2493      TCGType type;
2494  
2495      if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2496          return false;
2497      }
2498  
2499      type = ctx->type;
2500      ofs = op->args[2];
2501      dst = arg_temp(op->args[0]);
2502      src = find_mem_copy_for(ctx, type, ofs);
2503      if (src && src->base_type == type) {
2504          return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2505      }
2506  
2507      reset_ts(ctx, dst);
2508      record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2509      return true;
2510  }
2511  
2512  static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2513  {
2514      intptr_t ofs = op->args[2];
2515      intptr_t lm1;
2516  
2517      if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2518          remove_mem_copy_all(ctx);
2519          return false;
2520      }
2521  
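          /* lm1 is the length of the store minus 1, for the inclusive range below. */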
2522      switch (op->opc) {
2523      CASE_OP_32_64(st8):
2524          lm1 = 0;
2525          break;
2526      CASE_OP_32_64(st16):
2527          lm1 = 1;
2528          break;
2529      case INDEX_op_st32_i64:
2530      case INDEX_op_st_i32:
2531          lm1 = 3;
2532          break;
2533      case INDEX_op_st_i64:
2534          lm1 = 7;
2535          break;
2536      case INDEX_op_st_vec:
2537          lm1 = tcg_type_size(ctx->type) - 1;
2538          break;
2539      default:
2540          g_assert_not_reached();
2541      }
2542      remove_mem_copy_in(ctx, ofs, ofs + lm1);
2543      return false;
2544  }
2545  
2546  static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2547  {
2548      TCGTemp *src;
2549      intptr_t ofs, last;
2550      TCGType type;
2551  
2552      if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2553          fold_tcg_st(ctx, op);
2554          return false;
2555      }
2556  
2557      src = arg_temp(op->args[0]);
2558      ofs = op->args[2];
2559      type = ctx->type;
2560  
2561      /*
2562       * Eliminate duplicate stores of a constant.
2563       * This happens frequently when the target ISA zero-extends.
2564       */
2565      if (ts_is_const(src)) {
2566          TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2567          if (src == prev) {
2568              tcg_op_remove(ctx->tcg, op);
2569              return true;
2570          }
2571      }
2572  
2573      last = ofs + tcg_type_size(type) - 1;
2574      remove_mem_copy_in(ctx, ofs, last);
2575      record_mem_copy(ctx, type, src, ofs, last);
2576      return false;
2577  }
2578  
2579  static bool fold_xor(OptContext *ctx, TCGOp *op)
2580  {
2581      if (fold_const2_commutative(ctx, op) ||
2582          fold_xx_to_i(ctx, op, 0) ||
2583          fold_xi_to_x(ctx, op, 0) ||
2584          fold_xi_to_not(ctx, op, -1)) {
2585          return true;
2586      }
2587  
2588      ctx->z_mask = arg_info(op->args[1])->z_mask
2589                  | arg_info(op->args[2])->z_mask;
2590      ctx->s_mask = arg_info(op->args[1])->s_mask
2591                  & arg_info(op->args[2])->s_mask;
2592      return fold_masks(ctx, op);
2593  }
2594  
2595  /* Propagate constants and copies, fold constant expressions. */
2596  void tcg_optimize(TCGContext *s)
2597  {
2598      int nb_temps, i;
2599      TCGOp *op, *op_next;
2600      OptContext ctx = { .tcg = s };
2601  
2602      QSIMPLEQ_INIT(&ctx.mem_free);
2603  
2604      /* Each temp is lazily given a TempOptInfo via its state_ptr.
2605         If a temp holds a constant then its value is kept in that info.
2606         If a temp is a copy of other temps then the other copies are
2607         available through the info's doubly linked circular copy list. */
2608  
2609      nb_temps = s->nb_temps;
2610      for (i = 0; i < nb_temps; ++i) {
2611          s->temps[i].state_ptr = NULL;
2612      }
2613  
2614      QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2615          TCGOpcode opc = op->opc;
2616          const TCGOpDef *def;
2617          bool done = false;
2618  
2619          /* Calls are special. */
2620          if (opc == INDEX_op_call) {
2621              fold_call(&ctx, op);
2622              continue;
2623          }
2624  
2625          def = &tcg_op_defs[opc];
2626          init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2627          copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2628  
2629          /* Pre-compute the type of the operation. */
2630          if (def->flags & TCG_OPF_VECTOR) {
2631              ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2632          } else if (def->flags & TCG_OPF_64BIT) {
2633              ctx.type = TCG_TYPE_I64;
2634          } else {
2635              ctx.type = TCG_TYPE_I32;
2636          }
2637  
2638          /* Assume all bits affected, no bits known zero, no sign reps. */
2639          ctx.a_mask = -1;
2640          ctx.z_mask = -1;
2641          ctx.s_mask = 0;
2642  
2643          /*
2644           * Process each opcode.
2645           * Sorted alphabetically by opcode as much as possible.
2646           */
2647          switch (opc) {
2648          CASE_OP_32_64(add):
2649              done = fold_add(&ctx, op);
2650              break;
2651          case INDEX_op_add_vec:
2652              done = fold_add_vec(&ctx, op);
2653              break;
2654          CASE_OP_32_64(add2):
2655              done = fold_add2(&ctx, op);
2656              break;
2657          CASE_OP_32_64_VEC(and):
2658              done = fold_and(&ctx, op);
2659              break;
2660          CASE_OP_32_64_VEC(andc):
2661              done = fold_andc(&ctx, op);
2662              break;
2663          CASE_OP_32_64(brcond):
2664              done = fold_brcond(&ctx, op);
2665              break;
2666          case INDEX_op_brcond2_i32:
2667              done = fold_brcond2(&ctx, op);
2668              break;
2669          CASE_OP_32_64(bswap16):
2670          CASE_OP_32_64(bswap32):
2671          case INDEX_op_bswap64_i64:
2672              done = fold_bswap(&ctx, op);
2673              break;
2674          CASE_OP_32_64(clz):
2675          CASE_OP_32_64(ctz):
2676              done = fold_count_zeros(&ctx, op);
2677              break;
2678          CASE_OP_32_64(ctpop):
2679              done = fold_ctpop(&ctx, op);
2680              break;
2681          CASE_OP_32_64(deposit):
2682              done = fold_deposit(&ctx, op);
2683              break;
2684          CASE_OP_32_64(div):
2685          CASE_OP_32_64(divu):
2686              done = fold_divide(&ctx, op);
2687              break;
2688          case INDEX_op_dup_vec:
2689              done = fold_dup(&ctx, op);
2690              break;
2691          case INDEX_op_dup2_vec:
2692              done = fold_dup2(&ctx, op);
2693              break;
2694          CASE_OP_32_64_VEC(eqv):
2695              done = fold_eqv(&ctx, op);
2696              break;
2697          CASE_OP_32_64(extract):
2698              done = fold_extract(&ctx, op);
2699              break;
2700          CASE_OP_32_64(extract2):
2701              done = fold_extract2(&ctx, op);
2702              break;
2703          CASE_OP_32_64(ext8s):
2704          CASE_OP_32_64(ext16s):
2705          case INDEX_op_ext32s_i64:
2706          case INDEX_op_ext_i32_i64:
2707              done = fold_exts(&ctx, op);
2708              break;
2709          CASE_OP_32_64(ext8u):
2710          CASE_OP_32_64(ext16u):
2711          case INDEX_op_ext32u_i64:
2712          case INDEX_op_extu_i32_i64:
2713          case INDEX_op_extrl_i64_i32:
2714          case INDEX_op_extrh_i64_i32:
2715              done = fold_extu(&ctx, op);
2716              break;
2717          CASE_OP_32_64(ld8s):
2718          CASE_OP_32_64(ld8u):
2719          CASE_OP_32_64(ld16s):
2720          CASE_OP_32_64(ld16u):
2721          case INDEX_op_ld32s_i64:
2722          case INDEX_op_ld32u_i64:
2723              done = fold_tcg_ld(&ctx, op);
2724              break;
2725          case INDEX_op_ld_i32:
2726          case INDEX_op_ld_i64:
2727          case INDEX_op_ld_vec:
2728              done = fold_tcg_ld_memcopy(&ctx, op);
2729              break;
2730          CASE_OP_32_64(st8):
2731          CASE_OP_32_64(st16):
2732          case INDEX_op_st32_i64:
2733              done = fold_tcg_st(&ctx, op);
2734              break;
2735          case INDEX_op_st_i32:
2736          case INDEX_op_st_i64:
2737          case INDEX_op_st_vec:
2738              done = fold_tcg_st_memcopy(&ctx, op);
2739              break;
2740          case INDEX_op_mb:
2741              done = fold_mb(&ctx, op);
2742              break;
2743          CASE_OP_32_64_VEC(mov):
2744              done = fold_mov(&ctx, op);
2745              break;
2746          CASE_OP_32_64(movcond):
2747              done = fold_movcond(&ctx, op);
2748              break;
2749          CASE_OP_32_64(mul):
2750              done = fold_mul(&ctx, op);
2751              break;
2752          CASE_OP_32_64(mulsh):
2753          CASE_OP_32_64(muluh):
2754              done = fold_mul_highpart(&ctx, op);
2755              break;
2756          CASE_OP_32_64(muls2):
2757          CASE_OP_32_64(mulu2):
2758              done = fold_multiply2(&ctx, op);
2759              break;
2760          CASE_OP_32_64_VEC(nand):
2761              done = fold_nand(&ctx, op);
2762              break;
2763          CASE_OP_32_64(neg):
2764              done = fold_neg(&ctx, op);
2765              break;
2766          CASE_OP_32_64_VEC(nor):
2767              done = fold_nor(&ctx, op);
2768              break;
2769          CASE_OP_32_64_VEC(not):
2770              done = fold_not(&ctx, op);
2771              break;
2772          CASE_OP_32_64_VEC(or):
2773              done = fold_or(&ctx, op);
2774              break;
2775          CASE_OP_32_64_VEC(orc):
2776              done = fold_orc(&ctx, op);
2777              break;
2778          case INDEX_op_qemu_ld_a32_i32:
2779          case INDEX_op_qemu_ld_a64_i32:
2780          case INDEX_op_qemu_ld_a32_i64:
2781          case INDEX_op_qemu_ld_a64_i64:
2782          case INDEX_op_qemu_ld_a32_i128:
2783          case INDEX_op_qemu_ld_a64_i128:
2784              done = fold_qemu_ld(&ctx, op);
2785              break;
2786          case INDEX_op_qemu_st8_a32_i32:
2787          case INDEX_op_qemu_st8_a64_i32:
2788          case INDEX_op_qemu_st_a32_i32:
2789          case INDEX_op_qemu_st_a64_i32:
2790          case INDEX_op_qemu_st_a32_i64:
2791          case INDEX_op_qemu_st_a64_i64:
2792          case INDEX_op_qemu_st_a32_i128:
2793          case INDEX_op_qemu_st_a64_i128:
2794              done = fold_qemu_st(&ctx, op);
2795              break;
2796          CASE_OP_32_64(rem):
2797          CASE_OP_32_64(remu):
2798              done = fold_remainder(&ctx, op);
2799              break;
2800          CASE_OP_32_64(rotl):
2801          CASE_OP_32_64(rotr):
2802          CASE_OP_32_64(sar):
2803          CASE_OP_32_64(shl):
2804          CASE_OP_32_64(shr):
2805              done = fold_shift(&ctx, op);
2806              break;
2807          CASE_OP_32_64(setcond):
2808              done = fold_setcond(&ctx, op);
2809              break;
2810          CASE_OP_32_64(negsetcond):
2811              done = fold_negsetcond(&ctx, op);
2812              break;
2813          case INDEX_op_setcond2_i32:
2814              done = fold_setcond2(&ctx, op);
2815              break;
2816          CASE_OP_32_64(sextract):
2817              done = fold_sextract(&ctx, op);
2818              break;
2819          CASE_OP_32_64(sub):
2820              done = fold_sub(&ctx, op);
2821              break;
2822          case INDEX_op_sub_vec:
2823              done = fold_sub_vec(&ctx, op);
2824              break;
2825          CASE_OP_32_64(sub2):
2826              done = fold_sub2(&ctx, op);
2827              break;
2828          CASE_OP_32_64_VEC(xor):
2829              done = fold_xor(&ctx, op);
2830              break;
2831          default:
2832              break;
2833          }
2834  
2835          if (!done) {
2836              finish_folding(&ctx, op);
2837          }
2838      }
2839  }
2840