xref: /openbmc/qemu/tcg/optimize.c (revision 1141159c)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg-internal.h"
30 
31 #define CASE_OP_32_64(x)                        \
32         glue(glue(case INDEX_op_, x), _i32):    \
33         glue(glue(case INDEX_op_, x), _i64)
34 
35 #define CASE_OP_32_64_VEC(x)                    \
36         glue(glue(case INDEX_op_, x), _i32):    \
37         glue(glue(case INDEX_op_, x), _i64):    \
38         glue(glue(case INDEX_op_, x), _vec)
39 
40 typedef struct TempOptInfo {
41     bool is_const;
42     TCGTemp *prev_copy;
43     TCGTemp *next_copy;
44     uint64_t val;
45     uint64_t z_mask;  /* mask bit is 0 only if the value bit is known 0 */
46     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
47 } TempOptInfo;
48 
49 typedef struct OptContext {
50     TCGContext *tcg;
51     TCGOp *prev_mb;
52     TCGTempSet temps_used;
53 
54     /* In-flight values computed while folding the current op. */
55     uint64_t a_mask;  /* mask bit is 0 only if value bit matches first input */
56     uint64_t z_mask;  /* mask bit is 0 only if the value bit is known 0 */
57     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits */
58     TCGType type;
59 } OptContext;
60 
61 /* Calculate the smask for a specific value. */
62 static uint64_t smask_from_value(uint64_t value)
63 {
64     int rep = clrsb64(value);
65     return ~(~0ull >> rep);
66 }
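
/*
 * A worked example, derived from the code above: for value == 0x00ff,
 * clrsb64 reports 55 redundant sign bits (bits 62..8 all match bit 63),
 * so the result is ~(~0ull >> 55) == 0xfffffffffffffe00, i.e. the top
 * 55 bits set.  For value == 0 or value == -1, clrsb64 reports 63 and
 * the smask covers every bit except bit 0.
 */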
67 
68 /*
69  * Calculate the smask for a given set of known-zeros.
70  * If there are lots of zeros on the left, we can consider the remainder
71  * an unsigned field, and thus the corresponding signed field is one bit
72  * larger.
73  */
74 static uint64_t smask_from_zmask(uint64_t zmask)
75 {
76     /*
77      * Only the 0 bits are significant for zmask, thus the msb itself
78      * must be zero, else we have no sign information.
79      */
80     int rep = clz64(zmask);
81     if (rep == 0) {
82         return 0;
83     }
84     rep -= 1;
85     return ~(~0ull >> rep);
86 }
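
/*
 * Example: zmask == 0xffff (only the low 16 bits may be nonzero) gives
 * clz64 == 48 and rep == 47, producing a mask of the top 47 bits
 * (bits 63..17).  A 16-bit unsigned field fits in a 17-bit signed
 * field, so bits 63..17 are all known copies of the (zero) sign bit.
 */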
87 
88 /*
89  * Recreate a properly left-aligned smask after manipulation.
90  * Some bit-shuffling, particularly shifts and rotates, may
91  * retain sign bits on the left, but may scatter disconnected
92  * sign bits on the right.  Retain only what remains to the left.
93  */
94 static uint64_t smask_from_smask(int64_t smask)
95 {
96     /* Only the 1 bits are significant for smask */
97     return smask_from_zmask(~smask);
98 }
99 
100 static inline TempOptInfo *ts_info(TCGTemp *ts)
101 {
102     return ts->state_ptr;
103 }
104 
105 static inline TempOptInfo *arg_info(TCGArg arg)
106 {
107     return ts_info(arg_temp(arg));
108 }
109 
110 static inline bool ts_is_const(TCGTemp *ts)
111 {
112     return ts_info(ts)->is_const;
113 }
114 
115 static inline bool arg_is_const(TCGArg arg)
116 {
117     return ts_is_const(arg_temp(arg));
118 }
119 
120 static inline bool ts_is_copy(TCGTemp *ts)
121 {
122     return ts_info(ts)->next_copy != ts;
123 }
124 
125 /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
126 static void reset_ts(TCGTemp *ts)
127 {
128     TempOptInfo *ti = ts_info(ts);
129     TempOptInfo *pi = ts_info(ti->prev_copy);
130     TempOptInfo *ni = ts_info(ti->next_copy);
131 
132     ni->prev_copy = ti->prev_copy;
133     pi->next_copy = ti->next_copy;
134     ti->next_copy = ts;
135     ti->prev_copy = ts;
136     ti->is_const = false;
137     ti->z_mask = -1;
138     ti->s_mask = 0;
139 }
140 
141 static void reset_temp(TCGArg arg)
142 {
143     reset_ts(arg_temp(arg));
144 }
145 
146 /* Initialize and activate a temporary.  */
147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
148 {
149     size_t idx = temp_idx(ts);
150     TempOptInfo *ti;
151 
152     if (test_bit(idx, ctx->temps_used.l)) {
153         return;
154     }
155     set_bit(idx, ctx->temps_used.l);
156 
157     ti = ts->state_ptr;
158     if (ti == NULL) {
159         ti = tcg_malloc(sizeof(TempOptInfo));
160         ts->state_ptr = ti;
161     }
162 
163     ti->next_copy = ts;
164     ti->prev_copy = ts;
165     if (ts->kind == TEMP_CONST) {
166         ti->is_const = true;
167         ti->val = ts->val;
168         ti->z_mask = ts->val;
169         ti->s_mask = smask_from_value(ts->val);
170     } else {
171         ti->is_const = false;
172         ti->z_mask = -1;
173         ti->s_mask = 0;
174     }
175 }
176 
177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
178 {
179     TCGTemp *i, *g, *l;
180 
181     /* If this is already readonly, we can't do better. */
182     if (temp_readonly(ts)) {
183         return ts;
184     }
185 
186     g = l = NULL;
187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
188         if (temp_readonly(i)) {
189             return i;
190         } else if (i->kind > ts->kind) {
191             if (i->kind == TEMP_GLOBAL) {
192                 g = i;
193             } else if (i->kind == TEMP_TB) {
194                 l = i;
195             }
196         }
197     }
198 
199     /* If we didn't find a better representation, return the same temp. */
200     return g ? g : l ? l : ts;
201 }
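
/*
 * Note, assuming the TCGTempKind enumeration orders longer-lived kinds
 * higher (which the i->kind > ts->kind test above relies on): the
 * preference is read-only temps (constants, fixed regs) first, then
 * globals, then TB-lifetime temps, and finally the original temp.
 */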
202 
203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
204 {
205     TCGTemp *i;
206 
207     if (ts1 == ts2) {
208         return true;
209     }
210 
211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
212         return false;
213     }
214 
215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
216         if (i == ts2) {
217             return true;
218         }
219     }
220 
221     return false;
222 }
223 
224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
225 {
226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
227 }
228 
229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
230 {
231     TCGTemp *dst_ts = arg_temp(dst);
232     TCGTemp *src_ts = arg_temp(src);
233     TempOptInfo *di;
234     TempOptInfo *si;
235     TCGOpcode new_op;
236 
237     if (ts_are_copies(dst_ts, src_ts)) {
238         tcg_op_remove(ctx->tcg, op);
239         return true;
240     }
241 
242     reset_ts(dst_ts);
243     di = ts_info(dst_ts);
244     si = ts_info(src_ts);
245 
246     switch (ctx->type) {
247     case TCG_TYPE_I32:
248         new_op = INDEX_op_mov_i32;
249         break;
250     case TCG_TYPE_I64:
251         new_op = INDEX_op_mov_i64;
252         break;
253     case TCG_TYPE_V64:
254     case TCG_TYPE_V128:
255     case TCG_TYPE_V256:
256         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
257         new_op = INDEX_op_mov_vec;
258         break;
259     default:
260         g_assert_not_reached();
261     }
262     op->opc = new_op;
263     op->args[0] = dst;
264     op->args[1] = src;
265 
266     di->z_mask = si->z_mask;
267     di->s_mask = si->s_mask;
268 
269     if (src_ts->type == dst_ts->type) {
270         TempOptInfo *ni = ts_info(si->next_copy);
271 
272         di->next_copy = si->next_copy;
273         di->prev_copy = src_ts;
274         ni->prev_copy = dst_ts;
275         si->next_copy = dst_ts;
276         di->is_const = si->is_const;
277         di->val = si->val;
278     }
279     return true;
280 }
281 
282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
283                              TCGArg dst, uint64_t val)
284 {
285     TCGTemp *tv;
286 
287     if (ctx->type == TCG_TYPE_I32) {
288         val = (int32_t)val;
289     }
290 
291     /* Convert movi to mov with constant temp. */
292     tv = tcg_constant_internal(ctx->type, val);
293     init_ts_info(ctx, tv);
294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
295 }
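
/*
 * Example: when folding produces the constant 0x1234 for t0, the op is
 * rewritten as "mov_i32 t0, $0x1234".  Routing every constant through
 * a TEMP_CONST temp lets later uses of t0 copy-propagate to the shared
 * constant temp.
 */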
296 
297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
298 {
299     uint64_t l64, h64;
300 
301     switch (op) {
302     CASE_OP_32_64(add):
303         return x + y;
304 
305     CASE_OP_32_64(sub):
306         return x - y;
307 
308     CASE_OP_32_64(mul):
309         return x * y;
310 
311     CASE_OP_32_64_VEC(and):
312         return x & y;
313 
314     CASE_OP_32_64_VEC(or):
315         return x | y;
316 
317     CASE_OP_32_64_VEC(xor):
318         return x ^ y;
319 
320     case INDEX_op_shl_i32:
321         return (uint32_t)x << (y & 31);
322 
323     case INDEX_op_shl_i64:
324         return (uint64_t)x << (y & 63);
325 
326     case INDEX_op_shr_i32:
327         return (uint32_t)x >> (y & 31);
328 
329     case INDEX_op_shr_i64:
330         return (uint64_t)x >> (y & 63);
331 
332     case INDEX_op_sar_i32:
333         return (int32_t)x >> (y & 31);
334 
335     case INDEX_op_sar_i64:
336         return (int64_t)x >> (y & 63);
337 
338     case INDEX_op_rotr_i32:
339         return ror32(x, y & 31);
340 
341     case INDEX_op_rotr_i64:
342         return ror64(x, y & 63);
343 
344     case INDEX_op_rotl_i32:
345         return rol32(x, y & 31);
346 
347     case INDEX_op_rotl_i64:
348         return rol64(x, y & 63);
349 
350     CASE_OP_32_64_VEC(not):
351         return ~x;
352 
353     CASE_OP_32_64(neg):
354         return -x;
355 
356     CASE_OP_32_64_VEC(andc):
357         return x & ~y;
358 
359     CASE_OP_32_64_VEC(orc):
360         return x | ~y;
361 
362     CASE_OP_32_64_VEC(eqv):
363         return ~(x ^ y);
364 
365     CASE_OP_32_64_VEC(nand):
366         return ~(x & y);
367 
368     CASE_OP_32_64_VEC(nor):
369         return ~(x | y);
370 
371     case INDEX_op_clz_i32:
372         return (uint32_t)x ? clz32(x) : y;
373 
374     case INDEX_op_clz_i64:
375         return x ? clz64(x) : y;
376 
377     case INDEX_op_ctz_i32:
378         return (uint32_t)x ? ctz32(x) : y;
379 
380     case INDEX_op_ctz_i64:
381         return x ? ctz64(x) : y;
382 
383     case INDEX_op_ctpop_i32:
384         return ctpop32(x);
385 
386     case INDEX_op_ctpop_i64:
387         return ctpop64(x);
388 
389     CASE_OP_32_64(ext8s):
390         return (int8_t)x;
391 
392     CASE_OP_32_64(ext16s):
393         return (int16_t)x;
394 
395     CASE_OP_32_64(ext8u):
396         return (uint8_t)x;
397 
398     CASE_OP_32_64(ext16u):
399         return (uint16_t)x;
400 
401     CASE_OP_32_64(bswap16):
402         x = bswap16(x);
403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
404 
405     CASE_OP_32_64(bswap32):
406         x = bswap32(x);
407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
408 
409     case INDEX_op_bswap64_i64:
410         return bswap64(x);
411 
412     case INDEX_op_ext_i32_i64:
413     case INDEX_op_ext32s_i64:
414         return (int32_t)x;
415 
416     case INDEX_op_extu_i32_i64:
417     case INDEX_op_extrl_i64_i32:
418     case INDEX_op_ext32u_i64:
419         return (uint32_t)x;
420 
421     case INDEX_op_extrh_i64_i32:
422         return (uint64_t)x >> 32;
423 
424     case INDEX_op_muluh_i32:
425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
426     case INDEX_op_mulsh_i32:
427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
428 
429     case INDEX_op_muluh_i64:
430         mulu64(&l64, &h64, x, y);
431         return h64;
432     case INDEX_op_mulsh_i64:
433         muls64(&l64, &h64, x, y);
434         return h64;
435 
436     case INDEX_op_div_i32:
437         /* Avoid crashing on divide by zero, otherwise undefined.  */
438         return (int32_t)x / ((int32_t)y ? : 1);
439     case INDEX_op_divu_i32:
440         return (uint32_t)x / ((uint32_t)y ? : 1);
441     case INDEX_op_div_i64:
442         return (int64_t)x / ((int64_t)y ? : 1);
443     case INDEX_op_divu_i64:
444         return (uint64_t)x / ((uint64_t)y ? : 1);
445 
446     case INDEX_op_rem_i32:
447         return (int32_t)x % ((int32_t)y ? : 1);
448     case INDEX_op_remu_i32:
449         return (uint32_t)x % ((uint32_t)y ? : 1);
450     case INDEX_op_rem_i64:
451         return (int64_t)x % ((int64_t)y ? : 1);
452     case INDEX_op_remu_i64:
453         return (uint64_t)x % ((uint64_t)y ? : 1);
454 
455     default:
456         g_assert_not_reached();
457     }
458 }
459 
460 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
461                                     uint64_t x, uint64_t y)
462 {
463     uint64_t res = do_constant_folding_2(op, x, y);
464     if (type == TCG_TYPE_I32) {
465         res = (int32_t)res;
466     }
467     return res;
468 }
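
/*
 * Example of the final truncation: folding add_i32 with
 * x == y == 0x80000000 yields 0x100000000 from do_constant_folding_2,
 * which the (int32_t) cast reduces to 0; 32-bit results are kept
 * sign-extended, matching the constant representation used elsewhere.
 */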
469 
470 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
471 {
472     switch (c) {
473     case TCG_COND_EQ:
474         return x == y;
475     case TCG_COND_NE:
476         return x != y;
477     case TCG_COND_LT:
478         return (int32_t)x < (int32_t)y;
479     case TCG_COND_GE:
480         return (int32_t)x >= (int32_t)y;
481     case TCG_COND_LE:
482         return (int32_t)x <= (int32_t)y;
483     case TCG_COND_GT:
484         return (int32_t)x > (int32_t)y;
485     case TCG_COND_LTU:
486         return x < y;
487     case TCG_COND_GEU:
488         return x >= y;
489     case TCG_COND_LEU:
490         return x <= y;
491     case TCG_COND_GTU:
492         return x > y;
493     default:
494         g_assert_not_reached();
495     }
496 }
497 
498 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
499 {
500     switch (c) {
501     case TCG_COND_EQ:
502         return x == y;
503     case TCG_COND_NE:
504         return x != y;
505     case TCG_COND_LT:
506         return (int64_t)x < (int64_t)y;
507     case TCG_COND_GE:
508         return (int64_t)x >= (int64_t)y;
509     case TCG_COND_LE:
510         return (int64_t)x <= (int64_t)y;
511     case TCG_COND_GT:
512         return (int64_t)x > (int64_t)y;
513     case TCG_COND_LTU:
514         return x < y;
515     case TCG_COND_GEU:
516         return x >= y;
517     case TCG_COND_LEU:
518         return x <= y;
519     case TCG_COND_GTU:
520         return x > y;
521     default:
522         g_assert_not_reached();
523     }
524 }
525 
526 static bool do_constant_folding_cond_eq(TCGCond c)
527 {
528     switch (c) {
529     case TCG_COND_GT:
530     case TCG_COND_LTU:
531     case TCG_COND_LT:
532     case TCG_COND_GTU:
533     case TCG_COND_NE:
534         return 0;
535     case TCG_COND_GE:
536     case TCG_COND_GEU:
537     case TCG_COND_LE:
538     case TCG_COND_LEU:
539     case TCG_COND_EQ:
540         return 1;
541     default:
542         g_assert_not_reached();
543     }
544 }
545 
546 /*
547  * Return -1 if the condition can't be simplified,
548  * and the result of the condition (0 or 1) if it can.
549  */
550 static int do_constant_folding_cond(TCGType type, TCGArg x,
551                                     TCGArg y, TCGCond c)
552 {
553     if (arg_is_const(x) && arg_is_const(y)) {
554         uint64_t xv = arg_info(x)->val;
555         uint64_t yv = arg_info(y)->val;
556 
557         switch (type) {
558         case TCG_TYPE_I32:
559             return do_constant_folding_cond_32(xv, yv, c);
560         case TCG_TYPE_I64:
561             return do_constant_folding_cond_64(xv, yv, c);
562         default:
563             /* Only scalar comparisons are optimizable */
564             return -1;
565         }
566     } else if (args_are_copies(x, y)) {
567         return do_constant_folding_cond_eq(c);
568     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
569         switch (c) {
570         case TCG_COND_LTU:
571             return 0;
572         case TCG_COND_GEU:
573             return 1;
574         default:
575             return -1;
576         }
577     }
578     return -1;
579 }
580 
581 /*
582  * Return -1 if the condition can't be simplified,
583  * and the result of the condition (0 or 1) if it can.
584  */
585 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
586 {
587     TCGArg al = p1[0], ah = p1[1];
588     TCGArg bl = p2[0], bh = p2[1];
589 
590     if (arg_is_const(bl) && arg_is_const(bh)) {
591         tcg_target_ulong blv = arg_info(bl)->val;
592         tcg_target_ulong bhv = arg_info(bh)->val;
593         uint64_t b = deposit64(blv, 32, 32, bhv);
594 
595         if (arg_is_const(al) && arg_is_const(ah)) {
596             tcg_target_ulong alv = arg_info(al)->val;
597             tcg_target_ulong ahv = arg_info(ah)->val;
598             uint64_t a = deposit64(alv, 32, 32, ahv);
599             return do_constant_folding_cond_64(a, b, c);
600         }
601         if (b == 0) {
602             switch (c) {
603             case TCG_COND_LTU:
604                 return 0;
605             case TCG_COND_GEU:
606                 return 1;
607             default:
608                 break;
609             }
610         }
611     }
612     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
613         return do_constant_folding_cond_eq(c);
614     }
615     return -1;
616 }
617 
618 /**
619  * swap_commutative:
620  * @dest: TCGArg of the destination argument, or NO_DEST.
621  * @p1: first paired argument
622  * @p2: second paired argument
623  *
624  * If *@p1 is a constant and *@p2 is not, swap.
625  * If *@p2 matches @dest, swap.
626  * Return true if a swap was performed.
627  */
628 
629 #define NO_DEST  temp_arg(NULL)
630 
631 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
632 {
633     TCGArg a1 = *p1, a2 = *p2;
634     int sum = 0;
635     sum += arg_is_const(a1);
636     sum -= arg_is_const(a2);
637 
638     /* Prefer the constant in the second argument, and then the form
639        op a, a, b, which is better handled on non-RISC hosts. */
640     if (sum > 0 || (sum == 0 && dest == a2)) {
641         *p1 = a2;
642         *p2 = a1;
643         return true;
644     }
645     return false;
646 }
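
/*
 * Examples: "add t0, $5, t1" is canonicalized to "add t0, t1, $5"
 * (constant second), and "add t0, t1, t0" to "add t0, t0, t1" so that
 * the destination matches the first source operand.
 */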
647 
648 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
649 {
650     int sum = 0;
651     sum += arg_is_const(p1[0]);
652     sum += arg_is_const(p1[1]);
653     sum -= arg_is_const(p2[0]);
654     sum -= arg_is_const(p2[1]);
655     if (sum > 0) {
656         TCGArg t;
657         t = p1[0], p1[0] = p2[0], p2[0] = t;
658         t = p1[1], p1[1] = p2[1], p2[1] = t;
659         return true;
660     }
661     return false;
662 }
663 
664 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
665 {
666     for (int i = 0; i < nb_args; i++) {
667         TCGTemp *ts = arg_temp(op->args[i]);
668         init_ts_info(ctx, ts);
669     }
670 }
671 
672 static void copy_propagate(OptContext *ctx, TCGOp *op,
673                            int nb_oargs, int nb_iargs)
674 {
675     TCGContext *s = ctx->tcg;
676 
677     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
678         TCGTemp *ts = arg_temp(op->args[i]);
679         if (ts_is_copy(ts)) {
680             op->args[i] = temp_arg(find_better_copy(s, ts));
681         }
682     }
683 }
684 
685 static void finish_folding(OptContext *ctx, TCGOp *op)
686 {
687     const TCGOpDef *def = &tcg_op_defs[op->opc];
688     int i, nb_oargs;
689 
690     /*
691      * For an opcode that ends a BB, reset all temp data.
692      * We do no cross-BB optimization.
693      */
694     if (def->flags & TCG_OPF_BB_END) {
695         memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
696         ctx->prev_mb = NULL;
697         return;
698     }
699 
700     nb_oargs = def->nb_oargs;
701     for (i = 0; i < nb_oargs; i++) {
702         TCGTemp *ts = arg_temp(op->args[i]);
703         reset_ts(ts);
704         /*
705          * Save the corresponding known-zero/sign bits mask for the
706          * first output argument (only one supported so far).
707          */
708         if (i == 0) {
709             ts_info(ts)->z_mask = ctx->z_mask;
710             ts_info(ts)->s_mask = ctx->s_mask;
711         }
712     }
713 }
714 
715 /*
716  * The fold_* functions return true when processing is complete,
717  * usually by folding the operation to a constant or to a copy,
718  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
719  * like collect information about the value produced, for use in
720  * optimizing a subsequent operation.
721  *
722  * These first fold_* functions are all helpers, used by other
723  * folders for more specific operations.
724  */
725 
726 static bool fold_const1(OptContext *ctx, TCGOp *op)
727 {
728     if (arg_is_const(op->args[1])) {
729         uint64_t t;
730 
731         t = arg_info(op->args[1])->val;
732         t = do_constant_folding(op->opc, ctx->type, t, 0);
733         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
734     }
735     return false;
736 }
737 
738 static bool fold_const2(OptContext *ctx, TCGOp *op)
739 {
740     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
741         uint64_t t1 = arg_info(op->args[1])->val;
742         uint64_t t2 = arg_info(op->args[2])->val;
743 
744         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
745         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
746     }
747     return false;
748 }
749 
750 static bool fold_commutative(OptContext *ctx, TCGOp *op)
751 {
752     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
753     return false;
754 }
755 
756 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
757 {
758     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
759     return fold_const2(ctx, op);
760 }
761 
762 static bool fold_masks(OptContext *ctx, TCGOp *op)
763 {
764     uint64_t a_mask = ctx->a_mask;
765     uint64_t z_mask = ctx->z_mask;
766     uint64_t s_mask = ctx->s_mask;
767 
768     /*
769      * 32-bit ops generate 32-bit results, which for the purpose of
770      * simplifying tcg are sign-extended.  Certainly that's how we
771      * represent our constants elsewhere.  Note that the bits will
772      * be reset properly for a 64-bit value when encountering the
773      * type changing opcodes.
774      */
775     if (ctx->type == TCG_TYPE_I32) {
776         a_mask = (int32_t)a_mask;
777         z_mask = (int32_t)z_mask;
778         s_mask |= MAKE_64BIT_MASK(32, 32);
779         ctx->z_mask = z_mask;
780         ctx->s_mask = s_mask;
781     }
782 
783     if (z_mask == 0) {
784         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
785     }
786     if (a_mask == 0) {
787         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
788     }
789     return false;
790 }
791 
792 /*
793  * Convert @op to NOT, if NOT is supported by the host.
794  * Return true if the conversion is successful, which will still
795  * indicate that the processing is complete.
796  */
797 static bool fold_not(OptContext *ctx, TCGOp *op);
798 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
799 {
800     TCGOpcode not_op;
801     bool have_not;
802 
803     switch (ctx->type) {
804     case TCG_TYPE_I32:
805         not_op = INDEX_op_not_i32;
806         have_not = TCG_TARGET_HAS_not_i32;
807         break;
808     case TCG_TYPE_I64:
809         not_op = INDEX_op_not_i64;
810         have_not = TCG_TARGET_HAS_not_i64;
811         break;
812     case TCG_TYPE_V64:
813     case TCG_TYPE_V128:
814     case TCG_TYPE_V256:
815         not_op = INDEX_op_not_vec;
816         have_not = TCG_TARGET_HAS_not_vec;
817         break;
818     default:
819         g_assert_not_reached();
820     }
821     if (have_not) {
822         op->opc = not_op;
823         op->args[1] = op->args[idx];
824         return fold_not(ctx, op);
825     }
826     return false;
827 }
828 
829 /* If the binary operation has first argument @i, fold to @i. */
830 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
831 {
832     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
833         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
834     }
835     return false;
836 }
837 
838 /* If the binary operation has first argument @i, fold to NOT. */
839 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
840 {
841     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
842         return fold_to_not(ctx, op, 2);
843     }
844     return false;
845 }
846 
847 /* If the binary operation has second argument @i, fold to @i. */
848 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
849 {
850     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
851         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
852     }
853     return false;
854 }
855 
856 /* If the binary operation has second argument @i, fold to identity. */
857 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
858 {
859     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
860         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
861     }
862     return false;
863 }
864 
865 /* If the binary operation has second argument @i, fold to NOT. */
866 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
867 {
868     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
869         return fold_to_not(ctx, op, 1);
870     }
871     return false;
872 }
873 
874 /* If the binary operation has both arguments equal, fold to @i. */
875 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
876 {
877     if (args_are_copies(op->args[1], op->args[2])) {
878         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
879     }
880     return false;
881 }
882 
883 /* If the binary operation has both arguments equal, fold to identity. */
884 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
885 {
886     if (args_are_copies(op->args[1], op->args[2])) {
887         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
888     }
889     return false;
890 }
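
/*
 * Concrete instances of the helpers above, taken from the folders
 * below: "and x, x" folds to a copy of x (fold_xx_to_x), "sub x, x"
 * to constant 0 (fold_xx_to_i), "add x, $0" to a copy (fold_xi_to_x),
 * "and x, $0" to constant 0 (fold_xi_to_i), and "eqv x, $0" to NOT x
 * (fold_xi_to_not).
 */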
891 
892 /*
893  * These outermost fold_<op> functions are sorted alphabetically.
894  *
895  * The ordering of the transformations should be:
896  *   1) those that produce a constant
897  *   2) those that produce a copy
898  *   3) those that produce information about the result value.
899  */
900 
901 static bool fold_add(OptContext *ctx, TCGOp *op)
902 {
903     if (fold_const2_commutative(ctx, op) ||
904         fold_xi_to_x(ctx, op, 0)) {
905         return true;
906     }
907     return false;
908 }
909 
910 /* We cannot as yet use do_constant_folding with vectors. */
911 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
912 {
913     if (fold_commutative(ctx, op) ||
914         fold_xi_to_x(ctx, op, 0)) {
915         return true;
916     }
917     return false;
918 }
919 
920 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
921 {
922     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
923         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
924         uint64_t al = arg_info(op->args[2])->val;
925         uint64_t ah = arg_info(op->args[3])->val;
926         uint64_t bl = arg_info(op->args[4])->val;
927         uint64_t bh = arg_info(op->args[5])->val;
928         TCGArg rl, rh;
929         TCGOp *op2;
930 
931         if (ctx->type == TCG_TYPE_I32) {
932             uint64_t a = deposit64(al, 32, 32, ah);
933             uint64_t b = deposit64(bl, 32, 32, bh);
934 
935             if (add) {
936                 a += b;
937             } else {
938                 a -= b;
939             }
940 
941             al = sextract64(a, 0, 32);
942             ah = sextract64(a, 32, 32);
943         } else {
944             Int128 a = int128_make128(al, ah);
945             Int128 b = int128_make128(bl, bh);
946 
947             if (add) {
948                 a = int128_add(a, b);
949             } else {
950                 a = int128_sub(a, b);
951             }
952 
953             al = int128_getlo(a);
954             ah = int128_gethi(a);
955         }
956 
957         rl = op->args[0];
958         rh = op->args[1];
959 
960         /* The proper opcode is supplied by tcg_opt_gen_mov. */
961         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
962 
963         tcg_opt_gen_movi(ctx, op, rl, al);
964         tcg_opt_gen_movi(ctx, op2, rh, ah);
965         return true;
966     }
967     return false;
968 }
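
/*
 * Example: add2_i32 with constant inputs {al, ah} = {0xffffffff, 0}
 * and {bl, bh} = {1, 0} packs to 0xffffffff + 1 == 0x100000000, so
 * the op pair is replaced by two constant moves: rl = 0, rh = 1.
 */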
969 
970 static bool fold_add2(OptContext *ctx, TCGOp *op)
971 {
972     /* Note that the high and low parts may be independently swapped. */
973     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
974     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
975 
976     return fold_addsub2(ctx, op, true);
977 }
978 
979 static bool fold_and(OptContext *ctx, TCGOp *op)
980 {
981     uint64_t z1, z2;
982 
983     if (fold_const2_commutative(ctx, op) ||
984         fold_xi_to_i(ctx, op, 0) ||
985         fold_xi_to_x(ctx, op, -1) ||
986         fold_xx_to_x(ctx, op)) {
987         return true;
988     }
989 
990     z1 = arg_info(op->args[1])->z_mask;
991     z2 = arg_info(op->args[2])->z_mask;
992     ctx->z_mask = z1 & z2;
993 
994     /*
995      * Sign repetitions are perforce all identical, whether they are 1 or 0.
996      * Bitwise operations preserve the relative quantity of the repetitions.
997      */
998     ctx->s_mask = arg_info(op->args[1])->s_mask
999                 & arg_info(op->args[2])->s_mask;
1000 
1001     /*
1002      * Known-zeros does not imply known-ones.  Therefore unless
1003      * arg2 is constant, we can't infer affected bits from it.
1004      */
1005     if (arg_is_const(op->args[2])) {
1006         ctx->a_mask = z1 & ~z2;
1007     }
1008 
1009     return fold_masks(ctx, op);
1010 }
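
/*
 * End-to-end example: for "and t0, t1, $0xff" where t1 has z_mask
 * 0x0f, the result z_mask is 0x0f and a_mask == 0x0f & ~0xff == 0,
 * so fold_masks rewrites the op as "mov t0, t1": the AND cannot
 * change any bit that may be nonzero.
 */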
1011 
1012 static bool fold_andc(OptContext *ctx, TCGOp *op)
1013 {
1014     uint64_t z1;
1015 
1016     if (fold_const2(ctx, op) ||
1017         fold_xx_to_i(ctx, op, 0) ||
1018         fold_xi_to_x(ctx, op, 0) ||
1019         fold_ix_to_not(ctx, op, -1)) {
1020         return true;
1021     }
1022 
1023     z1 = arg_info(op->args[1])->z_mask;
1024 
1025     /*
1026      * Known-zeros does not imply known-ones.  Therefore unless
1027      * arg2 is constant, we can't infer anything from it.
1028      */
1029     if (arg_is_const(op->args[2])) {
1030         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1031         ctx->a_mask = z1 & ~z2;
1032         z1 &= z2;
1033     }
1034     ctx->z_mask = z1;
1035 
1036     ctx->s_mask = arg_info(op->args[1])->s_mask
1037                 & arg_info(op->args[2])->s_mask;
1038     return fold_masks(ctx, op);
1039 }
1040 
1041 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1042 {
1043     TCGCond cond = op->args[2];
1044     int i;
1045 
1046     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1047         op->args[2] = cond = tcg_swap_cond(cond);
1048     }
1049 
1050     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1051     if (i == 0) {
1052         tcg_op_remove(ctx->tcg, op);
1053         return true;
1054     }
1055     if (i > 0) {
1056         op->opc = INDEX_op_br;
1057         op->args[0] = op->args[3];
1058     }
1059     return false;
1060 }
1061 
1062 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1063 {
1064     TCGCond cond = op->args[4];
1065     TCGArg label = op->args[5];
1066     int i, inv = 0;
1067 
1068     if (swap_commutative2(&op->args[0], &op->args[2])) {
1069         op->args[4] = cond = tcg_swap_cond(cond);
1070     }
1071 
1072     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1073     if (i >= 0) {
1074         goto do_brcond_const;
1075     }
1076 
1077     switch (cond) {
1078     case TCG_COND_LT:
1079     case TCG_COND_GE:
1080         /*
1081          * Simplify LT/GE comparisons vs zero to a single compare
1082          * vs the high word of the input.
1083          */
1084         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1085             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1086             goto do_brcond_high;
1087         }
1088         break;
1089 
1090     case TCG_COND_NE:
1091         inv = 1;
1092         QEMU_FALLTHROUGH;
1093     case TCG_COND_EQ:
1094         /*
1095          * Simplify EQ/NE comparisons where one of the pairs
1096          * can be simplified.
1097          */
1098         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1099                                      op->args[2], cond);
1100         switch (i ^ inv) {
1101         case 0:
1102             goto do_brcond_const;
1103         case 1:
1104             goto do_brcond_high;
1105         }
1106 
1107         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1108                                      op->args[3], cond);
1109         switch (i ^ inv) {
1110         case 0:
1111             goto do_brcond_const;
1112         case 1:
1113             op->opc = INDEX_op_brcond_i32;
1114             op->args[1] = op->args[2];
1115             op->args[2] = cond;
1116             op->args[3] = label;
1117             break;
1118         }
1119         break;
1120 
1121     default:
1122         break;
1123 
1124     do_brcond_high:
1125         op->opc = INDEX_op_brcond_i32;
1126         op->args[0] = op->args[1];
1127         op->args[1] = op->args[3];
1128         op->args[2] = cond;
1129         op->args[3] = label;
1130         break;
1131 
1132     do_brcond_const:
1133         if (i == 0) {
1134             tcg_op_remove(ctx->tcg, op);
1135             return true;
1136         }
1137         op->opc = INDEX_op_br;
1138         op->args[0] = label;
1139         break;
1140     }
1141     return false;
1142 }
1143 
1144 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1145 {
1146     uint64_t z_mask, s_mask, sign;
1147 
1148     if (arg_is_const(op->args[1])) {
1149         uint64_t t = arg_info(op->args[1])->val;
1150 
1151         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1152         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1153     }
1154 
1155     z_mask = arg_info(op->args[1])->z_mask;
1156 
1157     switch (op->opc) {
1158     case INDEX_op_bswap16_i32:
1159     case INDEX_op_bswap16_i64:
1160         z_mask = bswap16(z_mask);
1161         sign = INT16_MIN;
1162         break;
1163     case INDEX_op_bswap32_i32:
1164     case INDEX_op_bswap32_i64:
1165         z_mask = bswap32(z_mask);
1166         sign = INT32_MIN;
1167         break;
1168     case INDEX_op_bswap64_i64:
1169         z_mask = bswap64(z_mask);
1170         sign = INT64_MIN;
1171         break;
1172     default:
1173         g_assert_not_reached();
1174     }
1175     s_mask = smask_from_zmask(z_mask);
1176 
1177     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1178     case TCG_BSWAP_OZ:
1179         break;
1180     case TCG_BSWAP_OS:
1181         /* If the sign bit may be 1, force all the bits above to 1. */
1182         if (z_mask & sign) {
1183             z_mask |= sign;
1184             s_mask = sign << 1;
1185         }
1186         break;
1187     default:
1188         /* The high bits are undefined: force all bits above the sign to 1. */
1189         z_mask |= sign << 1;
1190         s_mask = 0;
1191         break;
1192     }
1193     ctx->z_mask = z_mask;
1194     ctx->s_mask = s_mask;
1195 
1196     return fold_masks(ctx, op);
1197 }
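
/*
 * Example of the flag handling: bswap16 with TCG_BSWAP_OS on an input
 * with z_mask 0x00ff yields z_mask 0xff00 after the swap.  Bit 15 may
 * then be set, so the sign extension marks all higher bits as possibly
 * one (z_mask |= sign) while recording that they replicate bit 15
 * (s_mask = sign << 1).
 */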
1198 
1199 static bool fold_call(OptContext *ctx, TCGOp *op)
1200 {
1201     TCGContext *s = ctx->tcg;
1202     int nb_oargs = TCGOP_CALLO(op);
1203     int nb_iargs = TCGOP_CALLI(op);
1204     int flags, i;
1205 
1206     init_arguments(ctx, op, nb_oargs + nb_iargs);
1207     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1208 
1209     /* If the function reads or writes globals, reset temp data. */
1210     flags = tcg_call_flags(op);
1211     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1212         int nb_globals = s->nb_globals;
1213 
1214         for (i = 0; i < nb_globals; i++) {
1215             if (test_bit(i, ctx->temps_used.l)) {
1216                 reset_ts(&ctx->tcg->temps[i]);
1217             }
1218         }
1219     }
1220 
1221     /* Reset temp data for outputs. */
1222     for (i = 0; i < nb_oargs; i++) {
1223         reset_temp(op->args[i]);
1224     }
1225 
1226     /* Stop optimizing MB across calls. */
1227     ctx->prev_mb = NULL;
1228     return true;
1229 }
1230 
1231 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1232 {
1233     uint64_t z_mask;
1234 
1235     if (arg_is_const(op->args[1])) {
1236         uint64_t t = arg_info(op->args[1])->val;
1237 
1238         if (t != 0) {
1239             t = do_constant_folding(op->opc, ctx->type, t, 0);
1240             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1241         }
1242         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1243     }
1244 
1245     switch (ctx->type) {
1246     case TCG_TYPE_I32:
1247         z_mask = 31;
1248         break;
1249     case TCG_TYPE_I64:
1250         z_mask = 63;
1251         break;
1252     default:
1253         g_assert_not_reached();
1254     }
1255     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1256     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1257     return false;
1258 }
1259 
1260 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1261 {
1262     if (fold_const1(ctx, op)) {
1263         return true;
1264     }
1265 
1266     switch (ctx->type) {
1267     case TCG_TYPE_I32:
1268         ctx->z_mask = 32 | 31;
1269         break;
1270     case TCG_TYPE_I64:
1271         ctx->z_mask = 64 | 63;
1272         break;
1273     default:
1274         g_assert_not_reached();
1275     }
1276     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1277     return false;
1278 }
1279 
1280 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1281 {
1282     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1283         uint64_t t1 = arg_info(op->args[1])->val;
1284         uint64_t t2 = arg_info(op->args[2])->val;
1285 
1286         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1287         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1288     }
1289 
1290     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1291                             op->args[3], op->args[4],
1292                             arg_info(op->args[2])->z_mask);
1293     return false;
1294 }
1295 
1296 static bool fold_divide(OptContext *ctx, TCGOp *op)
1297 {
1298     if (fold_const2(ctx, op) ||
1299         fold_xi_to_x(ctx, op, 1)) {
1300         return true;
1301     }
1302     return false;
1303 }
1304 
1305 static bool fold_dup(OptContext *ctx, TCGOp *op)
1306 {
1307     if (arg_is_const(op->args[1])) {
1308         uint64_t t = arg_info(op->args[1])->val;
1309         t = dup_const(TCGOP_VECE(op), t);
1310         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1311     }
1312     return false;
1313 }
1314 
1315 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1316 {
1317     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1318         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1319                                arg_info(op->args[2])->val);
1320         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1321     }
1322 
1323     if (args_are_copies(op->args[1], op->args[2])) {
1324         op->opc = INDEX_op_dup_vec;
1325         TCGOP_VECE(op) = MO_32;
1326     }
1327     return false;
1328 }
1329 
1330 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1331 {
1332     if (fold_const2_commutative(ctx, op) ||
1333         fold_xi_to_x(ctx, op, -1) ||
1334         fold_xi_to_not(ctx, op, 0)) {
1335         return true;
1336     }
1337 
1338     ctx->s_mask = arg_info(op->args[1])->s_mask
1339                 & arg_info(op->args[2])->s_mask;
1340     return false;
1341 }
1342 
1343 static bool fold_extract(OptContext *ctx, TCGOp *op)
1344 {
1345     uint64_t z_mask_old, z_mask;
1346     int pos = op->args[2];
1347     int len = op->args[3];
1348 
1349     if (arg_is_const(op->args[1])) {
1350         uint64_t t;
1351 
1352         t = arg_info(op->args[1])->val;
1353         t = extract64(t, pos, len);
1354         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1355     }
1356 
1357     z_mask_old = arg_info(op->args[1])->z_mask;
1358     z_mask = extract64(z_mask_old, pos, len);
1359     if (pos == 0) {
1360         ctx->a_mask = z_mask_old ^ z_mask;
1361     }
1362     ctx->z_mask = z_mask;
1363     ctx->s_mask = smask_from_zmask(z_mask);
1364 
1365     return fold_masks(ctx, op);
1366 }
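
/*
 * Example: "extract t0, t1, 0, 8" where t1's z_mask is already 0xff
 * gives a_mask == 0xff ^ 0xff == 0, so fold_masks turns the extract
 * into "mov t0, t1".  The pos == 0 test is what permits this identity
 * detection: only an unshifted field can be bit-identical to the input.
 */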
1367 
1368 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1369 {
1370     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1371         uint64_t v1 = arg_info(op->args[1])->val;
1372         uint64_t v2 = arg_info(op->args[2])->val;
1373         int shr = op->args[3];
1374 
1375         if (op->opc == INDEX_op_extract2_i64) {
1376             v1 >>= shr;
1377             v2 <<= 64 - shr;
1378         } else {
1379             v1 = (uint32_t)v1 >> shr;
1380             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1381         }
1382         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1383     }
1384     return false;
1385 }
1386 
1387 static bool fold_exts(OptContext *ctx, TCGOp *op)
1388 {
1389     uint64_t s_mask_old, s_mask, z_mask, sign;
1390     bool type_change = false;
1391 
1392     if (fold_const1(ctx, op)) {
1393         return true;
1394     }
1395 
1396     z_mask = arg_info(op->args[1])->z_mask;
1397     s_mask = arg_info(op->args[1])->s_mask;
1398     s_mask_old = s_mask;
1399 
1400     switch (op->opc) {
1401     CASE_OP_32_64(ext8s):
1402         sign = INT8_MIN;
1403         z_mask = (uint8_t)z_mask;
1404         break;
1405     CASE_OP_32_64(ext16s):
1406         sign = INT16_MIN;
1407         z_mask = (uint16_t)z_mask;
1408         break;
1409     case INDEX_op_ext_i32_i64:
1410         type_change = true;
1411         QEMU_FALLTHROUGH;
1412     case INDEX_op_ext32s_i64:
1413         sign = INT32_MIN;
1414         z_mask = (uint32_t)z_mask;
1415         break;
1416     default:
1417         g_assert_not_reached();
1418     }
1419 
1420     if (z_mask & sign) {
1421         z_mask |= sign;
1422     }
1423     s_mask |= sign << 1;
1424 
1425     ctx->z_mask = z_mask;
1426     ctx->s_mask = s_mask;
1427     if (!type_change) {
1428         ctx->a_mask = s_mask & ~s_mask_old;
1429     }
1430 
1431     return fold_masks(ctx, op);
1432 }
1433 
1434 static bool fold_extu(OptContext *ctx, TCGOp *op)
1435 {
1436     uint64_t z_mask_old, z_mask;
1437     bool type_change = false;
1438 
1439     if (fold_const1(ctx, op)) {
1440         return true;
1441     }
1442 
1443     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1444 
1445     switch (op->opc) {
1446     CASE_OP_32_64(ext8u):
1447         z_mask = (uint8_t)z_mask;
1448         break;
1449     CASE_OP_32_64(ext16u):
1450         z_mask = (uint16_t)z_mask;
1451         break;
1452     case INDEX_op_extrl_i64_i32:
1453     case INDEX_op_extu_i32_i64:
1454         type_change = true;
1455         QEMU_FALLTHROUGH;
1456     case INDEX_op_ext32u_i64:
1457         z_mask = (uint32_t)z_mask;
1458         break;
1459     case INDEX_op_extrh_i64_i32:
1460         type_change = true;
1461         z_mask >>= 32;
1462         break;
1463     default:
1464         g_assert_not_reached();
1465     }
1466 
1467     ctx->z_mask = z_mask;
1468     ctx->s_mask = smask_from_zmask(z_mask);
1469     if (!type_change) {
1470         ctx->a_mask = z_mask_old ^ z_mask;
1471     }
1472     return fold_masks(ctx, op);
1473 }
1474 
1475 static bool fold_mb(OptContext *ctx, TCGOp *op)
1476 {
1477     /* Eliminate duplicate and redundant fence instructions.  */
1478     if (ctx->prev_mb) {
1479         /*
1480          * Merge two barriers of the same type into one,
1481          * or a weaker barrier into a stronger one,
1482          * or two weaker barriers into a stronger one.
1483          *   mb X; mb Y => mb X|Y
1484          *   mb; strl => mb; st
1485          *   ldaq; mb => ld; mb
1486          *   ldaq; strl => ld; mb; st
1487          * Other combinations are also merged into a strong
1488          * barrier.  This is stricter than specified but for
1489          * the purposes of TCG is better than not optimizing.
1490          */
1491         ctx->prev_mb->args[0] |= op->args[0];
1492         tcg_op_remove(ctx->tcg, op);
1493     } else {
1494         ctx->prev_mb = op;
1495     }
1496     return true;
1497 }
1498 
1499 static bool fold_mov(OptContext *ctx, TCGOp *op)
1500 {
1501     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1502 }
1503 
1504 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1505 {
1506     TCGCond cond = op->args[5];
1507     int i;
1508 
1509     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1510         op->args[5] = cond = tcg_swap_cond(cond);
1511     }
1512     /*
1513      * Canonicalize the "false" input reg to match the destination reg so
1514      * that the tcg backend can implement a "move if true" operation.
1515      */
1516     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1517         op->args[5] = cond = tcg_invert_cond(cond);
1518     }
1519 
1520     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1521     if (i >= 0) {
1522         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1523     }
1524 
1525     ctx->z_mask = arg_info(op->args[3])->z_mask
1526                 | arg_info(op->args[4])->z_mask;
1527     ctx->s_mask = arg_info(op->args[3])->s_mask
1528                 & arg_info(op->args[4])->s_mask;
1529 
1530     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1531         uint64_t tv = arg_info(op->args[3])->val;
1532         uint64_t fv = arg_info(op->args[4])->val;
1533         TCGOpcode opc;
1534 
1535         switch (ctx->type) {
1536         case TCG_TYPE_I32:
1537             opc = INDEX_op_setcond_i32;
1538             break;
1539         case TCG_TYPE_I64:
1540             opc = INDEX_op_setcond_i64;
1541             break;
1542         default:
1543             g_assert_not_reached();
1544         }
1545 
1546         if (tv == 1 && fv == 0) {
1547             op->opc = opc;
1548             op->args[3] = cond;
1549         } else if (fv == 1 && tv == 0) {
1550             op->opc = opc;
1551             op->args[3] = tcg_invert_cond(cond);
1552         }
1553     }
1554     return false;
1555 }
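
/*
 * Example of the final transformation: "movcond_i32 d, c1, c2, $1, $0,
 * cond" becomes "setcond_i32 d, c1, c2, cond", and with the constants
 * reversed the condition is inverted instead.
 */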
1556 
1557 static bool fold_mul(OptContext *ctx, TCGOp *op)
1558 {
1559     if (fold_const2(ctx, op) ||
1560         fold_xi_to_i(ctx, op, 0) ||
1561         fold_xi_to_x(ctx, op, 1)) {
1562         return true;
1563     }
1564     return false;
1565 }
1566 
1567 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1568 {
1569     if (fold_const2_commutative(ctx, op) ||
1570         fold_xi_to_i(ctx, op, 0)) {
1571         return true;
1572     }
1573     return false;
1574 }
1575 
1576 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1577 {
1578     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1579 
1580     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1581         uint64_t a = arg_info(op->args[2])->val;
1582         uint64_t b = arg_info(op->args[3])->val;
1583         uint64_t h, l;
1584         TCGArg rl, rh;
1585         TCGOp *op2;
1586 
1587         switch (op->opc) {
1588         case INDEX_op_mulu2_i32:
1589             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1590             h = (int32_t)(l >> 32);
1591             l = (int32_t)l;
1592             break;
1593         case INDEX_op_muls2_i32:
1594             l = (int64_t)(int32_t)a * (int32_t)b;
1595             h = l >> 32;
1596             l = (int32_t)l;
1597             break;
1598         case INDEX_op_mulu2_i64:
1599             mulu64(&l, &h, a, b);
1600             break;
1601         case INDEX_op_muls2_i64:
1602             muls64(&l, &h, a, b);
1603             break;
1604         default:
1605             g_assert_not_reached();
1606         }
1607 
1608         rl = op->args[0];
1609         rh = op->args[1];
1610 
1611         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1612         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1613 
1614         tcg_opt_gen_movi(ctx, op, rl, l);
1615         tcg_opt_gen_movi(ctx, op2, rh, h);
1616         return true;
1617     }
1618     return false;
1619 }
1620 
1621 static bool fold_nand(OptContext *ctx, TCGOp *op)
1622 {
1623     if (fold_const2_commutative(ctx, op) ||
1624         fold_xi_to_not(ctx, op, -1)) {
1625         return true;
1626     }
1627 
1628     ctx->s_mask = arg_info(op->args[1])->s_mask
1629                 & arg_info(op->args[2])->s_mask;
1630     return false;
1631 }
1632 
1633 static bool fold_neg(OptContext *ctx, TCGOp *op)
1634 {
1635     uint64_t z_mask;
1636 
1637     if (fold_const1(ctx, op)) {
1638         return true;
1639     }
1640 
1641     /* Set to 1 all bits to the left of the rightmost set bit.  */
1642     z_mask = arg_info(op->args[1])->z_mask;
1643     ctx->z_mask = -(z_mask & -z_mask);
1644 
1645     /*
1646      * Because of fold_sub_to_neg, we want to always return true,
1647      * via finish_folding.
1648      */
1649     finish_folding(ctx, op);
1650     return true;
1651 }
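
/*
 * Example of the z_mask computation: if z_mask is 0b1100, the lowest
 * bit that can possibly be set in the input is bit 2.  Negation keeps
 * the lowest set bit in place, so bits 0..1 of the result are still
 * known zero: -(z & -z) == ~0b11 marks only bits 63..2 as unknown.
 */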
1652 
1653 static bool fold_nor(OptContext *ctx, TCGOp *op)
1654 {
1655     if (fold_const2_commutative(ctx, op) ||
1656         fold_xi_to_not(ctx, op, 0)) {
1657         return true;
1658     }
1659 
1660     ctx->s_mask = arg_info(op->args[1])->s_mask
1661                 & arg_info(op->args[2])->s_mask;
1662     return false;
1663 }
1664 
1665 static bool fold_not(OptContext *ctx, TCGOp *op)
1666 {
1667     if (fold_const1(ctx, op)) {
1668         return true;
1669     }
1670 
1671     ctx->s_mask = arg_info(op->args[1])->s_mask;
1672 
1673     /* Because of fold_to_not, we want to always return true, via finish. */
1674     finish_folding(ctx, op);
1675     return true;
1676 }
1677 
1678 static bool fold_or(OptContext *ctx, TCGOp *op)
1679 {
1680     if (fold_const2_commutative(ctx, op) ||
1681         fold_xi_to_x(ctx, op, 0) ||
1682         fold_xx_to_x(ctx, op)) {
1683         return true;
1684     }
1685 
1686     ctx->z_mask = arg_info(op->args[1])->z_mask
1687                 | arg_info(op->args[2])->z_mask;
1688     ctx->s_mask = arg_info(op->args[1])->s_mask
1689                 & arg_info(op->args[2])->s_mask;
1690     return fold_masks(ctx, op);
1691 }
1692 
1693 static bool fold_orc(OptContext *ctx, TCGOp *op)
1694 {
1695     if (fold_const2(ctx, op) ||
1696         fold_xx_to_i(ctx, op, -1) ||
1697         fold_xi_to_x(ctx, op, -1) ||
1698         fold_ix_to_not(ctx, op, 0)) {
1699         return true;
1700     }
1701 
1702     ctx->s_mask = arg_info(op->args[1])->s_mask
1703                 & arg_info(op->args[2])->s_mask;
1704     return false;
1705 }
1706 
1707 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1708 {
1709     const TCGOpDef *def = &tcg_op_defs[op->opc];
1710     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1711     MemOp mop = get_memop(oi);
1712     int width = 8 * memop_size(mop);
1713 
1714     if (width < 64) {
1715         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1716         if (!(mop & MO_SIGN)) {
1717             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1718             ctx->s_mask <<= 1;
1719         }
1720     }
1721 
1722     /* Opcodes that touch guest memory stop the mb optimization.  */
1723     ctx->prev_mb = NULL;
1724     return false;
1725 }
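
/*
 * Example: an 8-bit unsigned load (width == 8, !MO_SIGN) records
 * z_mask == 0xff and s_mask covering bits 63..9, which matches
 * smask_from_zmask(0xff); a signed 8-bit load records only the
 * sign-replication mask, bits 63..8.
 */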
1726 
1727 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1728 {
1729     /* Opcodes that touch guest memory stop the mb optimization.  */
1730     ctx->prev_mb = NULL;
1731     return false;
1732 }
1733 
1734 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1735 {
1736     if (fold_const2(ctx, op) ||
1737         fold_xx_to_i(ctx, op, 0)) {
1738         return true;
1739     }
1740     return false;
1741 }
1742 
1743 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1744 {
1745     TCGCond cond = op->args[3];
1746     int i;
1747 
1748     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1749         op->args[3] = cond = tcg_swap_cond(cond);
1750     }
1751 
1752     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1753     if (i >= 0) {
1754         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1755     }
1756 
1757     ctx->z_mask = 1;
1758     ctx->s_mask = smask_from_zmask(1);
1759     return false;
1760 }
1761 
1762 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1763 {
1764     TCGCond cond = op->args[5];
1765     int i, inv = 0;
1766 
1767     if (swap_commutative2(&op->args[1], &op->args[3])) {
1768         op->args[5] = cond = tcg_swap_cond(cond);
1769     }
1770 
1771     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1772     if (i >= 0) {
1773         goto do_setcond_const;
1774     }
1775 
1776     switch (cond) {
1777     case TCG_COND_LT:
1778     case TCG_COND_GE:
1779         /*
1780          * Simplify LT/GE comparisons vs zero to a single compare
1781          * vs the high word of the input.
1782          */
1783         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1784             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1785             goto do_setcond_high;
1786         }
1787         break;
1788 
1789     case TCG_COND_NE:
1790         inv = 1;
1791         QEMU_FALLTHROUGH;
1792     case TCG_COND_EQ:
1793         /*
1794          * Simplify EQ/NE comparisons where one of the pairs
1795          * can be simplified.
1796          */
1797         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1798                                      op->args[3], cond);
1799         switch (i ^ inv) {
1800         case 0:
1801             goto do_setcond_const;
1802         case 1:
1803             goto do_setcond_high;
1804         }
1805 
1806         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1807                                      op->args[4], cond);
1808         switch (i ^ inv) {
1809         case 0:
1810             goto do_setcond_const;
1811         case 1:
1812             op->args[2] = op->args[3];
1813             op->args[3] = cond;
1814             op->opc = INDEX_op_setcond_i32;
1815             break;
1816         }
1817         break;
1818 
1819     default:
1820         break;
1821 
1822     do_setcond_high:
1823         op->args[1] = op->args[2];
1824         op->args[2] = op->args[4];
1825         op->args[3] = cond;
1826         op->opc = INDEX_op_setcond_i32;
1827         break;
1828     }
1829 
1830     ctx->z_mask = 1;
1831     ctx->s_mask = smask_from_zmask(1);
1832     return false;
1833 
1834  do_setcond_const:
1835     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1836 }
1837 
1838 static bool fold_sextract(OptContext *ctx, TCGOp *op)
1839 {
1840     uint64_t z_mask, s_mask, s_mask_old;
1841     int pos = op->args[2];
1842     int len = op->args[3];
1843 
1844     if (arg_is_const(op->args[1])) {
1845         uint64_t t;
1846 
1847         t = arg_info(op->args[1])->val;
1848         t = sextract64(t, pos, len);
1849         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1850     }
1851 
1852     z_mask = arg_info(op->args[1])->z_mask;
1853     z_mask = sextract64(z_mask, pos, len);
1854     ctx->z_mask = z_mask;
1855 
1856     s_mask_old = arg_info(op->args[1])->s_mask;
1857     s_mask = sextract64(s_mask_old, pos, len);
1858     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1859     ctx->s_mask = s_mask;
1860 
1861     if (pos == 0) {
1862         ctx->a_mask = s_mask & ~s_mask_old;
1863     }
1864 
1865     return fold_masks(ctx, op);
1866 }
1867 
1868 static bool fold_shift(OptContext *ctx, TCGOp *op)
1869 {
1870     uint64_t s_mask, z_mask, sign;
1871 
1872     if (fold_const2(ctx, op) ||
1873         fold_ix_to_i(ctx, op, 0) ||
1874         fold_xi_to_x(ctx, op, 0)) {
1875         return true;
1876     }
1877 
1878     s_mask = arg_info(op->args[1])->s_mask;
1879     z_mask = arg_info(op->args[1])->z_mask;
1880 
1881     if (arg_is_const(op->args[2])) {
1882         int sh = arg_info(op->args[2])->val;
1883 
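        /*
         * Shift the masks by the same constant: the known-zero mask
         * moves with the value, and the shifted sign mask is restored
         * to canonical left-aligned form by smask_from_smask().
         */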
1884         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1885 
1886         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1887         ctx->s_mask = smask_from_smask(s_mask);
1888 
1889         return fold_masks(ctx, op);
1890     }
1891 
1892     switch (op->opc) {
1893     CASE_OP_32_64(sar):
1894         /*
1895          * Arithmetic right shift will not reduce the number of
1896          * input sign repetitions.
1897          */
1898         ctx->s_mask = s_mask;
1899         break;
1900     CASE_OP_32_64(shr):
1901         /*
1902          * If the sign bit is known zero, then logical right shift
1903          * will not reduce the number of input sign repetitions.
1904          */
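        /*
         * s_mask & -s_mask isolates the lowest of the repeated sign
         * bits; the bit just below it is the effective sign bit.
         */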
1905         sign = (s_mask & -s_mask) >> 1;
1906         if (!(z_mask & sign)) {
1907             ctx->s_mask = s_mask;
1908         }
1909         break;
1910     default:
1911         break;
1912     }
1913 
1914     return false;
1915 }
1916 
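/*
 * Recognize "sub r, 0, x" and rewrite it as "neg r, x" when the
 * target provides a suitable neg opcode.
 */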
1917 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1918 {
1919     TCGOpcode neg_op;
1920     bool have_neg;
1921 
1922     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1923         return false;
1924     }
1925 
1926     switch (ctx->type) {
1927     case TCG_TYPE_I32:
1928         neg_op = INDEX_op_neg_i32;
1929         have_neg = TCG_TARGET_HAS_neg_i32;
1930         break;
1931     case TCG_TYPE_I64:
1932         neg_op = INDEX_op_neg_i64;
1933         have_neg = TCG_TARGET_HAS_neg_i64;
1934         break;
1935     case TCG_TYPE_V64:
1936     case TCG_TYPE_V128:
1937     case TCG_TYPE_V256:
1938         neg_op = INDEX_op_neg_vec;
1939         have_neg = (TCG_TARGET_HAS_neg_vec &&
1940                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1941         break;
1942     default:
1943         g_assert_not_reached();
1944     }
1945     if (have_neg) {
1946         op->opc = neg_op;
1947         op->args[1] = op->args[2];
1948         return fold_neg(ctx, op);
1949     }
1950     return false;
1951 }
1952 
1953 /* We cannot as yet use do_constant_folding with vectors. */
1954 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
1955 {
1956     if (fold_xx_to_i(ctx, op, 0) ||
1957         fold_xi_to_x(ctx, op, 0) ||
1958         fold_sub_to_neg(ctx, op)) {
1959         return true;
1960     }
1961     return false;
1962 }
1963 
1964 static bool fold_sub(OptContext *ctx, TCGOp *op)
1965 {
1966     return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
1967 }
1968 
1969 static bool fold_sub2(OptContext *ctx, TCGOp *op)
1970 {
1971     return fold_addsub2(ctx, op, false);
1972 }
1973 
1974 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1975 {
1976     /* We can't do any folding with a load, but we can record bits. */
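    /*
     * E.g. an ld8s result is sign-extended from bit 7, so bits 8..63
     * all repeat the sign; an ld8u result is zero-extended, so bits
     * 8..63 are known zero and at least 55 sign bits repeat.
     */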
1977     switch (op->opc) {
1978     CASE_OP_32_64(ld8s):
1979         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1980         break;
1981     CASE_OP_32_64(ld8u):
1982         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1983         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1984         break;
1985     CASE_OP_32_64(ld16s):
1986         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1987         break;
1988     CASE_OP_32_64(ld16u):
1989         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1990         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1991         break;
1992     case INDEX_op_ld32s_i64:
1993         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1994         break;
1995     case INDEX_op_ld32u_i64:
1996         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
1997         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
1998         break;
1999     default:
2000         g_assert_not_reached();
2001     }
2002     return false;
2003 }
2004 
2005 static bool fold_xor(OptContext *ctx, TCGOp *op)
2006 {
2007     if (fold_const2_commutative(ctx, op) ||
2008         fold_xx_to_i(ctx, op, 0) ||
2009         fold_xi_to_x(ctx, op, 0) ||
2010         fold_xi_to_not(ctx, op, -1)) {
2011         return true;
2012     }
2013 
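    /*
     * A result bit may be nonzero only if it may be nonzero in either
     * input; sign repetitions survive only where both inputs have them.
     */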
2014     ctx->z_mask = arg_info(op->args[1])->z_mask
2015                 | arg_info(op->args[2])->z_mask;
2016     ctx->s_mask = arg_info(op->args[1])->s_mask
2017                 & arg_info(op->args[2])->s_mask;
2018     return fold_masks(ctx, op);
2019 }
2020 
2021 /* Propagate constants and copies, fold constant expressions. */
2022 void tcg_optimize(TCGContext *s)
2023 {
2024     int nb_temps, i;
2025     TCGOp *op, *op_next;
2026     OptContext ctx = { .tcg = s };
2027 
2028     /* Each temp's state_ptr refers to a TempOptInfo.
2029        If the temp holds a constant, its value is kept in that info.
2030        If the temp is a copy of other temps, the copies are
2031        available through the doubly linked circular list. */
2032 
2033     nb_temps = s->nb_temps;
2034     for (i = 0; i < nb_temps; ++i) {
2035         s->temps[i].state_ptr = NULL;
2036     }
2037 
2038     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2039         TCGOpcode opc = op->opc;
2040         const TCGOpDef *def;
2041         bool done = false;
2042 
2043         /* Calls are special. */
2044         if (opc == INDEX_op_call) {
2045             fold_call(&ctx, op);
2046             continue;
2047         }
2048 
2049         def = &tcg_op_defs[opc];
2050         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2051         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2052 
2053         /* Pre-compute the type of the operation. */
2054         if (def->flags & TCG_OPF_VECTOR) {
2055             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2056         } else if (def->flags & TCG_OPF_64BIT) {
2057             ctx.type = TCG_TYPE_I64;
2058         } else {
2059             ctx.type = TCG_TYPE_I32;
2060         }
2061 
2062         /* Assume all bits affected, no bits known zero, no sign reps. */
2063         ctx.a_mask = -1;
2064         ctx.z_mask = -1;
2065         ctx.s_mask = 0;
2066 
2067         /*
2068          * Process each opcode.
2069          * Sorted alphabetically by opcode as much as possible.
2070          */
2071         switch (opc) {
2072         CASE_OP_32_64(add):
2073             done = fold_add(&ctx, op);
2074             break;
2075         case INDEX_op_add_vec:
2076             done = fold_add_vec(&ctx, op);
2077             break;
2078         CASE_OP_32_64(add2):
2079             done = fold_add2(&ctx, op);
2080             break;
2081         CASE_OP_32_64_VEC(and):
2082             done = fold_and(&ctx, op);
2083             break;
2084         CASE_OP_32_64_VEC(andc):
2085             done = fold_andc(&ctx, op);
2086             break;
2087         CASE_OP_32_64(brcond):
2088             done = fold_brcond(&ctx, op);
2089             break;
2090         case INDEX_op_brcond2_i32:
2091             done = fold_brcond2(&ctx, op);
2092             break;
2093         CASE_OP_32_64(bswap16):
2094         CASE_OP_32_64(bswap32):
2095         case INDEX_op_bswap64_i64:
2096             done = fold_bswap(&ctx, op);
2097             break;
2098         CASE_OP_32_64(clz):
2099         CASE_OP_32_64(ctz):
2100             done = fold_count_zeros(&ctx, op);
2101             break;
2102         CASE_OP_32_64(ctpop):
2103             done = fold_ctpop(&ctx, op);
2104             break;
2105         CASE_OP_32_64(deposit):
2106             done = fold_deposit(&ctx, op);
2107             break;
2108         CASE_OP_32_64(div):
2109         CASE_OP_32_64(divu):
2110             done = fold_divide(&ctx, op);
2111             break;
2112         case INDEX_op_dup_vec:
2113             done = fold_dup(&ctx, op);
2114             break;
2115         case INDEX_op_dup2_vec:
2116             done = fold_dup2(&ctx, op);
2117             break;
2118         CASE_OP_32_64_VEC(eqv):
2119             done = fold_eqv(&ctx, op);
2120             break;
2121         CASE_OP_32_64(extract):
2122             done = fold_extract(&ctx, op);
2123             break;
2124         CASE_OP_32_64(extract2):
2125             done = fold_extract2(&ctx, op);
2126             break;
2127         CASE_OP_32_64(ext8s):
2128         CASE_OP_32_64(ext16s):
2129         case INDEX_op_ext32s_i64:
2130         case INDEX_op_ext_i32_i64:
2131             done = fold_exts(&ctx, op);
2132             break;
2133         CASE_OP_32_64(ext8u):
2134         CASE_OP_32_64(ext16u):
2135         case INDEX_op_ext32u_i64:
2136         case INDEX_op_extu_i32_i64:
2137         case INDEX_op_extrl_i64_i32:
2138         case INDEX_op_extrh_i64_i32:
2139             done = fold_extu(&ctx, op);
2140             break;
2141         CASE_OP_32_64(ld8s):
2142         CASE_OP_32_64(ld8u):
2143         CASE_OP_32_64(ld16s):
2144         CASE_OP_32_64(ld16u):
2145         case INDEX_op_ld32s_i64:
2146         case INDEX_op_ld32u_i64:
2147             done = fold_tcg_ld(&ctx, op);
2148             break;
2149         case INDEX_op_mb:
2150             done = fold_mb(&ctx, op);
2151             break;
2152         CASE_OP_32_64_VEC(mov):
2153             done = fold_mov(&ctx, op);
2154             break;
2155         CASE_OP_32_64(movcond):
2156             done = fold_movcond(&ctx, op);
2157             break;
2158         CASE_OP_32_64(mul):
2159             done = fold_mul(&ctx, op);
2160             break;
2161         CASE_OP_32_64(mulsh):
2162         CASE_OP_32_64(muluh):
2163             done = fold_mul_highpart(&ctx, op);
2164             break;
2165         CASE_OP_32_64(muls2):
2166         CASE_OP_32_64(mulu2):
2167             done = fold_multiply2(&ctx, op);
2168             break;
2169         CASE_OP_32_64_VEC(nand):
2170             done = fold_nand(&ctx, op);
2171             break;
2172         CASE_OP_32_64(neg):
2173             done = fold_neg(&ctx, op);
2174             break;
2175         CASE_OP_32_64_VEC(nor):
2176             done = fold_nor(&ctx, op);
2177             break;
2178         CASE_OP_32_64_VEC(not):
2179             done = fold_not(&ctx, op);
2180             break;
2181         CASE_OP_32_64_VEC(or):
2182             done = fold_or(&ctx, op);
2183             break;
2184         CASE_OP_32_64_VEC(orc):
2185             done = fold_orc(&ctx, op);
2186             break;
2187         case INDEX_op_qemu_ld_a32_i32:
2188         case INDEX_op_qemu_ld_a64_i32:
2189         case INDEX_op_qemu_ld_a32_i64:
2190         case INDEX_op_qemu_ld_a64_i64:
2191         case INDEX_op_qemu_ld_a32_i128:
2192         case INDEX_op_qemu_ld_a64_i128:
2193             done = fold_qemu_ld(&ctx, op);
2194             break;
2195         case INDEX_op_qemu_st8_a32_i32:
2196         case INDEX_op_qemu_st8_a64_i32:
2197         case INDEX_op_qemu_st_a32_i32:
2198         case INDEX_op_qemu_st_a64_i32:
2199         case INDEX_op_qemu_st_a32_i64:
2200         case INDEX_op_qemu_st_a64_i64:
2201         case INDEX_op_qemu_st_a32_i128:
2202         case INDEX_op_qemu_st_a64_i128:
2203             done = fold_qemu_st(&ctx, op);
2204             break;
2205         CASE_OP_32_64(rem):
2206         CASE_OP_32_64(remu):
2207             done = fold_remainder(&ctx, op);
2208             break;
2209         CASE_OP_32_64(rotl):
2210         CASE_OP_32_64(rotr):
2211         CASE_OP_32_64(sar):
2212         CASE_OP_32_64(shl):
2213         CASE_OP_32_64(shr):
2214             done = fold_shift(&ctx, op);
2215             break;
2216         CASE_OP_32_64(setcond):
2217             done = fold_setcond(&ctx, op);
2218             break;
2219         case INDEX_op_setcond2_i32:
2220             done = fold_setcond2(&ctx, op);
2221             break;
2222         CASE_OP_32_64(sextract):
2223             done = fold_sextract(&ctx, op);
2224             break;
2225         CASE_OP_32_64(sub):
2226             done = fold_sub(&ctx, op);
2227             break;
2228         case INDEX_op_sub_vec:
2229             done = fold_sub_vec(&ctx, op);
2230             break;
2231         CASE_OP_32_64(sub2):
2232             done = fold_sub2(&ctx, op);
2233             break;
2234         CASE_OP_32_64_VEC(xor):
2235             done = fold_xor(&ctx, op);
2236             break;
2237         default:
2238             break;
2239         }
2240 
2241         if (!done) {
2242             finish_folding(&ctx, op);
2243         }
2244     }
2245 }
2246