xref: /openbmc/qemu/tcg/optimize.c (revision 97a5b35c)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg-internal.h"
30 
/*
 * Expand to the switch case labels for both the _i32 and _i64 variants
 * of opcode @x.  The last label is emitted without its colon, so the
 * use site reads "CASE_OP_32_64(add):".
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As CASE_OP_32_64, plus a third label for the _vec variant of @x. */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
39 
/*
 * Per-temporary optimizer state, reached through TCGTemp.state_ptr.
 *
 * Temps known to hold the same value are chained into a circular
 * doubly-linked list through prev_copy/next_copy; a temp that is not a
 * copy of anything links to itself.
 */
typedef struct TempOptInfo {
    bool is_const;       /* true when @val holds the known constant value */
    TCGTemp *prev_copy;  /* previous temp in the circular list of copies */
    TCGTemp *next_copy;  /* next temp in the list; self when not a copy */
    uint64_t val;        /* constant value; only set when is_const */
    /*
     * A 0 bit means the value bit is known to be 0.  A 1 bit means the
     * value bit is unknown (it may be 0 or 1); an all-ones mask therefore
     * carries no information.
     */
    uint64_t z_mask;
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;
48 
/* State carried by the optimizer while it walks the op stream. */
typedef struct OptContext {
    TCGContext *tcg;       /* context whose ops are being optimized */
    TCGOp *prev_mb;        /* cleared at the end of each basic block */
    TCGTempSet temps_used; /* bitmap, by temp index, of initialized temps */

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 if value bit is known 0; 1 is unknown */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    TCGType type;     /* type of the operation currently being folded */
} OptContext;
60 
/*
 * Build the sign mask of a known value: one left-aligned set bit for
 * each redundant sign bit counted by clrsb64().
 */
static uint64_t smask_from_value(uint64_t value)
{
    uint64_t low_bits = ~0ull >> clrsb64(value);

    return ~low_bits;
}
67 
/*
 * Derive a sign mask from a mask of known-zero bits.
 *
 * A run of known zeros at the top means the remaining bits form an
 * unsigned field; the equivalent signed field needs one more bit, so
 * the sign mask is one bit shorter than the run of leading zeros.
 */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    int leading = clz64(zmask);

    /*
     * Only 0 bits in zmask carry information.  If the msb is not a
     * known zero, nothing can be said about the sign.
     */
    if (leading == 0) {
        return 0;
    }
    return ~(~0ull >> (leading - 1));
}
87 
/*
 * Re-normalize a sign mask to a left-aligned run after manipulation.
 * Shifts and rotates can keep sign bits at the top while leaving
 * stray, disconnected ones further right; keep only the contiguous
 * part that starts at the msb.
 */
static uint64_t smask_from_smask(int64_t smask)
{
    /* Only 1 bits matter in an smask, so invert and reuse the zmask path. */
    uint64_t inverted = ~(uint64_t)smask;

    return smask_from_zmask(inverted);
}
99 
100 static inline TempOptInfo *ts_info(TCGTemp *ts)
101 {
102     return ts->state_ptr;
103 }
104 
105 static inline TempOptInfo *arg_info(TCGArg arg)
106 {
107     return ts_info(arg_temp(arg));
108 }
109 
110 static inline bool ts_is_const(TCGTemp *ts)
111 {
112     return ts_info(ts)->is_const;
113 }
114 
115 static inline bool arg_is_const(TCGArg arg)
116 {
117     return ts_is_const(arg_temp(arg));
118 }
119 
120 static inline bool ts_is_copy(TCGTemp *ts)
121 {
122     return ts_info(ts)->next_copy != ts;
123 }
124 
/*
 * Reset TEMP's state, possibly removing the temp from the list of copies.
 *
 * The temp is unlinked from its circular list of copies and linked back
 * to itself, and everything known about its value is forgotten: it is
 * no longer constant, no bit is known to be zero and no sign bits are
 * known.
 */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    /*
     * Splice the neighbours together first.  When @ts is alone in its
     * list, pi and ni are ti itself and the self-links below overwrite
     * these two stores.
     */
    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->z_mask = -1;    /* all ones: no bit is known to be zero */
    ti->s_mask = 0;     /* no known sign repetitions */
}
140 
141 static void reset_temp(TCGArg arg)
142 {
143     reset_ts(arg_temp(arg));
144 }
145 
146 /* Initialize and activate a temporary.  */
147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
148 {
149     size_t idx = temp_idx(ts);
150     TempOptInfo *ti;
151 
152     if (test_bit(idx, ctx->temps_used.l)) {
153         return;
154     }
155     set_bit(idx, ctx->temps_used.l);
156 
157     ti = ts->state_ptr;
158     if (ti == NULL) {
159         ti = tcg_malloc(sizeof(TempOptInfo));
160         ts->state_ptr = ti;
161     }
162 
163     ti->next_copy = ts;
164     ti->prev_copy = ts;
165     if (ts->kind == TEMP_CONST) {
166         ti->is_const = true;
167         ti->val = ts->val;
168         ti->z_mask = ts->val;
169         ti->s_mask = smask_from_value(ts->val);
170     } else {
171         ti->is_const = false;
172         ti->z_mask = -1;
173         ti->s_mask = 0;
174     }
175 }
176 
177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
178 {
179     TCGTemp *i, *g, *l;
180 
181     /* If this is already readonly, we can't do better. */
182     if (temp_readonly(ts)) {
183         return ts;
184     }
185 
186     g = l = NULL;
187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
188         if (temp_readonly(i)) {
189             return i;
190         } else if (i->kind > ts->kind) {
191             if (i->kind == TEMP_GLOBAL) {
192                 g = i;
193             } else if (i->kind == TEMP_LOCAL) {
194                 l = i;
195             }
196         }
197     }
198 
199     /* If we didn't find a better representation, return the same temp. */
200     return g ? g : l ? l : ts;
201 }
202 
203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
204 {
205     TCGTemp *i;
206 
207     if (ts1 == ts2) {
208         return true;
209     }
210 
211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
212         return false;
213     }
214 
215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
216         if (i == ts2) {
217             return true;
218         }
219     }
220 
221     return false;
222 }
223 
224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
225 {
226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
227 }
228 
/*
 * Rewrite @op in place as a move of @src into @dst.
 *
 * If @dst and @src are already known copies of each other the op is
 * redundant and is removed instead.  Otherwise the opcode is chosen
 * from ctx->type, the known-zero and sign masks of the source are
 * propagated to the destination, and, when both temps have the same
 * type, the destination joins the source's list of copies and inherits
 * its constant state.
 *
 * Always returns true, so that folders can "return tcg_opt_gen_mov()".
 */
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    /* Forget the old value of dst before describing the new one. */
    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
        new_op = INDEX_op_mov_i32;
        break;
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->s_mask = si->s_mask;

    /* Only temps of the same type are tracked as copies of each other. */
    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        /* Insert dst into the circular list immediately after src. */
        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
    return true;
}
281 
282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
283                              TCGArg dst, uint64_t val)
284 {
285     TCGTemp *tv;
286 
287     if (ctx->type == TCG_TYPE_I32) {
288         val = (int32_t)val;
289     }
290 
291     /* Convert movi to mov with constant temp. */
292     tv = tcg_constant_internal(ctx->type, val);
293     init_ts_info(ctx, tv);
294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
295 }
296 
297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
298 {
299     uint64_t l64, h64;
300 
301     switch (op) {
302     CASE_OP_32_64(add):
303         return x + y;
304 
305     CASE_OP_32_64(sub):
306         return x - y;
307 
308     CASE_OP_32_64(mul):
309         return x * y;
310 
311     CASE_OP_32_64(and):
312         return x & y;
313 
314     CASE_OP_32_64(or):
315         return x | y;
316 
317     CASE_OP_32_64(xor):
318         return x ^ y;
319 
320     case INDEX_op_shl_i32:
321         return (uint32_t)x << (y & 31);
322 
323     case INDEX_op_shl_i64:
324         return (uint64_t)x << (y & 63);
325 
326     case INDEX_op_shr_i32:
327         return (uint32_t)x >> (y & 31);
328 
329     case INDEX_op_shr_i64:
330         return (uint64_t)x >> (y & 63);
331 
332     case INDEX_op_sar_i32:
333         return (int32_t)x >> (y & 31);
334 
335     case INDEX_op_sar_i64:
336         return (int64_t)x >> (y & 63);
337 
338     case INDEX_op_rotr_i32:
339         return ror32(x, y & 31);
340 
341     case INDEX_op_rotr_i64:
342         return ror64(x, y & 63);
343 
344     case INDEX_op_rotl_i32:
345         return rol32(x, y & 31);
346 
347     case INDEX_op_rotl_i64:
348         return rol64(x, y & 63);
349 
350     CASE_OP_32_64(not):
351         return ~x;
352 
353     CASE_OP_32_64(neg):
354         return -x;
355 
356     CASE_OP_32_64(andc):
357         return x & ~y;
358 
359     CASE_OP_32_64(orc):
360         return x | ~y;
361 
362     CASE_OP_32_64(eqv):
363         return ~(x ^ y);
364 
365     CASE_OP_32_64(nand):
366         return ~(x & y);
367 
368     CASE_OP_32_64(nor):
369         return ~(x | y);
370 
371     case INDEX_op_clz_i32:
372         return (uint32_t)x ? clz32(x) : y;
373 
374     case INDEX_op_clz_i64:
375         return x ? clz64(x) : y;
376 
377     case INDEX_op_ctz_i32:
378         return (uint32_t)x ? ctz32(x) : y;
379 
380     case INDEX_op_ctz_i64:
381         return x ? ctz64(x) : y;
382 
383     case INDEX_op_ctpop_i32:
384         return ctpop32(x);
385 
386     case INDEX_op_ctpop_i64:
387         return ctpop64(x);
388 
389     CASE_OP_32_64(ext8s):
390         return (int8_t)x;
391 
392     CASE_OP_32_64(ext16s):
393         return (int16_t)x;
394 
395     CASE_OP_32_64(ext8u):
396         return (uint8_t)x;
397 
398     CASE_OP_32_64(ext16u):
399         return (uint16_t)x;
400 
401     CASE_OP_32_64(bswap16):
402         x = bswap16(x);
403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
404 
405     CASE_OP_32_64(bswap32):
406         x = bswap32(x);
407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
408 
409     case INDEX_op_bswap64_i64:
410         return bswap64(x);
411 
412     case INDEX_op_ext_i32_i64:
413     case INDEX_op_ext32s_i64:
414         return (int32_t)x;
415 
416     case INDEX_op_extu_i32_i64:
417     case INDEX_op_extrl_i64_i32:
418     case INDEX_op_ext32u_i64:
419         return (uint32_t)x;
420 
421     case INDEX_op_extrh_i64_i32:
422         return (uint64_t)x >> 32;
423 
424     case INDEX_op_muluh_i32:
425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
426     case INDEX_op_mulsh_i32:
427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
428 
429     case INDEX_op_muluh_i64:
430         mulu64(&l64, &h64, x, y);
431         return h64;
432     case INDEX_op_mulsh_i64:
433         muls64(&l64, &h64, x, y);
434         return h64;
435 
436     case INDEX_op_div_i32:
437         /* Avoid crashing on divide by zero, otherwise undefined.  */
438         return (int32_t)x / ((int32_t)y ? : 1);
439     case INDEX_op_divu_i32:
440         return (uint32_t)x / ((uint32_t)y ? : 1);
441     case INDEX_op_div_i64:
442         return (int64_t)x / ((int64_t)y ? : 1);
443     case INDEX_op_divu_i64:
444         return (uint64_t)x / ((uint64_t)y ? : 1);
445 
446     case INDEX_op_rem_i32:
447         return (int32_t)x % ((int32_t)y ? : 1);
448     case INDEX_op_remu_i32:
449         return (uint32_t)x % ((uint32_t)y ? : 1);
450     case INDEX_op_rem_i64:
451         return (int64_t)x % ((int64_t)y ? : 1);
452     case INDEX_op_remu_i64:
453         return (uint64_t)x % ((uint64_t)y ? : 1);
454 
455     default:
456         fprintf(stderr,
457                 "Unrecognized operation %d in do_constant_folding.\n", op);
458         tcg_abort();
459     }
460 }
461 
462 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
463                                     uint64_t x, uint64_t y)
464 {
465     uint64_t res = do_constant_folding_2(op, x, y);
466     if (type == TCG_TYPE_I32) {
467         res = (int32_t)res;
468     }
469     return res;
470 }
471 
472 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
473 {
474     switch (c) {
475     case TCG_COND_EQ:
476         return x == y;
477     case TCG_COND_NE:
478         return x != y;
479     case TCG_COND_LT:
480         return (int32_t)x < (int32_t)y;
481     case TCG_COND_GE:
482         return (int32_t)x >= (int32_t)y;
483     case TCG_COND_LE:
484         return (int32_t)x <= (int32_t)y;
485     case TCG_COND_GT:
486         return (int32_t)x > (int32_t)y;
487     case TCG_COND_LTU:
488         return x < y;
489     case TCG_COND_GEU:
490         return x >= y;
491     case TCG_COND_LEU:
492         return x <= y;
493     case TCG_COND_GTU:
494         return x > y;
495     default:
496         tcg_abort();
497     }
498 }
499 
500 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
501 {
502     switch (c) {
503     case TCG_COND_EQ:
504         return x == y;
505     case TCG_COND_NE:
506         return x != y;
507     case TCG_COND_LT:
508         return (int64_t)x < (int64_t)y;
509     case TCG_COND_GE:
510         return (int64_t)x >= (int64_t)y;
511     case TCG_COND_LE:
512         return (int64_t)x <= (int64_t)y;
513     case TCG_COND_GT:
514         return (int64_t)x > (int64_t)y;
515     case TCG_COND_LTU:
516         return x < y;
517     case TCG_COND_GEU:
518         return x >= y;
519     case TCG_COND_LEU:
520         return x <= y;
521     case TCG_COND_GTU:
522         return x > y;
523     default:
524         tcg_abort();
525     }
526 }
527 
528 static bool do_constant_folding_cond_eq(TCGCond c)
529 {
530     switch (c) {
531     case TCG_COND_GT:
532     case TCG_COND_LTU:
533     case TCG_COND_LT:
534     case TCG_COND_GTU:
535     case TCG_COND_NE:
536         return 0;
537     case TCG_COND_GE:
538     case TCG_COND_GEU:
539     case TCG_COND_LE:
540     case TCG_COND_LEU:
541     case TCG_COND_EQ:
542         return 1;
543     default:
544         tcg_abort();
545     }
546 }
547 
548 /*
549  * Return -1 if the condition can't be simplified,
550  * and the result of the condition (0 or 1) if it can.
551  */
552 static int do_constant_folding_cond(TCGType type, TCGArg x,
553                                     TCGArg y, TCGCond c)
554 {
555     uint64_t xv = arg_info(x)->val;
556     uint64_t yv = arg_info(y)->val;
557 
558     if (arg_is_const(x) && arg_is_const(y)) {
559         switch (type) {
560         case TCG_TYPE_I32:
561             return do_constant_folding_cond_32(xv, yv, c);
562         case TCG_TYPE_I64:
563             return do_constant_folding_cond_64(xv, yv, c);
564         default:
565             /* Only scalar comparisons are optimizable */
566             return -1;
567         }
568     } else if (args_are_copies(x, y)) {
569         return do_constant_folding_cond_eq(c);
570     } else if (arg_is_const(y) && yv == 0) {
571         switch (c) {
572         case TCG_COND_LTU:
573             return 0;
574         case TCG_COND_GEU:
575             return 1;
576         default:
577             return -1;
578         }
579     }
580     return -1;
581 }
582 
583 /*
584  * Return -1 if the condition can't be simplified,
585  * and the result of the condition (0 or 1) if it can.
586  */
587 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
588 {
589     TCGArg al = p1[0], ah = p1[1];
590     TCGArg bl = p2[0], bh = p2[1];
591 
592     if (arg_is_const(bl) && arg_is_const(bh)) {
593         tcg_target_ulong blv = arg_info(bl)->val;
594         tcg_target_ulong bhv = arg_info(bh)->val;
595         uint64_t b = deposit64(blv, 32, 32, bhv);
596 
597         if (arg_is_const(al) && arg_is_const(ah)) {
598             tcg_target_ulong alv = arg_info(al)->val;
599             tcg_target_ulong ahv = arg_info(ah)->val;
600             uint64_t a = deposit64(alv, 32, 32, ahv);
601             return do_constant_folding_cond_64(a, b, c);
602         }
603         if (b == 0) {
604             switch (c) {
605             case TCG_COND_LTU:
606                 return 0;
607             case TCG_COND_GEU:
608                 return 1;
609             default:
610                 break;
611             }
612         }
613     }
614     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
615         return do_constant_folding_cond_eq(c);
616     }
617     return -1;
618 }
619 
620 /**
621  * swap_commutative:
622  * @dest: TCGArg of the destination argument, or NO_DEST.
623  * @p1: first paired argument
624  * @p2: second paired argument
625  *
626  * If *@p1 is a constant and *@p2 is not, swap.
627  * If *@p2 matches @dest, swap.
628  * Return true if a swap was performed.
629  */
630 
631 #define NO_DEST  temp_arg(NULL)
632 
633 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
634 {
635     TCGArg a1 = *p1, a2 = *p2;
636     int sum = 0;
637     sum += arg_is_const(a1);
638     sum -= arg_is_const(a2);
639 
640     /* Prefer the constant in second argument, and then the form
641        op a, a, b, which is better handled on non-RISC hosts. */
642     if (sum > 0 || (sum == 0 && dest == a2)) {
643         *p1 = a2;
644         *p2 = a1;
645         return true;
646     }
647     return false;
648 }
649 
650 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
651 {
652     int sum = 0;
653     sum += arg_is_const(p1[0]);
654     sum += arg_is_const(p1[1]);
655     sum -= arg_is_const(p2[0]);
656     sum -= arg_is_const(p2[1]);
657     if (sum > 0) {
658         TCGArg t;
659         t = p1[0], p1[0] = p2[0], p2[0] = t;
660         t = p1[1], p1[1] = p2[1], p2[1] = t;
661         return true;
662     }
663     return false;
664 }
665 
666 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
667 {
668     for (int i = 0; i < nb_args; i++) {
669         TCGTemp *ts = arg_temp(op->args[i]);
670         if (ts) {
671             init_ts_info(ctx, ts);
672         }
673     }
674 }
675 
676 static void copy_propagate(OptContext *ctx, TCGOp *op,
677                            int nb_oargs, int nb_iargs)
678 {
679     TCGContext *s = ctx->tcg;
680 
681     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
682         TCGTemp *ts = arg_temp(op->args[i]);
683         if (ts && ts_is_copy(ts)) {
684             op->args[i] = temp_arg(find_better_copy(s, ts));
685         }
686     }
687 }
688 
689 static void finish_folding(OptContext *ctx, TCGOp *op)
690 {
691     const TCGOpDef *def = &tcg_op_defs[op->opc];
692     int i, nb_oargs;
693 
694     /*
695      * For an opcode that ends a BB, reset all temp data.
696      * We do no cross-BB optimization.
697      */
698     if (def->flags & TCG_OPF_BB_END) {
699         memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
700         ctx->prev_mb = NULL;
701         return;
702     }
703 
704     nb_oargs = def->nb_oargs;
705     for (i = 0; i < nb_oargs; i++) {
706         TCGTemp *ts = arg_temp(op->args[i]);
707         reset_ts(ts);
708         /*
709          * Save the corresponding known-zero/sign bits mask for the
710          * first output argument (only one supported so far).
711          */
712         if (i == 0) {
713             ts_info(ts)->z_mask = ctx->z_mask;
714             ts_info(ts)->s_mask = ctx->s_mask;
715         }
716     }
717 }
718 
719 /*
720  * The fold_* functions return true when processing is complete,
721  * usually by folding the operation to a constant or to a copy,
722  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
723  * like collect information about the value produced, for use in
724  * optimizing a subsequent operation.
725  *
726  * These first fold_* functions are all helpers, used by other
727  * folders for more specific operations.
728  */
729 
730 static bool fold_const1(OptContext *ctx, TCGOp *op)
731 {
732     if (arg_is_const(op->args[1])) {
733         uint64_t t;
734 
735         t = arg_info(op->args[1])->val;
736         t = do_constant_folding(op->opc, ctx->type, t, 0);
737         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
738     }
739     return false;
740 }
741 
742 static bool fold_const2(OptContext *ctx, TCGOp *op)
743 {
744     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
745         uint64_t t1 = arg_info(op->args[1])->val;
746         uint64_t t2 = arg_info(op->args[2])->val;
747 
748         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
749         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
750     }
751     return false;
752 }
753 
754 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
755 {
756     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
757     return fold_const2(ctx, op);
758 }
759 
760 static bool fold_masks(OptContext *ctx, TCGOp *op)
761 {
762     uint64_t a_mask = ctx->a_mask;
763     uint64_t z_mask = ctx->z_mask;
764     uint64_t s_mask = ctx->s_mask;
765 
766     /*
767      * 32-bit ops generate 32-bit results, which for the purpose of
768      * simplifying tcg are sign-extended.  Certainly that's how we
769      * represent our constants elsewhere.  Note that the bits will
770      * be reset properly for a 64-bit value when encountering the
771      * type changing opcodes.
772      */
773     if (ctx->type == TCG_TYPE_I32) {
774         a_mask = (int32_t)a_mask;
775         z_mask = (int32_t)z_mask;
776         s_mask |= MAKE_64BIT_MASK(32, 32);
777         ctx->z_mask = z_mask;
778         ctx->s_mask = s_mask;
779     }
780 
781     if (z_mask == 0) {
782         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
783     }
784     if (a_mask == 0) {
785         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
786     }
787     return false;
788 }
789 
790 /*
791  * Convert @op to NOT, if NOT is supported by the host.
792  * Return true f the conversion is successful, which will still
793  * indicate that the processing is complete.
794  */
795 static bool fold_not(OptContext *ctx, TCGOp *op);
796 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
797 {
798     TCGOpcode not_op;
799     bool have_not;
800 
801     switch (ctx->type) {
802     case TCG_TYPE_I32:
803         not_op = INDEX_op_not_i32;
804         have_not = TCG_TARGET_HAS_not_i32;
805         break;
806     case TCG_TYPE_I64:
807         not_op = INDEX_op_not_i64;
808         have_not = TCG_TARGET_HAS_not_i64;
809         break;
810     case TCG_TYPE_V64:
811     case TCG_TYPE_V128:
812     case TCG_TYPE_V256:
813         not_op = INDEX_op_not_vec;
814         have_not = TCG_TARGET_HAS_not_vec;
815         break;
816     default:
817         g_assert_not_reached();
818     }
819     if (have_not) {
820         op->opc = not_op;
821         op->args[1] = op->args[idx];
822         return fold_not(ctx, op);
823     }
824     return false;
825 }
826 
827 /* If the binary operation has first argument @i, fold to @i. */
828 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
829 {
830     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
831         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
832     }
833     return false;
834 }
835 
836 /* If the binary operation has first argument @i, fold to NOT. */
837 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
838 {
839     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
840         return fold_to_not(ctx, op, 2);
841     }
842     return false;
843 }
844 
845 /* If the binary operation has second argument @i, fold to @i. */
846 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
847 {
848     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
849         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
850     }
851     return false;
852 }
853 
854 /* If the binary operation has second argument @i, fold to identity. */
855 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
856 {
857     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
858         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
859     }
860     return false;
861 }
862 
863 /* If the binary operation has second argument @i, fold to NOT. */
864 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
865 {
866     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
867         return fold_to_not(ctx, op, 1);
868     }
869     return false;
870 }
871 
872 /* If the binary operation has both arguments equal, fold to @i. */
873 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
874 {
875     if (args_are_copies(op->args[1], op->args[2])) {
876         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
877     }
878     return false;
879 }
880 
881 /* If the binary operation has both arguments equal, fold to identity. */
882 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
883 {
884     if (args_are_copies(op->args[1], op->args[2])) {
885         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
886     }
887     return false;
888 }
889 
890 /*
891  * These outermost fold_<op> functions are sorted alphabetically.
892  *
893  * The ordering of the transformations should be:
894  *   1) those that produce a constant
895  *   2) those that produce a copy
896  *   3) those that produce information about the result value.
897  */
898 
899 static bool fold_add(OptContext *ctx, TCGOp *op)
900 {
901     if (fold_const2_commutative(ctx, op) ||
902         fold_xi_to_x(ctx, op, 0)) {
903         return true;
904     }
905     return false;
906 }
907 
908 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
909 {
910     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
911         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
912         uint64_t al = arg_info(op->args[2])->val;
913         uint64_t ah = arg_info(op->args[3])->val;
914         uint64_t bl = arg_info(op->args[4])->val;
915         uint64_t bh = arg_info(op->args[5])->val;
916         TCGArg rl, rh;
917         TCGOp *op2;
918 
919         if (ctx->type == TCG_TYPE_I32) {
920             uint64_t a = deposit64(al, 32, 32, ah);
921             uint64_t b = deposit64(bl, 32, 32, bh);
922 
923             if (add) {
924                 a += b;
925             } else {
926                 a -= b;
927             }
928 
929             al = sextract64(a, 0, 32);
930             ah = sextract64(a, 32, 32);
931         } else {
932             Int128 a = int128_make128(al, ah);
933             Int128 b = int128_make128(bl, bh);
934 
935             if (add) {
936                 a = int128_add(a, b);
937             } else {
938                 a = int128_sub(a, b);
939             }
940 
941             al = int128_getlo(a);
942             ah = int128_gethi(a);
943         }
944 
945         rl = op->args[0];
946         rh = op->args[1];
947 
948         /* The proper opcode is supplied by tcg_opt_gen_mov. */
949         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
950 
951         tcg_opt_gen_movi(ctx, op, rl, al);
952         tcg_opt_gen_movi(ctx, op2, rh, ah);
953         return true;
954     }
955     return false;
956 }
957 
958 static bool fold_add2(OptContext *ctx, TCGOp *op)
959 {
960     /* Note that the high and low parts may be independently swapped. */
961     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
962     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
963 
964     return fold_addsub2(ctx, op, true);
965 }
966 
967 static bool fold_and(OptContext *ctx, TCGOp *op)
968 {
969     uint64_t z1, z2;
970 
971     if (fold_const2_commutative(ctx, op) ||
972         fold_xi_to_i(ctx, op, 0) ||
973         fold_xi_to_x(ctx, op, -1) ||
974         fold_xx_to_x(ctx, op)) {
975         return true;
976     }
977 
978     z1 = arg_info(op->args[1])->z_mask;
979     z2 = arg_info(op->args[2])->z_mask;
980     ctx->z_mask = z1 & z2;
981 
982     /*
983      * Sign repetitions are perforce all identical, whether they are 1 or 0.
984      * Bitwise operations preserve the relative quantity of the repetitions.
985      */
986     ctx->s_mask = arg_info(op->args[1])->s_mask
987                 & arg_info(op->args[2])->s_mask;
988 
989     /*
990      * Known-zeros does not imply known-ones.  Therefore unless
991      * arg2 is constant, we can't infer affected bits from it.
992      */
993     if (arg_is_const(op->args[2])) {
994         ctx->a_mask = z1 & ~z2;
995     }
996 
997     return fold_masks(ctx, op);
998 }
999 
1000 static bool fold_andc(OptContext *ctx, TCGOp *op)
1001 {
1002     uint64_t z1;
1003 
1004     if (fold_const2(ctx, op) ||
1005         fold_xx_to_i(ctx, op, 0) ||
1006         fold_xi_to_x(ctx, op, 0) ||
1007         fold_ix_to_not(ctx, op, -1)) {
1008         return true;
1009     }
1010 
1011     z1 = arg_info(op->args[1])->z_mask;
1012 
1013     /*
1014      * Known-zeros does not imply known-ones.  Therefore unless
1015      * arg2 is constant, we can't infer anything from it.
1016      */
1017     if (arg_is_const(op->args[2])) {
1018         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1019         ctx->a_mask = z1 & ~z2;
1020         z1 &= z2;
1021     }
1022     ctx->z_mask = z1;
1023 
1024     ctx->s_mask = arg_info(op->args[1])->s_mask
1025                 & arg_info(op->args[2])->s_mask;
1026     return fold_masks(ctx, op);
1027 }
1028 
1029 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1030 {
1031     TCGCond cond = op->args[2];
1032     int i;
1033 
1034     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1035         op->args[2] = cond = tcg_swap_cond(cond);
1036     }
1037 
1038     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1039     if (i == 0) {
1040         tcg_op_remove(ctx->tcg, op);
1041         return true;
1042     }
1043     if (i > 0) {
1044         op->opc = INDEX_op_br;
1045         op->args[0] = op->args[3];
1046     }
1047     return false;
1048 }
1049 
1050 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1051 {
1052     TCGCond cond = op->args[4];
1053     TCGArg label = op->args[5];
1054     int i, inv = 0;
1055 
1056     if (swap_commutative2(&op->args[0], &op->args[2])) {
1057         op->args[4] = cond = tcg_swap_cond(cond);
1058     }
1059 
1060     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1061     if (i >= 0) {
1062         goto do_brcond_const;
1063     }
1064 
1065     switch (cond) {
1066     case TCG_COND_LT:
1067     case TCG_COND_GE:
1068         /*
1069          * Simplify LT/GE comparisons vs zero to a single compare
1070          * vs the high word of the input.
1071          */
1072         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1073             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1074             goto do_brcond_high;
1075         }
1076         break;
1077 
1078     case TCG_COND_NE:
1079         inv = 1;
1080         QEMU_FALLTHROUGH;
1081     case TCG_COND_EQ:
1082         /*
1083          * Simplify EQ/NE comparisons where one of the pairs
1084          * can be simplified.
1085          */
1086         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1087                                      op->args[2], cond);
1088         switch (i ^ inv) {
1089         case 0:
1090             goto do_brcond_const;
1091         case 1:
1092             goto do_brcond_high;
1093         }
1094 
1095         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1096                                      op->args[3], cond);
1097         switch (i ^ inv) {
1098         case 0:
1099             goto do_brcond_const;
1100         case 1:
1101             op->opc = INDEX_op_brcond_i32;
1102             op->args[1] = op->args[2];
1103             op->args[2] = cond;
1104             op->args[3] = label;
1105             break;
1106         }
1107         break;
1108 
1109     default:
1110         break;
1111 
1112     do_brcond_high:
1113         op->opc = INDEX_op_brcond_i32;
1114         op->args[0] = op->args[1];
1115         op->args[1] = op->args[3];
1116         op->args[2] = cond;
1117         op->args[3] = label;
1118         break;
1119 
1120     do_brcond_const:
1121         if (i == 0) {
1122             tcg_op_remove(ctx->tcg, op);
1123             return true;
1124         }
1125         op->opc = INDEX_op_br;
1126         op->args[0] = label;
1127         break;
1128     }
1129     return false;
1130 }
1131 
1132 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1133 {
1134     uint64_t z_mask, s_mask, sign;
1135 
1136     if (arg_is_const(op->args[1])) {
1137         uint64_t t = arg_info(op->args[1])->val;
1138 
1139         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1140         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1141     }
1142 
1143     z_mask = arg_info(op->args[1])->z_mask;
1144 
1145     switch (op->opc) {
1146     case INDEX_op_bswap16_i32:
1147     case INDEX_op_bswap16_i64:
1148         z_mask = bswap16(z_mask);
1149         sign = INT16_MIN;
1150         break;
1151     case INDEX_op_bswap32_i32:
1152     case INDEX_op_bswap32_i64:
1153         z_mask = bswap32(z_mask);
1154         sign = INT32_MIN;
1155         break;
1156     case INDEX_op_bswap64_i64:
1157         z_mask = bswap64(z_mask);
1158         sign = INT64_MIN;
1159         break;
1160     default:
1161         g_assert_not_reached();
1162     }
1163     s_mask = smask_from_zmask(z_mask);
1164 
1165     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1166     case TCG_BSWAP_OZ:
1167         break;
1168     case TCG_BSWAP_OS:
1169         /* If the sign bit may be 1, force all the bits above to 1. */
1170         if (z_mask & sign) {
1171             z_mask |= sign;
1172             s_mask = sign << 1;
1173         }
1174         break;
1175     default:
1176         /* The high bits are undefined: force all bits above the sign to 1. */
1177         z_mask |= sign << 1;
1178         s_mask = 0;
1179         break;
1180     }
1181     ctx->z_mask = z_mask;
1182     ctx->s_mask = s_mask;
1183 
1184     return fold_masks(ctx, op);
1185 }
1186 
1187 static bool fold_call(OptContext *ctx, TCGOp *op)
1188 {
1189     TCGContext *s = ctx->tcg;
1190     int nb_oargs = TCGOP_CALLO(op);
1191     int nb_iargs = TCGOP_CALLI(op);
1192     int flags, i;
1193 
1194     init_arguments(ctx, op, nb_oargs + nb_iargs);
1195     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1196 
1197     /* If the function reads or writes globals, reset temp data. */
1198     flags = tcg_call_flags(op);
1199     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1200         int nb_globals = s->nb_globals;
1201 
1202         for (i = 0; i < nb_globals; i++) {
1203             if (test_bit(i, ctx->temps_used.l)) {
1204                 reset_ts(&ctx->tcg->temps[i]);
1205             }
1206         }
1207     }
1208 
1209     /* Reset temp data for outputs. */
1210     for (i = 0; i < nb_oargs; i++) {
1211         reset_temp(op->args[i]);
1212     }
1213 
1214     /* Stop optimizing MB across calls. */
1215     ctx->prev_mb = NULL;
1216     return true;
1217 }
1218 
1219 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1220 {
1221     uint64_t z_mask;
1222 
1223     if (arg_is_const(op->args[1])) {
1224         uint64_t t = arg_info(op->args[1])->val;
1225 
1226         if (t != 0) {
1227             t = do_constant_folding(op->opc, ctx->type, t, 0);
1228             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1229         }
1230         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1231     }
1232 
1233     switch (ctx->type) {
1234     case TCG_TYPE_I32:
1235         z_mask = 31;
1236         break;
1237     case TCG_TYPE_I64:
1238         z_mask = 63;
1239         break;
1240     default:
1241         g_assert_not_reached();
1242     }
1243     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1244     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1245     return false;
1246 }
1247 
1248 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1249 {
1250     if (fold_const1(ctx, op)) {
1251         return true;
1252     }
1253 
1254     switch (ctx->type) {
1255     case TCG_TYPE_I32:
1256         ctx->z_mask = 32 | 31;
1257         break;
1258     case TCG_TYPE_I64:
1259         ctx->z_mask = 64 | 63;
1260         break;
1261     default:
1262         g_assert_not_reached();
1263     }
1264     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1265     return false;
1266 }
1267 
1268 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1269 {
1270     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1271         uint64_t t1 = arg_info(op->args[1])->val;
1272         uint64_t t2 = arg_info(op->args[2])->val;
1273 
1274         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1275         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1276     }
1277 
1278     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1279                             op->args[3], op->args[4],
1280                             arg_info(op->args[2])->z_mask);
1281     return false;
1282 }
1283 
1284 static bool fold_divide(OptContext *ctx, TCGOp *op)
1285 {
1286     if (fold_const2(ctx, op) ||
1287         fold_xi_to_x(ctx, op, 1)) {
1288         return true;
1289     }
1290     return false;
1291 }
1292 
1293 static bool fold_dup(OptContext *ctx, TCGOp *op)
1294 {
1295     if (arg_is_const(op->args[1])) {
1296         uint64_t t = arg_info(op->args[1])->val;
1297         t = dup_const(TCGOP_VECE(op), t);
1298         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1299     }
1300     return false;
1301 }
1302 
1303 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1304 {
1305     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1306         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1307                                arg_info(op->args[2])->val);
1308         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1309     }
1310 
1311     if (args_are_copies(op->args[1], op->args[2])) {
1312         op->opc = INDEX_op_dup_vec;
1313         TCGOP_VECE(op) = MO_32;
1314     }
1315     return false;
1316 }
1317 
1318 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1319 {
1320     if (fold_const2_commutative(ctx, op) ||
1321         fold_xi_to_x(ctx, op, -1) ||
1322         fold_xi_to_not(ctx, op, 0)) {
1323         return true;
1324     }
1325 
1326     ctx->s_mask = arg_info(op->args[1])->s_mask
1327                 & arg_info(op->args[2])->s_mask;
1328     return false;
1329 }
1330 
1331 static bool fold_extract(OptContext *ctx, TCGOp *op)
1332 {
1333     uint64_t z_mask_old, z_mask;
1334     int pos = op->args[2];
1335     int len = op->args[3];
1336 
1337     if (arg_is_const(op->args[1])) {
1338         uint64_t t;
1339 
1340         t = arg_info(op->args[1])->val;
1341         t = extract64(t, pos, len);
1342         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1343     }
1344 
1345     z_mask_old = arg_info(op->args[1])->z_mask;
1346     z_mask = extract64(z_mask_old, pos, len);
1347     if (pos == 0) {
1348         ctx->a_mask = z_mask_old ^ z_mask;
1349     }
1350     ctx->z_mask = z_mask;
1351     ctx->s_mask = smask_from_zmask(z_mask);
1352 
1353     return fold_masks(ctx, op);
1354 }
1355 
1356 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1357 {
1358     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1359         uint64_t v1 = arg_info(op->args[1])->val;
1360         uint64_t v2 = arg_info(op->args[2])->val;
1361         int shr = op->args[3];
1362 
1363         if (op->opc == INDEX_op_extract2_i64) {
1364             v1 >>= shr;
1365             v2 <<= 64 - shr;
1366         } else {
1367             v1 = (uint32_t)v1 >> shr;
1368             v2 = (int32_t)v2 << (32 - shr);
1369         }
1370         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1371     }
1372     return false;
1373 }
1374 
1375 static bool fold_exts(OptContext *ctx, TCGOp *op)
1376 {
1377     uint64_t s_mask_old, s_mask, z_mask, sign;
1378     bool type_change = false;
1379 
1380     if (fold_const1(ctx, op)) {
1381         return true;
1382     }
1383 
1384     z_mask = arg_info(op->args[1])->z_mask;
1385     s_mask = arg_info(op->args[1])->s_mask;
1386     s_mask_old = s_mask;
1387 
1388     switch (op->opc) {
1389     CASE_OP_32_64(ext8s):
1390         sign = INT8_MIN;
1391         z_mask = (uint8_t)z_mask;
1392         break;
1393     CASE_OP_32_64(ext16s):
1394         sign = INT16_MIN;
1395         z_mask = (uint16_t)z_mask;
1396         break;
1397     case INDEX_op_ext_i32_i64:
1398         type_change = true;
1399         QEMU_FALLTHROUGH;
1400     case INDEX_op_ext32s_i64:
1401         sign = INT32_MIN;
1402         z_mask = (uint32_t)z_mask;
1403         break;
1404     default:
1405         g_assert_not_reached();
1406     }
1407 
1408     if (z_mask & sign) {
1409         z_mask |= sign;
1410     }
1411     s_mask |= sign << 1;
1412 
1413     ctx->z_mask = z_mask;
1414     ctx->s_mask = s_mask;
1415     if (!type_change) {
1416         ctx->a_mask = s_mask & ~s_mask_old;
1417     }
1418 
1419     return fold_masks(ctx, op);
1420 }
1421 
1422 static bool fold_extu(OptContext *ctx, TCGOp *op)
1423 {
1424     uint64_t z_mask_old, z_mask;
1425     bool type_change = false;
1426 
1427     if (fold_const1(ctx, op)) {
1428         return true;
1429     }
1430 
1431     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1432 
1433     switch (op->opc) {
1434     CASE_OP_32_64(ext8u):
1435         z_mask = (uint8_t)z_mask;
1436         break;
1437     CASE_OP_32_64(ext16u):
1438         z_mask = (uint16_t)z_mask;
1439         break;
1440     case INDEX_op_extrl_i64_i32:
1441     case INDEX_op_extu_i32_i64:
1442         type_change = true;
1443         QEMU_FALLTHROUGH;
1444     case INDEX_op_ext32u_i64:
1445         z_mask = (uint32_t)z_mask;
1446         break;
1447     case INDEX_op_extrh_i64_i32:
1448         type_change = true;
1449         z_mask >>= 32;
1450         break;
1451     default:
1452         g_assert_not_reached();
1453     }
1454 
1455     ctx->z_mask = z_mask;
1456     ctx->s_mask = smask_from_zmask(z_mask);
1457     if (!type_change) {
1458         ctx->a_mask = z_mask_old ^ z_mask;
1459     }
1460     return fold_masks(ctx, op);
1461 }
1462 
1463 static bool fold_mb(OptContext *ctx, TCGOp *op)
1464 {
1465     /* Eliminate duplicate and redundant fence instructions.  */
1466     if (ctx->prev_mb) {
1467         /*
1468          * Merge two barriers of the same type into one,
1469          * or a weaker barrier into a stronger one,
1470          * or two weaker barriers into a stronger one.
1471          *   mb X; mb Y => mb X|Y
1472          *   mb; strl => mb; st
1473          *   ldaq; mb => ld; mb
1474          *   ldaq; strl => ld; mb; st
1475          * Other combinations are also merged into a strong
1476          * barrier.  This is stricter than specified but for
1477          * the purposes of TCG is better than not optimizing.
1478          */
1479         ctx->prev_mb->args[0] |= op->args[0];
1480         tcg_op_remove(ctx->tcg, op);
1481     } else {
1482         ctx->prev_mb = op;
1483     }
1484     return true;
1485 }
1486 
1487 static bool fold_mov(OptContext *ctx, TCGOp *op)
1488 {
1489     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1490 }
1491 
1492 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1493 {
1494     TCGCond cond = op->args[5];
1495     int i;
1496 
1497     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1498         op->args[5] = cond = tcg_swap_cond(cond);
1499     }
1500     /*
1501      * Canonicalize the "false" input reg to match the destination reg so
1502      * that the tcg backend can implement a "move if true" operation.
1503      */
1504     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1505         op->args[5] = cond = tcg_invert_cond(cond);
1506     }
1507 
1508     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1509     if (i >= 0) {
1510         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1511     }
1512 
1513     ctx->z_mask = arg_info(op->args[3])->z_mask
1514                 | arg_info(op->args[4])->z_mask;
1515     ctx->s_mask = arg_info(op->args[3])->s_mask
1516                 & arg_info(op->args[4])->s_mask;
1517 
1518     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1519         uint64_t tv = arg_info(op->args[3])->val;
1520         uint64_t fv = arg_info(op->args[4])->val;
1521         TCGOpcode opc;
1522 
1523         switch (ctx->type) {
1524         case TCG_TYPE_I32:
1525             opc = INDEX_op_setcond_i32;
1526             break;
1527         case TCG_TYPE_I64:
1528             opc = INDEX_op_setcond_i64;
1529             break;
1530         default:
1531             g_assert_not_reached();
1532         }
1533 
1534         if (tv == 1 && fv == 0) {
1535             op->opc = opc;
1536             op->args[3] = cond;
1537         } else if (fv == 1 && tv == 0) {
1538             op->opc = opc;
1539             op->args[3] = tcg_invert_cond(cond);
1540         }
1541     }
1542     return false;
1543 }
1544 
1545 static bool fold_mul(OptContext *ctx, TCGOp *op)
1546 {
1547     if (fold_const2(ctx, op) ||
1548         fold_xi_to_i(ctx, op, 0) ||
1549         fold_xi_to_x(ctx, op, 1)) {
1550         return true;
1551     }
1552     return false;
1553 }
1554 
1555 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1556 {
1557     if (fold_const2_commutative(ctx, op) ||
1558         fold_xi_to_i(ctx, op, 0)) {
1559         return true;
1560     }
1561     return false;
1562 }
1563 
1564 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1565 {
1566     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1567 
1568     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1569         uint64_t a = arg_info(op->args[2])->val;
1570         uint64_t b = arg_info(op->args[3])->val;
1571         uint64_t h, l;
1572         TCGArg rl, rh;
1573         TCGOp *op2;
1574 
1575         switch (op->opc) {
1576         case INDEX_op_mulu2_i32:
1577             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1578             h = (int32_t)(l >> 32);
1579             l = (int32_t)l;
1580             break;
1581         case INDEX_op_muls2_i32:
1582             l = (int64_t)(int32_t)a * (int32_t)b;
1583             h = l >> 32;
1584             l = (int32_t)l;
1585             break;
1586         case INDEX_op_mulu2_i64:
1587             mulu64(&l, &h, a, b);
1588             break;
1589         case INDEX_op_muls2_i64:
1590             muls64(&l, &h, a, b);
1591             break;
1592         default:
1593             g_assert_not_reached();
1594         }
1595 
1596         rl = op->args[0];
1597         rh = op->args[1];
1598 
1599         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1600         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
1601 
1602         tcg_opt_gen_movi(ctx, op, rl, l);
1603         tcg_opt_gen_movi(ctx, op2, rh, h);
1604         return true;
1605     }
1606     return false;
1607 }
1608 
1609 static bool fold_nand(OptContext *ctx, TCGOp *op)
1610 {
1611     if (fold_const2_commutative(ctx, op) ||
1612         fold_xi_to_not(ctx, op, -1)) {
1613         return true;
1614     }
1615 
1616     ctx->s_mask = arg_info(op->args[1])->s_mask
1617                 & arg_info(op->args[2])->s_mask;
1618     return false;
1619 }
1620 
1621 static bool fold_neg(OptContext *ctx, TCGOp *op)
1622 {
1623     uint64_t z_mask;
1624 
1625     if (fold_const1(ctx, op)) {
1626         return true;
1627     }
1628 
1629     /* Set to 1 all bits to the left of the rightmost.  */
1630     z_mask = arg_info(op->args[1])->z_mask;
1631     ctx->z_mask = -(z_mask & -z_mask);
1632 
1633     /*
1634      * Because of fold_sub_to_neg, we want to always return true,
1635      * via finish_folding.
1636      */
1637     finish_folding(ctx, op);
1638     return true;
1639 }
1640 
1641 static bool fold_nor(OptContext *ctx, TCGOp *op)
1642 {
1643     if (fold_const2_commutative(ctx, op) ||
1644         fold_xi_to_not(ctx, op, 0)) {
1645         return true;
1646     }
1647 
1648     ctx->s_mask = arg_info(op->args[1])->s_mask
1649                 & arg_info(op->args[2])->s_mask;
1650     return false;
1651 }
1652 
1653 static bool fold_not(OptContext *ctx, TCGOp *op)
1654 {
1655     if (fold_const1(ctx, op)) {
1656         return true;
1657     }
1658 
1659     ctx->s_mask = arg_info(op->args[1])->s_mask;
1660 
1661     /* Because of fold_to_not, we want to always return true, via finish. */
1662     finish_folding(ctx, op);
1663     return true;
1664 }
1665 
1666 static bool fold_or(OptContext *ctx, TCGOp *op)
1667 {
1668     if (fold_const2_commutative(ctx, op) ||
1669         fold_xi_to_x(ctx, op, 0) ||
1670         fold_xx_to_x(ctx, op)) {
1671         return true;
1672     }
1673 
1674     ctx->z_mask = arg_info(op->args[1])->z_mask
1675                 | arg_info(op->args[2])->z_mask;
1676     ctx->s_mask = arg_info(op->args[1])->s_mask
1677                 & arg_info(op->args[2])->s_mask;
1678     return fold_masks(ctx, op);
1679 }
1680 
1681 static bool fold_orc(OptContext *ctx, TCGOp *op)
1682 {
1683     if (fold_const2(ctx, op) ||
1684         fold_xx_to_i(ctx, op, -1) ||
1685         fold_xi_to_x(ctx, op, -1) ||
1686         fold_ix_to_not(ctx, op, 0)) {
1687         return true;
1688     }
1689 
1690     ctx->s_mask = arg_info(op->args[1])->s_mask
1691                 & arg_info(op->args[2])->s_mask;
1692     return false;
1693 }
1694 
1695 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1696 {
1697     const TCGOpDef *def = &tcg_op_defs[op->opc];
1698     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1699     MemOp mop = get_memop(oi);
1700     int width = 8 * memop_size(mop);
1701 
1702     if (width < 64) {
1703         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1704         if (!(mop & MO_SIGN)) {
1705             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1706             ctx->s_mask <<= 1;
1707         }
1708     }
1709 
1710     /* Opcodes that touch guest memory stop the mb optimization.  */
1711     ctx->prev_mb = NULL;
1712     return false;
1713 }
1714 
1715 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1716 {
1717     /* Opcodes that touch guest memory stop the mb optimization.  */
1718     ctx->prev_mb = NULL;
1719     return false;
1720 }
1721 
1722 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1723 {
1724     if (fold_const2(ctx, op) ||
1725         fold_xx_to_i(ctx, op, 0)) {
1726         return true;
1727     }
1728     return false;
1729 }
1730 
1731 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1732 {
1733     TCGCond cond = op->args[3];
1734     int i;
1735 
1736     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1737         op->args[3] = cond = tcg_swap_cond(cond);
1738     }
1739 
1740     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1741     if (i >= 0) {
1742         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1743     }
1744 
1745     ctx->z_mask = 1;
1746     ctx->s_mask = smask_from_zmask(1);
1747     return false;
1748 }
1749 
1750 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1751 {
1752     TCGCond cond = op->args[5];
1753     int i, inv = 0;
1754 
1755     if (swap_commutative2(&op->args[1], &op->args[3])) {
1756         op->args[5] = cond = tcg_swap_cond(cond);
1757     }
1758 
1759     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1760     if (i >= 0) {
1761         goto do_setcond_const;
1762     }
1763 
1764     switch (cond) {
1765     case TCG_COND_LT:
1766     case TCG_COND_GE:
1767         /*
1768          * Simplify LT/GE comparisons vs zero to a single compare
1769          * vs the high word of the input.
1770          */
1771         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1772             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1773             goto do_setcond_high;
1774         }
1775         break;
1776 
1777     case TCG_COND_NE:
1778         inv = 1;
1779         QEMU_FALLTHROUGH;
1780     case TCG_COND_EQ:
1781         /*
1782          * Simplify EQ/NE comparisons where one of the pairs
1783          * can be simplified.
1784          */
1785         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1786                                      op->args[3], cond);
1787         switch (i ^ inv) {
1788         case 0:
1789             goto do_setcond_const;
1790         case 1:
1791             goto do_setcond_high;
1792         }
1793 
1794         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1795                                      op->args[4], cond);
1796         switch (i ^ inv) {
1797         case 0:
1798             goto do_setcond_const;
1799         case 1:
1800             op->args[2] = op->args[3];
1801             op->args[3] = cond;
1802             op->opc = INDEX_op_setcond_i32;
1803             break;
1804         }
1805         break;
1806 
1807     default:
1808         break;
1809 
1810     do_setcond_high:
1811         op->args[1] = op->args[2];
1812         op->args[2] = op->args[4];
1813         op->args[3] = cond;
1814         op->opc = INDEX_op_setcond_i32;
1815         break;
1816     }
1817 
1818     ctx->z_mask = 1;
1819     ctx->s_mask = smask_from_zmask(1);
1820     return false;
1821 
1822  do_setcond_const:
1823     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1824 }
1825 
1826 static bool fold_sextract(OptContext *ctx, TCGOp *op)
1827 {
1828     uint64_t z_mask, s_mask, s_mask_old;
1829     int pos = op->args[2];
1830     int len = op->args[3];
1831 
1832     if (arg_is_const(op->args[1])) {
1833         uint64_t t;
1834 
1835         t = arg_info(op->args[1])->val;
1836         t = sextract64(t, pos, len);
1837         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1838     }
1839 
1840     z_mask = arg_info(op->args[1])->z_mask;
1841     z_mask = sextract64(z_mask, pos, len);
1842     ctx->z_mask = z_mask;
1843 
1844     s_mask_old = arg_info(op->args[1])->s_mask;
1845     s_mask = sextract64(s_mask_old, pos, len);
1846     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1847     ctx->s_mask = s_mask;
1848 
1849     if (pos == 0) {
1850         ctx->a_mask = s_mask & ~s_mask_old;
1851     }
1852 
1853     return fold_masks(ctx, op);
1854 }
1855 
1856 static bool fold_shift(OptContext *ctx, TCGOp *op)
1857 {
1858     uint64_t s_mask, z_mask, sign;
1859 
1860     if (fold_const2(ctx, op) ||
1861         fold_ix_to_i(ctx, op, 0) ||
1862         fold_xi_to_x(ctx, op, 0)) {
1863         return true;
1864     }
1865 
1866     s_mask = arg_info(op->args[1])->s_mask;
1867     z_mask = arg_info(op->args[1])->z_mask;
1868 
1869     if (arg_is_const(op->args[2])) {
1870         int sh = arg_info(op->args[2])->val;
1871 
1872         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1873 
1874         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1875         ctx->s_mask = smask_from_smask(s_mask);
1876 
1877         return fold_masks(ctx, op);
1878     }
1879 
1880     switch (op->opc) {
1881     CASE_OP_32_64(sar):
1882         /*
1883          * Arithmetic right shift will not reduce the number of
1884          * input sign repetitions.
1885          */
1886         ctx->s_mask = s_mask;
1887         break;
1888     CASE_OP_32_64(shr):
1889         /*
1890          * If the sign bit is known zero, then logical right shift
1891          * will not reduced the number of input sign repetitions.
1892          */
1893         sign = (s_mask & -s_mask) >> 1;
1894         if (!(z_mask & sign)) {
1895             ctx->s_mask = s_mask;
1896         }
1897         break;
1898     default:
1899         break;
1900     }
1901 
1902     return false;
1903 }
1904 
1905 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1906 {
1907     TCGOpcode neg_op;
1908     bool have_neg;
1909 
1910     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1911         return false;
1912     }
1913 
1914     switch (ctx->type) {
1915     case TCG_TYPE_I32:
1916         neg_op = INDEX_op_neg_i32;
1917         have_neg = TCG_TARGET_HAS_neg_i32;
1918         break;
1919     case TCG_TYPE_I64:
1920         neg_op = INDEX_op_neg_i64;
1921         have_neg = TCG_TARGET_HAS_neg_i64;
1922         break;
1923     case TCG_TYPE_V64:
1924     case TCG_TYPE_V128:
1925     case TCG_TYPE_V256:
1926         neg_op = INDEX_op_neg_vec;
1927         have_neg = (TCG_TARGET_HAS_neg_vec &&
1928                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1929         break;
1930     default:
1931         g_assert_not_reached();
1932     }
1933     if (have_neg) {
1934         op->opc = neg_op;
1935         op->args[1] = op->args[2];
1936         return fold_neg(ctx, op);
1937     }
1938     return false;
1939 }
1940 
1941 static bool fold_sub(OptContext *ctx, TCGOp *op)
1942 {
1943     if (fold_const2(ctx, op) ||
1944         fold_xx_to_i(ctx, op, 0) ||
1945         fold_xi_to_x(ctx, op, 0) ||
1946         fold_sub_to_neg(ctx, op)) {
1947         return true;
1948     }
1949     return false;
1950 }
1951 
1952 static bool fold_sub2(OptContext *ctx, TCGOp *op)
1953 {
1954     return fold_addsub2(ctx, op, false);
1955 }
1956 
1957 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1958 {
1959     /* We can't do any folding with a load, but we can record bits. */
1960     switch (op->opc) {
1961     CASE_OP_32_64(ld8s):
1962         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1963         break;
1964     CASE_OP_32_64(ld8u):
1965         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1966         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1967         break;
1968     CASE_OP_32_64(ld16s):
1969         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1970         break;
1971     CASE_OP_32_64(ld16u):
1972         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1973         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1974         break;
1975     case INDEX_op_ld32s_i64:
1976         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1977         break;
1978     case INDEX_op_ld32u_i64:
1979         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
1980         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
1981         break;
1982     default:
1983         g_assert_not_reached();
1984     }
1985     return false;
1986 }
1987 
1988 static bool fold_xor(OptContext *ctx, TCGOp *op)
1989 {
1990     if (fold_const2_commutative(ctx, op) ||
1991         fold_xx_to_i(ctx, op, 0) ||
1992         fold_xi_to_x(ctx, op, 0) ||
1993         fold_xi_to_not(ctx, op, -1)) {
1994         return true;
1995     }
1996 
1997     ctx->z_mask = arg_info(op->args[1])->z_mask
1998                 | arg_info(op->args[2])->z_mask;
1999     ctx->s_mask = arg_info(op->args[1])->s_mask
2000                 & arg_info(op->args[2])->s_mask;
2001     return fold_masks(ctx, op);
2002 }
2003 
2004 /* Propagate constants and copies, fold constant expressions. */
2005 void tcg_optimize(TCGContext *s)
2006 {
2007     int nb_temps, i;
2008     TCGOp *op, *op_next;
2009     OptContext ctx = { .tcg = s };
2010 
2011     /* Array VALS has an element for each temp.
2012        If this temp holds a constant then its value is kept in VALS' element.
2013        If this temp is a copy of other ones then the other copies are
2014        available through the doubly linked circular list. */
2015 
2016     nb_temps = s->nb_temps;
2017     for (i = 0; i < nb_temps; ++i) {
2018         s->temps[i].state_ptr = NULL;
2019     }
2020 
2021     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2022         TCGOpcode opc = op->opc;
2023         const TCGOpDef *def;
2024         bool done = false;
2025 
2026         /* Calls are special. */
2027         if (opc == INDEX_op_call) {
2028             fold_call(&ctx, op);
2029             continue;
2030         }
2031 
2032         def = &tcg_op_defs[opc];
2033         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2034         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2035 
2036         /* Pre-compute the type of the operation. */
2037         if (def->flags & TCG_OPF_VECTOR) {
2038             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2039         } else if (def->flags & TCG_OPF_64BIT) {
2040             ctx.type = TCG_TYPE_I64;
2041         } else {
2042             ctx.type = TCG_TYPE_I32;
2043         }
2044 
2045         /* Assume all bits affected, no bits known zero, no sign reps. */
2046         ctx.a_mask = -1;
2047         ctx.z_mask = -1;
2048         ctx.s_mask = 0;
2049 
2050         /*
2051          * Process each opcode.
2052          * Sorted alphabetically by opcode as much as possible.
2053          */
2054         switch (opc) {
2055         CASE_OP_32_64_VEC(add):
2056             done = fold_add(&ctx, op);
2057             break;
2058         CASE_OP_32_64(add2):
2059             done = fold_add2(&ctx, op);
2060             break;
2061         CASE_OP_32_64_VEC(and):
2062             done = fold_and(&ctx, op);
2063             break;
2064         CASE_OP_32_64_VEC(andc):
2065             done = fold_andc(&ctx, op);
2066             break;
2067         CASE_OP_32_64(brcond):
2068             done = fold_brcond(&ctx, op);
2069             break;
2070         case INDEX_op_brcond2_i32:
2071             done = fold_brcond2(&ctx, op);
2072             break;
2073         CASE_OP_32_64(bswap16):
2074         CASE_OP_32_64(bswap32):
2075         case INDEX_op_bswap64_i64:
2076             done = fold_bswap(&ctx, op);
2077             break;
2078         CASE_OP_32_64(clz):
2079         CASE_OP_32_64(ctz):
2080             done = fold_count_zeros(&ctx, op);
2081             break;
2082         CASE_OP_32_64(ctpop):
2083             done = fold_ctpop(&ctx, op);
2084             break;
2085         CASE_OP_32_64(deposit):
2086             done = fold_deposit(&ctx, op);
2087             break;
2088         CASE_OP_32_64(div):
2089         CASE_OP_32_64(divu):
2090             done = fold_divide(&ctx, op);
2091             break;
2092         case INDEX_op_dup_vec:
2093             done = fold_dup(&ctx, op);
2094             break;
2095         case INDEX_op_dup2_vec:
2096             done = fold_dup2(&ctx, op);
2097             break;
2098         CASE_OP_32_64(eqv):
2099             done = fold_eqv(&ctx, op);
2100             break;
2101         CASE_OP_32_64(extract):
2102             done = fold_extract(&ctx, op);
2103             break;
2104         CASE_OP_32_64(extract2):
2105             done = fold_extract2(&ctx, op);
2106             break;
2107         CASE_OP_32_64(ext8s):
2108         CASE_OP_32_64(ext16s):
2109         case INDEX_op_ext32s_i64:
2110         case INDEX_op_ext_i32_i64:
2111             done = fold_exts(&ctx, op);
2112             break;
2113         CASE_OP_32_64(ext8u):
2114         CASE_OP_32_64(ext16u):
2115         case INDEX_op_ext32u_i64:
2116         case INDEX_op_extu_i32_i64:
2117         case INDEX_op_extrl_i64_i32:
2118         case INDEX_op_extrh_i64_i32:
2119             done = fold_extu(&ctx, op);
2120             break;
2121         CASE_OP_32_64(ld8s):
2122         CASE_OP_32_64(ld8u):
2123         CASE_OP_32_64(ld16s):
2124         CASE_OP_32_64(ld16u):
2125         case INDEX_op_ld32s_i64:
2126         case INDEX_op_ld32u_i64:
2127             done = fold_tcg_ld(&ctx, op);
2128             break;
2129         case INDEX_op_mb:
2130             done = fold_mb(&ctx, op);
2131             break;
2132         CASE_OP_32_64_VEC(mov):
2133             done = fold_mov(&ctx, op);
2134             break;
2135         CASE_OP_32_64(movcond):
2136             done = fold_movcond(&ctx, op);
2137             break;
2138         CASE_OP_32_64(mul):
2139             done = fold_mul(&ctx, op);
2140             break;
2141         CASE_OP_32_64(mulsh):
2142         CASE_OP_32_64(muluh):
2143             done = fold_mul_highpart(&ctx, op);
2144             break;
2145         CASE_OP_32_64(muls2):
2146         CASE_OP_32_64(mulu2):
2147             done = fold_multiply2(&ctx, op);
2148             break;
2149         CASE_OP_32_64(nand):
2150             done = fold_nand(&ctx, op);
2151             break;
2152         CASE_OP_32_64(neg):
2153             done = fold_neg(&ctx, op);
2154             break;
2155         CASE_OP_32_64(nor):
2156             done = fold_nor(&ctx, op);
2157             break;
2158         CASE_OP_32_64_VEC(not):
2159             done = fold_not(&ctx, op);
2160             break;
2161         CASE_OP_32_64_VEC(or):
2162             done = fold_or(&ctx, op);
2163             break;
2164         CASE_OP_32_64_VEC(orc):
2165             done = fold_orc(&ctx, op);
2166             break;
2167         case INDEX_op_qemu_ld_i32:
2168         case INDEX_op_qemu_ld_i64:
2169             done = fold_qemu_ld(&ctx, op);
2170             break;
2171         case INDEX_op_qemu_st_i32:
2172         case INDEX_op_qemu_st8_i32:
2173         case INDEX_op_qemu_st_i64:
2174             done = fold_qemu_st(&ctx, op);
2175             break;
2176         CASE_OP_32_64(rem):
2177         CASE_OP_32_64(remu):
2178             done = fold_remainder(&ctx, op);
2179             break;
2180         CASE_OP_32_64(rotl):
2181         CASE_OP_32_64(rotr):
2182         CASE_OP_32_64(sar):
2183         CASE_OP_32_64(shl):
2184         CASE_OP_32_64(shr):
2185             done = fold_shift(&ctx, op);
2186             break;
2187         CASE_OP_32_64(setcond):
2188             done = fold_setcond(&ctx, op);
2189             break;
2190         case INDEX_op_setcond2_i32:
2191             done = fold_setcond2(&ctx, op);
2192             break;
2193         CASE_OP_32_64(sextract):
2194             done = fold_sextract(&ctx, op);
2195             break;
2196         CASE_OP_32_64_VEC(sub):
2197             done = fold_sub(&ctx, op);
2198             break;
2199         CASE_OP_32_64(sub2):
2200             done = fold_sub2(&ctx, op);
2201             break;
2202         CASE_OP_32_64_VEC(xor):
2203             done = fold_xor(&ctx, op);
2204             break;
2205         default:
2206             break;
2207         }
2208 
2209         if (!done) {
2210             finish_folding(&ctx, op);
2211         }
2212     }
2213 }
2214