xref: /openbmc/qemu/tcg/optimize.c (revision 1b4a234278f04ade4dd358224edc3defcd37fda7)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "tcg/tcg-op-common.h"
29 #include "tcg-internal.h"
30 
/*
 * Expand to the two case labels for the _i32 and _i64 variants of
 * opcode @x.  The last label is emitted without its trailing colon,
 * which the user supplies: "CASE_OP_32_64(add):".
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
34 
/*
 * Expand to the three case labels for the _i32, _i64 and _vec variants
 * of opcode @x.  The last label is emitted without its trailing colon,
 * which the user supplies: "CASE_OP_32_64_VEC(and):".
 */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
39 
/*
 * Per-temporary optimizer state, reached through TCGTemp.state_ptr.
 *
 * Temps known to hold the same value are kept on a circular doubly
 * linked list through @prev_copy/@next_copy; a temp with no copies
 * points at itself in both directions.
 */
typedef struct TempOptInfo {
    bool is_const;        /* true if the temp holds the constant @val */
    TCGTemp *prev_copy;   /* previous temp on the list of copies */
    TCGTemp *next_copy;   /* next temp on the list of copies */
    uint64_t val;         /* the constant value; valid when @is_const */
    /*
     * A clear mask bit means the value bit is known to be 0.  A set
     * mask bit carries no information: temps about which nothing is
     * known use an all-ones mask.
     */
    uint64_t z_mask;
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;
48 
/* State carried through one run of the optimizer. */
typedef struct OptContext {
    TCGContext *tcg;       /* context passed to the tcg_op_* helpers */
    TCGOp *prev_mb;        /* cleared whenever a basic block ends */
    TCGTempSet temps_used; /* bitmap of temps whose TempOptInfo is live */

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 only if value bit is known to be 0 */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    TCGType type;     /* type of the operation currently being folded */
} OptContext;
60 
/*
 * Build the sign mask of a fully known @value: a left-aligned run of
 * ones, one for each redundant sign bit reported by clrsb64().
 */
static uint64_t smask_from_value(uint64_t value)
{
    int redundant = clrsb64(value);
    uint64_t low_bits = ~0ull >> redundant;

    return ~low_bits;
}
67 
/*
 * Derive a sign mask from a mask of known-zero bits.
 *
 * A run of known zeros at the top means the remaining bits form an
 * unsigned field; viewed as a signed field it is one bit wider, so
 * the sign mask is one bit shorter than the run of leading zeros.
 */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    int leading_zeros = clz64(zmask);

    /*
     * Only clear bits in zmask carry information.  If the msb is
     * set we know nothing about the sign.
     */
    if (leading_zeros != 0) {
        return ~(~0ull >> (leading_zeros - 1));
    }
    return 0;
}
87 
/*
 * Normalise a sign mask after bit manipulation.
 *
 * Shifts and rotates can leave stray sign bits detached on the right
 * of the mask.  Keep only the contiguous run at the left, which is
 * the only part that still describes sign repetitions.
 */
static uint64_t smask_from_smask(int64_t smask)
{
    /* Set bits are the significant ones here; invert for the zmask helper. */
    uint64_t as_zmask = ~smask;

    return smask_from_zmask(as_zmask);
}
99 
100 static inline TempOptInfo *ts_info(TCGTemp *ts)
101 {
102     return ts->state_ptr;
103 }
104 
105 static inline TempOptInfo *arg_info(TCGArg arg)
106 {
107     return ts_info(arg_temp(arg));
108 }
109 
110 static inline bool ts_is_const(TCGTemp *ts)
111 {
112     return ts_info(ts)->is_const;
113 }
114 
115 static inline bool arg_is_const(TCGArg arg)
116 {
117     return ts_is_const(arg_temp(arg));
118 }
119 
120 static inline bool ts_is_copy(TCGTemp *ts)
121 {
122     return ts_info(ts)->next_copy != ts;
123 }
124 
125 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
126 static void reset_ts(TCGTemp *ts)
127 {
128     TempOptInfo *ti = ts_info(ts);
129     TempOptInfo *pi = ts_info(ti->prev_copy);
130     TempOptInfo *ni = ts_info(ti->next_copy);
131 
132     ni->prev_copy = ti->prev_copy;
133     pi->next_copy = ti->next_copy;
134     ti->next_copy = ts;
135     ti->prev_copy = ts;
136     ti->is_const = false;
137     ti->z_mask = -1;
138     ti->s_mask = 0;
139 }
140 
141 static void reset_temp(TCGArg arg)
142 {
143     reset_ts(arg_temp(arg));
144 }
145 
146 /* Initialize and activate a temporary.  */
147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
148 {
149     size_t idx = temp_idx(ts);
150     TempOptInfo *ti;
151 
152     if (test_bit(idx, ctx->temps_used.l)) {
153         return;
154     }
155     set_bit(idx, ctx->temps_used.l);
156 
157     ti = ts->state_ptr;
158     if (ti == NULL) {
159         ti = tcg_malloc(sizeof(TempOptInfo));
160         ts->state_ptr = ti;
161     }
162 
163     ti->next_copy = ts;
164     ti->prev_copy = ts;
165     if (ts->kind == TEMP_CONST) {
166         ti->is_const = true;
167         ti->val = ts->val;
168         ti->z_mask = ts->val;
169         ti->s_mask = smask_from_value(ts->val);
170     } else {
171         ti->is_const = false;
172         ti->z_mask = -1;
173         ti->s_mask = 0;
174     }
175 }
176 
177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
178 {
179     TCGTemp *i, *g, *l;
180 
181     /* If this is already readonly, we can't do better. */
182     if (temp_readonly(ts)) {
183         return ts;
184     }
185 
186     g = l = NULL;
187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
188         if (temp_readonly(i)) {
189             return i;
190         } else if (i->kind > ts->kind) {
191             if (i->kind == TEMP_GLOBAL) {
192                 g = i;
193             } else if (i->kind == TEMP_TB) {
194                 l = i;
195             }
196         }
197     }
198 
199     /* If we didn't find a better representation, return the same temp. */
200     return g ? g : l ? l : ts;
201 }
202 
203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
204 {
205     TCGTemp *i;
206 
207     if (ts1 == ts2) {
208         return true;
209     }
210 
211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
212         return false;
213     }
214 
215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
216         if (i == ts2) {
217             return true;
218         }
219     }
220 
221     return false;
222 }
223 
224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
225 {
226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
227 }
228 
229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
230 {
231     TCGTemp *dst_ts = arg_temp(dst);
232     TCGTemp *src_ts = arg_temp(src);
233     TempOptInfo *di;
234     TempOptInfo *si;
235     TCGOpcode new_op;
236 
237     if (ts_are_copies(dst_ts, src_ts)) {
238         tcg_op_remove(ctx->tcg, op);
239         return true;
240     }
241 
242     reset_ts(dst_ts);
243     di = ts_info(dst_ts);
244     si = ts_info(src_ts);
245 
246     switch (ctx->type) {
247     case TCG_TYPE_I32:
248         new_op = INDEX_op_mov_i32;
249         break;
250     case TCG_TYPE_I64:
251         new_op = INDEX_op_mov_i64;
252         break;
253     case TCG_TYPE_V64:
254     case TCG_TYPE_V128:
255     case TCG_TYPE_V256:
256         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
257         new_op = INDEX_op_mov_vec;
258         break;
259     default:
260         g_assert_not_reached();
261     }
262     op->opc = new_op;
263     op->args[0] = dst;
264     op->args[1] = src;
265 
266     di->z_mask = si->z_mask;
267     di->s_mask = si->s_mask;
268 
269     if (src_ts->type == dst_ts->type) {
270         TempOptInfo *ni = ts_info(si->next_copy);
271 
272         di->next_copy = si->next_copy;
273         di->prev_copy = src_ts;
274         ni->prev_copy = dst_ts;
275         si->next_copy = dst_ts;
276         di->is_const = si->is_const;
277         di->val = si->val;
278     }
279     return true;
280 }
281 
282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
283                              TCGArg dst, uint64_t val)
284 {
285     TCGTemp *tv;
286 
287     if (ctx->type == TCG_TYPE_I32) {
288         val = (int32_t)val;
289     }
290 
291     /* Convert movi to mov with constant temp. */
292     tv = tcg_constant_internal(ctx->type, val);
293     init_ts_info(ctx, tv);
294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
295 }
296 
297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
298 {
299     uint64_t l64, h64;
300 
301     switch (op) {
302     CASE_OP_32_64(add):
303         return x + y;
304 
305     CASE_OP_32_64(sub):
306         return x - y;
307 
308     CASE_OP_32_64(mul):
309         return x * y;
310 
311     CASE_OP_32_64_VEC(and):
312         return x & y;
313 
314     CASE_OP_32_64_VEC(or):
315         return x | y;
316 
317     CASE_OP_32_64_VEC(xor):
318         return x ^ y;
319 
320     case INDEX_op_shl_i32:
321         return (uint32_t)x << (y & 31);
322 
323     case INDEX_op_shl_i64:
324         return (uint64_t)x << (y & 63);
325 
326     case INDEX_op_shr_i32:
327         return (uint32_t)x >> (y & 31);
328 
329     case INDEX_op_shr_i64:
330         return (uint64_t)x >> (y & 63);
331 
332     case INDEX_op_sar_i32:
333         return (int32_t)x >> (y & 31);
334 
335     case INDEX_op_sar_i64:
336         return (int64_t)x >> (y & 63);
337 
338     case INDEX_op_rotr_i32:
339         return ror32(x, y & 31);
340 
341     case INDEX_op_rotr_i64:
342         return ror64(x, y & 63);
343 
344     case INDEX_op_rotl_i32:
345         return rol32(x, y & 31);
346 
347     case INDEX_op_rotl_i64:
348         return rol64(x, y & 63);
349 
350     CASE_OP_32_64_VEC(not):
351         return ~x;
352 
353     CASE_OP_32_64(neg):
354         return -x;
355 
356     CASE_OP_32_64_VEC(andc):
357         return x & ~y;
358 
359     CASE_OP_32_64_VEC(orc):
360         return x | ~y;
361 
362     CASE_OP_32_64_VEC(eqv):
363         return ~(x ^ y);
364 
365     CASE_OP_32_64_VEC(nand):
366         return ~(x & y);
367 
368     CASE_OP_32_64_VEC(nor):
369         return ~(x | y);
370 
371     case INDEX_op_clz_i32:
372         return (uint32_t)x ? clz32(x) : y;
373 
374     case INDEX_op_clz_i64:
375         return x ? clz64(x) : y;
376 
377     case INDEX_op_ctz_i32:
378         return (uint32_t)x ? ctz32(x) : y;
379 
380     case INDEX_op_ctz_i64:
381         return x ? ctz64(x) : y;
382 
383     case INDEX_op_ctpop_i32:
384         return ctpop32(x);
385 
386     case INDEX_op_ctpop_i64:
387         return ctpop64(x);
388 
389     CASE_OP_32_64(ext8s):
390         return (int8_t)x;
391 
392     CASE_OP_32_64(ext16s):
393         return (int16_t)x;
394 
395     CASE_OP_32_64(ext8u):
396         return (uint8_t)x;
397 
398     CASE_OP_32_64(ext16u):
399         return (uint16_t)x;
400 
401     CASE_OP_32_64(bswap16):
402         x = bswap16(x);
403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
404 
405     CASE_OP_32_64(bswap32):
406         x = bswap32(x);
407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
408 
409     case INDEX_op_bswap64_i64:
410         return bswap64(x);
411 
412     case INDEX_op_ext_i32_i64:
413     case INDEX_op_ext32s_i64:
414         return (int32_t)x;
415 
416     case INDEX_op_extu_i32_i64:
417     case INDEX_op_extrl_i64_i32:
418     case INDEX_op_ext32u_i64:
419         return (uint32_t)x;
420 
421     case INDEX_op_extrh_i64_i32:
422         return (uint64_t)x >> 32;
423 
424     case INDEX_op_muluh_i32:
425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
426     case INDEX_op_mulsh_i32:
427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
428 
429     case INDEX_op_muluh_i64:
430         mulu64(&l64, &h64, x, y);
431         return h64;
432     case INDEX_op_mulsh_i64:
433         muls64(&l64, &h64, x, y);
434         return h64;
435 
436     case INDEX_op_div_i32:
437         /* Avoid crashing on divide by zero, otherwise undefined.  */
438         return (int32_t)x / ((int32_t)y ? : 1);
439     case INDEX_op_divu_i32:
440         return (uint32_t)x / ((uint32_t)y ? : 1);
441     case INDEX_op_div_i64:
442         return (int64_t)x / ((int64_t)y ? : 1);
443     case INDEX_op_divu_i64:
444         return (uint64_t)x / ((uint64_t)y ? : 1);
445 
446     case INDEX_op_rem_i32:
447         return (int32_t)x % ((int32_t)y ? : 1);
448     case INDEX_op_remu_i32:
449         return (uint32_t)x % ((uint32_t)y ? : 1);
450     case INDEX_op_rem_i64:
451         return (int64_t)x % ((int64_t)y ? : 1);
452     case INDEX_op_remu_i64:
453         return (uint64_t)x % ((uint64_t)y ? : 1);
454 
455     default:
456         g_assert_not_reached();
457     }
458 }
459 
460 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
461                                     uint64_t x, uint64_t y)
462 {
463     uint64_t res = do_constant_folding_2(op, x, y);
464     if (type == TCG_TYPE_I32) {
465         res = (int32_t)res;
466     }
467     return res;
468 }
469 
470 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
471 {
472     switch (c) {
473     case TCG_COND_EQ:
474         return x == y;
475     case TCG_COND_NE:
476         return x != y;
477     case TCG_COND_LT:
478         return (int32_t)x < (int32_t)y;
479     case TCG_COND_GE:
480         return (int32_t)x >= (int32_t)y;
481     case TCG_COND_LE:
482         return (int32_t)x <= (int32_t)y;
483     case TCG_COND_GT:
484         return (int32_t)x > (int32_t)y;
485     case TCG_COND_LTU:
486         return x < y;
487     case TCG_COND_GEU:
488         return x >= y;
489     case TCG_COND_LEU:
490         return x <= y;
491     case TCG_COND_GTU:
492         return x > y;
493     default:
494         g_assert_not_reached();
495     }
496 }
497 
498 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
499 {
500     switch (c) {
501     case TCG_COND_EQ:
502         return x == y;
503     case TCG_COND_NE:
504         return x != y;
505     case TCG_COND_LT:
506         return (int64_t)x < (int64_t)y;
507     case TCG_COND_GE:
508         return (int64_t)x >= (int64_t)y;
509     case TCG_COND_LE:
510         return (int64_t)x <= (int64_t)y;
511     case TCG_COND_GT:
512         return (int64_t)x > (int64_t)y;
513     case TCG_COND_LTU:
514         return x < y;
515     case TCG_COND_GEU:
516         return x >= y;
517     case TCG_COND_LEU:
518         return x <= y;
519     case TCG_COND_GTU:
520         return x > y;
521     default:
522         g_assert_not_reached();
523     }
524 }
525 
526 static bool do_constant_folding_cond_eq(TCGCond c)
527 {
528     switch (c) {
529     case TCG_COND_GT:
530     case TCG_COND_LTU:
531     case TCG_COND_LT:
532     case TCG_COND_GTU:
533     case TCG_COND_NE:
534         return 0;
535     case TCG_COND_GE:
536     case TCG_COND_GEU:
537     case TCG_COND_LE:
538     case TCG_COND_LEU:
539     case TCG_COND_EQ:
540         return 1;
541     default:
542         g_assert_not_reached();
543     }
544 }
545 
546 /*
547  * Return -1 if the condition can't be simplified,
548  * and the result of the condition (0 or 1) if it can.
549  */
550 static int do_constant_folding_cond(TCGType type, TCGArg x,
551                                     TCGArg y, TCGCond c)
552 {
553     if (arg_is_const(x) && arg_is_const(y)) {
554         uint64_t xv = arg_info(x)->val;
555         uint64_t yv = arg_info(y)->val;
556 
557         switch (type) {
558         case TCG_TYPE_I32:
559             return do_constant_folding_cond_32(xv, yv, c);
560         case TCG_TYPE_I64:
561             return do_constant_folding_cond_64(xv, yv, c);
562         default:
563             /* Only scalar comparisons are optimizable */
564             return -1;
565         }
566     } else if (args_are_copies(x, y)) {
567         return do_constant_folding_cond_eq(c);
568     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
569         switch (c) {
570         case TCG_COND_LTU:
571             return 0;
572         case TCG_COND_GEU:
573             return 1;
574         default:
575             return -1;
576         }
577     }
578     return -1;
579 }
580 
581 /*
582  * Return -1 if the condition can't be simplified,
583  * and the result of the condition (0 or 1) if it can.
584  */
585 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
586 {
587     TCGArg al = p1[0], ah = p1[1];
588     TCGArg bl = p2[0], bh = p2[1];
589 
590     if (arg_is_const(bl) && arg_is_const(bh)) {
591         tcg_target_ulong blv = arg_info(bl)->val;
592         tcg_target_ulong bhv = arg_info(bh)->val;
593         uint64_t b = deposit64(blv, 32, 32, bhv);
594 
595         if (arg_is_const(al) && arg_is_const(ah)) {
596             tcg_target_ulong alv = arg_info(al)->val;
597             tcg_target_ulong ahv = arg_info(ah)->val;
598             uint64_t a = deposit64(alv, 32, 32, ahv);
599             return do_constant_folding_cond_64(a, b, c);
600         }
601         if (b == 0) {
602             switch (c) {
603             case TCG_COND_LTU:
604                 return 0;
605             case TCG_COND_GEU:
606                 return 1;
607             default:
608                 break;
609             }
610         }
611     }
612     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
613         return do_constant_folding_cond_eq(c);
614     }
615     return -1;
616 }
617 
618 /**
619  * swap_commutative:
620  * @dest: TCGArg of the destination argument, or NO_DEST.
621  * @p1: first paired argument
622  * @p2: second paired argument
623  *
624  * If *@p1 is a constant and *@p2 is not, swap.
625  * If *@p2 matches @dest, swap.
626  * Return true if a swap was performed.
627  */
628 
629 #define NO_DEST  temp_arg(NULL)
630 
631 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
632 {
633     TCGArg a1 = *p1, a2 = *p2;
634     int sum = 0;
635     sum += arg_is_const(a1);
636     sum -= arg_is_const(a2);
637 
638     /* Prefer the constant in second argument, and then the form
639        op a, a, b, which is better handled on non-RISC hosts. */
640     if (sum > 0 || (sum == 0 && dest == a2)) {
641         *p1 = a2;
642         *p2 = a1;
643         return true;
644     }
645     return false;
646 }
647 
648 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
649 {
650     int sum = 0;
651     sum += arg_is_const(p1[0]);
652     sum += arg_is_const(p1[1]);
653     sum -= arg_is_const(p2[0]);
654     sum -= arg_is_const(p2[1]);
655     if (sum > 0) {
656         TCGArg t;
657         t = p1[0], p1[0] = p2[0], p2[0] = t;
658         t = p1[1], p1[1] = p2[1], p2[1] = t;
659         return true;
660     }
661     return false;
662 }
663 
664 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
665 {
666     for (int i = 0; i < nb_args; i++) {
667         TCGTemp *ts = arg_temp(op->args[i]);
668         init_ts_info(ctx, ts);
669     }
670 }
671 
672 static void copy_propagate(OptContext *ctx, TCGOp *op,
673                            int nb_oargs, int nb_iargs)
674 {
675     TCGContext *s = ctx->tcg;
676 
677     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
678         TCGTemp *ts = arg_temp(op->args[i]);
679         if (ts_is_copy(ts)) {
680             op->args[i] = temp_arg(find_better_copy(s, ts));
681         }
682     }
683 }
684 
685 static void finish_folding(OptContext *ctx, TCGOp *op)
686 {
687     const TCGOpDef *def = &tcg_op_defs[op->opc];
688     int i, nb_oargs;
689 
690     /*
691      * For an opcode that ends a BB, reset all temp data.
692      * We do no cross-BB optimization.
693      */
694     if (def->flags & TCG_OPF_BB_END) {
695         memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
696         ctx->prev_mb = NULL;
697         return;
698     }
699 
700     nb_oargs = def->nb_oargs;
701     for (i = 0; i < nb_oargs; i++) {
702         TCGTemp *ts = arg_temp(op->args[i]);
703         reset_ts(ts);
704         /*
705          * Save the corresponding known-zero/sign bits mask for the
706          * first output argument (only one supported so far).
707          */
708         if (i == 0) {
709             ts_info(ts)->z_mask = ctx->z_mask;
710             ts_info(ts)->s_mask = ctx->s_mask;
711         }
712     }
713 }
714 
715 /*
716  * The fold_* functions return true when processing is complete,
717  * usually by folding the operation to a constant or to a copy,
718  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
719  * like collect information about the value produced, for use in
720  * optimizing a subsequent operation.
721  *
722  * These first fold_* functions are all helpers, used by other
723  * folders for more specific operations.
724  */
725 
726 static bool fold_const1(OptContext *ctx, TCGOp *op)
727 {
728     if (arg_is_const(op->args[1])) {
729         uint64_t t;
730 
731         t = arg_info(op->args[1])->val;
732         t = do_constant_folding(op->opc, ctx->type, t, 0);
733         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
734     }
735     return false;
736 }
737 
738 static bool fold_const2(OptContext *ctx, TCGOp *op)
739 {
740     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
741         uint64_t t1 = arg_info(op->args[1])->val;
742         uint64_t t2 = arg_info(op->args[2])->val;
743 
744         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
745         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
746     }
747     return false;
748 }
749 
750 static bool fold_commutative(OptContext *ctx, TCGOp *op)
751 {
752     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
753     return false;
754 }
755 
756 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
757 {
758     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
759     return fold_const2(ctx, op);
760 }
761 
762 static bool fold_masks(OptContext *ctx, TCGOp *op)
763 {
764     uint64_t a_mask = ctx->a_mask;
765     uint64_t z_mask = ctx->z_mask;
766     uint64_t s_mask = ctx->s_mask;
767 
768     /*
769      * 32-bit ops generate 32-bit results, which for the purpose of
770      * simplifying tcg are sign-extended.  Certainly that's how we
771      * represent our constants elsewhere.  Note that the bits will
772      * be reset properly for a 64-bit value when encountering the
773      * type changing opcodes.
774      */
775     if (ctx->type == TCG_TYPE_I32) {
776         a_mask = (int32_t)a_mask;
777         z_mask = (int32_t)z_mask;
778         s_mask |= MAKE_64BIT_MASK(32, 32);
779         ctx->z_mask = z_mask;
780         ctx->s_mask = s_mask;
781     }
782 
783     if (z_mask == 0) {
784         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
785     }
786     if (a_mask == 0) {
787         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
788     }
789     return false;
790 }
791 
792 /*
793  * Convert @op to NOT, if NOT is supported by the host.
794  * Return true f the conversion is successful, which will still
795  * indicate that the processing is complete.
796  */
797 static bool fold_not(OptContext *ctx, TCGOp *op);
798 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
799 {
800     TCGOpcode not_op;
801     bool have_not;
802 
803     switch (ctx->type) {
804     case TCG_TYPE_I32:
805         not_op = INDEX_op_not_i32;
806         have_not = TCG_TARGET_HAS_not_i32;
807         break;
808     case TCG_TYPE_I64:
809         not_op = INDEX_op_not_i64;
810         have_not = TCG_TARGET_HAS_not_i64;
811         break;
812     case TCG_TYPE_V64:
813     case TCG_TYPE_V128:
814     case TCG_TYPE_V256:
815         not_op = INDEX_op_not_vec;
816         have_not = TCG_TARGET_HAS_not_vec;
817         break;
818     default:
819         g_assert_not_reached();
820     }
821     if (have_not) {
822         op->opc = not_op;
823         op->args[1] = op->args[idx];
824         return fold_not(ctx, op);
825     }
826     return false;
827 }
828 
829 /* If the binary operation has first argument @i, fold to @i. */
830 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
831 {
832     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
833         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
834     }
835     return false;
836 }
837 
838 /* If the binary operation has first argument @i, fold to NOT. */
839 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
840 {
841     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
842         return fold_to_not(ctx, op, 2);
843     }
844     return false;
845 }
846 
847 /* If the binary operation has second argument @i, fold to @i. */
848 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
849 {
850     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
851         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
852     }
853     return false;
854 }
855 
856 /* If the binary operation has second argument @i, fold to identity. */
857 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
858 {
859     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
860         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
861     }
862     return false;
863 }
864 
865 /* If the binary operation has second argument @i, fold to NOT. */
866 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
867 {
868     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
869         return fold_to_not(ctx, op, 1);
870     }
871     return false;
872 }
873 
874 /* If the binary operation has both arguments equal, fold to @i. */
875 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
876 {
877     if (args_are_copies(op->args[1], op->args[2])) {
878         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
879     }
880     return false;
881 }
882 
883 /* If the binary operation has both arguments equal, fold to identity. */
884 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
885 {
886     if (args_are_copies(op->args[1], op->args[2])) {
887         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
888     }
889     return false;
890 }
891 
892 /*
893  * These outermost fold_<op> functions are sorted alphabetically.
894  *
895  * The ordering of the transformations should be:
896  *   1) those that produce a constant
897  *   2) those that produce a copy
898  *   3) those that produce information about the result value.
899  */
900 
901 static bool fold_add(OptContext *ctx, TCGOp *op)
902 {
903     if (fold_const2_commutative(ctx, op) ||
904         fold_xi_to_x(ctx, op, 0)) {
905         return true;
906     }
907     return false;
908 }
909 
910 /* We cannot as yet do_constant_folding with vectors. */
911 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
912 {
913     if (fold_commutative(ctx, op) ||
914         fold_xi_to_x(ctx, op, 0)) {
915         return true;
916     }
917     return false;
918 }
919 
920 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
921 {
922     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
923         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
924         uint64_t al = arg_info(op->args[2])->val;
925         uint64_t ah = arg_info(op->args[3])->val;
926         uint64_t bl = arg_info(op->args[4])->val;
927         uint64_t bh = arg_info(op->args[5])->val;
928         TCGArg rl, rh;
929         TCGOp *op2;
930 
931         if (ctx->type == TCG_TYPE_I32) {
932             uint64_t a = deposit64(al, 32, 32, ah);
933             uint64_t b = deposit64(bl, 32, 32, bh);
934 
935             if (add) {
936                 a += b;
937             } else {
938                 a -= b;
939             }
940 
941             al = sextract64(a, 0, 32);
942             ah = sextract64(a, 32, 32);
943         } else {
944             Int128 a = int128_make128(al, ah);
945             Int128 b = int128_make128(bl, bh);
946 
947             if (add) {
948                 a = int128_add(a, b);
949             } else {
950                 a = int128_sub(a, b);
951             }
952 
953             al = int128_getlo(a);
954             ah = int128_gethi(a);
955         }
956 
957         rl = op->args[0];
958         rh = op->args[1];
959 
960         /* The proper opcode is supplied by tcg_opt_gen_mov. */
961         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
962 
963         tcg_opt_gen_movi(ctx, op, rl, al);
964         tcg_opt_gen_movi(ctx, op2, rh, ah);
965         return true;
966     }
967     return false;
968 }
969 
970 static bool fold_add2(OptContext *ctx, TCGOp *op)
971 {
972     /* Note that the high and low parts may be independently swapped. */
973     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
974     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
975 
976     return fold_addsub2(ctx, op, true);
977 }
978 
979 static bool fold_and(OptContext *ctx, TCGOp *op)
980 {
981     uint64_t z1, z2;
982 
983     if (fold_const2_commutative(ctx, op) ||
984         fold_xi_to_i(ctx, op, 0) ||
985         fold_xi_to_x(ctx, op, -1) ||
986         fold_xx_to_x(ctx, op)) {
987         return true;
988     }
989 
990     z1 = arg_info(op->args[1])->z_mask;
991     z2 = arg_info(op->args[2])->z_mask;
992     ctx->z_mask = z1 & z2;
993 
994     /*
995      * Sign repetitions are perforce all identical, whether they are 1 or 0.
996      * Bitwise operations preserve the relative quantity of the repetitions.
997      */
998     ctx->s_mask = arg_info(op->args[1])->s_mask
999                 & arg_info(op->args[2])->s_mask;
1000 
1001     /*
1002      * Known-zeros does not imply known-ones.  Therefore unless
1003      * arg2 is constant, we can't infer affected bits from it.
1004      */
1005     if (arg_is_const(op->args[2])) {
1006         ctx->a_mask = z1 & ~z2;
1007     }
1008 
1009     return fold_masks(ctx, op);
1010 }
1011 
1012 static bool fold_andc(OptContext *ctx, TCGOp *op)
1013 {
1014     uint64_t z1;
1015 
1016     if (fold_const2(ctx, op) ||
1017         fold_xx_to_i(ctx, op, 0) ||
1018         fold_xi_to_x(ctx, op, 0) ||
1019         fold_ix_to_not(ctx, op, -1)) {
1020         return true;
1021     }
1022 
1023     z1 = arg_info(op->args[1])->z_mask;
1024 
1025     /*
1026      * Known-zeros does not imply known-ones.  Therefore unless
1027      * arg2 is constant, we can't infer anything from it.
1028      */
1029     if (arg_is_const(op->args[2])) {
1030         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1031         ctx->a_mask = z1 & ~z2;
1032         z1 &= z2;
1033     }
1034     ctx->z_mask = z1;
1035 
1036     ctx->s_mask = arg_info(op->args[1])->s_mask
1037                 & arg_info(op->args[2])->s_mask;
1038     return fold_masks(ctx, op);
1039 }
1040 
1041 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1042 {
1043     TCGCond cond = op->args[2];
1044     int i;
1045 
1046     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1047         op->args[2] = cond = tcg_swap_cond(cond);
1048     }
1049 
1050     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1051     if (i == 0) {
1052         tcg_op_remove(ctx->tcg, op);
1053         return true;
1054     }
1055     if (i > 0) {
1056         op->opc = INDEX_op_br;
1057         op->args[0] = op->args[3];
1058     }
1059     return false;
1060 }
1061 
1062 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1063 {
1064     TCGCond cond = op->args[4];
1065     TCGArg label = op->args[5];
1066     int i, inv = 0;
1067 
1068     if (swap_commutative2(&op->args[0], &op->args[2])) {
1069         op->args[4] = cond = tcg_swap_cond(cond);
1070     }
1071 
1072     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1073     if (i >= 0) {
1074         goto do_brcond_const;
1075     }
1076 
1077     switch (cond) {
1078     case TCG_COND_LT:
1079     case TCG_COND_GE:
1080         /*
1081          * Simplify LT/GE comparisons vs zero to a single compare
1082          * vs the high word of the input.
1083          */
1084         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1085             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1086             goto do_brcond_high;
1087         }
1088         break;
1089 
1090     case TCG_COND_NE:
1091         inv = 1;
1092         QEMU_FALLTHROUGH;
1093     case TCG_COND_EQ:
1094         /*
1095          * Simplify EQ/NE comparisons where one of the pairs
1096          * can be simplified.
1097          */
1098         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1099                                      op->args[2], cond);
1100         switch (i ^ inv) {
1101         case 0:
1102             goto do_brcond_const;
1103         case 1:
1104             goto do_brcond_high;
1105         }
1106 
1107         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1108                                      op->args[3], cond);
1109         switch (i ^ inv) {
1110         case 0:
1111             goto do_brcond_const;
1112         case 1:
1113             op->opc = INDEX_op_brcond_i32;
1114             op->args[1] = op->args[2];
1115             op->args[2] = cond;
1116             op->args[3] = label;
1117             break;
1118         }
1119         break;
1120 
1121     default:
1122         break;
1123 
1124     do_brcond_high:
1125         op->opc = INDEX_op_brcond_i32;
1126         op->args[0] = op->args[1];
1127         op->args[1] = op->args[3];
1128         op->args[2] = cond;
1129         op->args[3] = label;
1130         break;
1131 
1132     do_brcond_const:
1133         if (i == 0) {
1134             tcg_op_remove(ctx->tcg, op);
1135             return true;
1136         }
1137         op->opc = INDEX_op_br;
1138         op->args[0] = label;
1139         break;
1140     }
1141     return false;
1142 }
1143 
1144 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1145 {
1146     uint64_t z_mask, s_mask, sign;
1147 
1148     if (arg_is_const(op->args[1])) {
1149         uint64_t t = arg_info(op->args[1])->val;
1150 
1151         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1152         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1153     }
1154 
1155     z_mask = arg_info(op->args[1])->z_mask;
1156 
1157     switch (op->opc) {
1158     case INDEX_op_bswap16_i32:
1159     case INDEX_op_bswap16_i64:
1160         z_mask = bswap16(z_mask);
1161         sign = INT16_MIN;
1162         break;
1163     case INDEX_op_bswap32_i32:
1164     case INDEX_op_bswap32_i64:
1165         z_mask = bswap32(z_mask);
1166         sign = INT32_MIN;
1167         break;
1168     case INDEX_op_bswap64_i64:
1169         z_mask = bswap64(z_mask);
1170         sign = INT64_MIN;
1171         break;
1172     default:
1173         g_assert_not_reached();
1174     }
1175     s_mask = smask_from_zmask(z_mask);
1176 
1177     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1178     case TCG_BSWAP_OZ:
1179         break;
1180     case TCG_BSWAP_OS:
1181         /* If the sign bit may be 1, force all the bits above to 1. */
1182         if (z_mask & sign) {
1183             z_mask |= sign;
1184             s_mask = sign << 1;
1185         }
1186         break;
1187     default:
1188         /* The high bits are undefined: force all bits above the sign to 1. */
1189         z_mask |= sign << 1;
1190         s_mask = 0;
1191         break;
1192     }
1193     ctx->z_mask = z_mask;
1194     ctx->s_mask = s_mask;
1195 
1196     return fold_masks(ctx, op);
1197 }
1198 
1199 static bool fold_call(OptContext *ctx, TCGOp *op)
1200 {
1201     TCGContext *s = ctx->tcg;
1202     int nb_oargs = TCGOP_CALLO(op);
1203     int nb_iargs = TCGOP_CALLI(op);
1204     int flags, i;
1205 
1206     init_arguments(ctx, op, nb_oargs + nb_iargs);
1207     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1208 
1209     /* If the function reads or writes globals, reset temp data. */
1210     flags = tcg_call_flags(op);
1211     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1212         int nb_globals = s->nb_globals;
1213 
1214         for (i = 0; i < nb_globals; i++) {
1215             if (test_bit(i, ctx->temps_used.l)) {
1216                 reset_ts(&ctx->tcg->temps[i]);
1217             }
1218         }
1219     }
1220 
1221     /* Reset temp data for outputs. */
1222     for (i = 0; i < nb_oargs; i++) {
1223         reset_temp(op->args[i]);
1224     }
1225 
1226     /* Stop optimizing MB across calls. */
1227     ctx->prev_mb = NULL;
1228     return true;
1229 }
1230 
1231 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1232 {
1233     uint64_t z_mask;
1234 
1235     if (arg_is_const(op->args[1])) {
1236         uint64_t t = arg_info(op->args[1])->val;
1237 
1238         if (t != 0) {
1239             t = do_constant_folding(op->opc, ctx->type, t, 0);
1240             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1241         }
1242         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1243     }
1244 
1245     switch (ctx->type) {
1246     case TCG_TYPE_I32:
1247         z_mask = 31;
1248         break;
1249     case TCG_TYPE_I64:
1250         z_mask = 63;
1251         break;
1252     default:
1253         g_assert_not_reached();
1254     }
1255     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1256     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1257     return false;
1258 }
1259 
1260 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1261 {
1262     if (fold_const1(ctx, op)) {
1263         return true;
1264     }
1265 
1266     switch (ctx->type) {
1267     case TCG_TYPE_I32:
1268         ctx->z_mask = 32 | 31;
1269         break;
1270     case TCG_TYPE_I64:
1271         ctx->z_mask = 64 | 63;
1272         break;
1273     default:
1274         g_assert_not_reached();
1275     }
1276     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1277     return false;
1278 }
1279 
1280 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1281 {
1282     TCGOpcode and_opc;
1283 
1284     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1285         uint64_t t1 = arg_info(op->args[1])->val;
1286         uint64_t t2 = arg_info(op->args[2])->val;
1287 
1288         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1289         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1290     }
1291 
1292     switch (ctx->type) {
1293     case TCG_TYPE_I32:
1294         and_opc = INDEX_op_and_i32;
1295         break;
1296     case TCG_TYPE_I64:
1297         and_opc = INDEX_op_and_i64;
1298         break;
1299     default:
1300         g_assert_not_reached();
1301     }
1302 
1303     /* Inserting a value into zero at offset 0. */
1304     if (arg_is_const(op->args[1])
1305         && arg_info(op->args[1])->val == 0
1306         && op->args[3] == 0) {
1307         uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
1308 
1309         op->opc = and_opc;
1310         op->args[1] = op->args[2];
1311         op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
1312         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1313         return false;
1314     }
1315 
1316     /* Inserting zero into a value. */
1317     if (arg_is_const(op->args[2])
1318         && arg_info(op->args[2])->val == 0) {
1319         uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1320 
1321         op->opc = and_opc;
1322         op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
1323         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1324         return false;
1325     }
1326 
1327     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1328                             op->args[3], op->args[4],
1329                             arg_info(op->args[2])->z_mask);
1330     return false;
1331 }
1332 
1333 static bool fold_divide(OptContext *ctx, TCGOp *op)
1334 {
1335     if (fold_const2(ctx, op) ||
1336         fold_xi_to_x(ctx, op, 1)) {
1337         return true;
1338     }
1339     return false;
1340 }
1341 
1342 static bool fold_dup(OptContext *ctx, TCGOp *op)
1343 {
1344     if (arg_is_const(op->args[1])) {
1345         uint64_t t = arg_info(op->args[1])->val;
1346         t = dup_const(TCGOP_VECE(op), t);
1347         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1348     }
1349     return false;
1350 }
1351 
1352 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1353 {
1354     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1355         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1356                                arg_info(op->args[2])->val);
1357         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1358     }
1359 
1360     if (args_are_copies(op->args[1], op->args[2])) {
1361         op->opc = INDEX_op_dup_vec;
1362         TCGOP_VECE(op) = MO_32;
1363     }
1364     return false;
1365 }
1366 
1367 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1368 {
1369     if (fold_const2_commutative(ctx, op) ||
1370         fold_xi_to_x(ctx, op, -1) ||
1371         fold_xi_to_not(ctx, op, 0)) {
1372         return true;
1373     }
1374 
1375     ctx->s_mask = arg_info(op->args[1])->s_mask
1376                 & arg_info(op->args[2])->s_mask;
1377     return false;
1378 }
1379 
1380 static bool fold_extract(OptContext *ctx, TCGOp *op)
1381 {
1382     uint64_t z_mask_old, z_mask;
1383     int pos = op->args[2];
1384     int len = op->args[3];
1385 
1386     if (arg_is_const(op->args[1])) {
1387         uint64_t t;
1388 
1389         t = arg_info(op->args[1])->val;
1390         t = extract64(t, pos, len);
1391         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1392     }
1393 
1394     z_mask_old = arg_info(op->args[1])->z_mask;
1395     z_mask = extract64(z_mask_old, pos, len);
1396     if (pos == 0) {
1397         ctx->a_mask = z_mask_old ^ z_mask;
1398     }
1399     ctx->z_mask = z_mask;
1400     ctx->s_mask = smask_from_zmask(z_mask);
1401 
1402     return fold_masks(ctx, op);
1403 }
1404 
1405 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1406 {
1407     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1408         uint64_t v1 = arg_info(op->args[1])->val;
1409         uint64_t v2 = arg_info(op->args[2])->val;
1410         int shr = op->args[3];
1411 
1412         if (op->opc == INDEX_op_extract2_i64) {
1413             v1 >>= shr;
1414             v2 <<= 64 - shr;
1415         } else {
1416             v1 = (uint32_t)v1 >> shr;
1417             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1418         }
1419         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1420     }
1421     return false;
1422 }
1423 
1424 static bool fold_exts(OptContext *ctx, TCGOp *op)
1425 {
1426     uint64_t s_mask_old, s_mask, z_mask, sign;
1427     bool type_change = false;
1428 
1429     if (fold_const1(ctx, op)) {
1430         return true;
1431     }
1432 
1433     z_mask = arg_info(op->args[1])->z_mask;
1434     s_mask = arg_info(op->args[1])->s_mask;
1435     s_mask_old = s_mask;
1436 
1437     switch (op->opc) {
1438     CASE_OP_32_64(ext8s):
1439         sign = INT8_MIN;
1440         z_mask = (uint8_t)z_mask;
1441         break;
1442     CASE_OP_32_64(ext16s):
1443         sign = INT16_MIN;
1444         z_mask = (uint16_t)z_mask;
1445         break;
1446     case INDEX_op_ext_i32_i64:
1447         type_change = true;
1448         QEMU_FALLTHROUGH;
1449     case INDEX_op_ext32s_i64:
1450         sign = INT32_MIN;
1451         z_mask = (uint32_t)z_mask;
1452         break;
1453     default:
1454         g_assert_not_reached();
1455     }
1456 
1457     if (z_mask & sign) {
1458         z_mask |= sign;
1459     }
1460     s_mask |= sign << 1;
1461 
1462     ctx->z_mask = z_mask;
1463     ctx->s_mask = s_mask;
1464     if (!type_change) {
1465         ctx->a_mask = s_mask & ~s_mask_old;
1466     }
1467 
1468     return fold_masks(ctx, op);
1469 }
1470 
1471 static bool fold_extu(OptContext *ctx, TCGOp *op)
1472 {
1473     uint64_t z_mask_old, z_mask;
1474     bool type_change = false;
1475 
1476     if (fold_const1(ctx, op)) {
1477         return true;
1478     }
1479 
1480     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1481 
1482     switch (op->opc) {
1483     CASE_OP_32_64(ext8u):
1484         z_mask = (uint8_t)z_mask;
1485         break;
1486     CASE_OP_32_64(ext16u):
1487         z_mask = (uint16_t)z_mask;
1488         break;
1489     case INDEX_op_extrl_i64_i32:
1490     case INDEX_op_extu_i32_i64:
1491         type_change = true;
1492         QEMU_FALLTHROUGH;
1493     case INDEX_op_ext32u_i64:
1494         z_mask = (uint32_t)z_mask;
1495         break;
1496     case INDEX_op_extrh_i64_i32:
1497         type_change = true;
1498         z_mask >>= 32;
1499         break;
1500     default:
1501         g_assert_not_reached();
1502     }
1503 
1504     ctx->z_mask = z_mask;
1505     ctx->s_mask = smask_from_zmask(z_mask);
1506     if (!type_change) {
1507         ctx->a_mask = z_mask_old ^ z_mask;
1508     }
1509     return fold_masks(ctx, op);
1510 }
1511 
1512 static bool fold_mb(OptContext *ctx, TCGOp *op)
1513 {
1514     /* Eliminate duplicate and redundant fence instructions.  */
1515     if (ctx->prev_mb) {
1516         /*
1517          * Merge two barriers of the same type into one,
1518          * or a weaker barrier into a stronger one,
1519          * or two weaker barriers into a stronger one.
1520          *   mb X; mb Y => mb X|Y
1521          *   mb; strl => mb; st
1522          *   ldaq; mb => ld; mb
1523          *   ldaq; strl => ld; mb; st
1524          * Other combinations are also merged into a strong
1525          * barrier.  This is stricter than specified but for
1526          * the purposes of TCG is better than not optimizing.
1527          */
1528         ctx->prev_mb->args[0] |= op->args[0];
1529         tcg_op_remove(ctx->tcg, op);
1530     } else {
1531         ctx->prev_mb = op;
1532     }
1533     return true;
1534 }
1535 
1536 static bool fold_mov(OptContext *ctx, TCGOp *op)
1537 {
1538     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1539 }
1540 
1541 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1542 {
1543     TCGCond cond = op->args[5];
1544     int i;
1545 
1546     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1547         op->args[5] = cond = tcg_swap_cond(cond);
1548     }
1549     /*
1550      * Canonicalize the "false" input reg to match the destination reg so
1551      * that the tcg backend can implement a "move if true" operation.
1552      */
1553     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1554         op->args[5] = cond = tcg_invert_cond(cond);
1555     }
1556 
1557     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1558     if (i >= 0) {
1559         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1560     }
1561 
1562     ctx->z_mask = arg_info(op->args[3])->z_mask
1563                 | arg_info(op->args[4])->z_mask;
1564     ctx->s_mask = arg_info(op->args[3])->s_mask
1565                 & arg_info(op->args[4])->s_mask;
1566 
1567     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1568         uint64_t tv = arg_info(op->args[3])->val;
1569         uint64_t fv = arg_info(op->args[4])->val;
1570         TCGOpcode opc, negopc = 0;
1571 
1572         switch (ctx->type) {
1573         case TCG_TYPE_I32:
1574             opc = INDEX_op_setcond_i32;
1575             if (TCG_TARGET_HAS_negsetcond_i32) {
1576                 negopc = INDEX_op_negsetcond_i32;
1577             }
1578             tv = (int32_t)tv;
1579             fv = (int32_t)fv;
1580             break;
1581         case TCG_TYPE_I64:
1582             opc = INDEX_op_setcond_i64;
1583             if (TCG_TARGET_HAS_negsetcond_i64) {
1584                 negopc = INDEX_op_negsetcond_i64;
1585             }
1586             break;
1587         default:
1588             g_assert_not_reached();
1589         }
1590 
1591         if (tv == 1 && fv == 0) {
1592             op->opc = opc;
1593             op->args[3] = cond;
1594         } else if (fv == 1 && tv == 0) {
1595             op->opc = opc;
1596             op->args[3] = tcg_invert_cond(cond);
1597         } else if (negopc) {
1598             if (tv == -1 && fv == 0) {
1599                 op->opc = negopc;
1600                 op->args[3] = cond;
1601             } else if (fv == -1 && tv == 0) {
1602                 op->opc = negopc;
1603                 op->args[3] = tcg_invert_cond(cond);
1604             }
1605         }
1606     }
1607     return false;
1608 }
1609 
1610 static bool fold_mul(OptContext *ctx, TCGOp *op)
1611 {
1612     if (fold_const2(ctx, op) ||
1613         fold_xi_to_i(ctx, op, 0) ||
1614         fold_xi_to_x(ctx, op, 1)) {
1615         return true;
1616     }
1617     return false;
1618 }
1619 
1620 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1621 {
1622     if (fold_const2_commutative(ctx, op) ||
1623         fold_xi_to_i(ctx, op, 0)) {
1624         return true;
1625     }
1626     return false;
1627 }
1628 
1629 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1630 {
1631     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1632 
1633     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1634         uint64_t a = arg_info(op->args[2])->val;
1635         uint64_t b = arg_info(op->args[3])->val;
1636         uint64_t h, l;
1637         TCGArg rl, rh;
1638         TCGOp *op2;
1639 
1640         switch (op->opc) {
1641         case INDEX_op_mulu2_i32:
1642             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1643             h = (int32_t)(l >> 32);
1644             l = (int32_t)l;
1645             break;
1646         case INDEX_op_muls2_i32:
1647             l = (int64_t)(int32_t)a * (int32_t)b;
1648             h = l >> 32;
1649             l = (int32_t)l;
1650             break;
1651         case INDEX_op_mulu2_i64:
1652             mulu64(&l, &h, a, b);
1653             break;
1654         case INDEX_op_muls2_i64:
1655             muls64(&l, &h, a, b);
1656             break;
1657         default:
1658             g_assert_not_reached();
1659         }
1660 
1661         rl = op->args[0];
1662         rh = op->args[1];
1663 
1664         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1665         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1666 
1667         tcg_opt_gen_movi(ctx, op, rl, l);
1668         tcg_opt_gen_movi(ctx, op2, rh, h);
1669         return true;
1670     }
1671     return false;
1672 }
1673 
1674 static bool fold_nand(OptContext *ctx, TCGOp *op)
1675 {
1676     if (fold_const2_commutative(ctx, op) ||
1677         fold_xi_to_not(ctx, op, -1)) {
1678         return true;
1679     }
1680 
1681     ctx->s_mask = arg_info(op->args[1])->s_mask
1682                 & arg_info(op->args[2])->s_mask;
1683     return false;
1684 }
1685 
1686 static bool fold_neg(OptContext *ctx, TCGOp *op)
1687 {
1688     uint64_t z_mask;
1689 
1690     if (fold_const1(ctx, op)) {
1691         return true;
1692     }
1693 
1694     /* Set to 1 all bits to the left of the rightmost.  */
1695     z_mask = arg_info(op->args[1])->z_mask;
1696     ctx->z_mask = -(z_mask & -z_mask);
1697 
1698     /*
1699      * Because of fold_sub_to_neg, we want to always return true,
1700      * via finish_folding.
1701      */
1702     finish_folding(ctx, op);
1703     return true;
1704 }
1705 
1706 static bool fold_nor(OptContext *ctx, TCGOp *op)
1707 {
1708     if (fold_const2_commutative(ctx, op) ||
1709         fold_xi_to_not(ctx, op, 0)) {
1710         return true;
1711     }
1712 
1713     ctx->s_mask = arg_info(op->args[1])->s_mask
1714                 & arg_info(op->args[2])->s_mask;
1715     return false;
1716 }
1717 
1718 static bool fold_not(OptContext *ctx, TCGOp *op)
1719 {
1720     if (fold_const1(ctx, op)) {
1721         return true;
1722     }
1723 
1724     ctx->s_mask = arg_info(op->args[1])->s_mask;
1725 
1726     /* Because of fold_to_not, we want to always return true, via finish. */
1727     finish_folding(ctx, op);
1728     return true;
1729 }
1730 
1731 static bool fold_or(OptContext *ctx, TCGOp *op)
1732 {
1733     if (fold_const2_commutative(ctx, op) ||
1734         fold_xi_to_x(ctx, op, 0) ||
1735         fold_xx_to_x(ctx, op)) {
1736         return true;
1737     }
1738 
1739     ctx->z_mask = arg_info(op->args[1])->z_mask
1740                 | arg_info(op->args[2])->z_mask;
1741     ctx->s_mask = arg_info(op->args[1])->s_mask
1742                 & arg_info(op->args[2])->s_mask;
1743     return fold_masks(ctx, op);
1744 }
1745 
1746 static bool fold_orc(OptContext *ctx, TCGOp *op)
1747 {
1748     if (fold_const2(ctx, op) ||
1749         fold_xx_to_i(ctx, op, -1) ||
1750         fold_xi_to_x(ctx, op, -1) ||
1751         fold_ix_to_not(ctx, op, 0)) {
1752         return true;
1753     }
1754 
1755     ctx->s_mask = arg_info(op->args[1])->s_mask
1756                 & arg_info(op->args[2])->s_mask;
1757     return false;
1758 }
1759 
1760 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1761 {
1762     const TCGOpDef *def = &tcg_op_defs[op->opc];
1763     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1764     MemOp mop = get_memop(oi);
1765     int width = 8 * memop_size(mop);
1766 
1767     if (width < 64) {
1768         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1769         if (!(mop & MO_SIGN)) {
1770             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1771             ctx->s_mask <<= 1;
1772         }
1773     }
1774 
1775     /* Opcodes that touch guest memory stop the mb optimization.  */
1776     ctx->prev_mb = NULL;
1777     return false;
1778 }
1779 
1780 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1781 {
1782     /* Opcodes that touch guest memory stop the mb optimization.  */
1783     ctx->prev_mb = NULL;
1784     return false;
1785 }
1786 
1787 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1788 {
1789     if (fold_const2(ctx, op) ||
1790         fold_xx_to_i(ctx, op, 0)) {
1791         return true;
1792     }
1793     return false;
1794 }
1795 
1796 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1797 {
1798     TCGCond cond = op->args[3];
1799     int i;
1800 
1801     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1802         op->args[3] = cond = tcg_swap_cond(cond);
1803     }
1804 
1805     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1806     if (i >= 0) {
1807         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1808     }
1809 
1810     ctx->z_mask = 1;
1811     ctx->s_mask = smask_from_zmask(1);
1812     return false;
1813 }
1814 
1815 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
1816 {
1817     TCGCond cond = op->args[3];
1818     int i;
1819 
1820     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1821         op->args[3] = cond = tcg_swap_cond(cond);
1822     }
1823 
1824     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1825     if (i >= 0) {
1826         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
1827     }
1828 
1829     /* Value is {0,-1} so all bits are repetitions of the sign. */
1830     ctx->s_mask = -1;
1831     return false;
1832 }
1833 
1834 
1835 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1836 {
1837     TCGCond cond = op->args[5];
1838     int i, inv = 0;
1839 
1840     if (swap_commutative2(&op->args[1], &op->args[3])) {
1841         op->args[5] = cond = tcg_swap_cond(cond);
1842     }
1843 
1844     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1845     if (i >= 0) {
1846         goto do_setcond_const;
1847     }
1848 
1849     switch (cond) {
1850     case TCG_COND_LT:
1851     case TCG_COND_GE:
1852         /*
1853          * Simplify LT/GE comparisons vs zero to a single compare
1854          * vs the high word of the input.
1855          */
1856         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1857             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1858             goto do_setcond_high;
1859         }
1860         break;
1861 
1862     case TCG_COND_NE:
1863         inv = 1;
1864         QEMU_FALLTHROUGH;
1865     case TCG_COND_EQ:
1866         /*
1867          * Simplify EQ/NE comparisons where one of the pairs
1868          * can be simplified.
1869          */
1870         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1871                                      op->args[3], cond);
1872         switch (i ^ inv) {
1873         case 0:
1874             goto do_setcond_const;
1875         case 1:
1876             goto do_setcond_high;
1877         }
1878 
1879         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1880                                      op->args[4], cond);
1881         switch (i ^ inv) {
1882         case 0:
1883             goto do_setcond_const;
1884         case 1:
1885             op->args[2] = op->args[3];
1886             op->args[3] = cond;
1887             op->opc = INDEX_op_setcond_i32;
1888             break;
1889         }
1890         break;
1891 
1892     default:
1893         break;
1894 
1895     do_setcond_high:
1896         op->args[1] = op->args[2];
1897         op->args[2] = op->args[4];
1898         op->args[3] = cond;
1899         op->opc = INDEX_op_setcond_i32;
1900         break;
1901     }
1902 
1903     ctx->z_mask = 1;
1904     ctx->s_mask = smask_from_zmask(1);
1905     return false;
1906 
1907  do_setcond_const:
1908     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1909 }
1910 
1911 static bool fold_sextract(OptContext *ctx, TCGOp *op)
1912 {
1913     uint64_t z_mask, s_mask, s_mask_old;
1914     int pos = op->args[2];
1915     int len = op->args[3];
1916 
1917     if (arg_is_const(op->args[1])) {
1918         uint64_t t;
1919 
1920         t = arg_info(op->args[1])->val;
1921         t = sextract64(t, pos, len);
1922         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1923     }
1924 
1925     z_mask = arg_info(op->args[1])->z_mask;
1926     z_mask = sextract64(z_mask, pos, len);
1927     ctx->z_mask = z_mask;
1928 
1929     s_mask_old = arg_info(op->args[1])->s_mask;
1930     s_mask = sextract64(s_mask_old, pos, len);
1931     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1932     ctx->s_mask = s_mask;
1933 
1934     if (pos == 0) {
1935         ctx->a_mask = s_mask & ~s_mask_old;
1936     }
1937 
1938     return fold_masks(ctx, op);
1939 }
1940 
1941 static bool fold_shift(OptContext *ctx, TCGOp *op)
1942 {
1943     uint64_t s_mask, z_mask, sign;
1944 
1945     if (fold_const2(ctx, op) ||
1946         fold_ix_to_i(ctx, op, 0) ||
1947         fold_xi_to_x(ctx, op, 0)) {
1948         return true;
1949     }
1950 
1951     s_mask = arg_info(op->args[1])->s_mask;
1952     z_mask = arg_info(op->args[1])->z_mask;
1953 
1954     if (arg_is_const(op->args[2])) {
1955         int sh = arg_info(op->args[2])->val;
1956 
1957         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1958 
1959         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1960         ctx->s_mask = smask_from_smask(s_mask);
1961 
1962         return fold_masks(ctx, op);
1963     }
1964 
1965     switch (op->opc) {
1966     CASE_OP_32_64(sar):
1967         /*
1968          * Arithmetic right shift will not reduce the number of
1969          * input sign repetitions.
1970          */
1971         ctx->s_mask = s_mask;
1972         break;
1973     CASE_OP_32_64(shr):
1974         /*
1975          * If the sign bit is known zero, then logical right shift
1976          * will not reduced the number of input sign repetitions.
1977          */
1978         sign = (s_mask & -s_mask) >> 1;
1979         if (!(z_mask & sign)) {
1980             ctx->s_mask = s_mask;
1981         }
1982         break;
1983     default:
1984         break;
1985     }
1986 
1987     return false;
1988 }
1989 
1990 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1991 {
1992     TCGOpcode neg_op;
1993     bool have_neg;
1994 
1995     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1996         return false;
1997     }
1998 
1999     switch (ctx->type) {
2000     case TCG_TYPE_I32:
2001         neg_op = INDEX_op_neg_i32;
2002         have_neg = TCG_TARGET_HAS_neg_i32;
2003         break;
2004     case TCG_TYPE_I64:
2005         neg_op = INDEX_op_neg_i64;
2006         have_neg = TCG_TARGET_HAS_neg_i64;
2007         break;
2008     case TCG_TYPE_V64:
2009     case TCG_TYPE_V128:
2010     case TCG_TYPE_V256:
2011         neg_op = INDEX_op_neg_vec;
2012         have_neg = (TCG_TARGET_HAS_neg_vec &&
2013                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2014         break;
2015     default:
2016         g_assert_not_reached();
2017     }
2018     if (have_neg) {
2019         op->opc = neg_op;
2020         op->args[1] = op->args[2];
2021         return fold_neg(ctx, op);
2022     }
2023     return false;
2024 }
2025 
2026 /* We cannot as yet do_constant_folding with vectors. */
2027 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2028 {
2029     if (fold_xx_to_i(ctx, op, 0) ||
2030         fold_xi_to_x(ctx, op, 0) ||
2031         fold_sub_to_neg(ctx, op)) {
2032         return true;
2033     }
2034     return false;
2035 }
2036 
2037 static bool fold_sub(OptContext *ctx, TCGOp *op)
2038 {
2039     return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
2040 }
2041 
2042 static bool fold_sub2(OptContext *ctx, TCGOp *op)
2043 {
2044     return fold_addsub2(ctx, op, false);
2045 }
2046 
2047 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2048 {
2049     /* We can't do any folding with a load, but we can record bits. */
2050     switch (op->opc) {
2051     CASE_OP_32_64(ld8s):
2052         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
2053         break;
2054     CASE_OP_32_64(ld8u):
2055         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
2056         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
2057         break;
2058     CASE_OP_32_64(ld16s):
2059         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
2060         break;
2061     CASE_OP_32_64(ld16u):
2062         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
2063         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
2064         break;
2065     case INDEX_op_ld32s_i64:
2066         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
2067         break;
2068     case INDEX_op_ld32u_i64:
2069         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2070         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2071         break;
2072     default:
2073         g_assert_not_reached();
2074     }
2075     return false;
2076 }
2077 
2078 static bool fold_xor(OptContext *ctx, TCGOp *op)
2079 {
2080     if (fold_const2_commutative(ctx, op) ||
2081         fold_xx_to_i(ctx, op, 0) ||
2082         fold_xi_to_x(ctx, op, 0) ||
2083         fold_xi_to_not(ctx, op, -1)) {
2084         return true;
2085     }
2086 
2087     ctx->z_mask = arg_info(op->args[1])->z_mask
2088                 | arg_info(op->args[2])->z_mask;
2089     ctx->s_mask = arg_info(op->args[1])->s_mask
2090                 & arg_info(op->args[2])->s_mask;
2091     return fold_masks(ctx, op);
2092 }
2093 
2094 /* Propagate constants and copies, fold constant expressions. */
2095 void tcg_optimize(TCGContext *s)
2096 {
2097     int nb_temps, i;
2098     TCGOp *op, *op_next;
2099     OptContext ctx = { .tcg = s };
2100 
2101     /* Array VALS has an element for each temp.
2102        If this temp holds a constant then its value is kept in VALS' element.
2103        If this temp is a copy of other ones then the other copies are
2104        available through the doubly linked circular list. */
2105 
2106     nb_temps = s->nb_temps;
2107     for (i = 0; i < nb_temps; ++i) {
2108         s->temps[i].state_ptr = NULL;
2109     }
2110 
2111     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2112         TCGOpcode opc = op->opc;
2113         const TCGOpDef *def;
2114         bool done = false;
2115 
2116         /* Calls are special. */
2117         if (opc == INDEX_op_call) {
2118             fold_call(&ctx, op);
2119             continue;
2120         }
2121 
2122         def = &tcg_op_defs[opc];
2123         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2124         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2125 
2126         /* Pre-compute the type of the operation. */
2127         if (def->flags & TCG_OPF_VECTOR) {
2128             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2129         } else if (def->flags & TCG_OPF_64BIT) {
2130             ctx.type = TCG_TYPE_I64;
2131         } else {
2132             ctx.type = TCG_TYPE_I32;
2133         }
2134 
2135         /* Assume all bits affected, no bits known zero, no sign reps. */
2136         ctx.a_mask = -1;
2137         ctx.z_mask = -1;
2138         ctx.s_mask = 0;
2139 
2140         /*
2141          * Process each opcode.
2142          * Sorted alphabetically by opcode as much as possible.
2143          */
2144         switch (opc) {
2145         CASE_OP_32_64(add):
2146             done = fold_add(&ctx, op);
2147             break;
2148         case INDEX_op_add_vec:
2149             done = fold_add_vec(&ctx, op);
2150             break;
2151         CASE_OP_32_64(add2):
2152             done = fold_add2(&ctx, op);
2153             break;
2154         CASE_OP_32_64_VEC(and):
2155             done = fold_and(&ctx, op);
2156             break;
2157         CASE_OP_32_64_VEC(andc):
2158             done = fold_andc(&ctx, op);
2159             break;
2160         CASE_OP_32_64(brcond):
2161             done = fold_brcond(&ctx, op);
2162             break;
2163         case INDEX_op_brcond2_i32:
2164             done = fold_brcond2(&ctx, op);
2165             break;
2166         CASE_OP_32_64(bswap16):
2167         CASE_OP_32_64(bswap32):
2168         case INDEX_op_bswap64_i64:
2169             done = fold_bswap(&ctx, op);
2170             break;
2171         CASE_OP_32_64(clz):
2172         CASE_OP_32_64(ctz):
2173             done = fold_count_zeros(&ctx, op);
2174             break;
2175         CASE_OP_32_64(ctpop):
2176             done = fold_ctpop(&ctx, op);
2177             break;
2178         CASE_OP_32_64(deposit):
2179             done = fold_deposit(&ctx, op);
2180             break;
2181         CASE_OP_32_64(div):
2182         CASE_OP_32_64(divu):
2183             done = fold_divide(&ctx, op);
2184             break;
2185         case INDEX_op_dup_vec:
2186             done = fold_dup(&ctx, op);
2187             break;
2188         case INDEX_op_dup2_vec:
2189             done = fold_dup2(&ctx, op);
2190             break;
2191         CASE_OP_32_64_VEC(eqv):
2192             done = fold_eqv(&ctx, op);
2193             break;
2194         CASE_OP_32_64(extract):
2195             done = fold_extract(&ctx, op);
2196             break;
2197         CASE_OP_32_64(extract2):
2198             done = fold_extract2(&ctx, op);
2199             break;
2200         CASE_OP_32_64(ext8s):
2201         CASE_OP_32_64(ext16s):
2202         case INDEX_op_ext32s_i64:
2203         case INDEX_op_ext_i32_i64:
2204             done = fold_exts(&ctx, op);
2205             break;
2206         CASE_OP_32_64(ext8u):
2207         CASE_OP_32_64(ext16u):
2208         case INDEX_op_ext32u_i64:
2209         case INDEX_op_extu_i32_i64:
2210         case INDEX_op_extrl_i64_i32:
2211         case INDEX_op_extrh_i64_i32:
2212             done = fold_extu(&ctx, op);
2213             break;
2214         CASE_OP_32_64(ld8s):
2215         CASE_OP_32_64(ld8u):
2216         CASE_OP_32_64(ld16s):
2217         CASE_OP_32_64(ld16u):
2218         case INDEX_op_ld32s_i64:
2219         case INDEX_op_ld32u_i64:
2220             done = fold_tcg_ld(&ctx, op);
2221             break;
2222         case INDEX_op_mb:
2223             done = fold_mb(&ctx, op);
2224             break;
2225         CASE_OP_32_64_VEC(mov):
2226             done = fold_mov(&ctx, op);
2227             break;
2228         CASE_OP_32_64(movcond):
2229             done = fold_movcond(&ctx, op);
2230             break;
2231         CASE_OP_32_64(mul):
2232             done = fold_mul(&ctx, op);
2233             break;
2234         CASE_OP_32_64(mulsh):
2235         CASE_OP_32_64(muluh):
2236             done = fold_mul_highpart(&ctx, op);
2237             break;
2238         CASE_OP_32_64(muls2):
2239         CASE_OP_32_64(mulu2):
2240             done = fold_multiply2(&ctx, op);
2241             break;
2242         CASE_OP_32_64_VEC(nand):
2243             done = fold_nand(&ctx, op);
2244             break;
2245         CASE_OP_32_64(neg):
2246             done = fold_neg(&ctx, op);
2247             break;
2248         CASE_OP_32_64_VEC(nor):
2249             done = fold_nor(&ctx, op);
2250             break;
2251         CASE_OP_32_64_VEC(not):
2252             done = fold_not(&ctx, op);
2253             break;
2254         CASE_OP_32_64_VEC(or):
2255             done = fold_or(&ctx, op);
2256             break;
2257         CASE_OP_32_64_VEC(orc):
2258             done = fold_orc(&ctx, op);
2259             break;
2260         case INDEX_op_qemu_ld_a32_i32:
2261         case INDEX_op_qemu_ld_a64_i32:
2262         case INDEX_op_qemu_ld_a32_i64:
2263         case INDEX_op_qemu_ld_a64_i64:
2264         case INDEX_op_qemu_ld_a32_i128:
2265         case INDEX_op_qemu_ld_a64_i128:
2266             done = fold_qemu_ld(&ctx, op);
2267             break;
2268         case INDEX_op_qemu_st8_a32_i32:
2269         case INDEX_op_qemu_st8_a64_i32:
2270         case INDEX_op_qemu_st_a32_i32:
2271         case INDEX_op_qemu_st_a64_i32:
2272         case INDEX_op_qemu_st_a32_i64:
2273         case INDEX_op_qemu_st_a64_i64:
2274         case INDEX_op_qemu_st_a32_i128:
2275         case INDEX_op_qemu_st_a64_i128:
2276             done = fold_qemu_st(&ctx, op);
2277             break;
2278         CASE_OP_32_64(rem):
2279         CASE_OP_32_64(remu):
2280             done = fold_remainder(&ctx, op);
2281             break;
2282         CASE_OP_32_64(rotl):
2283         CASE_OP_32_64(rotr):
2284         CASE_OP_32_64(sar):
2285         CASE_OP_32_64(shl):
2286         CASE_OP_32_64(shr):
2287             done = fold_shift(&ctx, op);
2288             break;
2289         CASE_OP_32_64(setcond):
2290             done = fold_setcond(&ctx, op);
2291             break;
2292         CASE_OP_32_64(negsetcond):
2293             done = fold_negsetcond(&ctx, op);
2294             break;
2295         case INDEX_op_setcond2_i32:
2296             done = fold_setcond2(&ctx, op);
2297             break;
2298         CASE_OP_32_64(sextract):
2299             done = fold_sextract(&ctx, op);
2300             break;
2301         CASE_OP_32_64(sub):
2302             done = fold_sub(&ctx, op);
2303             break;
2304         case INDEX_op_sub_vec:
2305             done = fold_sub_vec(&ctx, op);
2306             break;
2307         CASE_OP_32_64(sub2):
2308             done = fold_sub2(&ctx, op);
2309             break;
2310         CASE_OP_32_64_VEC(xor):
2311             done = fold_xor(&ctx, op);
2312             break;
2313         default:
2314             break;
2315         }
2316 
2317         if (!done) {
2318             finish_folding(&ctx, op);
2319         }
2320     }
2321 }
2322