xref: /openbmc/qemu/tcg/optimize.c (revision adda0ad56bd28d5a809051cbd190fda5798ec4e4)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32 
33 
34 typedef struct MemCopyInfo {
35     IntervalTreeNode itree;
36     QSIMPLEQ_ENTRY (MemCopyInfo) next;
37     TCGTemp *ts;
38     TCGType type;
39 } MemCopyInfo;
40 
41 typedef struct TempOptInfo {
42     TCGTemp *prev_copy;
43     TCGTemp *next_copy;
44     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
45     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
46     uint64_t o_mask;  /* mask bit is 1 if and only if value bit is 1 */
47     uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
48 } TempOptInfo;
49 
50 typedef struct OptContext {
51     TCGContext *tcg;
52     TCGOp *prev_mb;
53     TCGTempSet temps_used;
54 
55     IntervalTreeRoot mem_copy;
56     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
57 
58     /* In flight values from optimization. */
59     TCGType type;
60     int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
61 } OptContext;
62 
63 static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65     return ts->state_ptr;
66 }
67 
68 static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70     return ts_info(arg_temp(arg));
71 }
72 
73 static inline bool ti_is_const(TempOptInfo *ti)
74 {
75     /* If all bits that are not known zeros are known ones, it's constant. */
76     return ti->z_mask == ti->o_mask;
77 }
78 
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81     /* If constant, both z_mask and o_mask contain the value. */
82     return ti->z_mask;
83 }
84 
85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
86 {
87     return ti_is_const(ti) && ti_const_val(ti) == val;
88 }
89 
90 static inline bool ts_is_const(TCGTemp *ts)
91 {
92     return ti_is_const(ts_info(ts));
93 }
94 
95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
96 {
97     return ti_is_const_val(ts_info(ts), val);
98 }
99 
100 static inline bool arg_is_const(TCGArg arg)
101 {
102     return ts_is_const(arg_temp(arg));
103 }
104 
105 static inline uint64_t arg_const_val(TCGArg arg)
106 {
107     return ti_const_val(arg_info(arg));
108 }
109 
110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
111 {
112     return ts_is_const_val(arg_temp(arg), val);
113 }
114 
115 static inline bool ts_is_copy(TCGTemp *ts)
116 {
117     return ts_info(ts)->next_copy != ts;
118 }
119 
120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
121 {
122     return a->kind < b->kind ? b : a;
123 }
124 
125 /* Initialize and activate a temporary.  */
126 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128     size_t idx = temp_idx(ts);
129     TempOptInfo *ti;
130 
131     if (test_bit(idx, ctx->temps_used.l)) {
132         return;
133     }
134     set_bit(idx, ctx->temps_used.l);
135 
136     ti = ts->state_ptr;
137     if (ti == NULL) {
138         ti = tcg_malloc(sizeof(TempOptInfo));
139         ts->state_ptr = ti;
140     }
141 
142     ti->next_copy = ts;
143     ti->prev_copy = ts;
144     QSIMPLEQ_INIT(&ti->mem_copy);
145     if (ts->kind == TEMP_CONST) {
146         ti->z_mask = ts->val;
147         ti->o_mask = ts->val;
148         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149     } else {
150         ti->z_mask = -1;
151         ti->o_mask = 0;
152         ti->s_mask = 0;
153     }
154 }
155 
156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158     IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159     return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161 
162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
163 {
164     IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
165     return r ? container_of(r, MemCopyInfo, itree) : NULL;
166 }
167 
168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
169 {
170     TCGTemp *ts = mc->ts;
171     TempOptInfo *ti = ts_info(ts);
172 
173     interval_tree_remove(&mc->itree, &ctx->mem_copy);
174     QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
175     QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
176 }
177 
178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
179 {
180     while (true) {
181         MemCopyInfo *mc = mem_copy_first(ctx, s, l);
182         if (!mc) {
183             break;
184         }
185         remove_mem_copy(ctx, mc);
186     }
187 }
188 
189 static void remove_mem_copy_all(OptContext *ctx)
190 {
191     remove_mem_copy_in(ctx, 0, -1);
192     tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
193 }
194 
195 static TCGTemp *find_better_copy(TCGTemp *ts)
196 {
197     TCGTemp *i, *ret;
198 
199     /* If this is already readonly, we can't do better. */
200     if (temp_readonly(ts)) {
201         return ts;
202     }
203 
204     ret = ts;
205     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
206         ret = cmp_better_copy(ret, i);
207     }
208     return ret;
209 }
210 
211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
212 {
213     TempOptInfo *si = ts_info(src_ts);
214     TempOptInfo *di = ts_info(dst_ts);
215     MemCopyInfo *mc;
216 
217     QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
218         tcg_debug_assert(mc->ts == src_ts);
219         mc->ts = dst_ts;
220     }
221     QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
222 }
223 
224 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
225 static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227     TempOptInfo *ti = ts_info(ts);
228     TCGTemp *pts = ti->prev_copy;
229     TCGTemp *nts = ti->next_copy;
230     TempOptInfo *pi = ts_info(pts);
231     TempOptInfo *ni = ts_info(nts);
232 
233     ni->prev_copy = ti->prev_copy;
234     pi->next_copy = ti->next_copy;
235     ti->next_copy = ts;
236     ti->prev_copy = ts;
237     ti->z_mask = -1;
238     ti->o_mask = 0;
239     ti->s_mask = 0;
240 
241     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242         if (ts == nts) {
243             /* Last temp copy being removed, the mem copies die. */
244             MemCopyInfo *mc;
245             QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246                 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247             }
248             QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249         } else {
250             move_mem_copies(find_better_copy(nts), ts);
251         }
252     }
253 }
254 
255 static void reset_temp(OptContext *ctx, TCGArg arg)
256 {
257     reset_ts(ctx, arg_temp(arg));
258 }
259 
260 static void record_mem_copy(OptContext *ctx, TCGType type,
261                             TCGTemp *ts, intptr_t start, intptr_t last)
262 {
263     MemCopyInfo *mc;
264     TempOptInfo *ti;
265 
266     mc = QSIMPLEQ_FIRST(&ctx->mem_free);
267     if (mc) {
268         QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
269     } else {
270         mc = tcg_malloc(sizeof(*mc));
271     }
272 
273     memset(mc, 0, sizeof(*mc));
274     mc->itree.start = start;
275     mc->itree.last = last;
276     mc->type = type;
277     interval_tree_insert(&mc->itree, &ctx->mem_copy);
278 
279     ts = find_better_copy(ts);
280     ti = ts_info(ts);
281     mc->ts = ts;
282     QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
283 }
284 
285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
286 {
287     TCGTemp *i;
288 
289     if (ts1 == ts2) {
290         return true;
291     }
292 
293     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
294         return false;
295     }
296 
297     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
298         if (i == ts2) {
299             return true;
300         }
301     }
302 
303     return false;
304 }
305 
306 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
307 {
308     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
309 }
310 
311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313     MemCopyInfo *mc;
314 
315     for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316         if (mc->itree.start == s && mc->type == type) {
317             return find_better_copy(mc->ts);
318         }
319     }
320     return NULL;
321 }
322 
323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325     TCGType type = ctx->type;
326     TCGTemp *ts;
327 
328     if (type == TCG_TYPE_I32) {
329         val = (int32_t)val;
330     }
331 
332     ts = tcg_constant_internal(type, val);
333     init_ts_info(ctx, ts);
334 
335     return temp_arg(ts);
336 }
337 
338 static TCGArg arg_new_temp(OptContext *ctx)
339 {
340     TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341     init_ts_info(ctx, ts);
342     return temp_arg(ts);
343 }
344 
345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
346                                TCGOpcode opc, unsigned narg)
347 {
348     return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
349 }
350 
351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
352                                 TCGOpcode opc, unsigned narg)
353 {
354     return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
355 }
356 
357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
358 {
359     TCGTemp *dst_ts = arg_temp(dst);
360     TCGTemp *src_ts = arg_temp(src);
361     TempOptInfo *di;
362     TempOptInfo *si;
363     TCGOpcode new_op;
364 
365     if (ts_are_copies(dst_ts, src_ts)) {
366         tcg_op_remove(ctx->tcg, op);
367         return true;
368     }
369 
370     reset_ts(ctx, dst_ts);
371     di = ts_info(dst_ts);
372     si = ts_info(src_ts);
373 
374     switch (ctx->type) {
375     case TCG_TYPE_I32:
376     case TCG_TYPE_I64:
377         new_op = INDEX_op_mov;
378         break;
379     case TCG_TYPE_V64:
380     case TCG_TYPE_V128:
381     case TCG_TYPE_V256:
382         /* TCGOP_TYPE and TCGOP_VECE remain unchanged.  */
383         new_op = INDEX_op_mov_vec;
384         break;
385     default:
386         g_assert_not_reached();
387     }
388     op->opc = new_op;
389     op->args[0] = dst;
390     op->args[1] = src;
391 
392     di->z_mask = si->z_mask;
393     di->o_mask = si->o_mask;
394     di->s_mask = si->s_mask;
395 
396     if (src_ts->type == dst_ts->type) {
397         TempOptInfo *ni = ts_info(si->next_copy);
398 
399         di->next_copy = si->next_copy;
400         di->prev_copy = src_ts;
401         ni->prev_copy = dst_ts;
402         si->next_copy = dst_ts;
403 
404         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
405             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
406             move_mem_copies(dst_ts, src_ts);
407         }
408     } else if (dst_ts->type == TCG_TYPE_I32) {
409         di->z_mask = (int32_t)di->z_mask;
410         di->o_mask = (int32_t)di->o_mask;
411         di->s_mask |= INT32_MIN;
412     } else {
413         di->z_mask |= MAKE_64BIT_MASK(32, 32);
414         di->o_mask = (uint32_t)di->o_mask;
415         di->s_mask = INT64_MIN;
416     }
417     return true;
418 }
419 
420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
421                              TCGArg dst, uint64_t val)
422 {
423     /* Convert movi to mov with constant temp. */
424     return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
425 }
426 
427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
428                                       uint64_t x, uint64_t y)
429 {
430     uint64_t l64, h64;
431 
432     switch (op) {
433     case INDEX_op_add:
434         return x + y;
435 
436     case INDEX_op_sub:
437         return x - y;
438 
439     case INDEX_op_mul:
440         return x * y;
441 
442     case INDEX_op_and:
443     case INDEX_op_and_vec:
444         return x & y;
445 
446     case INDEX_op_or:
447     case INDEX_op_or_vec:
448         return x | y;
449 
450     case INDEX_op_xor:
451     case INDEX_op_xor_vec:
452         return x ^ y;
453 
454     case INDEX_op_shl:
455         if (type == TCG_TYPE_I32) {
456             return (uint32_t)x << (y & 31);
457         }
458         return (uint64_t)x << (y & 63);
459 
460     case INDEX_op_shr:
461         if (type == TCG_TYPE_I32) {
462             return (uint32_t)x >> (y & 31);
463         }
464         return (uint64_t)x >> (y & 63);
465 
466     case INDEX_op_sar:
467         if (type == TCG_TYPE_I32) {
468             return (int32_t)x >> (y & 31);
469         }
470         return (int64_t)x >> (y & 63);
471 
472     case INDEX_op_rotr:
473         if (type == TCG_TYPE_I32) {
474             return ror32(x, y & 31);
475         }
476         return ror64(x, y & 63);
477 
478     case INDEX_op_rotl:
479         if (type == TCG_TYPE_I32) {
480             return rol32(x, y & 31);
481         }
482         return rol64(x, y & 63);
483 
484     case INDEX_op_not:
485     case INDEX_op_not_vec:
486         return ~x;
487 
488     case INDEX_op_neg:
489         return -x;
490 
491     case INDEX_op_andc:
492     case INDEX_op_andc_vec:
493         return x & ~y;
494 
495     case INDEX_op_orc:
496     case INDEX_op_orc_vec:
497         return x | ~y;
498 
499     case INDEX_op_eqv:
500     case INDEX_op_eqv_vec:
501         return ~(x ^ y);
502 
503     case INDEX_op_nand:
504     case INDEX_op_nand_vec:
505         return ~(x & y);
506 
507     case INDEX_op_nor:
508     case INDEX_op_nor_vec:
509         return ~(x | y);
510 
511     case INDEX_op_clz:
512         if (type == TCG_TYPE_I32) {
513             return (uint32_t)x ? clz32(x) : y;
514         }
515         return x ? clz64(x) : y;
516 
517     case INDEX_op_ctz:
518         if (type == TCG_TYPE_I32) {
519             return (uint32_t)x ? ctz32(x) : y;
520         }
521         return x ? ctz64(x) : y;
522 
523     case INDEX_op_ctpop:
524         return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
525 
526     case INDEX_op_bswap16:
527         x = bswap16(x);
528         return y & TCG_BSWAP_OS ? (int16_t)x : x;
529 
530     case INDEX_op_bswap32:
531         x = bswap32(x);
532         return y & TCG_BSWAP_OS ? (int32_t)x : x;
533 
534     case INDEX_op_bswap64:
535         return bswap64(x);
536 
537     case INDEX_op_ext_i32_i64:
538         return (int32_t)x;
539 
540     case INDEX_op_extu_i32_i64:
541     case INDEX_op_extrl_i64_i32:
542         return (uint32_t)x;
543 
544     case INDEX_op_extrh_i64_i32:
545         return (uint64_t)x >> 32;
546 
547     case INDEX_op_muluh:
548         if (type == TCG_TYPE_I32) {
549             return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550         }
551         mulu64(&l64, &h64, x, y);
552         return h64;
553 
554     case INDEX_op_mulsh:
555         if (type == TCG_TYPE_I32) {
556             return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
557         }
558         muls64(&l64, &h64, x, y);
559         return h64;
560 
561     case INDEX_op_divs:
562         /* Avoid crashing on divide by zero, otherwise undefined.  */
563         if (type == TCG_TYPE_I32) {
564             return (int32_t)x / ((int32_t)y ? : 1);
565         }
566         return (int64_t)x / ((int64_t)y ? : 1);
567 
568     case INDEX_op_divu:
569         if (type == TCG_TYPE_I32) {
570             return (uint32_t)x / ((uint32_t)y ? : 1);
571         }
572         return (uint64_t)x / ((uint64_t)y ? : 1);
573 
574     case INDEX_op_rems:
575         if (type == TCG_TYPE_I32) {
576             return (int32_t)x % ((int32_t)y ? : 1);
577         }
578         return (int64_t)x % ((int64_t)y ? : 1);
579 
580     case INDEX_op_remu:
581         if (type == TCG_TYPE_I32) {
582             return (uint32_t)x % ((uint32_t)y ? : 1);
583         }
584         return (uint64_t)x % ((uint64_t)y ? : 1);
585 
586     default:
587         g_assert_not_reached();
588     }
589 }
590 
591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
592                                     uint64_t x, uint64_t y)
593 {
594     uint64_t res = do_constant_folding_2(op, type, x, y);
595     if (type == TCG_TYPE_I32) {
596         res = (int32_t)res;
597     }
598     return res;
599 }
600 
601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
602 {
603     switch (c) {
604     case TCG_COND_EQ:
605         return x == y;
606     case TCG_COND_NE:
607         return x != y;
608     case TCG_COND_LT:
609         return (int32_t)x < (int32_t)y;
610     case TCG_COND_GE:
611         return (int32_t)x >= (int32_t)y;
612     case TCG_COND_LE:
613         return (int32_t)x <= (int32_t)y;
614     case TCG_COND_GT:
615         return (int32_t)x > (int32_t)y;
616     case TCG_COND_LTU:
617         return x < y;
618     case TCG_COND_GEU:
619         return x >= y;
620     case TCG_COND_LEU:
621         return x <= y;
622     case TCG_COND_GTU:
623         return x > y;
624     case TCG_COND_TSTEQ:
625         return (x & y) == 0;
626     case TCG_COND_TSTNE:
627         return (x & y) != 0;
628     case TCG_COND_ALWAYS:
629     case TCG_COND_NEVER:
630         break;
631     }
632     g_assert_not_reached();
633 }
634 
635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
636 {
637     switch (c) {
638     case TCG_COND_EQ:
639         return x == y;
640     case TCG_COND_NE:
641         return x != y;
642     case TCG_COND_LT:
643         return (int64_t)x < (int64_t)y;
644     case TCG_COND_GE:
645         return (int64_t)x >= (int64_t)y;
646     case TCG_COND_LE:
647         return (int64_t)x <= (int64_t)y;
648     case TCG_COND_GT:
649         return (int64_t)x > (int64_t)y;
650     case TCG_COND_LTU:
651         return x < y;
652     case TCG_COND_GEU:
653         return x >= y;
654     case TCG_COND_LEU:
655         return x <= y;
656     case TCG_COND_GTU:
657         return x > y;
658     case TCG_COND_TSTEQ:
659         return (x & y) == 0;
660     case TCG_COND_TSTNE:
661         return (x & y) != 0;
662     case TCG_COND_ALWAYS:
663     case TCG_COND_NEVER:
664         break;
665     }
666     g_assert_not_reached();
667 }
668 
669 static int do_constant_folding_cond_eq(TCGCond c)
670 {
671     switch (c) {
672     case TCG_COND_GT:
673     case TCG_COND_LTU:
674     case TCG_COND_LT:
675     case TCG_COND_GTU:
676     case TCG_COND_NE:
677         return 0;
678     case TCG_COND_GE:
679     case TCG_COND_GEU:
680     case TCG_COND_LE:
681     case TCG_COND_LEU:
682     case TCG_COND_EQ:
683         return 1;
684     case TCG_COND_TSTEQ:
685     case TCG_COND_TSTNE:
686         return -1;
687     case TCG_COND_ALWAYS:
688     case TCG_COND_NEVER:
689         break;
690     }
691     g_assert_not_reached();
692 }
693 
694 /*
695  * Return -1 if the condition can't be simplified,
696  * and the result of the condition (0 or 1) if it can.
697  */
698 static int do_constant_folding_cond(TCGType type, TCGArg x,
699                                     TCGArg y, TCGCond c)
700 {
701     if (arg_is_const(x) && arg_is_const(y)) {
702         uint64_t xv = arg_const_val(x);
703         uint64_t yv = arg_const_val(y);
704 
705         switch (type) {
706         case TCG_TYPE_I32:
707             return do_constant_folding_cond_32(xv, yv, c);
708         case TCG_TYPE_I64:
709             return do_constant_folding_cond_64(xv, yv, c);
710         default:
711             /* Only scalar comparisons are optimizable */
712             return -1;
713         }
714     } else if (args_are_copies(x, y)) {
715         return do_constant_folding_cond_eq(c);
716     } else if (arg_is_const_val(y, 0)) {
717         switch (c) {
718         case TCG_COND_LTU:
719         case TCG_COND_TSTNE:
720             return 0;
721         case TCG_COND_GEU:
722         case TCG_COND_TSTEQ:
723             return 1;
724         default:
725             return -1;
726         }
727     }
728     return -1;
729 }
730 
731 /**
732  * swap_commutative:
733  * @dest: TCGArg of the destination argument, or NO_DEST.
734  * @p1: first paired argument
735  * @p2: second paired argument
736  *
737  * If *@p1 is a constant and *@p2 is not, swap.
738  * If *@p2 matches @dest, swap.
739  * Return true if a swap was performed.
740  */
741 
742 #define NO_DEST  temp_arg(NULL)
743 
744 static int pref_commutative(TempOptInfo *ti)
745 {
746     /* Slight preference for non-zero constants second. */
747     return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
748 }
749 
750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
751 {
752     TCGArg a1 = *p1, a2 = *p2;
753     int sum = 0;
754     sum += pref_commutative(arg_info(a1));
755     sum -= pref_commutative(arg_info(a2));
756 
757     /* Prefer the constant in second argument, and then the form
758        op a, a, b, which is better handled on non-RISC hosts. */
759     if (sum > 0 || (sum == 0 && dest == a2)) {
760         *p1 = a2;
761         *p2 = a1;
762         return true;
763     }
764     return false;
765 }
766 
767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
768 {
769     int sum = 0;
770     sum += pref_commutative(arg_info(p1[0]));
771     sum += pref_commutative(arg_info(p1[1]));
772     sum -= pref_commutative(arg_info(p2[0]));
773     sum -= pref_commutative(arg_info(p2[1]));
774     if (sum > 0) {
775         TCGArg t;
776         t = p1[0], p1[0] = p2[0], p2[0] = t;
777         t = p1[1], p1[1] = p2[1], p2[1] = t;
778         return true;
779     }
780     return false;
781 }
782 
783 /*
784  * Return -1 if the condition can't be simplified,
785  * and the result of the condition (0 or 1) if it can.
786  */
787 static bool fold_and(OptContext *ctx, TCGOp *op);
788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
789                                      TCGArg *p1, TCGArg *p2, TCGArg *pcond)
790 {
791     TCGCond cond;
792     TempOptInfo *i1;
793     bool swap;
794     int r;
795 
796     swap = swap_commutative(dest, p1, p2);
797     cond = *pcond;
798     if (swap) {
799         *pcond = cond = tcg_swap_cond(cond);
800     }
801 
802     r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
803     if (r >= 0) {
804         return r;
805     }
806     if (!is_tst_cond(cond)) {
807         return -1;
808     }
809 
810     i1 = arg_info(*p1);
811 
812     /*
813      * TSTNE x,x -> NE x,0
814      * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
815      */
816     if (args_are_copies(*p1, *p2) ||
817         (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
818         *p2 = arg_new_constant(ctx, 0);
819         *pcond = tcg_tst_eqne_cond(cond);
820         return -1;
821     }
822 
823     /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
824     if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
825         *p2 = arg_new_constant(ctx, 0);
826         *pcond = tcg_tst_ltge_cond(cond);
827         return -1;
828     }
829 
830     /* Expand to AND with a temporary if no backend support. */
831     if (!TCG_TARGET_HAS_tst) {
832         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
833         TCGArg tmp = arg_new_temp(ctx);
834 
835         op2->args[0] = tmp;
836         op2->args[1] = *p1;
837         op2->args[2] = *p2;
838         fold_and(ctx, op2);
839 
840         *p1 = tmp;
841         *p2 = arg_new_constant(ctx, 0);
842         *pcond = tcg_tst_eqne_cond(cond);
843     }
844     return -1;
845 }
846 
847 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
848 {
849     TCGArg al, ah, bl, bh;
850     TCGCond c;
851     bool swap;
852     int r;
853 
854     swap = swap_commutative2(args, args + 2);
855     c = args[4];
856     if (swap) {
857         args[4] = c = tcg_swap_cond(c);
858     }
859 
860     al = args[0];
861     ah = args[1];
862     bl = args[2];
863     bh = args[3];
864 
865     if (arg_is_const(bl) && arg_is_const(bh)) {
866         tcg_target_ulong blv = arg_const_val(bl);
867         tcg_target_ulong bhv = arg_const_val(bh);
868         uint64_t b = deposit64(blv, 32, 32, bhv);
869 
870         if (arg_is_const(al) && arg_is_const(ah)) {
871             tcg_target_ulong alv = arg_const_val(al);
872             tcg_target_ulong ahv = arg_const_val(ah);
873             uint64_t a = deposit64(alv, 32, 32, ahv);
874 
875             r = do_constant_folding_cond_64(a, b, c);
876             if (r >= 0) {
877                 return r;
878             }
879         }
880 
881         if (b == 0) {
882             switch (c) {
883             case TCG_COND_LTU:
884             case TCG_COND_TSTNE:
885                 return 0;
886             case TCG_COND_GEU:
887             case TCG_COND_TSTEQ:
888                 return 1;
889             default:
890                 break;
891             }
892         }
893 
894         /* TSTNE x,-1 -> NE x,0 */
895         if (b == -1 && is_tst_cond(c)) {
896             args[3] = args[2] = arg_new_constant(ctx, 0);
897             args[4] = tcg_tst_eqne_cond(c);
898             return -1;
899         }
900 
901         /* TSTNE x,sign -> LT x,0 */
902         if (b == INT64_MIN && is_tst_cond(c)) {
903             /* bl must be 0, so copy that to bh */
904             args[3] = bl;
905             args[4] = tcg_tst_ltge_cond(c);
906             return -1;
907         }
908     }
909 
910     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
911         r = do_constant_folding_cond_eq(c);
912         if (r >= 0) {
913             return r;
914         }
915 
916         /* TSTNE x,x -> NE x,0 */
917         if (is_tst_cond(c)) {
918             args[3] = args[2] = arg_new_constant(ctx, 0);
919             args[4] = tcg_tst_eqne_cond(c);
920             return -1;
921         }
922     }
923 
924     /* Expand to AND with a temporary if no backend support. */
925     if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
926         TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
927         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
928         TCGArg t1 = arg_new_temp(ctx);
929         TCGArg t2 = arg_new_temp(ctx);
930 
931         op1->args[0] = t1;
932         op1->args[1] = al;
933         op1->args[2] = bl;
934         fold_and(ctx, op1);
935 
936         op2->args[0] = t2;
937         op2->args[1] = ah;
938         op2->args[2] = bh;
939         fold_and(ctx, op1);
940 
941         args[0] = t1;
942         args[1] = t2;
943         args[3] = args[2] = arg_new_constant(ctx, 0);
944         args[4] = tcg_tst_eqne_cond(c);
945     }
946     return -1;
947 }
948 
949 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
950 {
951     for (int i = 0; i < nb_args; i++) {
952         TCGTemp *ts = arg_temp(op->args[i]);
953         init_ts_info(ctx, ts);
954     }
955 }
956 
957 static void copy_propagate(OptContext *ctx, TCGOp *op,
958                            int nb_oargs, int nb_iargs)
959 {
960     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
961         TCGTemp *ts = arg_temp(op->args[i]);
962         if (ts_is_copy(ts)) {
963             op->args[i] = temp_arg(find_better_copy(ts));
964         }
965     }
966 }
967 
968 static void finish_bb(OptContext *ctx)
969 {
970     /* We only optimize memory barriers across basic blocks. */
971     ctx->prev_mb = NULL;
972 }
973 
974 static void finish_ebb(OptContext *ctx)
975 {
976     finish_bb(ctx);
977     /* We only optimize across extended basic blocks. */
978     memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
979     remove_mem_copy_all(ctx);
980 }
981 
982 static bool finish_folding(OptContext *ctx, TCGOp *op)
983 {
984     const TCGOpDef *def = &tcg_op_defs[op->opc];
985     int i, nb_oargs;
986 
987     nb_oargs = def->nb_oargs;
988     for (i = 0; i < nb_oargs; i++) {
989         TCGTemp *ts = arg_temp(op->args[i]);
990         reset_ts(ctx, ts);
991     }
992     return true;
993 }
994 
995 /*
996  * The fold_* functions return true when processing is complete,
997  * usually by folding the operation to a constant or to a copy,
998  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
999  * like collect information about the value produced, for use in
1000  * optimizing a subsequent operation.
1001  *
1002  * These first fold_* functions are all helpers, used by other
1003  * folders for more specific operations.
1004  */
1005 
1006 static bool fold_const1(OptContext *ctx, TCGOp *op)
1007 {
1008     if (arg_is_const(op->args[1])) {
1009         uint64_t t = arg_const_val(op->args[1]);
1010 
1011         t = do_constant_folding(op->opc, ctx->type, t, 0);
1012         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1013     }
1014     return false;
1015 }
1016 
1017 static bool fold_const2(OptContext *ctx, TCGOp *op)
1018 {
1019     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1020         uint64_t t1 = arg_const_val(op->args[1]);
1021         uint64_t t2 = arg_const_val(op->args[2]);
1022 
1023         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1024         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1025     }
1026     return false;
1027 }
1028 
1029 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1030 {
1031     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1032     return false;
1033 }
1034 
1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1036 {
1037     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038     return fold_const2(ctx, op);
1039 }
1040 
1041 /*
1042  * Record "zero" and "sign" masks for the single output of @op.
1043  * See TempOptInfo definition of z_mask and s_mask.
1044  * If z_mask allows, fold the output to constant zero.
1045  * The passed s_mask may be augmented by z_mask.
1046  */
1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
1048                                 uint64_t z_mask, uint64_t o_mask,
1049                                 int64_t s_mask, uint64_t a_mask)
1050 {
1051     const TCGOpDef *def = &tcg_op_defs[op->opc];
1052     TCGTemp *ts;
1053     TempOptInfo *ti;
1054     int rep;
1055 
1056     /* Only single-output opcodes are supported here. */
1057     tcg_debug_assert(def->nb_oargs == 1);
1058 
1059     /*
1060      * 32-bit ops generate 32-bit results, which for the purpose of
1061      * simplifying tcg are sign-extended.  Certainly that's how we
1062      * represent our constants elsewhere.  Note that the bits will
1063      * be reset properly for a 64-bit value when encountering the
1064      * type changing opcodes.
1065      */
1066     if (ctx->type == TCG_TYPE_I32) {
1067         z_mask = (int32_t)z_mask;
1068         o_mask = (int32_t)o_mask;
1069         s_mask |= INT32_MIN;
1070         a_mask = (uint32_t)a_mask;
1071     }
1072 
1073     /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074     tcg_debug_assert((o_mask & ~z_mask) == 0);
1075 
1076     /* All bits that are not known zero are known one is a constant. */
1077     if (z_mask == o_mask) {
1078         return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079     }
1080 
1081     /* If no bits are affected, the operation devolves to a copy. */
1082     if (a_mask == 0) {
1083         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084     }
1085 
1086     ts = arg_temp(op->args[0]);
1087     reset_ts(ctx, ts);
1088 
1089     ti = ts_info(ts);
1090     ti->z_mask = z_mask;
1091 
1092     /* Canonicalize s_mask and incorporate data from z_mask. */
1093     rep = clz64(~s_mask);
1094     rep = MAX(rep, clz64(z_mask));
1095     rep = MAX(rep, clz64(~o_mask));
1096     rep = MAX(rep - 1, 0);
1097     ti->s_mask = INT64_MIN >> rep;
1098 
1099     return false;
1100 }
1101 
1102 static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
1103                             uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
1104 {
1105     fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, -1);
1106     return true;
1107 }
1108 
1109 static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
1110                            uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
1111 {
1112     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
1113 }
1114 
1115 static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
1116                           uint64_t z_mask, uint64_t o_mask)
1117 {
1118     return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
1119 }
1120 
1121 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1122                           uint64_t z_mask, uint64_t s_mask)
1123 {
1124     return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
1125 }
1126 
1127 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1128 {
1129     return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
1130 }
1131 
1132 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1133 {
1134     return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
1135 }
1136 
1137 /*
1138  * Convert @op to NOT, if NOT is supported by the host.
1139  * Return true f the conversion is successful, which will still
1140  * indicate that the processing is complete.
1141  */
1142 static bool fold_not(OptContext *ctx, TCGOp *op);
1143 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1144 {
1145     TCGOpcode not_op;
1146     bool have_not;
1147 
1148     switch (ctx->type) {
1149     case TCG_TYPE_I32:
1150     case TCG_TYPE_I64:
1151         not_op = INDEX_op_not;
1152         have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1153         break;
1154     case TCG_TYPE_V64:
1155     case TCG_TYPE_V128:
1156     case TCG_TYPE_V256:
1157         not_op = INDEX_op_not_vec;
1158         have_not = TCG_TARGET_HAS_not_vec;
1159         break;
1160     default:
1161         g_assert_not_reached();
1162     }
1163     if (have_not) {
1164         op->opc = not_op;
1165         op->args[1] = op->args[idx];
1166         return fold_not(ctx, op);
1167     }
1168     return false;
1169 }
1170 
1171 /* If the binary operation has first argument @i, fold to @i. */
1172 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1173 {
1174     if (arg_is_const_val(op->args[1], i)) {
1175         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1176     }
1177     return false;
1178 }
1179 
1180 /* If the binary operation has first argument @i, fold to NOT. */
1181 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1182 {
1183     if (arg_is_const_val(op->args[1], i)) {
1184         return fold_to_not(ctx, op, 2);
1185     }
1186     return false;
1187 }
1188 
1189 /* If the binary operation has second argument @i, fold to @i. */
1190 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1191 {
1192     if (arg_is_const_val(op->args[2], i)) {
1193         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1194     }
1195     return false;
1196 }
1197 
1198 /* If the binary operation has second argument @i, fold to identity. */
1199 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1200 {
1201     if (arg_is_const_val(op->args[2], i)) {
1202         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1203     }
1204     return false;
1205 }
1206 
1207 /* If the binary operation has second argument @i, fold to NOT. */
1208 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1209 {
1210     if (arg_is_const_val(op->args[2], i)) {
1211         return fold_to_not(ctx, op, 1);
1212     }
1213     return false;
1214 }
1215 
1216 /* If the binary operation has both arguments equal, fold to @i. */
1217 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1218 {
1219     if (args_are_copies(op->args[1], op->args[2])) {
1220         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1221     }
1222     return false;
1223 }
1224 
1225 /* If the binary operation has both arguments equal, fold to identity. */
1226 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1227 {
1228     if (args_are_copies(op->args[1], op->args[2])) {
1229         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1230     }
1231     return false;
1232 }
1233 
1234 /*
1235  * These outermost fold_<op> functions are sorted alphabetically.
1236  *
1237  * The ordering of the transformations should be:
1238  *   1) those that produce a constant
1239  *   2) those that produce a copy
1240  *   3) those that produce information about the result value.
1241  */
1242 
1243 static bool fold_addco(OptContext *ctx, TCGOp *op);
1244 static bool fold_or(OptContext *ctx, TCGOp *op);
1245 static bool fold_orc(OptContext *ctx, TCGOp *op);
1246 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1247 static bool fold_xor(OptContext *ctx, TCGOp *op);
1248 
1249 static bool fold_add(OptContext *ctx, TCGOp *op)
1250 {
1251     if (fold_const2_commutative(ctx, op) ||
1252         fold_xi_to_x(ctx, op, 0)) {
1253         return true;
1254     }
1255     return finish_folding(ctx, op);
1256 }
1257 
1258 /* We cannot as yet do_constant_folding with vectors. */
1259 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1260 {
1261     if (fold_commutative(ctx, op) ||
1262         fold_xi_to_x(ctx, op, 0)) {
1263         return true;
1264     }
1265     return finish_folding(ctx, op);
1266 }
1267 
1268 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1269 {
1270     TempOptInfo *t2;
1271 
1272     op = QTAILQ_PREV(op, link);
1273     switch (op->opc) {
1274     case INDEX_op_addco:
1275         op->opc = INDEX_op_add;
1276         fold_add(ctx, op);
1277         break;
1278     case INDEX_op_addcio:
1279         op->opc = INDEX_op_addci;
1280         break;
1281     case INDEX_op_addc1o:
1282         op->opc = INDEX_op_add;
1283         t2 = arg_info(op->args[2]);
1284         if (ti_is_const(t2)) {
1285             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1286             /* Perform other constant folding, if needed. */
1287             fold_add(ctx, op);
1288         } else {
1289             TCGArg ret = op->args[0];
1290             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1291             op->args[0] = ret;
1292             op->args[1] = ret;
1293             op->args[2] = arg_new_constant(ctx, 1);
1294         }
1295         break;
1296     default:
1297         g_assert_not_reached();
1298     }
1299 }
1300 
1301 static bool fold_addci(OptContext *ctx, TCGOp *op)
1302 {
1303     fold_commutative(ctx, op);
1304 
1305     if (ctx->carry_state < 0) {
1306         return finish_folding(ctx, op);
1307     }
1308 
1309     squash_prev_carryout(ctx, op);
1310     op->opc = INDEX_op_add;
1311 
1312     if (ctx->carry_state > 0) {
1313         TempOptInfo *t2 = arg_info(op->args[2]);
1314 
1315         /*
1316          * Propagate the known carry-in into a constant, if possible.
1317          * Otherwise emit a second add +1.
1318          */
1319         if (ti_is_const(t2)) {
1320             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1321         } else {
1322             TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1323 
1324             op2->args[0] = op->args[0];
1325             op2->args[1] = op->args[1];
1326             op2->args[2] = op->args[2];
1327             fold_add(ctx, op2);
1328 
1329             op->args[1] = op->args[0];
1330             op->args[2] = arg_new_constant(ctx, 1);
1331         }
1332     }
1333 
1334     ctx->carry_state = -1;
1335     return fold_add(ctx, op);
1336 }
1337 
1338 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1339 {
1340     TempOptInfo *t1, *t2;
1341     int carry_out = -1;
1342     uint64_t sum, max;
1343 
1344     fold_commutative(ctx, op);
1345     t1 = arg_info(op->args[1]);
1346     t2 = arg_info(op->args[2]);
1347 
1348     /*
1349      * The z_mask value is >= the maximum value that can be represented
1350      * with the known zero bits.  So adding the z_mask values will not
1351      * overflow if and only if the true values cannot overflow.
1352      */
1353     if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1354         !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1355         carry_out = 0;
1356     }
1357 
1358     if (ctx->carry_state < 0) {
1359         ctx->carry_state = carry_out;
1360         return finish_folding(ctx, op);
1361     }
1362 
1363     squash_prev_carryout(ctx, op);
1364     if (ctx->carry_state == 0) {
1365         goto do_addco;
1366     }
1367 
1368     /* Propagate the known carry-in into a constant, if possible. */
1369     max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1370     if (ti_is_const(t2)) {
1371         uint64_t v = ti_const_val(t2) & max;
1372         if (v < max) {
1373             op->args[2] = arg_new_constant(ctx, v + 1);
1374             goto do_addco;
1375         }
1376         /* max + known carry in produces known carry out. */
1377         carry_out = 1;
1378     }
1379     if (ti_is_const(t1)) {
1380         uint64_t v = ti_const_val(t1) & max;
1381         if (v < max) {
1382             op->args[1] = arg_new_constant(ctx, v + 1);
1383             goto do_addco;
1384         }
1385         carry_out = 1;
1386     }
1387 
1388     /* Adjust the opcode to remember the known carry-in. */
1389     op->opc = INDEX_op_addc1o;
1390     ctx->carry_state = carry_out;
1391     return finish_folding(ctx, op);
1392 
1393  do_addco:
1394     op->opc = INDEX_op_addco;
1395     return fold_addco(ctx, op);
1396 }
1397 
1398 static bool fold_addco(OptContext *ctx, TCGOp *op)
1399 {
1400     TempOptInfo *t1, *t2;
1401     int carry_out = -1;
1402     uint64_t ign;
1403 
1404     fold_commutative(ctx, op);
1405     t1 = arg_info(op->args[1]);
1406     t2 = arg_info(op->args[2]);
1407 
1408     if (ti_is_const(t2)) {
1409         uint64_t v2 = ti_const_val(t2);
1410 
1411         if (ti_is_const(t1)) {
1412             uint64_t v1 = ti_const_val(t1);
1413             /* Given sign-extension of z_mask for I32, we need not truncate. */
1414             carry_out = uadd64_overflow(v1, v2, &ign);
1415         } else if (v2 == 0) {
1416             carry_out = 0;
1417         }
1418     } else {
1419         /*
1420          * The z_mask value is >= the maximum value that can be represented
1421          * with the known zero bits.  So adding the z_mask values will not
1422          * overflow if and only if the true values cannot overflow.
1423          */
1424         if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1425             carry_out = 0;
1426         }
1427     }
1428     ctx->carry_state = carry_out;
1429     return finish_folding(ctx, op);
1430 }
1431 
1432 static bool fold_and(OptContext *ctx, TCGOp *op)
1433 {
1434     uint64_t z_mask, o_mask, s_mask, a_mask;
1435     TempOptInfo *t1, *t2;
1436 
1437     if (fold_const2_commutative(ctx, op)) {
1438         return true;
1439     }
1440 
1441     t1 = arg_info(op->args[1]);
1442     t2 = arg_info(op->args[2]);
1443 
1444     z_mask = t1->z_mask & t2->z_mask;
1445     o_mask = t1->o_mask & t2->o_mask;
1446 
1447     /*
1448      * Sign repetitions are perforce all identical, whether they are 1 or 0.
1449      * Bitwise operations preserve the relative quantity of the repetitions.
1450      */
1451     s_mask = t1->s_mask & t2->s_mask;
1452 
1453     /* Affected bits are those not known zero, masked by those known one. */
1454     a_mask = t1->z_mask & ~t2->o_mask;
1455 
1456     if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1457         if (ti_is_const(t2)) {
1458             /*
1459              * Canonicalize on extract, if valid.  This aids x86 with its
1460              * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1461              * which does not require matching operands.  Other backends can
1462              * trivially expand the extract to AND during code generation.
1463              */
1464             uint64_t val = ti_const_val(t2);
1465             if (!(val & (val + 1))) {
1466                 unsigned len = ctz64(~val);
1467                 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1468                     op->opc = INDEX_op_extract;
1469                     op->args[2] = 0;
1470                     op->args[3] = len;
1471                 }
1472             }
1473         } else {
1474             fold_xx_to_x(ctx, op);
1475         }
1476     }
1477     return true;
1478 }
1479 
1480 static bool fold_andc(OptContext *ctx, TCGOp *op)
1481 {
1482     uint64_t z_mask, o_mask, s_mask, a_mask;
1483     TempOptInfo *t1, *t2;
1484 
1485     if (fold_const2(ctx, op)) {
1486         return true;
1487     }
1488 
1489     t1 = arg_info(op->args[1]);
1490     t2 = arg_info(op->args[2]);
1491 
1492     if (ti_is_const(t2)) {
1493         /* Fold andc r,x,i to and r,x,~i. */
1494         switch (ctx->type) {
1495         case TCG_TYPE_I32:
1496         case TCG_TYPE_I64:
1497             op->opc = INDEX_op_and;
1498             break;
1499         case TCG_TYPE_V64:
1500         case TCG_TYPE_V128:
1501         case TCG_TYPE_V256:
1502             op->opc = INDEX_op_and_vec;
1503             break;
1504         default:
1505             g_assert_not_reached();
1506         }
1507         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1508         return fold_and(ctx, op);
1509     }
1510     if (fold_xx_to_i(ctx, op, 0) ||
1511         fold_ix_to_not(ctx, op, -1)) {
1512         return true;
1513     }
1514 
1515     z_mask = t1->z_mask & ~t2->o_mask;
1516     o_mask = t1->o_mask & ~t2->z_mask;
1517     s_mask = t1->s_mask & t2->s_mask;
1518 
1519     /* Affected bits are those not known zero, masked by those known zero. */
1520     a_mask = t1->z_mask & t2->z_mask;
1521 
1522     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
1523 }
1524 
1525 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1526 {
1527     /* If true and false values are the same, eliminate the cmp. */
1528     if (args_are_copies(op->args[2], op->args[3])) {
1529         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1530     }
1531 
1532     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1533         uint64_t tv = arg_const_val(op->args[2]);
1534         uint64_t fv = arg_const_val(op->args[3]);
1535 
1536         if (tv == -1 && fv == 0) {
1537             return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1538         }
1539         if (tv == 0 && fv == -1) {
1540             if (TCG_TARGET_HAS_not_vec) {
1541                 op->opc = INDEX_op_not_vec;
1542                 return fold_not(ctx, op);
1543             } else {
1544                 op->opc = INDEX_op_xor_vec;
1545                 op->args[2] = arg_new_constant(ctx, -1);
1546                 return fold_xor(ctx, op);
1547             }
1548         }
1549     }
1550     if (arg_is_const(op->args[2])) {
1551         uint64_t tv = arg_const_val(op->args[2]);
1552         if (tv == -1) {
1553             op->opc = INDEX_op_or_vec;
1554             op->args[2] = op->args[3];
1555             return fold_or(ctx, op);
1556         }
1557         if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1558             op->opc = INDEX_op_andc_vec;
1559             op->args[2] = op->args[1];
1560             op->args[1] = op->args[3];
1561             return fold_andc(ctx, op);
1562         }
1563     }
1564     if (arg_is_const(op->args[3])) {
1565         uint64_t fv = arg_const_val(op->args[3]);
1566         if (fv == 0) {
1567             op->opc = INDEX_op_and_vec;
1568             return fold_and(ctx, op);
1569         }
1570         if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1571             op->opc = INDEX_op_orc_vec;
1572             op->args[2] = op->args[1];
1573             op->args[1] = op->args[3];
1574             return fold_orc(ctx, op);
1575         }
1576     }
1577     return finish_folding(ctx, op);
1578 }
1579 
1580 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1581 {
1582     int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1583                                       &op->args[1], &op->args[2]);
1584     if (i == 0) {
1585         tcg_op_remove(ctx->tcg, op);
1586         return true;
1587     }
1588     if (i > 0) {
1589         op->opc = INDEX_op_br;
1590         op->args[0] = op->args[3];
1591         finish_ebb(ctx);
1592     } else {
1593         finish_bb(ctx);
1594     }
1595     return true;
1596 }
1597 
1598 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1599 {
1600     TCGCond cond;
1601     TCGArg label;
1602     int i, inv = 0;
1603 
1604     i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1605     cond = op->args[4];
1606     label = op->args[5];
1607     if (i >= 0) {
1608         goto do_brcond_const;
1609     }
1610 
1611     switch (cond) {
1612     case TCG_COND_LT:
1613     case TCG_COND_GE:
1614         /*
1615          * Simplify LT/GE comparisons vs zero to a single compare
1616          * vs the high word of the input.
1617          */
1618         if (arg_is_const_val(op->args[2], 0) &&
1619             arg_is_const_val(op->args[3], 0)) {
1620             goto do_brcond_high;
1621         }
1622         break;
1623 
1624     case TCG_COND_NE:
1625         inv = 1;
1626         QEMU_FALLTHROUGH;
1627     case TCG_COND_EQ:
1628         /*
1629          * Simplify EQ/NE comparisons where one of the pairs
1630          * can be simplified.
1631          */
1632         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1633                                      op->args[2], cond);
1634         switch (i ^ inv) {
1635         case 0:
1636             goto do_brcond_const;
1637         case 1:
1638             goto do_brcond_high;
1639         }
1640 
1641         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1642                                      op->args[3], cond);
1643         switch (i ^ inv) {
1644         case 0:
1645             goto do_brcond_const;
1646         case 1:
1647             goto do_brcond_low;
1648         }
1649         break;
1650 
1651     case TCG_COND_TSTEQ:
1652     case TCG_COND_TSTNE:
1653         if (arg_is_const_val(op->args[2], 0)) {
1654             goto do_brcond_high;
1655         }
1656         if (arg_is_const_val(op->args[3], 0)) {
1657             goto do_brcond_low;
1658         }
1659         break;
1660 
1661     default:
1662         break;
1663 
1664     do_brcond_low:
1665         op->opc = INDEX_op_brcond;
1666         op->args[1] = op->args[2];
1667         op->args[2] = cond;
1668         op->args[3] = label;
1669         return fold_brcond(ctx, op);
1670 
1671     do_brcond_high:
1672         op->opc = INDEX_op_brcond;
1673         op->args[0] = op->args[1];
1674         op->args[1] = op->args[3];
1675         op->args[2] = cond;
1676         op->args[3] = label;
1677         return fold_brcond(ctx, op);
1678 
1679     do_brcond_const:
1680         if (i == 0) {
1681             tcg_op_remove(ctx->tcg, op);
1682             return true;
1683         }
1684         op->opc = INDEX_op_br;
1685         op->args[0] = label;
1686         finish_ebb(ctx);
1687         return true;
1688     }
1689 
1690     finish_bb(ctx);
1691     return true;
1692 }
1693 
1694 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1695 {
1696     uint64_t z_mask, o_mask, s_mask;
1697     TempOptInfo *t1 = arg_info(op->args[1]);
1698     int flags = op->args[2];
1699 
1700     if (ti_is_const(t1)) {
1701         return tcg_opt_gen_movi(ctx, op, op->args[0],
1702                                 do_constant_folding(op->opc, ctx->type,
1703                                                     ti_const_val(t1), flags));
1704     }
1705 
1706     z_mask = t1->z_mask;
1707     o_mask = t1->o_mask;
1708     s_mask = 0;
1709 
1710     switch (op->opc) {
1711     case INDEX_op_bswap16:
1712         z_mask = bswap16(z_mask);
1713         o_mask = bswap16(o_mask);
1714         if (flags & TCG_BSWAP_OS) {
1715             z_mask = (int16_t)z_mask;
1716             o_mask = (int16_t)o_mask;
1717             s_mask = INT16_MIN;
1718         } else if (!(flags & TCG_BSWAP_OZ)) {
1719             z_mask |= MAKE_64BIT_MASK(16, 48);
1720         }
1721         break;
1722     case INDEX_op_bswap32:
1723         z_mask = bswap32(z_mask);
1724         o_mask = bswap32(o_mask);
1725         if (flags & TCG_BSWAP_OS) {
1726             z_mask = (int32_t)z_mask;
1727             o_mask = (int32_t)o_mask;
1728             s_mask = INT32_MIN;
1729         } else if (!(flags & TCG_BSWAP_OZ)) {
1730             z_mask |= MAKE_64BIT_MASK(32, 32);
1731         }
1732         break;
1733     case INDEX_op_bswap64:
1734         z_mask = bswap64(z_mask);
1735         o_mask = bswap64(o_mask);
1736         break;
1737     default:
1738         g_assert_not_reached();
1739     }
1740 
1741     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1742 }
1743 
1744 static bool fold_call(OptContext *ctx, TCGOp *op)
1745 {
1746     TCGContext *s = ctx->tcg;
1747     int nb_oargs = TCGOP_CALLO(op);
1748     int nb_iargs = TCGOP_CALLI(op);
1749     int flags, i;
1750 
1751     init_arguments(ctx, op, nb_oargs + nb_iargs);
1752     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1753 
1754     /* If the function reads or writes globals, reset temp data. */
1755     flags = tcg_call_flags(op);
1756     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1757         int nb_globals = s->nb_globals;
1758 
1759         for (i = 0; i < nb_globals; i++) {
1760             if (test_bit(i, ctx->temps_used.l)) {
1761                 reset_ts(ctx, &ctx->tcg->temps[i]);
1762             }
1763         }
1764     }
1765 
1766     /* If the function has side effects, reset mem data. */
1767     if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1768         remove_mem_copy_all(ctx);
1769     }
1770 
1771     /* Reset temp data for outputs. */
1772     for (i = 0; i < nb_oargs; i++) {
1773         reset_temp(ctx, op->args[i]);
1774     }
1775 
1776     /* Stop optimizing MB across calls. */
1777     ctx->prev_mb = NULL;
1778     return true;
1779 }
1780 
1781 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1782 {
1783     /* Canonicalize the comparison to put immediate second. */
1784     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1785         op->args[3] = tcg_swap_cond(op->args[3]);
1786     }
1787     return finish_folding(ctx, op);
1788 }
1789 
1790 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1791 {
1792     /* If true and false values are the same, eliminate the cmp. */
1793     if (args_are_copies(op->args[3], op->args[4])) {
1794         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1795     }
1796 
1797     /* Canonicalize the comparison to put immediate second. */
1798     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1799         op->args[5] = tcg_swap_cond(op->args[5]);
1800     }
1801     /*
1802      * Canonicalize the "false" input reg to match the destination,
1803      * so that the tcg backend can implement "move if true".
1804      */
1805     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1806         op->args[5] = tcg_invert_cond(op->args[5]);
1807     }
1808     return finish_folding(ctx, op);
1809 }
1810 
1811 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1812 {
1813     uint64_t z_mask, s_mask;
1814     TempOptInfo *t1 = arg_info(op->args[1]);
1815     TempOptInfo *t2 = arg_info(op->args[2]);
1816 
1817     if (ti_is_const(t1)) {
1818         uint64_t t = ti_const_val(t1);
1819 
1820         if (t != 0) {
1821             t = do_constant_folding(op->opc, ctx->type, t, 0);
1822             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1823         }
1824         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1825     }
1826 
1827     switch (ctx->type) {
1828     case TCG_TYPE_I32:
1829         z_mask = 31;
1830         break;
1831     case TCG_TYPE_I64:
1832         z_mask = 63;
1833         break;
1834     default:
1835         g_assert_not_reached();
1836     }
1837     s_mask = ~z_mask;
1838     z_mask |= t2->z_mask;
1839     s_mask &= t2->s_mask;
1840 
1841     return fold_masks_zs(ctx, op, z_mask, s_mask);
1842 }
1843 
1844 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1845 {
1846     uint64_t z_mask;
1847 
1848     if (fold_const1(ctx, op)) {
1849         return true;
1850     }
1851 
1852     switch (ctx->type) {
1853     case TCG_TYPE_I32:
1854         z_mask = 32 | 31;
1855         break;
1856     case TCG_TYPE_I64:
1857         z_mask = 64 | 63;
1858         break;
1859     default:
1860         g_assert_not_reached();
1861     }
1862     return fold_masks_z(ctx, op, z_mask);
1863 }
1864 
1865 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1866 {
1867     TempOptInfo *t1 = arg_info(op->args[1]);
1868     TempOptInfo *t2 = arg_info(op->args[2]);
1869     int ofs = op->args[3];
1870     int len = op->args[4];
1871     int width = 8 * tcg_type_size(ctx->type);
1872     uint64_t z_mask, o_mask, s_mask;
1873 
1874     if (ti_is_const(t1) && ti_is_const(t2)) {
1875         return tcg_opt_gen_movi(ctx, op, op->args[0],
1876                                 deposit64(ti_const_val(t1), ofs, len,
1877                                           ti_const_val(t2)));
1878     }
1879 
1880     /* Inserting a value into zero at offset 0. */
1881     if (ti_is_const_val(t1, 0) && ofs == 0) {
1882         uint64_t mask = MAKE_64BIT_MASK(0, len);
1883 
1884         op->opc = INDEX_op_and;
1885         op->args[1] = op->args[2];
1886         op->args[2] = arg_new_constant(ctx, mask);
1887         return fold_and(ctx, op);
1888     }
1889 
1890     /* Inserting zero into a value. */
1891     if (ti_is_const_val(t2, 0)) {
1892         uint64_t mask = deposit64(-1, ofs, len, 0);
1893 
1894         op->opc = INDEX_op_and;
1895         op->args[2] = arg_new_constant(ctx, mask);
1896         return fold_and(ctx, op);
1897     }
1898 
1899     /* The s_mask from the top portion of the deposit is still valid. */
1900     if (ofs + len == width) {
1901         s_mask = t2->s_mask << ofs;
1902     } else {
1903         s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1904     }
1905 
1906     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1907     o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1908 
1909     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1910 }
1911 
1912 static bool fold_divide(OptContext *ctx, TCGOp *op)
1913 {
1914     if (fold_const2(ctx, op) ||
1915         fold_xi_to_x(ctx, op, 1)) {
1916         return true;
1917     }
1918     return finish_folding(ctx, op);
1919 }
1920 
1921 static bool fold_dup(OptContext *ctx, TCGOp *op)
1922 {
1923     if (arg_is_const(op->args[1])) {
1924         uint64_t t = arg_const_val(op->args[1]);
1925         t = dup_const(TCGOP_VECE(op), t);
1926         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1927     }
1928     return finish_folding(ctx, op);
1929 }
1930 
1931 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1932 {
1933     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1934         uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1935                                arg_const_val(op->args[2]));
1936         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1937     }
1938 
1939     if (args_are_copies(op->args[1], op->args[2])) {
1940         op->opc = INDEX_op_dup_vec;
1941         TCGOP_VECE(op) = MO_32;
1942     }
1943     return finish_folding(ctx, op);
1944 }
1945 
1946 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1947 {
1948     uint64_t z_mask, o_mask, s_mask;
1949     TempOptInfo *t1, *t2;
1950 
1951     if (fold_const2_commutative(ctx, op)) {
1952         return true;
1953     }
1954 
1955     t2 = arg_info(op->args[2]);
1956     if (ti_is_const(t2)) {
1957         /* Fold eqv r,x,i to xor r,x,~i. */
1958         switch (ctx->type) {
1959         case TCG_TYPE_I32:
1960         case TCG_TYPE_I64:
1961             op->opc = INDEX_op_xor;
1962             break;
1963         case TCG_TYPE_V64:
1964         case TCG_TYPE_V128:
1965         case TCG_TYPE_V256:
1966             op->opc = INDEX_op_xor_vec;
1967             break;
1968         default:
1969             g_assert_not_reached();
1970         }
1971         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1972         return fold_xor(ctx, op);
1973     }
1974 
1975     t1 = arg_info(op->args[1]);
1976 
1977     z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1978     o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1979     s_mask = t1->s_mask & t2->s_mask;
1980 
1981     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1982 }
1983 
1984 static bool fold_extract(OptContext *ctx, TCGOp *op)
1985 {
1986     uint64_t z_mask, o_mask, a_mask;
1987     TempOptInfo *t1 = arg_info(op->args[1]);
1988     int pos = op->args[2];
1989     int len = op->args[3];
1990 
1991     if (ti_is_const(t1)) {
1992         return tcg_opt_gen_movi(ctx, op, op->args[0],
1993                                 extract64(ti_const_val(t1), pos, len));
1994     }
1995 
1996     z_mask = extract64(t1->z_mask, pos, len);
1997     o_mask = extract64(t1->o_mask, pos, len);
1998     a_mask = pos ? -1 : t1->z_mask ^ z_mask;
1999 
2000     return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2001 }
2002 
2003 static bool fold_extract2(OptContext *ctx, TCGOp *op)
2004 {
2005     TempOptInfo *t1 = arg_info(op->args[1]);
2006     TempOptInfo *t2 = arg_info(op->args[2]);
2007     uint64_t z1 = t1->z_mask;
2008     uint64_t z2 = t2->z_mask;
2009     uint64_t o1 = t1->o_mask;
2010     uint64_t o2 = t2->o_mask;
2011     int shr = op->args[3];
2012 
2013     if (ctx->type == TCG_TYPE_I32) {
2014         z1 = (uint32_t)z1 >> shr;
2015         o1 = (uint32_t)o1 >> shr;
2016         z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2017         o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2018     } else {
2019         z1 >>= shr;
2020         o1 >>= shr;
2021         z2 <<= 64 - shr;
2022         o2 <<= 64 - shr;
2023     }
2024 
2025     return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2026 }
2027 
2028 static bool fold_exts(OptContext *ctx, TCGOp *op)
2029 {
2030     uint64_t z_mask, o_mask, s_mask;
2031     TempOptInfo *t1;
2032 
2033     if (fold_const1(ctx, op)) {
2034         return true;
2035     }
2036 
2037     t1 = arg_info(op->args[1]);
2038     z_mask = t1->z_mask;
2039     o_mask = t1->o_mask;
2040     s_mask = t1->s_mask;
2041 
2042     switch (op->opc) {
2043     case INDEX_op_ext_i32_i64:
2044         s_mask |= INT32_MIN;
2045         z_mask = (int32_t)z_mask;
2046         o_mask = (int32_t)o_mask;
2047         break;
2048     default:
2049         g_assert_not_reached();
2050     }
2051     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2052 }
2053 
2054 static bool fold_extu(OptContext *ctx, TCGOp *op)
2055 {
2056     uint64_t z_mask, o_mask;
2057     TempOptInfo *t1;
2058 
2059     if (fold_const1(ctx, op)) {
2060         return true;
2061     }
2062 
2063     t1 = arg_info(op->args[1]);
2064     z_mask = t1->z_mask;
2065     o_mask = t1->o_mask;
2066 
2067     switch (op->opc) {
2068     case INDEX_op_extrl_i64_i32:
2069     case INDEX_op_extu_i32_i64:
2070         z_mask = (uint32_t)z_mask;
2071         o_mask = (uint32_t)o_mask;
2072         break;
2073     case INDEX_op_extrh_i64_i32:
2074         z_mask >>= 32;
2075         o_mask >>= 32;
2076         break;
2077     default:
2078         g_assert_not_reached();
2079     }
2080     return fold_masks_zo(ctx, op, z_mask, o_mask);
2081 }
2082 
2083 static bool fold_mb(OptContext *ctx, TCGOp *op)
2084 {
2085     /* Eliminate duplicate and redundant fence instructions.  */
2086     if (ctx->prev_mb) {
2087         /*
2088          * Merge two barriers of the same type into one,
2089          * or a weaker barrier into a stronger one,
2090          * or two weaker barriers into a stronger one.
2091          *   mb X; mb Y => mb X|Y
2092          *   mb; strl => mb; st
2093          *   ldaq; mb => ld; mb
2094          *   ldaq; strl => ld; mb; st
2095          * Other combinations are also merged into a strong
2096          * barrier.  This is stricter than specified but for
2097          * the purposes of TCG is better than not optimizing.
2098          */
2099         ctx->prev_mb->args[0] |= op->args[0];
2100         tcg_op_remove(ctx->tcg, op);
2101     } else {
2102         ctx->prev_mb = op;
2103     }
2104     return true;
2105 }
2106 
2107 static bool fold_mov(OptContext *ctx, TCGOp *op)
2108 {
2109     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2110 }
2111 
2112 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2113 {
2114     uint64_t z_mask, o_mask, s_mask;
2115     TempOptInfo *tt, *ft;
2116     int i;
2117 
2118     /* If true and false values are the same, eliminate the cmp. */
2119     if (args_are_copies(op->args[3], op->args[4])) {
2120         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2121     }
2122 
2123     /*
2124      * Canonicalize the "false" input reg to match the destination reg so
2125      * that the tcg backend can implement a "move if true" operation.
2126      */
2127     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2128         op->args[5] = tcg_invert_cond(op->args[5]);
2129     }
2130 
2131     i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2132                                   &op->args[2], &op->args[5]);
2133     if (i >= 0) {
2134         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2135     }
2136 
2137     tt = arg_info(op->args[3]);
2138     ft = arg_info(op->args[4]);
2139     z_mask = tt->z_mask | ft->z_mask;
2140     o_mask = tt->o_mask & ft->o_mask;
2141     s_mask = tt->s_mask & ft->s_mask;
2142 
2143     if (ti_is_const(tt) && ti_is_const(ft)) {
2144         uint64_t tv = ti_const_val(tt);
2145         uint64_t fv = ti_const_val(ft);
2146         TCGCond cond = op->args[5];
2147 
2148         if (tv == 1 && fv == 0) {
2149             op->opc = INDEX_op_setcond;
2150             op->args[3] = cond;
2151         } else if (fv == 1 && tv == 0) {
2152             op->opc = INDEX_op_setcond;
2153             op->args[3] = tcg_invert_cond(cond);
2154         } else if (tv == -1 && fv == 0) {
2155             op->opc = INDEX_op_negsetcond;
2156             op->args[3] = cond;
2157         } else if (fv == -1 && tv == 0) {
2158             op->opc = INDEX_op_negsetcond;
2159             op->args[3] = tcg_invert_cond(cond);
2160         }
2161     }
2162 
2163     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2164 }
2165 
2166 static bool fold_mul(OptContext *ctx, TCGOp *op)
2167 {
2168     if (fold_const2(ctx, op) ||
2169         fold_xi_to_i(ctx, op, 0) ||
2170         fold_xi_to_x(ctx, op, 1)) {
2171         return true;
2172     }
2173     return finish_folding(ctx, op);
2174 }
2175 
2176 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2177 {
2178     if (fold_const2_commutative(ctx, op) ||
2179         fold_xi_to_i(ctx, op, 0)) {
2180         return true;
2181     }
2182     return finish_folding(ctx, op);
2183 }
2184 
2185 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2186 {
2187     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2188 
2189     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2190         uint64_t a = arg_const_val(op->args[2]);
2191         uint64_t b = arg_const_val(op->args[3]);
2192         uint64_t h, l;
2193         TCGArg rl, rh;
2194         TCGOp *op2;
2195 
2196         switch (op->opc) {
2197         case INDEX_op_mulu2:
2198             if (ctx->type == TCG_TYPE_I32) {
2199                 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2200                 h = (int32_t)(l >> 32);
2201                 l = (int32_t)l;
2202             } else {
2203                 mulu64(&l, &h, a, b);
2204             }
2205             break;
2206         case INDEX_op_muls2:
2207             if (ctx->type == TCG_TYPE_I32) {
2208                 l = (int64_t)(int32_t)a * (int32_t)b;
2209                 h = l >> 32;
2210                 l = (int32_t)l;
2211             } else {
2212                 muls64(&l, &h, a, b);
2213             }
2214             break;
2215         default:
2216             g_assert_not_reached();
2217         }
2218 
2219         rl = op->args[0];
2220         rh = op->args[1];
2221 
2222         /* The proper opcode is supplied by tcg_opt_gen_mov. */
2223         op2 = opt_insert_before(ctx, op, 0, 2);
2224 
2225         tcg_opt_gen_movi(ctx, op, rl, l);
2226         tcg_opt_gen_movi(ctx, op2, rh, h);
2227         return true;
2228     }
2229     return finish_folding(ctx, op);
2230 }
2231 
2232 static bool fold_nand(OptContext *ctx, TCGOp *op)
2233 {
2234     uint64_t z_mask, o_mask, s_mask;
2235     TempOptInfo *t1, *t2;
2236 
2237     if (fold_const2_commutative(ctx, op) ||
2238         fold_xi_to_not(ctx, op, -1)) {
2239         return true;
2240     }
2241 
2242     t1 = arg_info(op->args[1]);
2243     t2 = arg_info(op->args[2]);
2244 
2245     z_mask = ~(t1->o_mask & t2->o_mask);
2246     o_mask = ~(t1->z_mask & t2->z_mask);
2247     s_mask = t1->s_mask & t2->s_mask;
2248 
2249     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2250 }
2251 
2252 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2253 {
2254     /* Set to 1 all bits to the left of the rightmost.  */
2255     uint64_t z_mask = arg_info(op->args[1])->z_mask;
2256     z_mask = -(z_mask & -z_mask);
2257 
2258     return fold_masks_z(ctx, op, z_mask);
2259 }
2260 
2261 static bool fold_neg(OptContext *ctx, TCGOp *op)
2262 {
2263     return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2264 }
2265 
2266 static bool fold_nor(OptContext *ctx, TCGOp *op)
2267 {
2268     uint64_t z_mask, o_mask, s_mask;
2269     TempOptInfo *t1, *t2;
2270 
2271     if (fold_const2_commutative(ctx, op) ||
2272         fold_xi_to_not(ctx, op, 0)) {
2273         return true;
2274     }
2275 
2276     t1 = arg_info(op->args[1]);
2277     t2 = arg_info(op->args[2]);
2278 
2279     z_mask = ~(t1->o_mask | t2->o_mask);
2280     o_mask = ~(t1->z_mask | t2->z_mask);
2281     s_mask = t1->s_mask & t2->s_mask;
2282 
2283     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2284 }
2285 
2286 static bool fold_not(OptContext *ctx, TCGOp *op)
2287 {
2288     TempOptInfo *t1;
2289 
2290     if (fold_const1(ctx, op)) {
2291         return true;
2292     }
2293 
2294     t1 = arg_info(op->args[1]);
2295     return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2296 }
2297 
2298 static bool fold_or(OptContext *ctx, TCGOp *op)
2299 {
2300     uint64_t z_mask, o_mask, s_mask, a_mask;
2301     TempOptInfo *t1, *t2;
2302 
2303     if (fold_const2_commutative(ctx, op) ||
2304         fold_xi_to_x(ctx, op, 0) ||
2305         fold_xx_to_x(ctx, op)) {
2306         return true;
2307     }
2308 
2309     t1 = arg_info(op->args[1]);
2310     t2 = arg_info(op->args[2]);
2311 
2312     z_mask = t1->z_mask | t2->z_mask;
2313     o_mask = t1->o_mask | t2->o_mask;
2314     s_mask = t1->s_mask & t2->s_mask;
2315 
2316     /* Affected bits are those not known one, masked by those known zero. */
2317     a_mask = ~t1->o_mask & t2->z_mask;
2318 
2319     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2320 }
2321 
2322 static bool fold_orc(OptContext *ctx, TCGOp *op)
2323 {
2324     uint64_t z_mask, o_mask, s_mask, a_mask;
2325     TempOptInfo *t1, *t2;
2326 
2327     if (fold_const2(ctx, op)) {
2328         return true;
2329     }
2330 
2331     t2 = arg_info(op->args[2]);
2332     if (ti_is_const(t2)) {
2333         /* Fold orc r,x,i to or r,x,~i. */
2334         switch (ctx->type) {
2335         case TCG_TYPE_I32:
2336         case TCG_TYPE_I64:
2337             op->opc = INDEX_op_or;
2338             break;
2339         case TCG_TYPE_V64:
2340         case TCG_TYPE_V128:
2341         case TCG_TYPE_V256:
2342             op->opc = INDEX_op_or_vec;
2343             break;
2344         default:
2345             g_assert_not_reached();
2346         }
2347         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2348         return fold_or(ctx, op);
2349     }
2350     if (fold_xx_to_i(ctx, op, -1) ||
2351         fold_ix_to_not(ctx, op, 0)) {
2352         return true;
2353     }
2354     t1 = arg_info(op->args[1]);
2355 
2356     z_mask = t1->z_mask | ~t2->o_mask;
2357     o_mask = t1->o_mask | ~t2->z_mask;
2358     s_mask = t1->s_mask & t2->s_mask;
2359 
2360     /* Affected bits are those not known one, masked by those known one. */
2361     a_mask = ~t1->o_mask & t2->o_mask;
2362 
2363     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2364 }
2365 
2366 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2367 {
2368     const TCGOpDef *def = &tcg_op_defs[op->opc];
2369     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2370     MemOp mop = get_memop(oi);
2371     int width = 8 * memop_size(mop);
2372     uint64_t z_mask = -1, s_mask = 0;
2373 
2374     if (width < 64) {
2375         if (mop & MO_SIGN) {
2376             s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2377         } else {
2378             z_mask = MAKE_64BIT_MASK(0, width);
2379         }
2380     }
2381 
2382     /* Opcodes that touch guest memory stop the mb optimization.  */
2383     ctx->prev_mb = NULL;
2384 
2385     return fold_masks_zs(ctx, op, z_mask, s_mask);
2386 }
2387 
2388 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2389 {
2390     /* Opcodes that touch guest memory stop the mb optimization.  */
2391     ctx->prev_mb = NULL;
2392     return finish_folding(ctx, op);
2393 }
2394 
2395 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2396 {
2397     /* Opcodes that touch guest memory stop the mb optimization.  */
2398     ctx->prev_mb = NULL;
2399     return true;
2400 }
2401 
2402 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2403 {
2404     if (fold_const2(ctx, op) ||
2405         fold_xx_to_i(ctx, op, 0)) {
2406         return true;
2407     }
2408     return finish_folding(ctx, op);
2409 }
2410 
2411 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2412 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2413 {
2414     uint64_t a_zmask, b_val;
2415     TCGCond cond;
2416 
2417     if (!arg_is_const(op->args[2])) {
2418         return false;
2419     }
2420 
2421     a_zmask = arg_info(op->args[1])->z_mask;
2422     b_val = arg_const_val(op->args[2]);
2423     cond = op->args[3];
2424 
2425     if (ctx->type == TCG_TYPE_I32) {
2426         a_zmask = (uint32_t)a_zmask;
2427         b_val = (uint32_t)b_val;
2428     }
2429 
2430     /*
2431      * A with only low bits set vs B with high bits set means that A < B.
2432      */
2433     if (a_zmask < b_val) {
2434         bool inv = false;
2435 
2436         switch (cond) {
2437         case TCG_COND_NE:
2438         case TCG_COND_LEU:
2439         case TCG_COND_LTU:
2440             inv = true;
2441             /* fall through */
2442         case TCG_COND_GTU:
2443         case TCG_COND_GEU:
2444         case TCG_COND_EQ:
2445             return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2446         default:
2447             break;
2448         }
2449     }
2450 
2451     /*
2452      * A with only lsb set is already boolean.
2453      */
2454     if (a_zmask <= 1) {
2455         bool convert = false;
2456         bool inv = false;
2457 
2458         switch (cond) {
2459         case TCG_COND_EQ:
2460             inv = true;
2461             /* fall through */
2462         case TCG_COND_NE:
2463             convert = (b_val == 0);
2464             break;
2465         case TCG_COND_LTU:
2466         case TCG_COND_TSTEQ:
2467             inv = true;
2468             /* fall through */
2469         case TCG_COND_GEU:
2470         case TCG_COND_TSTNE:
2471             convert = (b_val == 1);
2472             break;
2473         default:
2474             break;
2475         }
2476         if (convert) {
2477             if (!inv && !neg) {
2478                 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2479             }
2480 
2481             if (!inv) {
2482                 op->opc = INDEX_op_neg;
2483             } else if (neg) {
2484                 op->opc = INDEX_op_add;
2485                 op->args[2] = arg_new_constant(ctx, -1);
2486             } else {
2487                 op->opc = INDEX_op_xor;
2488                 op->args[2] = arg_new_constant(ctx, 1);
2489             }
2490             return -1;
2491         }
2492     }
2493     return 0;
2494 }
2495 
2496 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2497 {
2498     TCGCond cond = op->args[3];
2499     TCGArg ret, src1, src2;
2500     TCGOp *op2;
2501     uint64_t val;
2502     int sh;
2503     bool inv;
2504 
2505     if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2506         return;
2507     }
2508 
2509     src2 = op->args[2];
2510     val = arg_const_val(src2);
2511     if (!is_power_of_2(val)) {
2512         return;
2513     }
2514     sh = ctz64(val);
2515 
2516     ret = op->args[0];
2517     src1 = op->args[1];
2518     inv = cond == TCG_COND_TSTEQ;
2519 
2520     if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2521         op->opc = INDEX_op_sextract;
2522         op->args[1] = src1;
2523         op->args[2] = sh;
2524         op->args[3] = 1;
2525         return;
2526     } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2527         op->opc = INDEX_op_extract;
2528         op->args[1] = src1;
2529         op->args[2] = sh;
2530         op->args[3] = 1;
2531     } else {
2532         if (sh) {
2533             op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2534             op2->args[0] = ret;
2535             op2->args[1] = src1;
2536             op2->args[2] = arg_new_constant(ctx, sh);
2537             src1 = ret;
2538         }
2539         op->opc = INDEX_op_and;
2540         op->args[1] = src1;
2541         op->args[2] = arg_new_constant(ctx, 1);
2542     }
2543 
2544     if (neg && inv) {
2545         op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2546         op2->args[0] = ret;
2547         op2->args[1] = ret;
2548         op2->args[2] = arg_new_constant(ctx, -1);
2549     } else if (inv) {
2550         op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2551         op2->args[0] = ret;
2552         op2->args[1] = ret;
2553         op2->args[2] = arg_new_constant(ctx, 1);
2554     } else if (neg) {
2555         op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2556         op2->args[0] = ret;
2557         op2->args[1] = ret;
2558     }
2559 }
2560 
2561 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2562 {
2563     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2564                                       &op->args[2], &op->args[3]);
2565     if (i >= 0) {
2566         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2567     }
2568 
2569     i = fold_setcond_zmask(ctx, op, false);
2570     if (i > 0) {
2571         return true;
2572     }
2573     if (i == 0) {
2574         fold_setcond_tst_pow2(ctx, op, false);
2575     }
2576 
2577     return fold_masks_z(ctx, op, 1);
2578 }
2579 
2580 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2581 {
2582     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2583                                       &op->args[2], &op->args[3]);
2584     if (i >= 0) {
2585         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2586     }
2587 
2588     i = fold_setcond_zmask(ctx, op, true);
2589     if (i > 0) {
2590         return true;
2591     }
2592     if (i == 0) {
2593         fold_setcond_tst_pow2(ctx, op, true);
2594     }
2595 
2596     /* Value is {0,-1} so all bits are repetitions of the sign. */
2597     return fold_masks_s(ctx, op, -1);
2598 }
2599 
2600 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2601 {
2602     TCGCond cond;
2603     int i, inv = 0;
2604 
2605     i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2606     cond = op->args[5];
2607     if (i >= 0) {
2608         goto do_setcond_const;
2609     }
2610 
2611     switch (cond) {
2612     case TCG_COND_LT:
2613     case TCG_COND_GE:
2614         /*
2615          * Simplify LT/GE comparisons vs zero to a single compare
2616          * vs the high word of the input.
2617          */
2618         if (arg_is_const_val(op->args[3], 0) &&
2619             arg_is_const_val(op->args[4], 0)) {
2620             goto do_setcond_high;
2621         }
2622         break;
2623 
2624     case TCG_COND_NE:
2625         inv = 1;
2626         QEMU_FALLTHROUGH;
2627     case TCG_COND_EQ:
2628         /*
2629          * Simplify EQ/NE comparisons where one of the pairs
2630          * can be simplified.
2631          */
2632         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2633                                      op->args[3], cond);
2634         switch (i ^ inv) {
2635         case 0:
2636             goto do_setcond_const;
2637         case 1:
2638             goto do_setcond_high;
2639         }
2640 
2641         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2642                                      op->args[4], cond);
2643         switch (i ^ inv) {
2644         case 0:
2645             goto do_setcond_const;
2646         case 1:
2647             goto do_setcond_low;
2648         }
2649         break;
2650 
2651     case TCG_COND_TSTEQ:
2652     case TCG_COND_TSTNE:
2653         if (arg_is_const_val(op->args[3], 0)) {
2654             goto do_setcond_high;
2655         }
2656         if (arg_is_const_val(op->args[4], 0)) {
2657             goto do_setcond_low;
2658         }
2659         break;
2660 
2661     default:
2662         break;
2663 
2664     do_setcond_low:
2665         op->args[2] = op->args[3];
2666         op->args[3] = cond;
2667         op->opc = INDEX_op_setcond;
2668         return fold_setcond(ctx, op);
2669 
2670     do_setcond_high:
2671         op->args[1] = op->args[2];
2672         op->args[2] = op->args[4];
2673         op->args[3] = cond;
2674         op->opc = INDEX_op_setcond;
2675         return fold_setcond(ctx, op);
2676     }
2677 
2678     return fold_masks_z(ctx, op, 1);
2679 
2680  do_setcond_const:
2681     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2682 }
2683 
2684 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2685 {
2686     uint64_t z_mask, o_mask, s_mask, a_mask;
2687     TempOptInfo *t1 = arg_info(op->args[1]);
2688     int pos = op->args[2];
2689     int len = op->args[3];
2690 
2691     if (ti_is_const(t1)) {
2692         return tcg_opt_gen_movi(ctx, op, op->args[0],
2693                                 sextract64(ti_const_val(t1), pos, len));
2694     }
2695 
2696     s_mask = t1->s_mask >> pos;
2697     s_mask |= -1ull << (len - 1);
2698     a_mask = pos ? -1 : s_mask & ~t1->s_mask;
2699 
2700     z_mask = sextract64(t1->z_mask, pos, len);
2701     o_mask = sextract64(t1->o_mask, pos, len);
2702 
2703     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2704 }
2705 
2706 static bool fold_shift(OptContext *ctx, TCGOp *op)
2707 {
2708     uint64_t s_mask, z_mask, o_mask;
2709     TempOptInfo *t1, *t2;
2710 
2711     if (fold_const2(ctx, op) ||
2712         fold_ix_to_i(ctx, op, 0) ||
2713         fold_xi_to_x(ctx, op, 0)) {
2714         return true;
2715     }
2716 
2717     t1 = arg_info(op->args[1]);
2718     t2 = arg_info(op->args[2]);
2719     s_mask = t1->s_mask;
2720     z_mask = t1->z_mask;
2721     o_mask = t1->o_mask;
2722 
2723     if (ti_is_const(t2)) {
2724         int sh = ti_const_val(t2);
2725 
2726         z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2727         o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2728         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2729 
2730         return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2731     }
2732 
2733     switch (op->opc) {
2734     case INDEX_op_sar:
2735         /*
2736          * Arithmetic right shift will not reduce the number of
2737          * input sign repetitions.
2738          */
2739         return fold_masks_s(ctx, op, s_mask);
2740     case INDEX_op_shr:
2741         /*
2742          * If the sign bit is known zero, then logical right shift
2743          * will not reduce the number of input sign repetitions.
2744          */
2745         if (~z_mask & -s_mask) {
2746             return fold_masks_s(ctx, op, s_mask);
2747         }
2748         break;
2749     default:
2750         break;
2751     }
2752 
2753     return finish_folding(ctx, op);
2754 }
2755 
2756 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2757 {
2758     TCGOpcode neg_op;
2759     bool have_neg;
2760 
2761     if (!arg_is_const_val(op->args[1], 0)) {
2762         return false;
2763     }
2764 
2765     switch (ctx->type) {
2766     case TCG_TYPE_I32:
2767     case TCG_TYPE_I64:
2768         neg_op = INDEX_op_neg;
2769         have_neg = true;
2770         break;
2771     case TCG_TYPE_V64:
2772     case TCG_TYPE_V128:
2773     case TCG_TYPE_V256:
2774         neg_op = INDEX_op_neg_vec;
2775         have_neg = (TCG_TARGET_HAS_neg_vec &&
2776                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2777         break;
2778     default:
2779         g_assert_not_reached();
2780     }
2781     if (have_neg) {
2782         op->opc = neg_op;
2783         op->args[1] = op->args[2];
2784         return fold_neg_no_const(ctx, op);
2785     }
2786     return false;
2787 }
2788 
2789 /* We cannot as yet do_constant_folding with vectors. */
2790 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2791 {
2792     if (fold_xx_to_i(ctx, op, 0) ||
2793         fold_xi_to_x(ctx, op, 0) ||
2794         fold_sub_to_neg(ctx, op)) {
2795         return true;
2796     }
2797     return finish_folding(ctx, op);
2798 }
2799 
2800 static bool fold_sub(OptContext *ctx, TCGOp *op)
2801 {
2802     if (fold_const2(ctx, op) ||
2803         fold_xx_to_i(ctx, op, 0) ||
2804         fold_xi_to_x(ctx, op, 0) ||
2805         fold_sub_to_neg(ctx, op)) {
2806         return true;
2807     }
2808 
2809     /* Fold sub r,x,i to add r,x,-i */
2810     if (arg_is_const(op->args[2])) {
2811         uint64_t val = arg_const_val(op->args[2]);
2812 
2813         op->opc = INDEX_op_add;
2814         op->args[2] = arg_new_constant(ctx, -val);
2815     }
2816     return finish_folding(ctx, op);
2817 }
2818 
2819 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2820 {
2821     TempOptInfo *t2;
2822 
2823     op = QTAILQ_PREV(op, link);
2824     switch (op->opc) {
2825     case INDEX_op_subbo:
2826         op->opc = INDEX_op_sub;
2827         fold_sub(ctx, op);
2828         break;
2829     case INDEX_op_subbio:
2830         op->opc = INDEX_op_subbi;
2831         break;
2832     case INDEX_op_subb1o:
2833         t2 = arg_info(op->args[2]);
2834         if (ti_is_const(t2)) {
2835             op->opc = INDEX_op_add;
2836             op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2837             /* Perform other constant folding, if needed. */
2838             fold_add(ctx, op);
2839         } else {
2840             TCGArg ret = op->args[0];
2841             op->opc = INDEX_op_sub;
2842             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2843             op->args[0] = ret;
2844             op->args[1] = ret;
2845             op->args[2] = arg_new_constant(ctx, -1);
2846         }
2847         break;
2848     default:
2849         g_assert_not_reached();
2850     }
2851 }
2852 
2853 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2854 {
2855     TempOptInfo *t2;
2856     int borrow_in = ctx->carry_state;
2857 
2858     if (borrow_in < 0) {
2859         return finish_folding(ctx, op);
2860     }
2861     ctx->carry_state = -1;
2862 
2863     squash_prev_borrowout(ctx, op);
2864     if (borrow_in == 0) {
2865         op->opc = INDEX_op_sub;
2866         return fold_sub(ctx, op);
2867     }
2868 
2869     /*
2870      * Propagate the known carry-in into any constant, then negate to
2871      * transform from sub to add.  If there is no constant, emit a
2872      * separate add -1.
2873      */
2874     t2 = arg_info(op->args[2]);
2875     if (ti_is_const(t2)) {
2876         op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2877     } else {
2878         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2879 
2880         op2->args[0] = op->args[0];
2881         op2->args[1] = op->args[1];
2882         op2->args[2] = op->args[2];
2883         fold_sub(ctx, op2);
2884 
2885         op->args[1] = op->args[0];
2886         op->args[2] = arg_new_constant(ctx, -1);
2887     }
2888     op->opc = INDEX_op_add;
2889     return fold_add(ctx, op);
2890 }
2891 
2892 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2893 {
2894     TempOptInfo *t1, *t2;
2895     int borrow_out = -1;
2896 
2897     if (ctx->carry_state < 0) {
2898         return finish_folding(ctx, op);
2899     }
2900 
2901     squash_prev_borrowout(ctx, op);
2902     if (ctx->carry_state == 0) {
2903         goto do_subbo;
2904     }
2905 
2906     t1 = arg_info(op->args[1]);
2907     t2 = arg_info(op->args[2]);
2908 
2909     /* Propagate the known borrow-in into a constant, if possible. */
2910     if (ti_is_const(t2)) {
2911         uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2912         uint64_t v = ti_const_val(t2) & max;
2913 
2914         if (v < max) {
2915             op->args[2] = arg_new_constant(ctx, v + 1);
2916             goto do_subbo;
2917         }
2918         /* subtracting max + 1 produces known borrow out. */
2919         borrow_out = 1;
2920     }
2921     if (ti_is_const(t1)) {
2922         uint64_t v = ti_const_val(t1);
2923         if (v != 0) {
2924             op->args[2] = arg_new_constant(ctx, v - 1);
2925             goto do_subbo;
2926         }
2927     }
2928 
2929     /* Adjust the opcode to remember the known carry-in. */
2930     op->opc = INDEX_op_subb1o;
2931     ctx->carry_state = borrow_out;
2932     return finish_folding(ctx, op);
2933 
2934  do_subbo:
2935     op->opc = INDEX_op_subbo;
2936     return fold_subbo(ctx, op);
2937 }
2938 
2939 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2940 {
2941     TempOptInfo *t1 = arg_info(op->args[1]);
2942     TempOptInfo *t2 = arg_info(op->args[2]);
2943     int borrow_out = -1;
2944 
2945     if (ti_is_const(t2)) {
2946         uint64_t v2 = ti_const_val(t2);
2947         if (v2 == 0) {
2948             borrow_out = 0;
2949         } else if (ti_is_const(t1)) {
2950             uint64_t v1 = ti_const_val(t1);
2951             borrow_out = v1 < v2;
2952         }
2953     }
2954     ctx->carry_state = borrow_out;
2955     return finish_folding(ctx, op);
2956 }
2957 
2958 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2959 {
2960     uint64_t z_mask = -1, s_mask = 0;
2961 
2962     /* We can't do any folding with a load, but we can record bits. */
2963     switch (op->opc) {
2964     case INDEX_op_ld8s:
2965         s_mask = INT8_MIN;
2966         break;
2967     case INDEX_op_ld8u:
2968         z_mask = MAKE_64BIT_MASK(0, 8);
2969         break;
2970     case INDEX_op_ld16s:
2971         s_mask = INT16_MIN;
2972         break;
2973     case INDEX_op_ld16u:
2974         z_mask = MAKE_64BIT_MASK(0, 16);
2975         break;
2976     case INDEX_op_ld32s:
2977         s_mask = INT32_MIN;
2978         break;
2979     case INDEX_op_ld32u:
2980         z_mask = MAKE_64BIT_MASK(0, 32);
2981         break;
2982     default:
2983         g_assert_not_reached();
2984     }
2985     return fold_masks_zs(ctx, op, z_mask, s_mask);
2986 }
2987 
2988 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2989 {
2990     TCGTemp *dst, *src;
2991     intptr_t ofs;
2992     TCGType type;
2993 
2994     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2995         return finish_folding(ctx, op);
2996     }
2997 
2998     type = ctx->type;
2999     ofs = op->args[2];
3000     dst = arg_temp(op->args[0]);
3001     src = find_mem_copy_for(ctx, type, ofs);
3002     if (src && src->base_type == type) {
3003         return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3004     }
3005 
3006     reset_ts(ctx, dst);
3007     record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3008     return true;
3009 }
3010 
3011 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3012 {
3013     intptr_t ofs = op->args[2];
3014     intptr_t lm1;
3015 
3016     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3017         remove_mem_copy_all(ctx);
3018         return true;
3019     }
3020 
3021     switch (op->opc) {
3022     case INDEX_op_st8:
3023         lm1 = 0;
3024         break;
3025     case INDEX_op_st16:
3026         lm1 = 1;
3027         break;
3028     case INDEX_op_st32:
3029         lm1 = 3;
3030         break;
3031     case INDEX_op_st:
3032     case INDEX_op_st_vec:
3033         lm1 = tcg_type_size(ctx->type) - 1;
3034         break;
3035     default:
3036         g_assert_not_reached();
3037     }
3038     remove_mem_copy_in(ctx, ofs, ofs + lm1);
3039     return true;
3040 }
3041 
3042 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3043 {
3044     TCGTemp *src;
3045     intptr_t ofs, last;
3046     TCGType type;
3047 
3048     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3049         return fold_tcg_st(ctx, op);
3050     }
3051 
3052     src = arg_temp(op->args[0]);
3053     ofs = op->args[2];
3054     type = ctx->type;
3055 
3056     /*
3057      * Eliminate duplicate stores of a constant.
3058      * This happens frequently when the target ISA zero-extends.
3059      */
3060     if (ts_is_const(src)) {
3061         TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3062         if (src == prev) {
3063             tcg_op_remove(ctx->tcg, op);
3064             return true;
3065         }
3066     }
3067 
3068     last = ofs + tcg_type_size(type) - 1;
3069     remove_mem_copy_in(ctx, ofs, last);
3070     record_mem_copy(ctx, type, src, ofs, last);
3071     return true;
3072 }
3073 
3074 static bool fold_xor(OptContext *ctx, TCGOp *op)
3075 {
3076     uint64_t z_mask, o_mask, s_mask;
3077     TempOptInfo *t1, *t2;
3078 
3079     if (fold_const2_commutative(ctx, op) ||
3080         fold_xx_to_i(ctx, op, 0) ||
3081         fold_xi_to_x(ctx, op, 0) ||
3082         fold_xi_to_not(ctx, op, -1)) {
3083         return true;
3084     }
3085 
3086     t1 = arg_info(op->args[1]);
3087     t2 = arg_info(op->args[2]);
3088 
3089     z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3090     o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3091     s_mask = t1->s_mask & t2->s_mask;
3092 
3093     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3094 }
3095 
3096 /* Propagate constants and copies, fold constant expressions. */
3097 void tcg_optimize(TCGContext *s)
3098 {
3099     int nb_temps, i;
3100     TCGOp *op, *op_next;
3101     OptContext ctx = { .tcg = s };
3102 
3103     QSIMPLEQ_INIT(&ctx.mem_free);
3104 
3105     /* Array VALS has an element for each temp.
3106        If this temp holds a constant then its value is kept in VALS' element.
3107        If this temp is a copy of other ones then the other copies are
3108        available through the doubly linked circular list. */
3109 
3110     nb_temps = s->nb_temps;
3111     for (i = 0; i < nb_temps; ++i) {
3112         s->temps[i].state_ptr = NULL;
3113     }
3114 
3115     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3116         TCGOpcode opc = op->opc;
3117         const TCGOpDef *def;
3118         bool done = false;
3119 
3120         /* Calls are special. */
3121         if (opc == INDEX_op_call) {
3122             fold_call(&ctx, op);
3123             continue;
3124         }
3125 
3126         def = &tcg_op_defs[opc];
3127         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3128         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3129 
3130         /* Pre-compute the type of the operation. */
3131         ctx.type = TCGOP_TYPE(op);
3132 
3133         /*
3134          * Process each opcode.
3135          * Sorted alphabetically by opcode as much as possible.
3136          */
3137         switch (opc) {
3138         case INDEX_op_add:
3139             done = fold_add(&ctx, op);
3140             break;
3141         case INDEX_op_add_vec:
3142             done = fold_add_vec(&ctx, op);
3143             break;
3144         case INDEX_op_addci:
3145             done = fold_addci(&ctx, op);
3146             break;
3147         case INDEX_op_addcio:
3148             done = fold_addcio(&ctx, op);
3149             break;
3150         case INDEX_op_addco:
3151             done = fold_addco(&ctx, op);
3152             break;
3153         case INDEX_op_and:
3154         case INDEX_op_and_vec:
3155             done = fold_and(&ctx, op);
3156             break;
3157         case INDEX_op_andc:
3158         case INDEX_op_andc_vec:
3159             done = fold_andc(&ctx, op);
3160             break;
3161         case INDEX_op_brcond:
3162             done = fold_brcond(&ctx, op);
3163             break;
3164         case INDEX_op_brcond2_i32:
3165             done = fold_brcond2(&ctx, op);
3166             break;
3167         case INDEX_op_bswap16:
3168         case INDEX_op_bswap32:
3169         case INDEX_op_bswap64:
3170             done = fold_bswap(&ctx, op);
3171             break;
3172         case INDEX_op_clz:
3173         case INDEX_op_ctz:
3174             done = fold_count_zeros(&ctx, op);
3175             break;
3176         case INDEX_op_ctpop:
3177             done = fold_ctpop(&ctx, op);
3178             break;
3179         case INDEX_op_deposit:
3180             done = fold_deposit(&ctx, op);
3181             break;
3182         case INDEX_op_divs:
3183         case INDEX_op_divu:
3184             done = fold_divide(&ctx, op);
3185             break;
3186         case INDEX_op_dup_vec:
3187             done = fold_dup(&ctx, op);
3188             break;
3189         case INDEX_op_dup2_vec:
3190             done = fold_dup2(&ctx, op);
3191             break;
3192         case INDEX_op_eqv:
3193         case INDEX_op_eqv_vec:
3194             done = fold_eqv(&ctx, op);
3195             break;
3196         case INDEX_op_extract:
3197             done = fold_extract(&ctx, op);
3198             break;
3199         case INDEX_op_extract2:
3200             done = fold_extract2(&ctx, op);
3201             break;
3202         case INDEX_op_ext_i32_i64:
3203             done = fold_exts(&ctx, op);
3204             break;
3205         case INDEX_op_extu_i32_i64:
3206         case INDEX_op_extrl_i64_i32:
3207         case INDEX_op_extrh_i64_i32:
3208             done = fold_extu(&ctx, op);
3209             break;
3210         case INDEX_op_ld8s:
3211         case INDEX_op_ld8u:
3212         case INDEX_op_ld16s:
3213         case INDEX_op_ld16u:
3214         case INDEX_op_ld32s:
3215         case INDEX_op_ld32u:
3216             done = fold_tcg_ld(&ctx, op);
3217             break;
3218         case INDEX_op_ld:
3219         case INDEX_op_ld_vec:
3220             done = fold_tcg_ld_memcopy(&ctx, op);
3221             break;
3222         case INDEX_op_st8:
3223         case INDEX_op_st16:
3224         case INDEX_op_st32:
3225             done = fold_tcg_st(&ctx, op);
3226             break;
3227         case INDEX_op_st:
3228         case INDEX_op_st_vec:
3229             done = fold_tcg_st_memcopy(&ctx, op);
3230             break;
3231         case INDEX_op_mb:
3232             done = fold_mb(&ctx, op);
3233             break;
3234         case INDEX_op_mov:
3235         case INDEX_op_mov_vec:
3236             done = fold_mov(&ctx, op);
3237             break;
3238         case INDEX_op_movcond:
3239             done = fold_movcond(&ctx, op);
3240             break;
3241         case INDEX_op_mul:
3242             done = fold_mul(&ctx, op);
3243             break;
3244         case INDEX_op_mulsh:
3245         case INDEX_op_muluh:
3246             done = fold_mul_highpart(&ctx, op);
3247             break;
3248         case INDEX_op_muls2:
3249         case INDEX_op_mulu2:
3250             done = fold_multiply2(&ctx, op);
3251             break;
3252         case INDEX_op_nand:
3253         case INDEX_op_nand_vec:
3254             done = fold_nand(&ctx, op);
3255             break;
3256         case INDEX_op_neg:
3257             done = fold_neg(&ctx, op);
3258             break;
3259         case INDEX_op_nor:
3260         case INDEX_op_nor_vec:
3261             done = fold_nor(&ctx, op);
3262             break;
3263         case INDEX_op_not:
3264         case INDEX_op_not_vec:
3265             done = fold_not(&ctx, op);
3266             break;
3267         case INDEX_op_or:
3268         case INDEX_op_or_vec:
3269             done = fold_or(&ctx, op);
3270             break;
3271         case INDEX_op_orc:
3272         case INDEX_op_orc_vec:
3273             done = fold_orc(&ctx, op);
3274             break;
3275         case INDEX_op_qemu_ld:
3276             done = fold_qemu_ld_1reg(&ctx, op);
3277             break;
3278         case INDEX_op_qemu_ld2:
3279             done = fold_qemu_ld_2reg(&ctx, op);
3280             break;
3281         case INDEX_op_qemu_st:
3282         case INDEX_op_qemu_st2:
3283             done = fold_qemu_st(&ctx, op);
3284             break;
3285         case INDEX_op_rems:
3286         case INDEX_op_remu:
3287             done = fold_remainder(&ctx, op);
3288             break;
3289         case INDEX_op_rotl:
3290         case INDEX_op_rotr:
3291         case INDEX_op_sar:
3292         case INDEX_op_shl:
3293         case INDEX_op_shr:
3294             done = fold_shift(&ctx, op);
3295             break;
3296         case INDEX_op_setcond:
3297             done = fold_setcond(&ctx, op);
3298             break;
3299         case INDEX_op_negsetcond:
3300             done = fold_negsetcond(&ctx, op);
3301             break;
3302         case INDEX_op_setcond2_i32:
3303             done = fold_setcond2(&ctx, op);
3304             break;
3305         case INDEX_op_cmp_vec:
3306             done = fold_cmp_vec(&ctx, op);
3307             break;
3308         case INDEX_op_cmpsel_vec:
3309             done = fold_cmpsel_vec(&ctx, op);
3310             break;
3311         case INDEX_op_bitsel_vec:
3312             done = fold_bitsel_vec(&ctx, op);
3313             break;
3314         case INDEX_op_sextract:
3315             done = fold_sextract(&ctx, op);
3316             break;
3317         case INDEX_op_sub:
3318             done = fold_sub(&ctx, op);
3319             break;
3320         case INDEX_op_subbi:
3321             done = fold_subbi(&ctx, op);
3322             break;
3323         case INDEX_op_subbio:
3324             done = fold_subbio(&ctx, op);
3325             break;
3326         case INDEX_op_subbo:
3327             done = fold_subbo(&ctx, op);
3328             break;
3329         case INDEX_op_sub_vec:
3330             done = fold_sub_vec(&ctx, op);
3331             break;
3332         case INDEX_op_xor:
3333         case INDEX_op_xor_vec:
3334             done = fold_xor(&ctx, op);
3335             break;
3336         case INDEX_op_set_label:
3337         case INDEX_op_br:
3338         case INDEX_op_exit_tb:
3339         case INDEX_op_goto_tb:
3340         case INDEX_op_goto_ptr:
3341             finish_ebb(&ctx);
3342             done = true;
3343             break;
3344         default:
3345             done = finish_folding(&ctx, op);
3346             break;
3347         }
3348         tcg_debug_assert(done);
3349     }
3350 }
3351