xref: /openbmc/qemu/tcg/optimize.c (revision ae3c12a0)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "exec/cpu-common.h"
29 #include "tcg-op.h"
30 
/*
 * Expand to the case labels for the _i32 and _i64 variants of TCG
 * opcode X.  The ':' after the last label is supplied at the use site.
 */
#define CASE_OP_32_64(x) \
    glue(glue(case INDEX_op_, x), _i32): \
    glue(glue(case INDEX_op_, x), _i64)

/* Like CASE_OP_32_64, but also covers the _vec variant of opcode X.  */
#define CASE_OP_32_64_VEC(x) \
    CASE_OP_32_64(x): \
    glue(glue(case INDEX_op_, x), _vec)
39 
40 struct tcg_temp_info {
41     bool is_const;
42     TCGTemp *prev_copy;
43     TCGTemp *next_copy;
44     tcg_target_ulong val;
45     tcg_target_ulong mask;
46 };
47 
48 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
49 {
50     return ts->state_ptr;
51 }
52 
53 static inline struct tcg_temp_info *arg_info(TCGArg arg)
54 {
55     return ts_info(arg_temp(arg));
56 }
57 
58 static inline bool ts_is_const(TCGTemp *ts)
59 {
60     return ts_info(ts)->is_const;
61 }
62 
63 static inline bool arg_is_const(TCGArg arg)
64 {
65     return ts_is_const(arg_temp(arg));
66 }
67 
68 static inline bool ts_is_copy(TCGTemp *ts)
69 {
70     return ts_info(ts)->next_copy != ts;
71 }
72 
73 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
74 static void reset_ts(TCGTemp *ts)
75 {
76     struct tcg_temp_info *ti = ts_info(ts);
77     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
78     struct tcg_temp_info *ni = ts_info(ti->next_copy);
79 
80     ni->prev_copy = ti->prev_copy;
81     pi->next_copy = ti->next_copy;
82     ti->next_copy = ts;
83     ti->prev_copy = ts;
84     ti->is_const = false;
85     ti->mask = -1;
86 }
87 
88 static void reset_temp(TCGArg arg)
89 {
90     reset_ts(arg_temp(arg));
91 }
92 
93 /* Initialize and activate a temporary.  */
94 static void init_ts_info(struct tcg_temp_info *infos,
95                          TCGTempSet *temps_used, TCGTemp *ts)
96 {
97     size_t idx = temp_idx(ts);
98     if (!test_bit(idx, temps_used->l)) {
99         struct tcg_temp_info *ti = &infos[idx];
100 
101         ts->state_ptr = ti;
102         ti->next_copy = ts;
103         ti->prev_copy = ts;
104         ti->is_const = false;
105         ti->mask = -1;
106         set_bit(idx, temps_used->l);
107     }
108 }
109 
110 static void init_arg_info(struct tcg_temp_info *infos,
111                           TCGTempSet *temps_used, TCGArg arg)
112 {
113     init_ts_info(infos, temps_used, arg_temp(arg));
114 }
115 
116 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
117 {
118     TCGTemp *i;
119 
120     /* If this is already a global, we can't do better. */
121     if (ts->temp_global) {
122         return ts;
123     }
124 
125     /* Search for a global first. */
126     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
127         if (i->temp_global) {
128             return i;
129         }
130     }
131 
132     /* If it is a temp, search for a temp local. */
133     if (!ts->temp_local) {
134         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
135             if (ts->temp_local) {
136                 return i;
137             }
138         }
139     }
140 
141     /* Failure to find a better representation, return the same temp. */
142     return ts;
143 }
144 
145 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
146 {
147     TCGTemp *i;
148 
149     if (ts1 == ts2) {
150         return true;
151     }
152 
153     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
154         return false;
155     }
156 
157     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
158         if (i == ts2) {
159             return true;
160         }
161     }
162 
163     return false;
164 }
165 
166 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
167 {
168     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
169 }
170 
171 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
172 {
173     const TCGOpDef *def;
174     TCGOpcode new_op;
175     tcg_target_ulong mask;
176     struct tcg_temp_info *di = arg_info(dst);
177 
178     def = &tcg_op_defs[op->opc];
179     if (def->flags & TCG_OPF_VECTOR) {
180         new_op = INDEX_op_dupi_vec;
181     } else if (def->flags & TCG_OPF_64BIT) {
182         new_op = INDEX_op_movi_i64;
183     } else {
184         new_op = INDEX_op_movi_i32;
185     }
186     op->opc = new_op;
187     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
188     op->args[0] = dst;
189     op->args[1] = val;
190 
191     reset_temp(dst);
192     di->is_const = true;
193     di->val = val;
194     mask = val;
195     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
196         /* High bits of the destination are now garbage.  */
197         mask |= ~0xffffffffull;
198     }
199     di->mask = mask;
200 }
201 
202 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
203 {
204     TCGTemp *dst_ts = arg_temp(dst);
205     TCGTemp *src_ts = arg_temp(src);
206     const TCGOpDef *def;
207     struct tcg_temp_info *di;
208     struct tcg_temp_info *si;
209     tcg_target_ulong mask;
210     TCGOpcode new_op;
211 
212     if (ts_are_copies(dst_ts, src_ts)) {
213         tcg_op_remove(s, op);
214         return;
215     }
216 
217     reset_ts(dst_ts);
218     di = ts_info(dst_ts);
219     si = ts_info(src_ts);
220     def = &tcg_op_defs[op->opc];
221     if (def->flags & TCG_OPF_VECTOR) {
222         new_op = INDEX_op_mov_vec;
223     } else if (def->flags & TCG_OPF_64BIT) {
224         new_op = INDEX_op_mov_i64;
225     } else {
226         new_op = INDEX_op_mov_i32;
227     }
228     op->opc = new_op;
229     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
230     op->args[0] = dst;
231     op->args[1] = src;
232 
233     mask = si->mask;
234     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
235         /* High bits of the destination are now garbage.  */
236         mask |= ~0xffffffffull;
237     }
238     di->mask = mask;
239 
240     if (src_ts->type == dst_ts->type) {
241         struct tcg_temp_info *ni = ts_info(si->next_copy);
242 
243         di->next_copy = si->next_copy;
244         di->prev_copy = src_ts;
245         ni->prev_copy = dst_ts;
246         si->next_copy = dst_ts;
247         di->is_const = si->is_const;
248         di->val = si->val;
249     }
250 }
251 
252 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
253 {
254     uint64_t l64, h64;
255 
256     switch (op) {
257     CASE_OP_32_64(add):
258         return x + y;
259 
260     CASE_OP_32_64(sub):
261         return x - y;
262 
263     CASE_OP_32_64(mul):
264         return x * y;
265 
266     CASE_OP_32_64(and):
267         return x & y;
268 
269     CASE_OP_32_64(or):
270         return x | y;
271 
272     CASE_OP_32_64(xor):
273         return x ^ y;
274 
275     case INDEX_op_shl_i32:
276         return (uint32_t)x << (y & 31);
277 
278     case INDEX_op_shl_i64:
279         return (uint64_t)x << (y & 63);
280 
281     case INDEX_op_shr_i32:
282         return (uint32_t)x >> (y & 31);
283 
284     case INDEX_op_shr_i64:
285         return (uint64_t)x >> (y & 63);
286 
287     case INDEX_op_sar_i32:
288         return (int32_t)x >> (y & 31);
289 
290     case INDEX_op_sar_i64:
291         return (int64_t)x >> (y & 63);
292 
293     case INDEX_op_rotr_i32:
294         return ror32(x, y & 31);
295 
296     case INDEX_op_rotr_i64:
297         return ror64(x, y & 63);
298 
299     case INDEX_op_rotl_i32:
300         return rol32(x, y & 31);
301 
302     case INDEX_op_rotl_i64:
303         return rol64(x, y & 63);
304 
305     CASE_OP_32_64(not):
306         return ~x;
307 
308     CASE_OP_32_64(neg):
309         return -x;
310 
311     CASE_OP_32_64(andc):
312         return x & ~y;
313 
314     CASE_OP_32_64(orc):
315         return x | ~y;
316 
317     CASE_OP_32_64(eqv):
318         return ~(x ^ y);
319 
320     CASE_OP_32_64(nand):
321         return ~(x & y);
322 
323     CASE_OP_32_64(nor):
324         return ~(x | y);
325 
326     case INDEX_op_clz_i32:
327         return (uint32_t)x ? clz32(x) : y;
328 
329     case INDEX_op_clz_i64:
330         return x ? clz64(x) : y;
331 
332     case INDEX_op_ctz_i32:
333         return (uint32_t)x ? ctz32(x) : y;
334 
335     case INDEX_op_ctz_i64:
336         return x ? ctz64(x) : y;
337 
338     case INDEX_op_ctpop_i32:
339         return ctpop32(x);
340 
341     case INDEX_op_ctpop_i64:
342         return ctpop64(x);
343 
344     CASE_OP_32_64(ext8s):
345         return (int8_t)x;
346 
347     CASE_OP_32_64(ext16s):
348         return (int16_t)x;
349 
350     CASE_OP_32_64(ext8u):
351         return (uint8_t)x;
352 
353     CASE_OP_32_64(ext16u):
354         return (uint16_t)x;
355 
356     CASE_OP_32_64(bswap16):
357         return bswap16(x);
358 
359     CASE_OP_32_64(bswap32):
360         return bswap32(x);
361 
362     case INDEX_op_bswap64_i64:
363         return bswap64(x);
364 
365     case INDEX_op_ext_i32_i64:
366     case INDEX_op_ext32s_i64:
367         return (int32_t)x;
368 
369     case INDEX_op_extu_i32_i64:
370     case INDEX_op_extrl_i64_i32:
371     case INDEX_op_ext32u_i64:
372         return (uint32_t)x;
373 
374     case INDEX_op_extrh_i64_i32:
375         return (uint64_t)x >> 32;
376 
377     case INDEX_op_muluh_i32:
378         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
379     case INDEX_op_mulsh_i32:
380         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
381 
382     case INDEX_op_muluh_i64:
383         mulu64(&l64, &h64, x, y);
384         return h64;
385     case INDEX_op_mulsh_i64:
386         muls64(&l64, &h64, x, y);
387         return h64;
388 
389     case INDEX_op_div_i32:
390         /* Avoid crashing on divide by zero, otherwise undefined.  */
391         return (int32_t)x / ((int32_t)y ? : 1);
392     case INDEX_op_divu_i32:
393         return (uint32_t)x / ((uint32_t)y ? : 1);
394     case INDEX_op_div_i64:
395         return (int64_t)x / ((int64_t)y ? : 1);
396     case INDEX_op_divu_i64:
397         return (uint64_t)x / ((uint64_t)y ? : 1);
398 
399     case INDEX_op_rem_i32:
400         return (int32_t)x % ((int32_t)y ? : 1);
401     case INDEX_op_remu_i32:
402         return (uint32_t)x % ((uint32_t)y ? : 1);
403     case INDEX_op_rem_i64:
404         return (int64_t)x % ((int64_t)y ? : 1);
405     case INDEX_op_remu_i64:
406         return (uint64_t)x % ((uint64_t)y ? : 1);
407 
408     default:
409         fprintf(stderr,
410                 "Unrecognized operation %d in do_constant_folding.\n", op);
411         tcg_abort();
412     }
413 }
414 
415 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
416 {
417     const TCGOpDef *def = &tcg_op_defs[op];
418     TCGArg res = do_constant_folding_2(op, x, y);
419     if (!(def->flags & TCG_OPF_64BIT)) {
420         res = (int32_t)res;
421     }
422     return res;
423 }
424 
425 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
426 {
427     switch (c) {
428     case TCG_COND_EQ:
429         return x == y;
430     case TCG_COND_NE:
431         return x != y;
432     case TCG_COND_LT:
433         return (int32_t)x < (int32_t)y;
434     case TCG_COND_GE:
435         return (int32_t)x >= (int32_t)y;
436     case TCG_COND_LE:
437         return (int32_t)x <= (int32_t)y;
438     case TCG_COND_GT:
439         return (int32_t)x > (int32_t)y;
440     case TCG_COND_LTU:
441         return x < y;
442     case TCG_COND_GEU:
443         return x >= y;
444     case TCG_COND_LEU:
445         return x <= y;
446     case TCG_COND_GTU:
447         return x > y;
448     default:
449         tcg_abort();
450     }
451 }
452 
453 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
454 {
455     switch (c) {
456     case TCG_COND_EQ:
457         return x == y;
458     case TCG_COND_NE:
459         return x != y;
460     case TCG_COND_LT:
461         return (int64_t)x < (int64_t)y;
462     case TCG_COND_GE:
463         return (int64_t)x >= (int64_t)y;
464     case TCG_COND_LE:
465         return (int64_t)x <= (int64_t)y;
466     case TCG_COND_GT:
467         return (int64_t)x > (int64_t)y;
468     case TCG_COND_LTU:
469         return x < y;
470     case TCG_COND_GEU:
471         return x >= y;
472     case TCG_COND_LEU:
473         return x <= y;
474     case TCG_COND_GTU:
475         return x > y;
476     default:
477         tcg_abort();
478     }
479 }
480 
481 static bool do_constant_folding_cond_eq(TCGCond c)
482 {
483     switch (c) {
484     case TCG_COND_GT:
485     case TCG_COND_LTU:
486     case TCG_COND_LT:
487     case TCG_COND_GTU:
488     case TCG_COND_NE:
489         return 0;
490     case TCG_COND_GE:
491     case TCG_COND_GEU:
492     case TCG_COND_LE:
493     case TCG_COND_LEU:
494     case TCG_COND_EQ:
495         return 1;
496     default:
497         tcg_abort();
498     }
499 }
500 
501 /* Return 2 if the condition can't be simplified, and the result
502    of the condition (0 or 1) if it can */
503 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
504                                        TCGArg y, TCGCond c)
505 {
506     tcg_target_ulong xv = arg_info(x)->val;
507     tcg_target_ulong yv = arg_info(y)->val;
508     if (arg_is_const(x) && arg_is_const(y)) {
509         const TCGOpDef *def = &tcg_op_defs[op];
510         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
511         if (def->flags & TCG_OPF_64BIT) {
512             return do_constant_folding_cond_64(xv, yv, c);
513         } else {
514             return do_constant_folding_cond_32(xv, yv, c);
515         }
516     } else if (args_are_copies(x, y)) {
517         return do_constant_folding_cond_eq(c);
518     } else if (arg_is_const(y) && yv == 0) {
519         switch (c) {
520         case TCG_COND_LTU:
521             return 0;
522         case TCG_COND_GEU:
523             return 1;
524         default:
525             return 2;
526         }
527     }
528     return 2;
529 }
530 
531 /* Return 2 if the condition can't be simplified, and the result
532    of the condition (0 or 1) if it can */
533 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
534 {
535     TCGArg al = p1[0], ah = p1[1];
536     TCGArg bl = p2[0], bh = p2[1];
537 
538     if (arg_is_const(bl) && arg_is_const(bh)) {
539         tcg_target_ulong blv = arg_info(bl)->val;
540         tcg_target_ulong bhv = arg_info(bh)->val;
541         uint64_t b = deposit64(blv, 32, 32, bhv);
542 
543         if (arg_is_const(al) && arg_is_const(ah)) {
544             tcg_target_ulong alv = arg_info(al)->val;
545             tcg_target_ulong ahv = arg_info(ah)->val;
546             uint64_t a = deposit64(alv, 32, 32, ahv);
547             return do_constant_folding_cond_64(a, b, c);
548         }
549         if (b == 0) {
550             switch (c) {
551             case TCG_COND_LTU:
552                 return 0;
553             case TCG_COND_GEU:
554                 return 1;
555             default:
556                 break;
557             }
558         }
559     }
560     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
561         return do_constant_folding_cond_eq(c);
562     }
563     return 2;
564 }
565 
566 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
567 {
568     TCGArg a1 = *p1, a2 = *p2;
569     int sum = 0;
570     sum += arg_is_const(a1);
571     sum -= arg_is_const(a2);
572 
573     /* Prefer the constant in second argument, and then the form
574        op a, a, b, which is better handled on non-RISC hosts. */
575     if (sum > 0 || (sum == 0 && dest == a2)) {
576         *p1 = a2;
577         *p2 = a1;
578         return true;
579     }
580     return false;
581 }
582 
583 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
584 {
585     int sum = 0;
586     sum += arg_is_const(p1[0]);
587     sum += arg_is_const(p1[1]);
588     sum -= arg_is_const(p2[0]);
589     sum -= arg_is_const(p2[1]);
590     if (sum > 0) {
591         TCGArg t;
592         t = p1[0], p1[0] = p2[0], p2[0] = t;
593         t = p1[1], p1[1] = p2[1], p2[1] = t;
594         return true;
595     }
596     return false;
597 }
598 
599 /* Propagate constants and copies, fold constant expressions. */
600 void tcg_optimize(TCGContext *s)
601 {
602     int nb_temps, nb_globals;
603     TCGOp *op, *op_next, *prev_mb = NULL;
604     struct tcg_temp_info *infos;
605     TCGTempSet temps_used;
606 
607     /* Array VALS has an element for each temp.
608        If this temp holds a constant then its value is kept in VALS' element.
609        If this temp is a copy of other ones then the other copies are
610        available through the doubly linked circular list. */
611 
612     nb_temps = s->nb_temps;
613     nb_globals = s->nb_globals;
614     bitmap_zero(temps_used.l, nb_temps);
615     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
616 
617     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
618         tcg_target_ulong mask, partmask, affected;
619         int nb_oargs, nb_iargs, i;
620         TCGArg tmp;
621         TCGOpcode opc = op->opc;
622         const TCGOpDef *def = &tcg_op_defs[opc];
623 
624         /* Count the arguments, and initialize the temps that are
625            going to be used */
626         if (opc == INDEX_op_call) {
627             nb_oargs = TCGOP_CALLO(op);
628             nb_iargs = TCGOP_CALLI(op);
629             for (i = 0; i < nb_oargs + nb_iargs; i++) {
630                 TCGTemp *ts = arg_temp(op->args[i]);
631                 if (ts) {
632                     init_ts_info(infos, &temps_used, ts);
633                 }
634             }
635         } else {
636             nb_oargs = def->nb_oargs;
637             nb_iargs = def->nb_iargs;
638             for (i = 0; i < nb_oargs + nb_iargs; i++) {
639                 init_arg_info(infos, &temps_used, op->args[i]);
640             }
641         }
642 
643         /* Do copy propagation */
644         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
645             TCGTemp *ts = arg_temp(op->args[i]);
646             if (ts && ts_is_copy(ts)) {
647                 op->args[i] = temp_arg(find_better_copy(s, ts));
648             }
649         }
650 
651         /* For commutative operations make constant second argument */
652         switch (opc) {
653         CASE_OP_32_64_VEC(add):
654         CASE_OP_32_64_VEC(mul):
655         CASE_OP_32_64_VEC(and):
656         CASE_OP_32_64_VEC(or):
657         CASE_OP_32_64_VEC(xor):
658         CASE_OP_32_64(eqv):
659         CASE_OP_32_64(nand):
660         CASE_OP_32_64(nor):
661         CASE_OP_32_64(muluh):
662         CASE_OP_32_64(mulsh):
663             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
664             break;
665         CASE_OP_32_64(brcond):
666             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
667                 op->args[2] = tcg_swap_cond(op->args[2]);
668             }
669             break;
670         CASE_OP_32_64(setcond):
671             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
672                 op->args[3] = tcg_swap_cond(op->args[3]);
673             }
674             break;
675         CASE_OP_32_64(movcond):
676             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
677                 op->args[5] = tcg_swap_cond(op->args[5]);
678             }
679             /* For movcond, we canonicalize the "false" input reg to match
680                the destination reg so that the tcg backend can implement
681                a "move if true" operation.  */
682             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
683                 op->args[5] = tcg_invert_cond(op->args[5]);
684             }
685             break;
686         CASE_OP_32_64(add2):
687             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
688             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
689             break;
690         CASE_OP_32_64(mulu2):
691         CASE_OP_32_64(muls2):
692             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
693             break;
694         case INDEX_op_brcond2_i32:
695             if (swap_commutative2(&op->args[0], &op->args[2])) {
696                 op->args[4] = tcg_swap_cond(op->args[4]);
697             }
698             break;
699         case INDEX_op_setcond2_i32:
700             if (swap_commutative2(&op->args[1], &op->args[3])) {
701                 op->args[5] = tcg_swap_cond(op->args[5]);
702             }
703             break;
704         default:
705             break;
706         }
707 
708         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
709            and "sub r, 0, a => neg r, a" case.  */
710         switch (opc) {
711         CASE_OP_32_64(shl):
712         CASE_OP_32_64(shr):
713         CASE_OP_32_64(sar):
714         CASE_OP_32_64(rotl):
715         CASE_OP_32_64(rotr):
716             if (arg_is_const(op->args[1])
717                 && arg_info(op->args[1])->val == 0) {
718                 tcg_opt_gen_movi(s, op, op->args[0], 0);
719                 continue;
720             }
721             break;
722         CASE_OP_32_64_VEC(sub):
723             {
724                 TCGOpcode neg_op;
725                 bool have_neg;
726 
727                 if (arg_is_const(op->args[2])) {
728                     /* Proceed with possible constant folding. */
729                     break;
730                 }
731                 if (opc == INDEX_op_sub_i32) {
732                     neg_op = INDEX_op_neg_i32;
733                     have_neg = TCG_TARGET_HAS_neg_i32;
734                 } else if (opc == INDEX_op_sub_i64) {
735                     neg_op = INDEX_op_neg_i64;
736                     have_neg = TCG_TARGET_HAS_neg_i64;
737                 } else if (TCG_TARGET_HAS_neg_vec) {
738                     TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
739                     unsigned vece = TCGOP_VECE(op);
740                     neg_op = INDEX_op_neg_vec;
741                     have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
742                 } else {
743                     break;
744                 }
745                 if (!have_neg) {
746                     break;
747                 }
748                 if (arg_is_const(op->args[1])
749                     && arg_info(op->args[1])->val == 0) {
750                     op->opc = neg_op;
751                     reset_temp(op->args[0]);
752                     op->args[1] = op->args[2];
753                     continue;
754                 }
755             }
756             break;
757         CASE_OP_32_64_VEC(xor):
758         CASE_OP_32_64(nand):
759             if (!arg_is_const(op->args[1])
760                 && arg_is_const(op->args[2])
761                 && arg_info(op->args[2])->val == -1) {
762                 i = 1;
763                 goto try_not;
764             }
765             break;
766         CASE_OP_32_64(nor):
767             if (!arg_is_const(op->args[1])
768                 && arg_is_const(op->args[2])
769                 && arg_info(op->args[2])->val == 0) {
770                 i = 1;
771                 goto try_not;
772             }
773             break;
774         CASE_OP_32_64_VEC(andc):
775             if (!arg_is_const(op->args[2])
776                 && arg_is_const(op->args[1])
777                 && arg_info(op->args[1])->val == -1) {
778                 i = 2;
779                 goto try_not;
780             }
781             break;
782         CASE_OP_32_64_VEC(orc):
783         CASE_OP_32_64(eqv):
784             if (!arg_is_const(op->args[2])
785                 && arg_is_const(op->args[1])
786                 && arg_info(op->args[1])->val == 0) {
787                 i = 2;
788                 goto try_not;
789             }
790             break;
791         try_not:
792             {
793                 TCGOpcode not_op;
794                 bool have_not;
795 
796                 if (def->flags & TCG_OPF_VECTOR) {
797                     not_op = INDEX_op_not_vec;
798                     have_not = TCG_TARGET_HAS_not_vec;
799                 } else if (def->flags & TCG_OPF_64BIT) {
800                     not_op = INDEX_op_not_i64;
801                     have_not = TCG_TARGET_HAS_not_i64;
802                 } else {
803                     not_op = INDEX_op_not_i32;
804                     have_not = TCG_TARGET_HAS_not_i32;
805                 }
806                 if (!have_not) {
807                     break;
808                 }
809                 op->opc = not_op;
810                 reset_temp(op->args[0]);
811                 op->args[1] = op->args[i];
812                 continue;
813             }
814         default:
815             break;
816         }
817 
818         /* Simplify expression for "op r, a, const => mov r, a" cases */
819         switch (opc) {
820         CASE_OP_32_64_VEC(add):
821         CASE_OP_32_64_VEC(sub):
822         CASE_OP_32_64_VEC(or):
823         CASE_OP_32_64_VEC(xor):
824         CASE_OP_32_64_VEC(andc):
825         CASE_OP_32_64(shl):
826         CASE_OP_32_64(shr):
827         CASE_OP_32_64(sar):
828         CASE_OP_32_64(rotl):
829         CASE_OP_32_64(rotr):
830             if (!arg_is_const(op->args[1])
831                 && arg_is_const(op->args[2])
832                 && arg_info(op->args[2])->val == 0) {
833                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
834                 continue;
835             }
836             break;
837         CASE_OP_32_64_VEC(and):
838         CASE_OP_32_64_VEC(orc):
839         CASE_OP_32_64(eqv):
840             if (!arg_is_const(op->args[1])
841                 && arg_is_const(op->args[2])
842                 && arg_info(op->args[2])->val == -1) {
843                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
844                 continue;
845             }
846             break;
847         default:
848             break;
849         }
850 
851         /* Simplify using known-zero bits. Currently only ops with a single
852            output argument is supported. */
853         mask = -1;
854         affected = -1;
855         switch (opc) {
856         CASE_OP_32_64(ext8s):
857             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
858                 break;
859             }
860         CASE_OP_32_64(ext8u):
861             mask = 0xff;
862             goto and_const;
863         CASE_OP_32_64(ext16s):
864             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
865                 break;
866             }
867         CASE_OP_32_64(ext16u):
868             mask = 0xffff;
869             goto and_const;
870         case INDEX_op_ext32s_i64:
871             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
872                 break;
873             }
874         case INDEX_op_ext32u_i64:
875             mask = 0xffffffffU;
876             goto and_const;
877 
878         CASE_OP_32_64(and):
879             mask = arg_info(op->args[2])->mask;
880             if (arg_is_const(op->args[2])) {
881         and_const:
882                 affected = arg_info(op->args[1])->mask & ~mask;
883             }
884             mask = arg_info(op->args[1])->mask & mask;
885             break;
886 
887         case INDEX_op_ext_i32_i64:
888             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
889                 break;
890             }
891         case INDEX_op_extu_i32_i64:
892             /* We do not compute affected as it is a size changing op.  */
893             mask = (uint32_t)arg_info(op->args[1])->mask;
894             break;
895 
896         CASE_OP_32_64(andc):
897             /* Known-zeros does not imply known-ones.  Therefore unless
898                op->args[2] is constant, we can't infer anything from it.  */
899             if (arg_is_const(op->args[2])) {
900                 mask = ~arg_info(op->args[2])->mask;
901                 goto and_const;
902             }
903             /* But we certainly know nothing outside args[1] may be set. */
904             mask = arg_info(op->args[1])->mask;
905             break;
906 
907         case INDEX_op_sar_i32:
908             if (arg_is_const(op->args[2])) {
909                 tmp = arg_info(op->args[2])->val & 31;
910                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
911             }
912             break;
913         case INDEX_op_sar_i64:
914             if (arg_is_const(op->args[2])) {
915                 tmp = arg_info(op->args[2])->val & 63;
916                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
917             }
918             break;
919 
920         case INDEX_op_shr_i32:
921             if (arg_is_const(op->args[2])) {
922                 tmp = arg_info(op->args[2])->val & 31;
923                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
924             }
925             break;
926         case INDEX_op_shr_i64:
927             if (arg_is_const(op->args[2])) {
928                 tmp = arg_info(op->args[2])->val & 63;
929                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
930             }
931             break;
932 
933         case INDEX_op_extrl_i64_i32:
934             mask = (uint32_t)arg_info(op->args[1])->mask;
935             break;
936         case INDEX_op_extrh_i64_i32:
937             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
938             break;
939 
940         CASE_OP_32_64(shl):
941             if (arg_is_const(op->args[2])) {
942                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
943                 mask = arg_info(op->args[1])->mask << tmp;
944             }
945             break;
946 
947         CASE_OP_32_64(neg):
948             /* Set to 1 all bits to the left of the rightmost.  */
949             mask = -(arg_info(op->args[1])->mask
950                      & -arg_info(op->args[1])->mask);
951             break;
952 
953         CASE_OP_32_64(deposit):
954             mask = deposit64(arg_info(op->args[1])->mask,
955                              op->args[3], op->args[4],
956                              arg_info(op->args[2])->mask);
957             break;
958 
959         CASE_OP_32_64(extract):
960             mask = extract64(arg_info(op->args[1])->mask,
961                              op->args[2], op->args[3]);
962             if (op->args[2] == 0) {
963                 affected = arg_info(op->args[1])->mask & ~mask;
964             }
965             break;
966         CASE_OP_32_64(sextract):
967             mask = sextract64(arg_info(op->args[1])->mask,
968                               op->args[2], op->args[3]);
969             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
970                 affected = arg_info(op->args[1])->mask & ~mask;
971             }
972             break;
973 
974         CASE_OP_32_64(or):
975         CASE_OP_32_64(xor):
976             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
977             break;
978 
979         case INDEX_op_clz_i32:
980         case INDEX_op_ctz_i32:
981             mask = arg_info(op->args[2])->mask | 31;
982             break;
983 
984         case INDEX_op_clz_i64:
985         case INDEX_op_ctz_i64:
986             mask = arg_info(op->args[2])->mask | 63;
987             break;
988 
989         case INDEX_op_ctpop_i32:
990             mask = 32 | 31;
991             break;
992         case INDEX_op_ctpop_i64:
993             mask = 64 | 63;
994             break;
995 
996         CASE_OP_32_64(setcond):
997         case INDEX_op_setcond2_i32:
998             mask = 1;
999             break;
1000 
1001         CASE_OP_32_64(movcond):
1002             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1003             break;
1004 
1005         CASE_OP_32_64(ld8u):
1006             mask = 0xff;
1007             break;
1008         CASE_OP_32_64(ld16u):
1009             mask = 0xffff;
1010             break;
1011         case INDEX_op_ld32u_i64:
1012             mask = 0xffffffffu;
1013             break;
1014 
1015         CASE_OP_32_64(qemu_ld):
1016             {
1017                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1018                 TCGMemOp mop = get_memop(oi);
1019                 if (!(mop & MO_SIGN)) {
1020                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1021                 }
1022             }
1023             break;
1024 
1025         default:
1026             break;
1027         }
1028 
1029         /* 32-bit ops generate 32-bit results.  For the result-is-zero test
1030            below, we can ignore high bits, but for further optimizations we
1031            need to record that the high bits contain garbage.  */
1032         partmask = mask;
1033         if (!(def->flags & TCG_OPF_64BIT)) {
1034             mask |= ~(tcg_target_ulong)0xffffffffu;
1035             partmask &= 0xffffffffu;
1036             affected &= 0xffffffffu;
1037         }
1038 
1039         if (partmask == 0) {
1040             tcg_debug_assert(nb_oargs == 1);
1041             tcg_opt_gen_movi(s, op, op->args[0], 0);
1042             continue;
1043         }
1044         if (affected == 0) {
1045             tcg_debug_assert(nb_oargs == 1);
1046             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1047             continue;
1048         }
1049 
1050         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1051         switch (opc) {
1052         CASE_OP_32_64_VEC(and):
1053         CASE_OP_32_64_VEC(mul):
1054         CASE_OP_32_64(muluh):
1055         CASE_OP_32_64(mulsh):
1056             if (arg_is_const(op->args[2])
1057                 && arg_info(op->args[2])->val == 0) {
1058                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1059                 continue;
1060             }
1061             break;
1062         default:
1063             break;
1064         }
1065 
1066         /* Simplify expression for "op r, a, a => mov r, a" cases */
1067         switch (opc) {
1068         CASE_OP_32_64_VEC(or):
1069         CASE_OP_32_64_VEC(and):
1070             if (args_are_copies(op->args[1], op->args[2])) {
1071                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1072                 continue;
1073             }
1074             break;
1075         default:
1076             break;
1077         }
1078 
1079         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1080         switch (opc) {
1081         CASE_OP_32_64_VEC(andc):
1082         CASE_OP_32_64_VEC(sub):
1083         CASE_OP_32_64_VEC(xor):
1084             if (args_are_copies(op->args[1], op->args[2])) {
1085                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1086                 continue;
1087             }
1088             break;
1089         default:
1090             break;
1091         }
1092 
1093         /* Propagate constants through copy operations and do constant
1094            folding.  Constants will be substituted to arguments by register
1095            allocator where needed and possible.  Also detect copies. */
1096         switch (opc) {
1097         CASE_OP_32_64_VEC(mov):
1098             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1099             break;
1100         CASE_OP_32_64(movi):
1101         case INDEX_op_dupi_vec:
1102             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1103             break;
1104 
1105         case INDEX_op_dup_vec:
1106             if (arg_is_const(op->args[1])) {
1107                 tmp = arg_info(op->args[1])->val;
1108                 tmp = dup_const(TCGOP_VECE(op), tmp);
1109                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1110                 break;
1111             }
1112             goto do_default;
1113 
1114         CASE_OP_32_64(not):
1115         CASE_OP_32_64(neg):
1116         CASE_OP_32_64(ext8s):
1117         CASE_OP_32_64(ext8u):
1118         CASE_OP_32_64(ext16s):
1119         CASE_OP_32_64(ext16u):
1120         CASE_OP_32_64(ctpop):
1121         CASE_OP_32_64(bswap16):
1122         CASE_OP_32_64(bswap32):
1123         case INDEX_op_bswap64_i64:
1124         case INDEX_op_ext32s_i64:
1125         case INDEX_op_ext32u_i64:
1126         case INDEX_op_ext_i32_i64:
1127         case INDEX_op_extu_i32_i64:
1128         case INDEX_op_extrl_i64_i32:
1129         case INDEX_op_extrh_i64_i32:
1130             if (arg_is_const(op->args[1])) {
1131                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1132                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1133                 break;
1134             }
1135             goto do_default;
1136 
1137         CASE_OP_32_64(add):
1138         CASE_OP_32_64(sub):
1139         CASE_OP_32_64(mul):
1140         CASE_OP_32_64(or):
1141         CASE_OP_32_64(and):
1142         CASE_OP_32_64(xor):
1143         CASE_OP_32_64(shl):
1144         CASE_OP_32_64(shr):
1145         CASE_OP_32_64(sar):
1146         CASE_OP_32_64(rotl):
1147         CASE_OP_32_64(rotr):
1148         CASE_OP_32_64(andc):
1149         CASE_OP_32_64(orc):
1150         CASE_OP_32_64(eqv):
1151         CASE_OP_32_64(nand):
1152         CASE_OP_32_64(nor):
1153         CASE_OP_32_64(muluh):
1154         CASE_OP_32_64(mulsh):
1155         CASE_OP_32_64(div):
1156         CASE_OP_32_64(divu):
1157         CASE_OP_32_64(rem):
1158         CASE_OP_32_64(remu):
1159             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1160                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1161                                           arg_info(op->args[2])->val);
1162                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1163                 break;
1164             }
1165             goto do_default;
1166 
1167         CASE_OP_32_64(clz):
1168         CASE_OP_32_64(ctz):
1169             if (arg_is_const(op->args[1])) {
1170                 TCGArg v = arg_info(op->args[1])->val;
1171                 if (v != 0) {
1172                     tmp = do_constant_folding(opc, v, 0);
1173                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1174                 } else {
1175                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1176                 }
1177                 break;
1178             }
1179             goto do_default;
1180 
1181         CASE_OP_32_64(deposit):
1182             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1183                 tmp = deposit64(arg_info(op->args[1])->val,
1184                                 op->args[3], op->args[4],
1185                                 arg_info(op->args[2])->val);
1186                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1187                 break;
1188             }
1189             goto do_default;
1190 
1191         CASE_OP_32_64(extract):
1192             if (arg_is_const(op->args[1])) {
1193                 tmp = extract64(arg_info(op->args[1])->val,
1194                                 op->args[2], op->args[3]);
1195                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1196                 break;
1197             }
1198             goto do_default;
1199 
1200         CASE_OP_32_64(sextract):
1201             if (arg_is_const(op->args[1])) {
1202                 tmp = sextract64(arg_info(op->args[1])->val,
1203                                  op->args[2], op->args[3]);
1204                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1205                 break;
1206             }
1207             goto do_default;
1208 
1209         CASE_OP_32_64(extract2):
1210             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1211                 TCGArg v1 = arg_info(op->args[1])->val;
1212                 TCGArg v2 = arg_info(op->args[2])->val;
1213 
1214                 if (opc == INDEX_op_extract2_i64) {
1215                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1216                 } else {
1217                     tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
1218                     tmp = (int32_t)tmp;
1219                 }
1220                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1221                 break;
1222             }
1223             goto do_default;
1224 
1225         CASE_OP_32_64(setcond):
1226             tmp = do_constant_folding_cond(opc, op->args[1],
1227                                            op->args[2], op->args[3]);
1228             if (tmp != 2) {
1229                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1230                 break;
1231             }
1232             goto do_default;
1233 
1234         CASE_OP_32_64(brcond):
1235             tmp = do_constant_folding_cond(opc, op->args[0],
1236                                            op->args[1], op->args[2]);
1237             if (tmp != 2) {
1238                 if (tmp) {
1239                     bitmap_zero(temps_used.l, nb_temps);
1240                     op->opc = INDEX_op_br;
1241                     op->args[0] = op->args[3];
1242                 } else {
1243                     tcg_op_remove(s, op);
1244                 }
1245                 break;
1246             }
1247             goto do_default;
1248 
1249         CASE_OP_32_64(movcond):
1250             tmp = do_constant_folding_cond(opc, op->args[1],
1251                                            op->args[2], op->args[5]);
1252             if (tmp != 2) {
1253                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1254                 break;
1255             }
1256             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1257                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1258                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1259                 TCGCond cond = op->args[5];
1260                 if (fv == 1 && tv == 0) {
1261                     cond = tcg_invert_cond(cond);
1262                 } else if (!(tv == 1 && fv == 0)) {
1263                     goto do_default;
1264                 }
1265                 op->args[3] = cond;
1266                 op->opc = opc = (opc == INDEX_op_movcond_i32
1267                                  ? INDEX_op_setcond_i32
1268                                  : INDEX_op_setcond_i64);
1269                 nb_iargs = 2;
1270             }
1271             goto do_default;
1272 
1273         case INDEX_op_add2_i32:
1274         case INDEX_op_sub2_i32:
1275             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1276                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1277                 uint32_t al = arg_info(op->args[2])->val;
1278                 uint32_t ah = arg_info(op->args[3])->val;
1279                 uint32_t bl = arg_info(op->args[4])->val;
1280                 uint32_t bh = arg_info(op->args[5])->val;
1281                 uint64_t a = ((uint64_t)ah << 32) | al;
1282                 uint64_t b = ((uint64_t)bh << 32) | bl;
1283                 TCGArg rl, rh;
1284                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1285 
1286                 if (opc == INDEX_op_add2_i32) {
1287                     a += b;
1288                 } else {
1289                     a -= b;
1290                 }
1291 
1292                 rl = op->args[0];
1293                 rh = op->args[1];
1294                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1295                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1296                 break;
1297             }
1298             goto do_default;
1299 
1300         case INDEX_op_mulu2_i32:
1301             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1302                 uint32_t a = arg_info(op->args[2])->val;
1303                 uint32_t b = arg_info(op->args[3])->val;
1304                 uint64_t r = (uint64_t)a * b;
1305                 TCGArg rl, rh;
1306                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1307 
1308                 rl = op->args[0];
1309                 rh = op->args[1];
1310                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1311                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1312                 break;
1313             }
1314             goto do_default;
1315 
1316         case INDEX_op_brcond2_i32:
1317             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1318                                             op->args[4]);
1319             if (tmp != 2) {
1320                 if (tmp) {
1321             do_brcond_true:
1322                     bitmap_zero(temps_used.l, nb_temps);
1323                     op->opc = INDEX_op_br;
1324                     op->args[0] = op->args[5];
1325                 } else {
1326             do_brcond_false:
1327                     tcg_op_remove(s, op);
1328                 }
1329             } else if ((op->args[4] == TCG_COND_LT
1330                         || op->args[4] == TCG_COND_GE)
1331                        && arg_is_const(op->args[2])
1332                        && arg_info(op->args[2])->val == 0
1333                        && arg_is_const(op->args[3])
1334                        && arg_info(op->args[3])->val == 0) {
1335                 /* Simplify LT/GE comparisons vs zero to a single compare
1336                    vs the high word of the input.  */
1337             do_brcond_high:
1338                 bitmap_zero(temps_used.l, nb_temps);
1339                 op->opc = INDEX_op_brcond_i32;
1340                 op->args[0] = op->args[1];
1341                 op->args[1] = op->args[3];
1342                 op->args[2] = op->args[4];
1343                 op->args[3] = op->args[5];
1344             } else if (op->args[4] == TCG_COND_EQ) {
1345                 /* Simplify EQ comparisons where one of the pairs
1346                    can be simplified.  */
1347                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1348                                                op->args[0], op->args[2],
1349                                                TCG_COND_EQ);
1350                 if (tmp == 0) {
1351                     goto do_brcond_false;
1352                 } else if (tmp == 1) {
1353                     goto do_brcond_high;
1354                 }
1355                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1356                                                op->args[1], op->args[3],
1357                                                TCG_COND_EQ);
1358                 if (tmp == 0) {
1359                     goto do_brcond_false;
1360                 } else if (tmp != 1) {
1361                     goto do_default;
1362                 }
1363             do_brcond_low:
1364                 bitmap_zero(temps_used.l, nb_temps);
1365                 op->opc = INDEX_op_brcond_i32;
1366                 op->args[1] = op->args[2];
1367                 op->args[2] = op->args[4];
1368                 op->args[3] = op->args[5];
1369             } else if (op->args[4] == TCG_COND_NE) {
1370                 /* Simplify NE comparisons where one of the pairs
1371                    can be simplified.  */
1372                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1373                                                op->args[0], op->args[2],
1374                                                TCG_COND_NE);
1375                 if (tmp == 0) {
1376                     goto do_brcond_high;
1377                 } else if (tmp == 1) {
1378                     goto do_brcond_true;
1379                 }
1380                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1381                                                op->args[1], op->args[3],
1382                                                TCG_COND_NE);
1383                 if (tmp == 0) {
1384                     goto do_brcond_low;
1385                 } else if (tmp == 1) {
1386                     goto do_brcond_true;
1387                 }
1388                 goto do_default;
1389             } else {
1390                 goto do_default;
1391             }
1392             break;
1393 
1394         case INDEX_op_setcond2_i32:
1395             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1396                                             op->args[5]);
1397             if (tmp != 2) {
1398             do_setcond_const:
1399                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1400             } else if ((op->args[5] == TCG_COND_LT
1401                         || op->args[5] == TCG_COND_GE)
1402                        && arg_is_const(op->args[3])
1403                        && arg_info(op->args[3])->val == 0
1404                        && arg_is_const(op->args[4])
1405                        && arg_info(op->args[4])->val == 0) {
1406                 /* Simplify LT/GE comparisons vs zero to a single compare
1407                    vs the high word of the input.  */
1408             do_setcond_high:
1409                 reset_temp(op->args[0]);
1410                 arg_info(op->args[0])->mask = 1;
1411                 op->opc = INDEX_op_setcond_i32;
1412                 op->args[1] = op->args[2];
1413                 op->args[2] = op->args[4];
1414                 op->args[3] = op->args[5];
1415             } else if (op->args[5] == TCG_COND_EQ) {
1416                 /* Simplify EQ comparisons where one of the pairs
1417                    can be simplified.  */
1418                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1419                                                op->args[1], op->args[3],
1420                                                TCG_COND_EQ);
1421                 if (tmp == 0) {
1422                     goto do_setcond_const;
1423                 } else if (tmp == 1) {
1424                     goto do_setcond_high;
1425                 }
1426                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1427                                                op->args[2], op->args[4],
1428                                                TCG_COND_EQ);
1429                 if (tmp == 0) {
1430                     goto do_setcond_high;
1431                 } else if (tmp != 1) {
1432                     goto do_default;
1433                 }
1434             do_setcond_low:
1435                 reset_temp(op->args[0]);
1436                 arg_info(op->args[0])->mask = 1;
1437                 op->opc = INDEX_op_setcond_i32;
1438                 op->args[2] = op->args[3];
1439                 op->args[3] = op->args[5];
1440             } else if (op->args[5] == TCG_COND_NE) {
1441                 /* Simplify NE comparisons where one of the pairs
1442                    can be simplified.  */
1443                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1444                                                op->args[1], op->args[3],
1445                                                TCG_COND_NE);
1446                 if (tmp == 0) {
1447                     goto do_setcond_high;
1448                 } else if (tmp == 1) {
1449                     goto do_setcond_const;
1450                 }
1451                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1452                                                op->args[2], op->args[4],
1453                                                TCG_COND_NE);
1454                 if (tmp == 0) {
1455                     goto do_setcond_low;
1456                 } else if (tmp == 1) {
1457                     goto do_setcond_const;
1458                 }
1459                 goto do_default;
1460             } else {
1461                 goto do_default;
1462             }
1463             break;
1464 
1465         case INDEX_op_call:
1466             if (!(op->args[nb_oargs + nb_iargs + 1]
1467                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1468                 for (i = 0; i < nb_globals; i++) {
1469                     if (test_bit(i, temps_used.l)) {
1470                         reset_ts(&s->temps[i]);
1471                     }
1472                 }
1473             }
1474             goto do_reset_output;
1475 
1476         default:
1477         do_default:
1478             /* Default case: we know nothing about operation (or were unable
1479                to compute the operation result) so no propagation is done.
1480                We trash everything if the operation is the end of a basic
1481                block, otherwise we only trash the output args.  "mask" is
1482                the non-zero bits mask for the first output arg.  */
1483             if (def->flags & TCG_OPF_BB_END) {
1484                 bitmap_zero(temps_used.l, nb_temps);
1485             } else {
1486         do_reset_output:
1487                 for (i = 0; i < nb_oargs; i++) {
1488                     reset_temp(op->args[i]);
1489                     /* Save the corresponding known-zero bits mask for the
1490                        first output argument (only one supported so far). */
1491                     if (i == 0) {
1492                         arg_info(op->args[i])->mask = mask;
1493                     }
1494                 }
1495             }
1496             break;
1497         }
1498 
1499         /* Eliminate duplicate and redundant fence instructions.  */
1500         if (prev_mb) {
1501             switch (opc) {
1502             case INDEX_op_mb:
1503                 /* Merge two barriers of the same type into one,
1504                  * or a weaker barrier into a stronger one,
1505                  * or two weaker barriers into a stronger one.
1506                  *   mb X; mb Y => mb X|Y
1507                  *   mb; strl => mb; st
1508                  *   ldaq; mb => ld; mb
1509                  *   ldaq; strl => ld; mb; st
1510                  * Other combinations are also merged into a strong
1511                  * barrier.  This is stricter than specified but for
1512                  * the purposes of TCG is better than not optimizing.
1513                  */
1514                 prev_mb->args[0] |= op->args[0];
1515                 tcg_op_remove(s, op);
1516                 break;
1517 
1518             default:
1519                 /* Opcodes that end the block stop the optimization.  */
1520                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1521                     break;
1522                 }
1523                 /* fallthru */
1524             case INDEX_op_qemu_ld_i32:
1525             case INDEX_op_qemu_ld_i64:
1526             case INDEX_op_qemu_st_i32:
1527             case INDEX_op_qemu_st_i64:
1528             case INDEX_op_call:
1529                 /* Opcodes that touch guest memory stop the optimization.  */
1530                 prev_mb = NULL;
1531                 break;
1532             }
1533         } else if (opc == INDEX_op_mb) {
1534             prev_mb = op;
1535         }
1536     }
1537 }
1538