xref: /openbmc/qemu/tcg/optimize.c (revision 7eceff5b)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "exec/cpu-common.h"
29 #include "tcg-op.h"
30 
/* Expand to the case labels for the _i32 and _i64 flavours of opcode X.
   The colon after the last label is written at the point of use.  */
#define CASE_OP_32_64(x)                        \
        case glue(glue(INDEX_op_, x), _i32):    \
        case glue(glue(INDEX_op_, x), _i64)
34 
/* As CASE_OP_32_64, with an additional label for the _vec flavour of
   opcode X.  The colon after the last label is written at the point
   of use.  */
#define CASE_OP_32_64_VEC(x)                    \
        CASE_OP_32_64(x):                       \
        case glue(glue(INDEX_op_, x), _vec)
39 
40 struct tcg_temp_info {
41     bool is_const;
42     TCGTemp *prev_copy;
43     TCGTemp *next_copy;
44     tcg_target_ulong val;
45     tcg_target_ulong mask;
46 };
47 
48 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
49 {
50     return ts->state_ptr;
51 }
52 
53 static inline struct tcg_temp_info *arg_info(TCGArg arg)
54 {
55     return ts_info(arg_temp(arg));
56 }
57 
58 static inline bool ts_is_const(TCGTemp *ts)
59 {
60     return ts_info(ts)->is_const;
61 }
62 
63 static inline bool arg_is_const(TCGArg arg)
64 {
65     return ts_is_const(arg_temp(arg));
66 }
67 
68 static inline bool ts_is_copy(TCGTemp *ts)
69 {
70     return ts_info(ts)->next_copy != ts;
71 }
72 
73 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
74 static void reset_ts(TCGTemp *ts)
75 {
76     struct tcg_temp_info *ti = ts_info(ts);
77     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
78     struct tcg_temp_info *ni = ts_info(ti->next_copy);
79 
80     ni->prev_copy = ti->prev_copy;
81     pi->next_copy = ti->next_copy;
82     ti->next_copy = ts;
83     ti->prev_copy = ts;
84     ti->is_const = false;
85     ti->mask = -1;
86 }
87 
88 static void reset_temp(TCGArg arg)
89 {
90     reset_ts(arg_temp(arg));
91 }
92 
93 /* Initialize and activate a temporary.  */
94 static void init_ts_info(struct tcg_temp_info *infos,
95                          TCGTempSet *temps_used, TCGTemp *ts)
96 {
97     size_t idx = temp_idx(ts);
98     if (!test_bit(idx, temps_used->l)) {
99         struct tcg_temp_info *ti = &infos[idx];
100 
101         ts->state_ptr = ti;
102         ti->next_copy = ts;
103         ti->prev_copy = ts;
104         ti->is_const = false;
105         ti->mask = -1;
106         set_bit(idx, temps_used->l);
107     }
108 }
109 
110 static void init_arg_info(struct tcg_temp_info *infos,
111                           TCGTempSet *temps_used, TCGArg arg)
112 {
113     init_ts_info(infos, temps_used, arg_temp(arg));
114 }
115 
116 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
117 {
118     TCGTemp *i;
119 
120     /* If this is already a global, we can't do better. */
121     if (ts->temp_global) {
122         return ts;
123     }
124 
125     /* Search for a global first. */
126     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
127         if (i->temp_global) {
128             return i;
129         }
130     }
131 
132     /* If it is a temp, search for a temp local. */
133     if (!ts->temp_local) {
134         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
135             if (ts->temp_local) {
136                 return i;
137             }
138         }
139     }
140 
141     /* Failure to find a better representation, return the same temp. */
142     return ts;
143 }
144 
145 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
146 {
147     TCGTemp *i;
148 
149     if (ts1 == ts2) {
150         return true;
151     }
152 
153     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
154         return false;
155     }
156 
157     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
158         if (i == ts2) {
159             return true;
160         }
161     }
162 
163     return false;
164 }
165 
166 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
167 {
168     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
169 }
170 
171 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
172 {
173     const TCGOpDef *def;
174     TCGOpcode new_op;
175     tcg_target_ulong mask;
176     struct tcg_temp_info *di = arg_info(dst);
177 
178     def = &tcg_op_defs[op->opc];
179     if (def->flags & TCG_OPF_VECTOR) {
180         new_op = INDEX_op_dupi_vec;
181     } else if (def->flags & TCG_OPF_64BIT) {
182         new_op = INDEX_op_movi_i64;
183     } else {
184         new_op = INDEX_op_movi_i32;
185     }
186     op->opc = new_op;
187     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
188     op->args[0] = dst;
189     op->args[1] = val;
190 
191     reset_temp(dst);
192     di->is_const = true;
193     di->val = val;
194     mask = val;
195     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
196         /* High bits of the destination are now garbage.  */
197         mask |= ~0xffffffffull;
198     }
199     di->mask = mask;
200 }
201 
202 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
203 {
204     TCGTemp *dst_ts = arg_temp(dst);
205     TCGTemp *src_ts = arg_temp(src);
206     const TCGOpDef *def;
207     struct tcg_temp_info *di;
208     struct tcg_temp_info *si;
209     tcg_target_ulong mask;
210     TCGOpcode new_op;
211 
212     if (ts_are_copies(dst_ts, src_ts)) {
213         tcg_op_remove(s, op);
214         return;
215     }
216 
217     reset_ts(dst_ts);
218     di = ts_info(dst_ts);
219     si = ts_info(src_ts);
220     def = &tcg_op_defs[op->opc];
221     if (def->flags & TCG_OPF_VECTOR) {
222         new_op = INDEX_op_mov_vec;
223     } else if (def->flags & TCG_OPF_64BIT) {
224         new_op = INDEX_op_mov_i64;
225     } else {
226         new_op = INDEX_op_mov_i32;
227     }
228     op->opc = new_op;
229     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
230     op->args[0] = dst;
231     op->args[1] = src;
232 
233     mask = si->mask;
234     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
235         /* High bits of the destination are now garbage.  */
236         mask |= ~0xffffffffull;
237     }
238     di->mask = mask;
239 
240     if (src_ts->type == dst_ts->type) {
241         struct tcg_temp_info *ni = ts_info(si->next_copy);
242 
243         di->next_copy = si->next_copy;
244         di->prev_copy = src_ts;
245         ni->prev_copy = dst_ts;
246         si->next_copy = dst_ts;
247         di->is_const = si->is_const;
248         di->val = si->val;
249     }
250 }
251 
252 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
253 {
254     uint64_t l64, h64;
255 
256     switch (op) {
257     CASE_OP_32_64(add):
258         return x + y;
259 
260     CASE_OP_32_64(sub):
261         return x - y;
262 
263     CASE_OP_32_64(mul):
264         return x * y;
265 
266     CASE_OP_32_64(and):
267         return x & y;
268 
269     CASE_OP_32_64(or):
270         return x | y;
271 
272     CASE_OP_32_64(xor):
273         return x ^ y;
274 
275     case INDEX_op_shl_i32:
276         return (uint32_t)x << (y & 31);
277 
278     case INDEX_op_shl_i64:
279         return (uint64_t)x << (y & 63);
280 
281     case INDEX_op_shr_i32:
282         return (uint32_t)x >> (y & 31);
283 
284     case INDEX_op_shr_i64:
285         return (uint64_t)x >> (y & 63);
286 
287     case INDEX_op_sar_i32:
288         return (int32_t)x >> (y & 31);
289 
290     case INDEX_op_sar_i64:
291         return (int64_t)x >> (y & 63);
292 
293     case INDEX_op_rotr_i32:
294         return ror32(x, y & 31);
295 
296     case INDEX_op_rotr_i64:
297         return ror64(x, y & 63);
298 
299     case INDEX_op_rotl_i32:
300         return rol32(x, y & 31);
301 
302     case INDEX_op_rotl_i64:
303         return rol64(x, y & 63);
304 
305     CASE_OP_32_64(not):
306         return ~x;
307 
308     CASE_OP_32_64(neg):
309         return -x;
310 
311     CASE_OP_32_64(andc):
312         return x & ~y;
313 
314     CASE_OP_32_64(orc):
315         return x | ~y;
316 
317     CASE_OP_32_64(eqv):
318         return ~(x ^ y);
319 
320     CASE_OP_32_64(nand):
321         return ~(x & y);
322 
323     CASE_OP_32_64(nor):
324         return ~(x | y);
325 
326     case INDEX_op_clz_i32:
327         return (uint32_t)x ? clz32(x) : y;
328 
329     case INDEX_op_clz_i64:
330         return x ? clz64(x) : y;
331 
332     case INDEX_op_ctz_i32:
333         return (uint32_t)x ? ctz32(x) : y;
334 
335     case INDEX_op_ctz_i64:
336         return x ? ctz64(x) : y;
337 
338     case INDEX_op_ctpop_i32:
339         return ctpop32(x);
340 
341     case INDEX_op_ctpop_i64:
342         return ctpop64(x);
343 
344     CASE_OP_32_64(ext8s):
345         return (int8_t)x;
346 
347     CASE_OP_32_64(ext16s):
348         return (int16_t)x;
349 
350     CASE_OP_32_64(ext8u):
351         return (uint8_t)x;
352 
353     CASE_OP_32_64(ext16u):
354         return (uint16_t)x;
355 
356     case INDEX_op_ext_i32_i64:
357     case INDEX_op_ext32s_i64:
358         return (int32_t)x;
359 
360     case INDEX_op_extu_i32_i64:
361     case INDEX_op_extrl_i64_i32:
362     case INDEX_op_ext32u_i64:
363         return (uint32_t)x;
364 
365     case INDEX_op_extrh_i64_i32:
366         return (uint64_t)x >> 32;
367 
368     case INDEX_op_muluh_i32:
369         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
370     case INDEX_op_mulsh_i32:
371         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
372 
373     case INDEX_op_muluh_i64:
374         mulu64(&l64, &h64, x, y);
375         return h64;
376     case INDEX_op_mulsh_i64:
377         muls64(&l64, &h64, x, y);
378         return h64;
379 
380     case INDEX_op_div_i32:
381         /* Avoid crashing on divide by zero, otherwise undefined.  */
382         return (int32_t)x / ((int32_t)y ? : 1);
383     case INDEX_op_divu_i32:
384         return (uint32_t)x / ((uint32_t)y ? : 1);
385     case INDEX_op_div_i64:
386         return (int64_t)x / ((int64_t)y ? : 1);
387     case INDEX_op_divu_i64:
388         return (uint64_t)x / ((uint64_t)y ? : 1);
389 
390     case INDEX_op_rem_i32:
391         return (int32_t)x % ((int32_t)y ? : 1);
392     case INDEX_op_remu_i32:
393         return (uint32_t)x % ((uint32_t)y ? : 1);
394     case INDEX_op_rem_i64:
395         return (int64_t)x % ((int64_t)y ? : 1);
396     case INDEX_op_remu_i64:
397         return (uint64_t)x % ((uint64_t)y ? : 1);
398 
399     default:
400         fprintf(stderr,
401                 "Unrecognized operation %d in do_constant_folding.\n", op);
402         tcg_abort();
403     }
404 }
405 
406 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
407 {
408     const TCGOpDef *def = &tcg_op_defs[op];
409     TCGArg res = do_constant_folding_2(op, x, y);
410     if (!(def->flags & TCG_OPF_64BIT)) {
411         res = (int32_t)res;
412     }
413     return res;
414 }
415 
416 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
417 {
418     switch (c) {
419     case TCG_COND_EQ:
420         return x == y;
421     case TCG_COND_NE:
422         return x != y;
423     case TCG_COND_LT:
424         return (int32_t)x < (int32_t)y;
425     case TCG_COND_GE:
426         return (int32_t)x >= (int32_t)y;
427     case TCG_COND_LE:
428         return (int32_t)x <= (int32_t)y;
429     case TCG_COND_GT:
430         return (int32_t)x > (int32_t)y;
431     case TCG_COND_LTU:
432         return x < y;
433     case TCG_COND_GEU:
434         return x >= y;
435     case TCG_COND_LEU:
436         return x <= y;
437     case TCG_COND_GTU:
438         return x > y;
439     default:
440         tcg_abort();
441     }
442 }
443 
444 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
445 {
446     switch (c) {
447     case TCG_COND_EQ:
448         return x == y;
449     case TCG_COND_NE:
450         return x != y;
451     case TCG_COND_LT:
452         return (int64_t)x < (int64_t)y;
453     case TCG_COND_GE:
454         return (int64_t)x >= (int64_t)y;
455     case TCG_COND_LE:
456         return (int64_t)x <= (int64_t)y;
457     case TCG_COND_GT:
458         return (int64_t)x > (int64_t)y;
459     case TCG_COND_LTU:
460         return x < y;
461     case TCG_COND_GEU:
462         return x >= y;
463     case TCG_COND_LEU:
464         return x <= y;
465     case TCG_COND_GTU:
466         return x > y;
467     default:
468         tcg_abort();
469     }
470 }
471 
472 static bool do_constant_folding_cond_eq(TCGCond c)
473 {
474     switch (c) {
475     case TCG_COND_GT:
476     case TCG_COND_LTU:
477     case TCG_COND_LT:
478     case TCG_COND_GTU:
479     case TCG_COND_NE:
480         return 0;
481     case TCG_COND_GE:
482     case TCG_COND_GEU:
483     case TCG_COND_LE:
484     case TCG_COND_LEU:
485     case TCG_COND_EQ:
486         return 1;
487     default:
488         tcg_abort();
489     }
490 }
491 
492 /* Return 2 if the condition can't be simplified, and the result
493    of the condition (0 or 1) if it can */
494 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
495                                        TCGArg y, TCGCond c)
496 {
497     tcg_target_ulong xv = arg_info(x)->val;
498     tcg_target_ulong yv = arg_info(y)->val;
499     if (arg_is_const(x) && arg_is_const(y)) {
500         const TCGOpDef *def = &tcg_op_defs[op];
501         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
502         if (def->flags & TCG_OPF_64BIT) {
503             return do_constant_folding_cond_64(xv, yv, c);
504         } else {
505             return do_constant_folding_cond_32(xv, yv, c);
506         }
507     } else if (args_are_copies(x, y)) {
508         return do_constant_folding_cond_eq(c);
509     } else if (arg_is_const(y) && yv == 0) {
510         switch (c) {
511         case TCG_COND_LTU:
512             return 0;
513         case TCG_COND_GEU:
514             return 1;
515         default:
516             return 2;
517         }
518     }
519     return 2;
520 }
521 
522 /* Return 2 if the condition can't be simplified, and the result
523    of the condition (0 or 1) if it can */
524 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
525 {
526     TCGArg al = p1[0], ah = p1[1];
527     TCGArg bl = p2[0], bh = p2[1];
528 
529     if (arg_is_const(bl) && arg_is_const(bh)) {
530         tcg_target_ulong blv = arg_info(bl)->val;
531         tcg_target_ulong bhv = arg_info(bh)->val;
532         uint64_t b = deposit64(blv, 32, 32, bhv);
533 
534         if (arg_is_const(al) && arg_is_const(ah)) {
535             tcg_target_ulong alv = arg_info(al)->val;
536             tcg_target_ulong ahv = arg_info(ah)->val;
537             uint64_t a = deposit64(alv, 32, 32, ahv);
538             return do_constant_folding_cond_64(a, b, c);
539         }
540         if (b == 0) {
541             switch (c) {
542             case TCG_COND_LTU:
543                 return 0;
544             case TCG_COND_GEU:
545                 return 1;
546             default:
547                 break;
548             }
549         }
550     }
551     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
552         return do_constant_folding_cond_eq(c);
553     }
554     return 2;
555 }
556 
557 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
558 {
559     TCGArg a1 = *p1, a2 = *p2;
560     int sum = 0;
561     sum += arg_is_const(a1);
562     sum -= arg_is_const(a2);
563 
564     /* Prefer the constant in second argument, and then the form
565        op a, a, b, which is better handled on non-RISC hosts. */
566     if (sum > 0 || (sum == 0 && dest == a2)) {
567         *p1 = a2;
568         *p2 = a1;
569         return true;
570     }
571     return false;
572 }
573 
574 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
575 {
576     int sum = 0;
577     sum += arg_is_const(p1[0]);
578     sum += arg_is_const(p1[1]);
579     sum -= arg_is_const(p2[0]);
580     sum -= arg_is_const(p2[1]);
581     if (sum > 0) {
582         TCGArg t;
583         t = p1[0], p1[0] = p2[0], p2[0] = t;
584         t = p1[1], p1[1] = p2[1], p2[1] = t;
585         return true;
586     }
587     return false;
588 }
589 
590 /* Propagate constants and copies, fold constant expressions. */
591 void tcg_optimize(TCGContext *s)
592 {
593     int nb_temps, nb_globals;
594     TCGOp *op, *op_next, *prev_mb = NULL;
595     struct tcg_temp_info *infos;
596     TCGTempSet temps_used;
597 
598     /* Array VALS has an element for each temp.
599        If this temp holds a constant then its value is kept in VALS' element.
600        If this temp is a copy of other ones then the other copies are
601        available through the doubly linked circular list. */
602 
603     nb_temps = s->nb_temps;
604     nb_globals = s->nb_globals;
605     bitmap_zero(temps_used.l, nb_temps);
606     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
607 
608     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
609         tcg_target_ulong mask, partmask, affected;
610         int nb_oargs, nb_iargs, i;
611         TCGArg tmp;
612         TCGOpcode opc = op->opc;
613         const TCGOpDef *def = &tcg_op_defs[opc];
614 
615         /* Count the arguments, and initialize the temps that are
616            going to be used */
617         if (opc == INDEX_op_call) {
618             nb_oargs = TCGOP_CALLO(op);
619             nb_iargs = TCGOP_CALLI(op);
620             for (i = 0; i < nb_oargs + nb_iargs; i++) {
621                 TCGTemp *ts = arg_temp(op->args[i]);
622                 if (ts) {
623                     init_ts_info(infos, &temps_used, ts);
624                 }
625             }
626         } else {
627             nb_oargs = def->nb_oargs;
628             nb_iargs = def->nb_iargs;
629             for (i = 0; i < nb_oargs + nb_iargs; i++) {
630                 init_arg_info(infos, &temps_used, op->args[i]);
631             }
632         }
633 
634         /* Do copy propagation */
635         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
636             TCGTemp *ts = arg_temp(op->args[i]);
637             if (ts && ts_is_copy(ts)) {
638                 op->args[i] = temp_arg(find_better_copy(s, ts));
639             }
640         }
641 
642         /* For commutative operations make constant second argument */
643         switch (opc) {
644         CASE_OP_32_64_VEC(add):
645         CASE_OP_32_64_VEC(mul):
646         CASE_OP_32_64_VEC(and):
647         CASE_OP_32_64_VEC(or):
648         CASE_OP_32_64_VEC(xor):
649         CASE_OP_32_64(eqv):
650         CASE_OP_32_64(nand):
651         CASE_OP_32_64(nor):
652         CASE_OP_32_64(muluh):
653         CASE_OP_32_64(mulsh):
654             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
655             break;
656         CASE_OP_32_64(brcond):
657             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
658                 op->args[2] = tcg_swap_cond(op->args[2]);
659             }
660             break;
661         CASE_OP_32_64(setcond):
662             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
663                 op->args[3] = tcg_swap_cond(op->args[3]);
664             }
665             break;
666         CASE_OP_32_64(movcond):
667             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
668                 op->args[5] = tcg_swap_cond(op->args[5]);
669             }
670             /* For movcond, we canonicalize the "false" input reg to match
671                the destination reg so that the tcg backend can implement
672                a "move if true" operation.  */
673             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
674                 op->args[5] = tcg_invert_cond(op->args[5]);
675             }
676             break;
677         CASE_OP_32_64(add2):
678             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
679             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
680             break;
681         CASE_OP_32_64(mulu2):
682         CASE_OP_32_64(muls2):
683             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
684             break;
685         case INDEX_op_brcond2_i32:
686             if (swap_commutative2(&op->args[0], &op->args[2])) {
687                 op->args[4] = tcg_swap_cond(op->args[4]);
688             }
689             break;
690         case INDEX_op_setcond2_i32:
691             if (swap_commutative2(&op->args[1], &op->args[3])) {
692                 op->args[5] = tcg_swap_cond(op->args[5]);
693             }
694             break;
695         default:
696             break;
697         }
698 
699         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
700            and "sub r, 0, a => neg r, a" case.  */
701         switch (opc) {
702         CASE_OP_32_64(shl):
703         CASE_OP_32_64(shr):
704         CASE_OP_32_64(sar):
705         CASE_OP_32_64(rotl):
706         CASE_OP_32_64(rotr):
707             if (arg_is_const(op->args[1])
708                 && arg_info(op->args[1])->val == 0) {
709                 tcg_opt_gen_movi(s, op, op->args[0], 0);
710                 continue;
711             }
712             break;
713         CASE_OP_32_64_VEC(sub):
714             {
715                 TCGOpcode neg_op;
716                 bool have_neg;
717 
718                 if (arg_is_const(op->args[2])) {
719                     /* Proceed with possible constant folding. */
720                     break;
721                 }
722                 if (opc == INDEX_op_sub_i32) {
723                     neg_op = INDEX_op_neg_i32;
724                     have_neg = TCG_TARGET_HAS_neg_i32;
725                 } else if (opc == INDEX_op_sub_i64) {
726                     neg_op = INDEX_op_neg_i64;
727                     have_neg = TCG_TARGET_HAS_neg_i64;
728                 } else {
729                     neg_op = INDEX_op_neg_vec;
730                     have_neg = TCG_TARGET_HAS_neg_vec;
731                 }
732                 if (!have_neg) {
733                     break;
734                 }
735                 if (arg_is_const(op->args[1])
736                     && arg_info(op->args[1])->val == 0) {
737                     op->opc = neg_op;
738                     reset_temp(op->args[0]);
739                     op->args[1] = op->args[2];
740                     continue;
741                 }
742             }
743             break;
744         CASE_OP_32_64_VEC(xor):
745         CASE_OP_32_64(nand):
746             if (!arg_is_const(op->args[1])
747                 && arg_is_const(op->args[2])
748                 && arg_info(op->args[2])->val == -1) {
749                 i = 1;
750                 goto try_not;
751             }
752             break;
753         CASE_OP_32_64(nor):
754             if (!arg_is_const(op->args[1])
755                 && arg_is_const(op->args[2])
756                 && arg_info(op->args[2])->val == 0) {
757                 i = 1;
758                 goto try_not;
759             }
760             break;
761         CASE_OP_32_64_VEC(andc):
762             if (!arg_is_const(op->args[2])
763                 && arg_is_const(op->args[1])
764                 && arg_info(op->args[1])->val == -1) {
765                 i = 2;
766                 goto try_not;
767             }
768             break;
769         CASE_OP_32_64_VEC(orc):
770         CASE_OP_32_64(eqv):
771             if (!arg_is_const(op->args[2])
772                 && arg_is_const(op->args[1])
773                 && arg_info(op->args[1])->val == 0) {
774                 i = 2;
775                 goto try_not;
776             }
777             break;
778         try_not:
779             {
780                 TCGOpcode not_op;
781                 bool have_not;
782 
783                 if (def->flags & TCG_OPF_VECTOR) {
784                     not_op = INDEX_op_not_vec;
785                     have_not = TCG_TARGET_HAS_not_vec;
786                 } else if (def->flags & TCG_OPF_64BIT) {
787                     not_op = INDEX_op_not_i64;
788                     have_not = TCG_TARGET_HAS_not_i64;
789                 } else {
790                     not_op = INDEX_op_not_i32;
791                     have_not = TCG_TARGET_HAS_not_i32;
792                 }
793                 if (!have_not) {
794                     break;
795                 }
796                 op->opc = not_op;
797                 reset_temp(op->args[0]);
798                 op->args[1] = op->args[i];
799                 continue;
800             }
801         default:
802             break;
803         }
804 
805         /* Simplify expression for "op r, a, const => mov r, a" cases */
806         switch (opc) {
807         CASE_OP_32_64_VEC(add):
808         CASE_OP_32_64_VEC(sub):
809         CASE_OP_32_64_VEC(or):
810         CASE_OP_32_64_VEC(xor):
811         CASE_OP_32_64_VEC(andc):
812         CASE_OP_32_64(shl):
813         CASE_OP_32_64(shr):
814         CASE_OP_32_64(sar):
815         CASE_OP_32_64(rotl):
816         CASE_OP_32_64(rotr):
817             if (!arg_is_const(op->args[1])
818                 && arg_is_const(op->args[2])
819                 && arg_info(op->args[2])->val == 0) {
820                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
821                 continue;
822             }
823             break;
824         CASE_OP_32_64_VEC(and):
825         CASE_OP_32_64_VEC(orc):
826         CASE_OP_32_64(eqv):
827             if (!arg_is_const(op->args[1])
828                 && arg_is_const(op->args[2])
829                 && arg_info(op->args[2])->val == -1) {
830                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
831                 continue;
832             }
833             break;
834         default:
835             break;
836         }
837 
838         /* Simplify using known-zero bits. Currently only ops with a single
839            output argument is supported. */
840         mask = -1;
841         affected = -1;
842         switch (opc) {
843         CASE_OP_32_64(ext8s):
844             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
845                 break;
846             }
847         CASE_OP_32_64(ext8u):
848             mask = 0xff;
849             goto and_const;
850         CASE_OP_32_64(ext16s):
851             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
852                 break;
853             }
854         CASE_OP_32_64(ext16u):
855             mask = 0xffff;
856             goto and_const;
857         case INDEX_op_ext32s_i64:
858             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
859                 break;
860             }
861         case INDEX_op_ext32u_i64:
862             mask = 0xffffffffU;
863             goto and_const;
864 
865         CASE_OP_32_64(and):
866             mask = arg_info(op->args[2])->mask;
867             if (arg_is_const(op->args[2])) {
868         and_const:
869                 affected = arg_info(op->args[1])->mask & ~mask;
870             }
871             mask = arg_info(op->args[1])->mask & mask;
872             break;
873 
874         case INDEX_op_ext_i32_i64:
875             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
876                 break;
877             }
878         case INDEX_op_extu_i32_i64:
879             /* We do not compute affected as it is a size changing op.  */
880             mask = (uint32_t)arg_info(op->args[1])->mask;
881             break;
882 
883         CASE_OP_32_64(andc):
884             /* Known-zeros does not imply known-ones.  Therefore unless
885                op->args[2] is constant, we can't infer anything from it.  */
886             if (arg_is_const(op->args[2])) {
887                 mask = ~arg_info(op->args[2])->mask;
888                 goto and_const;
889             }
890             /* But we certainly know nothing outside args[1] may be set. */
891             mask = arg_info(op->args[1])->mask;
892             break;
893 
894         case INDEX_op_sar_i32:
895             if (arg_is_const(op->args[2])) {
896                 tmp = arg_info(op->args[2])->val & 31;
897                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
898             }
899             break;
900         case INDEX_op_sar_i64:
901             if (arg_is_const(op->args[2])) {
902                 tmp = arg_info(op->args[2])->val & 63;
903                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
904             }
905             break;
906 
907         case INDEX_op_shr_i32:
908             if (arg_is_const(op->args[2])) {
909                 tmp = arg_info(op->args[2])->val & 31;
910                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
911             }
912             break;
913         case INDEX_op_shr_i64:
914             if (arg_is_const(op->args[2])) {
915                 tmp = arg_info(op->args[2])->val & 63;
916                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
917             }
918             break;
919 
920         case INDEX_op_extrl_i64_i32:
921             mask = (uint32_t)arg_info(op->args[1])->mask;
922             break;
923         case INDEX_op_extrh_i64_i32:
924             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
925             break;
926 
927         CASE_OP_32_64(shl):
928             if (arg_is_const(op->args[2])) {
929                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
930                 mask = arg_info(op->args[1])->mask << tmp;
931             }
932             break;
933 
934         CASE_OP_32_64(neg):
935             /* Set to 1 all bits to the left of the rightmost.  */
936             mask = -(arg_info(op->args[1])->mask
937                      & -arg_info(op->args[1])->mask);
938             break;
939 
940         CASE_OP_32_64(deposit):
941             mask = deposit64(arg_info(op->args[1])->mask,
942                              op->args[3], op->args[4],
943                              arg_info(op->args[2])->mask);
944             break;
945 
946         CASE_OP_32_64(extract):
947             mask = extract64(arg_info(op->args[1])->mask,
948                              op->args[2], op->args[3]);
949             if (op->args[2] == 0) {
950                 affected = arg_info(op->args[1])->mask & ~mask;
951             }
952             break;
953         CASE_OP_32_64(sextract):
954             mask = sextract64(arg_info(op->args[1])->mask,
955                               op->args[2], op->args[3]);
956             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
957                 affected = arg_info(op->args[1])->mask & ~mask;
958             }
959             break;
960 
961         CASE_OP_32_64(or):
962         CASE_OP_32_64(xor):
963             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
964             break;
965 
966         case INDEX_op_clz_i32:
967         case INDEX_op_ctz_i32:
968             mask = arg_info(op->args[2])->mask | 31;
969             break;
970 
971         case INDEX_op_clz_i64:
972         case INDEX_op_ctz_i64:
973             mask = arg_info(op->args[2])->mask | 63;
974             break;
975 
976         case INDEX_op_ctpop_i32:
977             mask = 32 | 31;
978             break;
979         case INDEX_op_ctpop_i64:
980             mask = 64 | 63;
981             break;
982 
983         CASE_OP_32_64(setcond):
984         case INDEX_op_setcond2_i32:
985             mask = 1;
986             break;
987 
988         CASE_OP_32_64(movcond):
989             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
990             break;
991 
992         CASE_OP_32_64(ld8u):
993             mask = 0xff;
994             break;
995         CASE_OP_32_64(ld16u):
996             mask = 0xffff;
997             break;
998         case INDEX_op_ld32u_i64:
999             mask = 0xffffffffu;
1000             break;
1001 
1002         CASE_OP_32_64(qemu_ld):
1003             {
1004                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1005                 TCGMemOp mop = get_memop(oi);
1006                 if (!(mop & MO_SIGN)) {
1007                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1008                 }
1009             }
1010             break;
1011 
1012         default:
1013             break;
1014         }
1015 
1016         /* 32-bit ops generate 32-bit results.  For the "result is zero"
1017            test below, we can ignore high bits, but for further optimizations
1018            we need to record that the high bits contain garbage.  */
1019         partmask = mask;
1020         if (!(def->flags & TCG_OPF_64BIT)) {
1021             mask |= ~(tcg_target_ulong)0xffffffffu;
1022             partmask &= 0xffffffffu;
1023             affected &= 0xffffffffu;
1024         }
1025 
1026         if (partmask == 0) {
1027             tcg_debug_assert(nb_oargs == 1);
1028             tcg_opt_gen_movi(s, op, op->args[0], 0);
1029             continue;
1030         }
1031         if (affected == 0) {
1032             tcg_debug_assert(nb_oargs == 1);
1033             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1034             continue;
1035         }
1036 
1037         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1038         switch (opc) {
1039         CASE_OP_32_64_VEC(and):
1040         CASE_OP_32_64_VEC(mul):
1041         CASE_OP_32_64(muluh):
1042         CASE_OP_32_64(mulsh):
1043             if (arg_is_const(op->args[2])
1044                 && arg_info(op->args[2])->val == 0) {
1045                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1046                 continue;
1047             }
1048             break;
1049         default:
1050             break;
1051         }
1052 
1053         /* Simplify expression for "op r, a, a => mov r, a" cases */
1054         switch (opc) {
1055         CASE_OP_32_64_VEC(or):
1056         CASE_OP_32_64_VEC(and):
1057             if (args_are_copies(op->args[1], op->args[2])) {
1058                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1059                 continue;
1060             }
1061             break;
1062         default:
1063             break;
1064         }
1065 
1066         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1067         switch (opc) {
1068         CASE_OP_32_64_VEC(andc):
1069         CASE_OP_32_64_VEC(sub):
1070         CASE_OP_32_64_VEC(xor):
1071             if (args_are_copies(op->args[1], op->args[2])) {
1072                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1073                 continue;
1074             }
1075             break;
1076         default:
1077             break;
1078         }
1079 
1080         /* Propagate constants through copy operations and do constant
1081            folding.  Constants will be substituted to arguments by register
1082            allocator where needed and possible.  Also detect copies. */
1083         switch (opc) {
1084         CASE_OP_32_64_VEC(mov):
1085             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1086             break;
1087         CASE_OP_32_64(movi):
1088         case INDEX_op_dupi_vec:
1089             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1090             break;
1091 
1092         case INDEX_op_dup_vec:
1093             if (arg_is_const(op->args[1])) {
1094                 tmp = arg_info(op->args[1])->val;
1095                 tmp = dup_const(TCGOP_VECE(op), tmp);
1096                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1097                 continue;
1098             }
1099             break;
1100 
1101         CASE_OP_32_64(not):
1102         CASE_OP_32_64(neg):
1103         CASE_OP_32_64(ext8s):
1104         CASE_OP_32_64(ext8u):
1105         CASE_OP_32_64(ext16s):
1106         CASE_OP_32_64(ext16u):
1107         CASE_OP_32_64(ctpop):
1108         case INDEX_op_ext32s_i64:
1109         case INDEX_op_ext32u_i64:
1110         case INDEX_op_ext_i32_i64:
1111         case INDEX_op_extu_i32_i64:
1112         case INDEX_op_extrl_i64_i32:
1113         case INDEX_op_extrh_i64_i32:
1114             if (arg_is_const(op->args[1])) {
1115                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1116                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1117                 break;
1118             }
1119             goto do_default;
1120 
1121         CASE_OP_32_64(add):
1122         CASE_OP_32_64(sub):
1123         CASE_OP_32_64(mul):
1124         CASE_OP_32_64(or):
1125         CASE_OP_32_64(and):
1126         CASE_OP_32_64(xor):
1127         CASE_OP_32_64(shl):
1128         CASE_OP_32_64(shr):
1129         CASE_OP_32_64(sar):
1130         CASE_OP_32_64(rotl):
1131         CASE_OP_32_64(rotr):
1132         CASE_OP_32_64(andc):
1133         CASE_OP_32_64(orc):
1134         CASE_OP_32_64(eqv):
1135         CASE_OP_32_64(nand):
1136         CASE_OP_32_64(nor):
1137         CASE_OP_32_64(muluh):
1138         CASE_OP_32_64(mulsh):
1139         CASE_OP_32_64(div):
1140         CASE_OP_32_64(divu):
1141         CASE_OP_32_64(rem):
1142         CASE_OP_32_64(remu):
1143             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1144                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1145                                           arg_info(op->args[2])->val);
1146                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1147                 break;
1148             }
1149             goto do_default;
1150 
1151         CASE_OP_32_64(clz):
1152         CASE_OP_32_64(ctz):
1153             if (arg_is_const(op->args[1])) {
1154                 TCGArg v = arg_info(op->args[1])->val;
1155                 if (v != 0) {
1156                     tmp = do_constant_folding(opc, v, 0);
1157                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1158                 } else {
1159                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1160                 }
1161                 break;
1162             }
1163             goto do_default;
1164 
1165         CASE_OP_32_64(deposit):
1166             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1167                 tmp = deposit64(arg_info(op->args[1])->val,
1168                                 op->args[3], op->args[4],
1169                                 arg_info(op->args[2])->val);
1170                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1171                 break;
1172             }
1173             goto do_default;
1174 
1175         CASE_OP_32_64(extract):
1176             if (arg_is_const(op->args[1])) {
1177                 tmp = extract64(arg_info(op->args[1])->val,
1178                                 op->args[2], op->args[3]);
1179                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1180                 break;
1181             }
1182             goto do_default;
1183 
1184         CASE_OP_32_64(sextract):
1185             if (arg_is_const(op->args[1])) {
1186                 tmp = sextract64(arg_info(op->args[1])->val,
1187                                  op->args[2], op->args[3]);
1188                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1189                 break;
1190             }
1191             goto do_default;
1192 
1193         CASE_OP_32_64(setcond):
1194             tmp = do_constant_folding_cond(opc, op->args[1],
1195                                            op->args[2], op->args[3]);
1196             if (tmp != 2) {
1197                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1198                 break;
1199             }
1200             goto do_default;
1201 
1202         CASE_OP_32_64(brcond):
1203             tmp = do_constant_folding_cond(opc, op->args[0],
1204                                            op->args[1], op->args[2]);
1205             if (tmp != 2) {
1206                 if (tmp) {
1207                     bitmap_zero(temps_used.l, nb_temps);
1208                     op->opc = INDEX_op_br;
1209                     op->args[0] = op->args[3];
1210                 } else {
1211                     tcg_op_remove(s, op);
1212                 }
1213                 break;
1214             }
1215             goto do_default;
1216 
1217         CASE_OP_32_64(movcond):
1218             tmp = do_constant_folding_cond(opc, op->args[1],
1219                                            op->args[2], op->args[5]);
1220             if (tmp != 2) {
1221                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1222                 break;
1223             }
1224             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1225                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1226                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1227                 TCGCond cond = op->args[5];
1228                 if (fv == 1 && tv == 0) {
1229                     cond = tcg_invert_cond(cond);
1230                 } else if (!(tv == 1 && fv == 0)) {
1231                     goto do_default;
1232                 }
1233                 op->args[3] = cond;
1234                 op->opc = opc = (opc == INDEX_op_movcond_i32
1235                                  ? INDEX_op_setcond_i32
1236                                  : INDEX_op_setcond_i64);
1237                 nb_iargs = 2;
1238             }
1239             goto do_default;
1240 
1241         case INDEX_op_add2_i32:
1242         case INDEX_op_sub2_i32:
1243             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1244                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1245                 uint32_t al = arg_info(op->args[2])->val;
1246                 uint32_t ah = arg_info(op->args[3])->val;
1247                 uint32_t bl = arg_info(op->args[4])->val;
1248                 uint32_t bh = arg_info(op->args[5])->val;
1249                 uint64_t a = ((uint64_t)ah << 32) | al;
1250                 uint64_t b = ((uint64_t)bh << 32) | bl;
1251                 TCGArg rl, rh;
1252                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1253 
1254                 if (opc == INDEX_op_add2_i32) {
1255                     a += b;
1256                 } else {
1257                     a -= b;
1258                 }
1259 
1260                 rl = op->args[0];
1261                 rh = op->args[1];
1262                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1263                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1264                 break;
1265             }
1266             goto do_default;
1267 
1268         case INDEX_op_mulu2_i32:
1269             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1270                 uint32_t a = arg_info(op->args[2])->val;
1271                 uint32_t b = arg_info(op->args[3])->val;
1272                 uint64_t r = (uint64_t)a * b;
1273                 TCGArg rl, rh;
1274                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1275 
1276                 rl = op->args[0];
1277                 rh = op->args[1];
1278                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1279                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1280                 break;
1281             }
1282             goto do_default;
1283 
1284         case INDEX_op_brcond2_i32:
1285             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1286                                             op->args[4]);
1287             if (tmp != 2) {
1288                 if (tmp) {
1289             do_brcond_true:
1290                     bitmap_zero(temps_used.l, nb_temps);
1291                     op->opc = INDEX_op_br;
1292                     op->args[0] = op->args[5];
1293                 } else {
1294             do_brcond_false:
1295                     tcg_op_remove(s, op);
1296                 }
1297             } else if ((op->args[4] == TCG_COND_LT
1298                         || op->args[4] == TCG_COND_GE)
1299                        && arg_is_const(op->args[2])
1300                        && arg_info(op->args[2])->val == 0
1301                        && arg_is_const(op->args[3])
1302                        && arg_info(op->args[3])->val == 0) {
1303                 /* Simplify LT/GE comparisons vs zero to a single compare
1304                    vs the high word of the input.  */
1305             do_brcond_high:
1306                 bitmap_zero(temps_used.l, nb_temps);
1307                 op->opc = INDEX_op_brcond_i32;
1308                 op->args[0] = op->args[1];
1309                 op->args[1] = op->args[3];
1310                 op->args[2] = op->args[4];
1311                 op->args[3] = op->args[5];
1312             } else if (op->args[4] == TCG_COND_EQ) {
1313                 /* Simplify EQ comparisons where one of the pairs
1314                    can be simplified.  */
1315                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1316                                                op->args[0], op->args[2],
1317                                                TCG_COND_EQ);
1318                 if (tmp == 0) {
1319                     goto do_brcond_false;
1320                 } else if (tmp == 1) {
1321                     goto do_brcond_high;
1322                 }
1323                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1324                                                op->args[1], op->args[3],
1325                                                TCG_COND_EQ);
1326                 if (tmp == 0) {
1327                     goto do_brcond_false;
1328                 } else if (tmp != 1) {
1329                     goto do_default;
1330                 }
1331             do_brcond_low:
1332                 bitmap_zero(temps_used.l, nb_temps);
1333                 op->opc = INDEX_op_brcond_i32;
1334                 op->args[1] = op->args[2];
1335                 op->args[2] = op->args[4];
1336                 op->args[3] = op->args[5];
1337             } else if (op->args[4] == TCG_COND_NE) {
1338                 /* Simplify NE comparisons where one of the pairs
1339                    can be simplified.  */
1340                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1341                                                op->args[0], op->args[2],
1342                                                TCG_COND_NE);
1343                 if (tmp == 0) {
1344                     goto do_brcond_high;
1345                 } else if (tmp == 1) {
1346                     goto do_brcond_true;
1347                 }
1348                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1349                                                op->args[1], op->args[3],
1350                                                TCG_COND_NE);
1351                 if (tmp == 0) {
1352                     goto do_brcond_low;
1353                 } else if (tmp == 1) {
1354                     goto do_brcond_true;
1355                 }
1356                 goto do_default;
1357             } else {
1358                 goto do_default;
1359             }
1360             break;
1361 
1362         case INDEX_op_setcond2_i32:
1363             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1364                                             op->args[5]);
1365             if (tmp != 2) {
1366             do_setcond_const:
1367                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1368             } else if ((op->args[5] == TCG_COND_LT
1369                         || op->args[5] == TCG_COND_GE)
1370                        && arg_is_const(op->args[3])
1371                        && arg_info(op->args[3])->val == 0
1372                        && arg_is_const(op->args[4])
1373                        && arg_info(op->args[4])->val == 0) {
1374                 /* Simplify LT/GE comparisons vs zero to a single compare
1375                    vs the high word of the input.  */
1376             do_setcond_high:
1377                 reset_temp(op->args[0]);
1378                 arg_info(op->args[0])->mask = 1;
1379                 op->opc = INDEX_op_setcond_i32;
1380                 op->args[1] = op->args[2];
1381                 op->args[2] = op->args[4];
1382                 op->args[3] = op->args[5];
1383             } else if (op->args[5] == TCG_COND_EQ) {
1384                 /* Simplify EQ comparisons where one of the pairs
1385                    can be simplified.  */
1386                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1387                                                op->args[1], op->args[3],
1388                                                TCG_COND_EQ);
1389                 if (tmp == 0) {
1390                     goto do_setcond_const;
1391                 } else if (tmp == 1) {
1392                     goto do_setcond_high;
1393                 }
1394                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1395                                                op->args[2], op->args[4],
1396                                                TCG_COND_EQ);
1397                 if (tmp == 0) {
1398                     goto do_setcond_high;
1399                 } else if (tmp != 1) {
1400                     goto do_default;
1401                 }
1402             do_setcond_low:
1403                 reset_temp(op->args[0]);
1404                 arg_info(op->args[0])->mask = 1;
1405                 op->opc = INDEX_op_setcond_i32;
1406                 op->args[2] = op->args[3];
1407                 op->args[3] = op->args[5];
1408             } else if (op->args[5] == TCG_COND_NE) {
1409                 /* Simplify NE comparisons where one of the pairs
1410                    can be simplified.  */
1411                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1412                                                op->args[1], op->args[3],
1413                                                TCG_COND_NE);
1414                 if (tmp == 0) {
1415                     goto do_setcond_high;
1416                 } else if (tmp == 1) {
1417                     goto do_setcond_const;
1418                 }
1419                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1420                                                op->args[2], op->args[4],
1421                                                TCG_COND_NE);
1422                 if (tmp == 0) {
1423                     goto do_setcond_low;
1424                 } else if (tmp == 1) {
1425                     goto do_setcond_const;
1426                 }
1427                 goto do_default;
1428             } else {
1429                 goto do_default;
1430             }
1431             break;
1432 
1433         case INDEX_op_call:
1434             if (!(op->args[nb_oargs + nb_iargs + 1]
1435                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1436                 for (i = 0; i < nb_globals; i++) {
1437                     if (test_bit(i, temps_used.l)) {
1438                         reset_ts(&s->temps[i]);
1439                     }
1440                 }
1441             }
1442             goto do_reset_output;
1443 
1444         default:
1445         do_default:
1446             /* Default case: we know nothing about operation (or were unable
1447                to compute the operation result) so no propagation is done.
1448                We trash everything if the operation is the end of a basic
1449                block, otherwise we only trash the output args.  "mask" is
1450                the non-zero bits mask for the first output arg.  */
1451             if (def->flags & TCG_OPF_BB_END) {
1452                 bitmap_zero(temps_used.l, nb_temps);
1453             } else {
1454         do_reset_output:
1455                 for (i = 0; i < nb_oargs; i++) {
1456                     reset_temp(op->args[i]);
1457                     /* Save the corresponding known-zero bits mask for the
1458                        first output argument (only one supported so far). */
1459                     if (i == 0) {
1460                         arg_info(op->args[i])->mask = mask;
1461                     }
1462                 }
1463             }
1464             break;
1465         }
1466 
1467         /* Eliminate duplicate and redundant fence instructions.  */
1468         if (prev_mb) {
1469             switch (opc) {
1470             case INDEX_op_mb:
1471                 /* Merge two barriers of the same type into one,
1472                  * or a weaker barrier into a stronger one,
1473                  * or two weaker barriers into a stronger one.
1474                  *   mb X; mb Y => mb X|Y
1475                  *   mb; strl => mb; st
1476                  *   ldaq; mb => ld; mb
1477                  *   ldaq; strl => ld; mb; st
1478                  * Other combinations are also merged into a strong
1479                  * barrier.  This is stricter than specified but for
1480                  * the purposes of TCG is better than not optimizing.
1481                  */
1482                 prev_mb->args[0] |= op->args[0];
1483                 tcg_op_remove(s, op);
1484                 break;
1485 
1486             default:
1487                 /* Opcodes that end the block stop the optimization.  */
1488                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1489                     break;
1490                 }
1491                 /* fallthru */
1492             case INDEX_op_qemu_ld_i32:
1493             case INDEX_op_qemu_ld_i64:
1494             case INDEX_op_qemu_st_i32:
1495             case INDEX_op_qemu_st_i64:
1496             case INDEX_op_call:
1497                 /* Opcodes that touch guest memory stop the optimization.  */
1498                 prev_mb = NULL;
1499                 break;
1500             }
1501         } else if (opc == INDEX_op_mb) {
1502             prev_mb = op;
1503         }
1504     }
1505 }
1506