xref: /openbmc/qemu/tcg/optimize.c (revision 8e6fe6b8)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "exec/cpu-common.h"
28 #include "tcg-op.h"
29 
/*
 * Expand to the case labels for the _i32 and _i64 variants of opcode X.
 * The last label is emitted without its colon, so the macro is written
 * as "CASE_OP_32_64(add):" at the point of use.
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As CASE_OP_32_64, with the _vec variant of opcode X added.  */
#define CASE_OP_32_64_VEC(x)                    \
        CASE_OP_32_64(x):                       \
        glue(glue(case INDEX_op_, x), _vec)
38 
39 struct tcg_temp_info {
40     bool is_const;
41     TCGTemp *prev_copy;
42     TCGTemp *next_copy;
43     tcg_target_ulong val;
44     tcg_target_ulong mask;
45 };
46 
47 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
48 {
49     return ts->state_ptr;
50 }
51 
52 static inline struct tcg_temp_info *arg_info(TCGArg arg)
53 {
54     return ts_info(arg_temp(arg));
55 }
56 
57 static inline bool ts_is_const(TCGTemp *ts)
58 {
59     return ts_info(ts)->is_const;
60 }
61 
62 static inline bool arg_is_const(TCGArg arg)
63 {
64     return ts_is_const(arg_temp(arg));
65 }
66 
67 static inline bool ts_is_copy(TCGTemp *ts)
68 {
69     return ts_info(ts)->next_copy != ts;
70 }
71 
72 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
73 static void reset_ts(TCGTemp *ts)
74 {
75     struct tcg_temp_info *ti = ts_info(ts);
76     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
77     struct tcg_temp_info *ni = ts_info(ti->next_copy);
78 
79     ni->prev_copy = ti->prev_copy;
80     pi->next_copy = ti->next_copy;
81     ti->next_copy = ts;
82     ti->prev_copy = ts;
83     ti->is_const = false;
84     ti->mask = -1;
85 }
86 
87 static void reset_temp(TCGArg arg)
88 {
89     reset_ts(arg_temp(arg));
90 }
91 
92 /* Initialize and activate a temporary.  */
93 static void init_ts_info(struct tcg_temp_info *infos,
94                          TCGTempSet *temps_used, TCGTemp *ts)
95 {
96     size_t idx = temp_idx(ts);
97     if (!test_bit(idx, temps_used->l)) {
98         struct tcg_temp_info *ti = &infos[idx];
99 
100         ts->state_ptr = ti;
101         ti->next_copy = ts;
102         ti->prev_copy = ts;
103         ti->is_const = false;
104         ti->mask = -1;
105         set_bit(idx, temps_used->l);
106     }
107 }
108 
109 static void init_arg_info(struct tcg_temp_info *infos,
110                           TCGTempSet *temps_used, TCGArg arg)
111 {
112     init_ts_info(infos, temps_used, arg_temp(arg));
113 }
114 
115 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
116 {
117     TCGTemp *i;
118 
119     /* If this is already a global, we can't do better. */
120     if (ts->temp_global) {
121         return ts;
122     }
123 
124     /* Search for a global first. */
125     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
126         if (i->temp_global) {
127             return i;
128         }
129     }
130 
131     /* If it is a temp, search for a temp local. */
132     if (!ts->temp_local) {
133         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
134             if (ts->temp_local) {
135                 return i;
136             }
137         }
138     }
139 
140     /* Failure to find a better representation, return the same temp. */
141     return ts;
142 }
143 
144 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
145 {
146     TCGTemp *i;
147 
148     if (ts1 == ts2) {
149         return true;
150     }
151 
152     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
153         return false;
154     }
155 
156     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
157         if (i == ts2) {
158             return true;
159         }
160     }
161 
162     return false;
163 }
164 
165 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
166 {
167     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
168 }
169 
170 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
171 {
172     const TCGOpDef *def;
173     TCGOpcode new_op;
174     tcg_target_ulong mask;
175     struct tcg_temp_info *di = arg_info(dst);
176 
177     def = &tcg_op_defs[op->opc];
178     if (def->flags & TCG_OPF_VECTOR) {
179         new_op = INDEX_op_dupi_vec;
180     } else if (def->flags & TCG_OPF_64BIT) {
181         new_op = INDEX_op_movi_i64;
182     } else {
183         new_op = INDEX_op_movi_i32;
184     }
185     op->opc = new_op;
186     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
187     op->args[0] = dst;
188     op->args[1] = val;
189 
190     reset_temp(dst);
191     di->is_const = true;
192     di->val = val;
193     mask = val;
194     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
195         /* High bits of the destination are now garbage.  */
196         mask |= ~0xffffffffull;
197     }
198     di->mask = mask;
199 }
200 
201 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
202 {
203     TCGTemp *dst_ts = arg_temp(dst);
204     TCGTemp *src_ts = arg_temp(src);
205     const TCGOpDef *def;
206     struct tcg_temp_info *di;
207     struct tcg_temp_info *si;
208     tcg_target_ulong mask;
209     TCGOpcode new_op;
210 
211     if (ts_are_copies(dst_ts, src_ts)) {
212         tcg_op_remove(s, op);
213         return;
214     }
215 
216     reset_ts(dst_ts);
217     di = ts_info(dst_ts);
218     si = ts_info(src_ts);
219     def = &tcg_op_defs[op->opc];
220     if (def->flags & TCG_OPF_VECTOR) {
221         new_op = INDEX_op_mov_vec;
222     } else if (def->flags & TCG_OPF_64BIT) {
223         new_op = INDEX_op_mov_i64;
224     } else {
225         new_op = INDEX_op_mov_i32;
226     }
227     op->opc = new_op;
228     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
229     op->args[0] = dst;
230     op->args[1] = src;
231 
232     mask = si->mask;
233     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
234         /* High bits of the destination are now garbage.  */
235         mask |= ~0xffffffffull;
236     }
237     di->mask = mask;
238 
239     if (src_ts->type == dst_ts->type) {
240         struct tcg_temp_info *ni = ts_info(si->next_copy);
241 
242         di->next_copy = si->next_copy;
243         di->prev_copy = src_ts;
244         ni->prev_copy = dst_ts;
245         si->next_copy = dst_ts;
246         di->is_const = si->is_const;
247         di->val = si->val;
248     }
249 }
250 
251 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
252 {
253     uint64_t l64, h64;
254 
255     switch (op) {
256     CASE_OP_32_64(add):
257         return x + y;
258 
259     CASE_OP_32_64(sub):
260         return x - y;
261 
262     CASE_OP_32_64(mul):
263         return x * y;
264 
265     CASE_OP_32_64(and):
266         return x & y;
267 
268     CASE_OP_32_64(or):
269         return x | y;
270 
271     CASE_OP_32_64(xor):
272         return x ^ y;
273 
274     case INDEX_op_shl_i32:
275         return (uint32_t)x << (y & 31);
276 
277     case INDEX_op_shl_i64:
278         return (uint64_t)x << (y & 63);
279 
280     case INDEX_op_shr_i32:
281         return (uint32_t)x >> (y & 31);
282 
283     case INDEX_op_shr_i64:
284         return (uint64_t)x >> (y & 63);
285 
286     case INDEX_op_sar_i32:
287         return (int32_t)x >> (y & 31);
288 
289     case INDEX_op_sar_i64:
290         return (int64_t)x >> (y & 63);
291 
292     case INDEX_op_rotr_i32:
293         return ror32(x, y & 31);
294 
295     case INDEX_op_rotr_i64:
296         return ror64(x, y & 63);
297 
298     case INDEX_op_rotl_i32:
299         return rol32(x, y & 31);
300 
301     case INDEX_op_rotl_i64:
302         return rol64(x, y & 63);
303 
304     CASE_OP_32_64(not):
305         return ~x;
306 
307     CASE_OP_32_64(neg):
308         return -x;
309 
310     CASE_OP_32_64(andc):
311         return x & ~y;
312 
313     CASE_OP_32_64(orc):
314         return x | ~y;
315 
316     CASE_OP_32_64(eqv):
317         return ~(x ^ y);
318 
319     CASE_OP_32_64(nand):
320         return ~(x & y);
321 
322     CASE_OP_32_64(nor):
323         return ~(x | y);
324 
325     case INDEX_op_clz_i32:
326         return (uint32_t)x ? clz32(x) : y;
327 
328     case INDEX_op_clz_i64:
329         return x ? clz64(x) : y;
330 
331     case INDEX_op_ctz_i32:
332         return (uint32_t)x ? ctz32(x) : y;
333 
334     case INDEX_op_ctz_i64:
335         return x ? ctz64(x) : y;
336 
337     case INDEX_op_ctpop_i32:
338         return ctpop32(x);
339 
340     case INDEX_op_ctpop_i64:
341         return ctpop64(x);
342 
343     CASE_OP_32_64(ext8s):
344         return (int8_t)x;
345 
346     CASE_OP_32_64(ext16s):
347         return (int16_t)x;
348 
349     CASE_OP_32_64(ext8u):
350         return (uint8_t)x;
351 
352     CASE_OP_32_64(ext16u):
353         return (uint16_t)x;
354 
355     CASE_OP_32_64(bswap16):
356         return bswap16(x);
357 
358     CASE_OP_32_64(bswap32):
359         return bswap32(x);
360 
361     case INDEX_op_bswap64_i64:
362         return bswap64(x);
363 
364     case INDEX_op_ext_i32_i64:
365     case INDEX_op_ext32s_i64:
366         return (int32_t)x;
367 
368     case INDEX_op_extu_i32_i64:
369     case INDEX_op_extrl_i64_i32:
370     case INDEX_op_ext32u_i64:
371         return (uint32_t)x;
372 
373     case INDEX_op_extrh_i64_i32:
374         return (uint64_t)x >> 32;
375 
376     case INDEX_op_muluh_i32:
377         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
378     case INDEX_op_mulsh_i32:
379         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
380 
381     case INDEX_op_muluh_i64:
382         mulu64(&l64, &h64, x, y);
383         return h64;
384     case INDEX_op_mulsh_i64:
385         muls64(&l64, &h64, x, y);
386         return h64;
387 
388     case INDEX_op_div_i32:
389         /* Avoid crashing on divide by zero, otherwise undefined.  */
390         return (int32_t)x / ((int32_t)y ? : 1);
391     case INDEX_op_divu_i32:
392         return (uint32_t)x / ((uint32_t)y ? : 1);
393     case INDEX_op_div_i64:
394         return (int64_t)x / ((int64_t)y ? : 1);
395     case INDEX_op_divu_i64:
396         return (uint64_t)x / ((uint64_t)y ? : 1);
397 
398     case INDEX_op_rem_i32:
399         return (int32_t)x % ((int32_t)y ? : 1);
400     case INDEX_op_remu_i32:
401         return (uint32_t)x % ((uint32_t)y ? : 1);
402     case INDEX_op_rem_i64:
403         return (int64_t)x % ((int64_t)y ? : 1);
404     case INDEX_op_remu_i64:
405         return (uint64_t)x % ((uint64_t)y ? : 1);
406 
407     default:
408         fprintf(stderr,
409                 "Unrecognized operation %d in do_constant_folding.\n", op);
410         tcg_abort();
411     }
412 }
413 
414 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
415 {
416     const TCGOpDef *def = &tcg_op_defs[op];
417     TCGArg res = do_constant_folding_2(op, x, y);
418     if (!(def->flags & TCG_OPF_64BIT)) {
419         res = (int32_t)res;
420     }
421     return res;
422 }
423 
424 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
425 {
426     switch (c) {
427     case TCG_COND_EQ:
428         return x == y;
429     case TCG_COND_NE:
430         return x != y;
431     case TCG_COND_LT:
432         return (int32_t)x < (int32_t)y;
433     case TCG_COND_GE:
434         return (int32_t)x >= (int32_t)y;
435     case TCG_COND_LE:
436         return (int32_t)x <= (int32_t)y;
437     case TCG_COND_GT:
438         return (int32_t)x > (int32_t)y;
439     case TCG_COND_LTU:
440         return x < y;
441     case TCG_COND_GEU:
442         return x >= y;
443     case TCG_COND_LEU:
444         return x <= y;
445     case TCG_COND_GTU:
446         return x > y;
447     default:
448         tcg_abort();
449     }
450 }
451 
452 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
453 {
454     switch (c) {
455     case TCG_COND_EQ:
456         return x == y;
457     case TCG_COND_NE:
458         return x != y;
459     case TCG_COND_LT:
460         return (int64_t)x < (int64_t)y;
461     case TCG_COND_GE:
462         return (int64_t)x >= (int64_t)y;
463     case TCG_COND_LE:
464         return (int64_t)x <= (int64_t)y;
465     case TCG_COND_GT:
466         return (int64_t)x > (int64_t)y;
467     case TCG_COND_LTU:
468         return x < y;
469     case TCG_COND_GEU:
470         return x >= y;
471     case TCG_COND_LEU:
472         return x <= y;
473     case TCG_COND_GTU:
474         return x > y;
475     default:
476         tcg_abort();
477     }
478 }
479 
480 static bool do_constant_folding_cond_eq(TCGCond c)
481 {
482     switch (c) {
483     case TCG_COND_GT:
484     case TCG_COND_LTU:
485     case TCG_COND_LT:
486     case TCG_COND_GTU:
487     case TCG_COND_NE:
488         return 0;
489     case TCG_COND_GE:
490     case TCG_COND_GEU:
491     case TCG_COND_LE:
492     case TCG_COND_LEU:
493     case TCG_COND_EQ:
494         return 1;
495     default:
496         tcg_abort();
497     }
498 }
499 
500 /* Return 2 if the condition can't be simplified, and the result
501    of the condition (0 or 1) if it can */
502 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
503                                        TCGArg y, TCGCond c)
504 {
505     tcg_target_ulong xv = arg_info(x)->val;
506     tcg_target_ulong yv = arg_info(y)->val;
507     if (arg_is_const(x) && arg_is_const(y)) {
508         const TCGOpDef *def = &tcg_op_defs[op];
509         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
510         if (def->flags & TCG_OPF_64BIT) {
511             return do_constant_folding_cond_64(xv, yv, c);
512         } else {
513             return do_constant_folding_cond_32(xv, yv, c);
514         }
515     } else if (args_are_copies(x, y)) {
516         return do_constant_folding_cond_eq(c);
517     } else if (arg_is_const(y) && yv == 0) {
518         switch (c) {
519         case TCG_COND_LTU:
520             return 0;
521         case TCG_COND_GEU:
522             return 1;
523         default:
524             return 2;
525         }
526     }
527     return 2;
528 }
529 
530 /* Return 2 if the condition can't be simplified, and the result
531    of the condition (0 or 1) if it can */
532 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
533 {
534     TCGArg al = p1[0], ah = p1[1];
535     TCGArg bl = p2[0], bh = p2[1];
536 
537     if (arg_is_const(bl) && arg_is_const(bh)) {
538         tcg_target_ulong blv = arg_info(bl)->val;
539         tcg_target_ulong bhv = arg_info(bh)->val;
540         uint64_t b = deposit64(blv, 32, 32, bhv);
541 
542         if (arg_is_const(al) && arg_is_const(ah)) {
543             tcg_target_ulong alv = arg_info(al)->val;
544             tcg_target_ulong ahv = arg_info(ah)->val;
545             uint64_t a = deposit64(alv, 32, 32, ahv);
546             return do_constant_folding_cond_64(a, b, c);
547         }
548         if (b == 0) {
549             switch (c) {
550             case TCG_COND_LTU:
551                 return 0;
552             case TCG_COND_GEU:
553                 return 1;
554             default:
555                 break;
556             }
557         }
558     }
559     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
560         return do_constant_folding_cond_eq(c);
561     }
562     return 2;
563 }
564 
565 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
566 {
567     TCGArg a1 = *p1, a2 = *p2;
568     int sum = 0;
569     sum += arg_is_const(a1);
570     sum -= arg_is_const(a2);
571 
572     /* Prefer the constant in second argument, and then the form
573        op a, a, b, which is better handled on non-RISC hosts. */
574     if (sum > 0 || (sum == 0 && dest == a2)) {
575         *p1 = a2;
576         *p2 = a1;
577         return true;
578     }
579     return false;
580 }
581 
582 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
583 {
584     int sum = 0;
585     sum += arg_is_const(p1[0]);
586     sum += arg_is_const(p1[1]);
587     sum -= arg_is_const(p2[0]);
588     sum -= arg_is_const(p2[1]);
589     if (sum > 0) {
590         TCGArg t;
591         t = p1[0], p1[0] = p2[0], p2[0] = t;
592         t = p1[1], p1[1] = p2[1], p2[1] = t;
593         return true;
594     }
595     return false;
596 }
597 
598 /* Propagate constants and copies, fold constant expressions. */
599 void tcg_optimize(TCGContext *s)
600 {
601     int nb_temps, nb_globals;
602     TCGOp *op, *op_next, *prev_mb = NULL;
603     struct tcg_temp_info *infos;
604     TCGTempSet temps_used;
605 
606     /* Array VALS has an element for each temp.
607        If this temp holds a constant then its value is kept in VALS' element.
608        If this temp is a copy of other ones then the other copies are
609        available through the doubly linked circular list. */
610 
611     nb_temps = s->nb_temps;
612     nb_globals = s->nb_globals;
613     bitmap_zero(temps_used.l, nb_temps);
614     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
615 
616     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
617         tcg_target_ulong mask, partmask, affected;
618         int nb_oargs, nb_iargs, i;
619         TCGArg tmp;
620         TCGOpcode opc = op->opc;
621         const TCGOpDef *def = &tcg_op_defs[opc];
622 
623         /* Count the arguments, and initialize the temps that are
624            going to be used */
625         if (opc == INDEX_op_call) {
626             nb_oargs = TCGOP_CALLO(op);
627             nb_iargs = TCGOP_CALLI(op);
628             for (i = 0; i < nb_oargs + nb_iargs; i++) {
629                 TCGTemp *ts = arg_temp(op->args[i]);
630                 if (ts) {
631                     init_ts_info(infos, &temps_used, ts);
632                 }
633             }
634         } else {
635             nb_oargs = def->nb_oargs;
636             nb_iargs = def->nb_iargs;
637             for (i = 0; i < nb_oargs + nb_iargs; i++) {
638                 init_arg_info(infos, &temps_used, op->args[i]);
639             }
640         }
641 
642         /* Do copy propagation */
643         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
644             TCGTemp *ts = arg_temp(op->args[i]);
645             if (ts && ts_is_copy(ts)) {
646                 op->args[i] = temp_arg(find_better_copy(s, ts));
647             }
648         }
649 
650         /* For commutative operations make constant second argument */
651         switch (opc) {
652         CASE_OP_32_64_VEC(add):
653         CASE_OP_32_64_VEC(mul):
654         CASE_OP_32_64_VEC(and):
655         CASE_OP_32_64_VEC(or):
656         CASE_OP_32_64_VEC(xor):
657         CASE_OP_32_64(eqv):
658         CASE_OP_32_64(nand):
659         CASE_OP_32_64(nor):
660         CASE_OP_32_64(muluh):
661         CASE_OP_32_64(mulsh):
662             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
663             break;
664         CASE_OP_32_64(brcond):
665             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
666                 op->args[2] = tcg_swap_cond(op->args[2]);
667             }
668             break;
669         CASE_OP_32_64(setcond):
670             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
671                 op->args[3] = tcg_swap_cond(op->args[3]);
672             }
673             break;
674         CASE_OP_32_64(movcond):
675             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
676                 op->args[5] = tcg_swap_cond(op->args[5]);
677             }
678             /* For movcond, we canonicalize the "false" input reg to match
679                the destination reg so that the tcg backend can implement
680                a "move if true" operation.  */
681             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
682                 op->args[5] = tcg_invert_cond(op->args[5]);
683             }
684             break;
685         CASE_OP_32_64(add2):
686             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
687             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
688             break;
689         CASE_OP_32_64(mulu2):
690         CASE_OP_32_64(muls2):
691             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
692             break;
693         case INDEX_op_brcond2_i32:
694             if (swap_commutative2(&op->args[0], &op->args[2])) {
695                 op->args[4] = tcg_swap_cond(op->args[4]);
696             }
697             break;
698         case INDEX_op_setcond2_i32:
699             if (swap_commutative2(&op->args[1], &op->args[3])) {
700                 op->args[5] = tcg_swap_cond(op->args[5]);
701             }
702             break;
703         default:
704             break;
705         }
706 
707         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
708            and "sub r, 0, a => neg r, a" case.  */
709         switch (opc) {
710         CASE_OP_32_64(shl):
711         CASE_OP_32_64(shr):
712         CASE_OP_32_64(sar):
713         CASE_OP_32_64(rotl):
714         CASE_OP_32_64(rotr):
715             if (arg_is_const(op->args[1])
716                 && arg_info(op->args[1])->val == 0) {
717                 tcg_opt_gen_movi(s, op, op->args[0], 0);
718                 continue;
719             }
720             break;
721         CASE_OP_32_64_VEC(sub):
722             {
723                 TCGOpcode neg_op;
724                 bool have_neg;
725 
726                 if (arg_is_const(op->args[2])) {
727                     /* Proceed with possible constant folding. */
728                     break;
729                 }
730                 if (opc == INDEX_op_sub_i32) {
731                     neg_op = INDEX_op_neg_i32;
732                     have_neg = TCG_TARGET_HAS_neg_i32;
733                 } else if (opc == INDEX_op_sub_i64) {
734                     neg_op = INDEX_op_neg_i64;
735                     have_neg = TCG_TARGET_HAS_neg_i64;
736                 } else if (TCG_TARGET_HAS_neg_vec) {
737                     TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
738                     unsigned vece = TCGOP_VECE(op);
739                     neg_op = INDEX_op_neg_vec;
740                     have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
741                 } else {
742                     break;
743                 }
744                 if (!have_neg) {
745                     break;
746                 }
747                 if (arg_is_const(op->args[1])
748                     && arg_info(op->args[1])->val == 0) {
749                     op->opc = neg_op;
750                     reset_temp(op->args[0]);
751                     op->args[1] = op->args[2];
752                     continue;
753                 }
754             }
755             break;
756         CASE_OP_32_64_VEC(xor):
757         CASE_OP_32_64(nand):
758             if (!arg_is_const(op->args[1])
759                 && arg_is_const(op->args[2])
760                 && arg_info(op->args[2])->val == -1) {
761                 i = 1;
762                 goto try_not;
763             }
764             break;
765         CASE_OP_32_64(nor):
766             if (!arg_is_const(op->args[1])
767                 && arg_is_const(op->args[2])
768                 && arg_info(op->args[2])->val == 0) {
769                 i = 1;
770                 goto try_not;
771             }
772             break;
773         CASE_OP_32_64_VEC(andc):
774             if (!arg_is_const(op->args[2])
775                 && arg_is_const(op->args[1])
776                 && arg_info(op->args[1])->val == -1) {
777                 i = 2;
778                 goto try_not;
779             }
780             break;
781         CASE_OP_32_64_VEC(orc):
782         CASE_OP_32_64(eqv):
783             if (!arg_is_const(op->args[2])
784                 && arg_is_const(op->args[1])
785                 && arg_info(op->args[1])->val == 0) {
786                 i = 2;
787                 goto try_not;
788             }
789             break;
790         try_not:
791             {
792                 TCGOpcode not_op;
793                 bool have_not;
794 
795                 if (def->flags & TCG_OPF_VECTOR) {
796                     not_op = INDEX_op_not_vec;
797                     have_not = TCG_TARGET_HAS_not_vec;
798                 } else if (def->flags & TCG_OPF_64BIT) {
799                     not_op = INDEX_op_not_i64;
800                     have_not = TCG_TARGET_HAS_not_i64;
801                 } else {
802                     not_op = INDEX_op_not_i32;
803                     have_not = TCG_TARGET_HAS_not_i32;
804                 }
805                 if (!have_not) {
806                     break;
807                 }
808                 op->opc = not_op;
809                 reset_temp(op->args[0]);
810                 op->args[1] = op->args[i];
811                 continue;
812             }
813         default:
814             break;
815         }
816 
817         /* Simplify expression for "op r, a, const => mov r, a" cases */
818         switch (opc) {
819         CASE_OP_32_64_VEC(add):
820         CASE_OP_32_64_VEC(sub):
821         CASE_OP_32_64_VEC(or):
822         CASE_OP_32_64_VEC(xor):
823         CASE_OP_32_64_VEC(andc):
824         CASE_OP_32_64(shl):
825         CASE_OP_32_64(shr):
826         CASE_OP_32_64(sar):
827         CASE_OP_32_64(rotl):
828         CASE_OP_32_64(rotr):
829             if (!arg_is_const(op->args[1])
830                 && arg_is_const(op->args[2])
831                 && arg_info(op->args[2])->val == 0) {
832                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
833                 continue;
834             }
835             break;
836         CASE_OP_32_64_VEC(and):
837         CASE_OP_32_64_VEC(orc):
838         CASE_OP_32_64(eqv):
839             if (!arg_is_const(op->args[1])
840                 && arg_is_const(op->args[2])
841                 && arg_info(op->args[2])->val == -1) {
842                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
843                 continue;
844             }
845             break;
846         default:
847             break;
848         }
849 
850         /* Simplify using known-zero bits. Currently only ops with a single
851            output argument is supported. */
852         mask = -1;
853         affected = -1;
854         switch (opc) {
855         CASE_OP_32_64(ext8s):
856             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
857                 break;
858             }
859         CASE_OP_32_64(ext8u):
860             mask = 0xff;
861             goto and_const;
862         CASE_OP_32_64(ext16s):
863             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
864                 break;
865             }
866         CASE_OP_32_64(ext16u):
867             mask = 0xffff;
868             goto and_const;
869         case INDEX_op_ext32s_i64:
870             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
871                 break;
872             }
873         case INDEX_op_ext32u_i64:
874             mask = 0xffffffffU;
875             goto and_const;
876 
877         CASE_OP_32_64(and):
878             mask = arg_info(op->args[2])->mask;
879             if (arg_is_const(op->args[2])) {
880         and_const:
881                 affected = arg_info(op->args[1])->mask & ~mask;
882             }
883             mask = arg_info(op->args[1])->mask & mask;
884             break;
885 
886         case INDEX_op_ext_i32_i64:
887             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
888                 break;
889             }
890         case INDEX_op_extu_i32_i64:
891             /* We do not compute affected as it is a size changing op.  */
892             mask = (uint32_t)arg_info(op->args[1])->mask;
893             break;
894 
895         CASE_OP_32_64(andc):
896             /* Known-zeros does not imply known-ones.  Therefore unless
897                op->args[2] is constant, we can't infer anything from it.  */
898             if (arg_is_const(op->args[2])) {
899                 mask = ~arg_info(op->args[2])->mask;
900                 goto and_const;
901             }
902             /* But we certainly know nothing outside args[1] may be set. */
903             mask = arg_info(op->args[1])->mask;
904             break;
905 
906         case INDEX_op_sar_i32:
907             if (arg_is_const(op->args[2])) {
908                 tmp = arg_info(op->args[2])->val & 31;
909                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
910             }
911             break;
912         case INDEX_op_sar_i64:
913             if (arg_is_const(op->args[2])) {
914                 tmp = arg_info(op->args[2])->val & 63;
915                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
916             }
917             break;
918 
919         case INDEX_op_shr_i32:
920             if (arg_is_const(op->args[2])) {
921                 tmp = arg_info(op->args[2])->val & 31;
922                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
923             }
924             break;
925         case INDEX_op_shr_i64:
926             if (arg_is_const(op->args[2])) {
927                 tmp = arg_info(op->args[2])->val & 63;
928                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
929             }
930             break;
931 
932         case INDEX_op_extrl_i64_i32:
933             mask = (uint32_t)arg_info(op->args[1])->mask;
934             break;
935         case INDEX_op_extrh_i64_i32:
936             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
937             break;
938 
939         CASE_OP_32_64(shl):
940             if (arg_is_const(op->args[2])) {
941                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
942                 mask = arg_info(op->args[1])->mask << tmp;
943             }
944             break;
945 
946         CASE_OP_32_64(neg):
947             /* Set to 1 all bits to the left of the rightmost.  */
948             mask = -(arg_info(op->args[1])->mask
949                      & -arg_info(op->args[1])->mask);
950             break;
951 
952         CASE_OP_32_64(deposit):
953             mask = deposit64(arg_info(op->args[1])->mask,
954                              op->args[3], op->args[4],
955                              arg_info(op->args[2])->mask);
956             break;
957 
958         CASE_OP_32_64(extract):
959             mask = extract64(arg_info(op->args[1])->mask,
960                              op->args[2], op->args[3]);
961             if (op->args[2] == 0) {
962                 affected = arg_info(op->args[1])->mask & ~mask;
963             }
964             break;
965         CASE_OP_32_64(sextract):
966             mask = sextract64(arg_info(op->args[1])->mask,
967                               op->args[2], op->args[3]);
968             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
969                 affected = arg_info(op->args[1])->mask & ~mask;
970             }
971             break;
972 
973         CASE_OP_32_64(or):
974         CASE_OP_32_64(xor):
975             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
976             break;
977 
978         case INDEX_op_clz_i32:
979         case INDEX_op_ctz_i32:
980             mask = arg_info(op->args[2])->mask | 31;
981             break;
982 
983         case INDEX_op_clz_i64:
984         case INDEX_op_ctz_i64:
985             mask = arg_info(op->args[2])->mask | 63;
986             break;
987 
988         case INDEX_op_ctpop_i32:
989             mask = 32 | 31;
990             break;
991         case INDEX_op_ctpop_i64:
992             mask = 64 | 63;
993             break;
994 
995         CASE_OP_32_64(setcond):
996         case INDEX_op_setcond2_i32:
997             mask = 1;
998             break;
999 
1000         CASE_OP_32_64(movcond):
1001             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1002             break;
1003 
1004         CASE_OP_32_64(ld8u):
1005             mask = 0xff;
1006             break;
1007         CASE_OP_32_64(ld16u):
1008             mask = 0xffff;
1009             break;
1010         case INDEX_op_ld32u_i64:
1011             mask = 0xffffffffu;
1012             break;
1013 
1014         CASE_OP_32_64(qemu_ld):
1015             {
1016                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1017                 TCGMemOp mop = get_memop(oi);
1018                 if (!(mop & MO_SIGN)) {
1019                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1020                 }
1021             }
1022             break;
1023 
1024         default:
1025             break;
1026         }
1027 
1028         /* 32-bit ops generate 32-bit results.  For the "result is zero" test
1029            below, we can ignore high bits, but for further optimizations we
1030            need to record that the high bits contain garbage.  */
1031         partmask = mask;
1032         if (!(def->flags & TCG_OPF_64BIT)) {
1033             mask |= ~(tcg_target_ulong)0xffffffffu;
1034             partmask &= 0xffffffffu;
1035             affected &= 0xffffffffu;
1036         }
1037 
1038         if (partmask == 0) {
1039             tcg_debug_assert(nb_oargs == 1);
1040             tcg_opt_gen_movi(s, op, op->args[0], 0);
1041             continue;
1042         }
1043         if (affected == 0) {
1044             tcg_debug_assert(nb_oargs == 1);
1045             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1046             continue;
1047         }
1048 
1049         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1050         switch (opc) {
1051         CASE_OP_32_64_VEC(and):
1052         CASE_OP_32_64_VEC(mul):
1053         CASE_OP_32_64(muluh):
1054         CASE_OP_32_64(mulsh):
1055             if (arg_is_const(op->args[2])
1056                 && arg_info(op->args[2])->val == 0) {
1057                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1058                 continue;
1059             }
1060             break;
1061         default:
1062             break;
1063         }
1064 
1065         /* Simplify expression for "op r, a, a => mov r, a" cases */
1066         switch (opc) {
1067         CASE_OP_32_64_VEC(or):
1068         CASE_OP_32_64_VEC(and):
1069             if (args_are_copies(op->args[1], op->args[2])) {
1070                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1071                 continue;
1072             }
1073             break;
1074         default:
1075             break;
1076         }
1077 
1078         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1079         switch (opc) {
1080         CASE_OP_32_64_VEC(andc):
1081         CASE_OP_32_64_VEC(sub):
1082         CASE_OP_32_64_VEC(xor):
1083             if (args_are_copies(op->args[1], op->args[2])) {
1084                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1085                 continue;
1086             }
1087             break;
1088         default:
1089             break;
1090         }
1091 
1092         /* Propagate constants through copy operations and do constant
1093            folding.  Constants will be substituted to arguments by register
1094            allocator where needed and possible.  Also detect copies. */
1095         switch (opc) {
1096         CASE_OP_32_64_VEC(mov):
1097             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1098             break;
1099         CASE_OP_32_64(movi):
1100         case INDEX_op_dupi_vec:
1101             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1102             break;
1103 
1104         case INDEX_op_dup_vec:
1105             if (arg_is_const(op->args[1])) {
1106                 tmp = arg_info(op->args[1])->val;
1107                 tmp = dup_const(TCGOP_VECE(op), tmp);
1108                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1109                 break;
1110             }
1111             goto do_default;
1112 
1113         CASE_OP_32_64(not):
1114         CASE_OP_32_64(neg):
1115         CASE_OP_32_64(ext8s):
1116         CASE_OP_32_64(ext8u):
1117         CASE_OP_32_64(ext16s):
1118         CASE_OP_32_64(ext16u):
1119         CASE_OP_32_64(ctpop):
1120         CASE_OP_32_64(bswap16):
1121         CASE_OP_32_64(bswap32):
1122         case INDEX_op_bswap64_i64:
1123         case INDEX_op_ext32s_i64:
1124         case INDEX_op_ext32u_i64:
1125         case INDEX_op_ext_i32_i64:
1126         case INDEX_op_extu_i32_i64:
1127         case INDEX_op_extrl_i64_i32:
1128         case INDEX_op_extrh_i64_i32:
1129             if (arg_is_const(op->args[1])) {
1130                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1131                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1132                 break;
1133             }
1134             goto do_default;
1135 
1136         CASE_OP_32_64(add):
1137         CASE_OP_32_64(sub):
1138         CASE_OP_32_64(mul):
1139         CASE_OP_32_64(or):
1140         CASE_OP_32_64(and):
1141         CASE_OP_32_64(xor):
1142         CASE_OP_32_64(shl):
1143         CASE_OP_32_64(shr):
1144         CASE_OP_32_64(sar):
1145         CASE_OP_32_64(rotl):
1146         CASE_OP_32_64(rotr):
1147         CASE_OP_32_64(andc):
1148         CASE_OP_32_64(orc):
1149         CASE_OP_32_64(eqv):
1150         CASE_OP_32_64(nand):
1151         CASE_OP_32_64(nor):
1152         CASE_OP_32_64(muluh):
1153         CASE_OP_32_64(mulsh):
1154         CASE_OP_32_64(div):
1155         CASE_OP_32_64(divu):
1156         CASE_OP_32_64(rem):
1157         CASE_OP_32_64(remu):
1158             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1159                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1160                                           arg_info(op->args[2])->val);
1161                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1162                 break;
1163             }
1164             goto do_default;
1165 
1166         CASE_OP_32_64(clz):
1167         CASE_OP_32_64(ctz):
1168             if (arg_is_const(op->args[1])) {
1169                 TCGArg v = arg_info(op->args[1])->val;
1170                 if (v != 0) {
1171                     tmp = do_constant_folding(opc, v, 0);
1172                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1173                 } else {
1174                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1175                 }
1176                 break;
1177             }
1178             goto do_default;
1179 
1180         CASE_OP_32_64(deposit):
1181             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1182                 tmp = deposit64(arg_info(op->args[1])->val,
1183                                 op->args[3], op->args[4],
1184                                 arg_info(op->args[2])->val);
1185                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1186                 break;
1187             }
1188             goto do_default;
1189 
1190         CASE_OP_32_64(extract):
1191             if (arg_is_const(op->args[1])) {
1192                 tmp = extract64(arg_info(op->args[1])->val,
1193                                 op->args[2], op->args[3]);
1194                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1195                 break;
1196             }
1197             goto do_default;
1198 
1199         CASE_OP_32_64(sextract):
1200             if (arg_is_const(op->args[1])) {
1201                 tmp = sextract64(arg_info(op->args[1])->val,
1202                                  op->args[2], op->args[3]);
1203                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1204                 break;
1205             }
1206             goto do_default;
1207 
1208         CASE_OP_32_64(extract2):
1209             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1210                 TCGArg v1 = arg_info(op->args[1])->val;
1211                 TCGArg v2 = arg_info(op->args[2])->val;
1212 
1213                 if (opc == INDEX_op_extract2_i64) {
1214                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1215                 } else {
1216                     tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
1217                     tmp = (int32_t)tmp;
1218                 }
1219                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1220                 break;
1221             }
1222             goto do_default;
1223 
1224         CASE_OP_32_64(setcond):
1225             tmp = do_constant_folding_cond(opc, op->args[1],
1226                                            op->args[2], op->args[3]);
1227             if (tmp != 2) {
1228                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1229                 break;
1230             }
1231             goto do_default;
1232 
1233         CASE_OP_32_64(brcond):
1234             tmp = do_constant_folding_cond(opc, op->args[0],
1235                                            op->args[1], op->args[2]);
1236             if (tmp != 2) {
1237                 if (tmp) {
1238                     bitmap_zero(temps_used.l, nb_temps);
1239                     op->opc = INDEX_op_br;
1240                     op->args[0] = op->args[3];
1241                 } else {
1242                     tcg_op_remove(s, op);
1243                 }
1244                 break;
1245             }
1246             goto do_default;
1247 
1248         CASE_OP_32_64(movcond):
1249             tmp = do_constant_folding_cond(opc, op->args[1],
1250                                            op->args[2], op->args[5]);
1251             if (tmp != 2) {
1252                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1253                 break;
1254             }
1255             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1256                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1257                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1258                 TCGCond cond = op->args[5];
1259                 if (fv == 1 && tv == 0) {
1260                     cond = tcg_invert_cond(cond);
1261                 } else if (!(tv == 1 && fv == 0)) {
1262                     goto do_default;
1263                 }
1264                 op->args[3] = cond;
1265                 op->opc = opc = (opc == INDEX_op_movcond_i32
1266                                  ? INDEX_op_setcond_i32
1267                                  : INDEX_op_setcond_i64);
1268                 nb_iargs = 2;
1269             }
1270             goto do_default;
1271 
1272         case INDEX_op_add2_i32:
1273         case INDEX_op_sub2_i32:
1274             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1275                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1276                 uint32_t al = arg_info(op->args[2])->val;
1277                 uint32_t ah = arg_info(op->args[3])->val;
1278                 uint32_t bl = arg_info(op->args[4])->val;
1279                 uint32_t bh = arg_info(op->args[5])->val;
1280                 uint64_t a = ((uint64_t)ah << 32) | al;
1281                 uint64_t b = ((uint64_t)bh << 32) | bl;
1282                 TCGArg rl, rh;
1283                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1284 
1285                 if (opc == INDEX_op_add2_i32) {
1286                     a += b;
1287                 } else {
1288                     a -= b;
1289                 }
1290 
1291                 rl = op->args[0];
1292                 rh = op->args[1];
1293                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1294                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1295                 break;
1296             }
1297             goto do_default;
1298 
1299         case INDEX_op_mulu2_i32:
1300             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1301                 uint32_t a = arg_info(op->args[2])->val;
1302                 uint32_t b = arg_info(op->args[3])->val;
1303                 uint64_t r = (uint64_t)a * b;
1304                 TCGArg rl, rh;
1305                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1306 
1307                 rl = op->args[0];
1308                 rh = op->args[1];
1309                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1310                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1311                 break;
1312             }
1313             goto do_default;
1314 
1315         case INDEX_op_brcond2_i32:
1316             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1317                                             op->args[4]);
1318             if (tmp != 2) {
1319                 if (tmp) {
1320             do_brcond_true:
1321                     bitmap_zero(temps_used.l, nb_temps);
1322                     op->opc = INDEX_op_br;
1323                     op->args[0] = op->args[5];
1324                 } else {
1325             do_brcond_false:
1326                     tcg_op_remove(s, op);
1327                 }
1328             } else if ((op->args[4] == TCG_COND_LT
1329                         || op->args[4] == TCG_COND_GE)
1330                        && arg_is_const(op->args[2])
1331                        && arg_info(op->args[2])->val == 0
1332                        && arg_is_const(op->args[3])
1333                        && arg_info(op->args[3])->val == 0) {
1334                 /* Simplify LT/GE comparisons vs zero to a single compare
1335                    vs the high word of the input.  */
1336             do_brcond_high:
1337                 bitmap_zero(temps_used.l, nb_temps);
1338                 op->opc = INDEX_op_brcond_i32;
1339                 op->args[0] = op->args[1];
1340                 op->args[1] = op->args[3];
1341                 op->args[2] = op->args[4];
1342                 op->args[3] = op->args[5];
1343             } else if (op->args[4] == TCG_COND_EQ) {
1344                 /* Simplify EQ comparisons where one of the pairs
1345                    can be simplified.  */
1346                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1347                                                op->args[0], op->args[2],
1348                                                TCG_COND_EQ);
1349                 if (tmp == 0) {
1350                     goto do_brcond_false;
1351                 } else if (tmp == 1) {
1352                     goto do_brcond_high;
1353                 }
1354                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1355                                                op->args[1], op->args[3],
1356                                                TCG_COND_EQ);
1357                 if (tmp == 0) {
1358                     goto do_brcond_false;
1359                 } else if (tmp != 1) {
1360                     goto do_default;
1361                 }
1362             do_brcond_low:
1363                 bitmap_zero(temps_used.l, nb_temps);
1364                 op->opc = INDEX_op_brcond_i32;
1365                 op->args[1] = op->args[2];
1366                 op->args[2] = op->args[4];
1367                 op->args[3] = op->args[5];
1368             } else if (op->args[4] == TCG_COND_NE) {
1369                 /* Simplify NE comparisons where one of the pairs
1370                    can be simplified.  */
1371                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1372                                                op->args[0], op->args[2],
1373                                                TCG_COND_NE);
1374                 if (tmp == 0) {
1375                     goto do_brcond_high;
1376                 } else if (tmp == 1) {
1377                     goto do_brcond_true;
1378                 }
1379                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1380                                                op->args[1], op->args[3],
1381                                                TCG_COND_NE);
1382                 if (tmp == 0) {
1383                     goto do_brcond_low;
1384                 } else if (tmp == 1) {
1385                     goto do_brcond_true;
1386                 }
1387                 goto do_default;
1388             } else {
1389                 goto do_default;
1390             }
1391             break;
1392 
1393         case INDEX_op_setcond2_i32:
1394             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1395                                             op->args[5]);
1396             if (tmp != 2) {
1397             do_setcond_const:
1398                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1399             } else if ((op->args[5] == TCG_COND_LT
1400                         || op->args[5] == TCG_COND_GE)
1401                        && arg_is_const(op->args[3])
1402                        && arg_info(op->args[3])->val == 0
1403                        && arg_is_const(op->args[4])
1404                        && arg_info(op->args[4])->val == 0) {
1405                 /* Simplify LT/GE comparisons vs zero to a single compare
1406                    vs the high word of the input.  */
1407             do_setcond_high:
1408                 reset_temp(op->args[0]);
1409                 arg_info(op->args[0])->mask = 1;
1410                 op->opc = INDEX_op_setcond_i32;
1411                 op->args[1] = op->args[2];
1412                 op->args[2] = op->args[4];
1413                 op->args[3] = op->args[5];
1414             } else if (op->args[5] == TCG_COND_EQ) {
1415                 /* Simplify EQ comparisons where one of the pairs
1416                    can be simplified.  */
1417                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1418                                                op->args[1], op->args[3],
1419                                                TCG_COND_EQ);
1420                 if (tmp == 0) {
1421                     goto do_setcond_const;
1422                 } else if (tmp == 1) {
1423                     goto do_setcond_high;
1424                 }
1425                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1426                                                op->args[2], op->args[4],
1427                                                TCG_COND_EQ);
1428                 if (tmp == 0) {
1429                     goto do_setcond_high;
1430                 } else if (tmp != 1) {
1431                     goto do_default;
1432                 }
1433             do_setcond_low:
1434                 reset_temp(op->args[0]);
1435                 arg_info(op->args[0])->mask = 1;
1436                 op->opc = INDEX_op_setcond_i32;
1437                 op->args[2] = op->args[3];
1438                 op->args[3] = op->args[5];
1439             } else if (op->args[5] == TCG_COND_NE) {
1440                 /* Simplify NE comparisons where one of the pairs
1441                    can be simplified.  */
1442                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1443                                                op->args[1], op->args[3],
1444                                                TCG_COND_NE);
1445                 if (tmp == 0) {
1446                     goto do_setcond_high;
1447                 } else if (tmp == 1) {
1448                     goto do_setcond_const;
1449                 }
1450                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1451                                                op->args[2], op->args[4],
1452                                                TCG_COND_NE);
1453                 if (tmp == 0) {
1454                     goto do_setcond_low;
1455                 } else if (tmp == 1) {
1456                     goto do_setcond_const;
1457                 }
1458                 goto do_default;
1459             } else {
1460                 goto do_default;
1461             }
1462             break;
1463 
1464         case INDEX_op_call:
1465             if (!(op->args[nb_oargs + nb_iargs + 1]
1466                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1467                 for (i = 0; i < nb_globals; i++) {
1468                     if (test_bit(i, temps_used.l)) {
1469                         reset_ts(&s->temps[i]);
1470                     }
1471                 }
1472             }
1473             goto do_reset_output;
1474 
1475         default:
1476         do_default:
1477             /* Default case: we know nothing about operation (or were unable
1478                to compute the operation result) so no propagation is done.
1479                We trash everything if the operation is the end of a basic
1480                block, otherwise we only trash the output args.  "mask" is
1481                the non-zero bits mask for the first output arg.  */
1482             if (def->flags & TCG_OPF_BB_END) {
1483                 bitmap_zero(temps_used.l, nb_temps);
1484             } else {
1485         do_reset_output:
1486                 for (i = 0; i < nb_oargs; i++) {
1487                     reset_temp(op->args[i]);
1488                     /* Save the corresponding known-zero bits mask for the
1489                        first output argument (only one supported so far). */
1490                     if (i == 0) {
1491                         arg_info(op->args[i])->mask = mask;
1492                     }
1493                 }
1494             }
1495             break;
1496         }
1497 
1498         /* Eliminate duplicate and redundant fence instructions.  */
1499         if (prev_mb) {
1500             switch (opc) {
1501             case INDEX_op_mb:
1502                 /* Merge two barriers of the same type into one,
1503                  * or a weaker barrier into a stronger one,
1504                  * or two weaker barriers into a stronger one.
1505                  *   mb X; mb Y => mb X|Y
1506                  *   mb; strl => mb; st
1507                  *   ldaq; mb => ld; mb
1508                  *   ldaq; strl => ld; mb; st
1509                  * Other combinations are also merged into a strong
1510                  * barrier.  This is stricter than specified but for
1511                  * the purposes of TCG is better than not optimizing.
1512                  */
1513                 prev_mb->args[0] |= op->args[0];
1514                 tcg_op_remove(s, op);
1515                 break;
1516 
1517             default:
1518                 /* Opcodes that end the block stop the optimization.  */
1519                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1520                     break;
1521                 }
1522                 /* fallthru */
1523             case INDEX_op_qemu_ld_i32:
1524             case INDEX_op_qemu_ld_i64:
1525             case INDEX_op_qemu_st_i32:
1526             case INDEX_op_qemu_st_i64:
1527             case INDEX_op_call:
1528                 /* Opcodes that touch guest memory stop the optimization.  */
1529                 prev_mb = NULL;
1530                 break;
1531             }
1532         } else if (opc == INDEX_op_mb) {
1533             prev_mb = op;
1534         }
1535     }
1536 }
1537