xref: /openbmc/qemu/tcg/optimize.c (revision 1c2adb95)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "exec/cpu-common.h"
29 #include "tcg-op.h"
30 
/*
 * Expand to the two case labels for the _i32 and _i64 variants of opcode X.
 * The colon terminating the second label is supplied at the use site,
 * e.g. "CASE_OP_32_64(add):".
 */
#define CASE_OP_32_64(x) \
    glue(glue(case INDEX_op_, x), _i32): glue(glue(case INDEX_op_, x), _i64)
34 
35 struct tcg_temp_info {
36     bool is_const;
37     TCGTemp *prev_copy;
38     TCGTemp *next_copy;
39     tcg_target_ulong val;
40     tcg_target_ulong mask;
41 };
42 
43 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
44 {
45     return ts->state_ptr;
46 }
47 
48 static inline struct tcg_temp_info *arg_info(TCGArg arg)
49 {
50     return ts_info(arg_temp(arg));
51 }
52 
53 static inline bool ts_is_const(TCGTemp *ts)
54 {
55     return ts_info(ts)->is_const;
56 }
57 
58 static inline bool arg_is_const(TCGArg arg)
59 {
60     return ts_is_const(arg_temp(arg));
61 }
62 
63 static inline bool ts_is_copy(TCGTemp *ts)
64 {
65     return ts_info(ts)->next_copy != ts;
66 }
67 
68 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
69 static void reset_ts(TCGTemp *ts)
70 {
71     struct tcg_temp_info *ti = ts_info(ts);
72     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
73     struct tcg_temp_info *ni = ts_info(ti->next_copy);
74 
75     ni->prev_copy = ti->prev_copy;
76     pi->next_copy = ti->next_copy;
77     ti->next_copy = ts;
78     ti->prev_copy = ts;
79     ti->is_const = false;
80     ti->mask = -1;
81 }
82 
83 static void reset_temp(TCGArg arg)
84 {
85     reset_ts(arg_temp(arg));
86 }
87 
88 /* Initialize and activate a temporary.  */
89 static void init_ts_info(struct tcg_temp_info *infos,
90                          TCGTempSet *temps_used, TCGTemp *ts)
91 {
92     size_t idx = temp_idx(ts);
93     if (!test_bit(idx, temps_used->l)) {
94         struct tcg_temp_info *ti = &infos[idx];
95 
96         ts->state_ptr = ti;
97         ti->next_copy = ts;
98         ti->prev_copy = ts;
99         ti->is_const = false;
100         ti->mask = -1;
101         set_bit(idx, temps_used->l);
102     }
103 }
104 
105 static void init_arg_info(struct tcg_temp_info *infos,
106                           TCGTempSet *temps_used, TCGArg arg)
107 {
108     init_ts_info(infos, temps_used, arg_temp(arg));
109 }
110 
111 static int op_bits(TCGOpcode op)
112 {
113     const TCGOpDef *def = &tcg_op_defs[op];
114     return def->flags & TCG_OPF_64BIT ? 64 : 32;
115 }
116 
117 static TCGOpcode op_to_mov(TCGOpcode op)
118 {
119     switch (op_bits(op)) {
120     case 32:
121         return INDEX_op_mov_i32;
122     case 64:
123         return INDEX_op_mov_i64;
124     default:
125         fprintf(stderr, "op_to_mov: unexpected return value of "
126                 "function op_bits.\n");
127         tcg_abort();
128     }
129 }
130 
131 static TCGOpcode op_to_movi(TCGOpcode op)
132 {
133     switch (op_bits(op)) {
134     case 32:
135         return INDEX_op_movi_i32;
136     case 64:
137         return INDEX_op_movi_i64;
138     default:
139         fprintf(stderr, "op_to_movi: unexpected return value of "
140                 "function op_bits.\n");
141         tcg_abort();
142     }
143 }
144 
145 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
146 {
147     TCGTemp *i;
148 
149     /* If this is already a global, we can't do better. */
150     if (ts->temp_global) {
151         return ts;
152     }
153 
154     /* Search for a global first. */
155     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
156         if (i->temp_global) {
157             return i;
158         }
159     }
160 
161     /* If it is a temp, search for a temp local. */
162     if (!ts->temp_local) {
163         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
164             if (ts->temp_local) {
165                 return i;
166             }
167         }
168     }
169 
170     /* Failure to find a better representation, return the same temp. */
171     return ts;
172 }
173 
174 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
175 {
176     TCGTemp *i;
177 
178     if (ts1 == ts2) {
179         return true;
180     }
181 
182     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
183         return false;
184     }
185 
186     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
187         if (i == ts2) {
188             return true;
189         }
190     }
191 
192     return false;
193 }
194 
195 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
196 {
197     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
198 }
199 
200 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
201 {
202     TCGOpcode new_op = op_to_movi(op->opc);
203     tcg_target_ulong mask;
204     struct tcg_temp_info *di = arg_info(dst);
205 
206     op->opc = new_op;
207 
208     reset_temp(dst);
209     di->is_const = true;
210     di->val = val;
211     mask = val;
212     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
213         /* High bits of the destination are now garbage.  */
214         mask |= ~0xffffffffull;
215     }
216     di->mask = mask;
217 
218     op->args[0] = dst;
219     op->args[1] = val;
220 }
221 
222 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
223 {
224     TCGTemp *dst_ts = arg_temp(dst);
225     TCGTemp *src_ts = arg_temp(src);
226     struct tcg_temp_info *di;
227     struct tcg_temp_info *si;
228     tcg_target_ulong mask;
229     TCGOpcode new_op;
230 
231     if (ts_are_copies(dst_ts, src_ts)) {
232         tcg_op_remove(s, op);
233         return;
234     }
235 
236     reset_ts(dst_ts);
237     di = ts_info(dst_ts);
238     si = ts_info(src_ts);
239     new_op = op_to_mov(op->opc);
240 
241     op->opc = new_op;
242     op->args[0] = dst;
243     op->args[1] = src;
244 
245     mask = si->mask;
246     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
247         /* High bits of the destination are now garbage.  */
248         mask |= ~0xffffffffull;
249     }
250     di->mask = mask;
251 
252     if (src_ts->type == dst_ts->type) {
253         struct tcg_temp_info *ni = ts_info(si->next_copy);
254 
255         di->next_copy = si->next_copy;
256         di->prev_copy = src_ts;
257         ni->prev_copy = dst_ts;
258         si->next_copy = dst_ts;
259         di->is_const = si->is_const;
260         di->val = si->val;
261     }
262 }
263 
264 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
265 {
266     uint64_t l64, h64;
267 
268     switch (op) {
269     CASE_OP_32_64(add):
270         return x + y;
271 
272     CASE_OP_32_64(sub):
273         return x - y;
274 
275     CASE_OP_32_64(mul):
276         return x * y;
277 
278     CASE_OP_32_64(and):
279         return x & y;
280 
281     CASE_OP_32_64(or):
282         return x | y;
283 
284     CASE_OP_32_64(xor):
285         return x ^ y;
286 
287     case INDEX_op_shl_i32:
288         return (uint32_t)x << (y & 31);
289 
290     case INDEX_op_shl_i64:
291         return (uint64_t)x << (y & 63);
292 
293     case INDEX_op_shr_i32:
294         return (uint32_t)x >> (y & 31);
295 
296     case INDEX_op_shr_i64:
297         return (uint64_t)x >> (y & 63);
298 
299     case INDEX_op_sar_i32:
300         return (int32_t)x >> (y & 31);
301 
302     case INDEX_op_sar_i64:
303         return (int64_t)x >> (y & 63);
304 
305     case INDEX_op_rotr_i32:
306         return ror32(x, y & 31);
307 
308     case INDEX_op_rotr_i64:
309         return ror64(x, y & 63);
310 
311     case INDEX_op_rotl_i32:
312         return rol32(x, y & 31);
313 
314     case INDEX_op_rotl_i64:
315         return rol64(x, y & 63);
316 
317     CASE_OP_32_64(not):
318         return ~x;
319 
320     CASE_OP_32_64(neg):
321         return -x;
322 
323     CASE_OP_32_64(andc):
324         return x & ~y;
325 
326     CASE_OP_32_64(orc):
327         return x | ~y;
328 
329     CASE_OP_32_64(eqv):
330         return ~(x ^ y);
331 
332     CASE_OP_32_64(nand):
333         return ~(x & y);
334 
335     CASE_OP_32_64(nor):
336         return ~(x | y);
337 
338     case INDEX_op_clz_i32:
339         return (uint32_t)x ? clz32(x) : y;
340 
341     case INDEX_op_clz_i64:
342         return x ? clz64(x) : y;
343 
344     case INDEX_op_ctz_i32:
345         return (uint32_t)x ? ctz32(x) : y;
346 
347     case INDEX_op_ctz_i64:
348         return x ? ctz64(x) : y;
349 
350     case INDEX_op_ctpop_i32:
351         return ctpop32(x);
352 
353     case INDEX_op_ctpop_i64:
354         return ctpop64(x);
355 
356     CASE_OP_32_64(ext8s):
357         return (int8_t)x;
358 
359     CASE_OP_32_64(ext16s):
360         return (int16_t)x;
361 
362     CASE_OP_32_64(ext8u):
363         return (uint8_t)x;
364 
365     CASE_OP_32_64(ext16u):
366         return (uint16_t)x;
367 
368     case INDEX_op_ext_i32_i64:
369     case INDEX_op_ext32s_i64:
370         return (int32_t)x;
371 
372     case INDEX_op_extu_i32_i64:
373     case INDEX_op_extrl_i64_i32:
374     case INDEX_op_ext32u_i64:
375         return (uint32_t)x;
376 
377     case INDEX_op_extrh_i64_i32:
378         return (uint64_t)x >> 32;
379 
380     case INDEX_op_muluh_i32:
381         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
382     case INDEX_op_mulsh_i32:
383         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
384 
385     case INDEX_op_muluh_i64:
386         mulu64(&l64, &h64, x, y);
387         return h64;
388     case INDEX_op_mulsh_i64:
389         muls64(&l64, &h64, x, y);
390         return h64;
391 
392     case INDEX_op_div_i32:
393         /* Avoid crashing on divide by zero, otherwise undefined.  */
394         return (int32_t)x / ((int32_t)y ? : 1);
395     case INDEX_op_divu_i32:
396         return (uint32_t)x / ((uint32_t)y ? : 1);
397     case INDEX_op_div_i64:
398         return (int64_t)x / ((int64_t)y ? : 1);
399     case INDEX_op_divu_i64:
400         return (uint64_t)x / ((uint64_t)y ? : 1);
401 
402     case INDEX_op_rem_i32:
403         return (int32_t)x % ((int32_t)y ? : 1);
404     case INDEX_op_remu_i32:
405         return (uint32_t)x % ((uint32_t)y ? : 1);
406     case INDEX_op_rem_i64:
407         return (int64_t)x % ((int64_t)y ? : 1);
408     case INDEX_op_remu_i64:
409         return (uint64_t)x % ((uint64_t)y ? : 1);
410 
411     default:
412         fprintf(stderr,
413                 "Unrecognized operation %d in do_constant_folding.\n", op);
414         tcg_abort();
415     }
416 }
417 
418 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
419 {
420     TCGArg res = do_constant_folding_2(op, x, y);
421     if (op_bits(op) == 32) {
422         res = (int32_t)res;
423     }
424     return res;
425 }
426 
427 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
428 {
429     switch (c) {
430     case TCG_COND_EQ:
431         return x == y;
432     case TCG_COND_NE:
433         return x != y;
434     case TCG_COND_LT:
435         return (int32_t)x < (int32_t)y;
436     case TCG_COND_GE:
437         return (int32_t)x >= (int32_t)y;
438     case TCG_COND_LE:
439         return (int32_t)x <= (int32_t)y;
440     case TCG_COND_GT:
441         return (int32_t)x > (int32_t)y;
442     case TCG_COND_LTU:
443         return x < y;
444     case TCG_COND_GEU:
445         return x >= y;
446     case TCG_COND_LEU:
447         return x <= y;
448     case TCG_COND_GTU:
449         return x > y;
450     default:
451         tcg_abort();
452     }
453 }
454 
455 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
456 {
457     switch (c) {
458     case TCG_COND_EQ:
459         return x == y;
460     case TCG_COND_NE:
461         return x != y;
462     case TCG_COND_LT:
463         return (int64_t)x < (int64_t)y;
464     case TCG_COND_GE:
465         return (int64_t)x >= (int64_t)y;
466     case TCG_COND_LE:
467         return (int64_t)x <= (int64_t)y;
468     case TCG_COND_GT:
469         return (int64_t)x > (int64_t)y;
470     case TCG_COND_LTU:
471         return x < y;
472     case TCG_COND_GEU:
473         return x >= y;
474     case TCG_COND_LEU:
475         return x <= y;
476     case TCG_COND_GTU:
477         return x > y;
478     default:
479         tcg_abort();
480     }
481 }
482 
483 static bool do_constant_folding_cond_eq(TCGCond c)
484 {
485     switch (c) {
486     case TCG_COND_GT:
487     case TCG_COND_LTU:
488     case TCG_COND_LT:
489     case TCG_COND_GTU:
490     case TCG_COND_NE:
491         return 0;
492     case TCG_COND_GE:
493     case TCG_COND_GEU:
494     case TCG_COND_LE:
495     case TCG_COND_LEU:
496     case TCG_COND_EQ:
497         return 1;
498     default:
499         tcg_abort();
500     }
501 }
502 
503 /* Return 2 if the condition can't be simplified, and the result
504    of the condition (0 or 1) if it can */
505 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
506                                        TCGArg y, TCGCond c)
507 {
508     tcg_target_ulong xv = arg_info(x)->val;
509     tcg_target_ulong yv = arg_info(y)->val;
510     if (arg_is_const(x) && arg_is_const(y)) {
511         switch (op_bits(op)) {
512         case 32:
513             return do_constant_folding_cond_32(xv, yv, c);
514         case 64:
515             return do_constant_folding_cond_64(xv, yv, c);
516         default:
517             tcg_abort();
518         }
519     } else if (args_are_copies(x, y)) {
520         return do_constant_folding_cond_eq(c);
521     } else if (arg_is_const(y) && yv == 0) {
522         switch (c) {
523         case TCG_COND_LTU:
524             return 0;
525         case TCG_COND_GEU:
526             return 1;
527         default:
528             return 2;
529         }
530     }
531     return 2;
532 }
533 
534 /* Return 2 if the condition can't be simplified, and the result
535    of the condition (0 or 1) if it can */
536 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
537 {
538     TCGArg al = p1[0], ah = p1[1];
539     TCGArg bl = p2[0], bh = p2[1];
540 
541     if (arg_is_const(bl) && arg_is_const(bh)) {
542         tcg_target_ulong blv = arg_info(bl)->val;
543         tcg_target_ulong bhv = arg_info(bh)->val;
544         uint64_t b = deposit64(blv, 32, 32, bhv);
545 
546         if (arg_is_const(al) && arg_is_const(ah)) {
547             tcg_target_ulong alv = arg_info(al)->val;
548             tcg_target_ulong ahv = arg_info(ah)->val;
549             uint64_t a = deposit64(alv, 32, 32, ahv);
550             return do_constant_folding_cond_64(a, b, c);
551         }
552         if (b == 0) {
553             switch (c) {
554             case TCG_COND_LTU:
555                 return 0;
556             case TCG_COND_GEU:
557                 return 1;
558             default:
559                 break;
560             }
561         }
562     }
563     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
564         return do_constant_folding_cond_eq(c);
565     }
566     return 2;
567 }
568 
569 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
570 {
571     TCGArg a1 = *p1, a2 = *p2;
572     int sum = 0;
573     sum += arg_is_const(a1);
574     sum -= arg_is_const(a2);
575 
576     /* Prefer the constant in second argument, and then the form
577        op a, a, b, which is better handled on non-RISC hosts. */
578     if (sum > 0 || (sum == 0 && dest == a2)) {
579         *p1 = a2;
580         *p2 = a1;
581         return true;
582     }
583     return false;
584 }
585 
586 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
587 {
588     int sum = 0;
589     sum += arg_is_const(p1[0]);
590     sum += arg_is_const(p1[1]);
591     sum -= arg_is_const(p2[0]);
592     sum -= arg_is_const(p2[1]);
593     if (sum > 0) {
594         TCGArg t;
595         t = p1[0], p1[0] = p2[0], p2[0] = t;
596         t = p1[1], p1[1] = p2[1], p2[1] = t;
597         return true;
598     }
599     return false;
600 }
601 
602 /* Propagate constants and copies, fold constant expressions. */
603 void tcg_optimize(TCGContext *s)
604 {
605     int oi, oi_next, nb_temps, nb_globals;
606     TCGOp *prev_mb = NULL;
607     struct tcg_temp_info *infos;
608     TCGTempSet temps_used;
609 
610     /* Array VALS has an element for each temp.
611        If this temp holds a constant then its value is kept in VALS' element.
612        If this temp is a copy of other ones then the other copies are
613        available through the doubly linked circular list. */
614 
615     nb_temps = s->nb_temps;
616     nb_globals = s->nb_globals;
617     bitmap_zero(temps_used.l, nb_temps);
618     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
619 
620     for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
621         tcg_target_ulong mask, partmask, affected;
622         int nb_oargs, nb_iargs, i;
623         TCGArg tmp;
624 
625         TCGOp * const op = &s->gen_op_buf[oi];
626         TCGOpcode opc = op->opc;
627         const TCGOpDef *def = &tcg_op_defs[opc];
628 
629         oi_next = op->next;
630 
631         /* Count the arguments, and initialize the temps that are
632            going to be used */
633         if (opc == INDEX_op_call) {
634             nb_oargs = op->callo;
635             nb_iargs = op->calli;
636             for (i = 0; i < nb_oargs + nb_iargs; i++) {
637                 TCGTemp *ts = arg_temp(op->args[i]);
638                 if (ts) {
639                     init_ts_info(infos, &temps_used, ts);
640                 }
641             }
642         } else {
643             nb_oargs = def->nb_oargs;
644             nb_iargs = def->nb_iargs;
645             for (i = 0; i < nb_oargs + nb_iargs; i++) {
646                 init_arg_info(infos, &temps_used, op->args[i]);
647             }
648         }
649 
650         /* Do copy propagation */
651         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
652             TCGTemp *ts = arg_temp(op->args[i]);
653             if (ts && ts_is_copy(ts)) {
654                 op->args[i] = temp_arg(find_better_copy(s, ts));
655             }
656         }
657 
658         /* For commutative operations make constant second argument */
659         switch (opc) {
660         CASE_OP_32_64(add):
661         CASE_OP_32_64(mul):
662         CASE_OP_32_64(and):
663         CASE_OP_32_64(or):
664         CASE_OP_32_64(xor):
665         CASE_OP_32_64(eqv):
666         CASE_OP_32_64(nand):
667         CASE_OP_32_64(nor):
668         CASE_OP_32_64(muluh):
669         CASE_OP_32_64(mulsh):
670             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
671             break;
672         CASE_OP_32_64(brcond):
673             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
674                 op->args[2] = tcg_swap_cond(op->args[2]);
675             }
676             break;
677         CASE_OP_32_64(setcond):
678             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
679                 op->args[3] = tcg_swap_cond(op->args[3]);
680             }
681             break;
682         CASE_OP_32_64(movcond):
683             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
684                 op->args[5] = tcg_swap_cond(op->args[5]);
685             }
686             /* For movcond, we canonicalize the "false" input reg to match
687                the destination reg so that the tcg backend can implement
688                a "move if true" operation.  */
689             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
690                 op->args[5] = tcg_invert_cond(op->args[5]);
691             }
692             break;
693         CASE_OP_32_64(add2):
694             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
695             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
696             break;
697         CASE_OP_32_64(mulu2):
698         CASE_OP_32_64(muls2):
699             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
700             break;
701         case INDEX_op_brcond2_i32:
702             if (swap_commutative2(&op->args[0], &op->args[2])) {
703                 op->args[4] = tcg_swap_cond(op->args[4]);
704             }
705             break;
706         case INDEX_op_setcond2_i32:
707             if (swap_commutative2(&op->args[1], &op->args[3])) {
708                 op->args[5] = tcg_swap_cond(op->args[5]);
709             }
710             break;
711         default:
712             break;
713         }
714 
715         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
716            and "sub r, 0, a => neg r, a" case.  */
717         switch (opc) {
718         CASE_OP_32_64(shl):
719         CASE_OP_32_64(shr):
720         CASE_OP_32_64(sar):
721         CASE_OP_32_64(rotl):
722         CASE_OP_32_64(rotr):
723             if (arg_is_const(op->args[1])
724                 && arg_info(op->args[1])->val == 0) {
725                 tcg_opt_gen_movi(s, op, op->args[0], 0);
726                 continue;
727             }
728             break;
729         CASE_OP_32_64(sub):
730             {
731                 TCGOpcode neg_op;
732                 bool have_neg;
733 
734                 if (arg_is_const(op->args[2])) {
735                     /* Proceed with possible constant folding. */
736                     break;
737                 }
738                 if (opc == INDEX_op_sub_i32) {
739                     neg_op = INDEX_op_neg_i32;
740                     have_neg = TCG_TARGET_HAS_neg_i32;
741                 } else {
742                     neg_op = INDEX_op_neg_i64;
743                     have_neg = TCG_TARGET_HAS_neg_i64;
744                 }
745                 if (!have_neg) {
746                     break;
747                 }
748                 if (arg_is_const(op->args[1])
749                     && arg_info(op->args[1])->val == 0) {
750                     op->opc = neg_op;
751                     reset_temp(op->args[0]);
752                     op->args[1] = op->args[2];
753                     continue;
754                 }
755             }
756             break;
757         CASE_OP_32_64(xor):
758         CASE_OP_32_64(nand):
759             if (!arg_is_const(op->args[1])
760                 && arg_is_const(op->args[2])
761                 && arg_info(op->args[2])->val == -1) {
762                 i = 1;
763                 goto try_not;
764             }
765             break;
766         CASE_OP_32_64(nor):
767             if (!arg_is_const(op->args[1])
768                 && arg_is_const(op->args[2])
769                 && arg_info(op->args[2])->val == 0) {
770                 i = 1;
771                 goto try_not;
772             }
773             break;
774         CASE_OP_32_64(andc):
775             if (!arg_is_const(op->args[2])
776                 && arg_is_const(op->args[1])
777                 && arg_info(op->args[1])->val == -1) {
778                 i = 2;
779                 goto try_not;
780             }
781             break;
782         CASE_OP_32_64(orc):
783         CASE_OP_32_64(eqv):
784             if (!arg_is_const(op->args[2])
785                 && arg_is_const(op->args[1])
786                 && arg_info(op->args[1])->val == 0) {
787                 i = 2;
788                 goto try_not;
789             }
790             break;
791         try_not:
792             {
793                 TCGOpcode not_op;
794                 bool have_not;
795 
796                 if (def->flags & TCG_OPF_64BIT) {
797                     not_op = INDEX_op_not_i64;
798                     have_not = TCG_TARGET_HAS_not_i64;
799                 } else {
800                     not_op = INDEX_op_not_i32;
801                     have_not = TCG_TARGET_HAS_not_i32;
802                 }
803                 if (!have_not) {
804                     break;
805                 }
806                 op->opc = not_op;
807                 reset_temp(op->args[0]);
808                 op->args[1] = op->args[i];
809                 continue;
810             }
811         default:
812             break;
813         }
814 
815         /* Simplify expression for "op r, a, const => mov r, a" cases */
816         switch (opc) {
817         CASE_OP_32_64(add):
818         CASE_OP_32_64(sub):
819         CASE_OP_32_64(shl):
820         CASE_OP_32_64(shr):
821         CASE_OP_32_64(sar):
822         CASE_OP_32_64(rotl):
823         CASE_OP_32_64(rotr):
824         CASE_OP_32_64(or):
825         CASE_OP_32_64(xor):
826         CASE_OP_32_64(andc):
827             if (!arg_is_const(op->args[1])
828                 && arg_is_const(op->args[2])
829                 && arg_info(op->args[2])->val == 0) {
830                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
831                 continue;
832             }
833             break;
834         CASE_OP_32_64(and):
835         CASE_OP_32_64(orc):
836         CASE_OP_32_64(eqv):
837             if (!arg_is_const(op->args[1])
838                 && arg_is_const(op->args[2])
839                 && arg_info(op->args[2])->val == -1) {
840                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
841                 continue;
842             }
843             break;
844         default:
845             break;
846         }
847 
848         /* Simplify using known-zero bits. Currently only ops with a single
849            output argument is supported. */
850         mask = -1;
851         affected = -1;
852         switch (opc) {
853         CASE_OP_32_64(ext8s):
854             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
855                 break;
856             }
857         CASE_OP_32_64(ext8u):
858             mask = 0xff;
859             goto and_const;
860         CASE_OP_32_64(ext16s):
861             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
862                 break;
863             }
864         CASE_OP_32_64(ext16u):
865             mask = 0xffff;
866             goto and_const;
867         case INDEX_op_ext32s_i64:
868             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
869                 break;
870             }
871         case INDEX_op_ext32u_i64:
872             mask = 0xffffffffU;
873             goto and_const;
874 
875         CASE_OP_32_64(and):
876             mask = arg_info(op->args[2])->mask;
877             if (arg_is_const(op->args[2])) {
878         and_const:
879                 affected = arg_info(op->args[1])->mask & ~mask;
880             }
881             mask = arg_info(op->args[1])->mask & mask;
882             break;
883 
884         case INDEX_op_ext_i32_i64:
885             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
886                 break;
887             }
888         case INDEX_op_extu_i32_i64:
889             /* We do not compute affected as it is a size changing op.  */
890             mask = (uint32_t)arg_info(op->args[1])->mask;
891             break;
892 
893         CASE_OP_32_64(andc):
894             /* Known-zeros does not imply known-ones.  Therefore unless
895                op->args[2] is constant, we can't infer anything from it.  */
896             if (arg_is_const(op->args[2])) {
897                 mask = ~arg_info(op->args[2])->mask;
898                 goto and_const;
899             }
900             /* But we certainly know nothing outside args[1] may be set. */
901             mask = arg_info(op->args[1])->mask;
902             break;
903 
904         case INDEX_op_sar_i32:
905             if (arg_is_const(op->args[2])) {
906                 tmp = arg_info(op->args[2])->val & 31;
907                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
908             }
909             break;
910         case INDEX_op_sar_i64:
911             if (arg_is_const(op->args[2])) {
912                 tmp = arg_info(op->args[2])->val & 63;
913                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
914             }
915             break;
916 
917         case INDEX_op_shr_i32:
918             if (arg_is_const(op->args[2])) {
919                 tmp = arg_info(op->args[2])->val & 31;
920                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
921             }
922             break;
923         case INDEX_op_shr_i64:
924             if (arg_is_const(op->args[2])) {
925                 tmp = arg_info(op->args[2])->val & 63;
926                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
927             }
928             break;
929 
930         case INDEX_op_extrl_i64_i32:
931             mask = (uint32_t)arg_info(op->args[1])->mask;
932             break;
933         case INDEX_op_extrh_i64_i32:
934             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
935             break;
936 
937         CASE_OP_32_64(shl):
938             if (arg_is_const(op->args[2])) {
939                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
940                 mask = arg_info(op->args[1])->mask << tmp;
941             }
942             break;
943 
944         CASE_OP_32_64(neg):
945             /* Set to 1 all bits to the left of the rightmost.  */
946             mask = -(arg_info(op->args[1])->mask
947                      & -arg_info(op->args[1])->mask);
948             break;
949 
950         CASE_OP_32_64(deposit):
951             mask = deposit64(arg_info(op->args[1])->mask,
952                              op->args[3], op->args[4],
953                              arg_info(op->args[2])->mask);
954             break;
955 
956         CASE_OP_32_64(extract):
957             mask = extract64(arg_info(op->args[1])->mask,
958                              op->args[2], op->args[3]);
959             if (op->args[2] == 0) {
960                 affected = arg_info(op->args[1])->mask & ~mask;
961             }
962             break;
963         CASE_OP_32_64(sextract):
964             mask = sextract64(arg_info(op->args[1])->mask,
965                               op->args[2], op->args[3]);
966             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
967                 affected = arg_info(op->args[1])->mask & ~mask;
968             }
969             break;
970 
971         CASE_OP_32_64(or):
972         CASE_OP_32_64(xor):
973             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
974             break;
975 
976         case INDEX_op_clz_i32:
977         case INDEX_op_ctz_i32:
978             mask = arg_info(op->args[2])->mask | 31;
979             break;
980 
981         case INDEX_op_clz_i64:
982         case INDEX_op_ctz_i64:
983             mask = arg_info(op->args[2])->mask | 63;
984             break;
985 
986         case INDEX_op_ctpop_i32:
987             mask = 32 | 31;
988             break;
989         case INDEX_op_ctpop_i64:
990             mask = 64 | 63;
991             break;
992 
993         CASE_OP_32_64(setcond):
994         case INDEX_op_setcond2_i32:
995             mask = 1;
996             break;
997 
998         CASE_OP_32_64(movcond):
999             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1000             break;
1001 
1002         CASE_OP_32_64(ld8u):
1003             mask = 0xff;
1004             break;
1005         CASE_OP_32_64(ld16u):
1006             mask = 0xffff;
1007             break;
1008         case INDEX_op_ld32u_i64:
1009             mask = 0xffffffffu;
1010             break;
1011 
1012         CASE_OP_32_64(qemu_ld):
1013             {
1014                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1015                 TCGMemOp mop = get_memop(oi);
1016                 if (!(mop & MO_SIGN)) {
1017                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1018                 }
1019             }
1020             break;
1021 
1022         default:
1023             break;
1024         }
1025 
1026         /* 32-bit ops generate 32-bit results.  For the "result is zero"
1027            test below, we can ignore high bits, but for further optimizations
1028            we need to record that the high bits contain garbage.  */
1029         partmask = mask;
1030         if (!(def->flags & TCG_OPF_64BIT)) {
1031             mask |= ~(tcg_target_ulong)0xffffffffu;
1032             partmask &= 0xffffffffu;
1033             affected &= 0xffffffffu;
1034         }
1035 
1036         if (partmask == 0) {
1037             tcg_debug_assert(nb_oargs == 1);
1038             tcg_opt_gen_movi(s, op, op->args[0], 0);
1039             continue;
1040         }
1041         if (affected == 0) {
1042             tcg_debug_assert(nb_oargs == 1);
1043             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1044             continue;
1045         }
1046 
1047         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1048         switch (opc) {
1049         CASE_OP_32_64(and):
1050         CASE_OP_32_64(mul):
1051         CASE_OP_32_64(muluh):
1052         CASE_OP_32_64(mulsh):
1053             if (arg_is_const(op->args[2])
1054                 && arg_info(op->args[2])->val == 0) {
1055                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1056                 continue;
1057             }
1058             break;
1059         default:
1060             break;
1061         }
1062 
1063         /* Simplify expression for "op r, a, a => mov r, a" cases */
1064         switch (opc) {
1065         CASE_OP_32_64(or):
1066         CASE_OP_32_64(and):
1067             if (args_are_copies(op->args[1], op->args[2])) {
1068                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1069                 continue;
1070             }
1071             break;
1072         default:
1073             break;
1074         }
1075 
1076         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1077         switch (opc) {
1078         CASE_OP_32_64(andc):
1079         CASE_OP_32_64(sub):
1080         CASE_OP_32_64(xor):
1081             if (args_are_copies(op->args[1], op->args[2])) {
1082                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1083                 continue;
1084             }
1085             break;
1086         default:
1087             break;
1088         }
1089 
1090         /* Propagate constants through copy operations and do constant
1091            folding.  Constants will be substituted to arguments by register
1092            allocator where needed and possible.  Also detect copies. */
1093         switch (opc) {
1094         CASE_OP_32_64(mov):
1095             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1096             break;
1097         CASE_OP_32_64(movi):
1098             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1099             break;
1100 
1101         CASE_OP_32_64(not):
1102         CASE_OP_32_64(neg):
1103         CASE_OP_32_64(ext8s):
1104         CASE_OP_32_64(ext8u):
1105         CASE_OP_32_64(ext16s):
1106         CASE_OP_32_64(ext16u):
1107         CASE_OP_32_64(ctpop):
1108         case INDEX_op_ext32s_i64:
1109         case INDEX_op_ext32u_i64:
1110         case INDEX_op_ext_i32_i64:
1111         case INDEX_op_extu_i32_i64:
1112         case INDEX_op_extrl_i64_i32:
1113         case INDEX_op_extrh_i64_i32:
1114             if (arg_is_const(op->args[1])) {
1115                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1116                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1117                 break;
1118             }
1119             goto do_default;
1120 
1121         CASE_OP_32_64(add):
1122         CASE_OP_32_64(sub):
1123         CASE_OP_32_64(mul):
1124         CASE_OP_32_64(or):
1125         CASE_OP_32_64(and):
1126         CASE_OP_32_64(xor):
1127         CASE_OP_32_64(shl):
1128         CASE_OP_32_64(shr):
1129         CASE_OP_32_64(sar):
1130         CASE_OP_32_64(rotl):
1131         CASE_OP_32_64(rotr):
1132         CASE_OP_32_64(andc):
1133         CASE_OP_32_64(orc):
1134         CASE_OP_32_64(eqv):
1135         CASE_OP_32_64(nand):
1136         CASE_OP_32_64(nor):
1137         CASE_OP_32_64(muluh):
1138         CASE_OP_32_64(mulsh):
1139         CASE_OP_32_64(div):
1140         CASE_OP_32_64(divu):
1141         CASE_OP_32_64(rem):
1142         CASE_OP_32_64(remu):
1143             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1144                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1145                                           arg_info(op->args[2])->val);
1146                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1147                 break;
1148             }
1149             goto do_default;
1150 
1151         CASE_OP_32_64(clz):
1152         CASE_OP_32_64(ctz):
1153             if (arg_is_const(op->args[1])) {
1154                 TCGArg v = arg_info(op->args[1])->val;
1155                 if (v != 0) {
1156                     tmp = do_constant_folding(opc, v, 0);
1157                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1158                 } else {
1159                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1160                 }
1161                 break;
1162             }
1163             goto do_default;
1164 
1165         CASE_OP_32_64(deposit):
1166             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1167                 tmp = deposit64(arg_info(op->args[1])->val,
1168                                 op->args[3], op->args[4],
1169                                 arg_info(op->args[2])->val);
1170                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1171                 break;
1172             }
1173             goto do_default;
1174 
1175         CASE_OP_32_64(extract):
1176             if (arg_is_const(op->args[1])) {
1177                 tmp = extract64(arg_info(op->args[1])->val,
1178                                 op->args[2], op->args[3]);
1179                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1180                 break;
1181             }
1182             goto do_default;
1183 
1184         CASE_OP_32_64(sextract):
1185             if (arg_is_const(op->args[1])) {
1186                 tmp = sextract64(arg_info(op->args[1])->val,
1187                                  op->args[2], op->args[3]);
1188                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1189                 break;
1190             }
1191             goto do_default;
1192 
1193         CASE_OP_32_64(setcond):
1194             tmp = do_constant_folding_cond(opc, op->args[1],
1195                                            op->args[2], op->args[3]);
1196             if (tmp != 2) {
1197                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1198                 break;
1199             }
1200             goto do_default;
1201 
1202         CASE_OP_32_64(brcond):
1203             tmp = do_constant_folding_cond(opc, op->args[0],
1204                                            op->args[1], op->args[2]);
1205             if (tmp != 2) {
1206                 if (tmp) {
1207                     bitmap_zero(temps_used.l, nb_temps);
1208                     op->opc = INDEX_op_br;
1209                     op->args[0] = op->args[3];
1210                 } else {
1211                     tcg_op_remove(s, op);
1212                 }
1213                 break;
1214             }
1215             goto do_default;
1216 
1217         CASE_OP_32_64(movcond):
1218             tmp = do_constant_folding_cond(opc, op->args[1],
1219                                            op->args[2], op->args[5]);
1220             if (tmp != 2) {
1221                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1222                 break;
1223             }
1224             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1225                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1226                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1227                 TCGCond cond = op->args[5];
1228                 if (fv == 1 && tv == 0) {
1229                     cond = tcg_invert_cond(cond);
1230                 } else if (!(tv == 1 && fv == 0)) {
1231                     goto do_default;
1232                 }
1233                 op->args[3] = cond;
1234                 op->opc = opc = (opc == INDEX_op_movcond_i32
1235                                  ? INDEX_op_setcond_i32
1236                                  : INDEX_op_setcond_i64);
1237                 nb_iargs = 2;
1238             }
1239             goto do_default;
1240 
1241         case INDEX_op_add2_i32:
1242         case INDEX_op_sub2_i32:
1243             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1244                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1245                 uint32_t al = arg_info(op->args[2])->val;
1246                 uint32_t ah = arg_info(op->args[3])->val;
1247                 uint32_t bl = arg_info(op->args[4])->val;
1248                 uint32_t bh = arg_info(op->args[5])->val;
1249                 uint64_t a = ((uint64_t)ah << 32) | al;
1250                 uint64_t b = ((uint64_t)bh << 32) | bl;
1251                 TCGArg rl, rh;
1252                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1253 
1254                 if (opc == INDEX_op_add2_i32) {
1255                     a += b;
1256                 } else {
1257                     a -= b;
1258                 }
1259 
1260                 rl = op->args[0];
1261                 rh = op->args[1];
1262                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1263                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1264 
1265                 /* We've done all we need to do with the movi.  Skip it.  */
1266                 oi_next = op2->next;
1267                 break;
1268             }
1269             goto do_default;
1270 
1271         case INDEX_op_mulu2_i32:
1272             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1273                 uint32_t a = arg_info(op->args[2])->val;
1274                 uint32_t b = arg_info(op->args[3])->val;
1275                 uint64_t r = (uint64_t)a * b;
1276                 TCGArg rl, rh;
1277                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
1278 
1279                 rl = op->args[0];
1280                 rh = op->args[1];
1281                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1282                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1283 
1284                 /* We've done all we need to do with the movi.  Skip it.  */
1285                 oi_next = op2->next;
1286                 break;
1287             }
1288             goto do_default;
1289 
1290         case INDEX_op_brcond2_i32:
1291             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1292                                             op->args[4]);
1293             if (tmp != 2) {
1294                 if (tmp) {
1295             do_brcond_true:
1296                     bitmap_zero(temps_used.l, nb_temps);
1297                     op->opc = INDEX_op_br;
1298                     op->args[0] = op->args[5];
1299                 } else {
1300             do_brcond_false:
1301                     tcg_op_remove(s, op);
1302                 }
1303             } else if ((op->args[4] == TCG_COND_LT
1304                         || op->args[4] == TCG_COND_GE)
1305                        && arg_is_const(op->args[2])
1306                        && arg_info(op->args[2])->val == 0
1307                        && arg_is_const(op->args[3])
1308                        && arg_info(op->args[3])->val == 0) {
1309                 /* Simplify LT/GE comparisons vs zero to a single compare
1310                    vs the high word of the input.  */
1311             do_brcond_high:
1312                 bitmap_zero(temps_used.l, nb_temps);
1313                 op->opc = INDEX_op_brcond_i32;
1314                 op->args[0] = op->args[1];
1315                 op->args[1] = op->args[3];
1316                 op->args[2] = op->args[4];
1317                 op->args[3] = op->args[5];
1318             } else if (op->args[4] == TCG_COND_EQ) {
1319                 /* Simplify EQ comparisons where one of the pairs
1320                    can be simplified.  */
1321                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1322                                                op->args[0], op->args[2],
1323                                                TCG_COND_EQ);
1324                 if (tmp == 0) {
1325                     goto do_brcond_false;
1326                 } else if (tmp == 1) {
1327                     goto do_brcond_high;
1328                 }
1329                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1330                                                op->args[1], op->args[3],
1331                                                TCG_COND_EQ);
1332                 if (tmp == 0) {
1333                     goto do_brcond_false;
1334                 } else if (tmp != 1) {
1335                     goto do_default;
1336                 }
1337             do_brcond_low:
1338                 bitmap_zero(temps_used.l, nb_temps);
1339                 op->opc = INDEX_op_brcond_i32;
1340                 op->args[1] = op->args[2];
1341                 op->args[2] = op->args[4];
1342                 op->args[3] = op->args[5];
1343             } else if (op->args[4] == TCG_COND_NE) {
1344                 /* Simplify NE comparisons where one of the pairs
1345                    can be simplified.  */
1346                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1347                                                op->args[0], op->args[2],
1348                                                TCG_COND_NE);
1349                 if (tmp == 0) {
1350                     goto do_brcond_high;
1351                 } else if (tmp == 1) {
1352                     goto do_brcond_true;
1353                 }
1354                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1355                                                op->args[1], op->args[3],
1356                                                TCG_COND_NE);
1357                 if (tmp == 0) {
1358                     goto do_brcond_low;
1359                 } else if (tmp == 1) {
1360                     goto do_brcond_true;
1361                 }
1362                 goto do_default;
1363             } else {
1364                 goto do_default;
1365             }
1366             break;
1367 
1368         case INDEX_op_setcond2_i32:
1369             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1370                                             op->args[5]);
1371             if (tmp != 2) {
1372             do_setcond_const:
1373                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1374             } else if ((op->args[5] == TCG_COND_LT
1375                         || op->args[5] == TCG_COND_GE)
1376                        && arg_is_const(op->args[3])
1377                        && arg_info(op->args[3])->val == 0
1378                        && arg_is_const(op->args[4])
1379                        && arg_info(op->args[4])->val == 0) {
1380                 /* Simplify LT/GE comparisons vs zero to a single compare
1381                    vs the high word of the input.  */
1382             do_setcond_high:
1383                 reset_temp(op->args[0]);
1384                 arg_info(op->args[0])->mask = 1;
1385                 op->opc = INDEX_op_setcond_i32;
1386                 op->args[1] = op->args[2];
1387                 op->args[2] = op->args[4];
1388                 op->args[3] = op->args[5];
1389             } else if (op->args[5] == TCG_COND_EQ) {
1390                 /* Simplify EQ comparisons where one of the pairs
1391                    can be simplified.  */
1392                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1393                                                op->args[1], op->args[3],
1394                                                TCG_COND_EQ);
1395                 if (tmp == 0) {
1396                     goto do_setcond_const;
1397                 } else if (tmp == 1) {
1398                     goto do_setcond_high;
1399                 }
1400                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1401                                                op->args[2], op->args[4],
1402                                                TCG_COND_EQ);
1403                 if (tmp == 0) {
1404                     goto do_setcond_high;
1405                 } else if (tmp != 1) {
1406                     goto do_default;
1407                 }
1408             do_setcond_low:
1409                 reset_temp(op->args[0]);
1410                 arg_info(op->args[0])->mask = 1;
1411                 op->opc = INDEX_op_setcond_i32;
1412                 op->args[2] = op->args[3];
1413                 op->args[3] = op->args[5];
1414             } else if (op->args[5] == TCG_COND_NE) {
1415                 /* Simplify NE comparisons where one of the pairs
1416                    can be simplified.  */
1417                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1418                                                op->args[1], op->args[3],
1419                                                TCG_COND_NE);
1420                 if (tmp == 0) {
1421                     goto do_setcond_high;
1422                 } else if (tmp == 1) {
1423                     goto do_setcond_const;
1424                 }
1425                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1426                                                op->args[2], op->args[4],
1427                                                TCG_COND_NE);
1428                 if (tmp == 0) {
1429                     goto do_setcond_low;
1430                 } else if (tmp == 1) {
1431                     goto do_setcond_const;
1432                 }
1433                 goto do_default;
1434             } else {
1435                 goto do_default;
1436             }
1437             break;
1438 
1439         case INDEX_op_call:
1440             if (!(op->args[nb_oargs + nb_iargs + 1]
1441                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1442                 for (i = 0; i < nb_globals; i++) {
1443                     if (test_bit(i, temps_used.l)) {
1444                         reset_ts(&s->temps[i]);
1445                     }
1446                 }
1447             }
1448             goto do_reset_output;
1449 
1450         default:
1451         do_default:
1452             /* Default case: we know nothing about operation (or were unable
1453                to compute the operation result) so no propagation is done.
1454                We trash everything if the operation is the end of a basic
1455                block, otherwise we only trash the output args.  "mask" is
1456                the non-zero bits mask for the first output arg.  */
1457             if (def->flags & TCG_OPF_BB_END) {
1458                 bitmap_zero(temps_used.l, nb_temps);
1459             } else {
1460         do_reset_output:
1461                 for (i = 0; i < nb_oargs; i++) {
1462                     reset_temp(op->args[i]);
1463                     /* Save the corresponding known-zero bits mask for the
1464                        first output argument (only one supported so far). */
1465                     if (i == 0) {
1466                         arg_info(op->args[i])->mask = mask;
1467                     }
1468                 }
1469             }
1470             break;
1471         }
1472 
1473         /* Eliminate duplicate and redundant fence instructions.  */
1474         if (prev_mb) {
1475             switch (opc) {
1476             case INDEX_op_mb:
1477                 /* Merge two barriers of the same type into one,
1478                  * or a weaker barrier into a stronger one,
1479                  * or two weaker barriers into a stronger one.
1480                  *   mb X; mb Y => mb X|Y
1481                  *   mb; strl => mb; st
1482                  *   ldaq; mb => ld; mb
1483                  *   ldaq; strl => ld; mb; st
1484                  * Other combinations are also merged into a strong
1485                  * barrier.  This is stricter than specified but for
1486                  * the purposes of TCG is better than not optimizing.
1487                  */
1488                 prev_mb->args[0] |= op->args[0];
1489                 tcg_op_remove(s, op);
1490                 break;
1491 
1492             default:
1493                 /* Opcodes that end the block stop the optimization.  */
1494                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1495                     break;
1496                 }
1497                 /* fallthru */
1498             case INDEX_op_qemu_ld_i32:
1499             case INDEX_op_qemu_ld_i64:
1500             case INDEX_op_qemu_st_i32:
1501             case INDEX_op_qemu_st_i64:
1502             case INDEX_op_call:
1503                 /* Opcodes that touch guest memory stop the optimization.  */
1504                 prev_mb = NULL;
1505                 break;
1506             }
1507         } else if (opc == INDEX_op_mb) {
1508             prev_mb = op;
1509         }
1510     }
1511 }
1512