xref: /openbmc/qemu/tcg/optimize.c (revision 4c4465ff)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "tcg/tcg-op.h"
28 
/*
 * Expand to the case labels for both the _i32 and the _i64 variant of
 * opcode X.  The last label is emitted without its colon, so a use site
 * is written "CASE_OP_32_64(add):" and reads like an ordinary case label.
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
32 
/*
 * Like CASE_OP_32_64, but also emits the case label for the _vec variant
 * of opcode X.  The trailing colon is again supplied by the use site.
 */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
37 
/*
 * Per-temporary bookkeeping used by the optimizer.  A TCGTemp reaches its
 * record through ts->state_ptr (see ts_info()).
 */
struct tcg_temp_info {
    /* True when the temp is known to hold the constant stored in VAL. */
    bool is_const;
    /*
     * Links of the circular doubly linked list of temps known to be copies
     * of one another.  A temp that is not a copy of anything points to
     * itself in both directions.
     */
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    /* The constant value; only meaningful while IS_CONST is true. */
    tcg_target_ulong val;
    /*
     * Bits that may be set in the temp; a clear bit is known to be zero.
     * Reset to -1 (all ones) when nothing is known.
     */
    tcg_target_ulong mask;
};
45 
46 static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
47 {
48     return ts->state_ptr;
49 }
50 
51 static inline struct tcg_temp_info *arg_info(TCGArg arg)
52 {
53     return ts_info(arg_temp(arg));
54 }
55 
56 static inline bool ts_is_const(TCGTemp *ts)
57 {
58     return ts_info(ts)->is_const;
59 }
60 
61 static inline bool arg_is_const(TCGArg arg)
62 {
63     return ts_is_const(arg_temp(arg));
64 }
65 
66 static inline bool ts_is_copy(TCGTemp *ts)
67 {
68     return ts_info(ts)->next_copy != ts;
69 }
70 
71 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
72 static void reset_ts(TCGTemp *ts)
73 {
74     struct tcg_temp_info *ti = ts_info(ts);
75     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
76     struct tcg_temp_info *ni = ts_info(ti->next_copy);
77 
78     ni->prev_copy = ti->prev_copy;
79     pi->next_copy = ti->next_copy;
80     ti->next_copy = ts;
81     ti->prev_copy = ts;
82     ti->is_const = false;
83     ti->mask = -1;
84 }
85 
86 static void reset_temp(TCGArg arg)
87 {
88     reset_ts(arg_temp(arg));
89 }
90 
91 /* Initialize and activate a temporary.  */
92 static void init_ts_info(struct tcg_temp_info *infos,
93                          TCGTempSet *temps_used, TCGTemp *ts)
94 {
95     size_t idx = temp_idx(ts);
96     if (!test_bit(idx, temps_used->l)) {
97         struct tcg_temp_info *ti = &infos[idx];
98 
99         ts->state_ptr = ti;
100         ti->next_copy = ts;
101         ti->prev_copy = ts;
102         ti->is_const = false;
103         ti->mask = -1;
104         set_bit(idx, temps_used->l);
105     }
106 }
107 
108 static void init_arg_info(struct tcg_temp_info *infos,
109                           TCGTempSet *temps_used, TCGArg arg)
110 {
111     init_ts_info(infos, temps_used, arg_temp(arg));
112 }
113 
114 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
115 {
116     TCGTemp *i;
117 
118     /* If this is already a global, we can't do better. */
119     if (ts->temp_global) {
120         return ts;
121     }
122 
123     /* Search for a global first. */
124     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
125         if (i->temp_global) {
126             return i;
127         }
128     }
129 
130     /* If it is a temp, search for a temp local. */
131     if (!ts->temp_local) {
132         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
133             if (ts->temp_local) {
134                 return i;
135             }
136         }
137     }
138 
139     /* Failure to find a better representation, return the same temp. */
140     return ts;
141 }
142 
143 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
144 {
145     TCGTemp *i;
146 
147     if (ts1 == ts2) {
148         return true;
149     }
150 
151     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
152         return false;
153     }
154 
155     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
156         if (i == ts2) {
157             return true;
158         }
159     }
160 
161     return false;
162 }
163 
164 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
165 {
166     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
167 }
168 
169 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
170 {
171     const TCGOpDef *def;
172     TCGOpcode new_op;
173     tcg_target_ulong mask;
174     struct tcg_temp_info *di = arg_info(dst);
175 
176     def = &tcg_op_defs[op->opc];
177     if (def->flags & TCG_OPF_VECTOR) {
178         new_op = INDEX_op_dupi_vec;
179     } else if (def->flags & TCG_OPF_64BIT) {
180         new_op = INDEX_op_movi_i64;
181     } else {
182         new_op = INDEX_op_movi_i32;
183     }
184     op->opc = new_op;
185     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
186     op->args[0] = dst;
187     op->args[1] = val;
188 
189     reset_temp(dst);
190     di->is_const = true;
191     di->val = val;
192     mask = val;
193     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
194         /* High bits of the destination are now garbage.  */
195         mask |= ~0xffffffffull;
196     }
197     di->mask = mask;
198 }
199 
200 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
201 {
202     TCGTemp *dst_ts = arg_temp(dst);
203     TCGTemp *src_ts = arg_temp(src);
204     const TCGOpDef *def;
205     struct tcg_temp_info *di;
206     struct tcg_temp_info *si;
207     tcg_target_ulong mask;
208     TCGOpcode new_op;
209 
210     if (ts_are_copies(dst_ts, src_ts)) {
211         tcg_op_remove(s, op);
212         return;
213     }
214 
215     reset_ts(dst_ts);
216     di = ts_info(dst_ts);
217     si = ts_info(src_ts);
218     def = &tcg_op_defs[op->opc];
219     if (def->flags & TCG_OPF_VECTOR) {
220         new_op = INDEX_op_mov_vec;
221     } else if (def->flags & TCG_OPF_64BIT) {
222         new_op = INDEX_op_mov_i64;
223     } else {
224         new_op = INDEX_op_mov_i32;
225     }
226     op->opc = new_op;
227     /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
228     op->args[0] = dst;
229     op->args[1] = src;
230 
231     mask = si->mask;
232     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
233         /* High bits of the destination are now garbage.  */
234         mask |= ~0xffffffffull;
235     }
236     di->mask = mask;
237 
238     if (src_ts->type == dst_ts->type) {
239         struct tcg_temp_info *ni = ts_info(si->next_copy);
240 
241         di->next_copy = si->next_copy;
242         di->prev_copy = src_ts;
243         ni->prev_copy = dst_ts;
244         si->next_copy = dst_ts;
245         di->is_const = si->is_const;
246         di->val = si->val;
247     }
248 }
249 
250 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
251 {
252     uint64_t l64, h64;
253 
254     switch (op) {
255     CASE_OP_32_64(add):
256         return x + y;
257 
258     CASE_OP_32_64(sub):
259         return x - y;
260 
261     CASE_OP_32_64(mul):
262         return x * y;
263 
264     CASE_OP_32_64(and):
265         return x & y;
266 
267     CASE_OP_32_64(or):
268         return x | y;
269 
270     CASE_OP_32_64(xor):
271         return x ^ y;
272 
273     case INDEX_op_shl_i32:
274         return (uint32_t)x << (y & 31);
275 
276     case INDEX_op_shl_i64:
277         return (uint64_t)x << (y & 63);
278 
279     case INDEX_op_shr_i32:
280         return (uint32_t)x >> (y & 31);
281 
282     case INDEX_op_shr_i64:
283         return (uint64_t)x >> (y & 63);
284 
285     case INDEX_op_sar_i32:
286         return (int32_t)x >> (y & 31);
287 
288     case INDEX_op_sar_i64:
289         return (int64_t)x >> (y & 63);
290 
291     case INDEX_op_rotr_i32:
292         return ror32(x, y & 31);
293 
294     case INDEX_op_rotr_i64:
295         return ror64(x, y & 63);
296 
297     case INDEX_op_rotl_i32:
298         return rol32(x, y & 31);
299 
300     case INDEX_op_rotl_i64:
301         return rol64(x, y & 63);
302 
303     CASE_OP_32_64(not):
304         return ~x;
305 
306     CASE_OP_32_64(neg):
307         return -x;
308 
309     CASE_OP_32_64(andc):
310         return x & ~y;
311 
312     CASE_OP_32_64(orc):
313         return x | ~y;
314 
315     CASE_OP_32_64(eqv):
316         return ~(x ^ y);
317 
318     CASE_OP_32_64(nand):
319         return ~(x & y);
320 
321     CASE_OP_32_64(nor):
322         return ~(x | y);
323 
324     case INDEX_op_clz_i32:
325         return (uint32_t)x ? clz32(x) : y;
326 
327     case INDEX_op_clz_i64:
328         return x ? clz64(x) : y;
329 
330     case INDEX_op_ctz_i32:
331         return (uint32_t)x ? ctz32(x) : y;
332 
333     case INDEX_op_ctz_i64:
334         return x ? ctz64(x) : y;
335 
336     case INDEX_op_ctpop_i32:
337         return ctpop32(x);
338 
339     case INDEX_op_ctpop_i64:
340         return ctpop64(x);
341 
342     CASE_OP_32_64(ext8s):
343         return (int8_t)x;
344 
345     CASE_OP_32_64(ext16s):
346         return (int16_t)x;
347 
348     CASE_OP_32_64(ext8u):
349         return (uint8_t)x;
350 
351     CASE_OP_32_64(ext16u):
352         return (uint16_t)x;
353 
354     CASE_OP_32_64(bswap16):
355         return bswap16(x);
356 
357     CASE_OP_32_64(bswap32):
358         return bswap32(x);
359 
360     case INDEX_op_bswap64_i64:
361         return bswap64(x);
362 
363     case INDEX_op_ext_i32_i64:
364     case INDEX_op_ext32s_i64:
365         return (int32_t)x;
366 
367     case INDEX_op_extu_i32_i64:
368     case INDEX_op_extrl_i64_i32:
369     case INDEX_op_ext32u_i64:
370         return (uint32_t)x;
371 
372     case INDEX_op_extrh_i64_i32:
373         return (uint64_t)x >> 32;
374 
375     case INDEX_op_muluh_i32:
376         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
377     case INDEX_op_mulsh_i32:
378         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
379 
380     case INDEX_op_muluh_i64:
381         mulu64(&l64, &h64, x, y);
382         return h64;
383     case INDEX_op_mulsh_i64:
384         muls64(&l64, &h64, x, y);
385         return h64;
386 
387     case INDEX_op_div_i32:
388         /* Avoid crashing on divide by zero, otherwise undefined.  */
389         return (int32_t)x / ((int32_t)y ? : 1);
390     case INDEX_op_divu_i32:
391         return (uint32_t)x / ((uint32_t)y ? : 1);
392     case INDEX_op_div_i64:
393         return (int64_t)x / ((int64_t)y ? : 1);
394     case INDEX_op_divu_i64:
395         return (uint64_t)x / ((uint64_t)y ? : 1);
396 
397     case INDEX_op_rem_i32:
398         return (int32_t)x % ((int32_t)y ? : 1);
399     case INDEX_op_remu_i32:
400         return (uint32_t)x % ((uint32_t)y ? : 1);
401     case INDEX_op_rem_i64:
402         return (int64_t)x % ((int64_t)y ? : 1);
403     case INDEX_op_remu_i64:
404         return (uint64_t)x % ((uint64_t)y ? : 1);
405 
406     default:
407         fprintf(stderr,
408                 "Unrecognized operation %d in do_constant_folding.\n", op);
409         tcg_abort();
410     }
411 }
412 
413 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
414 {
415     const TCGOpDef *def = &tcg_op_defs[op];
416     TCGArg res = do_constant_folding_2(op, x, y);
417     if (!(def->flags & TCG_OPF_64BIT)) {
418         res = (int32_t)res;
419     }
420     return res;
421 }
422 
423 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
424 {
425     switch (c) {
426     case TCG_COND_EQ:
427         return x == y;
428     case TCG_COND_NE:
429         return x != y;
430     case TCG_COND_LT:
431         return (int32_t)x < (int32_t)y;
432     case TCG_COND_GE:
433         return (int32_t)x >= (int32_t)y;
434     case TCG_COND_LE:
435         return (int32_t)x <= (int32_t)y;
436     case TCG_COND_GT:
437         return (int32_t)x > (int32_t)y;
438     case TCG_COND_LTU:
439         return x < y;
440     case TCG_COND_GEU:
441         return x >= y;
442     case TCG_COND_LEU:
443         return x <= y;
444     case TCG_COND_GTU:
445         return x > y;
446     default:
447         tcg_abort();
448     }
449 }
450 
451 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
452 {
453     switch (c) {
454     case TCG_COND_EQ:
455         return x == y;
456     case TCG_COND_NE:
457         return x != y;
458     case TCG_COND_LT:
459         return (int64_t)x < (int64_t)y;
460     case TCG_COND_GE:
461         return (int64_t)x >= (int64_t)y;
462     case TCG_COND_LE:
463         return (int64_t)x <= (int64_t)y;
464     case TCG_COND_GT:
465         return (int64_t)x > (int64_t)y;
466     case TCG_COND_LTU:
467         return x < y;
468     case TCG_COND_GEU:
469         return x >= y;
470     case TCG_COND_LEU:
471         return x <= y;
472     case TCG_COND_GTU:
473         return x > y;
474     default:
475         tcg_abort();
476     }
477 }
478 
479 static bool do_constant_folding_cond_eq(TCGCond c)
480 {
481     switch (c) {
482     case TCG_COND_GT:
483     case TCG_COND_LTU:
484     case TCG_COND_LT:
485     case TCG_COND_GTU:
486     case TCG_COND_NE:
487         return 0;
488     case TCG_COND_GE:
489     case TCG_COND_GEU:
490     case TCG_COND_LE:
491     case TCG_COND_LEU:
492     case TCG_COND_EQ:
493         return 1;
494     default:
495         tcg_abort();
496     }
497 }
498 
499 /* Return 2 if the condition can't be simplified, and the result
500    of the condition (0 or 1) if it can */
501 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
502                                        TCGArg y, TCGCond c)
503 {
504     tcg_target_ulong xv = arg_info(x)->val;
505     tcg_target_ulong yv = arg_info(y)->val;
506     if (arg_is_const(x) && arg_is_const(y)) {
507         const TCGOpDef *def = &tcg_op_defs[op];
508         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
509         if (def->flags & TCG_OPF_64BIT) {
510             return do_constant_folding_cond_64(xv, yv, c);
511         } else {
512             return do_constant_folding_cond_32(xv, yv, c);
513         }
514     } else if (args_are_copies(x, y)) {
515         return do_constant_folding_cond_eq(c);
516     } else if (arg_is_const(y) && yv == 0) {
517         switch (c) {
518         case TCG_COND_LTU:
519             return 0;
520         case TCG_COND_GEU:
521             return 1;
522         default:
523             return 2;
524         }
525     }
526     return 2;
527 }
528 
529 /* Return 2 if the condition can't be simplified, and the result
530    of the condition (0 or 1) if it can */
531 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
532 {
533     TCGArg al = p1[0], ah = p1[1];
534     TCGArg bl = p2[0], bh = p2[1];
535 
536     if (arg_is_const(bl) && arg_is_const(bh)) {
537         tcg_target_ulong blv = arg_info(bl)->val;
538         tcg_target_ulong bhv = arg_info(bh)->val;
539         uint64_t b = deposit64(blv, 32, 32, bhv);
540 
541         if (arg_is_const(al) && arg_is_const(ah)) {
542             tcg_target_ulong alv = arg_info(al)->val;
543             tcg_target_ulong ahv = arg_info(ah)->val;
544             uint64_t a = deposit64(alv, 32, 32, ahv);
545             return do_constant_folding_cond_64(a, b, c);
546         }
547         if (b == 0) {
548             switch (c) {
549             case TCG_COND_LTU:
550                 return 0;
551             case TCG_COND_GEU:
552                 return 1;
553             default:
554                 break;
555             }
556         }
557     }
558     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
559         return do_constant_folding_cond_eq(c);
560     }
561     return 2;
562 }
563 
564 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
565 {
566     TCGArg a1 = *p1, a2 = *p2;
567     int sum = 0;
568     sum += arg_is_const(a1);
569     sum -= arg_is_const(a2);
570 
571     /* Prefer the constant in second argument, and then the form
572        op a, a, b, which is better handled on non-RISC hosts. */
573     if (sum > 0 || (sum == 0 && dest == a2)) {
574         *p1 = a2;
575         *p2 = a1;
576         return true;
577     }
578     return false;
579 }
580 
581 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
582 {
583     int sum = 0;
584     sum += arg_is_const(p1[0]);
585     sum += arg_is_const(p1[1]);
586     sum -= arg_is_const(p2[0]);
587     sum -= arg_is_const(p2[1]);
588     if (sum > 0) {
589         TCGArg t;
590         t = p1[0], p1[0] = p2[0], p2[0] = t;
591         t = p1[1], p1[1] = p2[1], p2[1] = t;
592         return true;
593     }
594     return false;
595 }
596 
597 /* Propagate constants and copies, fold constant expressions. */
598 void tcg_optimize(TCGContext *s)
599 {
600     int nb_temps, nb_globals;
601     TCGOp *op, *op_next, *prev_mb = NULL;
602     struct tcg_temp_info *infos;
603     TCGTempSet temps_used;
604 
605     /* Array VALS has an element for each temp.
606        If this temp holds a constant then its value is kept in VALS' element.
607        If this temp is a copy of other ones then the other copies are
608        available through the doubly linked circular list. */
609 
610     nb_temps = s->nb_temps;
611     nb_globals = s->nb_globals;
612     bitmap_zero(temps_used.l, nb_temps);
613     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
614 
615     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
616         tcg_target_ulong mask, partmask, affected;
617         int nb_oargs, nb_iargs, i;
618         TCGArg tmp;
619         TCGOpcode opc = op->opc;
620         const TCGOpDef *def = &tcg_op_defs[opc];
621 
622         /* Count the arguments, and initialize the temps that are
623            going to be used */
624         if (opc == INDEX_op_call) {
625             nb_oargs = TCGOP_CALLO(op);
626             nb_iargs = TCGOP_CALLI(op);
627             for (i = 0; i < nb_oargs + nb_iargs; i++) {
628                 TCGTemp *ts = arg_temp(op->args[i]);
629                 if (ts) {
630                     init_ts_info(infos, &temps_used, ts);
631                 }
632             }
633         } else {
634             nb_oargs = def->nb_oargs;
635             nb_iargs = def->nb_iargs;
636             for (i = 0; i < nb_oargs + nb_iargs; i++) {
637                 init_arg_info(infos, &temps_used, op->args[i]);
638             }
639         }
640 
641         /* Do copy propagation */
642         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
643             TCGTemp *ts = arg_temp(op->args[i]);
644             if (ts && ts_is_copy(ts)) {
645                 op->args[i] = temp_arg(find_better_copy(s, ts));
646             }
647         }
648 
649         /* For commutative operations make constant second argument */
650         switch (opc) {
651         CASE_OP_32_64_VEC(add):
652         CASE_OP_32_64_VEC(mul):
653         CASE_OP_32_64_VEC(and):
654         CASE_OP_32_64_VEC(or):
655         CASE_OP_32_64_VEC(xor):
656         CASE_OP_32_64(eqv):
657         CASE_OP_32_64(nand):
658         CASE_OP_32_64(nor):
659         CASE_OP_32_64(muluh):
660         CASE_OP_32_64(mulsh):
661             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
662             break;
663         CASE_OP_32_64(brcond):
664             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
665                 op->args[2] = tcg_swap_cond(op->args[2]);
666             }
667             break;
668         CASE_OP_32_64(setcond):
669             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
670                 op->args[3] = tcg_swap_cond(op->args[3]);
671             }
672             break;
673         CASE_OP_32_64(movcond):
674             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
675                 op->args[5] = tcg_swap_cond(op->args[5]);
676             }
677             /* For movcond, we canonicalize the "false" input reg to match
678                the destination reg so that the tcg backend can implement
679                a "move if true" operation.  */
680             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
681                 op->args[5] = tcg_invert_cond(op->args[5]);
682             }
683             break;
684         CASE_OP_32_64(add2):
685             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
686             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
687             break;
688         CASE_OP_32_64(mulu2):
689         CASE_OP_32_64(muls2):
690             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
691             break;
692         case INDEX_op_brcond2_i32:
693             if (swap_commutative2(&op->args[0], &op->args[2])) {
694                 op->args[4] = tcg_swap_cond(op->args[4]);
695             }
696             break;
697         case INDEX_op_setcond2_i32:
698             if (swap_commutative2(&op->args[1], &op->args[3])) {
699                 op->args[5] = tcg_swap_cond(op->args[5]);
700             }
701             break;
702         default:
703             break;
704         }
705 
706         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
707            and "sub r, 0, a => neg r, a" case.  */
708         switch (opc) {
709         CASE_OP_32_64(shl):
710         CASE_OP_32_64(shr):
711         CASE_OP_32_64(sar):
712         CASE_OP_32_64(rotl):
713         CASE_OP_32_64(rotr):
714             if (arg_is_const(op->args[1])
715                 && arg_info(op->args[1])->val == 0) {
716                 tcg_opt_gen_movi(s, op, op->args[0], 0);
717                 continue;
718             }
719             break;
720         CASE_OP_32_64_VEC(sub):
721             {
722                 TCGOpcode neg_op;
723                 bool have_neg;
724 
725                 if (arg_is_const(op->args[2])) {
726                     /* Proceed with possible constant folding. */
727                     break;
728                 }
729                 if (opc == INDEX_op_sub_i32) {
730                     neg_op = INDEX_op_neg_i32;
731                     have_neg = TCG_TARGET_HAS_neg_i32;
732                 } else if (opc == INDEX_op_sub_i64) {
733                     neg_op = INDEX_op_neg_i64;
734                     have_neg = TCG_TARGET_HAS_neg_i64;
735                 } else if (TCG_TARGET_HAS_neg_vec) {
736                     TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
737                     unsigned vece = TCGOP_VECE(op);
738                     neg_op = INDEX_op_neg_vec;
739                     have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
740                 } else {
741                     break;
742                 }
743                 if (!have_neg) {
744                     break;
745                 }
746                 if (arg_is_const(op->args[1])
747                     && arg_info(op->args[1])->val == 0) {
748                     op->opc = neg_op;
749                     reset_temp(op->args[0]);
750                     op->args[1] = op->args[2];
751                     continue;
752                 }
753             }
754             break;
755         CASE_OP_32_64_VEC(xor):
756         CASE_OP_32_64(nand):
757             if (!arg_is_const(op->args[1])
758                 && arg_is_const(op->args[2])
759                 && arg_info(op->args[2])->val == -1) {
760                 i = 1;
761                 goto try_not;
762             }
763             break;
764         CASE_OP_32_64(nor):
765             if (!arg_is_const(op->args[1])
766                 && arg_is_const(op->args[2])
767                 && arg_info(op->args[2])->val == 0) {
768                 i = 1;
769                 goto try_not;
770             }
771             break;
772         CASE_OP_32_64_VEC(andc):
773             if (!arg_is_const(op->args[2])
774                 && arg_is_const(op->args[1])
775                 && arg_info(op->args[1])->val == -1) {
776                 i = 2;
777                 goto try_not;
778             }
779             break;
780         CASE_OP_32_64_VEC(orc):
781         CASE_OP_32_64(eqv):
782             if (!arg_is_const(op->args[2])
783                 && arg_is_const(op->args[1])
784                 && arg_info(op->args[1])->val == 0) {
785                 i = 2;
786                 goto try_not;
787             }
788             break;
789         try_not:
790             {
791                 TCGOpcode not_op;
792                 bool have_not;
793 
794                 if (def->flags & TCG_OPF_VECTOR) {
795                     not_op = INDEX_op_not_vec;
796                     have_not = TCG_TARGET_HAS_not_vec;
797                 } else if (def->flags & TCG_OPF_64BIT) {
798                     not_op = INDEX_op_not_i64;
799                     have_not = TCG_TARGET_HAS_not_i64;
800                 } else {
801                     not_op = INDEX_op_not_i32;
802                     have_not = TCG_TARGET_HAS_not_i32;
803                 }
804                 if (!have_not) {
805                     break;
806                 }
807                 op->opc = not_op;
808                 reset_temp(op->args[0]);
809                 op->args[1] = op->args[i];
810                 continue;
811             }
812         default:
813             break;
814         }
815 
816         /* Simplify expression for "op r, a, const => mov r, a" cases */
817         switch (opc) {
818         CASE_OP_32_64_VEC(add):
819         CASE_OP_32_64_VEC(sub):
820         CASE_OP_32_64_VEC(or):
821         CASE_OP_32_64_VEC(xor):
822         CASE_OP_32_64_VEC(andc):
823         CASE_OP_32_64(shl):
824         CASE_OP_32_64(shr):
825         CASE_OP_32_64(sar):
826         CASE_OP_32_64(rotl):
827         CASE_OP_32_64(rotr):
828             if (!arg_is_const(op->args[1])
829                 && arg_is_const(op->args[2])
830                 && arg_info(op->args[2])->val == 0) {
831                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
832                 continue;
833             }
834             break;
835         CASE_OP_32_64_VEC(and):
836         CASE_OP_32_64_VEC(orc):
837         CASE_OP_32_64(eqv):
838             if (!arg_is_const(op->args[1])
839                 && arg_is_const(op->args[2])
840                 && arg_info(op->args[2])->val == -1) {
841                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
842                 continue;
843             }
844             break;
845         default:
846             break;
847         }
848 
849         /* Simplify using known-zero bits. Currently only ops with a single
850            output argument is supported. */
851         mask = -1;
852         affected = -1;
853         switch (opc) {
854         CASE_OP_32_64(ext8s):
855             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
856                 break;
857             }
858             QEMU_FALLTHROUGH;
859         CASE_OP_32_64(ext8u):
860             mask = 0xff;
861             goto and_const;
862         CASE_OP_32_64(ext16s):
863             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
864                 break;
865             }
866             QEMU_FALLTHROUGH;
867         CASE_OP_32_64(ext16u):
868             mask = 0xffff;
869             goto and_const;
870         case INDEX_op_ext32s_i64:
871             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
872                 break;
873             }
874             QEMU_FALLTHROUGH;
875         case INDEX_op_ext32u_i64:
876             mask = 0xffffffffU;
877             goto and_const;
878 
879         CASE_OP_32_64(and):
880             mask = arg_info(op->args[2])->mask;
881             if (arg_is_const(op->args[2])) {
882         and_const:
883                 affected = arg_info(op->args[1])->mask & ~mask;
884             }
885             mask = arg_info(op->args[1])->mask & mask;
886             break;
887 
888         case INDEX_op_ext_i32_i64:
889             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
890                 break;
891             }
892             QEMU_FALLTHROUGH;
893         case INDEX_op_extu_i32_i64:
894             /* We do not compute affected as it is a size changing op.  */
895             mask = (uint32_t)arg_info(op->args[1])->mask;
896             break;
897 
898         CASE_OP_32_64(andc):
899             /* Known-zeros does not imply known-ones.  Therefore unless
900                op->args[2] is constant, we can't infer anything from it.  */
901             if (arg_is_const(op->args[2])) {
902                 mask = ~arg_info(op->args[2])->mask;
903                 goto and_const;
904             }
905             /* But we certainly know nothing outside args[1] may be set. */
906             mask = arg_info(op->args[1])->mask;
907             break;
908 
909         case INDEX_op_sar_i32:
910             if (arg_is_const(op->args[2])) {
911                 tmp = arg_info(op->args[2])->val & 31;
912                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
913             }
914             break;
915         case INDEX_op_sar_i64:
916             if (arg_is_const(op->args[2])) {
917                 tmp = arg_info(op->args[2])->val & 63;
918                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
919             }
920             break;
921 
922         case INDEX_op_shr_i32:
923             if (arg_is_const(op->args[2])) {
924                 tmp = arg_info(op->args[2])->val & 31;
925                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
926             }
927             break;
928         case INDEX_op_shr_i64:
929             if (arg_is_const(op->args[2])) {
930                 tmp = arg_info(op->args[2])->val & 63;
931                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
932             }
933             break;
934 
935         case INDEX_op_extrl_i64_i32:
936             mask = (uint32_t)arg_info(op->args[1])->mask;
937             break;
938         case INDEX_op_extrh_i64_i32:
939             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
940             break;
941 
942         CASE_OP_32_64(shl):
943             if (arg_is_const(op->args[2])) {
944                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
945                 mask = arg_info(op->args[1])->mask << tmp;
946             }
947             break;
948 
949         CASE_OP_32_64(neg):
950             /* Set to 1 all bits to the left of the rightmost.  */
951             mask = -(arg_info(op->args[1])->mask
952                      & -arg_info(op->args[1])->mask);
953             break;
954 
955         CASE_OP_32_64(deposit):
956             mask = deposit64(arg_info(op->args[1])->mask,
957                              op->args[3], op->args[4],
958                              arg_info(op->args[2])->mask);
959             break;
960 
961         CASE_OP_32_64(extract):
962             mask = extract64(arg_info(op->args[1])->mask,
963                              op->args[2], op->args[3]);
964             if (op->args[2] == 0) {
965                 affected = arg_info(op->args[1])->mask & ~mask;
966             }
967             break;
968         CASE_OP_32_64(sextract):
969             mask = sextract64(arg_info(op->args[1])->mask,
970                               op->args[2], op->args[3]);
971             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
972                 affected = arg_info(op->args[1])->mask & ~mask;
973             }
974             break;
975 
976         CASE_OP_32_64(or):
977         CASE_OP_32_64(xor):
978             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
979             break;
980 
981         case INDEX_op_clz_i32:
982         case INDEX_op_ctz_i32:
983             mask = arg_info(op->args[2])->mask | 31;
984             break;
985 
986         case INDEX_op_clz_i64:
987         case INDEX_op_ctz_i64:
988             mask = arg_info(op->args[2])->mask | 63;
989             break;
990 
991         case INDEX_op_ctpop_i32:
992             mask = 32 | 31;
993             break;
994         case INDEX_op_ctpop_i64:
995             mask = 64 | 63;
996             break;
997 
998         CASE_OP_32_64(setcond):
999         case INDEX_op_setcond2_i32:
1000             mask = 1;
1001             break;
1002 
1003         CASE_OP_32_64(movcond):
1004             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1005             break;
1006 
1007         CASE_OP_32_64(ld8u):
1008             mask = 0xff;
1009             break;
1010         CASE_OP_32_64(ld16u):
1011             mask = 0xffff;
1012             break;
1013         case INDEX_op_ld32u_i64:
1014             mask = 0xffffffffu;
1015             break;
1016 
1017         CASE_OP_32_64(qemu_ld):
1018             {
1019                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1020                 MemOp mop = get_memop(oi);
1021                 if (!(mop & MO_SIGN)) {
1022                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1023                 }
1024             }
1025             break;
1026 
1027         default:
1028             break;
1029         }
1030 
1031         /* 32-bit ops generate 32-bit results.  For the result-is-zero test
1032            below, we can ignore high bits, but for further optimizations we
1033            need to record that the high bits contain garbage.  */
1034         partmask = mask;
1035         if (!(def->flags & TCG_OPF_64BIT)) {
1036             mask |= ~(tcg_target_ulong)0xffffffffu;
1037             partmask &= 0xffffffffu;
1038             affected &= 0xffffffffu;
1039         }
1040 
1041         if (partmask == 0) {
1042             tcg_debug_assert(nb_oargs == 1);
1043             tcg_opt_gen_movi(s, op, op->args[0], 0);
1044             continue;
1045         }
1046         if (affected == 0) {
1047             tcg_debug_assert(nb_oargs == 1);
1048             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1049             continue;
1050         }
1051 
1052         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1053         switch (opc) {
1054         CASE_OP_32_64_VEC(and):
1055         CASE_OP_32_64_VEC(mul):
1056         CASE_OP_32_64(muluh):
1057         CASE_OP_32_64(mulsh):
1058             if (arg_is_const(op->args[2])
1059                 && arg_info(op->args[2])->val == 0) {
1060                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1061                 continue;
1062             }
1063             break;
1064         default:
1065             break;
1066         }
1067 
1068         /* Simplify expression for "op r, a, a => mov r, a" cases */
1069         switch (opc) {
1070         CASE_OP_32_64_VEC(or):
1071         CASE_OP_32_64_VEC(and):
1072             if (args_are_copies(op->args[1], op->args[2])) {
1073                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1074                 continue;
1075             }
1076             break;
1077         default:
1078             break;
1079         }
1080 
1081         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1082         switch (opc) {
1083         CASE_OP_32_64_VEC(andc):
1084         CASE_OP_32_64_VEC(sub):
1085         CASE_OP_32_64_VEC(xor):
1086             if (args_are_copies(op->args[1], op->args[2])) {
1087                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1088                 continue;
1089             }
1090             break;
1091         default:
1092             break;
1093         }
1094 
1095         /* Propagate constants through copy operations and do constant
1096            folding.  Constants will be substituted to arguments by register
1097            allocator where needed and possible.  Also detect copies. */
1098         switch (opc) {
1099         CASE_OP_32_64_VEC(mov):
1100             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1101             break;
1102         CASE_OP_32_64(movi):
1103         case INDEX_op_dupi_vec:
1104             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1105             break;
1106 
1107         case INDEX_op_dup_vec:
1108             if (arg_is_const(op->args[1])) {
1109                 tmp = arg_info(op->args[1])->val;
1110                 tmp = dup_const(TCGOP_VECE(op), tmp);
1111                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1112                 break;
1113             }
1114             goto do_default;
1115 
1116         case INDEX_op_dup2_vec:
1117             assert(TCG_TARGET_REG_BITS == 32);
1118             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1119                 tmp = arg_info(op->args[1])->val;
1120                 if (tmp == arg_info(op->args[2])->val) {
1121                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1122                     break;
1123                 }
1124             } else if (args_are_copies(op->args[1], op->args[2])) {
1125                 op->opc = INDEX_op_dup_vec;
1126                 TCGOP_VECE(op) = MO_32;
1127                 nb_iargs = 1;
1128             }
1129             goto do_default;
1130 
1131         CASE_OP_32_64(not):
1132         CASE_OP_32_64(neg):
1133         CASE_OP_32_64(ext8s):
1134         CASE_OP_32_64(ext8u):
1135         CASE_OP_32_64(ext16s):
1136         CASE_OP_32_64(ext16u):
1137         CASE_OP_32_64(ctpop):
1138         CASE_OP_32_64(bswap16):
1139         CASE_OP_32_64(bswap32):
1140         case INDEX_op_bswap64_i64:
1141         case INDEX_op_ext32s_i64:
1142         case INDEX_op_ext32u_i64:
1143         case INDEX_op_ext_i32_i64:
1144         case INDEX_op_extu_i32_i64:
1145         case INDEX_op_extrl_i64_i32:
1146         case INDEX_op_extrh_i64_i32:
1147             if (arg_is_const(op->args[1])) {
1148                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1149                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1150                 break;
1151             }
1152             goto do_default;
1153 
1154         CASE_OP_32_64(add):
1155         CASE_OP_32_64(sub):
1156         CASE_OP_32_64(mul):
1157         CASE_OP_32_64(or):
1158         CASE_OP_32_64(and):
1159         CASE_OP_32_64(xor):
1160         CASE_OP_32_64(shl):
1161         CASE_OP_32_64(shr):
1162         CASE_OP_32_64(sar):
1163         CASE_OP_32_64(rotl):
1164         CASE_OP_32_64(rotr):
1165         CASE_OP_32_64(andc):
1166         CASE_OP_32_64(orc):
1167         CASE_OP_32_64(eqv):
1168         CASE_OP_32_64(nand):
1169         CASE_OP_32_64(nor):
1170         CASE_OP_32_64(muluh):
1171         CASE_OP_32_64(mulsh):
1172         CASE_OP_32_64(div):
1173         CASE_OP_32_64(divu):
1174         CASE_OP_32_64(rem):
1175         CASE_OP_32_64(remu):
1176             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1177                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1178                                           arg_info(op->args[2])->val);
1179                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1180                 break;
1181             }
1182             goto do_default;
1183 
1184         CASE_OP_32_64(clz):
1185         CASE_OP_32_64(ctz):
1186             if (arg_is_const(op->args[1])) {
1187                 TCGArg v = arg_info(op->args[1])->val;
1188                 if (v != 0) {
1189                     tmp = do_constant_folding(opc, v, 0);
1190                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1191                 } else {
1192                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1193                 }
1194                 break;
1195             }
1196             goto do_default;
1197 
1198         CASE_OP_32_64(deposit):
1199             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1200                 tmp = deposit64(arg_info(op->args[1])->val,
1201                                 op->args[3], op->args[4],
1202                                 arg_info(op->args[2])->val);
1203                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1204                 break;
1205             }
1206             goto do_default;
1207 
1208         CASE_OP_32_64(extract):
1209             if (arg_is_const(op->args[1])) {
1210                 tmp = extract64(arg_info(op->args[1])->val,
1211                                 op->args[2], op->args[3]);
1212                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1213                 break;
1214             }
1215             goto do_default;
1216 
1217         CASE_OP_32_64(sextract):
1218             if (arg_is_const(op->args[1])) {
1219                 tmp = sextract64(arg_info(op->args[1])->val,
1220                                  op->args[2], op->args[3]);
1221                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1222                 break;
1223             }
1224             goto do_default;
1225 
1226         CASE_OP_32_64(extract2):
1227             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1228                 TCGArg v1 = arg_info(op->args[1])->val;
1229                 TCGArg v2 = arg_info(op->args[2])->val;
1230 
1231                 if (opc == INDEX_op_extract2_i64) {
1232                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1233                 } else {
1234                     tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
1235                                     ((uint32_t)v2 << (32 - op->args[3])));
1236                 }
1237                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1238                 break;
1239             }
1240             goto do_default;
1241 
1242         CASE_OP_32_64(setcond):
1243             tmp = do_constant_folding_cond(opc, op->args[1],
1244                                            op->args[2], op->args[3]);
1245             if (tmp != 2) {
1246                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1247                 break;
1248             }
1249             goto do_default;
1250 
1251         CASE_OP_32_64(brcond):
1252             tmp = do_constant_folding_cond(opc, op->args[0],
1253                                            op->args[1], op->args[2]);
1254             if (tmp != 2) {
1255                 if (tmp) {
1256                     bitmap_zero(temps_used.l, nb_temps);
1257                     op->opc = INDEX_op_br;
1258                     op->args[0] = op->args[3];
1259                 } else {
1260                     tcg_op_remove(s, op);
1261                 }
1262                 break;
1263             }
1264             goto do_default;
1265 
1266         CASE_OP_32_64(movcond):
1267             tmp = do_constant_folding_cond(opc, op->args[1],
1268                                            op->args[2], op->args[5]);
1269             if (tmp != 2) {
1270                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1271                 break;
1272             }
1273             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1274                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1275                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1276                 TCGCond cond = op->args[5];
1277                 if (fv == 1 && tv == 0) {
1278                     cond = tcg_invert_cond(cond);
1279                 } else if (!(tv == 1 && fv == 0)) {
1280                     goto do_default;
1281                 }
1282                 op->args[3] = cond;
1283                 op->opc = opc = (opc == INDEX_op_movcond_i32
1284                                  ? INDEX_op_setcond_i32
1285                                  : INDEX_op_setcond_i64);
1286                 nb_iargs = 2;
1287             }
1288             goto do_default;
1289 
1290         case INDEX_op_add2_i32:
1291         case INDEX_op_sub2_i32:
1292             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1293                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1294                 uint32_t al = arg_info(op->args[2])->val;
1295                 uint32_t ah = arg_info(op->args[3])->val;
1296                 uint32_t bl = arg_info(op->args[4])->val;
1297                 uint32_t bh = arg_info(op->args[5])->val;
1298                 uint64_t a = ((uint64_t)ah << 32) | al;
1299                 uint64_t b = ((uint64_t)bh << 32) | bl;
1300                 TCGArg rl, rh;
1301                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1302 
1303                 if (opc == INDEX_op_add2_i32) {
1304                     a += b;
1305                 } else {
1306                     a -= b;
1307                 }
1308 
1309                 rl = op->args[0];
1310                 rh = op->args[1];
1311                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1312                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1313                 break;
1314             }
1315             goto do_default;
1316 
1317         case INDEX_op_mulu2_i32:
1318             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1319                 uint32_t a = arg_info(op->args[2])->val;
1320                 uint32_t b = arg_info(op->args[3])->val;
1321                 uint64_t r = (uint64_t)a * b;
1322                 TCGArg rl, rh;
1323                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1324 
1325                 rl = op->args[0];
1326                 rh = op->args[1];
1327                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1328                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1329                 break;
1330             }
1331             goto do_default;
1332 
1333         case INDEX_op_brcond2_i32:
1334             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1335                                             op->args[4]);
1336             if (tmp != 2) {
1337                 if (tmp) {
1338             do_brcond_true:
1339                     bitmap_zero(temps_used.l, nb_temps);
1340                     op->opc = INDEX_op_br;
1341                     op->args[0] = op->args[5];
1342                 } else {
1343             do_brcond_false:
1344                     tcg_op_remove(s, op);
1345                 }
1346             } else if ((op->args[4] == TCG_COND_LT
1347                         || op->args[4] == TCG_COND_GE)
1348                        && arg_is_const(op->args[2])
1349                        && arg_info(op->args[2])->val == 0
1350                        && arg_is_const(op->args[3])
1351                        && arg_info(op->args[3])->val == 0) {
1352                 /* Simplify LT/GE comparisons vs zero to a single compare
1353                    vs the high word of the input.  */
1354             do_brcond_high:
1355                 bitmap_zero(temps_used.l, nb_temps);
1356                 op->opc = INDEX_op_brcond_i32;
1357                 op->args[0] = op->args[1];
1358                 op->args[1] = op->args[3];
1359                 op->args[2] = op->args[4];
1360                 op->args[3] = op->args[5];
1361             } else if (op->args[4] == TCG_COND_EQ) {
1362                 /* Simplify EQ comparisons where one of the pairs
1363                    can be simplified.  */
1364                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1365                                                op->args[0], op->args[2],
1366                                                TCG_COND_EQ);
1367                 if (tmp == 0) {
1368                     goto do_brcond_false;
1369                 } else if (tmp == 1) {
1370                     goto do_brcond_high;
1371                 }
1372                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1373                                                op->args[1], op->args[3],
1374                                                TCG_COND_EQ);
1375                 if (tmp == 0) {
1376                     goto do_brcond_false;
1377                 } else if (tmp != 1) {
1378                     goto do_default;
1379                 }
1380             do_brcond_low:
1381                 bitmap_zero(temps_used.l, nb_temps);
1382                 op->opc = INDEX_op_brcond_i32;
1383                 op->args[1] = op->args[2];
1384                 op->args[2] = op->args[4];
1385                 op->args[3] = op->args[5];
1386             } else if (op->args[4] == TCG_COND_NE) {
1387                 /* Simplify NE comparisons where one of the pairs
1388                    can be simplified.  */
1389                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1390                                                op->args[0], op->args[2],
1391                                                TCG_COND_NE);
1392                 if (tmp == 0) {
1393                     goto do_brcond_high;
1394                 } else if (tmp == 1) {
1395                     goto do_brcond_true;
1396                 }
1397                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1398                                                op->args[1], op->args[3],
1399                                                TCG_COND_NE);
1400                 if (tmp == 0) {
1401                     goto do_brcond_low;
1402                 } else if (tmp == 1) {
1403                     goto do_brcond_true;
1404                 }
1405                 goto do_default;
1406             } else {
1407                 goto do_default;
1408             }
1409             break;
1410 
1411         case INDEX_op_setcond2_i32:
1412             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1413                                             op->args[5]);
1414             if (tmp != 2) {
1415             do_setcond_const:
1416                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1417             } else if ((op->args[5] == TCG_COND_LT
1418                         || op->args[5] == TCG_COND_GE)
1419                        && arg_is_const(op->args[3])
1420                        && arg_info(op->args[3])->val == 0
1421                        && arg_is_const(op->args[4])
1422                        && arg_info(op->args[4])->val == 0) {
1423                 /* Simplify LT/GE comparisons vs zero to a single compare
1424                    vs the high word of the input.  */
1425             do_setcond_high:
1426                 reset_temp(op->args[0]);
1427                 arg_info(op->args[0])->mask = 1;
1428                 op->opc = INDEX_op_setcond_i32;
1429                 op->args[1] = op->args[2];
1430                 op->args[2] = op->args[4];
1431                 op->args[3] = op->args[5];
1432             } else if (op->args[5] == TCG_COND_EQ) {
1433                 /* Simplify EQ comparisons where one of the pairs
1434                    can be simplified.  */
1435                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1436                                                op->args[1], op->args[3],
1437                                                TCG_COND_EQ);
1438                 if (tmp == 0) {
1439                     goto do_setcond_const;
1440                 } else if (tmp == 1) {
1441                     goto do_setcond_high;
1442                 }
1443                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1444                                                op->args[2], op->args[4],
1445                                                TCG_COND_EQ);
1446                 if (tmp == 0) {
1447                     goto do_setcond_high;
1448                 } else if (tmp != 1) {
1449                     goto do_default;
1450                 }
1451             do_setcond_low:
1452                 reset_temp(op->args[0]);
1453                 arg_info(op->args[0])->mask = 1;
1454                 op->opc = INDEX_op_setcond_i32;
1455                 op->args[2] = op->args[3];
1456                 op->args[3] = op->args[5];
1457             } else if (op->args[5] == TCG_COND_NE) {
1458                 /* Simplify NE comparisons where one of the pairs
1459                    can be simplified.  */
1460                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1461                                                op->args[1], op->args[3],
1462                                                TCG_COND_NE);
1463                 if (tmp == 0) {
1464                     goto do_setcond_high;
1465                 } else if (tmp == 1) {
1466                     goto do_setcond_const;
1467                 }
1468                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1469                                                op->args[2], op->args[4],
1470                                                TCG_COND_NE);
1471                 if (tmp == 0) {
1472                     goto do_setcond_low;
1473                 } else if (tmp == 1) {
1474                     goto do_setcond_const;
1475                 }
1476                 goto do_default;
1477             } else {
1478                 goto do_default;
1479             }
1480             break;
1481 
1482         case INDEX_op_call:
1483             if (!(op->args[nb_oargs + nb_iargs + 1]
1484                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1485                 for (i = 0; i < nb_globals; i++) {
1486                     if (test_bit(i, temps_used.l)) {
1487                         reset_ts(&s->temps[i]);
1488                     }
1489                 }
1490             }
1491             goto do_reset_output;
1492 
1493         default:
1494         do_default:
1495             /* Default case: we know nothing about operation (or were unable
1496                to compute the operation result) so no propagation is done.
1497                We trash everything if the operation is the end of a basic
1498                block, otherwise we only trash the output args.  "mask" is
1499                the non-zero bits mask for the first output arg.  */
1500             if (def->flags & TCG_OPF_BB_END) {
1501                 bitmap_zero(temps_used.l, nb_temps);
1502             } else {
1503         do_reset_output:
1504                 for (i = 0; i < nb_oargs; i++) {
1505                     reset_temp(op->args[i]);
1506                     /* Save the corresponding known-zero bits mask for the
1507                        first output argument (only one supported so far). */
1508                     if (i == 0) {
1509                         arg_info(op->args[i])->mask = mask;
1510                     }
1511                 }
1512             }
1513             break;
1514         }
1515 
1516         /* Eliminate duplicate and redundant fence instructions.  */
1517         if (prev_mb) {
1518             switch (opc) {
1519             case INDEX_op_mb:
1520                 /* Merge two barriers of the same type into one,
1521                  * or a weaker barrier into a stronger one,
1522                  * or two weaker barriers into a stronger one.
1523                  *   mb X; mb Y => mb X|Y
1524                  *   mb; strl => mb; st
1525                  *   ldaq; mb => ld; mb
1526                  *   ldaq; strl => ld; mb; st
1527                  * Other combinations are also merged into a strong
1528                  * barrier.  This is stricter than specified but for
1529                  * the purposes of TCG is better than not optimizing.
1530                  */
1531                 prev_mb->args[0] |= op->args[0];
1532                 tcg_op_remove(s, op);
1533                 break;
1534 
1535             default:
1536                 /* Opcodes that end the block stop the optimization.  */
1537                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1538                     break;
1539                 }
1540                 /* fallthru */
1541             case INDEX_op_qemu_ld_i32:
1542             case INDEX_op_qemu_ld_i64:
1543             case INDEX_op_qemu_st_i32:
1544             case INDEX_op_qemu_st8_i32:
1545             case INDEX_op_qemu_st_i64:
1546             case INDEX_op_call:
1547                 /* Opcodes that touch guest memory stop the optimization.  */
1548                 prev_mb = NULL;
1549                 break;
1550             }
1551         } else if (opc == INDEX_op_mb) {
1552             prev_mb = op;
1553         }
1554     }
1555 }
1556