xref: /openbmc/qemu/tcg/optimize.c (revision bcd82a96)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 
28 
29 #include "qemu-common.h"
30 #include "tcg-op.h"
31 
/* Expand to the pair of case labels for the 32-bit and 64-bit variants
   of opcode X, e.g. CASE_OP_32_64(add) yields
   "case INDEX_op_add_i32: case INDEX_op_add_i64".  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
35 
/* Per-temp state tracked by the optimizer across one pass.  */
struct tcg_temp_info {
    bool is_const;            /* true when VAL holds the temp's known value */
    uint16_t prev_copy;       /* previous temp in the circular copy list */
    uint16_t next_copy;       /* next temp in the circular copy list */
    tcg_target_ulong val;     /* constant value; only valid when is_const */
    tcg_target_ulong mask;    /* bits that may be nonzero (known-zero bits
                                 are clear); -1 means nothing is known */
};
43 
/* One state entry per possible temp; entries are (re)initialized lazily
   by init_temp_info() after each reset_all_temps().  */
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
/* Bitmap of temps whose entry in TEMPS is currently valid.  */
static TCGTempSet temps_used;
46 
47 static inline bool temp_is_const(TCGArg arg)
48 {
49     return temps[arg].is_const;
50 }
51 
52 static inline bool temp_is_copy(TCGArg arg)
53 {
54     return temps[arg].next_copy != arg;
55 }
56 
57 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
58 static void reset_temp(TCGArg temp)
59 {
60     temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
61     temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
62     temps[temp].next_copy = temp;
63     temps[temp].prev_copy = temp;
64     temps[temp].is_const = false;
65     temps[temp].mask = -1;
66 }
67 
/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    /* Clearing the "used" bitmap invalidates every entry of TEMPS at
       once; init_temp_info() re-initializes an entry on first use.  */
    bitmap_zero(temps_used.l, nb_temps);
}
73 
74 /* Initialize and activate a temporary.  */
75 static void init_temp_info(TCGArg temp)
76 {
77     if (!test_bit(temp, temps_used.l)) {
78         temps[temp].next_copy = temp;
79         temps[temp].prev_copy = temp;
80         temps[temp].is_const = false;
81         temps[temp].mask = -1;
82         set_bit(temp, temps_used.l);
83     }
84 }
85 
/* Allocate a fresh op with NARGS argument slots and splice it into the
   op list immediately before OLD_OP.  Returns the new op; its arguments
   are left uninitialized for the caller to fill in.  */
static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                                TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;        /* index of the new op */
    int pi = s->gen_next_parm_idx;      /* index of its first argument */
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;  /* index of OLD_OP itself */
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    /* Splice into the index-based doubly linked list: prev <-> new <-> old.  */
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        /* OLD_OP was the list head; the new op becomes the first op.  */
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}
116 
117 static int op_bits(TCGOpcode op)
118 {
119     const TCGOpDef *def = &tcg_op_defs[op];
120     return def->flags & TCG_OPF_64BIT ? 64 : 32;
121 }
122 
123 static TCGOpcode op_to_mov(TCGOpcode op)
124 {
125     switch (op_bits(op)) {
126     case 32:
127         return INDEX_op_mov_i32;
128     case 64:
129         return INDEX_op_mov_i64;
130     default:
131         fprintf(stderr, "op_to_mov: unexpected return value of "
132                 "function op_bits.\n");
133         tcg_abort();
134     }
135 }
136 
137 static TCGOpcode op_to_movi(TCGOpcode op)
138 {
139     switch (op_bits(op)) {
140     case 32:
141         return INDEX_op_movi_i32;
142     case 64:
143         return INDEX_op_movi_i64;
144     default:
145         fprintf(stderr, "op_to_movi: unexpected return value of "
146                 "function op_bits.\n");
147         tcg_abort();
148     }
149 }
150 
/* Return the preferred representative of TEMP's copy list: a global if
   one exists, else a temp-local, else TEMP itself.  */
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failure to find a better representation, return the same temp. */
    return temp;
}
179 
180 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
181 {
182     TCGArg i;
183 
184     if (arg1 == arg2) {
185         return true;
186     }
187 
188     if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
189         return false;
190     }
191 
192     for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
193         if (i == arg2) {
194             return true;
195         }
196     }
197 
198     return false;
199 }
200 
/* Rewrite OP in place as "movi DST, VAL" and record DST's new state:
   known constant value, known-bits mask, and an emptied copy list.  */
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    /* DST is no longer a copy of anything, but is now a known constant.  */
    reset_temp(dst);
    temps[dst].is_const = true;
    temps[dst].val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    /* Rewrite the op's arguments to match the new opcode.  */
    args[0] = dst;
    args[1] = val;
}
222 
/* Rewrite OP in place as "mov DST, SRC" — or remove it entirely when
   DST is already known to be a copy of SRC — and propagate SRC's known
   state (mask, constness, copy list) to DST.  */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    /* The move would be a no-op; drop the op instead.  */
    if (temps_are_copies(dst, src)) {
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    /* Only record DST as a copy of SRC when the underlying TCG types
       match; otherwise just the known-bits mask carries over.  */
    if (s->temps[src].type == s->temps[dst].type) {
        /* Insert DST into the circular copy list right after SRC.  */
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}
256 
/* Evaluate operation OP on constant operands X and Y and return the
   result.  Unary operations take their operand in X and ignore Y.
   32-bit results are not truncated here; the caller (do_constant_folding)
   canonicalizes them.  Aborts on an opcode it does not handle.  */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift and rotate counts are masked to the operand width, so an
       out-of-range count cannot invoke undefined behavior here.  */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* Sign- and zero-extensions of the low 8/16/32 bits of X.  */
    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of the widening multiply.  */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
392 
393 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
394 {
395     TCGArg res = do_constant_folding_2(op, x, y);
396     if (op_bits(op) == 32) {
397         res = (int32_t)res;
398     }
399     return res;
400 }
401 
402 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
403 {
404     switch (c) {
405     case TCG_COND_EQ:
406         return x == y;
407     case TCG_COND_NE:
408         return x != y;
409     case TCG_COND_LT:
410         return (int32_t)x < (int32_t)y;
411     case TCG_COND_GE:
412         return (int32_t)x >= (int32_t)y;
413     case TCG_COND_LE:
414         return (int32_t)x <= (int32_t)y;
415     case TCG_COND_GT:
416         return (int32_t)x > (int32_t)y;
417     case TCG_COND_LTU:
418         return x < y;
419     case TCG_COND_GEU:
420         return x >= y;
421     case TCG_COND_LEU:
422         return x <= y;
423     case TCG_COND_GTU:
424         return x > y;
425     default:
426         tcg_abort();
427     }
428 }
429 
430 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
431 {
432     switch (c) {
433     case TCG_COND_EQ:
434         return x == y;
435     case TCG_COND_NE:
436         return x != y;
437     case TCG_COND_LT:
438         return (int64_t)x < (int64_t)y;
439     case TCG_COND_GE:
440         return (int64_t)x >= (int64_t)y;
441     case TCG_COND_LE:
442         return (int64_t)x <= (int64_t)y;
443     case TCG_COND_GT:
444         return (int64_t)x > (int64_t)y;
445     case TCG_COND_LTU:
446         return x < y;
447     case TCG_COND_GEU:
448         return x >= y;
449     case TCG_COND_LEU:
450         return x <= y;
451     case TCG_COND_GTU:
452         return x > y;
453     default:
454         tcg_abort();
455     }
456 }
457 
458 static bool do_constant_folding_cond_eq(TCGCond c)
459 {
460     switch (c) {
461     case TCG_COND_GT:
462     case TCG_COND_LTU:
463     case TCG_COND_LT:
464     case TCG_COND_GTU:
465     case TCG_COND_NE:
466         return 0;
467     case TCG_COND_GE:
468     case TCG_COND_GEU:
469     case TCG_COND_LE:
470     case TCG_COND_LEU:
471     case TCG_COND_EQ:
472         return 1;
473     default:
474         tcg_abort();
475     }
476 }
477 
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    /* Both operands constant: evaluate at the operation's width.  */
    if (temp_is_const(x) && temp_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        /* Known-equal operands: the outcome depends on C alone.  */
        return do_constant_folding_cond_eq(c);
    } else if (temp_is_const(y) && temps[y].val == 0) {
        /* Unsigned comparisons against zero have a fixed result.  */
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}
507 
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    /* P1 and P2 each hold a 64-bit value as a {low, high} 32-bit pair.  */
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temp_is_const(bl) && temp_is_const(bh)) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temp_is_const(al) && temp_is_const(ah)) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        /* Unsigned comparisons against a constant zero are decided even
           when the other operand is unknown.  */
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    /* Both halves known-equal: the outcome depends on C alone.  */
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
539 
540 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
541 {
542     TCGArg a1 = *p1, a2 = *p2;
543     int sum = 0;
544     sum += temp_is_const(a1);
545     sum -= temp_is_const(a2);
546 
547     /* Prefer the constant in second argument, and then the form
548        op a, a, b, which is better handled on non-RISC hosts. */
549     if (sum > 0 || (sum == 0 && dest == a2)) {
550         *p1 = a2;
551         *p2 = a1;
552         return true;
553     }
554     return false;
555 }
556 
557 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
558 {
559     int sum = 0;
560     sum += temp_is_const(p1[0]);
561     sum += temp_is_const(p1[1]);
562     sum -= temp_is_const(p2[0]);
563     sum -= temp_is_const(p2[1]);
564     if (sum > 0) {
565         TCGArg t;
566         t = p1[0], p1[0] = p2[0], p2[0] = t;
567         t = p1[1], p1[1] = p2[1], p2[1] = t;
568         return true;
569     }
570     return false;
571 }
572 
573 /* Propagate constants and copies, fold constant expressions. */
574 void tcg_optimize(TCGContext *s)
575 {
576     int oi, oi_next, nb_temps, nb_globals;
577 
578     /* Array VALS has an element for each temp.
579        If this temp holds a constant then its value is kept in VALS' element.
580        If this temp is a copy of other ones then the other copies are
581        available through the doubly linked circular list. */
582 
583     nb_temps = s->nb_temps;
584     nb_globals = s->nb_globals;
585     reset_all_temps(nb_temps);
586 
587     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
588         tcg_target_ulong mask, partmask, affected;
589         int nb_oargs, nb_iargs, i;
590         TCGArg tmp;
591 
592         TCGOp * const op = &s->gen_op_buf[oi];
593         TCGArg * const args = &s->gen_opparam_buf[op->args];
594         TCGOpcode opc = op->opc;
595         const TCGOpDef *def = &tcg_op_defs[opc];
596 
597         oi_next = op->next;
598 
599         /* Count the arguments, and initialize the temps that are
600            going to be used */
601         if (opc == INDEX_op_call) {
602             nb_oargs = op->callo;
603             nb_iargs = op->calli;
604             for (i = 0; i < nb_oargs + nb_iargs; i++) {
605                 tmp = args[i];
606                 if (tmp != TCG_CALL_DUMMY_ARG) {
607                     init_temp_info(tmp);
608                 }
609             }
610         } else {
611             nb_oargs = def->nb_oargs;
612             nb_iargs = def->nb_iargs;
613             for (i = 0; i < nb_oargs + nb_iargs; i++) {
614                 init_temp_info(args[i]);
615             }
616         }
617 
618         /* Do copy propagation */
619         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
620             if (temp_is_copy(args[i])) {
621                 args[i] = find_better_copy(s, args[i]);
622             }
623         }
624 
625         /* For commutative operations make constant second argument */
626         switch (opc) {
627         CASE_OP_32_64(add):
628         CASE_OP_32_64(mul):
629         CASE_OP_32_64(and):
630         CASE_OP_32_64(or):
631         CASE_OP_32_64(xor):
632         CASE_OP_32_64(eqv):
633         CASE_OP_32_64(nand):
634         CASE_OP_32_64(nor):
635         CASE_OP_32_64(muluh):
636         CASE_OP_32_64(mulsh):
637             swap_commutative(args[0], &args[1], &args[2]);
638             break;
639         CASE_OP_32_64(brcond):
640             if (swap_commutative(-1, &args[0], &args[1])) {
641                 args[2] = tcg_swap_cond(args[2]);
642             }
643             break;
644         CASE_OP_32_64(setcond):
645             if (swap_commutative(args[0], &args[1], &args[2])) {
646                 args[3] = tcg_swap_cond(args[3]);
647             }
648             break;
649         CASE_OP_32_64(movcond):
650             if (swap_commutative(-1, &args[1], &args[2])) {
651                 args[5] = tcg_swap_cond(args[5]);
652             }
653             /* For movcond, we canonicalize the "false" input reg to match
654                the destination reg so that the tcg backend can implement
655                a "move if true" operation.  */
656             if (swap_commutative(args[0], &args[4], &args[3])) {
657                 args[5] = tcg_invert_cond(args[5]);
658             }
659             break;
660         CASE_OP_32_64(add2):
661             swap_commutative(args[0], &args[2], &args[4]);
662             swap_commutative(args[1], &args[3], &args[5]);
663             break;
664         CASE_OP_32_64(mulu2):
665         CASE_OP_32_64(muls2):
666             swap_commutative(args[0], &args[2], &args[3]);
667             break;
668         case INDEX_op_brcond2_i32:
669             if (swap_commutative2(&args[0], &args[2])) {
670                 args[4] = tcg_swap_cond(args[4]);
671             }
672             break;
673         case INDEX_op_setcond2_i32:
674             if (swap_commutative2(&args[1], &args[3])) {
675                 args[5] = tcg_swap_cond(args[5]);
676             }
677             break;
678         default:
679             break;
680         }
681 
682         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
683            and "sub r, 0, a => neg r, a" case.  */
684         switch (opc) {
685         CASE_OP_32_64(shl):
686         CASE_OP_32_64(shr):
687         CASE_OP_32_64(sar):
688         CASE_OP_32_64(rotl):
689         CASE_OP_32_64(rotr):
690             if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
691                 tcg_opt_gen_movi(s, op, args, args[0], 0);
692                 continue;
693             }
694             break;
695         CASE_OP_32_64(sub):
696             {
697                 TCGOpcode neg_op;
698                 bool have_neg;
699 
700                 if (temp_is_const(args[2])) {
701                     /* Proceed with possible constant folding. */
702                     break;
703                 }
704                 if (opc == INDEX_op_sub_i32) {
705                     neg_op = INDEX_op_neg_i32;
706                     have_neg = TCG_TARGET_HAS_neg_i32;
707                 } else {
708                     neg_op = INDEX_op_neg_i64;
709                     have_neg = TCG_TARGET_HAS_neg_i64;
710                 }
711                 if (!have_neg) {
712                     break;
713                 }
714                 if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
715                     op->opc = neg_op;
716                     reset_temp(args[0]);
717                     args[1] = args[2];
718                     continue;
719                 }
720             }
721             break;
722         CASE_OP_32_64(xor):
723         CASE_OP_32_64(nand):
724             if (!temp_is_const(args[1])
725                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
726                 i = 1;
727                 goto try_not;
728             }
729             break;
730         CASE_OP_32_64(nor):
731             if (!temp_is_const(args[1])
732                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
733                 i = 1;
734                 goto try_not;
735             }
736             break;
737         CASE_OP_32_64(andc):
738             if (!temp_is_const(args[2])
739                 && temp_is_const(args[1]) && temps[args[1]].val == -1) {
740                 i = 2;
741                 goto try_not;
742             }
743             break;
744         CASE_OP_32_64(orc):
745         CASE_OP_32_64(eqv):
746             if (!temp_is_const(args[2])
747                 && temp_is_const(args[1]) && temps[args[1]].val == 0) {
748                 i = 2;
749                 goto try_not;
750             }
751             break;
752         try_not:
753             {
754                 TCGOpcode not_op;
755                 bool have_not;
756 
757                 if (def->flags & TCG_OPF_64BIT) {
758                     not_op = INDEX_op_not_i64;
759                     have_not = TCG_TARGET_HAS_not_i64;
760                 } else {
761                     not_op = INDEX_op_not_i32;
762                     have_not = TCG_TARGET_HAS_not_i32;
763                 }
764                 if (!have_not) {
765                     break;
766                 }
767                 op->opc = not_op;
768                 reset_temp(args[0]);
769                 args[1] = args[i];
770                 continue;
771             }
772         default:
773             break;
774         }
775 
776         /* Simplify expression for "op r, a, const => mov r, a" cases */
777         switch (opc) {
778         CASE_OP_32_64(add):
779         CASE_OP_32_64(sub):
780         CASE_OP_32_64(shl):
781         CASE_OP_32_64(shr):
782         CASE_OP_32_64(sar):
783         CASE_OP_32_64(rotl):
784         CASE_OP_32_64(rotr):
785         CASE_OP_32_64(or):
786         CASE_OP_32_64(xor):
787         CASE_OP_32_64(andc):
788             if (!temp_is_const(args[1])
789                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
790                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
791                 continue;
792             }
793             break;
794         CASE_OP_32_64(and):
795         CASE_OP_32_64(orc):
796         CASE_OP_32_64(eqv):
797             if (!temp_is_const(args[1])
798                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
799                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
800                 continue;
801             }
802             break;
803         default:
804             break;
805         }
806 
807         /* Simplify using known-zero bits. Currently only ops with a single
808            output argument is supported. */
809         mask = -1;
810         affected = -1;
811         switch (opc) {
812         CASE_OP_32_64(ext8s):
813             if ((temps[args[1]].mask & 0x80) != 0) {
814                 break;
815             }
816         CASE_OP_32_64(ext8u):
817             mask = 0xff;
818             goto and_const;
819         CASE_OP_32_64(ext16s):
820             if ((temps[args[1]].mask & 0x8000) != 0) {
821                 break;
822             }
823         CASE_OP_32_64(ext16u):
824             mask = 0xffff;
825             goto and_const;
826         case INDEX_op_ext32s_i64:
827             if ((temps[args[1]].mask & 0x80000000) != 0) {
828                 break;
829             }
830         case INDEX_op_ext32u_i64:
831             mask = 0xffffffffU;
832             goto and_const;
833 
834         CASE_OP_32_64(and):
835             mask = temps[args[2]].mask;
836             if (temp_is_const(args[2])) {
837         and_const:
838                 affected = temps[args[1]].mask & ~mask;
839             }
840             mask = temps[args[1]].mask & mask;
841             break;
842 
843         case INDEX_op_ext_i32_i64:
844             if ((temps[args[1]].mask & 0x80000000) != 0) {
845                 break;
846             }
847         case INDEX_op_extu_i32_i64:
848             /* We do not compute affected as it is a size changing op.  */
849             mask = (uint32_t)temps[args[1]].mask;
850             break;
851 
852         CASE_OP_32_64(andc):
853             /* Known-zeros does not imply known-ones.  Therefore unless
854                args[2] is constant, we can't infer anything from it.  */
855             if (temp_is_const(args[2])) {
856                 mask = ~temps[args[2]].mask;
857                 goto and_const;
858             }
859             /* But we certainly know nothing outside args[1] may be set. */
860             mask = temps[args[1]].mask;
861             break;
862 
863         case INDEX_op_sar_i32:
864             if (temp_is_const(args[2])) {
865                 tmp = temps[args[2]].val & 31;
866                 mask = (int32_t)temps[args[1]].mask >> tmp;
867             }
868             break;
869         case INDEX_op_sar_i64:
870             if (temp_is_const(args[2])) {
871                 tmp = temps[args[2]].val & 63;
872                 mask = (int64_t)temps[args[1]].mask >> tmp;
873             }
874             break;
875 
876         case INDEX_op_shr_i32:
877             if (temp_is_const(args[2])) {
878                 tmp = temps[args[2]].val & 31;
879                 mask = (uint32_t)temps[args[1]].mask >> tmp;
880             }
881             break;
882         case INDEX_op_shr_i64:
883             if (temp_is_const(args[2])) {
884                 tmp = temps[args[2]].val & 63;
885                 mask = (uint64_t)temps[args[1]].mask >> tmp;
886             }
887             break;
888 
889         case INDEX_op_extrl_i64_i32:
890             mask = (uint32_t)temps[args[1]].mask;
891             break;
892         case INDEX_op_extrh_i64_i32:
893             mask = (uint64_t)temps[args[1]].mask >> 32;
894             break;
895 
896         CASE_OP_32_64(shl):
897             if (temp_is_const(args[2])) {
898                 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
899                 mask = temps[args[1]].mask << tmp;
900             }
901             break;
902 
903         CASE_OP_32_64(neg):
904             /* Set to 1 all bits to the left of the rightmost.  */
905             mask = -(temps[args[1]].mask & -temps[args[1]].mask);
906             break;
907 
908         CASE_OP_32_64(deposit):
909             mask = deposit64(temps[args[1]].mask, args[3], args[4],
910                              temps[args[2]].mask);
911             break;
912 
913         CASE_OP_32_64(or):
914         CASE_OP_32_64(xor):
915             mask = temps[args[1]].mask | temps[args[2]].mask;
916             break;
917 
918         CASE_OP_32_64(setcond):
919         case INDEX_op_setcond2_i32:
920             mask = 1;
921             break;
922 
923         CASE_OP_32_64(movcond):
924             mask = temps[args[3]].mask | temps[args[4]].mask;
925             break;
926 
927         CASE_OP_32_64(ld8u):
928             mask = 0xff;
929             break;
930         CASE_OP_32_64(ld16u):
931             mask = 0xffff;
932             break;
933         case INDEX_op_ld32u_i64:
934             mask = 0xffffffffu;
935             break;
936 
937         CASE_OP_32_64(qemu_ld):
938             {
939                 TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
940                 TCGMemOp mop = get_memop(oi);
941                 if (!(mop & MO_SIGN)) {
942                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
943                 }
944             }
945             break;
946 
947         default:
948             break;
949         }
950 
951         /* 32-bit ops generate 32-bit results.  For the result is zero test
952            below, we can ignore high bits, but for further optimizations we
953            need to record that the high bits contain garbage.  */
954         partmask = mask;
955         if (!(def->flags & TCG_OPF_64BIT)) {
956             mask |= ~(tcg_target_ulong)0xffffffffu;
957             partmask &= 0xffffffffu;
958             affected &= 0xffffffffu;
959         }
960 
961         if (partmask == 0) {
962             tcg_debug_assert(nb_oargs == 1);
963             tcg_opt_gen_movi(s, op, args, args[0], 0);
964             continue;
965         }
966         if (affected == 0) {
967             tcg_debug_assert(nb_oargs == 1);
968             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
969             continue;
970         }
971 
972         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
973         switch (opc) {
974         CASE_OP_32_64(and):
975         CASE_OP_32_64(mul):
976         CASE_OP_32_64(muluh):
977         CASE_OP_32_64(mulsh):
978             if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
979                 tcg_opt_gen_movi(s, op, args, args[0], 0);
980                 continue;
981             }
982             break;
983         default:
984             break;
985         }
986 
987         /* Simplify expression for "op r, a, a => mov r, a" cases */
988         switch (opc) {
989         CASE_OP_32_64(or):
990         CASE_OP_32_64(and):
991             if (temps_are_copies(args[1], args[2])) {
992                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
993                 continue;
994             }
995             break;
996         default:
997             break;
998         }
999 
1000         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1001         switch (opc) {
1002         CASE_OP_32_64(andc):
1003         CASE_OP_32_64(sub):
1004         CASE_OP_32_64(xor):
1005             if (temps_are_copies(args[1], args[2])) {
1006                 tcg_opt_gen_movi(s, op, args, args[0], 0);
1007                 continue;
1008             }
1009             break;
1010         default:
1011             break;
1012         }
1013 
1014         /* Propagate constants through copy operations and do constant
1015            folding.  Constants will be substituted to arguments by register
1016            allocator where needed and possible.  Also detect copies. */
1017         switch (opc) {
1018         CASE_OP_32_64(mov):
1019             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1020             break;
1021         CASE_OP_32_64(movi):
1022             tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1023             break;
1024 
1025         CASE_OP_32_64(not):
1026         CASE_OP_32_64(neg):
1027         CASE_OP_32_64(ext8s):
1028         CASE_OP_32_64(ext8u):
1029         CASE_OP_32_64(ext16s):
1030         CASE_OP_32_64(ext16u):
1031         case INDEX_op_ext32s_i64:
1032         case INDEX_op_ext32u_i64:
1033         case INDEX_op_ext_i32_i64:
1034         case INDEX_op_extu_i32_i64:
1035         case INDEX_op_extrl_i64_i32:
1036         case INDEX_op_extrh_i64_i32:
1037             if (temp_is_const(args[1])) {
1038                 tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1039                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1040                 break;
1041             }
1042             goto do_default;
1043 
1044         CASE_OP_32_64(add):
1045         CASE_OP_32_64(sub):
1046         CASE_OP_32_64(mul):
1047         CASE_OP_32_64(or):
1048         CASE_OP_32_64(and):
1049         CASE_OP_32_64(xor):
1050         CASE_OP_32_64(shl):
1051         CASE_OP_32_64(shr):
1052         CASE_OP_32_64(sar):
1053         CASE_OP_32_64(rotl):
1054         CASE_OP_32_64(rotr):
1055         CASE_OP_32_64(andc):
1056         CASE_OP_32_64(orc):
1057         CASE_OP_32_64(eqv):
1058         CASE_OP_32_64(nand):
1059         CASE_OP_32_64(nor):
1060         CASE_OP_32_64(muluh):
1061         CASE_OP_32_64(mulsh):
1062         CASE_OP_32_64(div):
1063         CASE_OP_32_64(divu):
1064         CASE_OP_32_64(rem):
1065         CASE_OP_32_64(remu):
1066             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1067                 tmp = do_constant_folding(opc, temps[args[1]].val,
1068                                           temps[args[2]].val);
1069                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1070                 break;
1071             }
1072             goto do_default;
1073 
1074         CASE_OP_32_64(deposit):
1075             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1076                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
1077                                 temps[args[2]].val);
1078                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1079                 break;
1080             }
1081             goto do_default;
1082 
1083         CASE_OP_32_64(setcond):
1084             tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1085             if (tmp != 2) {
1086                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1087                 break;
1088             }
1089             goto do_default;
1090 
1091         CASE_OP_32_64(brcond):
1092             tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1093             if (tmp != 2) {
1094                 if (tmp) {
1095                     reset_all_temps(nb_temps);
1096                     op->opc = INDEX_op_br;
1097                     args[0] = args[3];
1098                 } else {
1099                     tcg_op_remove(s, op);
1100                 }
1101                 break;
1102             }
1103             goto do_default;
1104 
1105         CASE_OP_32_64(movcond):
1106             tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1107             if (tmp != 2) {
1108                 tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1109                 break;
1110             }
1111             goto do_default;
1112 
1113         case INDEX_op_add2_i32:
1114         case INDEX_op_sub2_i32:
1115             if (temp_is_const(args[2]) && temp_is_const(args[3])
1116                 && temp_is_const(args[4]) && temp_is_const(args[5])) {
1117                 uint32_t al = temps[args[2]].val;
1118                 uint32_t ah = temps[args[3]].val;
1119                 uint32_t bl = temps[args[4]].val;
1120                 uint32_t bh = temps[args[5]].val;
1121                 uint64_t a = ((uint64_t)ah << 32) | al;
1122                 uint64_t b = ((uint64_t)bh << 32) | bl;
1123                 TCGArg rl, rh;
1124                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1125                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1126 
1127                 if (opc == INDEX_op_add2_i32) {
1128                     a += b;
1129                 } else {
1130                     a -= b;
1131                 }
1132 
1133                 rl = args[0];
1134                 rh = args[1];
1135                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1136                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1137 
1138                 /* We've done all we need to do with the movi.  Skip it.  */
1139                 oi_next = op2->next;
1140                 break;
1141             }
1142             goto do_default;
1143 
1144         case INDEX_op_mulu2_i32:
1145             if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1146                 uint32_t a = temps[args[2]].val;
1147                 uint32_t b = temps[args[3]].val;
1148                 uint64_t r = (uint64_t)a * b;
1149                 TCGArg rl, rh;
1150                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1151                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1152 
1153                 rl = args[0];
1154                 rh = args[1];
1155                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1156                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1157 
1158                 /* We've done all we need to do with the movi.  Skip it.  */
1159                 oi_next = op2->next;
1160                 break;
1161             }
1162             goto do_default;
1163 
1164         case INDEX_op_brcond2_i32:
1165             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1166             if (tmp != 2) {
1167                 if (tmp) {
1168             do_brcond_true:
1169                     reset_all_temps(nb_temps);
1170                     op->opc = INDEX_op_br;
1171                     args[0] = args[5];
1172                 } else {
1173             do_brcond_false:
1174                     tcg_op_remove(s, op);
1175                 }
1176             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1177                        && temp_is_const(args[2]) && temps[args[2]].val == 0
1178                        && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1179                 /* Simplify LT/GE comparisons vs zero to a single compare
1180                    vs the high word of the input.  */
1181             do_brcond_high:
1182                 reset_all_temps(nb_temps);
1183                 op->opc = INDEX_op_brcond_i32;
1184                 args[0] = args[1];
1185                 args[1] = args[3];
1186                 args[2] = args[4];
1187                 args[3] = args[5];
1188             } else if (args[4] == TCG_COND_EQ) {
1189                 /* Simplify EQ comparisons where one of the pairs
1190                    can be simplified.  */
1191                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1192                                                args[0], args[2], TCG_COND_EQ);
1193                 if (tmp == 0) {
1194                     goto do_brcond_false;
1195                 } else if (tmp == 1) {
1196                     goto do_brcond_high;
1197                 }
1198                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1199                                                args[1], args[3], TCG_COND_EQ);
1200                 if (tmp == 0) {
1201                     goto do_brcond_false;
1202                 } else if (tmp != 1) {
1203                     goto do_default;
1204                 }
1205             do_brcond_low:
1206                 reset_all_temps(nb_temps);
1207                 op->opc = INDEX_op_brcond_i32;
1208                 args[1] = args[2];
1209                 args[2] = args[4];
1210                 args[3] = args[5];
1211             } else if (args[4] == TCG_COND_NE) {
1212                 /* Simplify NE comparisons where one of the pairs
1213                    can be simplified.  */
1214                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1215                                                args[0], args[2], TCG_COND_NE);
1216                 if (tmp == 0) {
1217                     goto do_brcond_high;
1218                 } else if (tmp == 1) {
1219                     goto do_brcond_true;
1220                 }
1221                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1222                                                args[1], args[3], TCG_COND_NE);
1223                 if (tmp == 0) {
1224                     goto do_brcond_low;
1225                 } else if (tmp == 1) {
1226                     goto do_brcond_true;
1227                 }
1228                 goto do_default;
1229             } else {
1230                 goto do_default;
1231             }
1232             break;
1233 
1234         case INDEX_op_setcond2_i32:
1235             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1236             if (tmp != 2) {
1237             do_setcond_const:
1238                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1239             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1240                        && temp_is_const(args[3]) && temps[args[3]].val == 0
1241                        && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1242                 /* Simplify LT/GE comparisons vs zero to a single compare
1243                    vs the high word of the input.  */
1244             do_setcond_high:
1245                 reset_temp(args[0]);
1246                 temps[args[0]].mask = 1;
1247                 op->opc = INDEX_op_setcond_i32;
1248                 args[1] = args[2];
1249                 args[2] = args[4];
1250                 args[3] = args[5];
1251             } else if (args[5] == TCG_COND_EQ) {
1252                 /* Simplify EQ comparisons where one of the pairs
1253                    can be simplified.  */
1254                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1255                                                args[1], args[3], TCG_COND_EQ);
1256                 if (tmp == 0) {
1257                     goto do_setcond_const;
1258                 } else if (tmp == 1) {
1259                     goto do_setcond_high;
1260                 }
1261                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1262                                                args[2], args[4], TCG_COND_EQ);
1263                 if (tmp == 0) {
1264                     goto do_setcond_high;
1265                 } else if (tmp != 1) {
1266                     goto do_default;
1267                 }
1268             do_setcond_low:
1269                 reset_temp(args[0]);
1270                 temps[args[0]].mask = 1;
1271                 op->opc = INDEX_op_setcond_i32;
1272                 args[2] = args[3];
1273                 args[3] = args[5];
1274             } else if (args[5] == TCG_COND_NE) {
1275                 /* Simplify NE comparisons where one of the pairs
1276                    can be simplified.  */
1277                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1278                                                args[1], args[3], TCG_COND_NE);
1279                 if (tmp == 0) {
1280                     goto do_setcond_high;
1281                 } else if (tmp == 1) {
1282                     goto do_setcond_const;
1283                 }
1284                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1285                                                args[2], args[4], TCG_COND_NE);
1286                 if (tmp == 0) {
1287                     goto do_setcond_low;
1288                 } else if (tmp == 1) {
1289                     goto do_setcond_const;
1290                 }
1291                 goto do_default;
1292             } else {
1293                 goto do_default;
1294             }
1295             break;
1296 
1297         case INDEX_op_call:
1298             if (!(args[nb_oargs + nb_iargs + 1]
1299                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1300                 for (i = 0; i < nb_globals; i++) {
1301                     if (test_bit(i, temps_used.l)) {
1302                         reset_temp(i);
1303                     }
1304                 }
1305             }
1306             goto do_reset_output;
1307 
1308         default:
1309         do_default:
1310             /* Default case: we know nothing about operation (or were unable
1311                to compute the operation result) so no propagation is done.
1312                We trash everything if the operation is the end of a basic
1313                block, otherwise we only trash the output args.  "mask" is
1314                the non-zero bits mask for the first output arg.  */
1315             if (def->flags & TCG_OPF_BB_END) {
1316                 reset_all_temps(nb_temps);
1317             } else {
1318         do_reset_output:
1319                 for (i = 0; i < nb_oargs; i++) {
1320                     reset_temp(args[i]);
1321                     /* Save the corresponding known-zero bits mask for the
1322                        first output argument (only one supported so far). */
1323                     if (i == 0) {
1324                         temps[args[i]].mask = mask;
1325                     }
1326                 }
1327             }
1328             break;
1329         }
1330     }
1331 }
1332