xref: /openbmc/qemu/tcg/optimize.c (revision a53ba65fff5d4e944b179715adb0d89f35b5b3a6)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32 
33 
34 typedef struct MemCopyInfo {
35     IntervalTreeNode itree;
36     QSIMPLEQ_ENTRY(MemCopyInfo) next;
37     TCGTemp *ts;
38     TCGType type;
39 } MemCopyInfo;
40 
41 typedef struct TempOptInfo {
42     TCGTemp *prev_copy;
43     TCGTemp *next_copy;
44     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
45     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
46     uint64_t o_mask;  /* mask bit is 1 if and only if value bit is 1 */
47     uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
48 } TempOptInfo;
49 
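/*
 * Editor's note (illustrative, not part of the original source):
 * prev_copy/next_copy link all temps currently known to hold the same
 * value into a circular list; a temp whose next_copy points to itself
 * has no other copies (see ts_is_copy below).
 *
 * Worked example of the value-tracking masks for a temp known to hold
 * the 64-bit constant 0xf0:
 *     z_mask = 0x00000000000000f0   only these bits may be 1
 *     o_mask = 0x00000000000000f0   exactly these bits must be 1
 *     s_mask = 0xffffffffffffff00   bits 8..63 all match bit 63 (here 0)
 * Since z_mask == o_mask, the value is fully known (see ti_is_const).
 * For a temp about which nothing is known: z_mask = -1, o_mask = 0,
 * s_mask = 0.
 */
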
50 typedef struct OptContext {
51     TCGContext *tcg;
52     TCGOp *prev_mb;
53     TCGTempSet temps_used;
54 
55     IntervalTreeRoot mem_copy;
56     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
57 
58     /* In flight values from optimization. */
59     TCGType type;
60     int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
61 } OptContext;
62 
63 static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65     return ts->state_ptr;
66 }
67 
68 static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70     return ts_info(arg_temp(arg));
71 }
72 
73 static inline bool ti_is_const(TempOptInfo *ti)
74 {
75     /* If all bits that are not known zeros are known ones, it's constant. */
76     return ti->z_mask == ti->o_mask;
77 }
78 
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81     /* If constant, both z_mask and o_mask contain the value. */
82     return ti->z_mask;
83 }
84 
85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
86 {
87     return ti_is_const(ti) && ti_const_val(ti) == val;
88 }
89 
90 static inline bool ts_is_const(TCGTemp *ts)
91 {
92     return ti_is_const(ts_info(ts));
93 }
94 
95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
96 {
97     return ti_is_const_val(ts_info(ts), val);
98 }
99 
100 static inline bool arg_is_const(TCGArg arg)
101 {
102     return ts_is_const(arg_temp(arg));
103 }
104 
105 static inline uint64_t arg_const_val(TCGArg arg)
106 {
107     return ti_const_val(arg_info(arg));
108 }
109 
110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
111 {
112     return ts_is_const_val(arg_temp(arg), val);
113 }
114 
115 static inline bool ts_is_copy(TCGTemp *ts)
116 {
117     return ts_info(ts)->next_copy != ts;
118 }
119 
120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
121 {
122     return a->kind < b->kind ? b : a;
123 }
124 
125 /* Initialize and activate a temporary.  */
126 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128     size_t idx = temp_idx(ts);
129     TempOptInfo *ti;
130 
131     if (test_bit(idx, ctx->temps_used.l)) {
132         return;
133     }
134     set_bit(idx, ctx->temps_used.l);
135 
136     ti = ts->state_ptr;
137     if (ti == NULL) {
138         ti = tcg_malloc(sizeof(TempOptInfo));
139         ts->state_ptr = ti;
140     }
141 
142     ti->next_copy = ts;
143     ti->prev_copy = ts;
144     QSIMPLEQ_INIT(&ti->mem_copy);
145     if (ts->kind == TEMP_CONST) {
146         ti->z_mask = ts->val;
147         ti->o_mask = ts->val;
148         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149     } else {
150         ti->z_mask = -1;
151         ti->o_mask = 0;
152         ti->s_mask = 0;
153     }
154 }
155 
156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158     IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159     return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161 
162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
163 {
164     IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
165     return r ? container_of(r, MemCopyInfo, itree) : NULL;
166 }
167 
168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
169 {
170     TCGTemp *ts = mc->ts;
171     TempOptInfo *ti = ts_info(ts);
172 
173     interval_tree_remove(&mc->itree, &ctx->mem_copy);
174     QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
175     QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
176 }
177 
178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
179 {
180     while (true) {
181         MemCopyInfo *mc = mem_copy_first(ctx, s, l);
182         if (!mc) {
183             break;
184         }
185         remove_mem_copy(ctx, mc);
186     }
187 }
188 
189 static void remove_mem_copy_all(OptContext *ctx)
190 {
191     remove_mem_copy_in(ctx, 0, -1);
192     tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
193 }
194 
195 static TCGTemp *find_better_copy(TCGTemp *ts)
196 {
197     TCGTemp *i, *ret;
198 
199     /* If this is already readonly, we can't do better. */
200     if (temp_readonly(ts)) {
201         return ts;
202     }
203 
204     ret = ts;
205     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
206         ret = cmp_better_copy(ret, i);
207     }
208     return ret;
209 }
210 
211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
212 {
213     TempOptInfo *si = ts_info(src_ts);
214     TempOptInfo *di = ts_info(dst_ts);
215     MemCopyInfo *mc;
216 
217     QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
218         tcg_debug_assert(mc->ts == src_ts);
219         mc->ts = dst_ts;
220     }
221     QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
222 }
223 
224 /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
225 static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227     TempOptInfo *ti = ts_info(ts);
228     TCGTemp *pts = ti->prev_copy;
229     TCGTemp *nts = ti->next_copy;
230     TempOptInfo *pi = ts_info(pts);
231     TempOptInfo *ni = ts_info(nts);
232 
233     ni->prev_copy = ti->prev_copy;
234     pi->next_copy = ti->next_copy;
235     ti->next_copy = ts;
236     ti->prev_copy = ts;
237     ti->z_mask = -1;
238     ti->o_mask = 0;
239     ti->s_mask = 0;
240 
241     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242         if (ts == nts) {
243             /* Last temp copy being removed, the mem copies die. */
244             MemCopyInfo *mc;
245             QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246                 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247             }
248             QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249         } else {
250             move_mem_copies(find_better_copy(nts), ts);
251         }
252     }
253 }
254 
255 static void reset_temp(OptContext *ctx, TCGArg arg)
256 {
257     reset_ts(ctx, arg_temp(arg));
258 }
259 
260 static void record_mem_copy(OptContext *ctx, TCGType type,
261                             TCGTemp *ts, intptr_t start, intptr_t last)
262 {
263     MemCopyInfo *mc;
264     TempOptInfo *ti;
265 
266     mc = QSIMPLEQ_FIRST(&ctx->mem_free);
267     if (mc) {
268         QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
269     } else {
270         mc = tcg_malloc(sizeof(*mc));
271     }
272 
273     memset(mc, 0, sizeof(*mc));
274     mc->itree.start = start;
275     mc->itree.last = last;
276     mc->type = type;
277     interval_tree_insert(&mc->itree, &ctx->mem_copy);
278 
279     ts = find_better_copy(ts);
280     ti = ts_info(ts);
281     mc->ts = ts;
282     QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
283 }
284 
285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
286 {
287     TCGTemp *i;
288 
289     if (ts1 == ts2) {
290         return true;
291     }
292 
293     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
294         return false;
295     }
296 
297     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
298         if (i == ts2) {
299             return true;
300         }
301     }
302 
303     return false;
304 }
305 
306 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
307 {
308     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
309 }
310 
311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313     MemCopyInfo *mc;
314 
315     for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316         if (mc->itree.start == s && mc->type == type) {
317             return find_better_copy(mc->ts);
318         }
319     }
320     return NULL;
321 }
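
/*
 * Editor's note (illustrative, not from the original source): the memory
 * copy machinery remembers, e.g. after a store of temp T at offset 0x10,
 * that bytes [0x10, 0x13] hold a copy of T at type I32; a later load of
 * the same type from the same offset can then be satisfied by
 * find_mem_copy_for() and turned into a mov from T.  Overlapping stores
 * or calls with side effects invalidate the range via
 * remove_mem_copy_in()/remove_mem_copy_all().
 */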
322 
323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325     TCGType type = ctx->type;
326     TCGTemp *ts;
327 
328     if (type == TCG_TYPE_I32) {
329         val = (int32_t)val;
330     }
331 
332     ts = tcg_constant_internal(type, val);
333     init_ts_info(ctx, ts);
334 
335     return temp_arg(ts);
336 }
337 
338 static TCGArg arg_new_temp(OptContext *ctx)
339 {
340     TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341     init_ts_info(ctx, ts);
342     return temp_arg(ts);
343 }
344 
345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
346                                TCGOpcode opc, unsigned narg)
347 {
348     return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
349 }
350 
351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
352                                 TCGOpcode opc, unsigned narg)
353 {
354     return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
355 }
356 
357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
358 {
359     TCGTemp *dst_ts = arg_temp(dst);
360     TCGTemp *src_ts = arg_temp(src);
361     TempOptInfo *di;
362     TempOptInfo *si;
363     TCGOpcode new_op;
364 
365     if (ts_are_copies(dst_ts, src_ts)) {
366         tcg_op_remove(ctx->tcg, op);
367         return true;
368     }
369 
370     reset_ts(ctx, dst_ts);
371     di = ts_info(dst_ts);
372     si = ts_info(src_ts);
373 
374     switch (ctx->type) {
375     case TCG_TYPE_I32:
376     case TCG_TYPE_I64:
377         new_op = INDEX_op_mov;
378         break;
379     case TCG_TYPE_V64:
380     case TCG_TYPE_V128:
381     case TCG_TYPE_V256:
382         /* TCGOP_TYPE and TCGOP_VECE remain unchanged.  */
383         new_op = INDEX_op_mov_vec;
384         break;
385     default:
386         g_assert_not_reached();
387     }
388     op->opc = new_op;
389     op->args[0] = dst;
390     op->args[1] = src;
391 
392     di->z_mask = si->z_mask;
393     di->o_mask = si->o_mask;
394     di->s_mask = si->s_mask;
395 
396     if (src_ts->type == dst_ts->type) {
397         TempOptInfo *ni = ts_info(si->next_copy);
398 
399         di->next_copy = si->next_copy;
400         di->prev_copy = src_ts;
401         ni->prev_copy = dst_ts;
402         si->next_copy = dst_ts;
403 
404         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
405             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
406             move_mem_copies(dst_ts, src_ts);
407         }
408     } else if (dst_ts->type == TCG_TYPE_I32) {
409         di->z_mask = (int32_t)di->z_mask;
410         di->o_mask = (int32_t)di->o_mask;
411         di->s_mask |= INT32_MIN;
412     } else {
413         di->z_mask |= MAKE_64BIT_MASK(32, 32);
414         di->o_mask = (uint32_t)di->o_mask;
415         di->s_mask = INT64_MIN;
416     }
417     return true;
418 }
419 
420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
421                              TCGArg dst, uint64_t val)
422 {
423     /* Convert movi to mov with constant temp. */
424     return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
425 }
426 
427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
428                                       uint64_t x, uint64_t y)
429 {
430     uint64_t l64, h64;
431 
432     switch (op) {
433     case INDEX_op_add:
434         return x + y;
435 
436     case INDEX_op_sub:
437         return x - y;
438 
439     case INDEX_op_mul:
440         return x * y;
441 
442     case INDEX_op_and:
443     case INDEX_op_and_vec:
444         return x & y;
445 
446     case INDEX_op_or:
447     case INDEX_op_or_vec:
448         return x | y;
449 
450     case INDEX_op_xor:
451     case INDEX_op_xor_vec:
452         return x ^ y;
453 
454     case INDEX_op_shl:
455         if (type == TCG_TYPE_I32) {
456             return (uint32_t)x << (y & 31);
457         }
458         return (uint64_t)x << (y & 63);
459 
460     case INDEX_op_shr:
461         if (type == TCG_TYPE_I32) {
462             return (uint32_t)x >> (y & 31);
463         }
464         return (uint64_t)x >> (y & 63);
465 
466     case INDEX_op_sar:
467         if (type == TCG_TYPE_I32) {
468             return (int32_t)x >> (y & 31);
469         }
470         return (int64_t)x >> (y & 63);
471 
472     case INDEX_op_rotr:
473         if (type == TCG_TYPE_I32) {
474             return ror32(x, y & 31);
475         }
476         return ror64(x, y & 63);
477 
478     case INDEX_op_rotl:
479         if (type == TCG_TYPE_I32) {
480             return rol32(x, y & 31);
481         }
482         return rol64(x, y & 63);
483 
484     case INDEX_op_not:
485     case INDEX_op_not_vec:
486         return ~x;
487 
488     case INDEX_op_neg:
489         return -x;
490 
491     case INDEX_op_andc:
492     case INDEX_op_andc_vec:
493         return x & ~y;
494 
495     case INDEX_op_orc:
496     case INDEX_op_orc_vec:
497         return x | ~y;
498 
499     case INDEX_op_eqv:
500     case INDEX_op_eqv_vec:
501         return ~(x ^ y);
502 
503     case INDEX_op_nand:
504     case INDEX_op_nand_vec:
505         return ~(x & y);
506 
507     case INDEX_op_nor:
508     case INDEX_op_nor_vec:
509         return ~(x | y);
510 
511     case INDEX_op_clz:
512         if (type == TCG_TYPE_I32) {
513             return (uint32_t)x ? clz32(x) : y;
514         }
515         return x ? clz64(x) : y;
516 
517     case INDEX_op_ctz:
518         if (type == TCG_TYPE_I32) {
519             return (uint32_t)x ? ctz32(x) : y;
520         }
521         return x ? ctz64(x) : y;
522 
523     case INDEX_op_ctpop:
524         return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
525 
526     case INDEX_op_bswap16:
527         x = bswap16(x);
528         return y & TCG_BSWAP_OS ? (int16_t)x : x;
529 
530     case INDEX_op_bswap32:
531         x = bswap32(x);
532         return y & TCG_BSWAP_OS ? (int32_t)x : x;
533 
534     case INDEX_op_bswap64:
535         return bswap64(x);
536 
537     case INDEX_op_ext_i32_i64:
538         return (int32_t)x;
539 
540     case INDEX_op_extu_i32_i64:
541     case INDEX_op_extrl_i64_i32:
542         return (uint32_t)x;
543 
544     case INDEX_op_extrh_i64_i32:
545         return (uint64_t)x >> 32;
546 
547     case INDEX_op_muluh:
548         if (type == TCG_TYPE_I32) {
549             return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550         }
551         mulu64(&l64, &h64, x, y);
552         return h64;
553 
554     case INDEX_op_mulsh:
555         if (type == TCG_TYPE_I32) {
556             return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
557         }
558         muls64(&l64, &h64, x, y);
559         return h64;
560 
561     case INDEX_op_divs:
562         /* Avoid crashing on divide by zero, otherwise undefined.  */
563         if (type == TCG_TYPE_I32) {
564             return (int32_t)x / ((int32_t)y ? : 1);
565         }
566         return (int64_t)x / ((int64_t)y ? : 1);
567 
568     case INDEX_op_divu:
569         if (type == TCG_TYPE_I32) {
570             return (uint32_t)x / ((uint32_t)y ? : 1);
571         }
572         return (uint64_t)x / ((uint64_t)y ? : 1);
573 
574     case INDEX_op_rems:
575         if (type == TCG_TYPE_I32) {
576             return (int32_t)x % ((int32_t)y ? : 1);
577         }
578         return (int64_t)x % ((int64_t)y ? : 1);
579 
580     case INDEX_op_remu:
581         if (type == TCG_TYPE_I32) {
582             return (uint32_t)x % ((uint32_t)y ? : 1);
583         }
584         return (uint64_t)x % ((uint64_t)y ? : 1);
585 
586     default:
587         g_assert_not_reached();
588     }
589 }
590 
591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
592                                     uint64_t x, uint64_t y)
593 {
594     uint64_t res = do_constant_folding_2(op, type, x, y);
595     if (type == TCG_TYPE_I32) {
596         res = (int32_t)res;
597     }
598     return res;
599 }
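
/*
 * Editor's note (illustrative): for TCG_TYPE_I32 the folded result is kept
 * sign-extended to 64 bits, matching the representation used by
 * arg_new_constant() above.  E.g. folding add 0x7fffffff, 1 at type I32
 * yields 0xffffffff80000000.
 */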
600 
601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
602 {
603     switch (c) {
604     case TCG_COND_EQ:
605         return x == y;
606     case TCG_COND_NE:
607         return x != y;
608     case TCG_COND_LT:
609         return (int32_t)x < (int32_t)y;
610     case TCG_COND_GE:
611         return (int32_t)x >= (int32_t)y;
612     case TCG_COND_LE:
613         return (int32_t)x <= (int32_t)y;
614     case TCG_COND_GT:
615         return (int32_t)x > (int32_t)y;
616     case TCG_COND_LTU:
617         return x < y;
618     case TCG_COND_GEU:
619         return x >= y;
620     case TCG_COND_LEU:
621         return x <= y;
622     case TCG_COND_GTU:
623         return x > y;
624     case TCG_COND_TSTEQ:
625         return (x & y) == 0;
626     case TCG_COND_TSTNE:
627         return (x & y) != 0;
628     case TCG_COND_ALWAYS:
629     case TCG_COND_NEVER:
630         break;
631     }
632     g_assert_not_reached();
633 }
634 
635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
636 {
637     switch (c) {
638     case TCG_COND_EQ:
639         return x == y;
640     case TCG_COND_NE:
641         return x != y;
642     case TCG_COND_LT:
643         return (int64_t)x < (int64_t)y;
644     case TCG_COND_GE:
645         return (int64_t)x >= (int64_t)y;
646     case TCG_COND_LE:
647         return (int64_t)x <= (int64_t)y;
648     case TCG_COND_GT:
649         return (int64_t)x > (int64_t)y;
650     case TCG_COND_LTU:
651         return x < y;
652     case TCG_COND_GEU:
653         return x >= y;
654     case TCG_COND_LEU:
655         return x <= y;
656     case TCG_COND_GTU:
657         return x > y;
658     case TCG_COND_TSTEQ:
659         return (x & y) == 0;
660     case TCG_COND_TSTNE:
661         return (x & y) != 0;
662     case TCG_COND_ALWAYS:
663     case TCG_COND_NEVER:
664         break;
665     }
666     g_assert_not_reached();
667 }
668 
669 static int do_constant_folding_cond_eq(TCGCond c)
670 {
671     switch (c) {
672     case TCG_COND_GT:
673     case TCG_COND_LTU:
674     case TCG_COND_LT:
675     case TCG_COND_GTU:
676     case TCG_COND_NE:
677         return 0;
678     case TCG_COND_GE:
679     case TCG_COND_GEU:
680     case TCG_COND_LE:
681     case TCG_COND_LEU:
682     case TCG_COND_EQ:
683         return 1;
684     case TCG_COND_TSTEQ:
685     case TCG_COND_TSTNE:
686         return -1;
687     case TCG_COND_ALWAYS:
688     case TCG_COND_NEVER:
689         break;
690     }
691     g_assert_not_reached();
692 }
693 
694 /*
695  * Return -1 if the condition can't be simplified,
696  * and the result of the condition (0 or 1) if it can.
697  */
698 static int do_constant_folding_cond(TCGType type, TCGArg x,
699                                     TCGArg y, TCGCond c)
700 {
701     if (arg_is_const(x) && arg_is_const(y)) {
702         uint64_t xv = arg_const_val(x);
703         uint64_t yv = arg_const_val(y);
704 
705         switch (type) {
706         case TCG_TYPE_I32:
707             return do_constant_folding_cond_32(xv, yv, c);
708         case TCG_TYPE_I64:
709             return do_constant_folding_cond_64(xv, yv, c);
710         default:
711             /* Only scalar comparisons are optimizable */
712             return -1;
713         }
714     } else if (args_are_copies(x, y)) {
715         return do_constant_folding_cond_eq(c);
716     } else if (arg_is_const_val(y, 0)) {
717         switch (c) {
718         case TCG_COND_LTU:
719         case TCG_COND_TSTNE:
720             return 0;
721         case TCG_COND_GEU:
722         case TCG_COND_TSTEQ:
723             return 1;
724         default:
725             return -1;
726         }
727     }
728     return -1;
729 }
730 
731 /**
732  * swap_commutative:
733  * @dest: TCGArg of the destination argument, or NO_DEST.
734  * @p1: first paired argument
735  * @p2: second paired argument
736  *
737  * If *@p1 is a constant and *@p2 is not, swap.
738  * If *@p2 matches @dest, swap.
739  * Return true if a swap was performed.
740  */
741 
742 #define NO_DEST  temp_arg(NULL)
743 
744 static int pref_commutative(TempOptInfo *ti)
745 {
746     /* Slight preference for non-zero constants second. */
747     return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
748 }
749 
750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
751 {
752     TCGArg a1 = *p1, a2 = *p2;
753     int sum = 0;
754     sum += pref_commutative(arg_info(a1));
755     sum -= pref_commutative(arg_info(a2));
756 
757     /* Prefer the constant in second argument, and then the form
758        op a, a, b, which is better handled on non-RISC hosts. */
759     if (sum > 0 || (sum == 0 && dest == a2)) {
760         *p1 = a2;
761         *p2 = a1;
762         return true;
763     }
764     return false;
765 }
766 
767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
768 {
769     int sum = 0;
770     sum += pref_commutative(arg_info(p1[0]));
771     sum += pref_commutative(arg_info(p1[1]));
772     sum -= pref_commutative(arg_info(p2[0]));
773     sum -= pref_commutative(arg_info(p2[1]));
774     if (sum > 0) {
775         TCGArg t;
776         t = p1[0], p1[0] = p2[0], p2[0] = t;
777         t = p1[1], p1[1] = p2[1], p2[1] = t;
778         return true;
779     }
780     return false;
781 }
782 
783 /*
784  * Return -1 if the condition can't be simplified,
785  * and the result of the condition (0 or 1) if it can.
786  */
787 static bool fold_and(OptContext *ctx, TCGOp *op);
788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
789                                      TCGArg *p1, TCGArg *p2, TCGArg *pcond)
790 {
791     TCGCond cond;
792     TempOptInfo *i1;
793     bool swap;
794     int r;
795 
796     swap = swap_commutative(dest, p1, p2);
797     cond = *pcond;
798     if (swap) {
799         *pcond = cond = tcg_swap_cond(cond);
800     }
801 
802     r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
803     if (r >= 0) {
804         return r;
805     }
806     if (!is_tst_cond(cond)) {
807         return -1;
808     }
809 
810     i1 = arg_info(*p1);
811 
812     /*
813      * TSTNE x,x -> NE x,0
814      * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
815      */
816     if (args_are_copies(*p1, *p2) ||
817         (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
818         *p2 = arg_new_constant(ctx, 0);
819         *pcond = tcg_tst_eqne_cond(cond);
820         return -1;
821     }
822 
823     /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
824     if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
825         *p2 = arg_new_constant(ctx, 0);
826         *pcond = tcg_tst_ltge_cond(cond);
827         return -1;
828     }
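
    /*
     * Editor's example (illustrative): if *p1 is known sign-extended from
     * 32 bits (i1->s_mask == 0xffffffff80000000), then
     * "tstne x, 0x8000000000000000" tests only sign-bit copies and is
     * rewritten here as "lt x, 0".
     */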
829 
830     /* Expand to AND with a temporary if no backend support. */
831     if (!TCG_TARGET_HAS_tst) {
832         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
833         TCGArg tmp = arg_new_temp(ctx);
834 
835         op2->args[0] = tmp;
836         op2->args[1] = *p1;
837         op2->args[2] = *p2;
838         fold_and(ctx, op2);
839 
840         *p1 = tmp;
841         *p2 = arg_new_constant(ctx, 0);
842         *pcond = tcg_tst_eqne_cond(cond);
843     }
844     return -1;
845 }
846 
847 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
848 {
849     TCGArg al, ah, bl, bh;
850     TCGCond c;
851     bool swap;
852     int r;
853 
854     swap = swap_commutative2(args, args + 2);
855     c = args[4];
856     if (swap) {
857         args[4] = c = tcg_swap_cond(c);
858     }
859 
860     al = args[0];
861     ah = args[1];
862     bl = args[2];
863     bh = args[3];
864 
865     if (arg_is_const(bl) && arg_is_const(bh)) {
866         tcg_target_ulong blv = arg_const_val(bl);
867         tcg_target_ulong bhv = arg_const_val(bh);
868         uint64_t b = deposit64(blv, 32, 32, bhv);
869 
870         if (arg_is_const(al) && arg_is_const(ah)) {
871             tcg_target_ulong alv = arg_const_val(al);
872             tcg_target_ulong ahv = arg_const_val(ah);
873             uint64_t a = deposit64(alv, 32, 32, ahv);
874 
875             r = do_constant_folding_cond_64(a, b, c);
876             if (r >= 0) {
877                 return r;
878             }
879         }
880 
881         if (b == 0) {
882             switch (c) {
883             case TCG_COND_LTU:
884             case TCG_COND_TSTNE:
885                 return 0;
886             case TCG_COND_GEU:
887             case TCG_COND_TSTEQ:
888                 return 1;
889             default:
890                 break;
891             }
892         }
893 
894         /* TSTNE x,-1 -> NE x,0 */
895         if (b == -1 && is_tst_cond(c)) {
896             args[3] = args[2] = arg_new_constant(ctx, 0);
897             args[4] = tcg_tst_eqne_cond(c);
898             return -1;
899         }
900 
901         /* TSTNE x,sign -> LT x,0 */
902         if (b == INT64_MIN && is_tst_cond(c)) {
903             /* bl must be 0, so copy that to bh */
904             args[3] = bl;
905             args[4] = tcg_tst_ltge_cond(c);
906             return -1;
907         }
908     }
909 
910     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
911         r = do_constant_folding_cond_eq(c);
912         if (r >= 0) {
913             return r;
914         }
915 
916         /* TSTNE x,x -> NE x,0 */
917         if (is_tst_cond(c)) {
918             args[3] = args[2] = arg_new_constant(ctx, 0);
919             args[4] = tcg_tst_eqne_cond(c);
920             return -1;
921         }
922     }
923 
924     /* Expand to AND with a temporary if no backend support. */
925     if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
926         TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
927         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
928         TCGArg t1 = arg_new_temp(ctx);
929         TCGArg t2 = arg_new_temp(ctx);
930 
931         op1->args[0] = t1;
932         op1->args[1] = al;
933         op1->args[2] = bl;
934         fold_and(ctx, op1);
935 
936         op2->args[0] = t2;
937         op2->args[1] = ah;
938         op2->args[2] = bh;
939         fold_and(ctx, op2);
940 
941         args[0] = t1;
942         args[1] = t2;
943         args[3] = args[2] = arg_new_constant(ctx, 0);
944         args[4] = tcg_tst_eqne_cond(c);
945     }
946     return -1;
947 }
948 
949 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
950 {
951     for (int i = 0; i < nb_args; i++) {
952         TCGTemp *ts = arg_temp(op->args[i]);
953         init_ts_info(ctx, ts);
954     }
955 }
956 
957 static void copy_propagate(OptContext *ctx, TCGOp *op,
958                            int nb_oargs, int nb_iargs)
959 {
960     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
961         TCGTemp *ts = arg_temp(op->args[i]);
962         if (ts_is_copy(ts)) {
963             op->args[i] = temp_arg(find_better_copy(ts));
964         }
965     }
966 }
967 
968 static void finish_bb(OptContext *ctx)
969 {
970     /* We only optimize memory barriers within basic blocks. */
971     ctx->prev_mb = NULL;
972 }
973 
974 static void finish_ebb(OptContext *ctx)
975 {
976     finish_bb(ctx);
977     /* We only optimize within extended basic blocks. */
978     memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
979     remove_mem_copy_all(ctx);
980 }
981 
982 static bool finish_folding(OptContext *ctx, TCGOp *op)
983 {
984     const TCGOpDef *def = &tcg_op_defs[op->opc];
985     int i, nb_oargs;
986 
987     nb_oargs = def->nb_oargs;
988     for (i = 0; i < nb_oargs; i++) {
989         TCGTemp *ts = arg_temp(op->args[i]);
990         reset_ts(ctx, ts);
991     }
992     return true;
993 }
994 
995 /*
996  * The fold_* functions return true when processing is complete,
997  * usually by folding the operation to a constant or to a copy,
998  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
999  * like collect information about the value produced, for use in
1000  * optimizing a subsequent operation.
1001  *
1002  * These first fold_* functions are all helpers, used by other
1003  * folders for more specific operations.
1004  */
1005 
1006 static bool fold_const1(OptContext *ctx, TCGOp *op)
1007 {
1008     if (arg_is_const(op->args[1])) {
1009         uint64_t t = arg_const_val(op->args[1]);
1010 
1011         t = do_constant_folding(op->opc, ctx->type, t, 0);
1012         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1013     }
1014     return false;
1015 }
1016 
1017 static bool fold_const2(OptContext *ctx, TCGOp *op)
1018 {
1019     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1020         uint64_t t1 = arg_const_val(op->args[1]);
1021         uint64_t t2 = arg_const_val(op->args[2]);
1022 
1023         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1024         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1025     }
1026     return false;
1027 }
1028 
1029 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1030 {
1031     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1032     return false;
1033 }
1034 
1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1036 {
1037     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038     return fold_const2(ctx, op);
1039 }
1040 
1041 /*
1042  * Record "zero", "one" and "sign" masks for the single output of @op.
1043  * See TempOptInfo definition of z_mask, o_mask and s_mask.
1044  * If the masks describe a constant, or no bits are affected (a_mask == 0),
1045  * fold to that constant or to a copy.  s_mask may be augmented by [zo]_mask.
1046  */
1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
1048                                 uint64_t z_mask, uint64_t o_mask,
1049                                 int64_t s_mask, uint64_t a_mask)
1050 {
1051     const TCGOpDef *def = &tcg_op_defs[op->opc];
1052     TCGTemp *ts;
1053     TempOptInfo *ti;
1054     int rep;
1055 
1056     /* Only single-output opcodes are supported here. */
1057     tcg_debug_assert(def->nb_oargs == 1);
1058 
1059     /*
1060      * 32-bit ops generate 32-bit results, which for the purpose of
1061      * simplifying tcg are sign-extended.  Certainly that's how we
1062      * represent our constants elsewhere.  Note that the bits will
1063      * be reset properly for a 64-bit value when encountering the
1064      * type changing opcodes.
1065      */
1066     if (ctx->type == TCG_TYPE_I32) {
1067         z_mask = (int32_t)z_mask;
1068         o_mask = (int32_t)o_mask;
1069         s_mask |= INT32_MIN;
1070         a_mask = (uint32_t)a_mask;
1071     }
1072 
1073     /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074     tcg_debug_assert((o_mask & ~z_mask) == 0);
1075 
1076     /* If all bits not known zero are known one, the value is a constant. */
1077     if (z_mask == o_mask) {
1078         return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079     }
1080 
1081     /* If no bits are affected, the operation devolves to a copy. */
1082     if (a_mask == 0) {
1083         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084     }
1085 
1086     ts = arg_temp(op->args[0]);
1087     reset_ts(ctx, ts);
1088 
1089     ti = ts_info(ts);
1090     ti->z_mask = z_mask;
1091     ti->o_mask = o_mask;
1092 
1093     /* Canonicalize s_mask and incorporate data from [zo]_mask. */
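    /*
     * Editor's note (illustrative): clz64(~s_mask) counts high bits already
     * known to repeat the sign bit, clz64(z_mask) counts high bits known to
     * be 0, and clz64(~o_mask) counts high bits known to be 1; any such run
     * is a run of sign-bit copies.  E.g. with z_mask == 0xff the run is 56
     * bits and the stored s_mask becomes 0xffffffffffffff00.
     */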
1094     rep = clz64(~s_mask);
1095     rep = MAX(rep, clz64(z_mask));
1096     rep = MAX(rep, clz64(~o_mask));
1097     rep = MAX(rep - 1, 0);
1098     ti->s_mask = INT64_MIN >> rep;
1099 
1100     return false;
1101 }
1102 
1103 static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
1104                             uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
1105 {
1106     fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
1107     return true;
1108 }
1109 
1110 static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
1111                            uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
1112 {
1113     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
1114 }
1115 
1116 static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
1117                           uint64_t z_mask, uint64_t o_mask)
1118 {
1119     return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
1120 }
1121 
1122 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1123                           uint64_t z_mask, uint64_t s_mask)
1124 {
1125     return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
1126 }
1127 
1128 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1129 {
1130     return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
1131 }
1132 
1133 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1134 {
1135     return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
1136 }
1137 
1138 /*
1139  * Convert @op to NOT, if NOT is supported by the host.
1140  * Return true f the conversion is successful, which will still
1141  * Return true if the conversion is successful, which will still
1142  */
1143 static bool fold_not(OptContext *ctx, TCGOp *op);
1144 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1145 {
1146     TCGOpcode not_op;
1147     bool have_not;
1148 
1149     switch (ctx->type) {
1150     case TCG_TYPE_I32:
1151     case TCG_TYPE_I64:
1152         not_op = INDEX_op_not;
1153         have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1154         break;
1155     case TCG_TYPE_V64:
1156     case TCG_TYPE_V128:
1157     case TCG_TYPE_V256:
1158         not_op = INDEX_op_not_vec;
1159         have_not = TCG_TARGET_HAS_not_vec;
1160         break;
1161     default:
1162         g_assert_not_reached();
1163     }
1164     if (have_not) {
1165         op->opc = not_op;
1166         op->args[1] = op->args[idx];
1167         return fold_not(ctx, op);
1168     }
1169     return false;
1170 }
1171 
1172 /* If the binary operation has first argument @i, fold to @i. */
1173 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1174 {
1175     if (arg_is_const_val(op->args[1], i)) {
1176         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1177     }
1178     return false;
1179 }
1180 
1181 /* If the binary operation has first argument @i, fold to NOT. */
1182 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1183 {
1184     if (arg_is_const_val(op->args[1], i)) {
1185         return fold_to_not(ctx, op, 2);
1186     }
1187     return false;
1188 }
1189 
1190 /* If the binary operation has second argument @i, fold to @i. */
1191 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1192 {
1193     if (arg_is_const_val(op->args[2], i)) {
1194         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1195     }
1196     return false;
1197 }
1198 
1199 /* If the binary operation has second argument @i, fold to identity. */
1200 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1201 {
1202     if (arg_is_const_val(op->args[2], i)) {
1203         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1204     }
1205     return false;
1206 }
1207 
1208 /* If the binary operation has second argument @i, fold to NOT. */
1209 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1210 {
1211     if (arg_is_const_val(op->args[2], i)) {
1212         return fold_to_not(ctx, op, 1);
1213     }
1214     return false;
1215 }
1216 
1217 /* If the binary operation has both arguments equal, fold to @i. */
1218 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1219 {
1220     if (args_are_copies(op->args[1], op->args[2])) {
1221         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1222     }
1223     return false;
1224 }
1225 
1226 /* If the binary operation has both arguments equal, fold to identity. */
1227 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1228 {
1229     if (args_are_copies(op->args[1], op->args[2])) {
1230         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1231     }
1232     return false;
1233 }
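
/*
 * Editor's examples of the helpers above (illustrative):
 *   sub  r,x,x  -> movi r,0     via fold_xx_to_i(ctx, op, 0)
 *   or   r,x,x  -> mov  r,x     via fold_xx_to_x()
 *   xor  r,x,0  -> mov  r,x     via fold_xi_to_x(ctx, op, 0)
 *   and  r,x,0  -> movi r,0     via fold_xi_to_i(ctx, op, 0)
 *   xor  r,x,-1 -> not  r,x     via fold_xi_to_not(ctx, op, -1)
 */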
1234 
1235 /*
1236  * These outermost fold_<op> functions are sorted alphabetically.
1237  *
1238  * The ordering of the transformations should be:
1239  *   1) those that produce a constant
1240  *   2) those that produce a copy
1241  *   3) those that produce information about the result value.
1242  */
1243 
1244 static bool fold_addco(OptContext *ctx, TCGOp *op);
1245 static bool fold_or(OptContext *ctx, TCGOp *op);
1246 static bool fold_orc(OptContext *ctx, TCGOp *op);
1247 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1248 static bool fold_xor(OptContext *ctx, TCGOp *op);
1249 
1250 static bool fold_add(OptContext *ctx, TCGOp *op)
1251 {
1252     if (fold_const2_commutative(ctx, op) ||
1253         fold_xi_to_x(ctx, op, 0)) {
1254         return true;
1255     }
1256     return finish_folding(ctx, op);
1257 }
1258 
1259 /* We cannot as yet do_constant_folding with vectors. */
1260 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1261 {
1262     if (fold_commutative(ctx, op) ||
1263         fold_xi_to_x(ctx, op, 0)) {
1264         return true;
1265     }
1266     return finish_folding(ctx, op);
1267 }
1268 
1269 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1270 {
1271     TempOptInfo *t2;
1272 
1273     op = QTAILQ_PREV(op, link);
1274     switch (op->opc) {
1275     case INDEX_op_addco:
1276         op->opc = INDEX_op_add;
1277         fold_add(ctx, op);
1278         break;
1279     case INDEX_op_addcio:
1280         op->opc = INDEX_op_addci;
1281         break;
1282     case INDEX_op_addc1o:
1283         op->opc = INDEX_op_add;
1284         t2 = arg_info(op->args[2]);
1285         if (ti_is_const(t2)) {
1286             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1287             /* Perform other constant folding, if needed. */
1288             fold_add(ctx, op);
1289         } else {
1290             TCGArg ret = op->args[0];
1291             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1292             op->args[0] = ret;
1293             op->args[1] = ret;
1294             op->args[2] = arg_new_constant(ctx, 1);
1295         }
1296         break;
1297     default:
1298         g_assert_not_reached();
1299     }
1300 }
1301 
1302 static bool fold_addci(OptContext *ctx, TCGOp *op)
1303 {
1304     fold_commutative(ctx, op);
1305 
1306     if (ctx->carry_state < 0) {
1307         return finish_folding(ctx, op);
1308     }
1309 
1310     squash_prev_carryout(ctx, op);
1311     op->opc = INDEX_op_add;
1312 
1313     if (ctx->carry_state > 0) {
1314         TempOptInfo *t2 = arg_info(op->args[2]);
1315 
1316         /*
1317          * Propagate the known carry-in into a constant, if possible.
1318          * Otherwise emit a second add +1.
1319          */
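        /*
         * Editor's example (illustrative): with a known carry-in of 1,
         * "addci d, a, $5" becomes "add d, a, $6", while
         * "addci d, a, b" becomes "add d, a, b" followed by "add d, d, 1".
         */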
1320         if (ti_is_const(t2)) {
1321             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1322         } else {
1323             TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1324 
1325             op2->args[0] = op->args[0];
1326             op2->args[1] = op->args[1];
1327             op2->args[2] = op->args[2];
1328             fold_add(ctx, op2);
1329 
1330             op->args[1] = op->args[0];
1331             op->args[2] = arg_new_constant(ctx, 1);
1332         }
1333     }
1334 
1335     ctx->carry_state = -1;
1336     return fold_add(ctx, op);
1337 }
1338 
1339 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1340 {
1341     TempOptInfo *t1, *t2;
1342     int carry_out = -1;
1343     uint64_t sum, max;
1344 
1345     fold_commutative(ctx, op);
1346     t1 = arg_info(op->args[1]);
1347     t2 = arg_info(op->args[2]);
1348 
1349     /*
1350      * The z_mask value is >= the maximum value that can be represented
1351      * with the known zero bits.  So adding the z_mask values will not
1352      * overflow if and only if the true values cannot overflow.
1353      */
1354     if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1355         !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1356         carry_out = 0;
1357     }
1358 
1359     if (ctx->carry_state < 0) {
1360         ctx->carry_state = carry_out;
1361         return finish_folding(ctx, op);
1362     }
1363 
1364     squash_prev_carryout(ctx, op);
1365     if (ctx->carry_state == 0) {
1366         goto do_addco;
1367     }
1368 
1369     /* Propagate the known carry-in into a constant, if possible. */
1370     max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1371     if (ti_is_const(t2)) {
1372         uint64_t v = ti_const_val(t2) & max;
1373         if (v < max) {
1374             op->args[2] = arg_new_constant(ctx, v + 1);
1375             goto do_addco;
1376         }
1377         /* max + known carry in produces known carry out. */
1378         carry_out = 1;
1379     }
1380     if (ti_is_const(t1)) {
1381         uint64_t v = ti_const_val(t1) & max;
1382         if (v < max) {
1383             op->args[1] = arg_new_constant(ctx, v + 1);
1384             goto do_addco;
1385         }
1386         carry_out = 1;
1387     }
1388 
1389     /* Adjust the opcode to remember the known carry-in. */
1390     op->opc = INDEX_op_addc1o;
1391     ctx->carry_state = carry_out;
1392     return finish_folding(ctx, op);
1393 
1394  do_addco:
1395     op->opc = INDEX_op_addco;
1396     return fold_addco(ctx, op);
1397 }
1398 
1399 static bool fold_addco(OptContext *ctx, TCGOp *op)
1400 {
1401     TempOptInfo *t1, *t2;
1402     int carry_out = -1;
1403     uint64_t ign;
1404 
1405     fold_commutative(ctx, op);
1406     t1 = arg_info(op->args[1]);
1407     t2 = arg_info(op->args[2]);
1408 
1409     if (ti_is_const(t2)) {
1410         uint64_t v2 = ti_const_val(t2);
1411 
1412         if (ti_is_const(t1)) {
1413             uint64_t v1 = ti_const_val(t1);
1414             /* Given sign-extension of z_mask for I32, we need not truncate. */
1415             carry_out = uadd64_overflow(v1, v2, &ign);
1416         } else if (v2 == 0) {
1417             carry_out = 0;
1418         }
1419     } else {
1420         /*
1421          * The z_mask value is >= the maximum value that can be represented
1422          * with the known zero bits.  So adding the z_mask values will not
1423          * overflow if and only if the true values cannot overflow.
1424          */
1425         if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1426             carry_out = 0;
1427         }
1428     }
1429     ctx->carry_state = carry_out;
1430     return finish_folding(ctx, op);
1431 }
1432 
1433 static bool fold_and(OptContext *ctx, TCGOp *op)
1434 {
1435     uint64_t z_mask, o_mask, s_mask, a_mask;
1436     TempOptInfo *t1, *t2;
1437 
1438     if (fold_const2_commutative(ctx, op)) {
1439         return true;
1440     }
1441 
1442     t1 = arg_info(op->args[1]);
1443     t2 = arg_info(op->args[2]);
1444 
1445     z_mask = t1->z_mask & t2->z_mask;
1446     o_mask = t1->o_mask & t2->o_mask;
1447 
1448     /*
1449      * Sign repetitions are perforce all identical, whether they are 1 or 0.
1450      * Bitwise operations preserve the relative quantity of the repetitions.
1451      */
1452     s_mask = t1->s_mask & t2->s_mask;
1453 
1454     /* Affected bits are those not known zero, masked by those known one. */
1455     a_mask = t1->z_mask & ~t2->o_mask;
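
    /*
     * Editor's example (illustrative): for "and r, x, 0xff" where x is
     * already known to fit in 8 bits (t1->z_mask == 0xff, t2->o_mask ==
     * 0xff), a_mask == 0 and fold_masks_zosa_int() below reduces the op
     * to "mov r, x".
     */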
1456 
1457     if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1458         if (op->opc == INDEX_op_and && ti_is_const(t2)) {
1459             /*
1460              * Canonicalize on extract, if valid.  This aids x86 with its
1461              * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1462              * which does not require matching operands.  Other backends can
1463              * trivially expand the extract to AND during code generation.
1464              */
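            /*
             * Editor's example (illustrative): val must be of the form
             * 2**len - 1 (contiguous low bits).  For val == 0xff,
             * val & (val + 1) == 0 and len == ctz64(~0xff) == 8, so the
             * op becomes "extract r, x, 0, 8" when the backend supports it.
             */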
1465             uint64_t val = ti_const_val(t2);
1466             if (!(val & (val + 1))) {
1467                 unsigned len = ctz64(~val);
1468                 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1469                     op->opc = INDEX_op_extract;
1470                     op->args[2] = 0;
1471                     op->args[3] = len;
1472                 }
1473             }
1474         } else {
1475             fold_xx_to_x(ctx, op);
1476         }
1477     }
1478     return true;
1479 }
1480 
1481 static bool fold_andc(OptContext *ctx, TCGOp *op)
1482 {
1483     uint64_t z_mask, o_mask, s_mask, a_mask;
1484     TempOptInfo *t1, *t2;
1485 
1486     if (fold_const2(ctx, op)) {
1487         return true;
1488     }
1489 
1490     t1 = arg_info(op->args[1]);
1491     t2 = arg_info(op->args[2]);
1492 
1493     if (ti_is_const(t2)) {
1494         /* Fold andc r,x,i to and r,x,~i. */
1495         switch (ctx->type) {
1496         case TCG_TYPE_I32:
1497         case TCG_TYPE_I64:
1498             op->opc = INDEX_op_and;
1499             break;
1500         case TCG_TYPE_V64:
1501         case TCG_TYPE_V128:
1502         case TCG_TYPE_V256:
1503             op->opc = INDEX_op_and_vec;
1504             break;
1505         default:
1506             g_assert_not_reached();
1507         }
1508         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1509         return fold_and(ctx, op);
1510     }
1511     if (fold_xx_to_i(ctx, op, 0) ||
1512         fold_ix_to_not(ctx, op, -1)) {
1513         return true;
1514     }
1515 
1516     z_mask = t1->z_mask & ~t2->o_mask;
1517     o_mask = t1->o_mask & ~t2->z_mask;
1518     s_mask = t1->s_mask & t2->s_mask;
1519 
1520     /* Affected bits are those not known zero, masked by those known zero. */
1521     a_mask = t1->z_mask & t2->z_mask;
1522 
1523     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
1524 }
1525 
1526 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1527 {
1528     /* If true and false values are the same, eliminate the cmp. */
1529     if (args_are_copies(op->args[2], op->args[3])) {
1530         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1531     }
1532 
1533     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1534         uint64_t tv = arg_const_val(op->args[2]);
1535         uint64_t fv = arg_const_val(op->args[3]);
1536 
1537         if (tv == -1 && fv == 0) {
1538             return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1539         }
1540         if (tv == 0 && fv == -1) {
1541             if (TCG_TARGET_HAS_not_vec) {
1542                 op->opc = INDEX_op_not_vec;
1543                 return fold_not(ctx, op);
1544             } else {
1545                 op->opc = INDEX_op_xor_vec;
1546                 op->args[2] = arg_new_constant(ctx, -1);
1547                 return fold_xor(ctx, op);
1548             }
1549         }
1550     }
1551     if (arg_is_const(op->args[2])) {
1552         uint64_t tv = arg_const_val(op->args[2]);
1553         if (tv == -1) {
1554             op->opc = INDEX_op_or_vec;
1555             op->args[2] = op->args[3];
1556             return fold_or(ctx, op);
1557         }
1558         if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1559             op->opc = INDEX_op_andc_vec;
1560             op->args[2] = op->args[1];
1561             op->args[1] = op->args[3];
1562             return fold_andc(ctx, op);
1563         }
1564     }
1565     if (arg_is_const(op->args[3])) {
1566         uint64_t fv = arg_const_val(op->args[3]);
1567         if (fv == 0) {
1568             op->opc = INDEX_op_and_vec;
1569             return fold_and(ctx, op);
1570         }
1571         if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1572             TCGArg ta = op->args[2];
1573             op->opc = INDEX_op_orc_vec;
1574             op->args[2] = op->args[1];
1575             op->args[1] = ta;
1576             return fold_orc(ctx, op);
1577         }
1578     }
1579     return finish_folding(ctx, op);
1580 }
1581 
1582 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1583 {
1584     int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1585                                       &op->args[1], &op->args[2]);
1586     if (i == 0) {
1587         tcg_op_remove(ctx->tcg, op);
1588         return true;
1589     }
1590     if (i > 0) {
1591         op->opc = INDEX_op_br;
1592         op->args[0] = op->args[3];
1593         finish_ebb(ctx);
1594     } else {
1595         finish_bb(ctx);
1596     }
1597     return true;
1598 }
1599 
1600 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1601 {
1602     TCGCond cond;
1603     TCGArg label;
1604     int i, inv = 0;
1605 
1606     i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1607     cond = op->args[4];
1608     label = op->args[5];
1609     if (i >= 0) {
1610         goto do_brcond_const;
1611     }
1612 
1613     switch (cond) {
1614     case TCG_COND_LT:
1615     case TCG_COND_GE:
1616         /*
1617          * Simplify LT/GE comparisons vs zero to a single compare
1618          * vs the high word of the input.
1619          */
1620         if (arg_is_const_val(op->args[2], 0) &&
1621             arg_is_const_val(op->args[3], 0)) {
1622             goto do_brcond_high;
1623         }
1624         break;
1625 
1626     case TCG_COND_NE:
1627         inv = 1;
1628         QEMU_FALLTHROUGH;
1629     case TCG_COND_EQ:
1630         /*
1631          * Simplify EQ/NE comparisons where one of the pairs
1632          * can be simplified.
1633          */
1634         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1635                                      op->args[2], cond);
1636         switch (i ^ inv) {
1637         case 0:
1638             goto do_brcond_const;
1639         case 1:
1640             goto do_brcond_high;
1641         }
1642 
1643         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1644                                      op->args[3], cond);
1645         switch (i ^ inv) {
1646         case 0:
1647             goto do_brcond_const;
1648         case 1:
1649             goto do_brcond_low;
1650         }
1651         break;
1652 
1653     case TCG_COND_TSTEQ:
1654     case TCG_COND_TSTNE:
1655         if (arg_is_const_val(op->args[2], 0)) {
1656             goto do_brcond_high;
1657         }
1658         if (arg_is_const_val(op->args[3], 0)) {
1659             goto do_brcond_low;
1660         }
1661         break;
1662 
1663     default:
1664         break;
1665 
1666     do_brcond_low:
1667         op->opc = INDEX_op_brcond;
1668         op->args[1] = op->args[2];
1669         op->args[2] = cond;
1670         op->args[3] = label;
1671         return fold_brcond(ctx, op);
1672 
1673     do_brcond_high:
1674         op->opc = INDEX_op_brcond;
1675         op->args[0] = op->args[1];
1676         op->args[1] = op->args[3];
1677         op->args[2] = cond;
1678         op->args[3] = label;
1679         return fold_brcond(ctx, op);
1680 
1681     do_brcond_const:
1682         if (i == 0) {
1683             tcg_op_remove(ctx->tcg, op);
1684             return true;
1685         }
1686         op->opc = INDEX_op_br;
1687         op->args[0] = label;
1688         finish_ebb(ctx);
1689         return true;
1690     }
1691 
1692     finish_bb(ctx);
1693     return true;
1694 }
1695 
1696 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1697 {
1698     uint64_t z_mask, o_mask, s_mask;
1699     TempOptInfo *t1 = arg_info(op->args[1]);
1700     int flags = op->args[2];
1701 
1702     if (ti_is_const(t1)) {
1703         return tcg_opt_gen_movi(ctx, op, op->args[0],
1704                                 do_constant_folding(op->opc, ctx->type,
1705                                                     ti_const_val(t1), flags));
1706     }
1707 
1708     z_mask = t1->z_mask;
1709     o_mask = t1->o_mask;
1710     s_mask = 0;
1711 
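    /*
     * A byte swap permutes bits without creating or destroying them, so
     * the known-zero and known-one masks are permuted in the same way.
     * The OS/OZ flags determine what is known about the high bits.
     */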
1712     switch (op->opc) {
1713     case INDEX_op_bswap16:
1714         z_mask = bswap16(z_mask);
1715         o_mask = bswap16(o_mask);
1716         if (flags & TCG_BSWAP_OS) {
1717             z_mask = (int16_t)z_mask;
1718             o_mask = (int16_t)o_mask;
1719             s_mask = INT16_MIN;
1720         } else if (!(flags & TCG_BSWAP_OZ)) {
1721             z_mask |= MAKE_64BIT_MASK(16, 48);
1722         }
1723         break;
1724     case INDEX_op_bswap32:
1725         z_mask = bswap32(z_mask);
1726         o_mask = bswap32(o_mask);
1727         if (flags & TCG_BSWAP_OS) {
1728             z_mask = (int32_t)z_mask;
1729             o_mask = (int32_t)o_mask;
1730             s_mask = INT32_MIN;
1731         } else if (!(flags & TCG_BSWAP_OZ)) {
1732             z_mask |= MAKE_64BIT_MASK(32, 32);
1733         }
1734         break;
1735     case INDEX_op_bswap64:
1736         z_mask = bswap64(z_mask);
1737         o_mask = bswap64(o_mask);
1738         break;
1739     default:
1740         g_assert_not_reached();
1741     }
1742 
1743     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1744 }
1745 
1746 static bool fold_call(OptContext *ctx, TCGOp *op)
1747 {
1748     TCGContext *s = ctx->tcg;
1749     int nb_oargs = TCGOP_CALLO(op);
1750     int nb_iargs = TCGOP_CALLI(op);
1751     int flags, i;
1752 
1753     init_arguments(ctx, op, nb_oargs + nb_iargs);
1754     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1755 
1756     /* If the function reads or writes globals, reset temp data. */
1757     flags = tcg_call_flags(op);
1758     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1759         int nb_globals = s->nb_globals;
1760 
1761         for (i = 0; i < nb_globals; i++) {
1762             if (test_bit(i, ctx->temps_used.l)) {
1763                 reset_ts(ctx, &ctx->tcg->temps[i]);
1764             }
1765         }
1766     }
1767 
1768     /* If the function has side effects, reset mem data. */
1769     if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1770         remove_mem_copy_all(ctx);
1771     }
1772 
1773     /* Reset temp data for outputs. */
1774     for (i = 0; i < nb_oargs; i++) {
1775         reset_temp(ctx, op->args[i]);
1776     }
1777 
1778     /* Stop optimizing MB across calls. */
1779     ctx->prev_mb = NULL;
1780     return true;
1781 }
1782 
1783 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1784 {
1785     /* Canonicalize the comparison to put immediate second. */
1786     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1787         op->args[3] = tcg_swap_cond(op->args[3]);
1788     }
1789     return finish_folding(ctx, op);
1790 }
1791 
1792 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1793 {
1794     /* If true and false values are the same, eliminate the cmp. */
1795     if (args_are_copies(op->args[3], op->args[4])) {
1796         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1797     }
1798 
1799     /* Canonicalize the comparison to put immediate second. */
1800     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1801         op->args[5] = tcg_swap_cond(op->args[5]);
1802     }
1803     /*
1804      * Canonicalize the "false" input reg to match the destination,
1805      * so that the tcg backend can implement "move if true".
1806      */
1807     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1808         op->args[5] = tcg_invert_cond(op->args[5]);
1809     }
1810     return finish_folding(ctx, op);
1811 }
1812 
1813 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1814 {
1815     uint64_t z_mask, s_mask;
1816     TempOptInfo *t1 = arg_info(op->args[1]);
1817     TempOptInfo *t2 = arg_info(op->args[2]);
1818 
1819     if (ti_is_const(t1)) {
1820         uint64_t t = ti_const_val(t1);
1821 
1822         if (t != 0) {
1823             t = do_constant_folding(op->opc, ctx->type, t, 0);
1824             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1825         }
1826         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1827     }
1828 
1829     switch (ctx->type) {
1830     case TCG_TYPE_I32:
1831         z_mask = 31;
1832         break;
1833     case TCG_TYPE_I64:
1834         z_mask = 63;
1835         break;
1836     default:
1837         g_assert_not_reached();
1838     }
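    /*
     * Otherwise the result is either a bit position in [0, width - 1]
     * or, for a zero input, the fallback value in args[2]: combine the
     * masks of both possibilities.
     */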
1839     s_mask = ~z_mask;
1840     z_mask |= t2->z_mask;
1841     s_mask &= t2->s_mask;
1842 
1843     return fold_masks_zs(ctx, op, z_mask, s_mask);
1844 }
1845 
1846 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1847 {
1848     uint64_t z_mask;
1849 
1850     if (fold_const1(ctx, op)) {
1851         return true;
1852     }
1853 
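    /*
     * A population count lies in [0, width]; the union of all possible
     * results is width | (width - 1), e.g. 32 | 31 = 0x3f for 32 bits.
     */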
1854     switch (ctx->type) {
1855     case TCG_TYPE_I32:
1856         z_mask = 32 | 31;
1857         break;
1858     case TCG_TYPE_I64:
1859         z_mask = 64 | 63;
1860         break;
1861     default:
1862         g_assert_not_reached();
1863     }
1864     return fold_masks_z(ctx, op, z_mask);
1865 }
1866 
1867 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1868 {
1869     TempOptInfo *t1 = arg_info(op->args[1]);
1870     TempOptInfo *t2 = arg_info(op->args[2]);
1871     int ofs = op->args[3];
1872     int len = op->args[4];
1873     int width = 8 * tcg_type_size(ctx->type);
1874     uint64_t z_mask, o_mask, s_mask;
1875 
1876     if (ti_is_const(t1) && ti_is_const(t2)) {
1877         return tcg_opt_gen_movi(ctx, op, op->args[0],
1878                                 deposit64(ti_const_val(t1), ofs, len,
1879                                           ti_const_val(t2)));
1880     }
1881 
1882     /* Inserting a value into zero at offset 0. */
1883     if (ti_is_const_val(t1, 0) && ofs == 0) {
1884         uint64_t mask = MAKE_64BIT_MASK(0, len);
1885 
1886         op->opc = INDEX_op_and;
1887         op->args[1] = op->args[2];
1888         op->args[2] = arg_new_constant(ctx, mask);
1889         return fold_and(ctx, op);
1890     }
1891 
1892     /* Inserting zero into a value. */
1893     if (ti_is_const_val(t2, 0)) {
1894         uint64_t mask = deposit64(-1, ofs, len, 0);
1895 
1896         op->opc = INDEX_op_and;
1897         op->args[2] = arg_new_constant(ctx, mask);
1898         return fold_and(ctx, op);
1899     }
1900 
1901     /* The s_mask from the top portion of the deposit is still valid. */
1902     if (ofs + len == width) {
1903         s_mask = t2->s_mask << ofs;
1904     } else {
1905         s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1906     }
1907 
1908     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1909     o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1910 
1911     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1912 }
1913 
1914 static bool fold_divide(OptContext *ctx, TCGOp *op)
1915 {
1916     if (fold_const2(ctx, op) ||
1917         fold_xi_to_x(ctx, op, 1)) {
1918         return true;
1919     }
1920     return finish_folding(ctx, op);
1921 }
1922 
1923 static bool fold_dup(OptContext *ctx, TCGOp *op)
1924 {
1925     if (arg_is_const(op->args[1])) {
1926         uint64_t t = arg_const_val(op->args[1]);
1927         t = dup_const(TCGOP_VECE(op), t);
1928         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1929     }
1930     return finish_folding(ctx, op);
1931 }
1932 
1933 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1934 {
1935     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1936         uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1937                                arg_const_val(op->args[2]));
1938         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1939     }
1940 
1941     if (args_are_copies(op->args[1], op->args[2])) {
1942         op->opc = INDEX_op_dup_vec;
1943         TCGOP_VECE(op) = MO_32;
1944     }
1945     return finish_folding(ctx, op);
1946 }
1947 
1948 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1949 {
1950     uint64_t z_mask, o_mask, s_mask;
1951     TempOptInfo *t1, *t2;
1952 
1953     if (fold_const2_commutative(ctx, op)) {
1954         return true;
1955     }
1956 
1957     t2 = arg_info(op->args[2]);
1958     if (ti_is_const(t2)) {
1959         /* Fold eqv r,x,i to xor r,x,~i. */
1960         switch (ctx->type) {
1961         case TCG_TYPE_I32:
1962         case TCG_TYPE_I64:
1963             op->opc = INDEX_op_xor;
1964             break;
1965         case TCG_TYPE_V64:
1966         case TCG_TYPE_V128:
1967         case TCG_TYPE_V256:
1968             op->opc = INDEX_op_xor_vec;
1969             break;
1970         default:
1971             g_assert_not_reached();
1972         }
1973         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1974         return fold_xor(ctx, op);
1975     }
1976 
1977     t1 = arg_info(op->args[1]);
1978 
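    /*
     * For eqv (~(x ^ y)), a result bit is known only when both input
     * bits are known: known-equal bits give a known 1, known-different
     * bits give a known 0.
     */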
1979     z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1980     o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1981     s_mask = t1->s_mask & t2->s_mask;
1982 
1983     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1984 }
1985 
1986 static bool fold_extract(OptContext *ctx, TCGOp *op)
1987 {
1988     uint64_t z_mask, o_mask, a_mask;
1989     TempOptInfo *t1 = arg_info(op->args[1]);
1990     int pos = op->args[2];
1991     int len = op->args[3];
1992 
1993     if (ti_is_const(t1)) {
1994         return tcg_opt_gen_movi(ctx, op, op->args[0],
1995                                 extract64(ti_const_val(t1), pos, len));
1996     }
1997 
1998     z_mask = extract64(t1->z_mask, pos, len);
1999     o_mask = extract64(t1->o_mask, pos, len);
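    /*
     * The extract is a no-op when the field starts at bit 0 and every
     * bit above the field is known zero; a_mask collects the possibly-set
     * bits that would be discarded.
     */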
2000     a_mask = pos ? -1 : t1->z_mask ^ z_mask;
2001 
2002     return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2003 }
2004 
2005 static bool fold_extract2(OptContext *ctx, TCGOp *op)
2006 {
2007     TempOptInfo *t1 = arg_info(op->args[1]);
2008     TempOptInfo *t2 = arg_info(op->args[2]);
2009     uint64_t z1 = t1->z_mask;
2010     uint64_t z2 = t2->z_mask;
2011     uint64_t o1 = t1->o_mask;
2012     uint64_t o2 = t2->o_mask;
2013     int shr = op->args[3];
2014 
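    /*
     * extract2 computes (args[1] >> shr) | (args[2] << (width - shr));
     * apply the same shifts to the known-zero and known-one masks of
     * each input.
     */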
2015     if (ctx->type == TCG_TYPE_I32) {
2016         z1 = (uint32_t)z1 >> shr;
2017         o1 = (uint32_t)o1 >> shr;
2018         z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2019         o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2020     } else {
2021         z1 >>= shr;
2022         o1 >>= shr;
2023         z2 <<= 64 - shr;
2024         o2 <<= 64 - shr;
2025     }
2026 
2027     return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2028 }
2029 
2030 static bool fold_exts(OptContext *ctx, TCGOp *op)
2031 {
2032     uint64_t z_mask, o_mask, s_mask;
2033     TempOptInfo *t1;
2034 
2035     if (fold_const1(ctx, op)) {
2036         return true;
2037     }
2038 
2039     t1 = arg_info(op->args[1]);
2040     z_mask = t1->z_mask;
2041     o_mask = t1->o_mask;
2042     s_mask = t1->s_mask;
2043 
2044     switch (op->opc) {
2045     case INDEX_op_ext_i32_i64:
2046         s_mask |= INT32_MIN;
2047         z_mask = (int32_t)z_mask;
2048         o_mask = (int32_t)o_mask;
2049         break;
2050     default:
2051         g_assert_not_reached();
2052     }
2053     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2054 }
2055 
2056 static bool fold_extu(OptContext *ctx, TCGOp *op)
2057 {
2058     uint64_t z_mask, o_mask;
2059     TempOptInfo *t1;
2060 
2061     if (fold_const1(ctx, op)) {
2062         return true;
2063     }
2064 
2065     t1 = arg_info(op->args[1]);
2066     z_mask = t1->z_mask;
2067     o_mask = t1->o_mask;
2068 
2069     switch (op->opc) {
2070     case INDEX_op_extrl_i64_i32:
2071     case INDEX_op_extu_i32_i64:
2072         z_mask = (uint32_t)z_mask;
2073         o_mask = (uint32_t)o_mask;
2074         break;
2075     case INDEX_op_extrh_i64_i32:
2076         z_mask >>= 32;
2077         o_mask >>= 32;
2078         break;
2079     default:
2080         g_assert_not_reached();
2081     }
2082     return fold_masks_zo(ctx, op, z_mask, o_mask);
2083 }
2084 
2085 static bool fold_mb(OptContext *ctx, TCGOp *op)
2086 {
2087     /* Eliminate duplicate and redundant fence instructions.  */
2088     if (ctx->prev_mb) {
2089         /*
2090          * Merge two barriers of the same type into one,
2091          * or a weaker barrier into a stronger one,
2092          * or two weaker barriers into a stronger one.
2093          *   mb X; mb Y => mb X|Y
2094          *   mb; strl => mb; st
2095          *   ldaq; mb => ld; mb
2096          *   ldaq; strl => ld; mb; st
2097          * Other combinations are also merged into a strong
2098          * barrier.  This is stricter than specified but for
2099          * the purposes of TCG is better than not optimizing.
2100          */
2101         ctx->prev_mb->args[0] |= op->args[0];
2102         tcg_op_remove(ctx->tcg, op);
2103     } else {
2104         ctx->prev_mb = op;
2105     }
2106     return true;
2107 }
2108 
2109 static bool fold_mov(OptContext *ctx, TCGOp *op)
2110 {
2111     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2112 }
2113 
2114 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2115 {
2116     uint64_t z_mask, o_mask, s_mask;
2117     TempOptInfo *tt, *ft;
2118     int i;
2119 
2120     /* If true and false values are the same, eliminate the cmp. */
2121     if (args_are_copies(op->args[3], op->args[4])) {
2122         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2123     }
2124 
2125     /*
2126      * Canonicalize the "false" input reg to match the destination reg so
2127      * that the tcg backend can implement a "move if true" operation.
2128      */
2129     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2130         op->args[5] = tcg_invert_cond(op->args[5]);
2131     }
2132 
2133     i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2134                                   &op->args[2], &op->args[5]);
2135     if (i >= 0) {
2136         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2137     }
2138 
2139     tt = arg_info(op->args[3]);
2140     ft = arg_info(op->args[4]);
2141     z_mask = tt->z_mask | ft->z_mask;
2142     o_mask = tt->o_mask & ft->o_mask;
2143     s_mask = tt->s_mask & ft->s_mask;
2144 
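    /*
     * With constant arms {1,0}, {0,1}, {-1,0} or {0,-1}, the movcond
     * collapses to setcond or negsetcond, inverting the condition when
     * the arms are swapped.
     */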
2145     if (ti_is_const(tt) && ti_is_const(ft)) {
2146         uint64_t tv = ti_const_val(tt);
2147         uint64_t fv = ti_const_val(ft);
2148         TCGCond cond = op->args[5];
2149 
2150         if (tv == 1 && fv == 0) {
2151             op->opc = INDEX_op_setcond;
2152             op->args[3] = cond;
2153         } else if (fv == 1 && tv == 0) {
2154             op->opc = INDEX_op_setcond;
2155             op->args[3] = tcg_invert_cond(cond);
2156         } else if (tv == -1 && fv == 0) {
2157             op->opc = INDEX_op_negsetcond;
2158             op->args[3] = cond;
2159         } else if (fv == -1 && tv == 0) {
2160             op->opc = INDEX_op_negsetcond;
2161             op->args[3] = tcg_invert_cond(cond);
2162         }
2163     }
2164 
2165     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2166 }
2167 
2168 static bool fold_mul(OptContext *ctx, TCGOp *op)
2169 {
2170     if (fold_const2(ctx, op) ||
2171         fold_xi_to_i(ctx, op, 0) ||
2172         fold_xi_to_x(ctx, op, 1)) {
2173         return true;
2174     }
2175     return finish_folding(ctx, op);
2176 }
2177 
2178 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2179 {
2180     if (fold_const2_commutative(ctx, op) ||
2181         fold_xi_to_i(ctx, op, 0)) {
2182         return true;
2183     }
2184     return finish_folding(ctx, op);
2185 }
2186 
2187 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2188 {
2189     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2190 
2191     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2192         uint64_t a = arg_const_val(op->args[2]);
2193         uint64_t b = arg_const_val(op->args[3]);
2194         uint64_t h, l;
2195         TCGArg rl, rh;
2196         TCGOp *op2;
2197 
2198         switch (op->opc) {
2199         case INDEX_op_mulu2:
2200             if (ctx->type == TCG_TYPE_I32) {
2201                 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2202                 h = (int32_t)(l >> 32);
2203                 l = (int32_t)l;
2204             } else {
2205                 mulu64(&l, &h, a, b);
2206             }
2207             break;
2208         case INDEX_op_muls2:
2209             if (ctx->type == TCG_TYPE_I32) {
2210                 l = (int64_t)(int32_t)a * (int32_t)b;
2211                 h = l >> 32;
2212                 l = (int32_t)l;
2213             } else {
2214                 muls64(&l, &h, a, b);
2215             }
2216             break;
2217         default:
2218             g_assert_not_reached();
2219         }
2220 
2221         rl = op->args[0];
2222         rh = op->args[1];
2223 
2224         /* The proper opcode is supplied by tcg_opt_gen_mov. */
2225         op2 = opt_insert_before(ctx, op, 0, 2);
2226 
2227         tcg_opt_gen_movi(ctx, op, rl, l);
2228         tcg_opt_gen_movi(ctx, op2, rh, h);
2229         return true;
2230     }
2231     return finish_folding(ctx, op);
2232 }
2233 
2234 static bool fold_nand(OptContext *ctx, TCGOp *op)
2235 {
2236     uint64_t z_mask, o_mask, s_mask;
2237     TempOptInfo *t1, *t2;
2238 
2239     if (fold_const2_commutative(ctx, op) ||
2240         fold_xi_to_not(ctx, op, -1)) {
2241         return true;
2242     }
2243 
2244     t1 = arg_info(op->args[1]);
2245     t2 = arg_info(op->args[2]);
2246 
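    /*
     * For nand, a result bit is known 0 only when both input bits are
     * known 1, and known 1 when either input bit is known 0.
     */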
2247     z_mask = ~(t1->o_mask & t2->o_mask);
2248     o_mask = ~(t1->z_mask & t2->z_mask);
2249     s_mask = t1->s_mask & t2->s_mask;
2250 
2251     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2252 }
2253 
2254 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2255 {
2256     /* Set to 1 the rightmost possibly-set bit of z_mask and all bits to its left.  */
2257     uint64_t z_mask = arg_info(op->args[1])->z_mask;
2258     z_mask = -(z_mask & -z_mask);
2259 
2260     return fold_masks_z(ctx, op, z_mask);
2261 }
2262 
2263 static bool fold_neg(OptContext *ctx, TCGOp *op)
2264 {
2265     return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2266 }
2267 
2268 static bool fold_nor(OptContext *ctx, TCGOp *op)
2269 {
2270     uint64_t z_mask, o_mask, s_mask;
2271     TempOptInfo *t1, *t2;
2272 
2273     if (fold_const2_commutative(ctx, op) ||
2274         fold_xi_to_not(ctx, op, 0)) {
2275         return true;
2276     }
2277 
2278     t1 = arg_info(op->args[1]);
2279     t2 = arg_info(op->args[2]);
2280 
2281     z_mask = ~(t1->o_mask | t2->o_mask);
2282     o_mask = ~(t1->z_mask | t2->z_mask);
2283     s_mask = t1->s_mask & t2->s_mask;
2284 
2285     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2286 }
2287 
2288 static bool fold_not(OptContext *ctx, TCGOp *op)
2289 {
2290     TempOptInfo *t1;
2291 
2292     if (fold_const1(ctx, op)) {
2293         return true;
2294     }
2295 
2296     t1 = arg_info(op->args[1]);
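    /* For not, known ones and known zeros simply swap roles;
       sign repetitions are unchanged. */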
2297     return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2298 }
2299 
2300 static bool fold_or(OptContext *ctx, TCGOp *op)
2301 {
2302     uint64_t z_mask, o_mask, s_mask, a_mask;
2303     TempOptInfo *t1, *t2;
2304 
2305     if (fold_const2_commutative(ctx, op) ||
2306         fold_xi_to_x(ctx, op, 0) ||
2307         fold_xx_to_x(ctx, op)) {
2308         return true;
2309     }
2310 
2311     t1 = arg_info(op->args[1]);
2312     t2 = arg_info(op->args[2]);
2313 
2314     z_mask = t1->z_mask | t2->z_mask;
2315     o_mask = t1->o_mask | t2->o_mask;
2316     s_mask = t1->s_mask & t2->s_mask;
2317 
2318     /* Affected bits are those not known one, masked by those known zero. */
2319     a_mask = ~t1->o_mask & t2->z_mask;
2320 
2321     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2322 }
2323 
2324 static bool fold_orc(OptContext *ctx, TCGOp *op)
2325 {
2326     uint64_t z_mask, o_mask, s_mask, a_mask;
2327     TempOptInfo *t1, *t2;
2328 
2329     if (fold_const2(ctx, op)) {
2330         return true;
2331     }
2332 
2333     t2 = arg_info(op->args[2]);
2334     if (ti_is_const(t2)) {
2335         /* Fold orc r,x,i to or r,x,~i. */
2336         switch (ctx->type) {
2337         case TCG_TYPE_I32:
2338         case TCG_TYPE_I64:
2339             op->opc = INDEX_op_or;
2340             break;
2341         case TCG_TYPE_V64:
2342         case TCG_TYPE_V128:
2343         case TCG_TYPE_V256:
2344             op->opc = INDEX_op_or_vec;
2345             break;
2346         default:
2347             g_assert_not_reached();
2348         }
2349         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2350         return fold_or(ctx, op);
2351     }
2352     if (fold_xx_to_i(ctx, op, -1) ||
2353         fold_ix_to_not(ctx, op, 0)) {
2354         return true;
2355     }
2356     t1 = arg_info(op->args[1]);
2357 
2358     z_mask = t1->z_mask | ~t2->o_mask;
2359     o_mask = t1->o_mask | ~t2->z_mask;
2360     s_mask = t1->s_mask & t2->s_mask;
2361 
2362     /* Affected bits are those not known one, masked by those known one. */
2363     a_mask = ~t1->o_mask & ~t2->o_mask;
2364 
2365     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2366 }
2367 
2368 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2369 {
2370     const TCGOpDef *def = &tcg_op_defs[op->opc];
2371     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2372     MemOp mop = get_memop(oi);
2373     int width = 8 * memop_size(mop);
2374     uint64_t z_mask = -1, s_mask = 0;
2375 
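    /*
     * A load narrower than the register either sign-extends, so the bits
     * from the sign bit upward repeat, or zero-extends, so the high bits
     * are known zero.
     */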
2376     if (width < 64) {
2377         if (mop & MO_SIGN) {
2378             s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2379         } else {
2380             z_mask = MAKE_64BIT_MASK(0, width);
2381         }
2382     }
2383 
2384     /* Opcodes that touch guest memory stop the mb optimization.  */
2385     ctx->prev_mb = NULL;
2386 
2387     return fold_masks_zs(ctx, op, z_mask, s_mask);
2388 }
2389 
2390 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2391 {
2392     /* Opcodes that touch guest memory stop the mb optimization.  */
2393     ctx->prev_mb = NULL;
2394     return finish_folding(ctx, op);
2395 }
2396 
2397 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2398 {
2399     /* Opcodes that touch guest memory stop the mb optimization.  */
2400     ctx->prev_mb = NULL;
2401     return true;
2402 }
2403 
2404 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2405 {
2406     if (fold_const2(ctx, op) ||
2407         fold_xx_to_i(ctx, op, 0)) {
2408         return true;
2409     }
2410     return finish_folding(ctx, op);
2411 }
2412 
2413 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2414 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2415 {
2416     uint64_t a_zmask, b_val;
2417     TCGCond cond;
2418 
2419     if (!arg_is_const(op->args[2])) {
2420         return false;
2421     }
2422 
2423     a_zmask = arg_info(op->args[1])->z_mask;
2424     b_val = arg_const_val(op->args[2]);
2425     cond = op->args[3];
2426 
2427     if (ctx->type == TCG_TYPE_I32) {
2428         a_zmask = (uint32_t)a_zmask;
2429         b_val = (uint32_t)b_val;
2430     }
2431 
2432     /*
2433      * A with only low bits set vs B with high bits set means that A < B.
2434      */
2435     if (a_zmask < b_val) {
2436         bool inv = false;
2437 
2438         switch (cond) {
2439         case TCG_COND_NE:
2440         case TCG_COND_LEU:
2441         case TCG_COND_LTU:
2442             inv = true;
2443             /* fall through */
2444         case TCG_COND_GTU:
2445         case TCG_COND_GEU:
2446         case TCG_COND_EQ:
2447             return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2448         default:
2449             break;
2450         }
2451     }
2452 
2453     /*
2454      * A with only lsb set is already boolean.
2455      */
2456     if (a_zmask <= 1) {
2457         bool convert = false;
2458         bool inv = false;
2459 
2460         switch (cond) {
2461         case TCG_COND_EQ:
2462             inv = true;
2463             /* fall through */
2464         case TCG_COND_NE:
2465             convert = (b_val == 0);
2466             break;
2467         case TCG_COND_LTU:
2468         case TCG_COND_TSTEQ:
2469             inv = true;
2470             /* fall through */
2471         case TCG_COND_GEU:
2472         case TCG_COND_TSTNE:
2473             convert = (b_val == 1);
2474             break;
2475         default:
2476             break;
2477         }
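        /*
         * The comparison reduces to the boolean input itself (or its
         * inverse): mov for x, neg for -x, xor 1 for x ^ 1, and
         * add -1 for -(x ^ 1) == x - 1.
         */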
2478         if (convert) {
2479             if (!inv && !neg) {
2480                 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2481             }
2482 
2483             if (!inv) {
2484                 op->opc = INDEX_op_neg;
2485             } else if (neg) {
2486                 op->opc = INDEX_op_add;
2487                 op->args[2] = arg_new_constant(ctx, -1);
2488             } else {
2489                 op->opc = INDEX_op_xor;
2490                 op->args[2] = arg_new_constant(ctx, 1);
2491             }
2492             return -1;
2493         }
2494     }
2495     return 0;
2496 }
2497 
2498 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2499 {
2500     TCGCond cond = op->args[3];
2501     TCGArg ret, src1, src2;
2502     TCGOp *op2;
2503     uint64_t val;
2504     int sh;
2505     bool inv;
2506 
2507     if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2508         return;
2509     }
2510 
2511     src2 = op->args[2];
2512     val = arg_const_val(src2);
2513     if (!is_power_of_2(val)) {
2514         return;
2515     }
2516     sh = ctz64(val);
2517 
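    /*
     * TSTNE x, (1 << sh) is simply bit sh of x moved to bit 0: prefer a
     * single sextract/extract when the backend supports it, otherwise
     * use a shift plus and, then invert and/or negate the result.
     */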
2518     ret = op->args[0];
2519     src1 = op->args[1];
2520     inv = cond == TCG_COND_TSTEQ;
2521 
2522     if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2523         op->opc = INDEX_op_sextract;
2524         op->args[1] = src1;
2525         op->args[2] = sh;
2526         op->args[3] = 1;
2527         return;
2528     } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2529         op->opc = INDEX_op_extract;
2530         op->args[1] = src1;
2531         op->args[2] = sh;
2532         op->args[3] = 1;
2533     } else {
2534         if (sh) {
2535             op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2536             op2->args[0] = ret;
2537             op2->args[1] = src1;
2538             op2->args[2] = arg_new_constant(ctx, sh);
2539             src1 = ret;
2540         }
2541         op->opc = INDEX_op_and;
2542         op->args[1] = src1;
2543         op->args[2] = arg_new_constant(ctx, 1);
2544     }
2545 
2546     if (neg && inv) {
2547         op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2548         op2->args[0] = ret;
2549         op2->args[1] = ret;
2550         op2->args[2] = arg_new_constant(ctx, -1);
2551     } else if (inv) {
2552         op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2553         op2->args[0] = ret;
2554         op2->args[1] = ret;
2555         op2->args[2] = arg_new_constant(ctx, 1);
2556     } else if (neg) {
2557         op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2558         op2->args[0] = ret;
2559         op2->args[1] = ret;
2560     }
2561 }
2562 
2563 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2564 {
2565     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2566                                       &op->args[2], &op->args[3]);
2567     if (i >= 0) {
2568         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2569     }
2570 
2571     i = fold_setcond_zmask(ctx, op, false);
2572     if (i > 0) {
2573         return true;
2574     }
2575     if (i == 0) {
2576         fold_setcond_tst_pow2(ctx, op, false);
2577     }
2578 
2579     return fold_masks_z(ctx, op, 1);
2580 }
2581 
2582 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2583 {
2584     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2585                                       &op->args[2], &op->args[3]);
2586     if (i >= 0) {
2587         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2588     }
2589 
2590     i = fold_setcond_zmask(ctx, op, true);
2591     if (i > 0) {
2592         return true;
2593     }
2594     if (i == 0) {
2595         fold_setcond_tst_pow2(ctx, op, true);
2596     }
2597 
2598     /* Value is {0,-1} so all bits are repetitions of the sign. */
2599     return fold_masks_s(ctx, op, -1);
2600 }
2601 
2602 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2603 {
2604     TCGCond cond;
2605     int i, inv = 0;
2606 
2607     i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2608     cond = op->args[5];
2609     if (i >= 0) {
2610         goto do_setcond_const;
2611     }
2612 
2613     switch (cond) {
2614     case TCG_COND_LT:
2615     case TCG_COND_GE:
2616         /*
2617          * Simplify LT/GE comparisons vs zero to a single compare
2618          * vs the high word of the input.
2619          */
2620         if (arg_is_const_val(op->args[3], 0) &&
2621             arg_is_const_val(op->args[4], 0)) {
2622             goto do_setcond_high;
2623         }
2624         break;
2625 
2626     case TCG_COND_NE:
2627         inv = 1;
2628         QEMU_FALLTHROUGH;
2629     case TCG_COND_EQ:
2630         /*
2631          * Simplify EQ/NE comparisons where one of the pairs
2632          * can be simplified.
2633          */
2634         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2635                                      op->args[3], cond);
2636         switch (i ^ inv) {
2637         case 0:
2638             goto do_setcond_const;
2639         case 1:
2640             goto do_setcond_high;
2641         }
2642 
2643         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2644                                      op->args[4], cond);
2645         switch (i ^ inv) {
2646         case 0:
2647             goto do_setcond_const;
2648         case 1:
2649             goto do_setcond_low;
2650         }
2651         break;
2652 
2653     case TCG_COND_TSTEQ:
2654     case TCG_COND_TSTNE:
2655         if (arg_is_const_val(op->args[3], 0)) {
2656             goto do_setcond_high;
2657         }
2658         if (arg_is_const_val(op->args[4], 0)) {
2659             goto do_setcond_low;
2660         }
2661         break;
2662 
2663     default:
2664         break;
2665 
2666     do_setcond_low:
2667         op->args[2] = op->args[3];
2668         op->args[3] = cond;
2669         op->opc = INDEX_op_setcond;
2670         return fold_setcond(ctx, op);
2671 
2672     do_setcond_high:
2673         op->args[1] = op->args[2];
2674         op->args[2] = op->args[4];
2675         op->args[3] = cond;
2676         op->opc = INDEX_op_setcond;
2677         return fold_setcond(ctx, op);
2678     }
2679 
2680     return fold_masks_z(ctx, op, 1);
2681 
2682  do_setcond_const:
2683     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2684 }
2685 
2686 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2687 {
2688     uint64_t z_mask, o_mask, s_mask, a_mask;
2689     TempOptInfo *t1 = arg_info(op->args[1]);
2690     int pos = op->args[2];
2691     int len = op->args[3];
2692 
2693     if (ti_is_const(t1)) {
2694         return tcg_opt_gen_movi(ctx, op, op->args[0],
2695                                 sextract64(ti_const_val(t1), pos, len));
2696     }
2697 
2698     s_mask = t1->s_mask >> pos;
2699     s_mask |= -1ull << (len - 1);
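    /*
     * When the field starts at bit 0, the sextract is a no-op if the
     * input already has at least len sign-repetition bits, i.e. the new
     * s_mask adds nothing to the old one.
     */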
2700     a_mask = pos ? -1 : s_mask & ~t1->s_mask;
2701 
2702     z_mask = sextract64(t1->z_mask, pos, len);
2703     o_mask = sextract64(t1->o_mask, pos, len);
2704 
2705     return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2706 }
2707 
2708 static bool fold_shift(OptContext *ctx, TCGOp *op)
2709 {
2710     uint64_t s_mask, z_mask, o_mask;
2711     TempOptInfo *t1, *t2;
2712 
2713     if (fold_const2(ctx, op) ||
2714         fold_ix_to_i(ctx, op, 0) ||
2715         fold_xi_to_x(ctx, op, 0)) {
2716         return true;
2717     }
2718 
2719     t1 = arg_info(op->args[1]);
2720     t2 = arg_info(op->args[2]);
2721     s_mask = t1->s_mask;
2722     z_mask = t1->z_mask;
2723     o_mask = t1->o_mask;
2724 
2725     if (ti_is_const(t2)) {
2726         int sh = ti_const_val(t2);
2727 
2728         z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2729         o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2730         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2731 
2732         return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2733     }
2734 
2735     switch (op->opc) {
2736     case INDEX_op_sar:
2737         /*
2738          * Arithmetic right shift will not reduce the number of
2739          * input sign repetitions.
2740          */
2741         return fold_masks_s(ctx, op, s_mask);
2742     case INDEX_op_shr:
2743         /*
2744          * If the sign bit is known zero, then logical right shift
2745          * will not reduce the number of input sign repetitions.
2746          */
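        /*
         * -s_mask isolates the least significant bit of the sign
         * repetition range; if that bit is known zero, so is the sign.
         */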
2747         if (~z_mask & -s_mask) {
2748             return fold_masks_s(ctx, op, s_mask);
2749         }
2750         break;
2751     default:
2752         break;
2753     }
2754 
2755     return finish_folding(ctx, op);
2756 }
2757 
2758 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2759 {
2760     TCGOpcode neg_op;
2761     bool have_neg;
2762 
2763     if (!arg_is_const_val(op->args[1], 0)) {
2764         return false;
2765     }
2766 
2767     switch (ctx->type) {
2768     case TCG_TYPE_I32:
2769     case TCG_TYPE_I64:
2770         neg_op = INDEX_op_neg;
2771         have_neg = true;
2772         break;
2773     case TCG_TYPE_V64:
2774     case TCG_TYPE_V128:
2775     case TCG_TYPE_V256:
2776         neg_op = INDEX_op_neg_vec;
2777         have_neg = (TCG_TARGET_HAS_neg_vec &&
2778                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2779         break;
2780     default:
2781         g_assert_not_reached();
2782     }
2783     if (have_neg) {
2784         op->opc = neg_op;
2785         op->args[1] = op->args[2];
2786         return fold_neg_no_const(ctx, op);
2787     }
2788     return false;
2789 }
2790 
2791 /* We cannot as yet do_constant_folding with vectors. */
2792 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2793 {
2794     if (fold_xx_to_i(ctx, op, 0) ||
2795         fold_xi_to_x(ctx, op, 0) ||
2796         fold_sub_to_neg(ctx, op)) {
2797         return true;
2798     }
2799     return finish_folding(ctx, op);
2800 }
2801 
2802 static bool fold_sub(OptContext *ctx, TCGOp *op)
2803 {
2804     if (fold_const2(ctx, op) ||
2805         fold_xx_to_i(ctx, op, 0) ||
2806         fold_xi_to_x(ctx, op, 0) ||
2807         fold_sub_to_neg(ctx, op)) {
2808         return true;
2809     }
2810 
2811     /* Fold sub r,x,i to add r,x,-i */
2812     if (arg_is_const(op->args[2])) {
2813         uint64_t val = arg_const_val(op->args[2]);
2814 
2815         op->opc = INDEX_op_add;
2816         op->args[2] = arg_new_constant(ctx, -val);
2817     }
2818     return finish_folding(ctx, op);
2819 }
2820 
2821 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2822 {
2823     TempOptInfo *t2;
2824 
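    /*
     * The borrow-in of the current op has become a known constant, so
     * the preceding op no longer needs to produce a borrow-out:
     * downgrade it to the corresponding operation without one.
     */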
2825     op = QTAILQ_PREV(op, link);
2826     switch (op->opc) {
2827     case INDEX_op_subbo:
2828         op->opc = INDEX_op_sub;
2829         fold_sub(ctx, op);
2830         break;
2831     case INDEX_op_subbio:
2832         op->opc = INDEX_op_subbi;
2833         break;
2834     case INDEX_op_subb1o:
2835         t2 = arg_info(op->args[2]);
2836         if (ti_is_const(t2)) {
2837             op->opc = INDEX_op_add;
2838             op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2839             /* Perform other constant folding, if needed. */
2840             fold_add(ctx, op);
2841         } else {
2842             TCGArg ret = op->args[0];
2843             op->opc = INDEX_op_sub;
2844             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2845             op->args[0] = ret;
2846             op->args[1] = ret;
2847             op->args[2] = arg_new_constant(ctx, -1);
2848         }
2849         break;
2850     default:
2851         g_assert_not_reached();
2852     }
2853 }
2854 
2855 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2856 {
2857     TempOptInfo *t2;
2858     int borrow_in = ctx->carry_state;
2859 
2860     if (borrow_in < 0) {
2861         return finish_folding(ctx, op);
2862     }
2863     ctx->carry_state = -1;
2864 
2865     squash_prev_borrowout(ctx, op);
2866     if (borrow_in == 0) {
2867         op->opc = INDEX_op_sub;
2868         return fold_sub(ctx, op);
2869     }
2870 
2871     /*
2872      * Propagate the known carry-in into any constant, then negate to
2873      * transform from sub to add.  If there is no constant, emit a
2874      * separate add -1.
2875      */
2876     t2 = arg_info(op->args[2]);
2877     if (ti_is_const(t2)) {
2878         op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2879     } else {
2880         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2881 
2882         op2->args[0] = op->args[0];
2883         op2->args[1] = op->args[1];
2884         op2->args[2] = op->args[2];
2885         fold_sub(ctx, op2);
2886 
2887         op->args[1] = op->args[0];
2888         op->args[2] = arg_new_constant(ctx, -1);
2889     }
2890     op->opc = INDEX_op_add;
2891     return fold_add(ctx, op);
2892 }
2893 
2894 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2895 {
2896     TempOptInfo *t1, *t2;
2897     int borrow_out = -1;
2898 
2899     if (ctx->carry_state < 0) {
2900         return finish_folding(ctx, op);
2901     }
2902 
2903     squash_prev_borrowout(ctx, op);
2904     if (ctx->carry_state == 0) {
2905         goto do_subbo;
2906     }
2907 
2908     t1 = arg_info(op->args[1]);
2909     t2 = arg_info(op->args[2]);
2910 
2911     /* Propagate the known borrow-in into a constant, if possible. */
2912     if (ti_is_const(t2)) {
2913         uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2914         uint64_t v = ti_const_val(t2) & max;
2915 
2916         if (v < max) {
2917             op->args[2] = arg_new_constant(ctx, v + 1);
2918             goto do_subbo;
2919         }
2920         /* subtracting max + 1 produces known borrow out. */
2921         borrow_out = 1;
2922     }
2923     if (ti_is_const(t1)) {
2924         uint64_t v = ti_const_val(t1);
2925         if (v != 0) {
2926             op->args[1] = arg_new_constant(ctx, v - 1);
2927             goto do_subbo;
2928         }
2929     }
2930 
2931     /* Adjust the opcode to remember the known carry-in. */
2932     op->opc = INDEX_op_subb1o;
2933     ctx->carry_state = borrow_out;
2934     return finish_folding(ctx, op);
2935 
2936  do_subbo:
2937     op->opc = INDEX_op_subbo;
2938     return fold_subbo(ctx, op);
2939 }
2940 
2941 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2942 {
2943     TempOptInfo *t1 = arg_info(op->args[1]);
2944     TempOptInfo *t2 = arg_info(op->args[2]);
2945     int borrow_out = -1;
2946 
2947     if (ti_is_const(t2)) {
2948         uint64_t v2 = ti_const_val(t2);
2949         if (v2 == 0) {
2950             borrow_out = 0;
2951         } else if (ti_is_const(t1)) {
2952             uint64_t v1 = ti_const_val(t1);
2953             borrow_out = v1 < v2;
2954         }
2955     }
2956     ctx->carry_state = borrow_out;
2957     return finish_folding(ctx, op);
2958 }
2959 
2960 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2961 {
2962     uint64_t z_mask = -1, s_mask = 0;
2963 
2964     /* We can't do any folding with a load, but we can record bits. */
2965     switch (op->opc) {
2966     case INDEX_op_ld8s:
2967         s_mask = INT8_MIN;
2968         break;
2969     case INDEX_op_ld8u:
2970         z_mask = MAKE_64BIT_MASK(0, 8);
2971         break;
2972     case INDEX_op_ld16s:
2973         s_mask = INT16_MIN;
2974         break;
2975     case INDEX_op_ld16u:
2976         z_mask = MAKE_64BIT_MASK(0, 16);
2977         break;
2978     case INDEX_op_ld32s:
2979         s_mask = INT32_MIN;
2980         break;
2981     case INDEX_op_ld32u:
2982         z_mask = MAKE_64BIT_MASK(0, 32);
2983         break;
2984     default:
2985         g_assert_not_reached();
2986     }
2987     return fold_masks_zs(ctx, op, z_mask, s_mask);
2988 }
2989 
2990 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2991 {
2992     TCGTemp *dst, *src;
2993     intptr_t ofs;
2994     TCGType type;
2995 
2996     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2997         return finish_folding(ctx, op);
2998     }
2999 
3000     type = ctx->type;
3001     ofs = op->args[2];
3002     dst = arg_temp(op->args[0]);
3003     src = find_mem_copy_for(ctx, type, ofs);
3004     if (src && src->base_type == type) {
3005         return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3006     }
3007 
3008     reset_ts(ctx, dst);
3009     record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3010     return true;
3011 }
3012 
3013 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3014 {
3015     intptr_t ofs = op->args[2];
3016     intptr_t lm1;
3017 
3018     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3019         remove_mem_copy_all(ctx);
3020         return true;
3021     }
3022 
3023     switch (op->opc) {
3024     case INDEX_op_st8:
3025         lm1 = 0;
3026         break;
3027     case INDEX_op_st16:
3028         lm1 = 1;
3029         break;
3030     case INDEX_op_st32:
3031         lm1 = 3;
3032         break;
3033     case INDEX_op_st:
3034     case INDEX_op_st_vec:
3035         lm1 = tcg_type_size(ctx->type) - 1;
3036         break;
3037     default:
3038         g_assert_not_reached();
3039     }
3040     remove_mem_copy_in(ctx, ofs, ofs + lm1);
3041     return true;
3042 }
3043 
3044 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3045 {
3046     TCGTemp *src;
3047     intptr_t ofs, last;
3048     TCGType type;
3049 
3050     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3051         return fold_tcg_st(ctx, op);
3052     }
3053 
3054     src = arg_temp(op->args[0]);
3055     ofs = op->args[2];
3056     type = ctx->type;
3057 
3058     /*
3059      * Eliminate duplicate stores of a constant.
3060      * This happens frequently when the target ISA zero-extends.
3061      */
3062     if (ts_is_const(src)) {
3063         TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3064         if (src == prev) {
3065             tcg_op_remove(ctx->tcg, op);
3066             return true;
3067         }
3068     }
3069 
3070     last = ofs + tcg_type_size(type) - 1;
3071     remove_mem_copy_in(ctx, ofs, last);
3072     record_mem_copy(ctx, type, src, ofs, last);
3073     return true;
3074 }
3075 
3076 static bool fold_xor(OptContext *ctx, TCGOp *op)
3077 {
3078     uint64_t z_mask, o_mask, s_mask;
3079     TempOptInfo *t1, *t2;
3080 
3081     if (fold_const2_commutative(ctx, op) ||
3082         fold_xx_to_i(ctx, op, 0) ||
3083         fold_xi_to_x(ctx, op, 0) ||
3084         fold_xi_to_not(ctx, op, -1)) {
3085         return true;
3086     }
3087 
3088     t1 = arg_info(op->args[1]);
3089     t2 = arg_info(op->args[2]);
3090 
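    /*
     * For xor, a result bit is known only when both input bits are
     * known: equal known bits give a known 0, different known bits
     * give a known 1.
     */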
3091     z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3092     o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3093     s_mask = t1->s_mask & t2->s_mask;
3094 
3095     return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3096 }
3097 
3098 /* Propagate constants and copies, fold constant expressions. */
3099 void tcg_optimize(TCGContext *s)
3100 {
3101     int nb_temps, i;
3102     TCGOp *op, *op_next;
3103     OptContext ctx = { .tcg = s };
3104 
3105     QSIMPLEQ_INIT(&ctx.mem_free);
3106 
3107     /* Each temp's state_ptr points to a TempOptInfo.
3108        If the temp holds a constant then its value is recorded there.
3109        If the temp is a copy of other temps then the other copies are
3110        available through the doubly linked circular copy list. */
3111 
3112     nb_temps = s->nb_temps;
3113     for (i = 0; i < nb_temps; ++i) {
3114         s->temps[i].state_ptr = NULL;
3115     }
3116 
3117     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3118         TCGOpcode opc = op->opc;
3119         const TCGOpDef *def;
3120         bool done = false;
3121 
3122         /* Calls are special. */
3123         if (opc == INDEX_op_call) {
3124             fold_call(&ctx, op);
3125             continue;
3126         }
3127 
3128         def = &tcg_op_defs[opc];
3129         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3130         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3131 
3132         /* Pre-compute the type of the operation. */
3133         ctx.type = TCGOP_TYPE(op);
3134 
3135         /*
3136          * Process each opcode.
3137          * Sorted alphabetically by opcode as much as possible.
3138          */
3139         switch (opc) {
3140         case INDEX_op_add:
3141             done = fold_add(&ctx, op);
3142             break;
3143         case INDEX_op_add_vec:
3144             done = fold_add_vec(&ctx, op);
3145             break;
3146         case INDEX_op_addci:
3147             done = fold_addci(&ctx, op);
3148             break;
3149         case INDEX_op_addcio:
3150             done = fold_addcio(&ctx, op);
3151             break;
3152         case INDEX_op_addco:
3153             done = fold_addco(&ctx, op);
3154             break;
3155         case INDEX_op_and:
3156         case INDEX_op_and_vec:
3157             done = fold_and(&ctx, op);
3158             break;
3159         case INDEX_op_andc:
3160         case INDEX_op_andc_vec:
3161             done = fold_andc(&ctx, op);
3162             break;
3163         case INDEX_op_brcond:
3164             done = fold_brcond(&ctx, op);
3165             break;
3166         case INDEX_op_brcond2_i32:
3167             done = fold_brcond2(&ctx, op);
3168             break;
3169         case INDEX_op_bswap16:
3170         case INDEX_op_bswap32:
3171         case INDEX_op_bswap64:
3172             done = fold_bswap(&ctx, op);
3173             break;
3174         case INDEX_op_clz:
3175         case INDEX_op_ctz:
3176             done = fold_count_zeros(&ctx, op);
3177             break;
3178         case INDEX_op_ctpop:
3179             done = fold_ctpop(&ctx, op);
3180             break;
3181         case INDEX_op_deposit:
3182             done = fold_deposit(&ctx, op);
3183             break;
3184         case INDEX_op_divs:
3185         case INDEX_op_divu:
3186             done = fold_divide(&ctx, op);
3187             break;
3188         case INDEX_op_dup_vec:
3189             done = fold_dup(&ctx, op);
3190             break;
3191         case INDEX_op_dup2_vec:
3192             done = fold_dup2(&ctx, op);
3193             break;
3194         case INDEX_op_eqv:
3195         case INDEX_op_eqv_vec:
3196             done = fold_eqv(&ctx, op);
3197             break;
3198         case INDEX_op_extract:
3199             done = fold_extract(&ctx, op);
3200             break;
3201         case INDEX_op_extract2:
3202             done = fold_extract2(&ctx, op);
3203             break;
3204         case INDEX_op_ext_i32_i64:
3205             done = fold_exts(&ctx, op);
3206             break;
3207         case INDEX_op_extu_i32_i64:
3208         case INDEX_op_extrl_i64_i32:
3209         case INDEX_op_extrh_i64_i32:
3210             done = fold_extu(&ctx, op);
3211             break;
3212         case INDEX_op_ld8s:
3213         case INDEX_op_ld8u:
3214         case INDEX_op_ld16s:
3215         case INDEX_op_ld16u:
3216         case INDEX_op_ld32s:
3217         case INDEX_op_ld32u:
3218             done = fold_tcg_ld(&ctx, op);
3219             break;
3220         case INDEX_op_ld:
3221         case INDEX_op_ld_vec:
3222             done = fold_tcg_ld_memcopy(&ctx, op);
3223             break;
3224         case INDEX_op_st8:
3225         case INDEX_op_st16:
3226         case INDEX_op_st32:
3227             done = fold_tcg_st(&ctx, op);
3228             break;
3229         case INDEX_op_st:
3230         case INDEX_op_st_vec:
3231             done = fold_tcg_st_memcopy(&ctx, op);
3232             break;
3233         case INDEX_op_mb:
3234             done = fold_mb(&ctx, op);
3235             break;
3236         case INDEX_op_mov:
3237         case INDEX_op_mov_vec:
3238             done = fold_mov(&ctx, op);
3239             break;
3240         case INDEX_op_movcond:
3241             done = fold_movcond(&ctx, op);
3242             break;
3243         case INDEX_op_mul:
3244             done = fold_mul(&ctx, op);
3245             break;
3246         case INDEX_op_mulsh:
3247         case INDEX_op_muluh:
3248             done = fold_mul_highpart(&ctx, op);
3249             break;
3250         case INDEX_op_muls2:
3251         case INDEX_op_mulu2:
3252             done = fold_multiply2(&ctx, op);
3253             break;
3254         case INDEX_op_nand:
3255         case INDEX_op_nand_vec:
3256             done = fold_nand(&ctx, op);
3257             break;
3258         case INDEX_op_neg:
3259             done = fold_neg(&ctx, op);
3260             break;
3261         case INDEX_op_nor:
3262         case INDEX_op_nor_vec:
3263             done = fold_nor(&ctx, op);
3264             break;
3265         case INDEX_op_not:
3266         case INDEX_op_not_vec:
3267             done = fold_not(&ctx, op);
3268             break;
3269         case INDEX_op_or:
3270         case INDEX_op_or_vec:
3271             done = fold_or(&ctx, op);
3272             break;
3273         case INDEX_op_orc:
3274         case INDEX_op_orc_vec:
3275             done = fold_orc(&ctx, op);
3276             break;
3277         case INDEX_op_qemu_ld:
3278             done = fold_qemu_ld_1reg(&ctx, op);
3279             break;
3280         case INDEX_op_qemu_ld2:
3281             done = fold_qemu_ld_2reg(&ctx, op);
3282             break;
3283         case INDEX_op_qemu_st:
3284         case INDEX_op_qemu_st2:
3285             done = fold_qemu_st(&ctx, op);
3286             break;
3287         case INDEX_op_rems:
3288         case INDEX_op_remu:
3289             done = fold_remainder(&ctx, op);
3290             break;
3291         case INDEX_op_rotl:
3292         case INDEX_op_rotr:
3293         case INDEX_op_sar:
3294         case INDEX_op_shl:
3295         case INDEX_op_shr:
3296             done = fold_shift(&ctx, op);
3297             break;
3298         case INDEX_op_setcond:
3299             done = fold_setcond(&ctx, op);
3300             break;
3301         case INDEX_op_negsetcond:
3302             done = fold_negsetcond(&ctx, op);
3303             break;
3304         case INDEX_op_setcond2_i32:
3305             done = fold_setcond2(&ctx, op);
3306             break;
3307         case INDEX_op_cmp_vec:
3308             done = fold_cmp_vec(&ctx, op);
3309             break;
3310         case INDEX_op_cmpsel_vec:
3311             done = fold_cmpsel_vec(&ctx, op);
3312             break;
3313         case INDEX_op_bitsel_vec:
3314             done = fold_bitsel_vec(&ctx, op);
3315             break;
3316         case INDEX_op_sextract:
3317             done = fold_sextract(&ctx, op);
3318             break;
3319         case INDEX_op_sub:
3320             done = fold_sub(&ctx, op);
3321             break;
3322         case INDEX_op_subbi:
3323             done = fold_subbi(&ctx, op);
3324             break;
3325         case INDEX_op_subbio:
3326             done = fold_subbio(&ctx, op);
3327             break;
3328         case INDEX_op_subbo:
3329             done = fold_subbo(&ctx, op);
3330             break;
3331         case INDEX_op_sub_vec:
3332             done = fold_sub_vec(&ctx, op);
3333             break;
3334         case INDEX_op_xor:
3335         case INDEX_op_xor_vec:
3336             done = fold_xor(&ctx, op);
3337             break;
3338         case INDEX_op_set_label:
3339         case INDEX_op_br:
3340         case INDEX_op_exit_tb:
3341         case INDEX_op_goto_tb:
3342         case INDEX_op_goto_ptr:
3343             finish_ebb(&ctx);
3344             done = true;
3345             break;
3346         default:
3347             done = finish_folding(&ctx, op);
3348             break;
3349         }
3350         tcg_debug_assert(done);
3351     }
3352 }
3353