1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32
33
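/*
 * A MemCopyInfo records that the memory range [start, last], tracked in
 * the OptContext interval tree, currently holds a copy of temp @ts stored
 * with type @type; a later access to the same address and type can then
 * be satisfied from that temp instead of memory.
 */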
34 typedef struct MemCopyInfo {
35 IntervalTreeNode itree;
36 QSIMPLEQ_ENTRY (MemCopyInfo) next;
37 TCGTemp *ts;
38 TCGType type;
39 } MemCopyInfo;
40
41 typedef struct TempOptInfo {
42 TCGTemp *prev_copy;
43 TCGTemp *next_copy;
44 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
46 uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */
47 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
48 } TempOptInfo;
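/*
 * For example, a value known to have the form 0b0...01xxx has
 * z_mask == 0xf (only the low four bits may be nonzero), o_mask == 0x8
 * (bit 3 is certainly one), and s_mask with bits 63..4 set (all of
 * those bits equal the most significant bit, which is zero).
 */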
49
50 typedef struct OptContext {
51 TCGContext *tcg;
52 TCGOp *prev_mb;
53 TCGTempSet temps_used;
54
55 IntervalTreeRoot mem_copy;
56 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
57
58 /* In flight values from optimization. */
59 TCGType type;
60 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */
61 } OptContext;
62
63 static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65 return ts->state_ptr;
66 }
67
68 static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70 return ts_info(arg_temp(arg));
71 }
72
73 static inline bool ti_is_const(TempOptInfo *ti)
74 {
75 /* If all bits that are not known zeros are known ones, it's constant. */
76 return ti->z_mask == ti->o_mask;
77 }
78
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81 /* If constant, both z_mask and o_mask contain the value. */
82 return ti->z_mask;
83 }
84
85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
86 {
87 return ti_is_const(ti) && ti_const_val(ti) == val;
88 }
89
90 static inline bool ts_is_const(TCGTemp *ts)
91 {
92 return ti_is_const(ts_info(ts));
93 }
94
95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
96 {
97 return ti_is_const_val(ts_info(ts), val);
98 }
99
100 static inline bool arg_is_const(TCGArg arg)
101 {
102 return ts_is_const(arg_temp(arg));
103 }
104
105 static inline uint64_t arg_const_val(TCGArg arg)
106 {
107 return ti_const_val(arg_info(arg));
108 }
109
110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
111 {
112 return ts_is_const_val(arg_temp(arg), val);
113 }
114
115 static inline bool ts_is_copy(TCGTemp *ts)
116 {
117 return ts_info(ts)->next_copy != ts;
118 }
119
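/*
 * Between two copies of the same value, prefer the temp whose kind
 * compares greater; with the current TCGTempKind ordering this favors
 * constants and globals over transient EBB/TB temporaries.
 */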
120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
121 {
122 return a->kind < b->kind ? b : a;
123 }
124
125 /* Initialize and activate a temporary. */
126 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128 size_t idx = temp_idx(ts);
129 TempOptInfo *ti;
130
131 if (test_bit(idx, ctx->temps_used.l)) {
132 return;
133 }
134 set_bit(idx, ctx->temps_used.l);
135
136 ti = ts->state_ptr;
137 if (ti == NULL) {
138 ti = tcg_malloc(sizeof(TempOptInfo));
139 ts->state_ptr = ti;
140 }
141
142 ti->next_copy = ts;
143 ti->prev_copy = ts;
144 QSIMPLEQ_INIT(&ti->mem_copy);
145 if (ts->kind == TEMP_CONST) {
146 ti->z_mask = ts->val;
147 ti->o_mask = ts->val;
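 /*
  * clrsb64() counts the bits that duplicate the sign bit, so the shift
  * below sets exactly the bits that match the msb: e.g. for val == 0
  * this yields s_mask == -1, and for val == 1 it sets bits 63..1.
  */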
148 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149 } else {
150 ti->z_mask = -1;
151 ti->o_mask = 0;
152 ti->s_mask = 0;
153 }
154 }
155
156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159 return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161
162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
163 {
164 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
165 return r ? container_of(r, MemCopyInfo, itree) : NULL;
166 }
167
168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
169 {
170 TCGTemp *ts = mc->ts;
171 TempOptInfo *ti = ts_info(ts);
172
173 interval_tree_remove(&mc->itree, &ctx->mem_copy);
174 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
175 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
176 }
177
178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
179 {
180 while (true) {
181 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
182 if (!mc) {
183 break;
184 }
185 remove_mem_copy(ctx, mc);
186 }
187 }
188
189 static void remove_mem_copy_all(OptContext *ctx)
190 {
191 remove_mem_copy_in(ctx, 0, -1);
192 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
193 }
194
195 static TCGTemp *find_better_copy(TCGTemp *ts)
196 {
197 TCGTemp *i, *ret;
198
199 /* If this is already readonly, we can't do better. */
200 if (temp_readonly(ts)) {
201 return ts;
202 }
203
204 ret = ts;
205 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
206 ret = cmp_better_copy(ret, i);
207 }
208 return ret;
209 }
210
211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
212 {
213 TempOptInfo *si = ts_info(src_ts);
214 TempOptInfo *di = ts_info(dst_ts);
215 MemCopyInfo *mc;
216
217 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
218 tcg_debug_assert(mc->ts == src_ts);
219 mc->ts = dst_ts;
220 }
221 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
222 }
223
224 /* Reset TEMP's state, possibly removing the temp from the list of copies. */
225 static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227 TempOptInfo *ti = ts_info(ts);
228 TCGTemp *pts = ti->prev_copy;
229 TCGTemp *nts = ti->next_copy;
230 TempOptInfo *pi = ts_info(pts);
231 TempOptInfo *ni = ts_info(nts);
232
233 ni->prev_copy = ti->prev_copy;
234 pi->next_copy = ti->next_copy;
235 ti->next_copy = ts;
236 ti->prev_copy = ts;
237 ti->z_mask = -1;
238 ti->o_mask = 0;
239 ti->s_mask = 0;
240
241 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242 if (ts == nts) {
243 /* Last temp copy being removed, the mem copies die. */
244 MemCopyInfo *mc;
245 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247 }
248 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249 } else {
250 move_mem_copies(find_better_copy(nts), ts);
251 }
252 }
253 }
254
255 static void reset_temp(OptContext *ctx, TCGArg arg)
256 {
257 reset_ts(ctx, arg_temp(arg));
258 }
259
260 static void record_mem_copy(OptContext *ctx, TCGType type,
261 TCGTemp *ts, intptr_t start, intptr_t last)
262 {
263 MemCopyInfo *mc;
264 TempOptInfo *ti;
265
266 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
267 if (mc) {
268 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
269 } else {
270 mc = tcg_malloc(sizeof(*mc));
271 }
272
273 memset(mc, 0, sizeof(*mc));
274 mc->itree.start = start;
275 mc->itree.last = last;
276 mc->type = type;
277 interval_tree_insert(&mc->itree, &ctx->mem_copy);
278
279 ts = find_better_copy(ts);
280 ti = ts_info(ts);
281 mc->ts = ts;
282 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
283 }
284
285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
286 {
287 TCGTemp *i;
288
289 if (ts1 == ts2) {
290 return true;
291 }
292
293 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
294 return false;
295 }
296
297 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
298 if (i == ts2) {
299 return true;
300 }
301 }
302
303 return false;
304 }
305
306 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
307 {
308 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
309 }
310
311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313 MemCopyInfo *mc;
314
315 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316 if (mc->itree.start == s && mc->type == type) {
317 return find_better_copy(mc->ts);
318 }
319 }
320 return NULL;
321 }
322
323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325 TCGType type = ctx->type;
326 TCGTemp *ts;
327
328 if (type == TCG_TYPE_I32) {
329 val = (int32_t)val;
330 }
331
332 ts = tcg_constant_internal(type, val);
333 init_ts_info(ctx, ts);
334
335 return temp_arg(ts);
336 }
337
338 static TCGArg arg_new_temp(OptContext *ctx)
339 {
340 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341 init_ts_info(ctx, ts);
342 return temp_arg(ts);
343 }
344
345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
346 TCGOpcode opc, unsigned narg)
347 {
348 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
349 }
350
351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
352 TCGOpcode opc, unsigned narg)
353 {
354 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
355 }
356
357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
358 {
359 TCGTemp *dst_ts = arg_temp(dst);
360 TCGTemp *src_ts = arg_temp(src);
361 TempOptInfo *di;
362 TempOptInfo *si;
363 TCGOpcode new_op;
364
365 if (ts_are_copies(dst_ts, src_ts)) {
366 tcg_op_remove(ctx->tcg, op);
367 return true;
368 }
369
370 reset_ts(ctx, dst_ts);
371 di = ts_info(dst_ts);
372 si = ts_info(src_ts);
373
374 switch (ctx->type) {
375 case TCG_TYPE_I32:
376 case TCG_TYPE_I64:
377 new_op = INDEX_op_mov;
378 break;
379 case TCG_TYPE_V64:
380 case TCG_TYPE_V128:
381 case TCG_TYPE_V256:
382 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
383 new_op = INDEX_op_mov_vec;
384 break;
385 default:
386 g_assert_not_reached();
387 }
388 op->opc = new_op;
389 op->args[0] = dst;
390 op->args[1] = src;
391
392 di->z_mask = si->z_mask;
393 di->o_mask = si->o_mask;
394 di->s_mask = si->s_mask;
395
396 if (src_ts->type == dst_ts->type) {
397 TempOptInfo *ni = ts_info(si->next_copy);
398
399 di->next_copy = si->next_copy;
400 di->prev_copy = src_ts;
401 ni->prev_copy = dst_ts;
402 si->next_copy = dst_ts;
403
404 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
405 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
406 move_mem_copies(dst_ts, src_ts);
407 }
408 } else if (dst_ts->type == TCG_TYPE_I32) {
409 di->z_mask = (int32_t)di->z_mask;
410 di->o_mask = (int32_t)di->o_mask;
411 di->s_mask |= INT32_MIN;
412 } else {
413 di->z_mask |= MAKE_64BIT_MASK(32, 32);
414 di->o_mask = (uint32_t)di->o_mask;
415 di->s_mask = INT64_MIN;
416 }
417 return true;
418 }
419
420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
421 TCGArg dst, uint64_t val)
422 {
423 /* Convert movi to mov with constant temp. */
424 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
425 }
426
427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
428 uint64_t x, uint64_t y)
429 {
430 uint64_t l64, h64;
431
432 switch (op) {
433 case INDEX_op_add:
434 return x + y;
435
436 case INDEX_op_sub:
437 return x - y;
438
439 case INDEX_op_mul:
440 return x * y;
441
442 case INDEX_op_and:
443 case INDEX_op_and_vec:
444 return x & y;
445
446 case INDEX_op_or:
447 case INDEX_op_or_vec:
448 return x | y;
449
450 case INDEX_op_xor:
451 case INDEX_op_xor_vec:
452 return x ^ y;
453
454 case INDEX_op_shl:
455 if (type == TCG_TYPE_I32) {
456 return (uint32_t)x << (y & 31);
457 }
458 return (uint64_t)x << (y & 63);
459
460 case INDEX_op_shr:
461 if (type == TCG_TYPE_I32) {
462 return (uint32_t)x >> (y & 31);
463 }
464 return (uint64_t)x >> (y & 63);
465
466 case INDEX_op_sar:
467 if (type == TCG_TYPE_I32) {
468 return (int32_t)x >> (y & 31);
469 }
470 return (int64_t)x >> (y & 63);
471
472 case INDEX_op_rotr:
473 if (type == TCG_TYPE_I32) {
474 return ror32(x, y & 31);
475 }
476 return ror64(x, y & 63);
477
478 case INDEX_op_rotl:
479 if (type == TCG_TYPE_I32) {
480 return rol32(x, y & 31);
481 }
482 return rol64(x, y & 63);
483
484 case INDEX_op_not:
485 case INDEX_op_not_vec:
486 return ~x;
487
488 case INDEX_op_neg:
489 return -x;
490
491 case INDEX_op_andc:
492 case INDEX_op_andc_vec:
493 return x & ~y;
494
495 case INDEX_op_orc:
496 case INDEX_op_orc_vec:
497 return x | ~y;
498
499 case INDEX_op_eqv:
500 case INDEX_op_eqv_vec:
501 return ~(x ^ y);
502
503 case INDEX_op_nand:
504 case INDEX_op_nand_vec:
505 return ~(x & y);
506
507 case INDEX_op_nor:
508 case INDEX_op_nor_vec:
509 return ~(x | y);
510
511 case INDEX_op_clz:
512 if (type == TCG_TYPE_I32) {
513 return (uint32_t)x ? clz32(x) : y;
514 }
515 return x ? clz64(x) : y;
516
517 case INDEX_op_ctz:
518 if (type == TCG_TYPE_I32) {
519 return (uint32_t)x ? ctz32(x) : y;
520 }
521 return x ? ctz64(x) : y;
522
523 case INDEX_op_ctpop:
524 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
525
526 case INDEX_op_bswap16:
527 x = bswap16(x);
528 return y & TCG_BSWAP_OS ? (int16_t)x : x;
529
530 case INDEX_op_bswap32:
531 x = bswap32(x);
532 return y & TCG_BSWAP_OS ? (int32_t)x : x;
533
534 case INDEX_op_bswap64:
535 return bswap64(x);
536
537 case INDEX_op_ext_i32_i64:
538 return (int32_t)x;
539
540 case INDEX_op_extu_i32_i64:
541 case INDEX_op_extrl_i64_i32:
542 return (uint32_t)x;
543
544 case INDEX_op_extrh_i64_i32:
545 return (uint64_t)x >> 32;
546
547 case INDEX_op_muluh:
548 if (type == TCG_TYPE_I32) {
549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550 }
551 mulu64(&l64, &h64, x, y);
552 return h64;
553
554 case INDEX_op_mulsh:
555 if (type == TCG_TYPE_I32) {
556 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
557 }
558 muls64(&l64, &h64, x, y);
559 return h64;
560
561 case INDEX_op_divs:
562 /* Avoid crashing on divide by zero, otherwise undefined. */
563 if (type == TCG_TYPE_I32) {
564 return (int32_t)x / ((int32_t)y ? : 1);
565 }
566 return (int64_t)x / ((int64_t)y ? : 1);
567
568 case INDEX_op_divu:
569 if (type == TCG_TYPE_I32) {
570 return (uint32_t)x / ((uint32_t)y ? : 1);
571 }
572 return (uint64_t)x / ((uint64_t)y ? : 1);
573
574 case INDEX_op_rems:
575 if (type == TCG_TYPE_I32) {
576 return (int32_t)x % ((int32_t)y ? : 1);
577 }
578 return (int64_t)x % ((int64_t)y ? : 1);
579
580 case INDEX_op_remu:
581 if (type == TCG_TYPE_I32) {
582 return (uint32_t)x % ((uint32_t)y ? : 1);
583 }
584 return (uint64_t)x % ((uint64_t)y ? : 1);
585
586 default:
587 g_assert_not_reached();
588 }
589 }
590
591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
592 uint64_t x, uint64_t y)
593 {
594 uint64_t res = do_constant_folding_2(op, type, x, y);
595 if (type == TCG_TYPE_I32) {
596 res = (int32_t)res;
597 }
598 return res;
599 }
600
601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
602 {
603 switch (c) {
604 case TCG_COND_EQ:
605 return x == y;
606 case TCG_COND_NE:
607 return x != y;
608 case TCG_COND_LT:
609 return (int32_t)x < (int32_t)y;
610 case TCG_COND_GE:
611 return (int32_t)x >= (int32_t)y;
612 case TCG_COND_LE:
613 return (int32_t)x <= (int32_t)y;
614 case TCG_COND_GT:
615 return (int32_t)x > (int32_t)y;
616 case TCG_COND_LTU:
617 return x < y;
618 case TCG_COND_GEU:
619 return x >= y;
620 case TCG_COND_LEU:
621 return x <= y;
622 case TCG_COND_GTU:
623 return x > y;
624 case TCG_COND_TSTEQ:
625 return (x & y) == 0;
626 case TCG_COND_TSTNE:
627 return (x & y) != 0;
628 case TCG_COND_ALWAYS:
629 case TCG_COND_NEVER:
630 break;
631 }
632 g_assert_not_reached();
633 }
634
635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
636 {
637 switch (c) {
638 case TCG_COND_EQ:
639 return x == y;
640 case TCG_COND_NE:
641 return x != y;
642 case TCG_COND_LT:
643 return (int64_t)x < (int64_t)y;
644 case TCG_COND_GE:
645 return (int64_t)x >= (int64_t)y;
646 case TCG_COND_LE:
647 return (int64_t)x <= (int64_t)y;
648 case TCG_COND_GT:
649 return (int64_t)x > (int64_t)y;
650 case TCG_COND_LTU:
651 return x < y;
652 case TCG_COND_GEU:
653 return x >= y;
654 case TCG_COND_LEU:
655 return x <= y;
656 case TCG_COND_GTU:
657 return x > y;
658 case TCG_COND_TSTEQ:
659 return (x & y) == 0;
660 case TCG_COND_TSTNE:
661 return (x & y) != 0;
662 case TCG_COND_ALWAYS:
663 case TCG_COND_NEVER:
664 break;
665 }
666 g_assert_not_reached();
667 }
668
669 static int do_constant_folding_cond_eq(TCGCond c)
670 {
671 switch (c) {
672 case TCG_COND_GT:
673 case TCG_COND_LTU:
674 case TCG_COND_LT:
675 case TCG_COND_GTU:
676 case TCG_COND_NE:
677 return 0;
678 case TCG_COND_GE:
679 case TCG_COND_GEU:
680 case TCG_COND_LE:
681 case TCG_COND_LEU:
682 case TCG_COND_EQ:
683 return 1;
684 case TCG_COND_TSTEQ:
685 case TCG_COND_TSTNE:
686 return -1;
687 case TCG_COND_ALWAYS:
688 case TCG_COND_NEVER:
689 break;
690 }
691 g_assert_not_reached();
692 }
693
694 /*
695 * Return -1 if the condition can't be simplified,
696 * and the result of the condition (0 or 1) if it can.
697 */
698 static int do_constant_folding_cond(TCGType type, TCGArg x,
699 TCGArg y, TCGCond c)
700 {
701 if (arg_is_const(x) && arg_is_const(y)) {
702 uint64_t xv = arg_const_val(x);
703 uint64_t yv = arg_const_val(y);
704
705 switch (type) {
706 case TCG_TYPE_I32:
707 return do_constant_folding_cond_32(xv, yv, c);
708 case TCG_TYPE_I64:
709 return do_constant_folding_cond_64(xv, yv, c);
710 default:
711 /* Only scalar comparisons are optimizable */
712 return -1;
713 }
714 } else if (args_are_copies(x, y)) {
715 return do_constant_folding_cond_eq(c);
716 } else if (arg_is_const_val(y, 0)) {
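 /*
  * Comparisons against constant zero that do not depend on the other
  * operand: x <u 0 and x & 0 != 0 are never true, while x >=u 0 and
  * x & 0 == 0 always are.
  */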
717 switch (c) {
718 case TCG_COND_LTU:
719 case TCG_COND_TSTNE:
720 return 0;
721 case TCG_COND_GEU:
722 case TCG_COND_TSTEQ:
723 return 1;
724 default:
725 return -1;
726 }
727 }
728 return -1;
729 }
730
731 /**
732 * swap_commutative:
733 * @dest: TCGArg of the destination argument, or NO_DEST.
734 * @p1: first paired argument
735 * @p2: second paired argument
736 *
737 * If *@p1 is a constant and *@p2 is not, swap.
738 * If *@p2 matches @dest, swap.
739 * Return true if a swap was performed.
740 */
741
742 #define NO_DEST temp_arg(NULL)
743
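/*
 * Weight operands for swap_commutative: non-constants score 0, constant
 * zero scores 2, and other constants score 3, so that constants (and in
 * particular non-zero constants) tend to end up as the second operand.
 */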
744 static int pref_commutative(TempOptInfo *ti)
745 {
746 /* Slight preference for non-zero constants second. */
747 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
748 }
749
750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
751 {
752 TCGArg a1 = *p1, a2 = *p2;
753 int sum = 0;
754 sum += pref_commutative(arg_info(a1));
755 sum -= pref_commutative(arg_info(a2));
756
757 /* Prefer the constant in second argument, and then the form
758 op a, a, b, which is better handled on non-RISC hosts. */
759 if (sum > 0 || (sum == 0 && dest == a2)) {
760 *p1 = a2;
761 *p2 = a1;
762 return true;
763 }
764 return false;
765 }
766
767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
768 {
769 int sum = 0;
770 sum += pref_commutative(arg_info(p1[0]));
771 sum += pref_commutative(arg_info(p1[1]));
772 sum -= pref_commutative(arg_info(p2[0]));
773 sum -= pref_commutative(arg_info(p2[1]));
774 if (sum > 0) {
775 TCGArg t;
776 t = p1[0], p1[0] = p2[0], p2[0] = t;
777 t = p1[1], p1[1] = p2[1], p2[1] = t;
778 return true;
779 }
780 return false;
781 }
782
783 /*
784 * Return -1 if the condition can't be simplified,
785 * and the result of the condition (0 or 1) if it can.
786 */
787 static bool fold_and(OptContext *ctx, TCGOp *op);
788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
789 TCGArg *p1, TCGArg *p2, TCGArg *pcond)
790 {
791 TCGCond cond;
792 TempOptInfo *i1;
793 bool swap;
794 int r;
795
796 swap = swap_commutative(dest, p1, p2);
797 cond = *pcond;
798 if (swap) {
799 *pcond = cond = tcg_swap_cond(cond);
800 }
801
802 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
803 if (r >= 0) {
804 return r;
805 }
806 if (!is_tst_cond(cond)) {
807 return -1;
808 }
809
810 i1 = arg_info(*p1);
811
812 /*
813 * TSTNE x,x -> NE x,0
814 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
815 */
816 if (args_are_copies(*p1, *p2) ||
817 (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
818 *p2 = arg_new_constant(ctx, 0);
819 *pcond = tcg_tst_eqne_cond(cond);
820 return -1;
821 }
822
823 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
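 /*
  * For example, if x is known sign-extended from bit 7 (s_mask covers
  * bits 63..7), then TSTNE x,0x80 tests only a copy of the sign bit
  * and is equivalent to LT x,0.
  */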
824 if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
825 *p2 = arg_new_constant(ctx, 0);
826 *pcond = tcg_tst_ltge_cond(cond);
827 return -1;
828 }
829
830 /* Expand to AND with a temporary if no backend support. */
831 if (!TCG_TARGET_HAS_tst) {
832 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
833 TCGArg tmp = arg_new_temp(ctx);
834
835 op2->args[0] = tmp;
836 op2->args[1] = *p1;
837 op2->args[2] = *p2;
838 fold_and(ctx, op2);
839
840 *p1 = tmp;
841 *p2 = arg_new_constant(ctx, 0);
842 *pcond = tcg_tst_eqne_cond(cond);
843 }
844 return -1;
845 }
846
847 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
848 {
849 TCGArg al, ah, bl, bh;
850 TCGCond c;
851 bool swap;
852 int r;
853
854 swap = swap_commutative2(args, args + 2);
855 c = args[4];
856 if (swap) {
857 args[4] = c = tcg_swap_cond(c);
858 }
859
860 al = args[0];
861 ah = args[1];
862 bl = args[2];
863 bh = args[3];
864
865 if (arg_is_const(bl) && arg_is_const(bh)) {
866 tcg_target_ulong blv = arg_const_val(bl);
867 tcg_target_ulong bhv = arg_const_val(bh);
868 uint64_t b = deposit64(blv, 32, 32, bhv);
869
870 if (arg_is_const(al) && arg_is_const(ah)) {
871 tcg_target_ulong alv = arg_const_val(al);
872 tcg_target_ulong ahv = arg_const_val(ah);
873 uint64_t a = deposit64(alv, 32, 32, ahv);
874
875 r = do_constant_folding_cond_64(a, b, c);
876 if (r >= 0) {
877 return r;
878 }
879 }
880
881 if (b == 0) {
882 switch (c) {
883 case TCG_COND_LTU:
884 case TCG_COND_TSTNE:
885 return 0;
886 case TCG_COND_GEU:
887 case TCG_COND_TSTEQ:
888 return 1;
889 default:
890 break;
891 }
892 }
893
894 /* TSTNE x,-1 -> NE x,0 */
895 if (b == -1 && is_tst_cond(c)) {
896 args[3] = args[2] = arg_new_constant(ctx, 0);
897 args[4] = tcg_tst_eqne_cond(c);
898 return -1;
899 }
900
901 /* TSTNE x,sign -> LT x,0 */
902 if (b == INT64_MIN && is_tst_cond(c)) {
903 /* bl must be 0, so copy that to bh */
904 args[3] = bl;
905 args[4] = tcg_tst_ltge_cond(c);
906 return -1;
907 }
908 }
909
910 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
911 r = do_constant_folding_cond_eq(c);
912 if (r >= 0) {
913 return r;
914 }
915
916 /* TSTNE x,x -> NE x,0 */
917 if (is_tst_cond(c)) {
918 args[3] = args[2] = arg_new_constant(ctx, 0);
919 args[4] = tcg_tst_eqne_cond(c);
920 return -1;
921 }
922 }
923
924 /* Expand to AND with a temporary if no backend support. */
925 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
926 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
927 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
928 TCGArg t1 = arg_new_temp(ctx);
929 TCGArg t2 = arg_new_temp(ctx);
930
931 op1->args[0] = t1;
932 op1->args[1] = al;
933 op1->args[2] = bl;
934 fold_and(ctx, op1);
935
936 op2->args[0] = t2;
937 op2->args[1] = ah;
938 op2->args[2] = bh;
939 fold_and(ctx, op2);
940
941 args[0] = t1;
942 args[1] = t2;
943 args[3] = args[2] = arg_new_constant(ctx, 0);
944 args[4] = tcg_tst_eqne_cond(c);
945 }
946 return -1;
947 }
948
949 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
950 {
951 for (int i = 0; i < nb_args; i++) {
952 TCGTemp *ts = arg_temp(op->args[i]);
953 init_ts_info(ctx, ts);
954 }
955 }
956
957 static void copy_propagate(OptContext *ctx, TCGOp *op,
958 int nb_oargs, int nb_iargs)
959 {
960 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
961 TCGTemp *ts = arg_temp(op->args[i]);
962 if (ts_is_copy(ts)) {
963 op->args[i] = temp_arg(find_better_copy(ts));
964 }
965 }
966 }
967
968 static void finish_bb(OptContext *ctx)
969 {
970 /* We do not merge memory barriers across basic blocks. */
971 ctx->prev_mb = NULL;
972 }
973
974 static void finish_ebb(OptContext *ctx)
975 {
976 finish_bb(ctx);
977 /* Constant and copy tracking does not cross extended basic blocks. */
978 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
979 remove_mem_copy_all(ctx);
980 }
981
982 static bool finish_folding(OptContext *ctx, TCGOp *op)
983 {
984 const TCGOpDef *def = &tcg_op_defs[op->opc];
985 int i, nb_oargs;
986
987 nb_oargs = def->nb_oargs;
988 for (i = 0; i < nb_oargs; i++) {
989 TCGTemp *ts = arg_temp(op->args[i]);
990 reset_ts(ctx, ts);
991 }
992 return true;
993 }
994
995 /*
996 * The fold_* functions return true when processing is complete,
997 * usually by folding the operation to a constant or to a copy,
998 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
999 * like collect information about the value produced, for use in
1000 * optimizing a subsequent operation.
1001 *
1002 * These first fold_* functions are all helpers, used by other
1003 * folders for more specific operations.
1004 */
1005
1006 static bool fold_const1(OptContext *ctx, TCGOp *op)
1007 {
1008 if (arg_is_const(op->args[1])) {
1009 uint64_t t = arg_const_val(op->args[1]);
1010
1011 t = do_constant_folding(op->opc, ctx->type, t, 0);
1012 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1013 }
1014 return false;
1015 }
1016
1017 static bool fold_const2(OptContext *ctx, TCGOp *op)
1018 {
1019 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1020 uint64_t t1 = arg_const_val(op->args[1]);
1021 uint64_t t2 = arg_const_val(op->args[2]);
1022
1023 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1024 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1025 }
1026 return false;
1027 }
1028
1029 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1030 {
1031 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1032 return false;
1033 }
1034
1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1036 {
1037 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038 return fold_const2(ctx, op);
1039 }
1040
1041 /*
1042 * Record "zero", "one" and "sign" masks for the single output of @op.
1043 * See TempOptInfo definition of z_mask, o_mask and s_mask.
1044 * If the masks prove the output constant, fold to that constant.
1045 * If @a_mask is zero, fold to a copy; s_mask may be augmented by z/o_mask.
1046 */
1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
1048 uint64_t z_mask, uint64_t o_mask,
1049 int64_t s_mask, uint64_t a_mask)
1050 {
1051 const TCGOpDef *def = &tcg_op_defs[op->opc];
1052 TCGTemp *ts;
1053 TempOptInfo *ti;
1054 int rep;
1055
1056 /* Only single-output opcodes are supported here. */
1057 tcg_debug_assert(def->nb_oargs == 1);
1058
1059 /*
1060 * 32-bit ops generate 32-bit results, which for the purpose of
1061 * simplifying tcg are sign-extended. Certainly that's how we
1062 * represent our constants elsewhere. Note that the bits will
1063 * be reset properly for a 64-bit value when encountering the
1064 * type changing opcodes.
1065 */
1066 if (ctx->type == TCG_TYPE_I32) {
1067 z_mask = (int32_t)z_mask;
1068 o_mask = (int32_t)o_mask;
1069 s_mask |= INT32_MIN;
1070 a_mask = (uint32_t)a_mask;
1071 }
1072
1073 /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074 tcg_debug_assert((o_mask & ~z_mask) == 0);
1075
1076 /* If all bits that are not known zero are known one, the value is a constant. */
1077 if (z_mask == o_mask) {
1078 return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079 }
1080
1081 /* If no bits are affected, the operation devolves to a copy. */
1082 if (a_mask == 0) {
1083 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084 }
1085
1086 ts = arg_temp(op->args[0]);
1087 reset_ts(ctx, ts);
1088
1089 ti = ts_info(ts);
1090 ti->z_mask = z_mask;
1091 ti->o_mask = o_mask;
1092
1093 /* Canonicalize s_mask and incorporate data from [zo]_mask. */
1094 rep = clz64(~s_mask);
1095 rep = MAX(rep, clz64(z_mask));
1096 rep = MAX(rep, clz64(~o_mask));
1097 rep = MAX(rep - 1, 0);
1098 ti->s_mask = INT64_MIN >> rep;
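 /*
  * For example, when z_mask == 0x0000ffff dominates the other terms,
  * clz64(z_mask) == 48, the final rep is 47, and s_mask becomes
  * 0xffffffffffff0000: the top 48 bits all equal the (zero) sign bit.
  */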
1099
1100 return false;
1101 }
1102
1103 static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
1104 uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
1105 {
1106 fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
1107 return true;
1108 }
1109
1110 static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
1111 uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
1112 {
1113 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
1114 }
1115
1116 static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
1117 uint64_t z_mask, uint64_t o_mask)
1118 {
1119 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
1120 }
1121
1122 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1123 uint64_t z_mask, uint64_t s_mask)
1124 {
1125 return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
1126 }
1127
1128 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1129 {
1130 return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
1131 }
1132
1133 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1134 {
1135 return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
1136 }
1137
1138 /*
1139 * Convert @op to NOT, if NOT is supported by the host.
1140 * Return true if the conversion is successful, which will still
1141 * indicate that the processing is complete.
1142 */
1143 static bool fold_not(OptContext *ctx, TCGOp *op);
1144 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1145 {
1146 TCGOpcode not_op;
1147 bool have_not;
1148
1149 switch (ctx->type) {
1150 case TCG_TYPE_I32:
1151 case TCG_TYPE_I64:
1152 not_op = INDEX_op_not;
1153 have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1154 break;
1155 case TCG_TYPE_V64:
1156 case TCG_TYPE_V128:
1157 case TCG_TYPE_V256:
1158 not_op = INDEX_op_not_vec;
1159 have_not = TCG_TARGET_HAS_not_vec;
1160 break;
1161 default:
1162 g_assert_not_reached();
1163 }
1164 if (have_not) {
1165 op->opc = not_op;
1166 op->args[1] = op->args[idx];
1167 return fold_not(ctx, op);
1168 }
1169 return false;
1170 }
1171
1172 /* If the binary operation has first argument @i, fold to @i. */
1173 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1174 {
1175 if (arg_is_const_val(op->args[1], i)) {
1176 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1177 }
1178 return false;
1179 }
1180
1181 /* If the binary operation has first argument @i, fold to NOT. */
1182 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1183 {
1184 if (arg_is_const_val(op->args[1], i)) {
1185 return fold_to_not(ctx, op, 2);
1186 }
1187 return false;
1188 }
1189
1190 /* If the binary operation has second argument @i, fold to @i. */
1191 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1192 {
1193 if (arg_is_const_val(op->args[2], i)) {
1194 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1195 }
1196 return false;
1197 }
1198
1199 /* If the binary operation has second argument @i, fold to identity. */
1200 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1201 {
1202 if (arg_is_const_val(op->args[2], i)) {
1203 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1204 }
1205 return false;
1206 }
1207
1208 /* If the binary operation has second argument @i, fold to NOT. */
1209 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1210 {
1211 if (arg_is_const_val(op->args[2], i)) {
1212 return fold_to_not(ctx, op, 1);
1213 }
1214 return false;
1215 }
1216
1217 /* If the binary operation has both arguments equal, fold to @i. */
1218 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1219 {
1220 if (args_are_copies(op->args[1], op->args[2])) {
1221 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1222 }
1223 return false;
1224 }
1225
1226 /* If the binary operation has both arguments equal, fold to identity. */
1227 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1228 {
1229 if (args_are_copies(op->args[1], op->args[2])) {
1230 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1231 }
1232 return false;
1233 }
1234
1235 /*
1236 * These outermost fold_<op> functions are sorted alphabetically.
1237 *
1238 * The ordering of the transformations should be:
1239 * 1) those that produce a constant
1240 * 2) those that produce a copy
1241 * 3) those that produce information about the result value.
1242 */
1243
1244 static bool fold_addco(OptContext *ctx, TCGOp *op);
1245 static bool fold_or(OptContext *ctx, TCGOp *op);
1246 static bool fold_orc(OptContext *ctx, TCGOp *op);
1247 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1248 static bool fold_xor(OptContext *ctx, TCGOp *op);
1249
1250 static bool fold_add(OptContext *ctx, TCGOp *op)
1251 {
1252 if (fold_const2_commutative(ctx, op) ||
1253 fold_xi_to_x(ctx, op, 0)) {
1254 return true;
1255 }
1256 return finish_folding(ctx, op);
1257 }
1258
1259 /* We cannot as yet do_constant_folding with vectors. */
1260 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1261 {
1262 if (fold_commutative(ctx, op) ||
1263 fold_xi_to_x(ctx, op, 0)) {
1264 return true;
1265 }
1266 return finish_folding(ctx, op);
1267 }
1268
1269 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1270 {
1271 TempOptInfo *t2;
1272
1273 op = QTAILQ_PREV(op, link);
1274 switch (op->opc) {
1275 case INDEX_op_addco:
1276 op->opc = INDEX_op_add;
1277 fold_add(ctx, op);
1278 break;
1279 case INDEX_op_addcio:
1280 op->opc = INDEX_op_addci;
1281 break;
1282 case INDEX_op_addc1o:
1283 op->opc = INDEX_op_add;
1284 t2 = arg_info(op->args[2]);
1285 if (ti_is_const(t2)) {
1286 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1287 /* Perform other constant folding, if needed. */
1288 fold_add(ctx, op);
1289 } else {
1290 TCGArg ret = op->args[0];
1291 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1292 op->args[0] = ret;
1293 op->args[1] = ret;
1294 op->args[2] = arg_new_constant(ctx, 1);
1295 }
1296 break;
1297 default:
1298 g_assert_not_reached();
1299 }
1300 }
1301
1302 static bool fold_addci(OptContext *ctx, TCGOp *op)
1303 {
1304 fold_commutative(ctx, op);
1305
1306 if (ctx->carry_state < 0) {
1307 return finish_folding(ctx, op);
1308 }
1309
1310 squash_prev_carryout(ctx, op);
1311 op->opc = INDEX_op_add;
1312
1313 if (ctx->carry_state > 0) {
1314 TempOptInfo *t2 = arg_info(op->args[2]);
1315
1316 /*
1317 * Propagate the known carry-in into a constant, if possible.
1318 * Otherwise emit a second add +1.
1319 */
1320 if (ti_is_const(t2)) {
1321 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1322 } else {
1323 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1324
1325 op2->args[0] = op->args[0];
1326 op2->args[1] = op->args[1];
1327 op2->args[2] = op->args[2];
1328 fold_add(ctx, op2);
1329
1330 op->args[1] = op->args[0];
1331 op->args[2] = arg_new_constant(ctx, 1);
1332 }
1333 }
1334
1335 ctx->carry_state = -1;
1336 return fold_add(ctx, op);
1337 }
1338
1339 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1340 {
1341 TempOptInfo *t1, *t2;
1342 int carry_out = -1;
1343 uint64_t sum, max;
1344
1345 fold_commutative(ctx, op);
1346 t1 = arg_info(op->args[1]);
1347 t2 = arg_info(op->args[2]);
1348
1349 /*
1350 * The z_mask value is >= the maximum value that can be represented
1351 * with the known zero bits. So adding the z_mask values will not
1352 * overflow if and only if the true values cannot overflow.
1353 */
1354 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1355 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1356 carry_out = 0;
1357 }
1358
1359 if (ctx->carry_state < 0) {
1360 ctx->carry_state = carry_out;
1361 return finish_folding(ctx, op);
1362 }
1363
1364 squash_prev_carryout(ctx, op);
1365 if (ctx->carry_state == 0) {
1366 goto do_addco;
1367 }
1368
1369 /* Propagate the known carry-in into a constant, if possible. */
1370 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1371 if (ti_is_const(t2)) {
1372 uint64_t v = ti_const_val(t2) & max;
1373 if (v < max) {
1374 op->args[2] = arg_new_constant(ctx, v + 1);
1375 goto do_addco;
1376 }
1377 /* max + known carry in produces known carry out. */
1378 carry_out = 1;
1379 }
1380 if (ti_is_const(t1)) {
1381 uint64_t v = ti_const_val(t1) & max;
1382 if (v < max) {
1383 op->args[1] = arg_new_constant(ctx, v + 1);
1384 goto do_addco;
1385 }
1386 carry_out = 1;
1387 }
1388
1389 /* Adjust the opcode to remember the known carry-in. */
1390 op->opc = INDEX_op_addc1o;
1391 ctx->carry_state = carry_out;
1392 return finish_folding(ctx, op);
1393
1394 do_addco:
1395 op->opc = INDEX_op_addco;
1396 return fold_addco(ctx, op);
1397 }
1398
1399 static bool fold_addco(OptContext *ctx, TCGOp *op)
1400 {
1401 TempOptInfo *t1, *t2;
1402 int carry_out = -1;
1403 uint64_t ign;
1404
1405 fold_commutative(ctx, op);
1406 t1 = arg_info(op->args[1]);
1407 t2 = arg_info(op->args[2]);
1408
1409 if (ti_is_const(t2)) {
1410 uint64_t v2 = ti_const_val(t2);
1411
1412 if (ti_is_const(t1)) {
1413 uint64_t v1 = ti_const_val(t1);
1414 /* Given sign-extension of z_mask for I32, we need not truncate. */
1415 carry_out = uadd64_overflow(v1, v2, &ign);
1416 } else if (v2 == 0) {
1417 carry_out = 0;
1418 }
1419 } else {
1420 /*
1421 * The z_mask value is >= the maximum value that can be represented
1422 * with the known zero bits. So adding the z_mask values will not
1423 * overflow if and only if the true values cannot overflow.
1424 */
1425 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1426 carry_out = 0;
1427 }
1428 }
1429 ctx->carry_state = carry_out;
1430 return finish_folding(ctx, op);
1431 }
1432
1433 static bool fold_and(OptContext *ctx, TCGOp *op)
1434 {
1435 uint64_t z_mask, o_mask, s_mask, a_mask;
1436 TempOptInfo *t1, *t2;
1437
1438 if (fold_const2_commutative(ctx, op)) {
1439 return true;
1440 }
1441
1442 t1 = arg_info(op->args[1]);
1443 t2 = arg_info(op->args[2]);
1444
1445 z_mask = t1->z_mask & t2->z_mask;
1446 o_mask = t1->o_mask & t2->o_mask;
1447
1448 /*
1449 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1450 * Bitwise operations preserve the relative quantity of the repetitions.
1451 */
1452 s_mask = t1->s_mask & t2->s_mask;
1453
1454 /* Affected bits are those not known zero, masked by those known one. */
1455 a_mask = t1->z_mask & ~t2->o_mask;
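 /*
  * For example, an AND with constant 0xff affects nothing when the high
  * bits of the other operand are already known zero; a_mask == 0 then
  * lets fold_masks_zosa_int reduce the op to a simple copy.
  */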
1456
1457 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1458 if (op->opc == INDEX_op_and && ti_is_const(t2)) {
1459 /*
1460 * Canonicalize on extract, if valid. This aids x86 with its
1461 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1462 * which does not require matching operands. Other backends can
1463 * trivially expand the extract to AND during code generation.
1464 */
1465 uint64_t val = ti_const_val(t2);
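 /*
  * val & (val + 1) == 0 iff val is a contiguous mask of low bits
  * (0x0, 0x1, 0x3, 0x7, ...), exactly the constants that an
  * extract of bits [0, len) can reproduce.
  */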
1466 if (!(val & (val + 1))) {
1467 unsigned len = ctz64(~val);
1468 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1469 op->opc = INDEX_op_extract;
1470 op->args[2] = 0;
1471 op->args[3] = len;
1472 }
1473 }
1474 } else {
1475 fold_xx_to_x(ctx, op);
1476 }
1477 }
1478 return true;
1479 }
1480
1481 static bool fold_andc(OptContext *ctx, TCGOp *op)
1482 {
1483 uint64_t z_mask, o_mask, s_mask, a_mask;
1484 TempOptInfo *t1, *t2;
1485
1486 if (fold_const2(ctx, op)) {
1487 return true;
1488 }
1489
1490 t1 = arg_info(op->args[1]);
1491 t2 = arg_info(op->args[2]);
1492
1493 if (ti_is_const(t2)) {
1494 /* Fold andc r,x,i to and r,x,~i. */
1495 switch (ctx->type) {
1496 case TCG_TYPE_I32:
1497 case TCG_TYPE_I64:
1498 op->opc = INDEX_op_and;
1499 break;
1500 case TCG_TYPE_V64:
1501 case TCG_TYPE_V128:
1502 case TCG_TYPE_V256:
1503 op->opc = INDEX_op_and_vec;
1504 break;
1505 default:
1506 g_assert_not_reached();
1507 }
1508 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1509 return fold_and(ctx, op);
1510 }
1511 if (fold_xx_to_i(ctx, op, 0) ||
1512 fold_ix_to_not(ctx, op, -1)) {
1513 return true;
1514 }
1515
1516 z_mask = t1->z_mask & ~t2->o_mask;
1517 o_mask = t1->o_mask & ~t2->z_mask;
1518 s_mask = t1->s_mask & t2->s_mask;
1519
1520 /* Affected bits are those not known zero, masked by those known zero. */
1521 a_mask = t1->z_mask & t2->z_mask;
1522
1523 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
1524 }
1525
1526 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1527 {
1528 /* If true and false values are the same, eliminate the cmp. */
1529 if (args_are_copies(op->args[2], op->args[3])) {
1530 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1531 }
1532
1533 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1534 uint64_t tv = arg_const_val(op->args[2]);
1535 uint64_t fv = arg_const_val(op->args[3]);
1536
1537 if (tv == -1 && fv == 0) {
1538 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1539 }
1540 if (tv == 0 && fv == -1) {
1541 if (TCG_TARGET_HAS_not_vec) {
1542 op->opc = INDEX_op_not_vec;
1543 return fold_not(ctx, op);
1544 } else {
1545 op->opc = INDEX_op_xor_vec;
1546 op->args[2] = arg_new_constant(ctx, -1);
1547 return fold_xor(ctx, op);
1548 }
1549 }
1550 }
1551 if (arg_is_const(op->args[2])) {
1552 uint64_t tv = arg_const_val(op->args[2]);
1553 if (tv == -1) {
1554 op->opc = INDEX_op_or_vec;
1555 op->args[2] = op->args[3];
1556 return fold_or(ctx, op);
1557 }
1558 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1559 op->opc = INDEX_op_andc_vec;
1560 op->args[2] = op->args[1];
1561 op->args[1] = op->args[3];
1562 return fold_andc(ctx, op);
1563 }
1564 }
1565 if (arg_is_const(op->args[3])) {
1566 uint64_t fv = arg_const_val(op->args[3]);
1567 if (fv == 0) {
1568 op->opc = INDEX_op_and_vec;
1569 return fold_and(ctx, op);
1570 }
1571 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1572 TCGArg ta = op->args[2];
1573 op->opc = INDEX_op_orc_vec;
1574 op->args[2] = op->args[1];
1575 op->args[1] = ta;
1576 return fold_orc(ctx, op);
1577 }
1578 }
1579 return finish_folding(ctx, op);
1580 }
1581
1582 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1583 {
1584 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1585 &op->args[1], &op->args[2]);
1586 if (i == 0) {
1587 tcg_op_remove(ctx->tcg, op);
1588 return true;
1589 }
1590 if (i > 0) {
1591 op->opc = INDEX_op_br;
1592 op->args[0] = op->args[3];
1593 finish_ebb(ctx);
1594 } else {
1595 finish_bb(ctx);
1596 }
1597 return true;
1598 }
1599
1600 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1601 {
1602 TCGCond cond;
1603 TCGArg label;
1604 int i, inv = 0;
1605
1606 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1607 cond = op->args[4];
1608 label = op->args[5];
1609 if (i >= 0) {
1610 goto do_brcond_const;
1611 }
1612
1613 switch (cond) {
1614 case TCG_COND_LT:
1615 case TCG_COND_GE:
1616 /*
1617 * Simplify LT/GE comparisons vs zero to a single compare
1618 * vs the high word of the input.
1619 */
1620 if (arg_is_const_val(op->args[2], 0) &&
1621 arg_is_const_val(op->args[3], 0)) {
1622 goto do_brcond_high;
1623 }
1624 break;
1625
1626 case TCG_COND_NE:
1627 inv = 1;
1628 QEMU_FALLTHROUGH;
1629 case TCG_COND_EQ:
1630 /*
1631 * Simplify EQ/NE comparisons where one of the pairs
1632 * can be simplified.
1633 */
1634 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1635 op->args[2], cond);
1636 switch (i ^ inv) {
1637 case 0:
1638 goto do_brcond_const;
1639 case 1:
1640 goto do_brcond_high;
1641 }
1642
1643 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1644 op->args[3], cond);
1645 switch (i ^ inv) {
1646 case 0:
1647 goto do_brcond_const;
1648 case 1:
1649 goto do_brcond_low;
1650 }
1651 break;
1652
1653 case TCG_COND_TSTEQ:
1654 case TCG_COND_TSTNE:
1655 if (arg_is_const_val(op->args[2], 0)) {
1656 goto do_brcond_high;
1657 }
1658 if (arg_is_const_val(op->args[3], 0)) {
1659 goto do_brcond_low;
1660 }
1661 break;
1662
1663 default:
1664 break;
1665
1666 do_brcond_low:
1667 op->opc = INDEX_op_brcond;
1668 op->args[1] = op->args[2];
1669 op->args[2] = cond;
1670 op->args[3] = label;
1671 return fold_brcond(ctx, op);
1672
1673 do_brcond_high:
1674 op->opc = INDEX_op_brcond;
1675 op->args[0] = op->args[1];
1676 op->args[1] = op->args[3];
1677 op->args[2] = cond;
1678 op->args[3] = label;
1679 return fold_brcond(ctx, op);
1680
1681 do_brcond_const:
1682 if (i == 0) {
1683 tcg_op_remove(ctx->tcg, op);
1684 return true;
1685 }
1686 op->opc = INDEX_op_br;
1687 op->args[0] = label;
1688 finish_ebb(ctx);
1689 return true;
1690 }
1691
1692 finish_bb(ctx);
1693 return true;
1694 }
1695
1696 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1697 {
1698 uint64_t z_mask, o_mask, s_mask;
1699 TempOptInfo *t1 = arg_info(op->args[1]);
1700 int flags = op->args[2];
1701
1702 if (ti_is_const(t1)) {
1703 return tcg_opt_gen_movi(ctx, op, op->args[0],
1704 do_constant_folding(op->opc, ctx->type,
1705 ti_const_val(t1), flags));
1706 }
1707
1708 z_mask = t1->z_mask;
1709 o_mask = t1->o_mask;
1710 s_mask = 0;
1711
1712 switch (op->opc) {
1713 case INDEX_op_bswap16:
1714 z_mask = bswap16(z_mask);
1715 o_mask = bswap16(o_mask);
1716 if (flags & TCG_BSWAP_OS) {
1717 z_mask = (int16_t)z_mask;
1718 o_mask = (int16_t)o_mask;
1719 s_mask = INT16_MIN;
1720 } else if (!(flags & TCG_BSWAP_OZ)) {
1721 z_mask |= MAKE_64BIT_MASK(16, 48);
1722 }
1723 break;
1724 case INDEX_op_bswap32:
1725 z_mask = bswap32(z_mask);
1726 o_mask = bswap32(o_mask);
1727 if (flags & TCG_BSWAP_OS) {
1728 z_mask = (int32_t)z_mask;
1729 o_mask = (int32_t)o_mask;
1730 s_mask = INT32_MIN;
1731 } else if (!(flags & TCG_BSWAP_OZ)) {
1732 z_mask |= MAKE_64BIT_MASK(32, 32);
1733 }
1734 break;
1735 case INDEX_op_bswap64:
1736 z_mask = bswap64(z_mask);
1737 o_mask = bswap64(o_mask);
1738 break;
1739 default:
1740 g_assert_not_reached();
1741 }
1742
1743 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1744 }
1745
1746 static bool fold_call(OptContext *ctx, TCGOp *op)
1747 {
1748 TCGContext *s = ctx->tcg;
1749 int nb_oargs = TCGOP_CALLO(op);
1750 int nb_iargs = TCGOP_CALLI(op);
1751 int flags, i;
1752
1753 init_arguments(ctx, op, nb_oargs + nb_iargs);
1754 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1755
1756 /* If the function reads or writes globals, reset temp data. */
1757 flags = tcg_call_flags(op);
1758 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1759 int nb_globals = s->nb_globals;
1760
1761 for (i = 0; i < nb_globals; i++) {
1762 if (test_bit(i, ctx->temps_used.l)) {
1763 reset_ts(ctx, &ctx->tcg->temps[i]);
1764 }
1765 }
1766 }
1767
1768 /* If the function has side effects, reset mem data. */
1769 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1770 remove_mem_copy_all(ctx);
1771 }
1772
1773 /* Reset temp data for outputs. */
1774 for (i = 0; i < nb_oargs; i++) {
1775 reset_temp(ctx, op->args[i]);
1776 }
1777
1778 /* Stop optimizing MB across calls. */
1779 ctx->prev_mb = NULL;
1780 return true;
1781 }
1782
1783 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1784 {
1785 /* Canonicalize the comparison to put immediate second. */
1786 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1787 op->args[3] = tcg_swap_cond(op->args[3]);
1788 }
1789 return finish_folding(ctx, op);
1790 }
1791
1792 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1793 {
1794 /* If true and false values are the same, eliminate the cmp. */
1795 if (args_are_copies(op->args[3], op->args[4])) {
1796 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1797 }
1798
1799 /* Canonicalize the comparison to put immediate second. */
1800 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1801 op->args[5] = tcg_swap_cond(op->args[5]);
1802 }
1803 /*
1804 * Canonicalize the "false" input reg to match the destination,
1805 * so that the tcg backend can implement "move if true".
1806 */
1807 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1808 op->args[5] = tcg_invert_cond(op->args[5]);
1809 }
1810 return finish_folding(ctx, op);
1811 }
1812
1813 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1814 {
1815 uint64_t z_mask, s_mask;
1816 TempOptInfo *t1 = arg_info(op->args[1]);
1817 TempOptInfo *t2 = arg_info(op->args[2]);
1818
1819 if (ti_is_const(t1)) {
1820 uint64_t t = ti_const_val(t1);
1821
1822 if (t != 0) {
1823 t = do_constant_folding(op->opc, ctx->type, t, 0);
1824 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1825 }
1826 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1827 }
1828
1829 switch (ctx->type) {
1830 case TCG_TYPE_I32:
1831 z_mask = 31;
1832 break;
1833 case TCG_TYPE_I64:
1834 z_mask = 63;
1835 break;
1836 default:
1837 g_assert_not_reached();
1838 }
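 /*
  * The result is either a zero count (at most 31 or 63) or, when the
  * input is zero, a copy of arg2; combine the masks of both cases.
  */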
1839 s_mask = ~z_mask;
1840 z_mask |= t2->z_mask;
1841 s_mask &= t2->s_mask;
1842
1843 return fold_masks_zs(ctx, op, z_mask, s_mask);
1844 }
1845
1846 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1847 {
1848 uint64_t z_mask;
1849
1850 if (fold_const1(ctx, op)) {
1851 return true;
1852 }
1853
1854 switch (ctx->type) {
1855 case TCG_TYPE_I32:
1856 z_mask = 32 | 31;
1857 break;
1858 case TCG_TYPE_I64:
1859 z_mask = 64 | 63;
1860 break;
1861 default:
1862 g_assert_not_reached();
1863 }
1864 return fold_masks_z(ctx, op, z_mask);
1865 }
1866
1867 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1868 {
1869 TempOptInfo *t1 = arg_info(op->args[1]);
1870 TempOptInfo *t2 = arg_info(op->args[2]);
1871 int ofs = op->args[3];
1872 int len = op->args[4];
1873 int width = 8 * tcg_type_size(ctx->type);
1874 uint64_t z_mask, o_mask, s_mask;
1875
1876 if (ti_is_const(t1) && ti_is_const(t2)) {
1877 return tcg_opt_gen_movi(ctx, op, op->args[0],
1878 deposit64(ti_const_val(t1), ofs, len,
1879 ti_const_val(t2)));
1880 }
1881
1882 /* Inserting a value into zero at offset 0. */
1883 if (ti_is_const_val(t1, 0) && ofs == 0) {
1884 uint64_t mask = MAKE_64BIT_MASK(0, len);
1885
1886 op->opc = INDEX_op_and;
1887 op->args[1] = op->args[2];
1888 op->args[2] = arg_new_constant(ctx, mask);
1889 return fold_and(ctx, op);
1890 }
1891
1892 /* Inserting zero into a value. */
1893 if (ti_is_const_val(t2, 0)) {
1894 uint64_t mask = deposit64(-1, ofs, len, 0);
1895
1896 op->opc = INDEX_op_and;
1897 op->args[2] = arg_new_constant(ctx, mask);
1898 return fold_and(ctx, op);
1899 }
1900
1901 /* The s_mask from the top portion of the deposit is still valid. */
1902 if (ofs + len == width) {
1903 s_mask = t2->s_mask << ofs;
1904 } else {
1905 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1906 }
1907
1908 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1909 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1910
1911 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1912 }
1913
1914 static bool fold_divide(OptContext *ctx, TCGOp *op)
1915 {
1916 if (fold_const2(ctx, op) ||
1917 fold_xi_to_x(ctx, op, 1)) {
1918 return true;
1919 }
1920 return finish_folding(ctx, op);
1921 }
1922
1923 static bool fold_dup(OptContext *ctx, TCGOp *op)
1924 {
1925 if (arg_is_const(op->args[1])) {
1926 uint64_t t = arg_const_val(op->args[1]);
1927 t = dup_const(TCGOP_VECE(op), t);
1928 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1929 }
1930 return finish_folding(ctx, op);
1931 }
1932
1933 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1934 {
1935 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1936 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1937 arg_const_val(op->args[2]));
1938 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1939 }
1940
1941 if (args_are_copies(op->args[1], op->args[2])) {
1942 op->opc = INDEX_op_dup_vec;
1943 TCGOP_VECE(op) = MO_32;
1944 }
1945 return finish_folding(ctx, op);
1946 }
1947
1948 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1949 {
1950 uint64_t z_mask, o_mask, s_mask;
1951 TempOptInfo *t1, *t2;
1952
1953 if (fold_const2_commutative(ctx, op)) {
1954 return true;
1955 }
1956
1957 t2 = arg_info(op->args[2]);
1958 if (ti_is_const(t2)) {
1959 /* Fold eqv r,x,i to xor r,x,~i. */
1960 switch (ctx->type) {
1961 case TCG_TYPE_I32:
1962 case TCG_TYPE_I64:
1963 op->opc = INDEX_op_xor;
1964 break;
1965 case TCG_TYPE_V64:
1966 case TCG_TYPE_V128:
1967 case TCG_TYPE_V256:
1968 op->opc = INDEX_op_xor_vec;
1969 break;
1970 default:
1971 g_assert_not_reached();
1972 }
1973 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1974 return fold_xor(ctx, op);
1975 }
1976
1977 t1 = arg_info(op->args[1]);
1978
1979 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1980 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1981 s_mask = t1->s_mask & t2->s_mask;
1982
1983 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1984 }
1985
1986 static bool fold_extract(OptContext *ctx, TCGOp *op)
1987 {
1988 uint64_t z_mask, o_mask, a_mask;
1989 TempOptInfo *t1 = arg_info(op->args[1]);
1990 int pos = op->args[2];
1991 int len = op->args[3];
1992
1993 if (ti_is_const(t1)) {
1994 return tcg_opt_gen_movi(ctx, op, op->args[0],
1995 extract64(ti_const_val(t1), pos, len));
1996 }
1997
1998 z_mask = extract64(t1->z_mask, pos, len);
1999 o_mask = extract64(t1->o_mask, pos, len);
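    /*
     * With pos == 0 the extract is a no-op unless it drops possibly-set
     * high bits; a_mask marks the bits that would be dropped.  With
     * pos != 0 no such claim can be made, so a_mask is -1.
     */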
2000 a_mask = pos ? -1 : t1->z_mask ^ z_mask;
2001
2002 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2003 }
2004
2005 static bool fold_extract2(OptContext *ctx, TCGOp *op)
2006 {
2007 TempOptInfo *t1 = arg_info(op->args[1]);
2008 TempOptInfo *t2 = arg_info(op->args[2]);
2009 uint64_t z1 = t1->z_mask;
2010 uint64_t z2 = t2->z_mask;
2011 uint64_t o1 = t1->o_mask;
2012 uint64_t o2 = t2->o_mask;
2013 int shr = op->args[3];
2014
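    /*
     * extract2 produces (arg1 >> shr) | (arg2 << (width - shr)), so the
     * z and o masks of each input shift the same way as the values and
     * combine with OR.
     */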
2015 if (ctx->type == TCG_TYPE_I32) {
2016 z1 = (uint32_t)z1 >> shr;
2017 o1 = (uint32_t)o1 >> shr;
2018 z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2019 o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2020 } else {
2021 z1 >>= shr;
2022 o1 >>= shr;
2023 z2 <<= 64 - shr;
2024 o2 <<= 64 - shr;
2025 }
2026
2027 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2028 }
2029
2030 static bool fold_exts(OptContext *ctx, TCGOp *op)
2031 {
2032 uint64_t z_mask, o_mask, s_mask;
2033 TempOptInfo *t1;
2034
2035 if (fold_const1(ctx, op)) {
2036 return true;
2037 }
2038
2039 t1 = arg_info(op->args[1]);
2040 z_mask = t1->z_mask;
2041 o_mask = t1->o_mask;
2042 s_mask = t1->s_mask;
2043
2044 switch (op->opc) {
2045 case INDEX_op_ext_i32_i64:
2046 s_mask |= INT32_MIN;
2047 z_mask = (int32_t)z_mask;
2048 o_mask = (int32_t)o_mask;
2049 break;
2050 default:
2051 g_assert_not_reached();
2052 }
2053 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2054 }
2055
2056 static bool fold_extu(OptContext *ctx, TCGOp *op)
2057 {
2058 uint64_t z_mask, o_mask;
2059 TempOptInfo *t1;
2060
2061 if (fold_const1(ctx, op)) {
2062 return true;
2063 }
2064
2065 t1 = arg_info(op->args[1]);
2066 z_mask = t1->z_mask;
2067 o_mask = t1->o_mask;
2068
2069 switch (op->opc) {
2070 case INDEX_op_extrl_i64_i32:
2071 case INDEX_op_extu_i32_i64:
2072 z_mask = (uint32_t)z_mask;
2073 o_mask = (uint32_t)o_mask;
2074 break;
2075 case INDEX_op_extrh_i64_i32:
2076 z_mask >>= 32;
2077 o_mask >>= 32;
2078 break;
2079 default:
2080 g_assert_not_reached();
2081 }
2082 return fold_masks_zo(ctx, op, z_mask, o_mask);
2083 }
2084
2085 static bool fold_mb(OptContext *ctx, TCGOp *op)
2086 {
2087 /* Eliminate duplicate and redundant fence instructions. */
2088 if (ctx->prev_mb) {
2089 /*
2090 * Merge two barriers of the same type into one,
2091 * or a weaker barrier into a stronger one,
2092 * or two weaker barriers into a stronger one.
2093 * mb X; mb Y => mb X|Y
2094 * mb; strl => mb; st
2095 * ldaq; mb => ld; mb
2096 * ldaq; strl => ld; mb; st
2097 * Other combinations are also merged into a strong
2098 * barrier. This is stricter than specified but for
2099 * the purposes of TCG is better than not optimizing.
2100 */
2101 ctx->prev_mb->args[0] |= op->args[0];
2102 tcg_op_remove(ctx->tcg, op);
2103 } else {
2104 ctx->prev_mb = op;
2105 }
2106 return true;
2107 }
2108
2109 static bool fold_mov(OptContext *ctx, TCGOp *op)
2110 {
2111 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2112 }
2113
2114 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2115 {
2116 uint64_t z_mask, o_mask, s_mask;
2117 TempOptInfo *tt, *ft;
2118 int i;
2119
2120 /* If true and false values are the same, eliminate the cmp. */
2121 if (args_are_copies(op->args[3], op->args[4])) {
2122 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2123 }
2124
2125 /*
2126 * Canonicalize the "false" input reg to match the destination reg so
2127 * that the tcg backend can implement a "move if true" operation.
2128 */
2129 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2130 op->args[5] = tcg_invert_cond(op->args[5]);
2131 }
2132
2133 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2134 &op->args[2], &op->args[5]);
2135 if (i >= 0) {
2136 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2137 }
2138
2139 tt = arg_info(op->args[3]);
2140 ft = arg_info(op->args[4]);
2141 z_mask = tt->z_mask | ft->z_mask;
2142 o_mask = tt->o_mask & ft->o_mask;
2143 s_mask = tt->s_mask & ft->s_mask;
2144
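    /*
     * A movcond selecting between the constant pairs {1,0} or {-1,0}
     * is just setcond or negsetcond, with the condition inverted when
     * the zero is selected by the "true" case.
     */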
2145 if (ti_is_const(tt) && ti_is_const(ft)) {
2146 uint64_t tv = ti_const_val(tt);
2147 uint64_t fv = ti_const_val(ft);
2148 TCGCond cond = op->args[5];
2149
2150 if (tv == 1 && fv == 0) {
2151 op->opc = INDEX_op_setcond;
2152 op->args[3] = cond;
2153 } else if (fv == 1 && tv == 0) {
2154 op->opc = INDEX_op_setcond;
2155 op->args[3] = tcg_invert_cond(cond);
2156 } else if (tv == -1 && fv == 0) {
2157 op->opc = INDEX_op_negsetcond;
2158 op->args[3] = cond;
2159 } else if (fv == -1 && tv == 0) {
2160 op->opc = INDEX_op_negsetcond;
2161 op->args[3] = tcg_invert_cond(cond);
2162 }
2163 }
2164
2165 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2166 }
2167
2168 static bool fold_mul(OptContext *ctx, TCGOp *op)
2169 {
2170 if (fold_const2(ctx, op) ||
2171 fold_xi_to_i(ctx, op, 0) ||
2172 fold_xi_to_x(ctx, op, 1)) {
2173 return true;
2174 }
2175 return finish_folding(ctx, op);
2176 }
2177
2178 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2179 {
2180 if (fold_const2_commutative(ctx, op) ||
2181 fold_xi_to_i(ctx, op, 0)) {
2182 return true;
2183 }
2184 return finish_folding(ctx, op);
2185 }
2186
2187 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2188 {
2189 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2190
2191 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2192 uint64_t a = arg_const_val(op->args[2]);
2193 uint64_t b = arg_const_val(op->args[3]);
2194 uint64_t h, l;
2195 TCGArg rl, rh;
2196 TCGOp *op2;
2197
2198 switch (op->opc) {
2199 case INDEX_op_mulu2:
2200 if (ctx->type == TCG_TYPE_I32) {
2201 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2202 h = (int32_t)(l >> 32);
2203 l = (int32_t)l;
2204 } else {
2205 mulu64(&l, &h, a, b);
2206 }
2207 break;
2208 case INDEX_op_muls2:
2209 if (ctx->type == TCG_TYPE_I32) {
2210 l = (int64_t)(int32_t)a * (int32_t)b;
2211 h = l >> 32;
2212 l = (int32_t)l;
2213 } else {
2214 muls64(&l, &h, a, b);
2215 }
2216 break;
2217 default:
2218 g_assert_not_reached();
2219 }
2220
2221 rl = op->args[0];
2222 rh = op->args[1];
2223
2224 /* The proper opcode is supplied by tcg_opt_gen_mov. */
2225 op2 = opt_insert_before(ctx, op, 0, 2);
2226
2227 tcg_opt_gen_movi(ctx, op, rl, l);
2228 tcg_opt_gen_movi(ctx, op2, rh, h);
2229 return true;
2230 }
2231 return finish_folding(ctx, op);
2232 }
2233
2234 static bool fold_nand(OptContext *ctx, TCGOp *op)
2235 {
2236 uint64_t z_mask, o_mask, s_mask;
2237 TempOptInfo *t1, *t2;
2238
2239 if (fold_const2_commutative(ctx, op) ||
2240 fold_xi_to_not(ctx, op, -1)) {
2241 return true;
2242 }
2243
2244 t1 = arg_info(op->args[1]);
2245 t2 = arg_info(op->args[2]);
2246
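    /*
     * For nand, ~(x & y), a result bit is known zero only where both
     * inputs are known one, and known one where either input is known zero.
     */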
2247 z_mask = ~(t1->o_mask & t2->o_mask);
2248 o_mask = ~(t1->z_mask & t2->z_mask);
2249 s_mask = t1->s_mask & t2->s_mask;
2250
2251 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2252 }
2253
2254 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2255 {
2256     /* Set to 1 the lowest possibly-set bit and all bits to its left. */
2257 uint64_t z_mask = arg_info(op->args[1])->z_mask;
2258 z_mask = -(z_mask & -z_mask);
2259
2260 return fold_masks_z(ctx, op, z_mask);
2261 }
2262
2263 static bool fold_neg(OptContext *ctx, TCGOp *op)
2264 {
2265 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2266 }
2267
2268 static bool fold_nor(OptContext *ctx, TCGOp *op)
2269 {
2270 uint64_t z_mask, o_mask, s_mask;
2271 TempOptInfo *t1, *t2;
2272
2273 if (fold_const2_commutative(ctx, op) ||
2274 fold_xi_to_not(ctx, op, 0)) {
2275 return true;
2276 }
2277
2278 t1 = arg_info(op->args[1]);
2279 t2 = arg_info(op->args[2]);
2280
2281 z_mask = ~(t1->o_mask | t2->o_mask);
2282 o_mask = ~(t1->z_mask | t2->z_mask);
2283 s_mask = t1->s_mask & t2->s_mask;
2284
2285 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2286 }
2287
2288 static bool fold_not(OptContext *ctx, TCGOp *op)
2289 {
2290 TempOptInfo *t1;
2291
2292 if (fold_const1(ctx, op)) {
2293 return true;
2294 }
2295
2296 t1 = arg_info(op->args[1]);
2297 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2298 }
2299
2300 static bool fold_or(OptContext *ctx, TCGOp *op)
2301 {
2302 uint64_t z_mask, o_mask, s_mask, a_mask;
2303 TempOptInfo *t1, *t2;
2304
2305 if (fold_const2_commutative(ctx, op) ||
2306 fold_xi_to_x(ctx, op, 0) ||
2307 fold_xx_to_x(ctx, op)) {
2308 return true;
2309 }
2310
2311 t1 = arg_info(op->args[1]);
2312 t2 = arg_info(op->args[2]);
2313
2314 z_mask = t1->z_mask | t2->z_mask;
2315 o_mask = t1->o_mask | t2->o_mask;
2316 s_mask = t1->s_mask & t2->s_mask;
2317
2318 /* Affected bits are those not known one, masked by those known zero. */
2319 a_mask = ~t1->o_mask & t2->z_mask;
2320
2321 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2322 }
2323
2324 static bool fold_orc(OptContext *ctx, TCGOp *op)
2325 {
2326 uint64_t z_mask, o_mask, s_mask, a_mask;
2327 TempOptInfo *t1, *t2;
2328
2329 if (fold_const2(ctx, op)) {
2330 return true;
2331 }
2332
2333 t2 = arg_info(op->args[2]);
2334 if (ti_is_const(t2)) {
2335 /* Fold orc r,x,i to or r,x,~i. */
2336 switch (ctx->type) {
2337 case TCG_TYPE_I32:
2338 case TCG_TYPE_I64:
2339 op->opc = INDEX_op_or;
2340 break;
2341 case TCG_TYPE_V64:
2342 case TCG_TYPE_V128:
2343 case TCG_TYPE_V256:
2344 op->opc = INDEX_op_or_vec;
2345 break;
2346 default:
2347 g_assert_not_reached();
2348 }
2349 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2350 return fold_or(ctx, op);
2351 }
2352 if (fold_xx_to_i(ctx, op, -1) ||
2353 fold_ix_to_not(ctx, op, 0)) {
2354 return true;
2355 }
2356 t1 = arg_info(op->args[1]);
2357
2358 z_mask = t1->z_mask | ~t2->o_mask;
2359 o_mask = t1->o_mask | ~t2->z_mask;
2360 s_mask = t1->s_mask & t2->s_mask;
2361
2362 /* Affected bits are those not known one, masked by those known one. */
2363 a_mask = ~t1->o_mask & ~t2->o_mask;
2364
2365 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2366 }
2367
2368 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2369 {
2370 const TCGOpDef *def = &tcg_op_defs[op->opc];
2371 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2372 MemOp mop = get_memop(oi);
2373 int width = 8 * memop_size(mop);
2374 uint64_t z_mask = -1, s_mask = 0;
2375
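    /*
     * A load narrower than 64 bits determines the high bits of the
     * result: zeros for an unsigned load, copies of the sign bit for a
     * signed load.
     */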
2376 if (width < 64) {
2377 if (mop & MO_SIGN) {
2378 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2379 } else {
2380 z_mask = MAKE_64BIT_MASK(0, width);
2381 }
2382 }
2383
2384 /* Opcodes that touch guest memory stop the mb optimization. */
2385 ctx->prev_mb = NULL;
2386
2387 return fold_masks_zs(ctx, op, z_mask, s_mask);
2388 }
2389
2390 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2391 {
2392 /* Opcodes that touch guest memory stop the mb optimization. */
2393 ctx->prev_mb = NULL;
2394 return finish_folding(ctx, op);
2395 }
2396
2397 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2398 {
2399 /* Opcodes that touch guest memory stop the mb optimization. */
2400 ctx->prev_mb = NULL;
2401 return true;
2402 }
2403
2404 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2405 {
2406 if (fold_const2(ctx, op) ||
2407 fold_xx_to_i(ctx, op, 0)) {
2408 return true;
2409 }
2410 return finish_folding(ctx, op);
2411 }
2412
2413 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2414 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2415 {
2416 uint64_t a_zmask, b_val;
2417 TCGCond cond;
2418
2419 if (!arg_is_const(op->args[2])) {
2420         return 0;
2421 }
2422
2423 a_zmask = arg_info(op->args[1])->z_mask;
2424 b_val = arg_const_val(op->args[2]);
2425 cond = op->args[3];
2426
2427 if (ctx->type == TCG_TYPE_I32) {
2428 a_zmask = (uint32_t)a_zmask;
2429 b_val = (uint32_t)b_val;
2430 }
2431
2432 /*
2433 * A with only low bits set vs B with high bits set means that A < B.
2434 */
2435 if (a_zmask < b_val) {
2436 bool inv = false;
2437
2438 switch (cond) {
2439 case TCG_COND_NE:
2440 case TCG_COND_LEU:
2441 case TCG_COND_LTU:
2442 inv = true;
2443 /* fall through */
2444 case TCG_COND_GTU:
2445 case TCG_COND_GEU:
2446 case TCG_COND_EQ:
2447 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2448 default:
2449 break;
2450 }
2451 }
2452
2453 /*
2454 * A with only lsb set is already boolean.
2455 */
2456 if (a_zmask <= 1) {
2457 bool convert = false;
2458 bool inv = false;
2459
2460 switch (cond) {
2461 case TCG_COND_EQ:
2462 inv = true;
2463 /* fall through */
2464 case TCG_COND_NE:
2465 convert = (b_val == 0);
2466 break;
2467 case TCG_COND_LTU:
2468 case TCG_COND_TSTEQ:
2469 inv = true;
2470 /* fall through */
2471 case TCG_COND_GEU:
2472 case TCG_COND_TSTNE:
2473 convert = (b_val == 1);
2474 break;
2475 default:
2476 break;
2477 }
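        /*
         * The input is already boolean: the result is a plain copy, a
         * negation, an xor with 1, or an add of -1 (x - 1 == -(x ^ 1)
         * for boolean x), depending on the inv/neg combination.
         */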
2478 if (convert) {
2479 if (!inv && !neg) {
2480 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2481 }
2482
2483 if (!inv) {
2484 op->opc = INDEX_op_neg;
2485 } else if (neg) {
2486 op->opc = INDEX_op_add;
2487 op->args[2] = arg_new_constant(ctx, -1);
2488 } else {
2489 op->opc = INDEX_op_xor;
2490 op->args[2] = arg_new_constant(ctx, 1);
2491 }
2492 return -1;
2493 }
2494 }
2495 return 0;
2496 }
2497
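/*
 * For TSTNE/TSTEQ against a power-of-2 constant, the result is just the
 * tested bit: produce it with sextract/extract when the backend allows,
 * otherwise with shift+and, then apply any required inversion/negation.
 */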
2498 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2499 {
2500 TCGCond cond = op->args[3];
2501 TCGArg ret, src1, src2;
2502 TCGOp *op2;
2503 uint64_t val;
2504 int sh;
2505 bool inv;
2506
2507 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2508 return;
2509 }
2510
2511 src2 = op->args[2];
2512 val = arg_const_val(src2);
2513 if (!is_power_of_2(val)) {
2514 return;
2515 }
2516 sh = ctz64(val);
2517
2518 ret = op->args[0];
2519 src1 = op->args[1];
2520 inv = cond == TCG_COND_TSTEQ;
2521
2522 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2523 op->opc = INDEX_op_sextract;
2524 op->args[1] = src1;
2525 op->args[2] = sh;
2526 op->args[3] = 1;
2527 return;
2528 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2529 op->opc = INDEX_op_extract;
2530 op->args[1] = src1;
2531 op->args[2] = sh;
2532 op->args[3] = 1;
2533 } else {
2534 if (sh) {
2535 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2536 op2->args[0] = ret;
2537 op2->args[1] = src1;
2538 op2->args[2] = arg_new_constant(ctx, sh);
2539 src1 = ret;
2540 }
2541 op->opc = INDEX_op_and;
2542 op->args[1] = src1;
2543 op->args[2] = arg_new_constant(ctx, 1);
2544 }
2545
2546 if (neg && inv) {
2547 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2548 op2->args[0] = ret;
2549 op2->args[1] = ret;
2550 op2->args[2] = arg_new_constant(ctx, -1);
2551 } else if (inv) {
2552 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2553 op2->args[0] = ret;
2554 op2->args[1] = ret;
2555 op2->args[2] = arg_new_constant(ctx, 1);
2556 } else if (neg) {
2557 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2558 op2->args[0] = ret;
2559 op2->args[1] = ret;
2560 }
2561 }
2562
2563 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2564 {
2565 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2566 &op->args[2], &op->args[3]);
2567 if (i >= 0) {
2568 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2569 }
2570
2571 i = fold_setcond_zmask(ctx, op, false);
2572 if (i > 0) {
2573 return true;
2574 }
2575 if (i == 0) {
2576 fold_setcond_tst_pow2(ctx, op, false);
2577 }
2578
2579 return fold_masks_z(ctx, op, 1);
2580 }
2581
2582 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2583 {
2584 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2585 &op->args[2], &op->args[3]);
2586 if (i >= 0) {
2587 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2588 }
2589
2590 i = fold_setcond_zmask(ctx, op, true);
2591 if (i > 0) {
2592 return true;
2593 }
2594 if (i == 0) {
2595 fold_setcond_tst_pow2(ctx, op, true);
2596 }
2597
2598 /* Value is {0,-1} so all bits are repetitions of the sign. */
2599 return fold_masks_s(ctx, op, -1);
2600 }
2601
2602 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2603 {
2604 TCGCond cond;
2605 int i, inv = 0;
2606
2607 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2608 cond = op->args[5];
2609 if (i >= 0) {
2610 goto do_setcond_const;
2611 }
2612
2613 switch (cond) {
2614 case TCG_COND_LT:
2615 case TCG_COND_GE:
2616 /*
2617 * Simplify LT/GE comparisons vs zero to a single compare
2618 * vs the high word of the input.
2619 */
2620 if (arg_is_const_val(op->args[3], 0) &&
2621 arg_is_const_val(op->args[4], 0)) {
2622 goto do_setcond_high;
2623 }
2624 break;
2625
2626 case TCG_COND_NE:
2627 inv = 1;
2628 QEMU_FALLTHROUGH;
2629 case TCG_COND_EQ:
2630 /*
2631 * Simplify EQ/NE comparisons where one of the pairs
2632 * can be simplified.
2633 */
2634 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2635 op->args[3], cond);
2636 switch (i ^ inv) {
2637 case 0:
2638 goto do_setcond_const;
2639 case 1:
2640 goto do_setcond_high;
2641 }
2642
2643 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2644 op->args[4], cond);
2645 switch (i ^ inv) {
2646 case 0:
2647 goto do_setcond_const;
2648 case 1:
2649 goto do_setcond_low;
2650 }
2651 break;
2652
2653 case TCG_COND_TSTEQ:
2654 case TCG_COND_TSTNE:
2655 if (arg_is_const_val(op->args[3], 0)) {
2656 goto do_setcond_high;
2657 }
2658 if (arg_is_const_val(op->args[4], 0)) {
2659 goto do_setcond_low;
2660 }
2661 break;
2662
2663 default:
2664 break;
2665
2666 do_setcond_low:
2667 op->args[2] = op->args[3];
2668 op->args[3] = cond;
2669 op->opc = INDEX_op_setcond;
2670 return fold_setcond(ctx, op);
2671
2672 do_setcond_high:
2673 op->args[1] = op->args[2];
2674 op->args[2] = op->args[4];
2675 op->args[3] = cond;
2676 op->opc = INDEX_op_setcond;
2677 return fold_setcond(ctx, op);
2678 }
2679
2680 return fold_masks_z(ctx, op, 1);
2681
2682 do_setcond_const:
2683 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2684 }
2685
2686 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2687 {
2688 uint64_t z_mask, o_mask, s_mask, a_mask;
2689 TempOptInfo *t1 = arg_info(op->args[1]);
2690 int pos = op->args[2];
2691 int len = op->args[3];
2692
2693 if (ti_is_const(t1)) {
2694 return tcg_opt_gen_movi(ctx, op, op->args[0],
2695 sextract64(ti_const_val(t1), pos, len));
2696 }
2697
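    /*
     * Every bit from len-1 upward repeats the extracted sign bit, and
     * any extra sign bits of the input survive the shift by pos.  With
     * pos == 0, the operation is a no-op (a_mask == 0) if the input
     * already has at least that many sign bits.
     */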
2698 s_mask = t1->s_mask >> pos;
2699 s_mask |= -1ull << (len - 1);
2700 a_mask = pos ? -1 : s_mask & ~t1->s_mask;
2701
2702 z_mask = sextract64(t1->z_mask, pos, len);
2703 o_mask = sextract64(t1->o_mask, pos, len);
2704
2705 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2706 }
2707
2708 static bool fold_shift(OptContext *ctx, TCGOp *op)
2709 {
2710 uint64_t s_mask, z_mask, o_mask;
2711 TempOptInfo *t1, *t2;
2712
2713 if (fold_const2(ctx, op) ||
2714 fold_ix_to_i(ctx, op, 0) ||
2715 fold_xi_to_x(ctx, op, 0)) {
2716 return true;
2717 }
2718
2719 t1 = arg_info(op->args[1]);
2720 t2 = arg_info(op->args[2]);
2721 s_mask = t1->s_mask;
2722 z_mask = t1->z_mask;
2723 o_mask = t1->o_mask;
2724
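    /*
     * With a constant shift count, the z/o/s masks are transformed by
     * the same shift to give the result's masks.
     */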
2725 if (ti_is_const(t2)) {
2726 int sh = ti_const_val(t2);
2727
2728 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2729 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2730 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2731
2732 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2733 }
2734
2735 switch (op->opc) {
2736 case INDEX_op_sar:
2737 /*
2738 * Arithmetic right shift will not reduce the number of
2739 * input sign repetitions.
2740 */
2741 return fold_masks_s(ctx, op, s_mask);
2742 case INDEX_op_shr:
2743 /*
2744 * If the sign bit is known zero, then logical right shift
2745 * will not reduce the number of input sign repetitions.
2746 */
2747 if (~z_mask & -s_mask) {
2748 return fold_masks_s(ctx, op, s_mask);
2749 }
2750 break;
2751 default:
2752 break;
2753 }
2754
2755 return finish_folding(ctx, op);
2756 }
2757
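/* Transform "sub r, 0, x" into "neg r, x", when the negate op is available. */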
2758 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2759 {
2760 TCGOpcode neg_op;
2761 bool have_neg;
2762
2763 if (!arg_is_const_val(op->args[1], 0)) {
2764 return false;
2765 }
2766
2767 switch (ctx->type) {
2768 case TCG_TYPE_I32:
2769 case TCG_TYPE_I64:
2770 neg_op = INDEX_op_neg;
2771 have_neg = true;
2772 break;
2773 case TCG_TYPE_V64:
2774 case TCG_TYPE_V128:
2775 case TCG_TYPE_V256:
2776 neg_op = INDEX_op_neg_vec;
2777 have_neg = (TCG_TARGET_HAS_neg_vec &&
2778 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2779 break;
2780 default:
2781 g_assert_not_reached();
2782 }
2783 if (have_neg) {
2784 op->opc = neg_op;
2785 op->args[1] = op->args[2];
2786 return fold_neg_no_const(ctx, op);
2787 }
2788 return false;
2789 }
2790
2791 /* We cannot as yet do_constant_folding with vectors. */
2792 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2793 {
2794 if (fold_xx_to_i(ctx, op, 0) ||
2795 fold_xi_to_x(ctx, op, 0) ||
2796 fold_sub_to_neg(ctx, op)) {
2797 return true;
2798 }
2799 return finish_folding(ctx, op);
2800 }
2801
2802 static bool fold_sub(OptContext *ctx, TCGOp *op)
2803 {
2804 if (fold_const2(ctx, op) ||
2805 fold_xx_to_i(ctx, op, 0) ||
2806 fold_xi_to_x(ctx, op, 0) ||
2807 fold_sub_to_neg(ctx, op)) {
2808 return true;
2809 }
2810
2811 /* Fold sub r,x,i to add r,x,-i */
2812 if (arg_is_const(op->args[2])) {
2813 uint64_t val = arg_const_val(op->args[2]);
2814
2815 op->opc = INDEX_op_add;
2816 op->args[2] = arg_new_constant(ctx, -val);
2817 }
2818 return finish_folding(ctx, op);
2819 }
2820
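/*
 * Called when the borrow-out of the preceding subtract has been folded
 * to a constant: rewrite that op so it no longer produces a borrow-out.
 */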
2821 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2822 {
2823 TempOptInfo *t2;
2824
2825 op = QTAILQ_PREV(op, link);
2826 switch (op->opc) {
2827 case INDEX_op_subbo:
2828 op->opc = INDEX_op_sub;
2829 fold_sub(ctx, op);
2830 break;
2831 case INDEX_op_subbio:
2832 op->opc = INDEX_op_subbi;
2833 break;
2834 case INDEX_op_subb1o:
2835 t2 = arg_info(op->args[2]);
2836 if (ti_is_const(t2)) {
2837 op->opc = INDEX_op_add;
2838 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2839 /* Perform other constant folding, if needed. */
2840 fold_add(ctx, op);
2841 } else {
2842 TCGArg ret = op->args[0];
2843 op->opc = INDEX_op_sub;
2844 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2845 op->args[0] = ret;
2846 op->args[1] = ret;
2847 op->args[2] = arg_new_constant(ctx, -1);
2848 }
2849 break;
2850 default:
2851 g_assert_not_reached();
2852 }
2853 }
2854
2855 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2856 {
2857 TempOptInfo *t2;
2858 int borrow_in = ctx->carry_state;
2859
2860 if (borrow_in < 0) {
2861 return finish_folding(ctx, op);
2862 }
2863 ctx->carry_state = -1;
2864
2865 squash_prev_borrowout(ctx, op);
2866 if (borrow_in == 0) {
2867 op->opc = INDEX_op_sub;
2868 return fold_sub(ctx, op);
2869 }
2870
2871 /*
2872 * Propagate the known carry-in into any constant, then negate to
2873 * transform from sub to add. If there is no constant, emit a
2874 * separate add -1.
2875 */
2876 t2 = arg_info(op->args[2]);
2877 if (ti_is_const(t2)) {
2878 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2879 } else {
2880 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2881
2882 op2->args[0] = op->args[0];
2883 op2->args[1] = op->args[1];
2884 op2->args[2] = op->args[2];
2885 fold_sub(ctx, op2);
2886
2887 op->args[1] = op->args[0];
2888 op->args[2] = arg_new_constant(ctx, -1);
2889 }
2890 op->opc = INDEX_op_add;
2891 return fold_add(ctx, op);
2892 }
2893
2894 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2895 {
2896 TempOptInfo *t1, *t2;
2897 int borrow_out = -1;
2898
2899 if (ctx->carry_state < 0) {
2900 return finish_folding(ctx, op);
2901 }
2902
2903 squash_prev_borrowout(ctx, op);
2904 if (ctx->carry_state == 0) {
2905 goto do_subbo;
2906 }
2907
2908 t1 = arg_info(op->args[1]);
2909 t2 = arg_info(op->args[2]);
2910
2911 /* Propagate the known borrow-in into a constant, if possible. */
2912 if (ti_is_const(t2)) {
2913 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2914 uint64_t v = ti_const_val(t2) & max;
2915
2916 if (v < max) {
2917 op->args[2] = arg_new_constant(ctx, v + 1);
2918 goto do_subbo;
2919 }
2920 /* subtracting max + 1 produces known borrow out. */
2921 borrow_out = 1;
2922 }
2923 if (ti_is_const(t1)) {
2924 uint64_t v = ti_const_val(t1);
2925 if (v != 0) {
2926 op->args[2] = arg_new_constant(ctx, v - 1);
2927 goto do_subbo;
2928 }
2929 }
2930
2931 /* Adjust the opcode to remember the known carry-in. */
2932 op->opc = INDEX_op_subb1o;
2933 ctx->carry_state = borrow_out;
2934 return finish_folding(ctx, op);
2935
2936 do_subbo:
2937 op->opc = INDEX_op_subbo;
2938 return fold_subbo(ctx, op);
2939 }
2940
2941 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2942 {
2943 TempOptInfo *t1 = arg_info(op->args[1]);
2944 TempOptInfo *t2 = arg_info(op->args[2]);
2945 int borrow_out = -1;
2946
2947 if (ti_is_const(t2)) {
2948 uint64_t v2 = ti_const_val(t2);
2949 if (v2 == 0) {
2950 borrow_out = 0;
2951 } else if (ti_is_const(t1)) {
2952 uint64_t v1 = ti_const_val(t1);
2953 borrow_out = v1 < v2;
2954 }
2955 }
2956 ctx->carry_state = borrow_out;
2957 return finish_folding(ctx, op);
2958 }
2959
2960 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2961 {
2962 uint64_t z_mask = -1, s_mask = 0;
2963
2964 /* We can't do any folding with a load, but we can record bits. */
2965 switch (op->opc) {
2966 case INDEX_op_ld8s:
2967 s_mask = INT8_MIN;
2968 break;
2969 case INDEX_op_ld8u:
2970 z_mask = MAKE_64BIT_MASK(0, 8);
2971 break;
2972 case INDEX_op_ld16s:
2973 s_mask = INT16_MIN;
2974 break;
2975 case INDEX_op_ld16u:
2976 z_mask = MAKE_64BIT_MASK(0, 16);
2977 break;
2978 case INDEX_op_ld32s:
2979 s_mask = INT32_MIN;
2980 break;
2981 case INDEX_op_ld32u:
2982 z_mask = MAKE_64BIT_MASK(0, 32);
2983 break;
2984 default:
2985 g_assert_not_reached();
2986 }
2987 return fold_masks_zs(ctx, op, z_mask, s_mask);
2988 }
2989
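/*
 * A load from the TCG env can be satisfied by a temp already known to
 * hold the value stored at that offset; otherwise record the loaded
 * value so that later accesses may reuse it.
 */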
2990 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2991 {
2992 TCGTemp *dst, *src;
2993 intptr_t ofs;
2994 TCGType type;
2995
2996 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2997 return finish_folding(ctx, op);
2998 }
2999
3000 type = ctx->type;
3001 ofs = op->args[2];
3002 dst = arg_temp(op->args[0]);
3003 src = find_mem_copy_for(ctx, type, ofs);
3004 if (src && src->base_type == type) {
3005 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3006 }
3007
3008 reset_ts(ctx, dst);
3009 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3010 return true;
3011 }
3012
3013 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3014 {
3015 intptr_t ofs = op->args[2];
3016 intptr_t lm1;
3017
3018 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3019 remove_mem_copy_all(ctx);
3020 return true;
3021 }
3022
3023 switch (op->opc) {
3024 case INDEX_op_st8:
3025 lm1 = 0;
3026 break;
3027 case INDEX_op_st16:
3028 lm1 = 1;
3029 break;
3030 case INDEX_op_st32:
3031 lm1 = 3;
3032 break;
3033 case INDEX_op_st:
3034 case INDEX_op_st_vec:
3035 lm1 = tcg_type_size(ctx->type) - 1;
3036 break;
3037 default:
3038 g_assert_not_reached();
3039 }
3040 remove_mem_copy_in(ctx, ofs, ofs + lm1);
3041 return true;
3042 }
3043
3044 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3045 {
3046 TCGTemp *src;
3047 intptr_t ofs, last;
3048 TCGType type;
3049
3050 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3051 return fold_tcg_st(ctx, op);
3052 }
3053
3054 src = arg_temp(op->args[0]);
3055 ofs = op->args[2];
3056 type = ctx->type;
3057
3058 /*
3059 * Eliminate duplicate stores of a constant.
3060 * This happens frequently when the target ISA zero-extends.
3061 */
3062 if (ts_is_const(src)) {
3063 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3064 if (src == prev) {
3065 tcg_op_remove(ctx->tcg, op);
3066 return true;
3067 }
3068 }
3069
3070 last = ofs + tcg_type_size(type) - 1;
3071 remove_mem_copy_in(ctx, ofs, last);
3072 record_mem_copy(ctx, type, src, ofs, last);
3073 return true;
3074 }
3075
3076 static bool fold_xor(OptContext *ctx, TCGOp *op)
3077 {
3078 uint64_t z_mask, o_mask, s_mask;
3079 TempOptInfo *t1, *t2;
3080
3081 if (fold_const2_commutative(ctx, op) ||
3082 fold_xx_to_i(ctx, op, 0) ||
3083 fold_xi_to_x(ctx, op, 0) ||
3084 fold_xi_to_not(ctx, op, -1)) {
3085 return true;
3086 }
3087
3088 t1 = arg_info(op->args[1]);
3089 t2 = arg_info(op->args[2]);
3090
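    /*
     * For xor, a result bit is known zero where the inputs are known
     * equal and known one where one input is known one and the other
     * known zero.
     */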
3091 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3092 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3093 s_mask = t1->s_mask & t2->s_mask;
3094
3095 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3096 }
3097
3098 /* Propagate constants and copies, fold constant expressions. */
3099 void tcg_optimize(TCGContext *s)
3100 {
3101 int nb_temps, i;
3102 TCGOp *op, *op_next;
3103 OptContext ctx = { .tcg = s };
3104
3105 QSIMPLEQ_INIT(&ctx.mem_free);
3106
3107     /* Each temp's state_ptr points to a TempOptInfo describing what is
3108        known about it.  If the temp holds a known constant, its value is
3109        recorded there via the z/o masks; all copies of the same value are
3110        linked through a doubly linked circular list. */
3111
3112 nb_temps = s->nb_temps;
3113 for (i = 0; i < nb_temps; ++i) {
3114 s->temps[i].state_ptr = NULL;
3115 }
3116
3117 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3118 TCGOpcode opc = op->opc;
3119 const TCGOpDef *def;
3120 bool done = false;
3121
3122 /* Calls are special. */
3123 if (opc == INDEX_op_call) {
3124 fold_call(&ctx, op);
3125 continue;
3126 }
3127
3128 def = &tcg_op_defs[opc];
3129 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3130 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3131
3132 /* Pre-compute the type of the operation. */
3133 ctx.type = TCGOP_TYPE(op);
3134
3135 /*
3136 * Process each opcode.
3137 * Sorted alphabetically by opcode as much as possible.
3138 */
3139 switch (opc) {
3140 case INDEX_op_add:
3141 done = fold_add(&ctx, op);
3142 break;
3143 case INDEX_op_add_vec:
3144 done = fold_add_vec(&ctx, op);
3145 break;
3146 case INDEX_op_addci:
3147 done = fold_addci(&ctx, op);
3148 break;
3149 case INDEX_op_addcio:
3150 done = fold_addcio(&ctx, op);
3151 break;
3152 case INDEX_op_addco:
3153 done = fold_addco(&ctx, op);
3154 break;
3155 case INDEX_op_and:
3156 case INDEX_op_and_vec:
3157 done = fold_and(&ctx, op);
3158 break;
3159 case INDEX_op_andc:
3160 case INDEX_op_andc_vec:
3161 done = fold_andc(&ctx, op);
3162 break;
3163 case INDEX_op_brcond:
3164 done = fold_brcond(&ctx, op);
3165 break;
3166 case INDEX_op_brcond2_i32:
3167 done = fold_brcond2(&ctx, op);
3168 break;
3169 case INDEX_op_bswap16:
3170 case INDEX_op_bswap32:
3171 case INDEX_op_bswap64:
3172 done = fold_bswap(&ctx, op);
3173 break;
3174 case INDEX_op_clz:
3175 case INDEX_op_ctz:
3176 done = fold_count_zeros(&ctx, op);
3177 break;
3178 case INDEX_op_ctpop:
3179 done = fold_ctpop(&ctx, op);
3180 break;
3181 case INDEX_op_deposit:
3182 done = fold_deposit(&ctx, op);
3183 break;
3184 case INDEX_op_divs:
3185 case INDEX_op_divu:
3186 done = fold_divide(&ctx, op);
3187 break;
3188 case INDEX_op_dup_vec:
3189 done = fold_dup(&ctx, op);
3190 break;
3191 case INDEX_op_dup2_vec:
3192 done = fold_dup2(&ctx, op);
3193 break;
3194 case INDEX_op_eqv:
3195 case INDEX_op_eqv_vec:
3196 done = fold_eqv(&ctx, op);
3197 break;
3198 case INDEX_op_extract:
3199 done = fold_extract(&ctx, op);
3200 break;
3201 case INDEX_op_extract2:
3202 done = fold_extract2(&ctx, op);
3203 break;
3204 case INDEX_op_ext_i32_i64:
3205 done = fold_exts(&ctx, op);
3206 break;
3207 case INDEX_op_extu_i32_i64:
3208 case INDEX_op_extrl_i64_i32:
3209 case INDEX_op_extrh_i64_i32:
3210 done = fold_extu(&ctx, op);
3211 break;
3212 case INDEX_op_ld8s:
3213 case INDEX_op_ld8u:
3214 case INDEX_op_ld16s:
3215 case INDEX_op_ld16u:
3216 case INDEX_op_ld32s:
3217 case INDEX_op_ld32u:
3218 done = fold_tcg_ld(&ctx, op);
3219 break;
3220 case INDEX_op_ld:
3221 case INDEX_op_ld_vec:
3222 done = fold_tcg_ld_memcopy(&ctx, op);
3223 break;
3224 case INDEX_op_st8:
3225 case INDEX_op_st16:
3226 case INDEX_op_st32:
3227 done = fold_tcg_st(&ctx, op);
3228 break;
3229 case INDEX_op_st:
3230 case INDEX_op_st_vec:
3231 done = fold_tcg_st_memcopy(&ctx, op);
3232 break;
3233 case INDEX_op_mb:
3234 done = fold_mb(&ctx, op);
3235 break;
3236 case INDEX_op_mov:
3237 case INDEX_op_mov_vec:
3238 done = fold_mov(&ctx, op);
3239 break;
3240 case INDEX_op_movcond:
3241 done = fold_movcond(&ctx, op);
3242 break;
3243 case INDEX_op_mul:
3244 done = fold_mul(&ctx, op);
3245 break;
3246 case INDEX_op_mulsh:
3247 case INDEX_op_muluh:
3248 done = fold_mul_highpart(&ctx, op);
3249 break;
3250 case INDEX_op_muls2:
3251 case INDEX_op_mulu2:
3252 done = fold_multiply2(&ctx, op);
3253 break;
3254 case INDEX_op_nand:
3255 case INDEX_op_nand_vec:
3256 done = fold_nand(&ctx, op);
3257 break;
3258 case INDEX_op_neg:
3259 done = fold_neg(&ctx, op);
3260 break;
3261 case INDEX_op_nor:
3262 case INDEX_op_nor_vec:
3263 done = fold_nor(&ctx, op);
3264 break;
3265 case INDEX_op_not:
3266 case INDEX_op_not_vec:
3267 done = fold_not(&ctx, op);
3268 break;
3269 case INDEX_op_or:
3270 case INDEX_op_or_vec:
3271 done = fold_or(&ctx, op);
3272 break;
3273 case INDEX_op_orc:
3274 case INDEX_op_orc_vec:
3275 done = fold_orc(&ctx, op);
3276 break;
3277 case INDEX_op_qemu_ld:
3278 done = fold_qemu_ld_1reg(&ctx, op);
3279 break;
3280 case INDEX_op_qemu_ld2:
3281 done = fold_qemu_ld_2reg(&ctx, op);
3282 break;
3283 case INDEX_op_qemu_st:
3284 case INDEX_op_qemu_st2:
3285 done = fold_qemu_st(&ctx, op);
3286 break;
3287 case INDEX_op_rems:
3288 case INDEX_op_remu:
3289 done = fold_remainder(&ctx, op);
3290 break;
3291 case INDEX_op_rotl:
3292 case INDEX_op_rotr:
3293 case INDEX_op_sar:
3294 case INDEX_op_shl:
3295 case INDEX_op_shr:
3296 done = fold_shift(&ctx, op);
3297 break;
3298 case INDEX_op_setcond:
3299 done = fold_setcond(&ctx, op);
3300 break;
3301 case INDEX_op_negsetcond:
3302 done = fold_negsetcond(&ctx, op);
3303 break;
3304 case INDEX_op_setcond2_i32:
3305 done = fold_setcond2(&ctx, op);
3306 break;
3307 case INDEX_op_cmp_vec:
3308 done = fold_cmp_vec(&ctx, op);
3309 break;
3310 case INDEX_op_cmpsel_vec:
3311 done = fold_cmpsel_vec(&ctx, op);
3312 break;
3313 case INDEX_op_bitsel_vec:
3314 done = fold_bitsel_vec(&ctx, op);
3315 break;
3316 case INDEX_op_sextract:
3317 done = fold_sextract(&ctx, op);
3318 break;
3319 case INDEX_op_sub:
3320 done = fold_sub(&ctx, op);
3321 break;
3322 case INDEX_op_subbi:
3323 done = fold_subbi(&ctx, op);
3324 break;
3325 case INDEX_op_subbio:
3326 done = fold_subbio(&ctx, op);
3327 break;
3328 case INDEX_op_subbo:
3329 done = fold_subbo(&ctx, op);
3330 break;
3331 case INDEX_op_sub_vec:
3332 done = fold_sub_vec(&ctx, op);
3333 break;
3334 case INDEX_op_xor:
3335 case INDEX_op_xor_vec:
3336 done = fold_xor(&ctx, op);
3337 break;
3338 case INDEX_op_set_label:
3339 case INDEX_op_br:
3340 case INDEX_op_exit_tb:
3341 case INDEX_op_goto_tb:
3342 case INDEX_op_goto_ptr:
3343 finish_ebb(&ctx);
3344 done = true;
3345 break;
3346 default:
3347 done = finish_folding(&ctx, op);
3348 break;
3349 }
3350 tcg_debug_assert(done);
3351 }
3352 }
3353