1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32
33 #define CASE_OP_32_64(x) \
34 glue(glue(case INDEX_op_, x), _i32): \
35 glue(glue(case INDEX_op_, x), _i64)
36
37 #define CASE_OP_32_64_VEC(x) \
38 glue(glue(case INDEX_op_, x), _i32): \
39 glue(glue(case INDEX_op_, x), _i64): \
40 glue(glue(case INDEX_op_, x), _vec)
41
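/*
 * For example, CASE_OP_32_64(add) expands to
 *     case INDEX_op_add_i32: case INDEX_op_add_i64
 * so a single case body handles both operand widths; the _VEC variant
 * additionally covers the corresponding vector opcode.
 */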
42 typedef struct MemCopyInfo {
43 IntervalTreeNode itree;
44 QSIMPLEQ_ENTRY (MemCopyInfo) next;
45 TCGTemp *ts;
46 TCGType type;
47 } MemCopyInfo;
48
49 typedef struct TempOptInfo {
50 bool is_const;
51 TCGTemp *prev_copy;
52 TCGTemp *next_copy;
53 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
54 uint64_t val;
55 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
56 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
57 } TempOptInfo;
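/*
 * For example, a temp known to hold the constant 0x00ff has
 * z_mask == 0x00ff (only bits 0..7 may be nonzero) and
 * s_mask == 0xffffffffffffff00 (bits 8..63 all match the sign bit).
 */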
58
59 typedef struct OptContext {
60 TCGContext *tcg;
61 TCGOp *prev_mb;
62 TCGTempSet temps_used;
63
64 IntervalTreeRoot mem_copy;
65 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
66
67 /* In flight values from optimization. */
68 TCGType type;
69 } OptContext;
70
static inline TempOptInfo *ts_info(TCGTemp *ts)
72 {
73 return ts->state_ptr;
74 }
75
static inline TempOptInfo *arg_info(TCGArg arg)
77 {
78 return ts_info(arg_temp(arg));
79 }
80
static inline bool ti_is_const(TempOptInfo *ti)
82 {
83 return ti->is_const;
84 }
85
static inline uint64_t ti_const_val(TempOptInfo *ti)
87 {
88 return ti->val;
89 }
90
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
92 {
93 return ti_is_const(ti) && ti_const_val(ti) == val;
94 }
95
static inline bool ts_is_const(TCGTemp *ts)
97 {
98 return ti_is_const(ts_info(ts));
99 }
100
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
102 {
103 return ti_is_const_val(ts_info(ts), val);
104 }
105
static inline bool arg_is_const(TCGArg arg)
107 {
108 return ts_is_const(arg_temp(arg));
109 }
110
static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
112 {
113 return ts_is_const_val(arg_temp(arg), val);
114 }
115
static inline bool ts_is_copy(TCGTemp *ts)
117 {
118 return ts_info(ts)->next_copy != ts;
119 }
120
static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
122 {
123 return a->kind < b->kind ? b : a;
124 }
125
126 /* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
128 {
129 size_t idx = temp_idx(ts);
130 TempOptInfo *ti;
131
132 if (test_bit(idx, ctx->temps_used.l)) {
133 return;
134 }
135 set_bit(idx, ctx->temps_used.l);
136
137 ti = ts->state_ptr;
138 if (ti == NULL) {
139 ti = tcg_malloc(sizeof(TempOptInfo));
140 ts->state_ptr = ti;
141 }
142
143 ti->next_copy = ts;
144 ti->prev_copy = ts;
145 QSIMPLEQ_INIT(&ti->mem_copy);
146 if (ts->kind == TEMP_CONST) {
147 ti->is_const = true;
148 ti->val = ts->val;
149 ti->z_mask = ts->val;
150 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
151 } else {
152 ti->is_const = false;
153 ti->z_mask = -1;
154 ti->s_mask = 0;
155 }
156 }
157
static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
159 {
160 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
161 return r ? container_of(r, MemCopyInfo, itree) : NULL;
162 }
163
static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
165 {
166 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
167 return r ? container_of(r, MemCopyInfo, itree) : NULL;
168 }
169
static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
171 {
172 TCGTemp *ts = mc->ts;
173 TempOptInfo *ti = ts_info(ts);
174
175 interval_tree_remove(&mc->itree, &ctx->mem_copy);
176 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
177 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
178 }
179
static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
181 {
182 while (true) {
183 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
184 if (!mc) {
185 break;
186 }
187 remove_mem_copy(ctx, mc);
188 }
189 }
190
static void remove_mem_copy_all(OptContext *ctx)
192 {
193 remove_mem_copy_in(ctx, 0, -1);
194 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
195 }
196
static TCGTemp *find_better_copy(TCGTemp *ts)
198 {
199 TCGTemp *i, *ret;
200
201 /* If this is already readonly, we can't do better. */
202 if (temp_readonly(ts)) {
203 return ts;
204 }
205
206 ret = ts;
207 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
208 ret = cmp_better_copy(ret, i);
209 }
210 return ret;
211 }
212
static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
214 {
215 TempOptInfo *si = ts_info(src_ts);
216 TempOptInfo *di = ts_info(dst_ts);
217 MemCopyInfo *mc;
218
219 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
220 tcg_debug_assert(mc->ts == src_ts);
221 mc->ts = dst_ts;
222 }
223 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
224 }
225
/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
228 {
229 TempOptInfo *ti = ts_info(ts);
230 TCGTemp *pts = ti->prev_copy;
231 TCGTemp *nts = ti->next_copy;
232 TempOptInfo *pi = ts_info(pts);
233 TempOptInfo *ni = ts_info(nts);
234
235 ni->prev_copy = ti->prev_copy;
236 pi->next_copy = ti->next_copy;
237 ti->next_copy = ts;
238 ti->prev_copy = ts;
239 ti->is_const = false;
240 ti->z_mask = -1;
241 ti->s_mask = 0;
242
243 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
244 if (ts == nts) {
245 /* Last temp copy being removed, the mem copies die. */
246 MemCopyInfo *mc;
247 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
248 interval_tree_remove(&mc->itree, &ctx->mem_copy);
249 }
250 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
251 } else {
252 move_mem_copies(find_better_copy(nts), ts);
253 }
254 }
255 }
256
static void reset_temp(OptContext *ctx, TCGArg arg)
258 {
259 reset_ts(ctx, arg_temp(arg));
260 }
261
static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
264 {
265 MemCopyInfo *mc;
266 TempOptInfo *ti;
267
268 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
269 if (mc) {
270 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
271 } else {
272 mc = tcg_malloc(sizeof(*mc));
273 }
274
275 memset(mc, 0, sizeof(*mc));
276 mc->itree.start = start;
277 mc->itree.last = last;
278 mc->type = type;
279 interval_tree_insert(&mc->itree, &ctx->mem_copy);
280
281 ts = find_better_copy(ts);
282 ti = ts_info(ts);
283 mc->ts = ts;
284 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
285 }
286
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
288 {
289 TCGTemp *i;
290
291 if (ts1 == ts2) {
292 return true;
293 }
294
295 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
296 return false;
297 }
298
299 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
300 if (i == ts2) {
301 return true;
302 }
303 }
304
305 return false;
306 }
307
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
309 {
310 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
311 }
312
static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
314 {
315 MemCopyInfo *mc;
316
317 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
318 if (mc->itree.start == s && mc->type == type) {
319 return find_better_copy(mc->ts);
320 }
321 }
322 return NULL;
323 }
324
static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
326 {
327 TCGType type = ctx->type;
328 TCGTemp *ts;
329
330 if (type == TCG_TYPE_I32) {
331 val = (int32_t)val;
332 }
333
334 ts = tcg_constant_internal(type, val);
335 init_ts_info(ctx, ts);
336
337 return temp_arg(ts);
338 }
339
static TCGArg arg_new_temp(OptContext *ctx)
341 {
342 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
343 init_ts_info(ctx, ts);
344 return temp_arg(ts);
345 }
346
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
348 {
349 TCGTemp *dst_ts = arg_temp(dst);
350 TCGTemp *src_ts = arg_temp(src);
351 TempOptInfo *di;
352 TempOptInfo *si;
353 TCGOpcode new_op;
354
355 if (ts_are_copies(dst_ts, src_ts)) {
356 tcg_op_remove(ctx->tcg, op);
357 return true;
358 }
359
360 reset_ts(ctx, dst_ts);
361 di = ts_info(dst_ts);
362 si = ts_info(src_ts);
363
364 switch (ctx->type) {
365 case TCG_TYPE_I32:
366 new_op = INDEX_op_mov_i32;
367 break;
368 case TCG_TYPE_I64:
369 new_op = INDEX_op_mov_i64;
370 break;
371 case TCG_TYPE_V64:
372 case TCG_TYPE_V128:
373 case TCG_TYPE_V256:
374 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
375 new_op = INDEX_op_mov_vec;
376 break;
377 default:
378 g_assert_not_reached();
379 }
380 op->opc = new_op;
381 op->args[0] = dst;
382 op->args[1] = src;
383
384 di->z_mask = si->z_mask;
385 di->s_mask = si->s_mask;
386
387 if (src_ts->type == dst_ts->type) {
388 TempOptInfo *ni = ts_info(si->next_copy);
389
390 di->next_copy = si->next_copy;
391 di->prev_copy = src_ts;
392 ni->prev_copy = dst_ts;
393 si->next_copy = dst_ts;
394 di->is_const = si->is_const;
395 di->val = si->val;
396
397 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
398 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
399 move_mem_copies(dst_ts, src_ts);
400 }
401 }
402 return true;
403 }
404
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
407 {
408 /* Convert movi to mov with constant temp. */
409 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
410 }
411
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
413 {
414 uint64_t l64, h64;
415
416 switch (op) {
417 CASE_OP_32_64(add):
418 return x + y;
419
420 CASE_OP_32_64(sub):
421 return x - y;
422
423 CASE_OP_32_64(mul):
424 return x * y;
425
426 CASE_OP_32_64_VEC(and):
427 return x & y;
428
429 CASE_OP_32_64_VEC(or):
430 return x | y;
431
432 CASE_OP_32_64_VEC(xor):
433 return x ^ y;
434
435 case INDEX_op_shl_i32:
436 return (uint32_t)x << (y & 31);
437
438 case INDEX_op_shl_i64:
439 return (uint64_t)x << (y & 63);
440
441 case INDEX_op_shr_i32:
442 return (uint32_t)x >> (y & 31);
443
444 case INDEX_op_shr_i64:
445 return (uint64_t)x >> (y & 63);
446
447 case INDEX_op_sar_i32:
448 return (int32_t)x >> (y & 31);
449
450 case INDEX_op_sar_i64:
451 return (int64_t)x >> (y & 63);
452
453 case INDEX_op_rotr_i32:
454 return ror32(x, y & 31);
455
456 case INDEX_op_rotr_i64:
457 return ror64(x, y & 63);
458
459 case INDEX_op_rotl_i32:
460 return rol32(x, y & 31);
461
462 case INDEX_op_rotl_i64:
463 return rol64(x, y & 63);
464
465 CASE_OP_32_64_VEC(not):
466 return ~x;
467
468 CASE_OP_32_64(neg):
469 return -x;
470
471 CASE_OP_32_64_VEC(andc):
472 return x & ~y;
473
474 CASE_OP_32_64_VEC(orc):
475 return x | ~y;
476
477 CASE_OP_32_64_VEC(eqv):
478 return ~(x ^ y);
479
480 CASE_OP_32_64_VEC(nand):
481 return ~(x & y);
482
483 CASE_OP_32_64_VEC(nor):
484 return ~(x | y);
485
486 case INDEX_op_clz_i32:
487 return (uint32_t)x ? clz32(x) : y;
488
489 case INDEX_op_clz_i64:
490 return x ? clz64(x) : y;
491
492 case INDEX_op_ctz_i32:
493 return (uint32_t)x ? ctz32(x) : y;
494
495 case INDEX_op_ctz_i64:
496 return x ? ctz64(x) : y;
497
498 case INDEX_op_ctpop_i32:
499 return ctpop32(x);
500
501 case INDEX_op_ctpop_i64:
502 return ctpop64(x);
503
504 CASE_OP_32_64(ext8s):
505 return (int8_t)x;
506
507 CASE_OP_32_64(ext16s):
508 return (int16_t)x;
509
510 CASE_OP_32_64(ext8u):
511 return (uint8_t)x;
512
513 CASE_OP_32_64(ext16u):
514 return (uint16_t)x;
515
516 CASE_OP_32_64(bswap16):
517 x = bswap16(x);
518 return y & TCG_BSWAP_OS ? (int16_t)x : x;
519
520 CASE_OP_32_64(bswap32):
521 x = bswap32(x);
522 return y & TCG_BSWAP_OS ? (int32_t)x : x;
523
524 case INDEX_op_bswap64_i64:
525 return bswap64(x);
526
527 case INDEX_op_ext_i32_i64:
528 case INDEX_op_ext32s_i64:
529 return (int32_t)x;
530
531 case INDEX_op_extu_i32_i64:
532 case INDEX_op_extrl_i64_i32:
533 case INDEX_op_ext32u_i64:
534 return (uint32_t)x;
535
536 case INDEX_op_extrh_i64_i32:
537 return (uint64_t)x >> 32;
538
539 case INDEX_op_muluh_i32:
540 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
541 case INDEX_op_mulsh_i32:
542 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
543
544 case INDEX_op_muluh_i64:
545 mulu64(&l64, &h64, x, y);
546 return h64;
547 case INDEX_op_mulsh_i64:
548 muls64(&l64, &h64, x, y);
549 return h64;
550
551 case INDEX_op_div_i32:
552 /* Avoid crashing on divide by zero, otherwise undefined. */
553 return (int32_t)x / ((int32_t)y ? : 1);
554 case INDEX_op_divu_i32:
555 return (uint32_t)x / ((uint32_t)y ? : 1);
556 case INDEX_op_div_i64:
557 return (int64_t)x / ((int64_t)y ? : 1);
558 case INDEX_op_divu_i64:
559 return (uint64_t)x / ((uint64_t)y ? : 1);
560
561 case INDEX_op_rem_i32:
562 return (int32_t)x % ((int32_t)y ? : 1);
563 case INDEX_op_remu_i32:
564 return (uint32_t)x % ((uint32_t)y ? : 1);
565 case INDEX_op_rem_i64:
566 return (int64_t)x % ((int64_t)y ? : 1);
567 case INDEX_op_remu_i64:
568 return (uint64_t)x % ((uint64_t)y ? : 1);
569
570 default:
571 g_assert_not_reached();
572 }
573 }
574
static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
577 {
578 uint64_t res = do_constant_folding_2(op, x, y);
579 if (type == TCG_TYPE_I32) {
580 res = (int32_t)res;
581 }
582 return res;
583 }
584
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
586 {
587 switch (c) {
588 case TCG_COND_EQ:
589 return x == y;
590 case TCG_COND_NE:
591 return x != y;
592 case TCG_COND_LT:
593 return (int32_t)x < (int32_t)y;
594 case TCG_COND_GE:
595 return (int32_t)x >= (int32_t)y;
596 case TCG_COND_LE:
597 return (int32_t)x <= (int32_t)y;
598 case TCG_COND_GT:
599 return (int32_t)x > (int32_t)y;
600 case TCG_COND_LTU:
601 return x < y;
602 case TCG_COND_GEU:
603 return x >= y;
604 case TCG_COND_LEU:
605 return x <= y;
606 case TCG_COND_GTU:
607 return x > y;
608 case TCG_COND_TSTEQ:
609 return (x & y) == 0;
610 case TCG_COND_TSTNE:
611 return (x & y) != 0;
612 case TCG_COND_ALWAYS:
613 case TCG_COND_NEVER:
614 break;
615 }
616 g_assert_not_reached();
617 }
618
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
620 {
621 switch (c) {
622 case TCG_COND_EQ:
623 return x == y;
624 case TCG_COND_NE:
625 return x != y;
626 case TCG_COND_LT:
627 return (int64_t)x < (int64_t)y;
628 case TCG_COND_GE:
629 return (int64_t)x >= (int64_t)y;
630 case TCG_COND_LE:
631 return (int64_t)x <= (int64_t)y;
632 case TCG_COND_GT:
633 return (int64_t)x > (int64_t)y;
634 case TCG_COND_LTU:
635 return x < y;
636 case TCG_COND_GEU:
637 return x >= y;
638 case TCG_COND_LEU:
639 return x <= y;
640 case TCG_COND_GTU:
641 return x > y;
642 case TCG_COND_TSTEQ:
643 return (x & y) == 0;
644 case TCG_COND_TSTNE:
645 return (x & y) != 0;
646 case TCG_COND_ALWAYS:
647 case TCG_COND_NEVER:
648 break;
649 }
650 g_assert_not_reached();
651 }
652
static int do_constant_folding_cond_eq(TCGCond c)
654 {
655 switch (c) {
656 case TCG_COND_GT:
657 case TCG_COND_LTU:
658 case TCG_COND_LT:
659 case TCG_COND_GTU:
660 case TCG_COND_NE:
661 return 0;
662 case TCG_COND_GE:
663 case TCG_COND_GEU:
664 case TCG_COND_LE:
665 case TCG_COND_LEU:
666 case TCG_COND_EQ:
667 return 1;
668 case TCG_COND_TSTEQ:
669 case TCG_COND_TSTNE:
670 return -1;
671 case TCG_COND_ALWAYS:
672 case TCG_COND_NEVER:
673 break;
674 }
675 g_assert_not_reached();
676 }
677
678 /*
679 * Return -1 if the condition can't be simplified,
680 * and the result of the condition (0 or 1) if it can.
681 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
684 {
685 if (arg_is_const(x) && arg_is_const(y)) {
686 uint64_t xv = arg_info(x)->val;
687 uint64_t yv = arg_info(y)->val;
688
689 switch (type) {
690 case TCG_TYPE_I32:
691 return do_constant_folding_cond_32(xv, yv, c);
692 case TCG_TYPE_I64:
693 return do_constant_folding_cond_64(xv, yv, c);
694 default:
695 /* Only scalar comparisons are optimizable */
696 return -1;
697 }
698 } else if (args_are_copies(x, y)) {
699 return do_constant_folding_cond_eq(c);
700 } else if (arg_is_const_val(y, 0)) {
701 switch (c) {
702 case TCG_COND_LTU:
703 case TCG_COND_TSTNE:
704 return 0;
705 case TCG_COND_GEU:
706 case TCG_COND_TSTEQ:
707 return 1;
708 default:
709 return -1;
710 }
711 }
712 return -1;
713 }
714
715 /**
716 * swap_commutative:
717 * @dest: TCGArg of the destination argument, or NO_DEST.
718 * @p1: first paired argument
719 * @p2: second paired argument
720 *
721 * If *@p1 is a constant and *@p2 is not, swap.
722 * If *@p2 matches @dest, swap.
723 * Return true if a swap was performed.
724 */
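/*
 * For example, "add_i32 d, $0x5, x" is rewritten as "add_i32 d, x, $0x5",
 * and "and_i32 d, x, d" (second input matching the destination) is
 * rewritten as "and_i32 d, d, x".
 */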
725
726 #define NO_DEST temp_arg(NULL)
727
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
729 {
730 TCGArg a1 = *p1, a2 = *p2;
731 int sum = 0;
732 sum += arg_is_const(a1);
733 sum -= arg_is_const(a2);
734
735 /* Prefer the constant in second argument, and then the form
736 op a, a, b, which is better handled on non-RISC hosts. */
737 if (sum > 0 || (sum == 0 && dest == a2)) {
738 *p1 = a2;
739 *p2 = a1;
740 return true;
741 }
742 return false;
743 }
744
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
746 {
747 int sum = 0;
748 sum += arg_is_const(p1[0]);
749 sum += arg_is_const(p1[1]);
750 sum -= arg_is_const(p2[0]);
751 sum -= arg_is_const(p2[1]);
752 if (sum > 0) {
753 TCGArg t;
754 t = p1[0], p1[0] = p2[0], p2[0] = t;
755 t = p1[1], p1[1] = p2[1], p2[1] = t;
756 return true;
757 }
758 return false;
759 }
760
761 /*
762 * Return -1 if the condition can't be simplified,
763 * and the result of the condition (0 or 1) if it can.
764 */
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
767 {
768 TCGCond cond;
769 TempOptInfo *i1;
770 bool swap;
771 int r;
772
773 swap = swap_commutative(dest, p1, p2);
774 cond = *pcond;
775 if (swap) {
776 *pcond = cond = tcg_swap_cond(cond);
777 }
778
779 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
780 if (r >= 0) {
781 return r;
782 }
783 if (!is_tst_cond(cond)) {
784 return -1;
785 }
786
787 i1 = arg_info(*p1);
788
789 /*
790 * TSTNE x,x -> NE x,0
791 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
792 */
793 if (args_are_copies(*p1, *p2) ||
794 (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
795 *p2 = arg_new_constant(ctx, 0);
796 *pcond = tcg_tst_eqne_cond(cond);
797 return -1;
798 }
799
800 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
801 if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
802 *p2 = arg_new_constant(ctx, 0);
803 *pcond = tcg_tst_ltge_cond(cond);
804 return -1;
805 }
806
807 /* Expand to AND with a temporary if no backend support. */
808 if (!TCG_TARGET_HAS_tst) {
809 TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32
810 ? INDEX_op_and_i32 : INDEX_op_and_i64);
811 TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3);
812 TCGArg tmp = arg_new_temp(ctx);
813
814 op2->args[0] = tmp;
815 op2->args[1] = *p1;
816 op2->args[2] = *p2;
817
818 *p1 = tmp;
819 *p2 = arg_new_constant(ctx, 0);
820 *pcond = tcg_tst_eqne_cond(cond);
821 }
822 return -1;
823 }
824
static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
826 {
827 TCGArg al, ah, bl, bh;
828 TCGCond c;
829 bool swap;
830 int r;
831
832 swap = swap_commutative2(args, args + 2);
833 c = args[4];
834 if (swap) {
835 args[4] = c = tcg_swap_cond(c);
836 }
837
838 al = args[0];
839 ah = args[1];
840 bl = args[2];
841 bh = args[3];
842
843 if (arg_is_const(bl) && arg_is_const(bh)) {
844 tcg_target_ulong blv = arg_info(bl)->val;
845 tcg_target_ulong bhv = arg_info(bh)->val;
846 uint64_t b = deposit64(blv, 32, 32, bhv);
847
848 if (arg_is_const(al) && arg_is_const(ah)) {
849 tcg_target_ulong alv = arg_info(al)->val;
850 tcg_target_ulong ahv = arg_info(ah)->val;
851 uint64_t a = deposit64(alv, 32, 32, ahv);
852
853 r = do_constant_folding_cond_64(a, b, c);
854 if (r >= 0) {
855 return r;
856 }
857 }
858
859 if (b == 0) {
860 switch (c) {
861 case TCG_COND_LTU:
862 case TCG_COND_TSTNE:
863 return 0;
864 case TCG_COND_GEU:
865 case TCG_COND_TSTEQ:
866 return 1;
867 default:
868 break;
869 }
870 }
871
872 /* TSTNE x,-1 -> NE x,0 */
873 if (b == -1 && is_tst_cond(c)) {
874 args[3] = args[2] = arg_new_constant(ctx, 0);
875 args[4] = tcg_tst_eqne_cond(c);
876 return -1;
877 }
878
879 /* TSTNE x,sign -> LT x,0 */
880 if (b == INT64_MIN && is_tst_cond(c)) {
881 /* bl must be 0, so copy that to bh */
882 args[3] = bl;
883 args[4] = tcg_tst_ltge_cond(c);
884 return -1;
885 }
886 }
887
888 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
889 r = do_constant_folding_cond_eq(c);
890 if (r >= 0) {
891 return r;
892 }
893
894 /* TSTNE x,x -> NE x,0 */
895 if (is_tst_cond(c)) {
896 args[3] = args[2] = arg_new_constant(ctx, 0);
897 args[4] = tcg_tst_eqne_cond(c);
898 return -1;
899 }
900 }
901
902 /* Expand to AND with a temporary if no backend support. */
903 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
904 TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
905 TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
906 TCGArg t1 = arg_new_temp(ctx);
907 TCGArg t2 = arg_new_temp(ctx);
908
909 op1->args[0] = t1;
910 op1->args[1] = al;
911 op1->args[2] = bl;
912 op2->args[0] = t2;
913 op2->args[1] = ah;
914 op2->args[2] = bh;
915
916 args[0] = t1;
917 args[1] = t2;
918 args[3] = args[2] = arg_new_constant(ctx, 0);
919 args[4] = tcg_tst_eqne_cond(c);
920 }
921 return -1;
922 }
923
static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
925 {
926 for (int i = 0; i < nb_args; i++) {
927 TCGTemp *ts = arg_temp(op->args[i]);
928 init_ts_info(ctx, ts);
929 }
930 }
931
static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
934 {
935 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
936 TCGTemp *ts = arg_temp(op->args[i]);
937 if (ts_is_copy(ts)) {
938 op->args[i] = temp_arg(find_better_copy(ts));
939 }
940 }
941 }
942
static void finish_bb(OptContext *ctx)
944 {
    /* Memory barriers are only optimized within a basic block. */
946 ctx->prev_mb = NULL;
947 }
948
static void finish_ebb(OptContext *ctx)
950 {
951 finish_bb(ctx);
    /* Value and copy tracking is limited to an extended basic block. */
953 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
954 remove_mem_copy_all(ctx);
955 }
956
static bool finish_folding(OptContext *ctx, TCGOp *op)
958 {
959 const TCGOpDef *def = &tcg_op_defs[op->opc];
960 int i, nb_oargs;
961
962 nb_oargs = def->nb_oargs;
963 for (i = 0; i < nb_oargs; i++) {
964 TCGTemp *ts = arg_temp(op->args[i]);
965 reset_ts(ctx, ts);
966 }
967 return true;
968 }
969
970 /*
971 * The fold_* functions return true when processing is complete,
972 * usually by folding the operation to a constant or to a copy,
973 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
974 * like collect information about the value produced, for use in
975 * optimizing a subsequent operation.
976 *
977 * These first fold_* functions are all helpers, used by other
978 * folders for more specific operations.
979 */
980
static bool fold_const1(OptContext *ctx, TCGOp *op)
982 {
983 if (arg_is_const(op->args[1])) {
984 uint64_t t;
985
986 t = arg_info(op->args[1])->val;
987 t = do_constant_folding(op->opc, ctx->type, t, 0);
988 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
989 }
990 return false;
991 }
992
static bool fold_const2(OptContext *ctx, TCGOp *op)
994 {
995 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
996 uint64_t t1 = arg_info(op->args[1])->val;
997 uint64_t t2 = arg_info(op->args[2])->val;
998
999 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1000 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1001 }
1002 return false;
1003 }
1004
static bool fold_commutative(OptContext *ctx, TCGOp *op)
1006 {
1007 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1008 return false;
1009 }
1010
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1012 {
1013 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1014 return fold_const2(ctx, op);
1015 }
1016
1017 /*
1018 * Record "zero" and "sign" masks for the single output of @op.
1019 * See TempOptInfo definition of z_mask and s_mask.
1020 * If z_mask allows, fold the output to constant zero.
1021 * The passed s_mask may be augmented by z_mask.
1022 */
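/*
 * For example, with z_mask == 0x00ff an incoming s_mask of 0 is widened
 * to 0xffffffffffffff00, since bits 8..63 are known to be zero and
 * therefore already repeat the sign bit.
 */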
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, int64_t s_mask)
1025 {
1026 const TCGOpDef *def = &tcg_op_defs[op->opc];
1027 TCGTemp *ts;
1028 TempOptInfo *ti;
1029 int rep;
1030
1031 /* Only single-output opcodes are supported here. */
1032 tcg_debug_assert(def->nb_oargs == 1);
1033
1034 /*
1035 * 32-bit ops generate 32-bit results, which for the purpose of
1036 * simplifying tcg are sign-extended. Certainly that's how we
1037 * represent our constants elsewhere. Note that the bits will
1038 * be reset properly for a 64-bit value when encountering the
1039 * type changing opcodes.
1040 */
1041 if (ctx->type == TCG_TYPE_I32) {
1042 z_mask = (int32_t)z_mask;
1043 s_mask |= INT32_MIN;
1044 }
1045
1046 if (z_mask == 0) {
1047 return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1048 }
1049
1050 ts = arg_temp(op->args[0]);
1051 reset_ts(ctx, ts);
1052
1053 ti = ts_info(ts);
1054 ti->z_mask = z_mask;
1055
1056 /* Canonicalize s_mask and incorporate data from z_mask. */
1057 rep = clz64(~s_mask);
1058 rep = MAX(rep, clz64(z_mask));
1059 rep = MAX(rep - 1, 0);
1060 ti->s_mask = INT64_MIN >> rep;
1061
1062 return true;
1063 }
1064
static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1066 {
1067 return fold_masks_zs(ctx, op, z_mask, 0);
1068 }
1069
static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1071 {
1072 return fold_masks_zs(ctx, op, -1, s_mask);
1073 }
1074
1075 /*
1076 * An "affected" mask bit is 0 if and only if the result is identical
1077 * to the first input. Thus if the entire mask is 0, the operation
1078 * is equivalent to a copy.
1079 */
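/*
 * For example, in fold_and with a constant second operand of 0xff the
 * affected mask is z1 & ~0xff: only bits of the first input above bit 7
 * that might be nonzero.  If that input's z_mask is already 0x0f, the
 * affected mask is 0 and the AND is folded to a mov.
 */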
static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
1081 {
1082 if (ctx->type == TCG_TYPE_I32) {
1083 a_mask = (uint32_t)a_mask;
1084 }
1085 if (a_mask == 0) {
1086 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1087 }
1088 return false;
1089 }
1090
1091 /*
1092 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
1094 * indicate that the processing is complete.
1095 */
1096 static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1098 {
1099 TCGOpcode not_op;
1100 bool have_not;
1101
1102 switch (ctx->type) {
1103 case TCG_TYPE_I32:
1104 not_op = INDEX_op_not_i32;
1105 have_not = TCG_TARGET_HAS_not_i32;
1106 break;
1107 case TCG_TYPE_I64:
1108 not_op = INDEX_op_not_i64;
1109 have_not = TCG_TARGET_HAS_not_i64;
1110 break;
1111 case TCG_TYPE_V64:
1112 case TCG_TYPE_V128:
1113 case TCG_TYPE_V256:
1114 not_op = INDEX_op_not_vec;
1115 have_not = TCG_TARGET_HAS_not_vec;
1116 break;
1117 default:
1118 g_assert_not_reached();
1119 }
1120 if (have_not) {
1121 op->opc = not_op;
1122 op->args[1] = op->args[idx];
1123 return fold_not(ctx, op);
1124 }
1125 return false;
1126 }
1127
1128 /* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1130 {
1131 if (arg_is_const_val(op->args[1], i)) {
1132 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1133 }
1134 return false;
1135 }
1136
1137 /* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1139 {
1140 if (arg_is_const_val(op->args[1], i)) {
1141 return fold_to_not(ctx, op, 2);
1142 }
1143 return false;
1144 }
1145
1146 /* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1148 {
1149 if (arg_is_const_val(op->args[2], i)) {
1150 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1151 }
1152 return false;
1153 }
1154
1155 /* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1157 {
1158 if (arg_is_const_val(op->args[2], i)) {
1159 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1160 }
1161 return false;
1162 }
1163
1164 /* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1166 {
1167 if (arg_is_const_val(op->args[2], i)) {
1168 return fold_to_not(ctx, op, 1);
1169 }
1170 return false;
1171 }
1172
1173 /* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1175 {
1176 if (args_are_copies(op->args[1], op->args[2])) {
1177 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1178 }
1179 return false;
1180 }
1181
1182 /* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1184 {
1185 if (args_are_copies(op->args[1], op->args[2])) {
1186 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1187 }
1188 return false;
1189 }
1190
1191 /*
1192 * These outermost fold_<op> functions are sorted alphabetically.
1193 *
1194 * The ordering of the transformations should be:
1195 * 1) those that produce a constant
1196 * 2) those that produce a copy
1197 * 3) those that produce information about the result value.
1198 */
1199
1200 static bool fold_or(OptContext *ctx, TCGOp *op);
1201 static bool fold_orc(OptContext *ctx, TCGOp *op);
1202 static bool fold_xor(OptContext *ctx, TCGOp *op);
1203
static bool fold_add(OptContext *ctx, TCGOp *op)
1205 {
1206 if (fold_const2_commutative(ctx, op) ||
1207 fold_xi_to_x(ctx, op, 0)) {
1208 return true;
1209 }
1210 return finish_folding(ctx, op);
1211 }
1212
1213 /* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1215 {
1216 if (fold_commutative(ctx, op) ||
1217 fold_xi_to_x(ctx, op, 0)) {
1218 return true;
1219 }
1220 return finish_folding(ctx, op);
1221 }
1222
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
1224 {
1225 bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
1226 bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
1227
1228 if (a_const && b_const) {
1229 uint64_t al = arg_info(op->args[2])->val;
1230 uint64_t ah = arg_info(op->args[3])->val;
1231 uint64_t bl = arg_info(op->args[4])->val;
1232 uint64_t bh = arg_info(op->args[5])->val;
1233 TCGArg rl, rh;
1234 TCGOp *op2;
1235
1236 if (ctx->type == TCG_TYPE_I32) {
1237 uint64_t a = deposit64(al, 32, 32, ah);
1238 uint64_t b = deposit64(bl, 32, 32, bh);
1239
1240 if (add) {
1241 a += b;
1242 } else {
1243 a -= b;
1244 }
1245
1246 al = sextract64(a, 0, 32);
1247 ah = sextract64(a, 32, 32);
1248 } else {
1249 Int128 a = int128_make128(al, ah);
1250 Int128 b = int128_make128(bl, bh);
1251
1252 if (add) {
1253 a = int128_add(a, b);
1254 } else {
1255 a = int128_sub(a, b);
1256 }
1257
1258 al = int128_getlo(a);
1259 ah = int128_gethi(a);
1260 }
1261
1262 rl = op->args[0];
1263 rh = op->args[1];
1264
1265 /* The proper opcode is supplied by tcg_opt_gen_mov. */
1266 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1267
1268 tcg_opt_gen_movi(ctx, op, rl, al);
1269 tcg_opt_gen_movi(ctx, op2, rh, ah);
1270 return true;
1271 }
1272
1273 /* Fold sub2 r,x,i to add2 r,x,-i */
1274 if (!add && b_const) {
1275 uint64_t bl = arg_info(op->args[4])->val;
1276 uint64_t bh = arg_info(op->args[5])->val;
1277
1278 /* Negate the two parts without assembling and disassembling. */
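        /*
         * For example, negating the 128-bit constant 1 (bh == 0, bl == 1)
         * gives bl == ~0 and then bh == ~0 + 0 == ~0, i.e. -1 in both parts.
         */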
1279 bl = -bl;
1280 bh = ~bh + !bl;
1281
1282 op->opc = (ctx->type == TCG_TYPE_I32
1283 ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
1284 op->args[4] = arg_new_constant(ctx, bl);
1285 op->args[5] = arg_new_constant(ctx, bh);
1286 }
1287 return finish_folding(ctx, op);
1288 }
1289
static bool fold_add2(OptContext *ctx, TCGOp *op)
1291 {
1292 /* Note that the high and low parts may be independently swapped. */
1293 swap_commutative(op->args[0], &op->args[2], &op->args[4]);
1294 swap_commutative(op->args[1], &op->args[3], &op->args[5]);
1295
1296 return fold_addsub2(ctx, op, true);
1297 }
1298
static bool fold_and(OptContext *ctx, TCGOp *op)
1300 {
1301 uint64_t z1, z2, z_mask, s_mask;
1302 TempOptInfo *t1, *t2;
1303
1304 if (fold_const2_commutative(ctx, op) ||
1305 fold_xi_to_i(ctx, op, 0) ||
1306 fold_xi_to_x(ctx, op, -1) ||
1307 fold_xx_to_x(ctx, op)) {
1308 return true;
1309 }
1310
1311 t1 = arg_info(op->args[1]);
1312 t2 = arg_info(op->args[2]);
1313 z1 = t1->z_mask;
1314 z2 = t2->z_mask;
1315
1316 /*
1317 * Known-zeros does not imply known-ones. Therefore unless
1318 * arg2 is constant, we can't infer affected bits from it.
1319 */
1320 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
1321 return true;
1322 }
1323
1324 z_mask = z1 & z2;
1325
1326 /*
1327 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1328 * Bitwise operations preserve the relative quantity of the repetitions.
1329 */
1330 s_mask = t1->s_mask & t2->s_mask;
1331
1332 return fold_masks_zs(ctx, op, z_mask, s_mask);
1333 }
1334
static bool fold_andc(OptContext *ctx, TCGOp *op)
1336 {
1337 uint64_t z_mask, s_mask;
1338 TempOptInfo *t1, *t2;
1339
1340 if (fold_const2(ctx, op) ||
1341 fold_xx_to_i(ctx, op, 0) ||
1342 fold_xi_to_x(ctx, op, 0) ||
1343 fold_ix_to_not(ctx, op, -1)) {
1344 return true;
1345 }
1346
1347 t1 = arg_info(op->args[1]);
1348 t2 = arg_info(op->args[2]);
1349 z_mask = t1->z_mask;
1350
1351 /*
1352 * Known-zeros does not imply known-ones. Therefore unless
1353 * arg2 is constant, we can't infer anything from it.
1354 */
1355 if (ti_is_const(t2)) {
1356 uint64_t v2 = ti_const_val(t2);
1357 if (fold_affected_mask(ctx, op, z_mask & v2)) {
1358 return true;
1359 }
1360 z_mask &= ~v2;
1361 }
1362
1363 s_mask = t1->s_mask & t2->s_mask;
1364 return fold_masks_zs(ctx, op, z_mask, s_mask);
1365 }
1366
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1368 {
1369 /* If true and false values are the same, eliminate the cmp. */
1370 if (args_are_copies(op->args[2], op->args[3])) {
1371 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1372 }
1373
1374 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1375 uint64_t tv = arg_info(op->args[2])->val;
1376 uint64_t fv = arg_info(op->args[3])->val;
1377
1378 if (tv == -1 && fv == 0) {
1379 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1380 }
1381 if (tv == 0 && fv == -1) {
1382 if (TCG_TARGET_HAS_not_vec) {
1383 op->opc = INDEX_op_not_vec;
1384 return fold_not(ctx, op);
1385 } else {
1386 op->opc = INDEX_op_xor_vec;
1387 op->args[2] = arg_new_constant(ctx, -1);
1388 return fold_xor(ctx, op);
1389 }
1390 }
1391 }
1392 if (arg_is_const(op->args[2])) {
1393 uint64_t tv = arg_info(op->args[2])->val;
1394 if (tv == -1) {
1395 op->opc = INDEX_op_or_vec;
1396 op->args[2] = op->args[3];
1397 return fold_or(ctx, op);
1398 }
1399 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1400 op->opc = INDEX_op_andc_vec;
1401 op->args[2] = op->args[1];
1402 op->args[1] = op->args[3];
1403 return fold_andc(ctx, op);
1404 }
1405 }
1406 if (arg_is_const(op->args[3])) {
1407 uint64_t fv = arg_info(op->args[3])->val;
1408 if (fv == 0) {
1409 op->opc = INDEX_op_and_vec;
1410 return fold_and(ctx, op);
1411 }
1412 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1413 op->opc = INDEX_op_orc_vec;
1414 op->args[2] = op->args[1];
1415 op->args[1] = op->args[3];
1416 return fold_orc(ctx, op);
1417 }
1418 }
1419 return finish_folding(ctx, op);
1420 }
1421
static bool fold_brcond(OptContext *ctx, TCGOp *op)
1423 {
1424 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1425 &op->args[1], &op->args[2]);
1426 if (i == 0) {
1427 tcg_op_remove(ctx->tcg, op);
1428 return true;
1429 }
1430 if (i > 0) {
1431 op->opc = INDEX_op_br;
1432 op->args[0] = op->args[3];
1433 finish_ebb(ctx);
1434 } else {
1435 finish_bb(ctx);
1436 }
1437 return true;
1438 }
1439
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1441 {
1442 TCGCond cond;
1443 TCGArg label;
1444 int i, inv = 0;
1445
1446 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1447 cond = op->args[4];
1448 label = op->args[5];
1449 if (i >= 0) {
1450 goto do_brcond_const;
1451 }
1452
1453 switch (cond) {
1454 case TCG_COND_LT:
1455 case TCG_COND_GE:
1456 /*
1457 * Simplify LT/GE comparisons vs zero to a single compare
1458 * vs the high word of the input.
1459 */
1460 if (arg_is_const_val(op->args[2], 0) &&
1461 arg_is_const_val(op->args[3], 0)) {
1462 goto do_brcond_high;
1463 }
1464 break;
1465
1466 case TCG_COND_NE:
1467 inv = 1;
1468 QEMU_FALLTHROUGH;
1469 case TCG_COND_EQ:
1470 /*
1471 * Simplify EQ/NE comparisons where one of the pairs
1472 * can be simplified.
1473 */
1474 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1475 op->args[2], cond);
1476 switch (i ^ inv) {
1477 case 0:
1478 goto do_brcond_const;
1479 case 1:
1480 goto do_brcond_high;
1481 }
1482
1483 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1484 op->args[3], cond);
1485 switch (i ^ inv) {
1486 case 0:
1487 goto do_brcond_const;
1488 case 1:
1489 goto do_brcond_low;
1490 }
1491 break;
1492
1493 case TCG_COND_TSTEQ:
1494 case TCG_COND_TSTNE:
1495 if (arg_is_const_val(op->args[2], 0)) {
1496 goto do_brcond_high;
1497 }
1498 if (arg_is_const_val(op->args[3], 0)) {
1499 goto do_brcond_low;
1500 }
1501 break;
1502
1503 default:
1504 break;
1505
1506 do_brcond_low:
1507 op->opc = INDEX_op_brcond_i32;
1508 op->args[1] = op->args[2];
1509 op->args[2] = cond;
1510 op->args[3] = label;
1511 return fold_brcond(ctx, op);
1512
1513 do_brcond_high:
1514 op->opc = INDEX_op_brcond_i32;
1515 op->args[0] = op->args[1];
1516 op->args[1] = op->args[3];
1517 op->args[2] = cond;
1518 op->args[3] = label;
1519 return fold_brcond(ctx, op);
1520
1521 do_brcond_const:
1522 if (i == 0) {
1523 tcg_op_remove(ctx->tcg, op);
1524 return true;
1525 }
1526 op->opc = INDEX_op_br;
1527 op->args[0] = label;
1528 finish_ebb(ctx);
1529 return true;
1530 }
1531
1532 finish_bb(ctx);
1533 return true;
1534 }
1535
static bool fold_bswap(OptContext *ctx, TCGOp *op)
1537 {
1538 uint64_t z_mask, s_mask, sign;
1539 TempOptInfo *t1 = arg_info(op->args[1]);
1540
1541 if (ti_is_const(t1)) {
1542 return tcg_opt_gen_movi(ctx, op, op->args[0],
1543 do_constant_folding(op->opc, ctx->type,
1544 ti_const_val(t1),
1545 op->args[2]));
1546 }
1547
1548 z_mask = t1->z_mask;
1549 switch (op->opc) {
1550 case INDEX_op_bswap16_i32:
1551 case INDEX_op_bswap16_i64:
1552 z_mask = bswap16(z_mask);
1553 sign = INT16_MIN;
1554 break;
1555 case INDEX_op_bswap32_i32:
1556 case INDEX_op_bswap32_i64:
1557 z_mask = bswap32(z_mask);
1558 sign = INT32_MIN;
1559 break;
1560 case INDEX_op_bswap64_i64:
1561 z_mask = bswap64(z_mask);
1562 sign = INT64_MIN;
1563 break;
1564 default:
1565 g_assert_not_reached();
1566 }
1567
1568 s_mask = 0;
1569 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1570 case TCG_BSWAP_OZ:
1571 break;
1572 case TCG_BSWAP_OS:
1573 /* If the sign bit may be 1, force all the bits above to 1. */
1574 if (z_mask & sign) {
1575 z_mask |= sign;
1576 }
1577 /* The value and therefore s_mask is explicitly sign-extended. */
1578 s_mask = sign;
1579 break;
1580 default:
1581 /* The high bits are undefined: force all bits above the sign to 1. */
1582 z_mask |= sign << 1;
1583 break;
1584 }
1585
1586 return fold_masks_zs(ctx, op, z_mask, s_mask);
1587 }
1588
static bool fold_call(OptContext *ctx, TCGOp *op)
1590 {
1591 TCGContext *s = ctx->tcg;
1592 int nb_oargs = TCGOP_CALLO(op);
1593 int nb_iargs = TCGOP_CALLI(op);
1594 int flags, i;
1595
1596 init_arguments(ctx, op, nb_oargs + nb_iargs);
1597 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1598
1599 /* If the function reads or writes globals, reset temp data. */
1600 flags = tcg_call_flags(op);
1601 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1602 int nb_globals = s->nb_globals;
1603
1604 for (i = 0; i < nb_globals; i++) {
1605 if (test_bit(i, ctx->temps_used.l)) {
1606 reset_ts(ctx, &ctx->tcg->temps[i]);
1607 }
1608 }
1609 }
1610
1611 /* If the function has side effects, reset mem data. */
1612 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1613 remove_mem_copy_all(ctx);
1614 }
1615
1616 /* Reset temp data for outputs. */
1617 for (i = 0; i < nb_oargs; i++) {
1618 reset_temp(ctx, op->args[i]);
1619 }
1620
1621 /* Stop optimizing MB across calls. */
1622 ctx->prev_mb = NULL;
1623 return true;
1624 }
1625
static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1627 {
1628 /* Canonicalize the comparison to put immediate second. */
1629 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1630 op->args[3] = tcg_swap_cond(op->args[3]);
1631 }
1632 return finish_folding(ctx, op);
1633 }
1634
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1636 {
1637 /* If true and false values are the same, eliminate the cmp. */
1638 if (args_are_copies(op->args[3], op->args[4])) {
1639 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1640 }
1641
1642 /* Canonicalize the comparison to put immediate second. */
1643 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1644 op->args[5] = tcg_swap_cond(op->args[5]);
1645 }
1646 /*
1647 * Canonicalize the "false" input reg to match the destination,
1648 * so that the tcg backend can implement "move if true".
1649 */
1650 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1651 op->args[5] = tcg_invert_cond(op->args[5]);
1652 }
1653 return finish_folding(ctx, op);
1654 }
1655
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1657 {
1658 uint64_t z_mask, s_mask;
1659 TempOptInfo *t1 = arg_info(op->args[1]);
1660 TempOptInfo *t2 = arg_info(op->args[2]);
1661
1662 if (ti_is_const(t1)) {
1663 uint64_t t = ti_const_val(t1);
1664
1665 if (t != 0) {
1666 t = do_constant_folding(op->opc, ctx->type, t, 0);
1667 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1668 }
1669 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1670 }
1671
1672 switch (ctx->type) {
1673 case TCG_TYPE_I32:
1674 z_mask = 31;
1675 break;
1676 case TCG_TYPE_I64:
1677 z_mask = 63;
1678 break;
1679 default:
1680 g_assert_not_reached();
1681 }
1682 s_mask = ~z_mask;
1683 z_mask |= t2->z_mask;
1684 s_mask &= t2->s_mask;
1685
1686 return fold_masks_zs(ctx, op, z_mask, s_mask);
1687 }
1688
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1690 {
1691 uint64_t z_mask;
1692
1693 if (fold_const1(ctx, op)) {
1694 return true;
1695 }
1696
1697 switch (ctx->type) {
1698 case TCG_TYPE_I32:
1699 z_mask = 32 | 31;
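        /* A 32-bit population count is at most 32, so only bits 0..5 can be set. */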
1700 break;
1701 case TCG_TYPE_I64:
1702 z_mask = 64 | 63;
1703 break;
1704 default:
1705 g_assert_not_reached();
1706 }
1707 return fold_masks_z(ctx, op, z_mask);
1708 }
1709
static bool fold_deposit(OptContext *ctx, TCGOp *op)
1711 {
1712 TempOptInfo *t1 = arg_info(op->args[1]);
1713 TempOptInfo *t2 = arg_info(op->args[2]);
1714 int ofs = op->args[3];
1715 int len = op->args[4];
1716 int width;
1717 TCGOpcode and_opc;
1718 uint64_t z_mask, s_mask;
1719
1720 if (ti_is_const(t1) && ti_is_const(t2)) {
1721 return tcg_opt_gen_movi(ctx, op, op->args[0],
1722 deposit64(ti_const_val(t1), ofs, len,
1723 ti_const_val(t2)));
1724 }
1725
1726 switch (ctx->type) {
1727 case TCG_TYPE_I32:
1728 and_opc = INDEX_op_and_i32;
1729 width = 32;
1730 break;
1731 case TCG_TYPE_I64:
1732 and_opc = INDEX_op_and_i64;
1733 width = 64;
1734 break;
1735 default:
1736 g_assert_not_reached();
1737 }
1738
1739 /* Inserting a value into zero at offset 0. */
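    /* For example, depositing x into constant 0 at [0, 8) becomes x & 0xff. */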
1740 if (ti_is_const_val(t1, 0) && ofs == 0) {
1741 uint64_t mask = MAKE_64BIT_MASK(0, len);
1742
1743 op->opc = and_opc;
1744 op->args[1] = op->args[2];
1745 op->args[2] = arg_new_constant(ctx, mask);
1746 return fold_and(ctx, op);
1747 }
1748
1749 /* Inserting zero into a value. */
1750 if (ti_is_const_val(t2, 0)) {
1751 uint64_t mask = deposit64(-1, ofs, len, 0);
1752
1753 op->opc = and_opc;
1754 op->args[2] = arg_new_constant(ctx, mask);
1755 return fold_and(ctx, op);
1756 }
1757
1758 /* The s_mask from the top portion of the deposit is still valid. */
1759 if (ofs + len == width) {
1760 s_mask = t2->s_mask << ofs;
1761 } else {
1762 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1763 }
1764
1765 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1766 return fold_masks_zs(ctx, op, z_mask, s_mask);
1767 }
1768
static bool fold_divide(OptContext *ctx, TCGOp *op)
1770 {
1771 if (fold_const2(ctx, op) ||
1772 fold_xi_to_x(ctx, op, 1)) {
1773 return true;
1774 }
1775 return finish_folding(ctx, op);
1776 }
1777
static bool fold_dup(OptContext *ctx, TCGOp *op)
1779 {
1780 if (arg_is_const(op->args[1])) {
1781 uint64_t t = arg_info(op->args[1])->val;
1782 t = dup_const(TCGOP_VECE(op), t);
1783 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1784 }
1785 return finish_folding(ctx, op);
1786 }
1787
static bool fold_dup2(OptContext *ctx, TCGOp *op)
1789 {
1790 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1791 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1792 arg_info(op->args[2])->val);
1793 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1794 }
1795
1796 if (args_are_copies(op->args[1], op->args[2])) {
1797 op->opc = INDEX_op_dup_vec;
1798 TCGOP_VECE(op) = MO_32;
1799 }
1800 return finish_folding(ctx, op);
1801 }
1802
static bool fold_eqv(OptContext *ctx, TCGOp *op)
1804 {
1805 uint64_t s_mask;
1806
1807 if (fold_const2_commutative(ctx, op) ||
1808 fold_xi_to_x(ctx, op, -1) ||
1809 fold_xi_to_not(ctx, op, 0)) {
1810 return true;
1811 }
1812
1813 s_mask = arg_info(op->args[1])->s_mask
1814 & arg_info(op->args[2])->s_mask;
1815 return fold_masks_s(ctx, op, s_mask);
1816 }
1817
static bool fold_extract(OptContext *ctx, TCGOp *op)
1819 {
1820 uint64_t z_mask_old, z_mask;
1821 TempOptInfo *t1 = arg_info(op->args[1]);
1822 int pos = op->args[2];
1823 int len = op->args[3];
1824
1825 if (ti_is_const(t1)) {
1826 return tcg_opt_gen_movi(ctx, op, op->args[0],
1827 extract64(ti_const_val(t1), pos, len));
1828 }
1829
1830 z_mask_old = t1->z_mask;
1831 z_mask = extract64(z_mask_old, pos, len);
1832 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1833 return true;
1834 }
1835
1836 return fold_masks_z(ctx, op, z_mask);
1837 }
1838
static bool fold_extract2(OptContext *ctx, TCGOp *op)
1840 {
1841 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1842 uint64_t v1 = arg_info(op->args[1])->val;
1843 uint64_t v2 = arg_info(op->args[2])->val;
1844 int shr = op->args[3];
1845
1846 if (op->opc == INDEX_op_extract2_i64) {
1847 v1 >>= shr;
1848 v2 <<= 64 - shr;
1849 } else {
1850 v1 = (uint32_t)v1 >> shr;
1851 v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1852 }
1853 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1854 }
1855 return finish_folding(ctx, op);
1856 }
1857
static bool fold_exts(OptContext *ctx, TCGOp *op)
1859 {
1860 uint64_t s_mask_old, s_mask, z_mask;
1861 bool type_change = false;
1862 TempOptInfo *t1;
1863
1864 if (fold_const1(ctx, op)) {
1865 return true;
1866 }
1867
1868 t1 = arg_info(op->args[1]);
1869 z_mask = t1->z_mask;
1870 s_mask = t1->s_mask;
1871 s_mask_old = s_mask;
1872
1873 switch (op->opc) {
1874 CASE_OP_32_64(ext8s):
1875 s_mask |= INT8_MIN;
1876 z_mask = (int8_t)z_mask;
1877 break;
1878 CASE_OP_32_64(ext16s):
1879 s_mask |= INT16_MIN;
1880 z_mask = (int16_t)z_mask;
1881 break;
1882 case INDEX_op_ext_i32_i64:
1883 type_change = true;
1884 QEMU_FALLTHROUGH;
1885 case INDEX_op_ext32s_i64:
1886 s_mask |= INT32_MIN;
1887 z_mask = (int32_t)z_mask;
1888 break;
1889 default:
1890 g_assert_not_reached();
1891 }
1892
1893 if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
1894 return true;
1895 }
1896
1897 return fold_masks_zs(ctx, op, z_mask, s_mask);
1898 }
1899
static bool fold_extu(OptContext *ctx, TCGOp *op)
1901 {
1902 uint64_t z_mask_old, z_mask;
1903 bool type_change = false;
1904
1905 if (fold_const1(ctx, op)) {
1906 return true;
1907 }
1908
1909 z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1910
1911 switch (op->opc) {
1912 CASE_OP_32_64(ext8u):
1913 z_mask = (uint8_t)z_mask;
1914 break;
1915 CASE_OP_32_64(ext16u):
1916 z_mask = (uint16_t)z_mask;
1917 break;
1918 case INDEX_op_extrl_i64_i32:
1919 case INDEX_op_extu_i32_i64:
1920 type_change = true;
1921 QEMU_FALLTHROUGH;
1922 case INDEX_op_ext32u_i64:
1923 z_mask = (uint32_t)z_mask;
1924 break;
1925 case INDEX_op_extrh_i64_i32:
1926 type_change = true;
1927 z_mask >>= 32;
1928 break;
1929 default:
1930 g_assert_not_reached();
1931 }
1932
1933 if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1934 return true;
1935 }
1936
1937 return fold_masks_z(ctx, op, z_mask);
1938 }
1939
static bool fold_mb(OptContext *ctx, TCGOp *op)
1941 {
1942 /* Eliminate duplicate and redundant fence instructions. */
1943 if (ctx->prev_mb) {
1944 /*
1945 * Merge two barriers of the same type into one,
1946 * or a weaker barrier into a stronger one,
1947 * or two weaker barriers into a stronger one.
1948 * mb X; mb Y => mb X|Y
1949 * mb; strl => mb; st
1950 * ldaq; mb => ld; mb
1951 * ldaq; strl => ld; mb; st
1952 * Other combinations are also merged into a strong
1953 * barrier. This is stricter than specified but for
1954 * the purposes of TCG is better than not optimizing.
1955 */
1956 ctx->prev_mb->args[0] |= op->args[0];
1957 tcg_op_remove(ctx->tcg, op);
1958 } else {
1959 ctx->prev_mb = op;
1960 }
1961 return true;
1962 }
1963
static bool fold_mov(OptContext *ctx, TCGOp *op)
1965 {
1966 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1967 }
1968
static bool fold_movcond(OptContext *ctx, TCGOp *op)
1970 {
1971 uint64_t z_mask, s_mask;
1972 TempOptInfo *tt, *ft;
1973 int i;
1974
1975 /* If true and false values are the same, eliminate the cmp. */
1976 if (args_are_copies(op->args[3], op->args[4])) {
1977 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1978 }
1979
1980 /*
1981 * Canonicalize the "false" input reg to match the destination reg so
1982 * that the tcg backend can implement a "move if true" operation.
1983 */
1984 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1985 op->args[5] = tcg_invert_cond(op->args[5]);
1986 }
1987
1988 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
1989 &op->args[2], &op->args[5]);
1990 if (i >= 0) {
1991 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1992 }
1993
1994 tt = arg_info(op->args[3]);
1995 ft = arg_info(op->args[4]);
1996 z_mask = tt->z_mask | ft->z_mask;
1997 s_mask = tt->s_mask & ft->s_mask;
1998
1999 if (ti_is_const(tt) && ti_is_const(ft)) {
2000 uint64_t tv = ti_const_val(tt);
2001 uint64_t fv = ti_const_val(ft);
2002 TCGOpcode opc, negopc = 0;
2003 TCGCond cond = op->args[5];
2004
2005 switch (ctx->type) {
2006 case TCG_TYPE_I32:
2007 opc = INDEX_op_setcond_i32;
2008 if (TCG_TARGET_HAS_negsetcond_i32) {
2009 negopc = INDEX_op_negsetcond_i32;
2010 }
2011 tv = (int32_t)tv;
2012 fv = (int32_t)fv;
2013 break;
2014 case TCG_TYPE_I64:
2015 opc = INDEX_op_setcond_i64;
2016 if (TCG_TARGET_HAS_negsetcond_i64) {
2017 negopc = INDEX_op_negsetcond_i64;
2018 }
2019 break;
2020 default:
2021 g_assert_not_reached();
2022 }
2023
2024 if (tv == 1 && fv == 0) {
2025 op->opc = opc;
2026 op->args[3] = cond;
2027 } else if (fv == 1 && tv == 0) {
2028 op->opc = opc;
2029 op->args[3] = tcg_invert_cond(cond);
2030 } else if (negopc) {
2031 if (tv == -1 && fv == 0) {
2032 op->opc = negopc;
2033 op->args[3] = cond;
2034 } else if (fv == -1 && tv == 0) {
2035 op->opc = negopc;
2036 op->args[3] = tcg_invert_cond(cond);
2037 }
2038 }
2039 }
2040
2041 return fold_masks_zs(ctx, op, z_mask, s_mask);
2042 }
2043
2044 static bool fold_mul(OptContext *ctx, TCGOp *op)
2045 {
2046 if (fold_const2(ctx, op) ||
2047 fold_xi_to_i(ctx, op, 0) ||
2048 fold_xi_to_x(ctx, op, 1)) {
2049 return true;
2050 }
2051 return finish_folding(ctx, op);
2052 }
2053
2054 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2055 {
2056 if (fold_const2_commutative(ctx, op) ||
2057 fold_xi_to_i(ctx, op, 0)) {
2058 return true;
2059 }
2060 return finish_folding(ctx, op);
2061 }
2062
2063 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2064 {
2065 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2066
2067 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2068 uint64_t a = arg_info(op->args[2])->val;
2069 uint64_t b = arg_info(op->args[3])->val;
2070 uint64_t h, l;
2071 TCGArg rl, rh;
2072 TCGOp *op2;
2073
2074 switch (op->opc) {
2075 case INDEX_op_mulu2_i32:
2076 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2077 h = (int32_t)(l >> 32);
2078 l = (int32_t)l;
2079 break;
2080 case INDEX_op_muls2_i32:
2081 l = (int64_t)(int32_t)a * (int32_t)b;
2082 h = l >> 32;
2083 l = (int32_t)l;
2084 break;
2085 case INDEX_op_mulu2_i64:
2086 mulu64(&l, &h, a, b);
2087 break;
2088 case INDEX_op_muls2_i64:
2089 muls64(&l, &h, a, b);
2090 break;
2091 default:
2092 g_assert_not_reached();
2093 }
2094
2095 rl = op->args[0];
2096 rh = op->args[1];
2097
2098 /* Opcode 0 is a placeholder; the proper opcode is supplied by tcg_opt_gen_mov when the results are stored below. */
2099 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
2100
2101 tcg_opt_gen_movi(ctx, op, rl, l);
2102 tcg_opt_gen_movi(ctx, op2, rh, h);
2103 return true;
2104 }
2105 return finish_folding(ctx, op);
2106 }
2107
2108 static bool fold_nand(OptContext *ctx, TCGOp *op)
2109 {
2110 uint64_t s_mask;
2111
2112 if (fold_const2_commutative(ctx, op) ||
2113 fold_xi_to_not(ctx, op, -1)) {
2114 return true;
2115 }
2116
2117 s_mask = arg_info(op->args[1])->s_mask
2118 & arg_info(op->args[2])->s_mask;
2119 return fold_masks_s(ctx, op, s_mask);
2120 }
2121
2122 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2123 {
2124 /* Set to 1 all bits from the rightmost possibly-set bit upward. */
2125 uint64_t z_mask = arg_info(op->args[1])->z_mask;
2126 z_mask = -(z_mask & -z_mask);
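/* E.g. z_mask 0x34 yields 0xfffffffffffffffc: -x can have no set bits
   below the lowest bit that may be set in x. */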
2127
2128 return fold_masks_z(ctx, op, z_mask);
2129 }
2130
2131 static bool fold_neg(OptContext *ctx, TCGOp *op)
2132 {
2133 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2134 }
2135
2136 static bool fold_nor(OptContext *ctx, TCGOp *op)
2137 {
2138 uint64_t s_mask;
2139
2140 if (fold_const2_commutative(ctx, op) ||
2141 fold_xi_to_not(ctx, op, 0)) {
2142 return true;
2143 }
2144
2145 s_mask = arg_info(op->args[1])->s_mask
2146 & arg_info(op->args[2])->s_mask;
2147 return fold_masks_s(ctx, op, s_mask);
2148 }
2149
2150 static bool fold_not(OptContext *ctx, TCGOp *op)
2151 {
2152 if (fold_const1(ctx, op)) {
2153 return true;
2154 }
2155 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
2156 }
2157
2158 static bool fold_or(OptContext *ctx, TCGOp *op)
2159 {
2160 uint64_t z_mask, s_mask;
2161 TempOptInfo *t1, *t2;
2162
2163 if (fold_const2_commutative(ctx, op) ||
2164 fold_xi_to_x(ctx, op, 0) ||
2165 fold_xx_to_x(ctx, op)) {
2166 return true;
2167 }
2168
2169 t1 = arg_info(op->args[1]);
2170 t2 = arg_info(op->args[2]);
2171 z_mask = t1->z_mask | t2->z_mask;
2172 s_mask = t1->s_mask & t2->s_mask;
2173 return fold_masks_zs(ctx, op, z_mask, s_mask);
2174 }
2175
2176 static bool fold_orc(OptContext *ctx, TCGOp *op)
2177 {
2178 uint64_t s_mask;
2179
2180 if (fold_const2(ctx, op) ||
2181 fold_xx_to_i(ctx, op, -1) ||
2182 fold_xi_to_x(ctx, op, -1) ||
2183 fold_ix_to_not(ctx, op, 0)) {
2184 return true;
2185 }
2186
2187 s_mask = arg_info(op->args[1])->s_mask
2188 & arg_info(op->args[2])->s_mask;
2189 return fold_masks_s(ctx, op, s_mask);
2190 }
2191
2192 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2193 {
2194 const TCGOpDef *def = &tcg_op_defs[op->opc];
2195 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2196 MemOp mop = get_memop(oi);
2197 int width = 8 * memop_size(mop);
2198 uint64_t z_mask = -1, s_mask = 0;
2199
2200 if (width < 64) {
2201 if (mop & MO_SIGN) {
2202 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2203 } else {
2204 z_mask = MAKE_64BIT_MASK(0, width);
2205 }
2206 }
2207
2208 /* Opcodes that touch guest memory stop the mb optimization. */
2209 ctx->prev_mb = NULL;
2210
2211 return fold_masks_zs(ctx, op, z_mask, s_mask);
2212 }
2213
2214 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2215 {
2216 /* Opcodes that touch guest memory stop the mb optimization. */
2217 ctx->prev_mb = NULL;
2218 return finish_folding(ctx, op);
2219 }
2220
2221 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2222 {
2223 /* Opcodes that touch guest memory stop the mb optimization. */
2224 ctx->prev_mb = NULL;
2225 return true;
2226 }
2227
2228 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2229 {
2230 if (fold_const2(ctx, op) ||
2231 fold_xx_to_i(ctx, op, 0)) {
2232 return true;
2233 }
2234 return finish_folding(ctx, op);
2235 }
2236
2237 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2238 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2239 {
2240 uint64_t a_zmask, b_val;
2241 TCGCond cond;
2242
2243 if (!arg_is_const(op->args[2])) {
2244 return 0;
2245 }
2246
2247 a_zmask = arg_info(op->args[1])->z_mask;
2248 b_val = arg_info(op->args[2])->val;
2249 cond = op->args[3];
2250
2251 if (ctx->type == TCG_TYPE_I32) {
2252 a_zmask = (uint32_t)a_zmask;
2253 b_val = (uint32_t)b_val;
2254 }
2255
2256 /*
2257 * A with only low bits set vs B with high bits set means that A < B.
2258 */
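/* E.g. a_zmask 0xffff vs b_val 0x10000: EQ/GEU/GTU are always false,
   NE/LEU/LTU always true. */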
2259 if (a_zmask < b_val) {
2260 bool inv = false;
2261
2262 switch (cond) {
2263 case TCG_COND_NE:
2264 case TCG_COND_LEU:
2265 case TCG_COND_LTU:
2266 inv = true;
2267 /* fall through */
2268 case TCG_COND_GTU:
2269 case TCG_COND_GEU:
2270 case TCG_COND_EQ:
2271 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2272 default:
2273 break;
2274 }
2275 }
2276
2277 /*
2278 * A with only lsb set is already boolean.
2279 */
2280 if (a_zmask <= 1) {
2281 bool convert = false;
2282 bool inv = false;
2283
2284 switch (cond) {
2285 case TCG_COND_EQ:
2286 inv = true;
2287 /* fall through */
2288 case TCG_COND_NE:
2289 convert = (b_val == 0);
2290 break;
2291 case TCG_COND_LTU:
2292 case TCG_COND_TSTEQ:
2293 inv = true;
2294 /* fall through */
2295 case TCG_COND_GEU:
2296 case TCG_COND_TSTNE:
2297 convert = (b_val == 1);
2298 break;
2299 default:
2300 break;
2301 }
2302 if (convert) {
2303 TCGOpcode add_opc, xor_opc, neg_opc;
2304
2305 if (!inv && !neg) {
2306 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2307 }
2308
2309 switch (ctx->type) {
2310 case TCG_TYPE_I32:
2311 add_opc = INDEX_op_add_i32;
2312 neg_opc = INDEX_op_neg_i32;
2313 xor_opc = INDEX_op_xor_i32;
2314 break;
2315 case TCG_TYPE_I64:
2316 add_opc = INDEX_op_add_i64;
2317 neg_opc = INDEX_op_neg_i64;
2318 xor_opc = INDEX_op_xor_i64;
2319 break;
2320 default:
2321 g_assert_not_reached();
2322 }
2323
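/*
 * The input is already 0/1, so rewrite to plain arithmetic:
 * neg x for negsetcond, x ^ 1 to invert, x + (-1) to invert and
 * negate in one step.
 */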
2324 if (!inv) {
2325 op->opc = neg_opc;
2326 } else if (neg) {
2327 op->opc = add_opc;
2328 op->args[2] = arg_new_constant(ctx, -1);
2329 } else {
2330 op->opc = xor_opc;
2331 op->args[2] = arg_new_constant(ctx, 1);
2332 }
2333 return -1;
2334 }
2335 }
2336 return 0;
2337 }
2338
2339 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2340 {
2341 TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
2342 TCGOpcode uext_opc = 0, sext_opc = 0;
2343 TCGCond cond = op->args[3];
2344 TCGArg ret, src1, src2;
2345 TCGOp *op2;
2346 uint64_t val;
2347 int sh;
2348 bool inv;
2349
2350 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2351 return;
2352 }
2353
2354 src2 = op->args[2];
2355 val = arg_info(src2)->val;
2356 if (!is_power_of_2(val)) {
2357 return;
2358 }
2359 sh = ctz64(val);
2360
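/*
 * setcond TSTNE x, (1 << sh) tests a single bit and is equivalent to
 * extracting bit sh, i.e. (x >> sh) & 1.  TSTEQ additionally inverts
 * the result, and the negsetcond forms negate it.
 */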
2361 switch (ctx->type) {
2362 case TCG_TYPE_I32:
2363 and_opc = INDEX_op_and_i32;
2364 sub_opc = INDEX_op_sub_i32;
2365 xor_opc = INDEX_op_xor_i32;
2366 shr_opc = INDEX_op_shr_i32;
2367 neg_opc = INDEX_op_neg_i32;
2368 if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
2369 uext_opc = INDEX_op_extract_i32;
2370 }
2371 if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
2372 sext_opc = INDEX_op_sextract_i32;
2373 }
2374 break;
2375 case TCG_TYPE_I64:
2376 and_opc = INDEX_op_and_i64;
2377 sub_opc = INDEX_op_sub_i64;
2378 xor_opc = INDEX_op_xor_i64;
2379 shr_opc = INDEX_op_shr_i64;
2380 neg_opc = INDEX_op_neg_i64;
2381 if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
2382 uext_opc = INDEX_op_extract_i64;
2383 }
2384 if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
2385 sext_opc = INDEX_op_sextract_i64;
2386 }
2387 break;
2388 default:
2389 g_assert_not_reached();
2390 }
2391
2392 ret = op->args[0];
2393 src1 = op->args[1];
2394 inv = cond == TCG_COND_TSTEQ;
2395
2396 if (sh && sext_opc && neg && !inv) {
2397 op->opc = sext_opc;
2398 op->args[1] = src1;
2399 op->args[2] = sh;
2400 op->args[3] = 1;
2401 return;
2402 } else if (sh && uext_opc) {
2403 op->opc = uext_opc;
2404 op->args[1] = src1;
2405 op->args[2] = sh;
2406 op->args[3] = 1;
2407 } else {
2408 if (sh) {
2409 op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
2410 op2->args[0] = ret;
2411 op2->args[1] = src1;
2412 op2->args[2] = arg_new_constant(ctx, sh);
2413 src1 = ret;
2414 }
2415 op->opc = and_opc;
2416 op->args[1] = src1;
2417 op->args[2] = arg_new_constant(ctx, 1);
2418 }
2419
2420 if (neg && inv) {
2421 op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
2422 op2->args[0] = ret;
2423 op2->args[1] = ret;
2424 op2->args[2] = arg_new_constant(ctx, 1);
2425 } else if (inv) {
2426 op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
2427 op2->args[0] = ret;
2428 op2->args[1] = ret;
2429 op2->args[2] = arg_new_constant(ctx, 1);
2430 } else if (neg) {
2431 op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
2432 op2->args[0] = ret;
2433 op2->args[1] = ret;
2434 }
2435 }
2436
2437 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2438 {
2439 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2440 &op->args[2], &op->args[3]);
2441 if (i >= 0) {
2442 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2443 }
2444
2445 i = fold_setcond_zmask(ctx, op, false);
2446 if (i > 0) {
2447 return true;
2448 }
2449 if (i == 0) {
2450 fold_setcond_tst_pow2(ctx, op, false);
2451 }
2452
2453 return fold_masks_z(ctx, op, 1);
2454 }
2455
2456 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2457 {
2458 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2459 &op->args[2], &op->args[3]);
2460 if (i >= 0) {
2461 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2462 }
2463
2464 i = fold_setcond_zmask(ctx, op, true);
2465 if (i > 0) {
2466 return true;
2467 }
2468 if (i == 0) {
2469 fold_setcond_tst_pow2(ctx, op, true);
2470 }
2471
2472 /* Value is {0,-1} so all bits are repetitions of the sign. */
2473 return fold_masks_s(ctx, op, -1);
2474 }
2475
2476 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2477 {
2478 TCGCond cond;
2479 int i, inv = 0;
2480
2481 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2482 cond = op->args[5];
2483 if (i >= 0) {
2484 goto do_setcond_const;
2485 }
2486
2487 switch (cond) {
2488 case TCG_COND_LT:
2489 case TCG_COND_GE:
2490 /*
2491 * Simplify LT/GE comparisons vs zero to a single compare
2492 * vs the high word of the input.
2493 */
2494 if (arg_is_const_val(op->args[3], 0) &&
2495 arg_is_const_val(op->args[4], 0)) {
2496 goto do_setcond_high;
2497 }
2498 break;
2499
2500 case TCG_COND_NE:
2501 inv = 1;
2502 QEMU_FALLTHROUGH;
2503 case TCG_COND_EQ:
2504 /*
2505 * Simplify EQ/NE comparisons where one of the pairs
2506 * can be simplified.
2507 */
2508 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2509 op->args[3], cond);
2510 switch (i ^ inv) {
2511 case 0:
2512 goto do_setcond_const;
2513 case 1:
2514 goto do_setcond_high;
2515 }
2516
2517 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2518 op->args[4], cond);
2519 switch (i ^ inv) {
2520 case 0:
2521 goto do_setcond_const;
2522 case 1:
2523 goto do_setcond_low;
2524 }
2525 break;
2526
2527 case TCG_COND_TSTEQ:
2528 case TCG_COND_TSTNE:
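/*
 * A test against a zero mask in one half depends only on the
 * other half, so reduce to a 32-bit setcond on that half.
 */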
2529 if (arg_is_const_val(op->args[3], 0)) {
2530 goto do_setcond_high;
2531 }
2532 if (arg_is_const_val(op->args[4], 0)) {
2533 goto do_setcond_low;
2534 }
2535 break;
2536
2537 default:
2538 break;
2539
2540 do_setcond_low:
2541 op->args[2] = op->args[3];
2542 op->args[3] = cond;
2543 op->opc = INDEX_op_setcond_i32;
2544 return fold_setcond(ctx, op);
2545
2546 do_setcond_high:
2547 op->args[1] = op->args[2];
2548 op->args[2] = op->args[4];
2549 op->args[3] = cond;
2550 op->opc = INDEX_op_setcond_i32;
2551 return fold_setcond(ctx, op);
2552 }
2553
2554 return fold_masks_z(ctx, op, 1);
2555
2556 do_setcond_const:
2557 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2558 }
2559
2560 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2561 {
2562 uint64_t z_mask, s_mask, s_mask_old;
2563 TempOptInfo *t1 = arg_info(op->args[1]);
2564 int pos = op->args[2];
2565 int len = op->args[3];
2566
2567 if (ti_is_const(t1)) {
2568 return tcg_opt_gen_movi(ctx, op, op->args[0],
2569 sextract64(ti_const_val(t1), pos, len));
2570 }
2571
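/*
 * Bits len-1 and up of the result are all copies of the extracted
 * sign bit and therefore match the result's msb; combine that with
 * whatever sign information survives shifting the input's s_mask.
 */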
2572 s_mask_old = t1->s_mask;
2573 s_mask = s_mask_old >> pos;
2574 s_mask |= -1ull << (len - 1);
2575
2576 if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
2577 return true;
2578 }
2579
2580 z_mask = sextract64(t1->z_mask, pos, len);
2581 return fold_masks_zs(ctx, op, z_mask, s_mask);
2582 }
2583
2584 static bool fold_shift(OptContext *ctx, TCGOp *op)
2585 {
2586 uint64_t s_mask, z_mask;
2587 TempOptInfo *t1, *t2;
2588
2589 if (fold_const2(ctx, op) ||
2590 fold_ix_to_i(ctx, op, 0) ||
2591 fold_xi_to_x(ctx, op, 0)) {
2592 return true;
2593 }
2594
2595 t1 = arg_info(op->args[1]);
2596 t2 = arg_info(op->args[2]);
2597 s_mask = t1->s_mask;
2598 z_mask = t1->z_mask;
2599
2600 if (ti_is_const(t2)) {
2601 int sh = ti_const_val(t2);
2602
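/* For a constant count, shift the known-bit masks by the same amount as the value. */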
2603 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2604 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2605
2606 return fold_masks_zs(ctx, op, z_mask, s_mask);
2607 }
2608
2609 switch (op->opc) {
2610 CASE_OP_32_64(sar):
2611 /*
2612 * Arithmetic right shift will not reduce the number of
2613 * input sign repetitions.
2614 */
2615 return fold_masks_s(ctx, op, s_mask);
2616 CASE_OP_32_64(shr):
2617 /*
2618 * If the sign bit is known zero, then logical right shift
2619 * will not reduce the number of input sign repetitions.
2620 */
2621 if (~z_mask & -s_mask) {
2622 return fold_masks_s(ctx, op, s_mask);
2623 }
2624 break;
2625 default:
2626 break;
2627 }
2628
2629 return finish_folding(ctx, op);
2630 }
2631
2632 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2633 {
2634 TCGOpcode neg_op;
2635 bool have_neg;
2636
2637 if (!arg_is_const_val(op->args[1], 0)) {
2638 return false;
2639 }
2640
2641 switch (ctx->type) {
2642 case TCG_TYPE_I32:
2643 neg_op = INDEX_op_neg_i32;
2644 have_neg = true;
2645 break;
2646 case TCG_TYPE_I64:
2647 neg_op = INDEX_op_neg_i64;
2648 have_neg = true;
2649 break;
2650 case TCG_TYPE_V64:
2651 case TCG_TYPE_V128:
2652 case TCG_TYPE_V256:
2653 neg_op = INDEX_op_neg_vec;
2654 have_neg = (TCG_TARGET_HAS_neg_vec &&
2655 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2656 break;
2657 default:
2658 g_assert_not_reached();
2659 }
2660 if (have_neg) {
2661 op->opc = neg_op;
2662 op->args[1] = op->args[2];
2663 return fold_neg_no_const(ctx, op);
2664 }
2665 return false;
2666 }
2667
2668 /* We cannot as yet do_constant_folding with vectors. */
2669 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2670 {
2671 if (fold_xx_to_i(ctx, op, 0) ||
2672 fold_xi_to_x(ctx, op, 0) ||
2673 fold_sub_to_neg(ctx, op)) {
2674 return true;
2675 }
2676 return finish_folding(ctx, op);
2677 }
2678
2679 static bool fold_sub(OptContext *ctx, TCGOp *op)
2680 {
2681 if (fold_const2(ctx, op) ||
2682 fold_xx_to_i(ctx, op, 0) ||
2683 fold_xi_to_x(ctx, op, 0) ||
2684 fold_sub_to_neg(ctx, op)) {
2685 return true;
2686 }
2687
2688 /* Fold sub r,x,i to add r,x,-i */
2689 if (arg_is_const(op->args[2])) {
2690 uint64_t val = arg_info(op->args[2])->val;
2691
2692 op->opc = (ctx->type == TCG_TYPE_I32
2693 ? INDEX_op_add_i32 : INDEX_op_add_i64);
2694 op->args[2] = arg_new_constant(ctx, -val);
2695 }
2696 return finish_folding(ctx, op);
2697 }
2698
2699 static bool fold_sub2(OptContext *ctx, TCGOp *op)
2700 {
2701 return fold_addsub2(ctx, op, false);
2702 }
2703
2704 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2705 {
2706 uint64_t z_mask = -1, s_mask = 0;
2707
2708 /* We can't do any folding with a load, but we can record bits. */
2709 switch (op->opc) {
2710 CASE_OP_32_64(ld8s):
2711 s_mask = INT8_MIN;
2712 break;
2713 CASE_OP_32_64(ld8u):
2714 z_mask = MAKE_64BIT_MASK(0, 8);
2715 break;
2716 CASE_OP_32_64(ld16s):
2717 s_mask = INT16_MIN;
2718 break;
2719 CASE_OP_32_64(ld16u):
2720 z_mask = MAKE_64BIT_MASK(0, 16);
2721 break;
2722 case INDEX_op_ld32s_i64:
2723 s_mask = INT32_MIN;
2724 break;
2725 case INDEX_op_ld32u_i64:
2726 z_mask = MAKE_64BIT_MASK(0, 32);
2727 break;
2728 default:
2729 g_assert_not_reached();
2730 }
2731 return fold_masks_zs(ctx, op, z_mask, s_mask);
2732 }
2733
2734 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2735 {
2736 TCGTemp *dst, *src;
2737 intptr_t ofs;
2738 TCGType type;
2739
2740 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2741 return finish_folding(ctx, op);
2742 }
2743
2744 type = ctx->type;
2745 ofs = op->args[2];
2746 dst = arg_temp(op->args[0]);
2747 src = find_mem_copy_for(ctx, type, ofs);
2748 if (src && src->base_type == type) {
2749 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2750 }
2751
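/* No usable copy is tracked at this offset: the loaded temp becomes the new canonical copy. */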
2752 reset_ts(ctx, dst);
2753 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2754 return true;
2755 }
2756
2757 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2758 {
2759 intptr_t ofs = op->args[2];
2760 intptr_t lm1;
2761
2762 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2763 remove_mem_copy_all(ctx);
2764 return true;
2765 }
2766
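/* lm1 is the store size in bytes minus one: [ofs, ofs + lm1] is clobbered. */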
2767 switch (op->opc) {
2768 CASE_OP_32_64(st8):
2769 lm1 = 0;
2770 break;
2771 CASE_OP_32_64(st16):
2772 lm1 = 1;
2773 break;
2774 case INDEX_op_st32_i64:
2775 case INDEX_op_st_i32:
2776 lm1 = 3;
2777 break;
2778 case INDEX_op_st_i64:
2779 lm1 = 7;
2780 break;
2781 case INDEX_op_st_vec:
2782 lm1 = tcg_type_size(ctx->type) - 1;
2783 break;
2784 default:
2785 g_assert_not_reached();
2786 }
2787 remove_mem_copy_in(ctx, ofs, ofs + lm1);
2788 return true;
2789 }
2790
2791 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2792 {
2793 TCGTemp *src;
2794 intptr_t ofs, last;
2795 TCGType type;
2796
2797 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2798 return fold_tcg_st(ctx, op);
2799 }
2800
2801 src = arg_temp(op->args[0]);
2802 ofs = op->args[2];
2803 type = ctx->type;
2804
2805 /*
2806 * Eliminate duplicate stores of a constant.
2807 * This happens frequently when the target ISA zero-extends.
2808 */
2809 if (ts_is_const(src)) {
2810 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2811 if (src == prev) {
2812 tcg_op_remove(ctx->tcg, op);
2813 return true;
2814 }
2815 }
2816
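/* Invalidate whatever the store overlaps, then record the stored value as the contents of [ofs, last]. */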
2817 last = ofs + tcg_type_size(type) - 1;
2818 remove_mem_copy_in(ctx, ofs, last);
2819 record_mem_copy(ctx, type, src, ofs, last);
2820 return true;
2821 }
2822
2823 static bool fold_xor(OptContext *ctx, TCGOp *op)
2824 {
2825 uint64_t z_mask, s_mask;
2826 TempOptInfo *t1, *t2;
2827
2828 if (fold_const2_commutative(ctx, op) ||
2829 fold_xx_to_i(ctx, op, 0) ||
2830 fold_xi_to_x(ctx, op, 0) ||
2831 fold_xi_to_not(ctx, op, -1)) {
2832 return true;
2833 }
2834
2835 t1 = arg_info(op->args[1]);
2836 t2 = arg_info(op->args[2]);
2837 z_mask = t1->z_mask | t2->z_mask;
2838 s_mask = t1->s_mask & t2->s_mask;
2839 return fold_masks_zs(ctx, op, z_mask, s_mask);
2840 }
2841
2842 /* Propagate constants and copies, fold constant expressions. */
2843 void tcg_optimize(TCGContext *s)
2844 {
2845 int nb_temps, i;
2846 TCGOp *op, *op_next;
2847 OptContext ctx = { .tcg = s };
2848
2849 QSIMPLEQ_INIT(&ctx.mem_free);
2850
2851 /* Each temp has a TempOptInfo hung off its state_ptr.  If the temp
2852 holds a constant then its value is recorded there.  If the temp is
2853 a copy of other temps, all copies are linked through a doubly
2854 linked circular list. */
2855
2856 nb_temps = s->nb_temps;
2857 for (i = 0; i < nb_temps; ++i) {
2858 s->temps[i].state_ptr = NULL;
2859 }
2860
2861 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2862 TCGOpcode opc = op->opc;
2863 const TCGOpDef *def;
2864 bool done = false;
2865
2866 /* Calls are special. */
2867 if (opc == INDEX_op_call) {
2868 fold_call(&ctx, op);
2869 continue;
2870 }
2871
2872 def = &tcg_op_defs[opc];
2873 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2874 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2875
2876 /* Pre-compute the type of the operation. */
2877 ctx.type = TCGOP_TYPE(op);
2878
2879 /*
2880 * Process each opcode.
2881 * Sorted alphabetically by opcode as much as possible.
2882 */
2883 switch (opc) {
2884 CASE_OP_32_64(add):
2885 done = fold_add(&ctx, op);
2886 break;
2887 case INDEX_op_add_vec:
2888 done = fold_add_vec(&ctx, op);
2889 break;
2890 CASE_OP_32_64(add2):
2891 done = fold_add2(&ctx, op);
2892 break;
2893 CASE_OP_32_64_VEC(and):
2894 done = fold_and(&ctx, op);
2895 break;
2896 CASE_OP_32_64_VEC(andc):
2897 done = fold_andc(&ctx, op);
2898 break;
2899 CASE_OP_32_64(brcond):
2900 done = fold_brcond(&ctx, op);
2901 break;
2902 case INDEX_op_brcond2_i32:
2903 done = fold_brcond2(&ctx, op);
2904 break;
2905 CASE_OP_32_64(bswap16):
2906 CASE_OP_32_64(bswap32):
2907 case INDEX_op_bswap64_i64:
2908 done = fold_bswap(&ctx, op);
2909 break;
2910 CASE_OP_32_64(clz):
2911 CASE_OP_32_64(ctz):
2912 done = fold_count_zeros(&ctx, op);
2913 break;
2914 CASE_OP_32_64(ctpop):
2915 done = fold_ctpop(&ctx, op);
2916 break;
2917 CASE_OP_32_64(deposit):
2918 done = fold_deposit(&ctx, op);
2919 break;
2920 CASE_OP_32_64(div):
2921 CASE_OP_32_64(divu):
2922 done = fold_divide(&ctx, op);
2923 break;
2924 case INDEX_op_dup_vec:
2925 done = fold_dup(&ctx, op);
2926 break;
2927 case INDEX_op_dup2_vec:
2928 done = fold_dup2(&ctx, op);
2929 break;
2930 CASE_OP_32_64_VEC(eqv):
2931 done = fold_eqv(&ctx, op);
2932 break;
2933 CASE_OP_32_64(extract):
2934 done = fold_extract(&ctx, op);
2935 break;
2936 CASE_OP_32_64(extract2):
2937 done = fold_extract2(&ctx, op);
2938 break;
2939 CASE_OP_32_64(ext8s):
2940 CASE_OP_32_64(ext16s):
2941 case INDEX_op_ext32s_i64:
2942 case INDEX_op_ext_i32_i64:
2943 done = fold_exts(&ctx, op);
2944 break;
2945 CASE_OP_32_64(ext8u):
2946 CASE_OP_32_64(ext16u):
2947 case INDEX_op_ext32u_i64:
2948 case INDEX_op_extu_i32_i64:
2949 case INDEX_op_extrl_i64_i32:
2950 case INDEX_op_extrh_i64_i32:
2951 done = fold_extu(&ctx, op);
2952 break;
2953 CASE_OP_32_64(ld8s):
2954 CASE_OP_32_64(ld8u):
2955 CASE_OP_32_64(ld16s):
2956 CASE_OP_32_64(ld16u):
2957 case INDEX_op_ld32s_i64:
2958 case INDEX_op_ld32u_i64:
2959 done = fold_tcg_ld(&ctx, op);
2960 break;
2961 case INDEX_op_ld_i32:
2962 case INDEX_op_ld_i64:
2963 case INDEX_op_ld_vec:
2964 done = fold_tcg_ld_memcopy(&ctx, op);
2965 break;
2966 CASE_OP_32_64(st8):
2967 CASE_OP_32_64(st16):
2968 case INDEX_op_st32_i64:
2969 done = fold_tcg_st(&ctx, op);
2970 break;
2971 case INDEX_op_st_i32:
2972 case INDEX_op_st_i64:
2973 case INDEX_op_st_vec:
2974 done = fold_tcg_st_memcopy(&ctx, op);
2975 break;
2976 case INDEX_op_mb:
2977 done = fold_mb(&ctx, op);
2978 break;
2979 CASE_OP_32_64_VEC(mov):
2980 done = fold_mov(&ctx, op);
2981 break;
2982 CASE_OP_32_64(movcond):
2983 done = fold_movcond(&ctx, op);
2984 break;
2985 CASE_OP_32_64(mul):
2986 done = fold_mul(&ctx, op);
2987 break;
2988 CASE_OP_32_64(mulsh):
2989 CASE_OP_32_64(muluh):
2990 done = fold_mul_highpart(&ctx, op);
2991 break;
2992 CASE_OP_32_64(muls2):
2993 CASE_OP_32_64(mulu2):
2994 done = fold_multiply2(&ctx, op);
2995 break;
2996 CASE_OP_32_64_VEC(nand):
2997 done = fold_nand(&ctx, op);
2998 break;
2999 CASE_OP_32_64(neg):
3000 done = fold_neg(&ctx, op);
3001 break;
3002 CASE_OP_32_64_VEC(nor):
3003 done = fold_nor(&ctx, op);
3004 break;
3005 CASE_OP_32_64_VEC(not):
3006 done = fold_not(&ctx, op);
3007 break;
3008 CASE_OP_32_64_VEC(or):
3009 done = fold_or(&ctx, op);
3010 break;
3011 CASE_OP_32_64_VEC(orc):
3012 done = fold_orc(&ctx, op);
3013 break;
3014 case INDEX_op_qemu_ld_i32:
3015 done = fold_qemu_ld_1reg(&ctx, op);
3016 break;
3017 case INDEX_op_qemu_ld_i64:
3018 if (TCG_TARGET_REG_BITS == 64) {
3019 done = fold_qemu_ld_1reg(&ctx, op);
3020 break;
3021 }
3022 QEMU_FALLTHROUGH;
3023 case INDEX_op_qemu_ld_i128:
3024 done = fold_qemu_ld_2reg(&ctx, op);
3025 break;
3026 case INDEX_op_qemu_st8_i32:
3027 case INDEX_op_qemu_st_i32:
3028 case INDEX_op_qemu_st_i64:
3029 case INDEX_op_qemu_st_i128:
3030 done = fold_qemu_st(&ctx, op);
3031 break;
3032 CASE_OP_32_64(rem):
3033 CASE_OP_32_64(remu):
3034 done = fold_remainder(&ctx, op);
3035 break;
3036 CASE_OP_32_64(rotl):
3037 CASE_OP_32_64(rotr):
3038 CASE_OP_32_64(sar):
3039 CASE_OP_32_64(shl):
3040 CASE_OP_32_64(shr):
3041 done = fold_shift(&ctx, op);
3042 break;
3043 CASE_OP_32_64(setcond):
3044 done = fold_setcond(&ctx, op);
3045 break;
3046 CASE_OP_32_64(negsetcond):
3047 done = fold_negsetcond(&ctx, op);
3048 break;
3049 case INDEX_op_setcond2_i32:
3050 done = fold_setcond2(&ctx, op);
3051 break;
3052 case INDEX_op_cmp_vec:
3053 done = fold_cmp_vec(&ctx, op);
3054 break;
3055 case INDEX_op_cmpsel_vec:
3056 done = fold_cmpsel_vec(&ctx, op);
3057 break;
3058 case INDEX_op_bitsel_vec:
3059 done = fold_bitsel_vec(&ctx, op);
3060 break;
3061 CASE_OP_32_64(sextract):
3062 done = fold_sextract(&ctx, op);
3063 break;
3064 CASE_OP_32_64(sub):
3065 done = fold_sub(&ctx, op);
3066 break;
3067 case INDEX_op_sub_vec:
3068 done = fold_sub_vec(&ctx, op);
3069 break;
3070 CASE_OP_32_64(sub2):
3071 done = fold_sub2(&ctx, op);
3072 break;
3073 CASE_OP_32_64_VEC(xor):
3074 done = fold_xor(&ctx, op);
3075 break;
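/*
 * Ops that end or leave the extended basic block: the values tracked
 * by the optimizer cannot be carried across them, so the state is reset.
 */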
3076 case INDEX_op_set_label:
3077 case INDEX_op_br:
3078 case INDEX_op_exit_tb:
3079 case INDEX_op_goto_tb:
3080 case INDEX_op_goto_ptr:
3081 finish_ebb(&ctx);
3082 done = true;
3083 break;
3084 default:
3085 done = finish_folding(&ctx, op);
3086 break;
3087 }
3088 tcg_debug_assert(done);
3089 }
3090 }
3091