tcg/optimize: Handle TCG_COND_TST{EQ,NE}
tcg/optimize.c (qemu/ar7.git), blob 2ed6322f97ec37c3bdc20b8e1cf0928aafd58bc5
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
32 #define CASE_OP_32_64(x) \
33 glue(glue(case INDEX_op_, x), _i32): \
34 glue(glue(case INDEX_op_, x), _i64)
36 #define CASE_OP_32_64_VEC(x) \
37 glue(glue(case INDEX_op_, x), _i32): \
38 glue(glue(case INDEX_op_, x), _i64): \
39 glue(glue(case INDEX_op_, x), _vec)
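/*
 * Track that a temporary currently holds the same value as a byte range
 * of the CPU state (tcg_env).  Entries are kept in an interval tree keyed
 * by the byte range and in a per-temp list, so they can be invalidated
 * when either the memory range or the temp is overwritten.
 */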
41 typedef struct MemCopyInfo {
42 IntervalTreeNode itree;
43 QSIMPLEQ_ENTRY (MemCopyInfo) next;
44 TCGTemp *ts;
45 TCGType type;
46 } MemCopyInfo;
48 typedef struct TempOptInfo {
49 bool is_const;
50 TCGTemp *prev_copy;
51 TCGTemp *next_copy;
52 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
53 uint64_t val;
54 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
55 uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
56 } TempOptInfo;
58 typedef struct OptContext {
59 TCGContext *tcg;
60 TCGOp *prev_mb;
61 TCGTempSet temps_used;
63 IntervalTreeRoot mem_copy;
64 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
66 /* In flight values from optimization. */
67 uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
68 uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
69 uint64_t s_mask; /* mask of clrsb(value) bits */
70 TCGType type;
71 } OptContext;
73 /* Calculate the smask for a specific value. */
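/* E.g. clrsb64(0x3) == 61, so smask_from_value(0x3) has the top 61 bits set. */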
74 static uint64_t smask_from_value(uint64_t value)
76 int rep = clrsb64(value);
77 return ~(~0ull >> rep);
81 * Calculate the smask for a given set of known-zeros.
82 * If there are lots of zeros on the left, we can consider the remainder
83 * an unsigned field, and thus the corresponding signed field is one bit
84 * larger.
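 * E.g. a zmask of 0xff (top 56 bits known zero) describes an 8-bit unsigned
 * field, hence a 9-bit signed field, and yields an smask with the top 55
 * bits set.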
86 static uint64_t smask_from_zmask(uint64_t zmask)
89 * Only the 0 bits are significant for zmask, thus the msb itself
90 * must be zero, else we have no sign information.
92 int rep = clz64(zmask);
93 if (rep == 0) {
94 return 0;
96 rep -= 1;
97 return ~(~0ull >> rep);
101 * Recreate a properly left-aligned smask after manipulation.
102 * Some bit-shuffling, particularly shifts and rotates, may
103 * retain sign bits on the left, but may scatter disconnected
104 * sign bits on the right. Retain only what remains to the left.
106 static uint64_t smask_from_smask(int64_t smask)
108 /* Only the 1 bits are significant for smask */
109 return smask_from_zmask(~smask);
112 static inline TempOptInfo *ts_info(TCGTemp *ts)
114 return ts->state_ptr;
117 static inline TempOptInfo *arg_info(TCGArg arg)
119 return ts_info(arg_temp(arg));
122 static inline bool ts_is_const(TCGTemp *ts)
124 return ts_info(ts)->is_const;
127 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
129 TempOptInfo *ti = ts_info(ts);
130 return ti->is_const && ti->val == val;
133 static inline bool arg_is_const(TCGArg arg)
135 return ts_is_const(arg_temp(arg));
138 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
140 return ts_is_const_val(arg_temp(arg), val);
143 static inline bool ts_is_copy(TCGTemp *ts)
145 return ts_info(ts)->next_copy != ts;
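/*
 * Of two temps holding the same value, prefer the one with the greater
 * TCGTempKind, i.e. the longer-lived temp: constants and globals over
 * TB- and EBB-local temporaries.
 */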
148 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
150 return a->kind < b->kind ? b : a;
153 /* Initialize and activate a temporary. */
154 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
156 size_t idx = temp_idx(ts);
157 TempOptInfo *ti;
159 if (test_bit(idx, ctx->temps_used.l)) {
160 return;
162 set_bit(idx, ctx->temps_used.l);
164 ti = ts->state_ptr;
165 if (ti == NULL) {
166 ti = tcg_malloc(sizeof(TempOptInfo));
167 ts->state_ptr = ti;
170 ti->next_copy = ts;
171 ti->prev_copy = ts;
172 QSIMPLEQ_INIT(&ti->mem_copy);
173 if (ts->kind == TEMP_CONST) {
174 ti->is_const = true;
175 ti->val = ts->val;
176 ti->z_mask = ts->val;
177 ti->s_mask = smask_from_value(ts->val);
178 } else {
179 ti->is_const = false;
180 ti->z_mask = -1;
181 ti->s_mask = 0;
185 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
187 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
188 return r ? container_of(r, MemCopyInfo, itree) : NULL;
191 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
193 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
194 return r ? container_of(r, MemCopyInfo, itree) : NULL;
197 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
199 TCGTemp *ts = mc->ts;
200 TempOptInfo *ti = ts_info(ts);
202 interval_tree_remove(&mc->itree, &ctx->mem_copy);
203 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
204 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
207 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
209 while (true) {
210 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
211 if (!mc) {
212 break;
214 remove_mem_copy(ctx, mc);
218 static void remove_mem_copy_all(OptContext *ctx)
220 remove_mem_copy_in(ctx, 0, -1);
221 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
224 static TCGTemp *find_better_copy(TCGTemp *ts)
226 TCGTemp *i, *ret;
228 /* If this is already readonly, we can't do better. */
229 if (temp_readonly(ts)) {
230 return ts;
233 ret = ts;
234 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
235 ret = cmp_better_copy(ret, i);
237 return ret;
240 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
242 TempOptInfo *si = ts_info(src_ts);
243 TempOptInfo *di = ts_info(dst_ts);
244 MemCopyInfo *mc;
246 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
247 tcg_debug_assert(mc->ts == src_ts);
248 mc->ts = dst_ts;
250 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
253 /* Reset TEMP's state, possibly removing the temp from the list of copies. */
254 static void reset_ts(OptContext *ctx, TCGTemp *ts)
256 TempOptInfo *ti = ts_info(ts);
257 TCGTemp *pts = ti->prev_copy;
258 TCGTemp *nts = ti->next_copy;
259 TempOptInfo *pi = ts_info(pts);
260 TempOptInfo *ni = ts_info(nts);
262 ni->prev_copy = ti->prev_copy;
263 pi->next_copy = ti->next_copy;
264 ti->next_copy = ts;
265 ti->prev_copy = ts;
266 ti->is_const = false;
267 ti->z_mask = -1;
268 ti->s_mask = 0;
270 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
271 if (ts == nts) {
272 /* Last temp copy being removed, the mem copies die. */
273 MemCopyInfo *mc;
274 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
275 interval_tree_remove(&mc->itree, &ctx->mem_copy);
277 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
278 } else {
279 move_mem_copies(find_better_copy(nts), ts);
284 static void reset_temp(OptContext *ctx, TCGArg arg)
286 reset_ts(ctx, arg_temp(arg));
289 static void record_mem_copy(OptContext *ctx, TCGType type,
290 TCGTemp *ts, intptr_t start, intptr_t last)
292 MemCopyInfo *mc;
293 TempOptInfo *ti;
295 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
296 if (mc) {
297 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
298 } else {
299 mc = tcg_malloc(sizeof(*mc));
302 memset(mc, 0, sizeof(*mc));
303 mc->itree.start = start;
304 mc->itree.last = last;
305 mc->type = type;
306 interval_tree_insert(&mc->itree, &ctx->mem_copy);
308 ts = find_better_copy(ts);
309 ti = ts_info(ts);
310 mc->ts = ts;
311 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
314 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
316 TCGTemp *i;
318 if (ts1 == ts2) {
319 return true;
322 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
323 return false;
326 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
327 if (i == ts2) {
328 return true;
332 return false;
335 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
337 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
340 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
342 MemCopyInfo *mc;
344 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
345 if (mc->itree.start == s && mc->type == type) {
346 return find_better_copy(mc->ts);
349 return NULL;
352 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
354 TCGType type = ctx->type;
355 TCGTemp *ts;
357 if (type == TCG_TYPE_I32) {
358 val = (int32_t)val;
361 ts = tcg_constant_internal(type, val);
362 init_ts_info(ctx, ts);
364 return temp_arg(ts);
367 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
369 TCGTemp *dst_ts = arg_temp(dst);
370 TCGTemp *src_ts = arg_temp(src);
371 TempOptInfo *di;
372 TempOptInfo *si;
373 TCGOpcode new_op;
375 if (ts_are_copies(dst_ts, src_ts)) {
376 tcg_op_remove(ctx->tcg, op);
377 return true;
380 reset_ts(ctx, dst_ts);
381 di = ts_info(dst_ts);
382 si = ts_info(src_ts);
384 switch (ctx->type) {
385 case TCG_TYPE_I32:
386 new_op = INDEX_op_mov_i32;
387 break;
388 case TCG_TYPE_I64:
389 new_op = INDEX_op_mov_i64;
390 break;
391 case TCG_TYPE_V64:
392 case TCG_TYPE_V128:
393 case TCG_TYPE_V256:
394 /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
395 new_op = INDEX_op_mov_vec;
396 break;
397 default:
398 g_assert_not_reached();
400 op->opc = new_op;
401 op->args[0] = dst;
402 op->args[1] = src;
404 di->z_mask = si->z_mask;
405 di->s_mask = si->s_mask;
407 if (src_ts->type == dst_ts->type) {
408 TempOptInfo *ni = ts_info(si->next_copy);
410 di->next_copy = si->next_copy;
411 di->prev_copy = src_ts;
412 ni->prev_copy = dst_ts;
413 si->next_copy = dst_ts;
414 di->is_const = si->is_const;
415 di->val = si->val;
417 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
418 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
419 move_mem_copies(dst_ts, src_ts);
422 return true;
425 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
426 TCGArg dst, uint64_t val)
428 /* Convert movi to mov with constant temp. */
429 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
432 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
434 uint64_t l64, h64;
436 switch (op) {
437 CASE_OP_32_64(add):
438 return x + y;
440 CASE_OP_32_64(sub):
441 return x - y;
443 CASE_OP_32_64(mul):
444 return x * y;
446 CASE_OP_32_64_VEC(and):
447 return x & y;
449 CASE_OP_32_64_VEC(or):
450 return x | y;
452 CASE_OP_32_64_VEC(xor):
453 return x ^ y;
455 case INDEX_op_shl_i32:
456 return (uint32_t)x << (y & 31);
458 case INDEX_op_shl_i64:
459 return (uint64_t)x << (y & 63);
461 case INDEX_op_shr_i32:
462 return (uint32_t)x >> (y & 31);
464 case INDEX_op_shr_i64:
465 return (uint64_t)x >> (y & 63);
467 case INDEX_op_sar_i32:
468 return (int32_t)x >> (y & 31);
470 case INDEX_op_sar_i64:
471 return (int64_t)x >> (y & 63);
473 case INDEX_op_rotr_i32:
474 return ror32(x, y & 31);
476 case INDEX_op_rotr_i64:
477 return ror64(x, y & 63);
479 case INDEX_op_rotl_i32:
480 return rol32(x, y & 31);
482 case INDEX_op_rotl_i64:
483 return rol64(x, y & 63);
485 CASE_OP_32_64_VEC(not):
486 return ~x;
488 CASE_OP_32_64(neg):
489 return -x;
491 CASE_OP_32_64_VEC(andc):
492 return x & ~y;
494 CASE_OP_32_64_VEC(orc):
495 return x | ~y;
497 CASE_OP_32_64_VEC(eqv):
498 return ~(x ^ y);
500 CASE_OP_32_64_VEC(nand):
501 return ~(x & y);
503 CASE_OP_32_64_VEC(nor):
504 return ~(x | y);
506 case INDEX_op_clz_i32:
507 return (uint32_t)x ? clz32(x) : y;
509 case INDEX_op_clz_i64:
510 return x ? clz64(x) : y;
512 case INDEX_op_ctz_i32:
513 return (uint32_t)x ? ctz32(x) : y;
515 case INDEX_op_ctz_i64:
516 return x ? ctz64(x) : y;
518 case INDEX_op_ctpop_i32:
519 return ctpop32(x);
521 case INDEX_op_ctpop_i64:
522 return ctpop64(x);
524 CASE_OP_32_64(ext8s):
525 return (int8_t)x;
527 CASE_OP_32_64(ext16s):
528 return (int16_t)x;
530 CASE_OP_32_64(ext8u):
531 return (uint8_t)x;
533 CASE_OP_32_64(ext16u):
534 return (uint16_t)x;
536 CASE_OP_32_64(bswap16):
537 x = bswap16(x);
538 return y & TCG_BSWAP_OS ? (int16_t)x : x;
540 CASE_OP_32_64(bswap32):
541 x = bswap32(x);
542 return y & TCG_BSWAP_OS ? (int32_t)x : x;
544 case INDEX_op_bswap64_i64:
545 return bswap64(x);
547 case INDEX_op_ext_i32_i64:
548 case INDEX_op_ext32s_i64:
549 return (int32_t)x;
551 case INDEX_op_extu_i32_i64:
552 case INDEX_op_extrl_i64_i32:
553 case INDEX_op_ext32u_i64:
554 return (uint32_t)x;
556 case INDEX_op_extrh_i64_i32:
557 return (uint64_t)x >> 32;
559 case INDEX_op_muluh_i32:
560 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
561 case INDEX_op_mulsh_i32:
562 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
564 case INDEX_op_muluh_i64:
565 mulu64(&l64, &h64, x, y);
566 return h64;
567 case INDEX_op_mulsh_i64:
568 muls64(&l64, &h64, x, y);
569 return h64;
571 case INDEX_op_div_i32:
572 /* Avoid crashing on divide by zero, otherwise undefined. */
573 return (int32_t)x / ((int32_t)y ? : 1);
574 case INDEX_op_divu_i32:
575 return (uint32_t)x / ((uint32_t)y ? : 1);
576 case INDEX_op_div_i64:
577 return (int64_t)x / ((int64_t)y ? : 1);
578 case INDEX_op_divu_i64:
579 return (uint64_t)x / ((uint64_t)y ? : 1);
581 case INDEX_op_rem_i32:
582 return (int32_t)x % ((int32_t)y ? : 1);
583 case INDEX_op_remu_i32:
584 return (uint32_t)x % ((uint32_t)y ? : 1);
585 case INDEX_op_rem_i64:
586 return (int64_t)x % ((int64_t)y ? : 1);
587 case INDEX_op_remu_i64:
588 return (uint64_t)x % ((uint64_t)y ? : 1);
590 default:
591 g_assert_not_reached();
595 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
596 uint64_t x, uint64_t y)
598 uint64_t res = do_constant_folding_2(op, x, y);
599 if (type == TCG_TYPE_I32) {
600 res = (int32_t)res;
602 return res;
605 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
607 switch (c) {
608 case TCG_COND_EQ:
609 return x == y;
610 case TCG_COND_NE:
611 return x != y;
612 case TCG_COND_LT:
613 return (int32_t)x < (int32_t)y;
614 case TCG_COND_GE:
615 return (int32_t)x >= (int32_t)y;
616 case TCG_COND_LE:
617 return (int32_t)x <= (int32_t)y;
618 case TCG_COND_GT:
619 return (int32_t)x > (int32_t)y;
620 case TCG_COND_LTU:
621 return x < y;
622 case TCG_COND_GEU:
623 return x >= y;
624 case TCG_COND_LEU:
625 return x <= y;
626 case TCG_COND_GTU:
627 return x > y;
628 case TCG_COND_TSTEQ:
629 return (x & y) == 0;
630 case TCG_COND_TSTNE:
631 return (x & y) != 0;
632 case TCG_COND_ALWAYS:
633 case TCG_COND_NEVER:
634 break;
636 g_assert_not_reached();
639 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
641 switch (c) {
642 case TCG_COND_EQ:
643 return x == y;
644 case TCG_COND_NE:
645 return x != y;
646 case TCG_COND_LT:
647 return (int64_t)x < (int64_t)y;
648 case TCG_COND_GE:
649 return (int64_t)x >= (int64_t)y;
650 case TCG_COND_LE:
651 return (int64_t)x <= (int64_t)y;
652 case TCG_COND_GT:
653 return (int64_t)x > (int64_t)y;
654 case TCG_COND_LTU:
655 return x < y;
656 case TCG_COND_GEU:
657 return x >= y;
658 case TCG_COND_LEU:
659 return x <= y;
660 case TCG_COND_GTU:
661 return x > y;
662 case TCG_COND_TSTEQ:
663 return (x & y) == 0;
664 case TCG_COND_TSTNE:
665 return (x & y) != 0;
666 case TCG_COND_ALWAYS:
667 case TCG_COND_NEVER:
668 break;
670 g_assert_not_reached();
673 static int do_constant_folding_cond_eq(TCGCond c)
675 switch (c) {
676 case TCG_COND_GT:
677 case TCG_COND_LTU:
678 case TCG_COND_LT:
679 case TCG_COND_GTU:
680 case TCG_COND_NE:
681 return 0;
682 case TCG_COND_GE:
683 case TCG_COND_GEU:
684 case TCG_COND_LE:
685 case TCG_COND_LEU:
686 case TCG_COND_EQ:
687 return 1;
688 case TCG_COND_TSTEQ:
689 case TCG_COND_TSTNE:
690 return -1;
691 case TCG_COND_ALWAYS:
692 case TCG_COND_NEVER:
693 break;
695 g_assert_not_reached();
699 * Return -1 if the condition can't be simplified,
700 * and the result of the condition (0 or 1) if it can.
702 static int do_constant_folding_cond(TCGType type, TCGArg x,
703 TCGArg y, TCGCond c)
705 if (arg_is_const(x) && arg_is_const(y)) {
706 uint64_t xv = arg_info(x)->val;
707 uint64_t yv = arg_info(y)->val;
709 switch (type) {
710 case TCG_TYPE_I32:
711 return do_constant_folding_cond_32(xv, yv, c);
712 case TCG_TYPE_I64:
713 return do_constant_folding_cond_64(xv, yv, c);
714 default:
715 /* Only scalar comparisons are optimizable */
716 return -1;
718 } else if (args_are_copies(x, y)) {
719 return do_constant_folding_cond_eq(c);
720 } else if (arg_is_const_val(y, 0)) {
721 switch (c) {
722 case TCG_COND_LTU:
723 case TCG_COND_TSTNE:
724 return 0;
725 case TCG_COND_GEU:
726 case TCG_COND_TSTEQ:
727 return 1;
728 default:
729 return -1;
732 return -1;
736 * swap_commutative:
737 * @dest: TCGArg of the destination argument, or NO_DEST.
738 * @p1: first paired argument
739 * @p2: second paired argument
741 * If *@p1 is a constant and *@p2 is not, swap.
742 * If *@p2 matches @dest, swap.
743 * Return true if a swap was performed.
746 #define NO_DEST temp_arg(NULL)
748 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
750 TCGArg a1 = *p1, a2 = *p2;
751 int sum = 0;
752 sum += arg_is_const(a1);
753 sum -= arg_is_const(a2);
755 /* Prefer the constant in second argument, and then the form
756 op a, a, b, which is better handled on non-RISC hosts. */
757 if (sum > 0 || (sum == 0 && dest == a2)) {
758 *p1 = a2;
759 *p2 = a1;
760 return true;
762 return false;
765 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
767 int sum = 0;
768 sum += arg_is_const(p1[0]);
769 sum += arg_is_const(p1[1]);
770 sum -= arg_is_const(p2[0]);
771 sum -= arg_is_const(p2[1]);
772 if (sum > 0) {
773 TCGArg t;
774 t = p1[0], p1[0] = p2[0], p2[0] = t;
775 t = p1[1], p1[1] = p2[1], p2[1] = t;
776 return true;
778 return false;
782 * Return -1 if the condition can't be simplified,
783 * and the result of the condition (0 or 1) if it can.
785 static int do_constant_folding_cond1(OptContext *ctx, TCGArg dest,
786 TCGArg *p1, TCGArg *p2, TCGArg *pcond)
788 TCGCond cond;
789 bool swap;
790 int r;
792 swap = swap_commutative(dest, p1, p2);
793 cond = *pcond;
794 if (swap) {
795 *pcond = cond = tcg_swap_cond(cond);
798 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
799 if (r >= 0) {
800 return r;
802 if (!is_tst_cond(cond)) {
803 return -1;
807 * TSTNE x,x -> NE x,0
808 * TSTNE x,-1 -> NE x,0
810 if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) {
811 *p2 = arg_new_constant(ctx, 0);
812 *pcond = tcg_tst_eqne_cond(cond);
813 return -1;
816 /* TSTNE x,sign -> LT x,0 */
817 if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
818 ? INT32_MIN : INT64_MIN))) {
819 *p2 = arg_new_constant(ctx, 0);
820 *pcond = tcg_tst_ltge_cond(cond);
822 return -1;
825 static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
827 TCGArg al, ah, bl, bh;
828 TCGCond c;
829 bool swap;
830 int r;
832 swap = swap_commutative2(args, args + 2);
833 c = args[4];
834 if (swap) {
835 args[4] = c = tcg_swap_cond(c);
838 al = args[0];
839 ah = args[1];
840 bl = args[2];
841 bh = args[3];
843 if (arg_is_const(bl) && arg_is_const(bh)) {
844 tcg_target_ulong blv = arg_info(bl)->val;
845 tcg_target_ulong bhv = arg_info(bh)->val;
846 uint64_t b = deposit64(blv, 32, 32, bhv);
848 if (arg_is_const(al) && arg_is_const(ah)) {
849 tcg_target_ulong alv = arg_info(al)->val;
850 tcg_target_ulong ahv = arg_info(ah)->val;
851 uint64_t a = deposit64(alv, 32, 32, ahv);
853 r = do_constant_folding_cond_64(a, b, c);
854 if (r >= 0) {
855 return r;
859 if (b == 0) {
860 switch (c) {
861 case TCG_COND_LTU:
862 case TCG_COND_TSTNE:
863 return 0;
864 case TCG_COND_GEU:
865 case TCG_COND_TSTEQ:
866 return 1;
867 default:
868 break;
872 /* TSTNE x,-1 -> NE x,0 */
873 if (b == -1 && is_tst_cond(c)) {
874 args[3] = args[2] = arg_new_constant(ctx, 0);
875 args[4] = tcg_tst_eqne_cond(c);
876 return -1;
879 /* TSTNE x,sign -> LT x,0 */
880 if (b == INT64_MIN && is_tst_cond(c)) {
881 /* bl must be 0, so copy that to bh */
882 args[3] = bl;
883 args[4] = tcg_tst_ltge_cond(c);
884 return -1;
888 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
889 r = do_constant_folding_cond_eq(c);
890 if (r >= 0) {
891 return r;
894 /* TSTNE x,x -> NE x,0 */
895 if (is_tst_cond(c)) {
896 args[3] = args[2] = arg_new_constant(ctx, 0);
897 args[4] = tcg_tst_eqne_cond(c);
898 return -1;
901 return -1;
904 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
906 for (int i = 0; i < nb_args; i++) {
907 TCGTemp *ts = arg_temp(op->args[i]);
908 init_ts_info(ctx, ts);
912 static void copy_propagate(OptContext *ctx, TCGOp *op,
913 int nb_oargs, int nb_iargs)
915 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
916 TCGTemp *ts = arg_temp(op->args[i]);
917 if (ts_is_copy(ts)) {
918 op->args[i] = temp_arg(find_better_copy(ts));
923 static void finish_folding(OptContext *ctx, TCGOp *op)
925 const TCGOpDef *def = &tcg_op_defs[op->opc];
926 int i, nb_oargs;
929 * We only optimize extended basic blocks. If the opcode ends a BB
930 * and is not a conditional branch, reset all temp data.
932 if (def->flags & TCG_OPF_BB_END) {
933 ctx->prev_mb = NULL;
934 if (!(def->flags & TCG_OPF_COND_BRANCH)) {
935 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
936 remove_mem_copy_all(ctx);
938 return;
941 nb_oargs = def->nb_oargs;
942 for (i = 0; i < nb_oargs; i++) {
943 TCGTemp *ts = arg_temp(op->args[i]);
944 reset_ts(ctx, ts);
946 * Save the corresponding known-zero/sign bits mask for the
947 * first output argument (only one supported so far).
949 if (i == 0) {
950 ts_info(ts)->z_mask = ctx->z_mask;
951 ts_info(ts)->s_mask = ctx->s_mask;
957 * The fold_* functions return true when processing is complete,
958 * usually by folding the operation to a constant or to a copy,
959 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
960 * like collect information about the value produced, for use in
961 * optimizing a subsequent operation.
963 * These first fold_* functions are all helpers, used by other
964 * folders for more specific operations.
967 static bool fold_const1(OptContext *ctx, TCGOp *op)
969 if (arg_is_const(op->args[1])) {
970 uint64_t t;
972 t = arg_info(op->args[1])->val;
973 t = do_constant_folding(op->opc, ctx->type, t, 0);
974 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
976 return false;
979 static bool fold_const2(OptContext *ctx, TCGOp *op)
981 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
982 uint64_t t1 = arg_info(op->args[1])->val;
983 uint64_t t2 = arg_info(op->args[2])->val;
985 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
986 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
988 return false;
991 static bool fold_commutative(OptContext *ctx, TCGOp *op)
993 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
994 return false;
997 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
999 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1000 return fold_const2(ctx, op);
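/*
 * Finish folding using the masks accumulated in @ctx:
 * if z_mask is 0 every result bit is known zero, so fold to the constant 0;
 * if a_mask is 0 the result is identical to the first input, so fold to a mov.
 */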
1003 static bool fold_masks(OptContext *ctx, TCGOp *op)
1005 uint64_t a_mask = ctx->a_mask;
1006 uint64_t z_mask = ctx->z_mask;
1007 uint64_t s_mask = ctx->s_mask;
1010 * 32-bit ops generate 32-bit results, which for the purpose of
1011 * simplifying tcg are sign-extended. Certainly that's how we
1012 * represent our constants elsewhere. Note that the bits will
1013 * be reset properly for a 64-bit value when encountering the
1014 * type changing opcodes.
1016 if (ctx->type == TCG_TYPE_I32) {
1017 a_mask = (int32_t)a_mask;
1018 z_mask = (int32_t)z_mask;
1019 s_mask |= MAKE_64BIT_MASK(32, 32);
1020 ctx->z_mask = z_mask;
1021 ctx->s_mask = s_mask;
1024 if (z_mask == 0) {
1025 return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1027 if (a_mask == 0) {
1028 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1030 return false;
1034 * Convert @op to NOT, if NOT is supported by the host.
1035 * Return true if the conversion is successful, which will still
1036 * indicate that the processing is complete.
1038 static bool fold_not(OptContext *ctx, TCGOp *op);
1039 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1041 TCGOpcode not_op;
1042 bool have_not;
1044 switch (ctx->type) {
1045 case TCG_TYPE_I32:
1046 not_op = INDEX_op_not_i32;
1047 have_not = TCG_TARGET_HAS_not_i32;
1048 break;
1049 case TCG_TYPE_I64:
1050 not_op = INDEX_op_not_i64;
1051 have_not = TCG_TARGET_HAS_not_i64;
1052 break;
1053 case TCG_TYPE_V64:
1054 case TCG_TYPE_V128:
1055 case TCG_TYPE_V256:
1056 not_op = INDEX_op_not_vec;
1057 have_not = TCG_TARGET_HAS_not_vec;
1058 break;
1059 default:
1060 g_assert_not_reached();
1062 if (have_not) {
1063 op->opc = not_op;
1064 op->args[1] = op->args[idx];
1065 return fold_not(ctx, op);
1067 return false;
1070 /* If the binary operation has first argument @i, fold to @i. */
1071 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1073 if (arg_is_const_val(op->args[1], i)) {
1074 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1076 return false;
1079 /* If the binary operation has first argument @i, fold to NOT. */
1080 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1082 if (arg_is_const_val(op->args[1], i)) {
1083 return fold_to_not(ctx, op, 2);
1085 return false;
1088 /* If the binary operation has second argument @i, fold to @i. */
1089 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1091 if (arg_is_const_val(op->args[2], i)) {
1092 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1094 return false;
1097 /* If the binary operation has second argument @i, fold to identity. */
1098 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1100 if (arg_is_const_val(op->args[2], i)) {
1101 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1103 return false;
1106 /* If the binary operation has second argument @i, fold to NOT. */
1107 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1109 if (arg_is_const_val(op->args[2], i)) {
1110 return fold_to_not(ctx, op, 1);
1112 return false;
1115 /* If the binary operation has both arguments equal, fold to @i. */
1116 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1118 if (args_are_copies(op->args[1], op->args[2])) {
1119 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1121 return false;
1124 /* If the binary operation has both arguments equal, fold to identity. */
1125 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1127 if (args_are_copies(op->args[1], op->args[2])) {
1128 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1130 return false;
1134 * These outermost fold_<op> functions are sorted alphabetically.
1136 * The ordering of the transformations should be:
1137 * 1) those that produce a constant
1138 * 2) those that produce a copy
1139 * 3) those that produce information about the result value.
1142 static bool fold_add(OptContext *ctx, TCGOp *op)
1144 if (fold_const2_commutative(ctx, op) ||
1145 fold_xi_to_x(ctx, op, 0)) {
1146 return true;
1148 return false;
1151 /* We cannot as yet do_constant_folding with vectors. */
1152 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1154 if (fold_commutative(ctx, op) ||
1155 fold_xi_to_x(ctx, op, 0)) {
1156 return true;
1158 return false;
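/*
 * Fold double-word add2/sub2.  When both operand pairs are constant,
 * compute the full 64-bit (TCG_TYPE_I32) or 128-bit (TCG_TYPE_I64) result
 * and emit two movi.  Otherwise a sub2 by a constant pair is rewritten as
 * add2 of its negation.
 */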
1161 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
1163 bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
1164 bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
1166 if (a_const && b_const) {
1167 uint64_t al = arg_info(op->args[2])->val;
1168 uint64_t ah = arg_info(op->args[3])->val;
1169 uint64_t bl = arg_info(op->args[4])->val;
1170 uint64_t bh = arg_info(op->args[5])->val;
1171 TCGArg rl, rh;
1172 TCGOp *op2;
1174 if (ctx->type == TCG_TYPE_I32) {
1175 uint64_t a = deposit64(al, 32, 32, ah);
1176 uint64_t b = deposit64(bl, 32, 32, bh);
1178 if (add) {
1179 a += b;
1180 } else {
1181 a -= b;
1184 al = sextract64(a, 0, 32);
1185 ah = sextract64(a, 32, 32);
1186 } else {
1187 Int128 a = int128_make128(al, ah);
1188 Int128 b = int128_make128(bl, bh);
1190 if (add) {
1191 a = int128_add(a, b);
1192 } else {
1193 a = int128_sub(a, b);
1196 al = int128_getlo(a);
1197 ah = int128_gethi(a);
1200 rl = op->args[0];
1201 rh = op->args[1];
1203 /* The proper opcode is supplied by tcg_opt_gen_mov. */
1204 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1206 tcg_opt_gen_movi(ctx, op, rl, al);
1207 tcg_opt_gen_movi(ctx, op2, rh, ah);
1208 return true;
1211 /* Fold sub2 r,x,i to add2 r,x,-i */
1212 if (!add && b_const) {
1213 uint64_t bl = arg_info(op->args[4])->val;
1214 uint64_t bh = arg_info(op->args[5])->val;
1216 /* Negate the two parts without assembling and disassembling. */
1217 bl = -bl;
1218 bh = ~bh + !bl;
1220 op->opc = (ctx->type == TCG_TYPE_I32
1221 ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
1222 op->args[4] = arg_new_constant(ctx, bl);
1223 op->args[5] = arg_new_constant(ctx, bh);
1225 return false;
1228 static bool fold_add2(OptContext *ctx, TCGOp *op)
1230 /* Note that the high and low parts may be independently swapped. */
1231 swap_commutative(op->args[0], &op->args[2], &op->args[4]);
1232 swap_commutative(op->args[1], &op->args[3], &op->args[5]);
1234 return fold_addsub2(ctx, op, true);
1237 static bool fold_and(OptContext *ctx, TCGOp *op)
1239 uint64_t z1, z2;
1241 if (fold_const2_commutative(ctx, op) ||
1242 fold_xi_to_i(ctx, op, 0) ||
1243 fold_xi_to_x(ctx, op, -1) ||
1244 fold_xx_to_x(ctx, op)) {
1245 return true;
1248 z1 = arg_info(op->args[1])->z_mask;
1249 z2 = arg_info(op->args[2])->z_mask;
1250 ctx->z_mask = z1 & z2;
1253 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1254 * Bitwise operations preserve the relative quantity of the repetitions.
1256 ctx->s_mask = arg_info(op->args[1])->s_mask
1257 & arg_info(op->args[2])->s_mask;
1260 * Known-zeros does not imply known-ones. Therefore unless
1261 * arg2 is constant, we can't infer affected bits from it.
1263 if (arg_is_const(op->args[2])) {
1264 ctx->a_mask = z1 & ~z2;
1267 return fold_masks(ctx, op);
1270 static bool fold_andc(OptContext *ctx, TCGOp *op)
1272 uint64_t z1;
1274 if (fold_const2(ctx, op) ||
1275 fold_xx_to_i(ctx, op, 0) ||
1276 fold_xi_to_x(ctx, op, 0) ||
1277 fold_ix_to_not(ctx, op, -1)) {
1278 return true;
1281 z1 = arg_info(op->args[1])->z_mask;
1284 * Known-zeros does not imply known-ones. Therefore unless
1285 * arg2 is constant, we can't infer anything from it.
1287 if (arg_is_const(op->args[2])) {
1288 uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1289 ctx->a_mask = z1 & ~z2;
1290 z1 &= z2;
1292 ctx->z_mask = z1;
1294 ctx->s_mask = arg_info(op->args[1])->s_mask
1295 & arg_info(op->args[2])->s_mask;
1296 return fold_masks(ctx, op);
1299 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1301 int i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[0],
1302 &op->args[1], &op->args[2]);
1303 if (i == 0) {
1304 tcg_op_remove(ctx->tcg, op);
1305 return true;
1307 if (i > 0) {
1308 op->opc = INDEX_op_br;
1309 op->args[0] = op->args[3];
1311 return false;
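/*
 * Fold a double-word brcond: either evaluate the comparison entirely at
 * compile time, or reduce it to a single-word brcond on the high or low
 * half when the other half is known.
 */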
1314 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1316 TCGCond cond;
1317 TCGArg label;
1318 int i, inv = 0;
1320 i = do_constant_folding_cond2(ctx, &op->args[0]);
1321 cond = op->args[4];
1322 label = op->args[5];
1323 if (i >= 0) {
1324 goto do_brcond_const;
1327 switch (cond) {
1328 case TCG_COND_LT:
1329 case TCG_COND_GE:
1331 * Simplify LT/GE comparisons vs zero to a single compare
1332 * vs the high word of the input.
1334 if (arg_is_const_val(op->args[2], 0) &&
1335 arg_is_const_val(op->args[3], 0)) {
1336 goto do_brcond_high;
1338 break;
1340 case TCG_COND_NE:
1341 inv = 1;
1342 QEMU_FALLTHROUGH;
1343 case TCG_COND_EQ:
1345 * Simplify EQ/NE comparisons where one of the pairs
1346 * can be simplified.
1348 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1349 op->args[2], cond);
1350 switch (i ^ inv) {
1351 case 0:
1352 goto do_brcond_const;
1353 case 1:
1354 goto do_brcond_high;
1357 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1358 op->args[3], cond);
1359 switch (i ^ inv) {
1360 case 0:
1361 goto do_brcond_const;
1362 case 1:
1363 goto do_brcond_low;
1365 break;
1367 case TCG_COND_TSTEQ:
1368 case TCG_COND_TSTNE:
1369 if (arg_is_const_val(op->args[2], 0)) {
1370 goto do_brcond_high;
1372 if (arg_is_const_val(op->args[3], 0)) {
1373 goto do_brcond_low;
1375 break;
1377 default:
1378 break;
1380 do_brcond_low:
1381 op->opc = INDEX_op_brcond_i32;
1382 op->args[1] = op->args[2];
1383 op->args[2] = cond;
1384 op->args[3] = label;
1385 return fold_brcond(ctx, op);
1387 do_brcond_high:
1388 op->opc = INDEX_op_brcond_i32;
1389 op->args[0] = op->args[1];
1390 op->args[1] = op->args[3];
1391 op->args[2] = cond;
1392 op->args[3] = label;
1393 return fold_brcond(ctx, op);
1395 do_brcond_const:
1396 if (i == 0) {
1397 tcg_op_remove(ctx->tcg, op);
1398 return true;
1400 op->opc = INDEX_op_br;
1401 op->args[0] = label;
1402 break;
1404 return false;
1407 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1409 uint64_t z_mask, s_mask, sign;
1411 if (arg_is_const(op->args[1])) {
1412 uint64_t t = arg_info(op->args[1])->val;
1414 t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1415 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1418 z_mask = arg_info(op->args[1])->z_mask;
1420 switch (op->opc) {
1421 case INDEX_op_bswap16_i32:
1422 case INDEX_op_bswap16_i64:
1423 z_mask = bswap16(z_mask);
1424 sign = INT16_MIN;
1425 break;
1426 case INDEX_op_bswap32_i32:
1427 case INDEX_op_bswap32_i64:
1428 z_mask = bswap32(z_mask);
1429 sign = INT32_MIN;
1430 break;
1431 case INDEX_op_bswap64_i64:
1432 z_mask = bswap64(z_mask);
1433 sign = INT64_MIN;
1434 break;
1435 default:
1436 g_assert_not_reached();
1438 s_mask = smask_from_zmask(z_mask);
1440 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1441 case TCG_BSWAP_OZ:
1442 break;
1443 case TCG_BSWAP_OS:
1444 /* If the sign bit may be 1, force all the bits above to 1. */
1445 if (z_mask & sign) {
1446 z_mask |= sign;
1447 s_mask = sign << 1;
1449 break;
1450 default:
1451 /* The high bits are undefined: force all bits above the sign to 1. */
1452 z_mask |= sign << 1;
1453 s_mask = 0;
1454 break;
1456 ctx->z_mask = z_mask;
1457 ctx->s_mask = s_mask;
1459 return fold_masks(ctx, op);
1462 static bool fold_call(OptContext *ctx, TCGOp *op)
1464 TCGContext *s = ctx->tcg;
1465 int nb_oargs = TCGOP_CALLO(op);
1466 int nb_iargs = TCGOP_CALLI(op);
1467 int flags, i;
1469 init_arguments(ctx, op, nb_oargs + nb_iargs);
1470 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1472 /* If the function reads or writes globals, reset temp data. */
1473 flags = tcg_call_flags(op);
1474 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1475 int nb_globals = s->nb_globals;
1477 for (i = 0; i < nb_globals; i++) {
1478 if (test_bit(i, ctx->temps_used.l)) {
1479 reset_ts(ctx, &ctx->tcg->temps[i]);
1484 /* If the function has side effects, reset mem data. */
1485 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1486 remove_mem_copy_all(ctx);
1489 /* Reset temp data for outputs. */
1490 for (i = 0; i < nb_oargs; i++) {
1491 reset_temp(ctx, op->args[i]);
1494 /* Stop optimizing MB across calls. */
1495 ctx->prev_mb = NULL;
1496 return true;
1499 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1501 uint64_t z_mask;
1503 if (arg_is_const(op->args[1])) {
1504 uint64_t t = arg_info(op->args[1])->val;
1506 if (t != 0) {
1507 t = do_constant_folding(op->opc, ctx->type, t, 0);
1508 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1510 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1513 switch (ctx->type) {
1514 case TCG_TYPE_I32:
1515 z_mask = 31;
1516 break;
1517 case TCG_TYPE_I64:
1518 z_mask = 63;
1519 break;
1520 default:
1521 g_assert_not_reached();
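/*
 * The result is either a bit position (at most 31 or 63) or, when the
 * input is zero, the fallback value from args[2]; merge both z_masks.
 */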
1523 ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1524 ctx->s_mask = smask_from_zmask(ctx->z_mask);
1525 return false;
1528 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1530 if (fold_const1(ctx, op)) {
1531 return true;
1534 switch (ctx->type) {
1535 case TCG_TYPE_I32:
1536 ctx->z_mask = 32 | 31;
1537 break;
1538 case TCG_TYPE_I64:
1539 ctx->z_mask = 64 | 63;
1540 break;
1541 default:
1542 g_assert_not_reached();
1544 ctx->s_mask = smask_from_zmask(ctx->z_mask);
1545 return false;
1548 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1550 TCGOpcode and_opc;
1552 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1553 uint64_t t1 = arg_info(op->args[1])->val;
1554 uint64_t t2 = arg_info(op->args[2])->val;
1556 t1 = deposit64(t1, op->args[3], op->args[4], t2);
1557 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1560 switch (ctx->type) {
1561 case TCG_TYPE_I32:
1562 and_opc = INDEX_op_and_i32;
1563 break;
1564 case TCG_TYPE_I64:
1565 and_opc = INDEX_op_and_i64;
1566 break;
1567 default:
1568 g_assert_not_reached();
1571 /* Inserting a value into zero at offset 0. */
1572 if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
1573 uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
1575 op->opc = and_opc;
1576 op->args[1] = op->args[2];
1577 op->args[2] = arg_new_constant(ctx, mask);
1578 ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1579 return false;
1582 /* Inserting zero into a value. */
1583 if (arg_is_const_val(op->args[2], 0)) {
1584 uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1586 op->opc = and_opc;
1587 op->args[2] = arg_new_constant(ctx, mask);
1588 ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1589 return false;
1592 ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1593 op->args[3], op->args[4],
1594 arg_info(op->args[2])->z_mask);
1595 return false;
1598 static bool fold_divide(OptContext *ctx, TCGOp *op)
1600 if (fold_const2(ctx, op) ||
1601 fold_xi_to_x(ctx, op, 1)) {
1602 return true;
1604 return false;
1607 static bool fold_dup(OptContext *ctx, TCGOp *op)
1609 if (arg_is_const(op->args[1])) {
1610 uint64_t t = arg_info(op->args[1])->val;
1611 t = dup_const(TCGOP_VECE(op), t);
1612 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1614 return false;
1617 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1619 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1620 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1621 arg_info(op->args[2])->val);
1622 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1625 if (args_are_copies(op->args[1], op->args[2])) {
1626 op->opc = INDEX_op_dup_vec;
1627 TCGOP_VECE(op) = MO_32;
1629 return false;
1632 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1634 if (fold_const2_commutative(ctx, op) ||
1635 fold_xi_to_x(ctx, op, -1) ||
1636 fold_xi_to_not(ctx, op, 0)) {
1637 return true;
1640 ctx->s_mask = arg_info(op->args[1])->s_mask
1641 & arg_info(op->args[2])->s_mask;
1642 return false;
1645 static bool fold_extract(OptContext *ctx, TCGOp *op)
1647 uint64_t z_mask_old, z_mask;
1648 int pos = op->args[2];
1649 int len = op->args[3];
1651 if (arg_is_const(op->args[1])) {
1652 uint64_t t;
1654 t = arg_info(op->args[1])->val;
1655 t = extract64(t, pos, len);
1656 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1659 z_mask_old = arg_info(op->args[1])->z_mask;
1660 z_mask = extract64(z_mask_old, pos, len);
1661 if (pos == 0) {
1662 ctx->a_mask = z_mask_old ^ z_mask;
1664 ctx->z_mask = z_mask;
1665 ctx->s_mask = smask_from_zmask(z_mask);
1667 return fold_masks(ctx, op);
1670 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1672 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1673 uint64_t v1 = arg_info(op->args[1])->val;
1674 uint64_t v2 = arg_info(op->args[2])->val;
1675 int shr = op->args[3];
1677 if (op->opc == INDEX_op_extract2_i64) {
1678 v1 >>= shr;
1679 v2 <<= 64 - shr;
1680 } else {
1681 v1 = (uint32_t)v1 >> shr;
1682 v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1684 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1686 return false;
1689 static bool fold_exts(OptContext *ctx, TCGOp *op)
1691 uint64_t s_mask_old, s_mask, z_mask, sign;
1692 bool type_change = false;
1694 if (fold_const1(ctx, op)) {
1695 return true;
1698 z_mask = arg_info(op->args[1])->z_mask;
1699 s_mask = arg_info(op->args[1])->s_mask;
1700 s_mask_old = s_mask;
1702 switch (op->opc) {
1703 CASE_OP_32_64(ext8s):
1704 sign = INT8_MIN;
1705 z_mask = (uint8_t)z_mask;
1706 break;
1707 CASE_OP_32_64(ext16s):
1708 sign = INT16_MIN;
1709 z_mask = (uint16_t)z_mask;
1710 break;
1711 case INDEX_op_ext_i32_i64:
1712 type_change = true;
1713 QEMU_FALLTHROUGH;
1714 case INDEX_op_ext32s_i64:
1715 sign = INT32_MIN;
1716 z_mask = (uint32_t)z_mask;
1717 break;
1718 default:
1719 g_assert_not_reached();
1722 if (z_mask & sign) {
1723 z_mask |= sign;
1725 s_mask |= sign << 1;
1727 ctx->z_mask = z_mask;
1728 ctx->s_mask = s_mask;
1729 if (!type_change) {
1730 ctx->a_mask = s_mask & ~s_mask_old;
1733 return fold_masks(ctx, op);
1736 static bool fold_extu(OptContext *ctx, TCGOp *op)
1738 uint64_t z_mask_old, z_mask;
1739 bool type_change = false;
1741 if (fold_const1(ctx, op)) {
1742 return true;
1745 z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1747 switch (op->opc) {
1748 CASE_OP_32_64(ext8u):
1749 z_mask = (uint8_t)z_mask;
1750 break;
1751 CASE_OP_32_64(ext16u):
1752 z_mask = (uint16_t)z_mask;
1753 break;
1754 case INDEX_op_extrl_i64_i32:
1755 case INDEX_op_extu_i32_i64:
1756 type_change = true;
1757 QEMU_FALLTHROUGH;
1758 case INDEX_op_ext32u_i64:
1759 z_mask = (uint32_t)z_mask;
1760 break;
1761 case INDEX_op_extrh_i64_i32:
1762 type_change = true;
1763 z_mask >>= 32;
1764 break;
1765 default:
1766 g_assert_not_reached();
1769 ctx->z_mask = z_mask;
1770 ctx->s_mask = smask_from_zmask(z_mask);
1771 if (!type_change) {
1772 ctx->a_mask = z_mask_old ^ z_mask;
1774 return fold_masks(ctx, op);
1777 static bool fold_mb(OptContext *ctx, TCGOp *op)
1779 /* Eliminate duplicate and redundant fence instructions. */
1780 if (ctx->prev_mb) {
1782 * Merge two barriers of the same type into one,
1783 * or a weaker barrier into a stronger one,
1784 * or two weaker barriers into a stronger one.
1785 * mb X; mb Y => mb X|Y
1786 * mb; strl => mb; st
1787 * ldaq; mb => ld; mb
1788 * ldaq; strl => ld; mb; st
1789 * Other combinations are also merged into a strong
1790 * barrier. This is stricter than specified but for
1791 * the purposes of TCG is better than not optimizing.
1793 ctx->prev_mb->args[0] |= op->args[0];
1794 tcg_op_remove(ctx->tcg, op);
1795 } else {
1796 ctx->prev_mb = op;
1798 return true;
1801 static bool fold_mov(OptContext *ctx, TCGOp *op)
1803 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1806 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1808 int i;
1811 * Canonicalize the "false" input reg to match the destination reg so
1812 * that the tcg backend can implement a "move if true" operation.
1814 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1815 op->args[5] = tcg_invert_cond(op->args[5]);
1818 i = do_constant_folding_cond1(ctx, NO_DEST, &op->args[1],
1819 &op->args[2], &op->args[5]);
1820 if (i >= 0) {
1821 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1824 ctx->z_mask = arg_info(op->args[3])->z_mask
1825 | arg_info(op->args[4])->z_mask;
1826 ctx->s_mask = arg_info(op->args[3])->s_mask
1827 & arg_info(op->args[4])->s_mask;
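/*
 * When both arms are constants forming {1,0} or {-1,0}, the movcond can
 * become a setcond or negsetcond on the same condition, inverted if the
 * arms are swapped.
 */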
1829 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1830 uint64_t tv = arg_info(op->args[3])->val;
1831 uint64_t fv = arg_info(op->args[4])->val;
1832 TCGOpcode opc, negopc = 0;
1833 TCGCond cond = op->args[5];
1835 switch (ctx->type) {
1836 case TCG_TYPE_I32:
1837 opc = INDEX_op_setcond_i32;
1838 if (TCG_TARGET_HAS_negsetcond_i32) {
1839 negopc = INDEX_op_negsetcond_i32;
1841 tv = (int32_t)tv;
1842 fv = (int32_t)fv;
1843 break;
1844 case TCG_TYPE_I64:
1845 opc = INDEX_op_setcond_i64;
1846 if (TCG_TARGET_HAS_negsetcond_i64) {
1847 negopc = INDEX_op_negsetcond_i64;
1849 break;
1850 default:
1851 g_assert_not_reached();
1854 if (tv == 1 && fv == 0) {
1855 op->opc = opc;
1856 op->args[3] = cond;
1857 } else if (fv == 1 && tv == 0) {
1858 op->opc = opc;
1859 op->args[3] = tcg_invert_cond(cond);
1860 } else if (negopc) {
1861 if (tv == -1 && fv == 0) {
1862 op->opc = negopc;
1863 op->args[3] = cond;
1864 } else if (fv == -1 && tv == 0) {
1865 op->opc = negopc;
1866 op->args[3] = tcg_invert_cond(cond);
1870 return false;
1873 static bool fold_mul(OptContext *ctx, TCGOp *op)
1875 if (fold_const2(ctx, op) ||
1876 fold_xi_to_i(ctx, op, 0) ||
1877 fold_xi_to_x(ctx, op, 1)) {
1878 return true;
1880 return false;
1883 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1885 if (fold_const2_commutative(ctx, op) ||
1886 fold_xi_to_i(ctx, op, 0)) {
1887 return true;
1889 return false;
1892 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1894 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1896 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1897 uint64_t a = arg_info(op->args[2])->val;
1898 uint64_t b = arg_info(op->args[3])->val;
1899 uint64_t h, l;
1900 TCGArg rl, rh;
1901 TCGOp *op2;
1903 switch (op->opc) {
1904 case INDEX_op_mulu2_i32:
1905 l = (uint64_t)(uint32_t)a * (uint32_t)b;
1906 h = (int32_t)(l >> 32);
1907 l = (int32_t)l;
1908 break;
1909 case INDEX_op_muls2_i32:
1910 l = (int64_t)(int32_t)a * (int32_t)b;
1911 h = l >> 32;
1912 l = (int32_t)l;
1913 break;
1914 case INDEX_op_mulu2_i64:
1915 mulu64(&l, &h, a, b);
1916 break;
1917 case INDEX_op_muls2_i64:
1918 muls64(&l, &h, a, b);
1919 break;
1920 default:
1921 g_assert_not_reached();
1924 rl = op->args[0];
1925 rh = op->args[1];
1927 /* The proper opcode is supplied by tcg_opt_gen_mov. */
1928 op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1930 tcg_opt_gen_movi(ctx, op, rl, l);
1931 tcg_opt_gen_movi(ctx, op2, rh, h);
1932 return true;
1934 return false;
1937 static bool fold_nand(OptContext *ctx, TCGOp *op)
1939 if (fold_const2_commutative(ctx, op) ||
1940 fold_xi_to_not(ctx, op, -1)) {
1941 return true;
1944 ctx->s_mask = arg_info(op->args[1])->s_mask
1945 & arg_info(op->args[2])->s_mask;
1946 return false;
1949 static bool fold_neg(OptContext *ctx, TCGOp *op)
1951 uint64_t z_mask;
1953 if (fold_const1(ctx, op)) {
1954 return true;
1957 /* Set to 1 all bits from the rightmost possibly-set bit leftward. */
1958 z_mask = arg_info(op->args[1])->z_mask;
1959 ctx->z_mask = -(z_mask & -z_mask);
1962 * Because of fold_sub_to_neg, we want to always return true,
1963 * via finish_folding.
1965 finish_folding(ctx, op);
1966 return true;
1969 static bool fold_nor(OptContext *ctx, TCGOp *op)
1971 if (fold_const2_commutative(ctx, op) ||
1972 fold_xi_to_not(ctx, op, 0)) {
1973 return true;
1976 ctx->s_mask = arg_info(op->args[1])->s_mask
1977 & arg_info(op->args[2])->s_mask;
1978 return false;
1981 static bool fold_not(OptContext *ctx, TCGOp *op)
1983 if (fold_const1(ctx, op)) {
1984 return true;
1987 ctx->s_mask = arg_info(op->args[1])->s_mask;
1989 /* Because of fold_to_not, we want to always return true, via finish. */
1990 finish_folding(ctx, op);
1991 return true;
1994 static bool fold_or(OptContext *ctx, TCGOp *op)
1996 if (fold_const2_commutative(ctx, op) ||
1997 fold_xi_to_x(ctx, op, 0) ||
1998 fold_xx_to_x(ctx, op)) {
1999 return true;
2002 ctx->z_mask = arg_info(op->args[1])->z_mask
2003 | arg_info(op->args[2])->z_mask;
2004 ctx->s_mask = arg_info(op->args[1])->s_mask
2005 & arg_info(op->args[2])->s_mask;
2006 return fold_masks(ctx, op);
2009 static bool fold_orc(OptContext *ctx, TCGOp *op)
2011 if (fold_const2(ctx, op) ||
2012 fold_xx_to_i(ctx, op, -1) ||
2013 fold_xi_to_x(ctx, op, -1) ||
2014 fold_ix_to_not(ctx, op, 0)) {
2015 return true;
2018 ctx->s_mask = arg_info(op->args[1])->s_mask
2019 & arg_info(op->args[2])->s_mask;
2020 return false;
2023 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
2025 const TCGOpDef *def = &tcg_op_defs[op->opc];
2026 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2027 MemOp mop = get_memop(oi);
2028 int width = 8 * memop_size(mop);
2030 if (width < 64) {
2031 ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
2032 if (!(mop & MO_SIGN)) {
2033 ctx->z_mask = MAKE_64BIT_MASK(0, width);
2034 ctx->s_mask <<= 1;
2038 /* Opcodes that touch guest memory stop the mb optimization. */
2039 ctx->prev_mb = NULL;
2040 return false;
2043 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2045 /* Opcodes that touch guest memory stop the mb optimization. */
2046 ctx->prev_mb = NULL;
2047 return false;
2050 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2052 if (fold_const2(ctx, op) ||
2053 fold_xx_to_i(ctx, op, 0)) {
2054 return true;
2056 return false;
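/*
 * For setcond/negsetcond with a TSTEQ/TSTNE condition against a constant
 * power-of-two mask, extract the tested bit directly (via sextract,
 * extract, or shift+and) and fix up polarity and sign with xor, subtract
 * or negate as needed.
 */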
2059 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2061 TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc, uext_opc, sext_opc;
2062 TCGCond cond = op->args[3];
2063 TCGArg ret, src1, src2;
2064 TCGOp *op2;
2065 uint64_t val;
2066 int sh;
2067 bool inv;
2069 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2070 return;
2073 src2 = op->args[2];
2074 val = arg_info(src2)->val;
2075 if (!is_power_of_2(val)) {
2076 return;
2078 sh = ctz64(val);
2080 switch (ctx->type) {
2081 case TCG_TYPE_I32:
2082 and_opc = INDEX_op_and_i32;
2083 sub_opc = INDEX_op_sub_i32;
2084 xor_opc = INDEX_op_xor_i32;
2085 shr_opc = INDEX_op_shr_i32;
2086 neg_opc = INDEX_op_neg_i32;
2087 if (TCG_TARGET_extract_i32_valid(sh, 1)) {
2088 uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
2089 sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
2091 break;
2092 case TCG_TYPE_I64:
2093 and_opc = INDEX_op_and_i64;
2094 sub_opc = INDEX_op_sub_i64;
2095 xor_opc = INDEX_op_xor_i64;
2096 shr_opc = INDEX_op_shr_i64;
2097 neg_opc = INDEX_op_neg_i64;
2098 if (TCG_TARGET_extract_i64_valid(sh, 1)) {
2099 uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
2100 sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
2102 break;
2103 default:
2104 g_assert_not_reached();
2107 ret = op->args[0];
2108 src1 = op->args[1];
2109 inv = cond == TCG_COND_TSTEQ;
2111 if (sh && sext_opc && neg && !inv) {
2112 op->opc = sext_opc;
2113 op->args[1] = src1;
2114 op->args[2] = sh;
2115 op->args[3] = 1;
2116 return;
2117 } else if (sh && uext_opc) {
2118 op->opc = uext_opc;
2119 op->args[1] = src1;
2120 op->args[2] = sh;
2121 op->args[3] = 1;
2122 } else {
2123 if (sh) {
2124 op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
2125 op2->args[0] = ret;
2126 op2->args[1] = src1;
2127 op2->args[2] = arg_new_constant(ctx, sh);
2128 src1 = ret;
2130 op->opc = and_opc;
2131 op->args[1] = src1;
2132 op->args[2] = arg_new_constant(ctx, 1);
2135 if (neg && inv) {
2136 op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
2137 op2->args[0] = ret;
2138 op2->args[1] = ret;
2139 op2->args[2] = arg_new_constant(ctx, 1);
2140 } else if (inv) {
2141 op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
2142 op2->args[0] = ret;
2143 op2->args[1] = ret;
2144 op2->args[2] = arg_new_constant(ctx, 1);
2145 } else if (neg) {
2146 op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
2147 op2->args[0] = ret;
2148 op2->args[1] = ret;
2152 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2154 int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
2155 &op->args[2], &op->args[3]);
2156 if (i >= 0) {
2157 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2159 fold_setcond_tst_pow2(ctx, op, false);
2161 ctx->z_mask = 1;
2162 ctx->s_mask = smask_from_zmask(1);
2163 return false;
2166 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2168 int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
2169 &op->args[2], &op->args[3]);
2170 if (i >= 0) {
2171 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2173 fold_setcond_tst_pow2(ctx, op, true);
2175 /* Value is {0,-1} so all bits are repetitions of the sign. */
2176 ctx->s_mask = -1;
2177 return false;
2180 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2182 TCGCond cond;
2183 int i, inv = 0;
2185 i = do_constant_folding_cond2(ctx, &op->args[1]);
2186 cond = op->args[5];
2187 if (i >= 0) {
2188 goto do_setcond_const;
2191 switch (cond) {
2192 case TCG_COND_LT:
2193 case TCG_COND_GE:
2195 * Simplify LT/GE comparisons vs zero to a single compare
2196 * vs the high word of the input.
2198 if (arg_is_const_val(op->args[3], 0) &&
2199 arg_is_const_val(op->args[4], 0)) {
2200 goto do_setcond_high;
2202 break;
2204 case TCG_COND_NE:
2205 inv = 1;
2206 QEMU_FALLTHROUGH;
2207 case TCG_COND_EQ:
2209 * Simplify EQ/NE comparisons where one of the pairs
2210 * can be simplified.
2212 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2213 op->args[3], cond);
2214 switch (i ^ inv) {
2215 case 0:
2216 goto do_setcond_const;
2217 case 1:
2218 goto do_setcond_high;
2221 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2222 op->args[4], cond);
2223 switch (i ^ inv) {
2224 case 0:
2225 goto do_setcond_const;
2226 case 1:
2227 goto do_setcond_low;
2229 break;
2231 case TCG_COND_TSTEQ:
2232 case TCG_COND_TSTNE:
2233 if (arg_is_const_val(op->args[2], 0)) {
2234 goto do_setcond_high;
2236 if (arg_is_const_val(op->args[4], 0)) {
2237 goto do_setcond_low;
2239 break;
2241 default:
2242 break;
2244 do_setcond_low:
2245 op->args[2] = op->args[3];
2246 op->args[3] = cond;
2247 op->opc = INDEX_op_setcond_i32;
2248 return fold_setcond(ctx, op);
2250 do_setcond_high:
2251 op->args[1] = op->args[2];
2252 op->args[2] = op->args[4];
2253 op->args[3] = cond;
2254 op->opc = INDEX_op_setcond_i32;
2255 return fold_setcond(ctx, op);
2258 ctx->z_mask = 1;
2259 ctx->s_mask = smask_from_zmask(1);
2260 return false;
2262 do_setcond_const:
2263 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2266 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2268 uint64_t z_mask, s_mask, s_mask_old;
2269 int pos = op->args[2];
2270 int len = op->args[3];
2272 if (arg_is_const(op->args[1])) {
2273 uint64_t t;
2275 t = arg_info(op->args[1])->val;
2276 t = sextract64(t, pos, len);
2277 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
2280 z_mask = arg_info(op->args[1])->z_mask;
2281 z_mask = sextract64(z_mask, pos, len);
2282 ctx->z_mask = z_mask;
2284 s_mask_old = arg_info(op->args[1])->s_mask;
2285 s_mask = sextract64(s_mask_old, pos, len);
2286 s_mask |= MAKE_64BIT_MASK(len, 64 - len);
2287 ctx->s_mask = s_mask;
2289 if (pos == 0) {
2290 ctx->a_mask = s_mask & ~s_mask_old;
2293 return fold_masks(ctx, op);
2296 static bool fold_shift(OptContext *ctx, TCGOp *op)
2298 uint64_t s_mask, z_mask, sign;
2300 if (fold_const2(ctx, op) ||
2301 fold_ix_to_i(ctx, op, 0) ||
2302 fold_xi_to_x(ctx, op, 0)) {
2303 return true;
2306 s_mask = arg_info(op->args[1])->s_mask;
2307 z_mask = arg_info(op->args[1])->z_mask;
2309 if (arg_is_const(op->args[2])) {
2310 int sh = arg_info(op->args[2])->val;
2312 ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2314 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2315 ctx->s_mask = smask_from_smask(s_mask);
2317 return fold_masks(ctx, op);
2320 switch (op->opc) {
2321 CASE_OP_32_64(sar):
2323 * Arithmetic right shift will not reduce the number of
2324 * input sign repetitions.
2326 ctx->s_mask = s_mask;
2327 break;
2328 CASE_OP_32_64(shr):
2330 * If the sign bit is known zero, then logical right shift
2331 * will not reduce the number of input sign repetitions.
2333 sign = (s_mask & -s_mask) >> 1;
2334 if (!(z_mask & sign)) {
2335 ctx->s_mask = s_mask;
2337 break;
2338 default:
2339 break;
2342 return false;
2345 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2347 TCGOpcode neg_op;
2348 bool have_neg;
2350 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2351 return false;
2354 switch (ctx->type) {
2355 case TCG_TYPE_I32:
2356 neg_op = INDEX_op_neg_i32;
2357 have_neg = true;
2358 break;
2359 case TCG_TYPE_I64:
2360 neg_op = INDEX_op_neg_i64;
2361 have_neg = true;
2362 break;
2363 case TCG_TYPE_V64:
2364 case TCG_TYPE_V128:
2365 case TCG_TYPE_V256:
2366 neg_op = INDEX_op_neg_vec;
2367 have_neg = (TCG_TARGET_HAS_neg_vec &&
2368 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2369 break;
2370 default:
2371 g_assert_not_reached();
2373 if (have_neg) {
2374 op->opc = neg_op;
2375 op->args[1] = op->args[2];
2376 return fold_neg(ctx, op);
2378 return false;
2381 /* We cannot as yet do_constant_folding with vectors. */
2382 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2384 if (fold_xx_to_i(ctx, op, 0) ||
2385 fold_xi_to_x(ctx, op, 0) ||
2386 fold_sub_to_neg(ctx, op)) {
2387 return true;
2389 return false;
2392 static bool fold_sub(OptContext *ctx, TCGOp *op)
2394 if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
2395 return true;
2398 /* Fold sub r,x,i to add r,x,-i */
2399 if (arg_is_const(op->args[2])) {
2400 uint64_t val = arg_info(op->args[2])->val;
2402 op->opc = (ctx->type == TCG_TYPE_I32
2403 ? INDEX_op_add_i32 : INDEX_op_add_i64);
2404 op->args[2] = arg_new_constant(ctx, -val);
2406 return false;
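/*
 * The constant rewrite above relies on two's complement arithmetic:
 * e.g. (illustrative) sub_i32 r, x, $5 becomes add_i32 r, x, $-5,
 * canonicalizing constant arithmetic onto add, for which hosts
 * typically provide an add-with-immediate form.
 */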
2409 static bool fold_sub2(OptContext *ctx, TCGOp *op)
2411 return fold_addsub2(ctx, op, false);
2414 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2416 /* We can't do any folding with a load, but we can record bits. */
2417 switch (op->opc) {
2418 CASE_OP_32_64(ld8s):
2419 ctx->s_mask = MAKE_64BIT_MASK(8, 56);
2420 break;
2421 CASE_OP_32_64(ld8u):
2422 ctx->z_mask = MAKE_64BIT_MASK(0, 8);
2423 ctx->s_mask = MAKE_64BIT_MASK(9, 55);
2424 break;
2425 CASE_OP_32_64(ld16s):
2426 ctx->s_mask = MAKE_64BIT_MASK(16, 48);
2427 break;
2428 CASE_OP_32_64(ld16u):
2429 ctx->z_mask = MAKE_64BIT_MASK(0, 16);
2430 ctx->s_mask = MAKE_64BIT_MASK(17, 47);
2431 break;
2432 case INDEX_op_ld32s_i64:
2433 ctx->s_mask = MAKE_64BIT_MASK(32, 32);
2434 break;
2435 case INDEX_op_ld32u_i64:
2436 ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2437 ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2438 break;
2439 default:
2440 g_assert_not_reached();
2442 return false;
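/*
 * For instance, ld8u produces a value known to fit in 8 bits, so
 * z_mask = 0xff and the recorded s_mask marks the top 55 bits as
 * redundant sign bits (the value is provably non-negative).  The
 * signed loads record only an s_mask: the high bits are copies of
 * the loaded sign bit, but none of them is known to be zero.
 */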
2445 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2447 TCGTemp *dst, *src;
2448 intptr_t ofs;
2449 TCGType type;
2451 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2452 return false;
2455 type = ctx->type;
2456 ofs = op->args[2];
2457 dst = arg_temp(op->args[0]);
2458 src = find_mem_copy_for(ctx, type, ofs);
2459 if (src && src->base_type == type) {
2460 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2463 reset_ts(ctx, dst);
2464 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2465 return true;
2468 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2470 intptr_t ofs = op->args[2];
2471 intptr_t lm1;
2473 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2474 remove_mem_copy_all(ctx);
2475 return false;
2478 switch (op->opc) {
2479 CASE_OP_32_64(st8):
2480 lm1 = 0;
2481 break;
2482 CASE_OP_32_64(st16):
2483 lm1 = 1;
2484 break;
2485 case INDEX_op_st32_i64:
2486 case INDEX_op_st_i32:
2487 lm1 = 3;
2488 break;
2489 case INDEX_op_st_i64:
2490 lm1 = 7;
2491 break;
2492 case INDEX_op_st_vec:
2493 lm1 = tcg_type_size(ctx->type) - 1;
2494 break;
2495 default:
2496 g_assert_not_reached();
2498 remove_mem_copy_in(ctx, ofs, ofs + lm1);
2499 return false;
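/*
 * lm1 is the store width in bytes minus one, so [ofs, ofs + lm1] is
 * the byte range written relative to env: e.g. (illustrative) a st16
 * at offset 8 invalidates any tracked memory copies overlapping
 * bytes [8, 9].
 */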
2502 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2504 TCGTemp *src;
2505 intptr_t ofs, last;
2506 TCGType type;
2508 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2509 fold_tcg_st(ctx, op);
2510 return false;
2513 src = arg_temp(op->args[0]);
2514 ofs = op->args[2];
2515 type = ctx->type;
2518 * Eliminate duplicate stores of a constant.
2519 * This happens frequently when the target ISA zero-extends.
2521 if (ts_is_const(src)) {
2522 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2523 if (src == prev) {
2524 tcg_op_remove(ctx->tcg, op);
2525 return true;
2529 last = ofs + tcg_type_size(type) - 1;
2530 remove_mem_copy_in(ctx, ofs, last);
2531 record_mem_copy(ctx, type, src, ofs, last);
2532 return false;
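/*
 * Example of the duplicate-store elimination above (illustrative,
 * with a made-up env offset): a translator that re-zeroes a field
 * may emit
 *     st_i32 $0x0, env, $0x80
 *     st_i32 $0x0, env, $0x80
 * Constant temps are interned per (type, value), so the second store
 * finds the identical temp recorded for that offset and is removed.
 */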
2535 static bool fold_xor(OptContext *ctx, TCGOp *op)
2537 if (fold_const2_commutative(ctx, op) ||
2538 fold_xx_to_i(ctx, op, 0) ||
2539 fold_xi_to_x(ctx, op, 0) ||
2540 fold_xi_to_not(ctx, op, -1)) {
2541 return true;
2544 ctx->z_mask = arg_info(op->args[1])->z_mask
2545 | arg_info(op->args[2])->z_mask;
2546 ctx->s_mask = arg_info(op->args[1])->s_mask
2547 & arg_info(op->args[2])->s_mask;
2548 return fold_masks(ctx, op);
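/*
 * The mask combination above is conservative but sound: an xor result
 * bit can be nonzero only where at least one input can be nonzero
 * (OR of the z_masks), and the result retains at least as many
 * redundant sign bits as the weaker input (AND of the s_masks).
 * E.g. (illustrative) z_masks 0x0f and 0x30 combine to 0x3f.
 */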
2551 /* Propagate constants and copies, fold constant expressions. */
2552 void tcg_optimize(TCGContext *s)
2554 int nb_temps, i;
2555 TCGOp *op, *op_next;
2556 OptContext ctx = { .tcg = s };
2558 QSIMPLEQ_INIT(&ctx.mem_free);
2560 /* Array VALS has an element for each temp.
2561 If this temp holds a constant then its value is kept in VALS' element.
2562 If this temp is a copy of other temps then those copies are
2563 available through the doubly linked circular list. */
2565 nb_temps = s->nb_temps;
2566 for (i = 0; i < nb_temps; ++i) {
2567 s->temps[i].state_ptr = NULL;
2570 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2571 TCGOpcode opc = op->opc;
2572 const TCGOpDef *def;
2573 bool done = false;
2575 /* Calls are special. */
2576 if (opc == INDEX_op_call) {
2577 fold_call(&ctx, op);
2578 continue;
2581 def = &tcg_op_defs[opc];
2582 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2583 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2585 /* Pre-compute the type of the operation. */
2586 if (def->flags & TCG_OPF_VECTOR) {
2587 ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2588 } else if (def->flags & TCG_OPF_64BIT) {
2589 ctx.type = TCG_TYPE_I64;
2590 } else {
2591 ctx.type = TCG_TYPE_I32;
2594 /* Assume all bits affected, no bits known zero, no sign reps. */
2595 ctx.a_mask = -1;
2596 ctx.z_mask = -1;
2597 ctx.s_mask = 0;
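/*
 * These defaults mean "nothing known yet": a_mask = -1 (no bit proven
 * identical to the first input), z_mask = -1 (no bit proven zero),
 * s_mask = 0 (no redundant sign bits).  Each fold_* routine below
 * narrows them when it can prove more.
 */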
2600 * Process each opcode.
2601 * Sorted alphabetically by opcode as much as possible.
2603 switch (opc) {
2604 CASE_OP_32_64(add):
2605 done = fold_add(&ctx, op);
2606 break;
2607 case INDEX_op_add_vec:
2608 done = fold_add_vec(&ctx, op);
2609 break;
2610 CASE_OP_32_64(add2):
2611 done = fold_add2(&ctx, op);
2612 break;
2613 CASE_OP_32_64_VEC(and):
2614 done = fold_and(&ctx, op);
2615 break;
2616 CASE_OP_32_64_VEC(andc):
2617 done = fold_andc(&ctx, op);
2618 break;
2619 CASE_OP_32_64(brcond):
2620 done = fold_brcond(&ctx, op);
2621 break;
2622 case INDEX_op_brcond2_i32:
2623 done = fold_brcond2(&ctx, op);
2624 break;
2625 CASE_OP_32_64(bswap16):
2626 CASE_OP_32_64(bswap32):
2627 case INDEX_op_bswap64_i64:
2628 done = fold_bswap(&ctx, op);
2629 break;
2630 CASE_OP_32_64(clz):
2631 CASE_OP_32_64(ctz):
2632 done = fold_count_zeros(&ctx, op);
2633 break;
2634 CASE_OP_32_64(ctpop):
2635 done = fold_ctpop(&ctx, op);
2636 break;
2637 CASE_OP_32_64(deposit):
2638 done = fold_deposit(&ctx, op);
2639 break;
2640 CASE_OP_32_64(div):
2641 CASE_OP_32_64(divu):
2642 done = fold_divide(&ctx, op);
2643 break;
2644 case INDEX_op_dup_vec:
2645 done = fold_dup(&ctx, op);
2646 break;
2647 case INDEX_op_dup2_vec:
2648 done = fold_dup2(&ctx, op);
2649 break;
2650 CASE_OP_32_64_VEC(eqv):
2651 done = fold_eqv(&ctx, op);
2652 break;
2653 CASE_OP_32_64(extract):
2654 done = fold_extract(&ctx, op);
2655 break;
2656 CASE_OP_32_64(extract2):
2657 done = fold_extract2(&ctx, op);
2658 break;
2659 CASE_OP_32_64(ext8s):
2660 CASE_OP_32_64(ext16s):
2661 case INDEX_op_ext32s_i64:
2662 case INDEX_op_ext_i32_i64:
2663 done = fold_exts(&ctx, op);
2664 break;
2665 CASE_OP_32_64(ext8u):
2666 CASE_OP_32_64(ext16u):
2667 case INDEX_op_ext32u_i64:
2668 case INDEX_op_extu_i32_i64:
2669 case INDEX_op_extrl_i64_i32:
2670 case INDEX_op_extrh_i64_i32:
2671 done = fold_extu(&ctx, op);
2672 break;
2673 CASE_OP_32_64(ld8s):
2674 CASE_OP_32_64(ld8u):
2675 CASE_OP_32_64(ld16s):
2676 CASE_OP_32_64(ld16u):
2677 case INDEX_op_ld32s_i64:
2678 case INDEX_op_ld32u_i64:
2679 done = fold_tcg_ld(&ctx, op);
2680 break;
2681 case INDEX_op_ld_i32:
2682 case INDEX_op_ld_i64:
2683 case INDEX_op_ld_vec:
2684 done = fold_tcg_ld_memcopy(&ctx, op);
2685 break;
2686 CASE_OP_32_64(st8):
2687 CASE_OP_32_64(st16):
2688 case INDEX_op_st32_i64:
2689 done = fold_tcg_st(&ctx, op);
2690 break;
2691 case INDEX_op_st_i32:
2692 case INDEX_op_st_i64:
2693 case INDEX_op_st_vec:
2694 done = fold_tcg_st_memcopy(&ctx, op);
2695 break;
2696 case INDEX_op_mb:
2697 done = fold_mb(&ctx, op);
2698 break;
2699 CASE_OP_32_64_VEC(mov):
2700 done = fold_mov(&ctx, op);
2701 break;
2702 CASE_OP_32_64(movcond):
2703 done = fold_movcond(&ctx, op);
2704 break;
2705 CASE_OP_32_64(mul):
2706 done = fold_mul(&ctx, op);
2707 break;
2708 CASE_OP_32_64(mulsh):
2709 CASE_OP_32_64(muluh):
2710 done = fold_mul_highpart(&ctx, op);
2711 break;
2712 CASE_OP_32_64(muls2):
2713 CASE_OP_32_64(mulu2):
2714 done = fold_multiply2(&ctx, op);
2715 break;
2716 CASE_OP_32_64_VEC(nand):
2717 done = fold_nand(&ctx, op);
2718 break;
2719 CASE_OP_32_64(neg):
2720 done = fold_neg(&ctx, op);
2721 break;
2722 CASE_OP_32_64_VEC(nor):
2723 done = fold_nor(&ctx, op);
2724 break;
2725 CASE_OP_32_64_VEC(not):
2726 done = fold_not(&ctx, op);
2727 break;
2728 CASE_OP_32_64_VEC(or):
2729 done = fold_or(&ctx, op);
2730 break;
2731 CASE_OP_32_64_VEC(orc):
2732 done = fold_orc(&ctx, op);
2733 break;
2734 case INDEX_op_qemu_ld_a32_i32:
2735 case INDEX_op_qemu_ld_a64_i32:
2736 case INDEX_op_qemu_ld_a32_i64:
2737 case INDEX_op_qemu_ld_a64_i64:
2738 case INDEX_op_qemu_ld_a32_i128:
2739 case INDEX_op_qemu_ld_a64_i128:
2740 done = fold_qemu_ld(&ctx, op);
2741 break;
2742 case INDEX_op_qemu_st8_a32_i32:
2743 case INDEX_op_qemu_st8_a64_i32:
2744 case INDEX_op_qemu_st_a32_i32:
2745 case INDEX_op_qemu_st_a64_i32:
2746 case INDEX_op_qemu_st_a32_i64:
2747 case INDEX_op_qemu_st_a64_i64:
2748 case INDEX_op_qemu_st_a32_i128:
2749 case INDEX_op_qemu_st_a64_i128:
2750 done = fold_qemu_st(&ctx, op);
2751 break;
2752 CASE_OP_32_64(rem):
2753 CASE_OP_32_64(remu):
2754 done = fold_remainder(&ctx, op);
2755 break;
2756 CASE_OP_32_64(rotl):
2757 CASE_OP_32_64(rotr):
2758 CASE_OP_32_64(sar):
2759 CASE_OP_32_64(shl):
2760 CASE_OP_32_64(shr):
2761 done = fold_shift(&ctx, op);
2762 break;
2763 CASE_OP_32_64(setcond):
2764 done = fold_setcond(&ctx, op);
2765 break;
2766 CASE_OP_32_64(negsetcond):
2767 done = fold_negsetcond(&ctx, op);
2768 break;
2769 case INDEX_op_setcond2_i32:
2770 done = fold_setcond2(&ctx, op);
2771 break;
2772 CASE_OP_32_64(sextract):
2773 done = fold_sextract(&ctx, op);
2774 break;
2775 CASE_OP_32_64(sub):
2776 done = fold_sub(&ctx, op);
2777 break;
2778 case INDEX_op_sub_vec:
2779 done = fold_sub_vec(&ctx, op);
2780 break;
2781 CASE_OP_32_64(sub2):
2782 done = fold_sub2(&ctx, op);
2783 break;
2784 CASE_OP_32_64_VEC(xor):
2785 done = fold_xor(&ctx, op);
2786 break;
2787 default:
2788 break;
2791 if (!done) {
2792 finish_folding(&ctx, op);