/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
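/*
 * For example, CASE_OP_32_64(add) expands to the pair of labels
 * "case INDEX_op_add_i32: case INDEX_op_add_i64", and the _VEC variant
 * appends "case INDEX_op_add_vec" as well.
 */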
typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY (MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    TCGType type;
} OptContext;
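/*
 * Illustration of the mask conventions: a temp known to be zero-extended
 * from 16 bits has z_mask == 0xffff (all higher bits known zero) and an
 * s_mask covering the top 47 bits; a temp known to be sign-extended from
 * 16 bits has z_mask of all ones (no bit known zero) but an s_mask
 * covering the top 48 bits.  While folding an op, a_mask == 0 means the
 * result is provably identical to the first input, so the op can be
 * replaced by a plain mov.
 */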
/* Calculate the smask for a specific value. */
static uint64_t smask_from_value(uint64_t value)
{
    int rep = clrsb64(value);
    return ~(~0ull >> rep);
}
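/*
 * Worked example: clrsb64(0x00000000000000ff) is 55, since bits 62..8 all
 * repeat the (zero) sign bit, so smask_from_value returns the top 55 bits,
 * 0xfffffffffffffe00.  For value -1, clrsb64 is 63 and the result is
 * 0xfffffffffffffffe.
 */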
/*
 * Calculate the smask for a given set of known-zeros.
 * If there are lots of zeros on the left, we can consider the remainder
 * an unsigned field, and thus the corresponding signed field is one bit
 * larger.
 */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    /*
     * Only the 0 bits are significant for zmask, thus the msb itself
     * must be zero, else we have no sign information.
     */
    int rep = clz64(zmask);
    if (rep == 0) {
        return 0;
    }
    rep -= 1;
    return ~(~0ull >> rep);
}
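/*
 * Worked example: zmask 0xffff (a value known zero-extended from 16 bits)
 * has 48 leading zeros; one of them is the guaranteed-zero sign position,
 * so the smask covers the remaining top 47 bits, 0xfffffffffffe0000.
 * A zmask with the msb set yields no sign information at all.
 */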
/*
 * Recreate a properly left-aligned smask after manipulation.
 * Some bit-shuffling, particularly shifts and rotates, may
 * retain sign bits on the left, but may scatter disconnected
 * sign bits on the right.  Retain only what remains to the left.
 */
static uint64_t smask_from_smask(int64_t smask)
{
    /* Only the 1 bits are significant for smask */
    return smask_from_zmask(~smask);
}
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    TempOptInfo *ti = ts_info(ts);
    return ti->is_const && ti->val == val;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    return ts_is_const_val(arg_temp(arg), val);
}
static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    return a->kind < b->kind ? b : a;
}
/* Initialize and activate a temporary.  */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    QSIMPLEQ_INIT(&ti->mem_copy);
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->z_mask = ts->val;
        ti->s_mask = smask_from_value(ts->val);
    } else {
        ti->is_const = false;
        ti->z_mask = -1;
        ti->s_mask = 0;
    }
}
static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}
static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TCGTemp *ts = mc->ts;
    TempOptInfo *ti = ts_info(ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}
static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    while (true) {
        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
        if (!mc) {
            break;
        }
        remove_mem_copy(ctx, mc);
    }
}

static void remove_mem_copy_all(OptContext *ctx)
{
    remove_mem_copy_in(ctx, 0, -1);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}
static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *i, *ret;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    ret = ts;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        ret = cmp_better_copy(ret, i);
    }
    return ret;
}
static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *si = ts_info(src_ts);
    TempOptInfo *di = ts_info(dst_ts);
    MemCopyInfo *mc;

    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
        tcg_debug_assert(mc->ts == src_ts);
        mc->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
}
253 /* Reset TEMP's state, possibly removing the temp for the list of copies. */
254 static void reset_ts(OptContext
*ctx
, TCGTemp
*ts
)
256 TempOptInfo
*ti
= ts_info(ts
);
257 TCGTemp
*pts
= ti
->prev_copy
;
258 TCGTemp
*nts
= ti
->next_copy
;
259 TempOptInfo
*pi
= ts_info(pts
);
260 TempOptInfo
*ni
= ts_info(nts
);
262 ni
->prev_copy
= ti
->prev_copy
;
263 pi
->next_copy
= ti
->next_copy
;
266 ti
->is_const
= false;
270 if (!QSIMPLEQ_EMPTY(&ti
->mem_copy
)) {
272 /* Last temp copy being removed, the mem copies die. */
274 QSIMPLEQ_FOREACH(mc
, &ti
->mem_copy
, next
) {
275 interval_tree_remove(&mc
->itree
, &ctx
->mem_copy
);
277 QSIMPLEQ_CONCAT(&ctx
->mem_free
, &ti
->mem_copy
);
279 move_mem_copies(find_better_copy(nts
), ts
);
284 static void reset_temp(OptContext
*ctx
, TCGArg arg
)
286 reset_ts(ctx
, arg_temp(arg
));
289 static void record_mem_copy(OptContext
*ctx
, TCGType type
,
290 TCGTemp
*ts
, intptr_t start
, intptr_t last
)
295 mc
= QSIMPLEQ_FIRST(&ctx
->mem_free
);
297 QSIMPLEQ_REMOVE_HEAD(&ctx
->mem_free
, next
);
299 mc
= tcg_malloc(sizeof(*mc
));
302 memset(mc
, 0, sizeof(*mc
));
303 mc
->itree
.start
= start
;
304 mc
->itree
.last
= last
;
306 interval_tree_insert(&mc
->itree
, &ctx
->mem_copy
);
308 ts
= find_better_copy(ts
);
311 QSIMPLEQ_INSERT_TAIL(&ti
->mem_copy
, mc
, next
);
314 static bool ts_are_copies(TCGTemp
*ts1
, TCGTemp
*ts2
)
322 if (!ts_is_copy(ts1
) || !ts_is_copy(ts2
)) {
326 for (i
= ts_info(ts1
)->next_copy
; i
!= ts1
; i
= ts_info(i
)->next_copy
) {
335 static bool args_are_copies(TCGArg arg1
, TCGArg arg2
)
337 return ts_are_copies(arg_temp(arg1
), arg_temp(arg2
));
340 static TCGTemp
*find_mem_copy_for(OptContext
*ctx
, TCGType type
, intptr_t s
)
344 for (mc
= mem_copy_first(ctx
, s
, s
); mc
; mc
= mem_copy_next(mc
, s
, s
)) {
345 if (mc
->itree
.start
== s
&& mc
->type
== type
) {
346 return find_better_copy(mc
->ts
);
352 static TCGArg
arg_new_constant(OptContext
*ctx
, uint64_t val
)
354 TCGType type
= ctx
->type
;
357 if (type
== TCG_TYPE_I32
) {
361 ts
= tcg_constant_internal(type
, val
);
362 init_ts_info(ctx
, ts
);
367 static bool tcg_opt_gen_mov(OptContext
*ctx
, TCGOp
*op
, TCGArg dst
, TCGArg src
)
369 TCGTemp
*dst_ts
= arg_temp(dst
);
370 TCGTemp
*src_ts
= arg_temp(src
);
375 if (ts_are_copies(dst_ts
, src_ts
)) {
376 tcg_op_remove(ctx
->tcg
, op
);
380 reset_ts(ctx
, dst_ts
);
381 di
= ts_info(dst_ts
);
382 si
= ts_info(src_ts
);
386 new_op
= INDEX_op_mov_i32
;
389 new_op
= INDEX_op_mov_i64
;
394 /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
395 new_op
= INDEX_op_mov_vec
;
398 g_assert_not_reached();
404 di
->z_mask
= si
->z_mask
;
405 di
->s_mask
= si
->s_mask
;
407 if (src_ts
->type
== dst_ts
->type
) {
408 TempOptInfo
*ni
= ts_info(si
->next_copy
);
410 di
->next_copy
= si
->next_copy
;
411 di
->prev_copy
= src_ts
;
412 ni
->prev_copy
= dst_ts
;
413 si
->next_copy
= dst_ts
;
414 di
->is_const
= si
->is_const
;
417 if (!QSIMPLEQ_EMPTY(&si
->mem_copy
)
418 && cmp_better_copy(src_ts
, dst_ts
) == dst_ts
) {
419 move_mem_copies(dst_ts
, src_ts
);
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    /* Convert movi to mov with constant temp. */
    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}
432 static uint64_t do_constant_folding_2(TCGOpcode op
, uint64_t x
, uint64_t y
)
446 CASE_OP_32_64_VEC(and):
449 CASE_OP_32_64_VEC(or):
452 CASE_OP_32_64_VEC(xor):
455 case INDEX_op_shl_i32
:
456 return (uint32_t)x
<< (y
& 31);
458 case INDEX_op_shl_i64
:
459 return (uint64_t)x
<< (y
& 63);
461 case INDEX_op_shr_i32
:
462 return (uint32_t)x
>> (y
& 31);
464 case INDEX_op_shr_i64
:
465 return (uint64_t)x
>> (y
& 63);
467 case INDEX_op_sar_i32
:
468 return (int32_t)x
>> (y
& 31);
470 case INDEX_op_sar_i64
:
471 return (int64_t)x
>> (y
& 63);
473 case INDEX_op_rotr_i32
:
474 return ror32(x
, y
& 31);
476 case INDEX_op_rotr_i64
:
477 return ror64(x
, y
& 63);
479 case INDEX_op_rotl_i32
:
480 return rol32(x
, y
& 31);
482 case INDEX_op_rotl_i64
:
483 return rol64(x
, y
& 63);
485 CASE_OP_32_64_VEC(not):
491 CASE_OP_32_64_VEC(andc
):
494 CASE_OP_32_64_VEC(orc
):
497 CASE_OP_32_64_VEC(eqv
):
500 CASE_OP_32_64_VEC(nand
):
503 CASE_OP_32_64_VEC(nor
):
506 case INDEX_op_clz_i32
:
507 return (uint32_t)x
? clz32(x
) : y
;
509 case INDEX_op_clz_i64
:
510 return x
? clz64(x
) : y
;
512 case INDEX_op_ctz_i32
:
513 return (uint32_t)x
? ctz32(x
) : y
;
515 case INDEX_op_ctz_i64
:
516 return x
? ctz64(x
) : y
;
518 case INDEX_op_ctpop_i32
:
521 case INDEX_op_ctpop_i64
:
524 CASE_OP_32_64(ext8s
):
527 CASE_OP_32_64(ext16s
):
530 CASE_OP_32_64(ext8u
):
533 CASE_OP_32_64(ext16u
):
536 CASE_OP_32_64(bswap16
):
538 return y
& TCG_BSWAP_OS
? (int16_t)x
: x
;
540 CASE_OP_32_64(bswap32
):
542 return y
& TCG_BSWAP_OS
? (int32_t)x
: x
;
544 case INDEX_op_bswap64_i64
:
547 case INDEX_op_ext_i32_i64
:
548 case INDEX_op_ext32s_i64
:
551 case INDEX_op_extu_i32_i64
:
552 case INDEX_op_extrl_i64_i32
:
553 case INDEX_op_ext32u_i64
:
556 case INDEX_op_extrh_i64_i32
:
557 return (uint64_t)x
>> 32;
559 case INDEX_op_muluh_i32
:
560 return ((uint64_t)(uint32_t)x
* (uint32_t)y
) >> 32;
561 case INDEX_op_mulsh_i32
:
562 return ((int64_t)(int32_t)x
* (int32_t)y
) >> 32;
564 case INDEX_op_muluh_i64
:
565 mulu64(&l64
, &h64
, x
, y
);
567 case INDEX_op_mulsh_i64
:
568 muls64(&l64
, &h64
, x
, y
);
571 case INDEX_op_div_i32
:
572 /* Avoid crashing on divide by zero, otherwise undefined. */
573 return (int32_t)x
/ ((int32_t)y
? : 1);
574 case INDEX_op_divu_i32
:
575 return (uint32_t)x
/ ((uint32_t)y
? : 1);
576 case INDEX_op_div_i64
:
577 return (int64_t)x
/ ((int64_t)y
? : 1);
578 case INDEX_op_divu_i64
:
579 return (uint64_t)x
/ ((uint64_t)y
? : 1);
581 case INDEX_op_rem_i32
:
582 return (int32_t)x
% ((int32_t)y
? : 1);
583 case INDEX_op_remu_i32
:
584 return (uint32_t)x
% ((uint32_t)y
? : 1);
585 case INDEX_op_rem_i64
:
586 return (int64_t)x
% ((int64_t)y
? : 1);
587 case INDEX_op_remu_i64
:
588 return (uint64_t)x
% ((uint64_t)y
? : 1);
591 g_assert_not_reached();
595 static uint64_t do_constant_folding(TCGOpcode op
, TCGType type
,
596 uint64_t x
, uint64_t y
)
598 uint64_t res
= do_constant_folding_2(op
, x
, y
);
599 if (type
== TCG_TYPE_I32
) {
605 static bool do_constant_folding_cond_32(uint32_t x
, uint32_t y
, TCGCond c
)
613 return (int32_t)x
< (int32_t)y
;
615 return (int32_t)x
>= (int32_t)y
;
617 return (int32_t)x
<= (int32_t)y
;
619 return (int32_t)x
> (int32_t)y
;
632 case TCG_COND_ALWAYS
:
636 g_assert_not_reached();
639 static bool do_constant_folding_cond_64(uint64_t x
, uint64_t y
, TCGCond c
)
647 return (int64_t)x
< (int64_t)y
;
649 return (int64_t)x
>= (int64_t)y
;
651 return (int64_t)x
<= (int64_t)y
;
653 return (int64_t)x
> (int64_t)y
;
666 case TCG_COND_ALWAYS
:
670 g_assert_not_reached();
673 static int do_constant_folding_cond_eq(TCGCond c
)
691 case TCG_COND_ALWAYS
:
695 g_assert_not_reached();
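/*
 * With both operands known equal, EQ/GE/LE (and their unsigned forms) are
 * always true and NE/LT/GT always false, while TSTEQ/TSTNE still depend on
 * the shared value itself (x & x is zero only when x is), so those cannot
 * be decided here.
 */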
699 * Return -1 if the condition can't be simplified,
700 * and the result of the condition (0 or 1) if it can.
702 static int do_constant_folding_cond(TCGType type
, TCGArg x
,
705 if (arg_is_const(x
) && arg_is_const(y
)) {
706 uint64_t xv
= arg_info(x
)->val
;
707 uint64_t yv
= arg_info(y
)->val
;
711 return do_constant_folding_cond_32(xv
, yv
, c
);
713 return do_constant_folding_cond_64(xv
, yv
, c
);
715 /* Only scalar comparisons are optimizable */
718 } else if (args_are_copies(x
, y
)) {
719 return do_constant_folding_cond_eq(c
);
720 } else if (arg_is_const_val(y
, 0)) {
737 * @dest: TCGArg of the destination argument, or NO_DEST.
738 * @p1: first paired argument
739 * @p2: second paired argument
741 * If *@p1 is a constant and *@p2 is not, swap.
742 * If *@p2 matches @dest, swap.
743 * Return true if a swap was performed.
746 #define NO_DEST temp_arg(NULL)
748 static bool swap_commutative(TCGArg dest
, TCGArg
*p1
, TCGArg
*p2
)
750 TCGArg a1
= *p1
, a2
= *p2
;
752 sum
+= arg_is_const(a1
);
753 sum
-= arg_is_const(a2
);
755 /* Prefer the constant in second argument, and then the form
756 op a, a, b, which is better handled on non-RISC hosts. */
757 if (sum
> 0 || (sum
== 0 && dest
== a2
)) {
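        /*
         * Example: add_i32 t0, $5, t1 is canonicalized to add_i32 t0, t1, $5
         * so that the constant ends up in the second operand; likewise
         * add_i32 t0, t1, t0 becomes add_i32 t0, t0, t1, the "op a, a, b"
         * form preferred above.
         */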
765 static bool swap_commutative2(TCGArg
*p1
, TCGArg
*p2
)
768 sum
+= arg_is_const(p1
[0]);
769 sum
+= arg_is_const(p1
[1]);
770 sum
-= arg_is_const(p2
[0]);
771 sum
-= arg_is_const(p2
[1]);
774 t
= p1
[0], p1
[0] = p2
[0], p2
[0] = t
;
775 t
= p1
[1], p1
[1] = p2
[1], p2
[1] = t
;
782 * Return -1 if the condition can't be simplified,
783 * and the result of the condition (0 or 1) if it can.
785 static int do_constant_folding_cond1(OptContext
*ctx
, TCGArg dest
,
786 TCGArg
*p1
, TCGArg
*p2
, TCGArg
*pcond
)
792 swap
= swap_commutative(dest
, p1
, p2
);
795 *pcond
= cond
= tcg_swap_cond(cond
);
798 r
= do_constant_folding_cond(ctx
->type
, *p1
, *p2
, cond
);
802 if (!is_tst_cond(cond
)) {
807 * TSTNE x,x -> NE x,0
808 * TSTNE x,-1 -> NE x,0
810 if (args_are_copies(*p1
, *p2
) || arg_is_const_val(*p2
, -1)) {
811 *p2
= arg_new_constant(ctx
, 0);
812 *pcond
= tcg_tst_eqne_cond(cond
);
816 /* TSTNE x,sign -> LT x,0 */
817 if (arg_is_const_val(*p2
, (ctx
->type
== TCG_TYPE_I32
818 ? INT32_MIN
: INT64_MIN
))) {
819 *p2
= arg_new_constant(ctx
, 0);
820 *pcond
= tcg_tst_ltge_cond(cond
);
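    /*
     * Example: for 64-bit operands, TSTNE x,INT64_MIN examines only bit 63,
     * which is exactly the sign bit, so it is rewritten as LT x,0 above;
     * similarly TSTNE x,-1 asks whether any bit is set, i.e. NE x,0.
     */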
825 static int do_constant_folding_cond2(OptContext
*ctx
, TCGArg
*args
)
827 TCGArg al
, ah
, bl
, bh
;
832 swap
= swap_commutative2(args
, args
+ 2);
835 args
[4] = c
= tcg_swap_cond(c
);
843 if (arg_is_const(bl
) && arg_is_const(bh
)) {
844 tcg_target_ulong blv
= arg_info(bl
)->val
;
845 tcg_target_ulong bhv
= arg_info(bh
)->val
;
846 uint64_t b
= deposit64(blv
, 32, 32, bhv
);
848 if (arg_is_const(al
) && arg_is_const(ah
)) {
849 tcg_target_ulong alv
= arg_info(al
)->val
;
850 tcg_target_ulong ahv
= arg_info(ah
)->val
;
851 uint64_t a
= deposit64(alv
, 32, 32, ahv
);
853 r
= do_constant_folding_cond_64(a
, b
, c
);
872 /* TSTNE x,-1 -> NE x,0 */
873 if (b
== -1 && is_tst_cond(c
)) {
874 args
[3] = args
[2] = arg_new_constant(ctx
, 0);
875 args
[4] = tcg_tst_eqne_cond(c
);
879 /* TSTNE x,sign -> LT x,0 */
880 if (b
== INT64_MIN
&& is_tst_cond(c
)) {
881 /* bl must be 0, so copy that to bh */
883 args
[4] = tcg_tst_ltge_cond(c
);
888 if (args_are_copies(al
, bl
) && args_are_copies(ah
, bh
)) {
889 r
= do_constant_folding_cond_eq(c
);
894 /* TSTNE x,x -> NE x,0 */
895 if (is_tst_cond(c
)) {
896 args
[3] = args
[2] = arg_new_constant(ctx
, 0);
897 args
[4] = tcg_tst_eqne_cond(c
);
904 static void init_arguments(OptContext
*ctx
, TCGOp
*op
, int nb_args
)
906 for (int i
= 0; i
< nb_args
; i
++) {
907 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
908 init_ts_info(ctx
, ts
);
912 static void copy_propagate(OptContext
*ctx
, TCGOp
*op
,
913 int nb_oargs
, int nb_iargs
)
915 for (int i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
916 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
917 if (ts_is_copy(ts
)) {
918 op
->args
[i
] = temp_arg(find_better_copy(ts
));
923 static void finish_folding(OptContext
*ctx
, TCGOp
*op
)
925 const TCGOpDef
*def
= &tcg_op_defs
[op
->opc
];
929 * We only optimize extended basic blocks. If the opcode ends a BB
930 * and is not a conditional branch, reset all temp data.
932 if (def
->flags
& TCG_OPF_BB_END
) {
934 if (!(def
->flags
& TCG_OPF_COND_BRANCH
)) {
935 memset(&ctx
->temps_used
, 0, sizeof(ctx
->temps_used
));
936 remove_mem_copy_all(ctx
);
941 nb_oargs
= def
->nb_oargs
;
942 for (i
= 0; i
< nb_oargs
; i
++) {
943 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
946 * Save the corresponding known-zero/sign bits mask for the
947 * first output argument (only one supported so far).
950 ts_info(ts
)->z_mask
= ctx
->z_mask
;
951 ts_info(ts
)->s_mask
= ctx
->s_mask
;
/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */
967 static bool fold_const1(OptContext
*ctx
, TCGOp
*op
)
969 if (arg_is_const(op
->args
[1])) {
972 t
= arg_info(op
->args
[1])->val
;
973 t
= do_constant_folding(op
->opc
, ctx
->type
, t
, 0);
974 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
979 static bool fold_const2(OptContext
*ctx
, TCGOp
*op
)
981 if (arg_is_const(op
->args
[1]) && arg_is_const(op
->args
[2])) {
982 uint64_t t1
= arg_info(op
->args
[1])->val
;
983 uint64_t t2
= arg_info(op
->args
[2])->val
;
985 t1
= do_constant_folding(op
->opc
, ctx
->type
, t1
, t2
);
986 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t1
);
991 static bool fold_commutative(OptContext
*ctx
, TCGOp
*op
)
993 swap_commutative(op
->args
[0], &op
->args
[1], &op
->args
[2]);
997 static bool fold_const2_commutative(OptContext
*ctx
, TCGOp
*op
)
999 swap_commutative(op
->args
[0], &op
->args
[1], &op
->args
[2]);
1000 return fold_const2(ctx
, op
);
1003 static bool fold_masks(OptContext
*ctx
, TCGOp
*op
)
1005 uint64_t a_mask
= ctx
->a_mask
;
1006 uint64_t z_mask
= ctx
->z_mask
;
1007 uint64_t s_mask
= ctx
->s_mask
;
1010 * 32-bit ops generate 32-bit results, which for the purpose of
1011 * simplifying tcg are sign-extended. Certainly that's how we
1012 * represent our constants elsewhere. Note that the bits will
1013 * be reset properly for a 64-bit value when encountering the
1014 * type changing opcodes.
1016 if (ctx
->type
== TCG_TYPE_I32
) {
1017 a_mask
= (int32_t)a_mask
;
1018 z_mask
= (int32_t)z_mask
;
1019 s_mask
|= MAKE_64BIT_MASK(32, 32);
1020 ctx
->z_mask
= z_mask
;
1021 ctx
->s_mask
= s_mask
;
1025 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], 0);
1028 return tcg_opt_gen_mov(ctx
, op
, op
->args
[0], op
->args
[1]);
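    /*
     * Example: for and_i32 t0, t1, $0xff where t1 already has z_mask 0xff,
     * fold_and computes a_mask = z1 & ~z2 = 0, so the AND cannot change t1
     * and is replaced by a mov above; if instead z1 & z2 == 0, every result
     * bit is known zero and the op becomes a movi $0.
     */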
1034 * Convert @op to NOT, if NOT is supported by the host.
1035 * Return true f the conversion is successful, which will still
1036 * indicate that the processing is complete.
1038 static bool fold_not(OptContext
*ctx
, TCGOp
*op
);
1039 static bool fold_to_not(OptContext
*ctx
, TCGOp
*op
, int idx
)
1044 switch (ctx
->type
) {
1046 not_op
= INDEX_op_not_i32
;
1047 have_not
= TCG_TARGET_HAS_not_i32
;
1050 not_op
= INDEX_op_not_i64
;
1051 have_not
= TCG_TARGET_HAS_not_i64
;
1056 not_op
= INDEX_op_not_vec
;
1057 have_not
= TCG_TARGET_HAS_not_vec
;
1060 g_assert_not_reached();
1064 op
->args
[1] = op
->args
[idx
];
1065 return fold_not(ctx
, op
);
/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}
/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 *   1) those that produce a constant
 *   2) those that produce a copy
 *   3) those that produce information about the result value.
 */
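/*
 * For example, fold_and below first tries to produce a constant (both
 * inputs constant, or an AND with 0), then a copy (an AND with -1, or an
 * AND of a value with itself), and only then falls back to recording the
 * z_mask/s_mask information of the result.
 */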
1142 static bool fold_add(OptContext
*ctx
, TCGOp
*op
)
1144 if (fold_const2_commutative(ctx
, op
) ||
1145 fold_xi_to_x(ctx
, op
, 0)) {
1151 /* We cannot as yet do_constant_folding with vectors. */
1152 static bool fold_add_vec(OptContext
*ctx
, TCGOp
*op
)
1154 if (fold_commutative(ctx
, op
) ||
1155 fold_xi_to_x(ctx
, op
, 0)) {
1161 static bool fold_addsub2(OptContext
*ctx
, TCGOp
*op
, bool add
)
1163 bool a_const
= arg_is_const(op
->args
[2]) && arg_is_const(op
->args
[3]);
1164 bool b_const
= arg_is_const(op
->args
[4]) && arg_is_const(op
->args
[5]);
1166 if (a_const
&& b_const
) {
1167 uint64_t al
= arg_info(op
->args
[2])->val
;
1168 uint64_t ah
= arg_info(op
->args
[3])->val
;
1169 uint64_t bl
= arg_info(op
->args
[4])->val
;
1170 uint64_t bh
= arg_info(op
->args
[5])->val
;
1174 if (ctx
->type
== TCG_TYPE_I32
) {
1175 uint64_t a
= deposit64(al
, 32, 32, ah
);
1176 uint64_t b
= deposit64(bl
, 32, 32, bh
);
1184 al
= sextract64(a
, 0, 32);
1185 ah
= sextract64(a
, 32, 32);
1187 Int128 a
= int128_make128(al
, ah
);
1188 Int128 b
= int128_make128(bl
, bh
);
1191 a
= int128_add(a
, b
);
1193 a
= int128_sub(a
, b
);
1196 al
= int128_getlo(a
);
1197 ah
= int128_gethi(a
);
1203 /* The proper opcode is supplied by tcg_opt_gen_mov. */
1204 op2
= tcg_op_insert_before(ctx
->tcg
, op
, 0, 2);
1206 tcg_opt_gen_movi(ctx
, op
, rl
, al
);
1207 tcg_opt_gen_movi(ctx
, op2
, rh
, ah
);
1211 /* Fold sub2 r,x,i to add2 r,x,-i */
1212 if (!add
&& b_const
) {
1213 uint64_t bl
= arg_info(op
->args
[4])->val
;
1214 uint64_t bh
= arg_info(op
->args
[5])->val
;
1216 /* Negate the two parts without assembling and disassembling. */
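        /*
         * One way to do that is bl = -bl; bh = ~bh + !bl; the carry into
         * the complemented high half is set exactly when the low half is
         * zero.
         */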
1220 op
->opc
= (ctx
->type
== TCG_TYPE_I32
1221 ? INDEX_op_add2_i32
: INDEX_op_add2_i64
);
1222 op
->args
[4] = arg_new_constant(ctx
, bl
);
1223 op
->args
[5] = arg_new_constant(ctx
, bh
);
1228 static bool fold_add2(OptContext
*ctx
, TCGOp
*op
)
1230 /* Note that the high and low parts may be independently swapped. */
1231 swap_commutative(op
->args
[0], &op
->args
[2], &op
->args
[4]);
1232 swap_commutative(op
->args
[1], &op
->args
[3], &op
->args
[5]);
1234 return fold_addsub2(ctx
, op
, true);
1237 static bool fold_and(OptContext
*ctx
, TCGOp
*op
)
1241 if (fold_const2_commutative(ctx
, op
) ||
1242 fold_xi_to_i(ctx
, op
, 0) ||
1243 fold_xi_to_x(ctx
, op
, -1) ||
1244 fold_xx_to_x(ctx
, op
)) {
1248 z1
= arg_info(op
->args
[1])->z_mask
;
1249 z2
= arg_info(op
->args
[2])->z_mask
;
1250 ctx
->z_mask
= z1
& z2
;
1253 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1254 * Bitwise operations preserve the relative quantity of the repetitions.
1256 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
1257 & arg_info(op
->args
[2])->s_mask
;
1260 * Known-zeros does not imply known-ones. Therefore unless
1261 * arg2 is constant, we can't infer affected bits from it.
1263 if (arg_is_const(op
->args
[2])) {
1264 ctx
->a_mask
= z1
& ~z2
;
1267 return fold_masks(ctx
, op
);
1270 static bool fold_andc(OptContext
*ctx
, TCGOp
*op
)
1274 if (fold_const2(ctx
, op
) ||
1275 fold_xx_to_i(ctx
, op
, 0) ||
1276 fold_xi_to_x(ctx
, op
, 0) ||
1277 fold_ix_to_not(ctx
, op
, -1)) {
1281 z1
= arg_info(op
->args
[1])->z_mask
;
1284 * Known-zeros does not imply known-ones. Therefore unless
1285 * arg2 is constant, we can't infer anything from it.
1287 if (arg_is_const(op
->args
[2])) {
1288 uint64_t z2
= ~arg_info(op
->args
[2])->z_mask
;
1289 ctx
->a_mask
= z1
& ~z2
;
1294 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
1295 & arg_info(op
->args
[2])->s_mask
;
1296 return fold_masks(ctx
, op
);
1299 static bool fold_brcond(OptContext
*ctx
, TCGOp
*op
)
1301 int i
= do_constant_folding_cond1(ctx
, NO_DEST
, &op
->args
[0],
1302 &op
->args
[1], &op
->args
[2]);
1304 tcg_op_remove(ctx
->tcg
, op
);
1308 op
->opc
= INDEX_op_br
;
1309 op
->args
[0] = op
->args
[3];
1314 static bool fold_brcond2(OptContext
*ctx
, TCGOp
*op
)
1320 i
= do_constant_folding_cond2(ctx
, &op
->args
[0]);
1322 label
= op
->args
[5];
1324 goto do_brcond_const
;
1331 * Simplify LT/GE comparisons vs zero to a single compare
1332 * vs the high word of the input.
1334 if (arg_is_const_val(op
->args
[2], 0) &&
1335 arg_is_const_val(op
->args
[3], 0)) {
1336 goto do_brcond_high
;
1345 * Simplify EQ/NE comparisons where one of the pairs
1346 * can be simplified.
1348 i
= do_constant_folding_cond(TCG_TYPE_I32
, op
->args
[0],
1352 goto do_brcond_const
;
1354 goto do_brcond_high
;
1357 i
= do_constant_folding_cond(TCG_TYPE_I32
, op
->args
[1],
1361 goto do_brcond_const
;
1367 case TCG_COND_TSTEQ
:
1368 case TCG_COND_TSTNE
:
1369 if (arg_is_const_val(op
->args
[2], 0)) {
1370 goto do_brcond_high
;
1372 if (arg_is_const_val(op
->args
[3], 0)) {
1381 op
->opc
= INDEX_op_brcond_i32
;
1382 op
->args
[1] = op
->args
[2];
1384 op
->args
[3] = label
;
1385 return fold_brcond(ctx
, op
);
1388 op
->opc
= INDEX_op_brcond_i32
;
1389 op
->args
[0] = op
->args
[1];
1390 op
->args
[1] = op
->args
[3];
1392 op
->args
[3] = label
;
1393 return fold_brcond(ctx
, op
);
1397 tcg_op_remove(ctx
->tcg
, op
);
1400 op
->opc
= INDEX_op_br
;
1401 op
->args
[0] = label
;
1407 static bool fold_bswap(OptContext
*ctx
, TCGOp
*op
)
1409 uint64_t z_mask
, s_mask
, sign
;
1411 if (arg_is_const(op
->args
[1])) {
1412 uint64_t t
= arg_info(op
->args
[1])->val
;
1414 t
= do_constant_folding(op
->opc
, ctx
->type
, t
, op
->args
[2]);
1415 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
1418 z_mask
= arg_info(op
->args
[1])->z_mask
;
1421 case INDEX_op_bswap16_i32
:
1422 case INDEX_op_bswap16_i64
:
1423 z_mask
= bswap16(z_mask
);
1426 case INDEX_op_bswap32_i32
:
1427 case INDEX_op_bswap32_i64
:
1428 z_mask
= bswap32(z_mask
);
1431 case INDEX_op_bswap64_i64
:
1432 z_mask
= bswap64(z_mask
);
1436 g_assert_not_reached();
1438 s_mask
= smask_from_zmask(z_mask
);
1440 switch (op
->args
[2] & (TCG_BSWAP_OZ
| TCG_BSWAP_OS
)) {
1444 /* If the sign bit may be 1, force all the bits above to 1. */
1445 if (z_mask
& sign
) {
1451 /* The high bits are undefined: force all bits above the sign to 1. */
1452 z_mask
|= sign
<< 1;
1456 ctx
->z_mask
= z_mask
;
1457 ctx
->s_mask
= s_mask
;
1459 return fold_masks(ctx
, op
);
1462 static bool fold_call(OptContext
*ctx
, TCGOp
*op
)
1464 TCGContext
*s
= ctx
->tcg
;
1465 int nb_oargs
= TCGOP_CALLO(op
);
1466 int nb_iargs
= TCGOP_CALLI(op
);
1469 init_arguments(ctx
, op
, nb_oargs
+ nb_iargs
);
1470 copy_propagate(ctx
, op
, nb_oargs
, nb_iargs
);
1472 /* If the function reads or writes globals, reset temp data. */
1473 flags
= tcg_call_flags(op
);
1474 if (!(flags
& (TCG_CALL_NO_READ_GLOBALS
| TCG_CALL_NO_WRITE_GLOBALS
))) {
1475 int nb_globals
= s
->nb_globals
;
1477 for (i
= 0; i
< nb_globals
; i
++) {
1478 if (test_bit(i
, ctx
->temps_used
.l
)) {
1479 reset_ts(ctx
, &ctx
->tcg
->temps
[i
]);
1484 /* If the function has side effects, reset mem data. */
1485 if (!(flags
& TCG_CALL_NO_SIDE_EFFECTS
)) {
1486 remove_mem_copy_all(ctx
);
1489 /* Reset temp data for outputs. */
1490 for (i
= 0; i
< nb_oargs
; i
++) {
1491 reset_temp(ctx
, op
->args
[i
]);
1494 /* Stop optimizing MB across calls. */
1495 ctx
->prev_mb
= NULL
;
1499 static bool fold_count_zeros(OptContext
*ctx
, TCGOp
*op
)
1503 if (arg_is_const(op
->args
[1])) {
1504 uint64_t t
= arg_info(op
->args
[1])->val
;
1507 t
= do_constant_folding(op
->opc
, ctx
->type
, t
, 0);
1508 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
1510 return tcg_opt_gen_mov(ctx
, op
, op
->args
[0], op
->args
[2]);
1513 switch (ctx
->type
) {
1521 g_assert_not_reached();
1523 ctx
->z_mask
= arg_info(op
->args
[2])->z_mask
| z_mask
;
1524 ctx
->s_mask
= smask_from_zmask(ctx
->z_mask
);
1528 static bool fold_ctpop(OptContext
*ctx
, TCGOp
*op
)
1530 if (fold_const1(ctx
, op
)) {
1534 switch (ctx
->type
) {
1536 ctx
->z_mask
= 32 | 31;
1539 ctx
->z_mask
= 64 | 63;
1542 g_assert_not_reached();
1544 ctx
->s_mask
= smask_from_zmask(ctx
->z_mask
);
1548 static bool fold_deposit(OptContext
*ctx
, TCGOp
*op
)
1552 if (arg_is_const(op
->args
[1]) && arg_is_const(op
->args
[2])) {
1553 uint64_t t1
= arg_info(op
->args
[1])->val
;
1554 uint64_t t2
= arg_info(op
->args
[2])->val
;
1556 t1
= deposit64(t1
, op
->args
[3], op
->args
[4], t2
);
1557 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t1
);
1560 switch (ctx
->type
) {
1562 and_opc
= INDEX_op_and_i32
;
1565 and_opc
= INDEX_op_and_i64
;
1568 g_assert_not_reached();
1571 /* Inserting a value into zero at offset 0. */
1572 if (arg_is_const_val(op
->args
[1], 0) && op
->args
[3] == 0) {
1573 uint64_t mask
= MAKE_64BIT_MASK(0, op
->args
[4]);
1576 op
->args
[1] = op
->args
[2];
1577 op
->args
[2] = arg_new_constant(ctx
, mask
);
1578 ctx
->z_mask
= mask
& arg_info(op
->args
[1])->z_mask
;
1582 /* Inserting zero into a value. */
1583 if (arg_is_const_val(op
->args
[2], 0)) {
1584 uint64_t mask
= deposit64(-1, op
->args
[3], op
->args
[4], 0);
1587 op
->args
[2] = arg_new_constant(ctx
, mask
);
1588 ctx
->z_mask
= mask
& arg_info(op
->args
[1])->z_mask
;
1592 ctx
->z_mask
= deposit64(arg_info(op
->args
[1])->z_mask
,
1593 op
->args
[3], op
->args
[4],
1594 arg_info(op
->args
[2])->z_mask
);
1598 static bool fold_divide(OptContext
*ctx
, TCGOp
*op
)
1600 if (fold_const2(ctx
, op
) ||
1601 fold_xi_to_x(ctx
, op
, 1)) {
1607 static bool fold_dup(OptContext
*ctx
, TCGOp
*op
)
1609 if (arg_is_const(op
->args
[1])) {
1610 uint64_t t
= arg_info(op
->args
[1])->val
;
1611 t
= dup_const(TCGOP_VECE(op
), t
);
1612 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
1617 static bool fold_dup2(OptContext
*ctx
, TCGOp
*op
)
1619 if (arg_is_const(op
->args
[1]) && arg_is_const(op
->args
[2])) {
1620 uint64_t t
= deposit64(arg_info(op
->args
[1])->val
, 32, 32,
1621 arg_info(op
->args
[2])->val
);
1622 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
1625 if (args_are_copies(op
->args
[1], op
->args
[2])) {
1626 op
->opc
= INDEX_op_dup_vec
;
1627 TCGOP_VECE(op
) = MO_32
;
1632 static bool fold_eqv(OptContext
*ctx
, TCGOp
*op
)
1634 if (fold_const2_commutative(ctx
, op
) ||
1635 fold_xi_to_x(ctx
, op
, -1) ||
1636 fold_xi_to_not(ctx
, op
, 0)) {
1640 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
1641 & arg_info(op
->args
[2])->s_mask
;
1645 static bool fold_extract(OptContext
*ctx
, TCGOp
*op
)
1647 uint64_t z_mask_old
, z_mask
;
1648 int pos
= op
->args
[2];
1649 int len
= op
->args
[3];
1651 if (arg_is_const(op
->args
[1])) {
1654 t
= arg_info(op
->args
[1])->val
;
1655 t
= extract64(t
, pos
, len
);
1656 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
1659 z_mask_old
= arg_info(op
->args
[1])->z_mask
;
1660 z_mask
= extract64(z_mask_old
, pos
, len
);
1662 ctx
->a_mask
= z_mask_old
^ z_mask
;
1664 ctx
->z_mask
= z_mask
;
1665 ctx
->s_mask
= smask_from_zmask(z_mask
);
1667 return fold_masks(ctx
, op
);
1670 static bool fold_extract2(OptContext
*ctx
, TCGOp
*op
)
1672 if (arg_is_const(op
->args
[1]) && arg_is_const(op
->args
[2])) {
1673 uint64_t v1
= arg_info(op
->args
[1])->val
;
1674 uint64_t v2
= arg_info(op
->args
[2])->val
;
1675 int shr
= op
->args
[3];
1677 if (op
->opc
== INDEX_op_extract2_i64
) {
1681 v1
= (uint32_t)v1
>> shr
;
1682 v2
= (uint64_t)((int32_t)v2
<< (32 - shr
));
1684 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], v1
| v2
);
1689 static bool fold_exts(OptContext
*ctx
, TCGOp
*op
)
1691 uint64_t s_mask_old
, s_mask
, z_mask
, sign
;
1692 bool type_change
= false;
1694 if (fold_const1(ctx
, op
)) {
1698 z_mask
= arg_info(op
->args
[1])->z_mask
;
1699 s_mask
= arg_info(op
->args
[1])->s_mask
;
1700 s_mask_old
= s_mask
;
1703 CASE_OP_32_64(ext8s
):
1705 z_mask
= (uint8_t)z_mask
;
1707 CASE_OP_32_64(ext16s
):
1709 z_mask
= (uint16_t)z_mask
;
1711 case INDEX_op_ext_i32_i64
:
1714 case INDEX_op_ext32s_i64
:
1716 z_mask
= (uint32_t)z_mask
;
1719 g_assert_not_reached();
1722 if (z_mask
& sign
) {
1725 s_mask
|= sign
<< 1;
1727 ctx
->z_mask
= z_mask
;
1728 ctx
->s_mask
= s_mask
;
1730 ctx
->a_mask
= s_mask
& ~s_mask_old
;
1733 return fold_masks(ctx
, op
);
1736 static bool fold_extu(OptContext
*ctx
, TCGOp
*op
)
1738 uint64_t z_mask_old
, z_mask
;
1739 bool type_change
= false;
1741 if (fold_const1(ctx
, op
)) {
1745 z_mask_old
= z_mask
= arg_info(op
->args
[1])->z_mask
;
1748 CASE_OP_32_64(ext8u
):
1749 z_mask
= (uint8_t)z_mask
;
1751 CASE_OP_32_64(ext16u
):
1752 z_mask
= (uint16_t)z_mask
;
1754 case INDEX_op_extrl_i64_i32
:
1755 case INDEX_op_extu_i32_i64
:
1758 case INDEX_op_ext32u_i64
:
1759 z_mask
= (uint32_t)z_mask
;
1761 case INDEX_op_extrh_i64_i32
:
1766 g_assert_not_reached();
1769 ctx
->z_mask
= z_mask
;
1770 ctx
->s_mask
= smask_from_zmask(z_mask
);
1772 ctx
->a_mask
= z_mask_old
^ z_mask
;
1774 return fold_masks(ctx
, op
);
1777 static bool fold_mb(OptContext
*ctx
, TCGOp
*op
)
1779 /* Eliminate duplicate and redundant fence instructions. */
1782 * Merge two barriers of the same type into one,
1783 * or a weaker barrier into a stronger one,
1784 * or two weaker barriers into a stronger one.
1785 * mb X; mb Y => mb X|Y
1786 * mb; strl => mb; st
1787 * ldaq; mb => ld; mb
1788 * ldaq; strl => ld; mb; st
1789 * Other combinations are also merged into a strong
1790 * barrier. This is stricter than specified but for
1791 * the purposes of TCG is better than not optimizing.
1793 ctx
->prev_mb
->args
[0] |= op
->args
[0];
1794 tcg_op_remove(ctx
->tcg
, op
);
1801 static bool fold_mov(OptContext
*ctx
, TCGOp
*op
)
1803 return tcg_opt_gen_mov(ctx
, op
, op
->args
[0], op
->args
[1]);
1806 static bool fold_movcond(OptContext
*ctx
, TCGOp
*op
)
1811 * Canonicalize the "false" input reg to match the destination reg so
1812 * that the tcg backend can implement a "move if true" operation.
1814 if (swap_commutative(op
->args
[0], &op
->args
[4], &op
->args
[3])) {
1815 op
->args
[5] = tcg_invert_cond(op
->args
[5]);
1818 i
= do_constant_folding_cond1(ctx
, NO_DEST
, &op
->args
[1],
1819 &op
->args
[2], &op
->args
[5]);
1821 return tcg_opt_gen_mov(ctx
, op
, op
->args
[0], op
->args
[4 - i
]);
1824 ctx
->z_mask
= arg_info(op
->args
[3])->z_mask
1825 | arg_info(op
->args
[4])->z_mask
;
1826 ctx
->s_mask
= arg_info(op
->args
[3])->s_mask
1827 & arg_info(op
->args
[4])->s_mask
;
1829 if (arg_is_const(op
->args
[3]) && arg_is_const(op
->args
[4])) {
1830 uint64_t tv
= arg_info(op
->args
[3])->val
;
1831 uint64_t fv
= arg_info(op
->args
[4])->val
;
1832 TCGOpcode opc
, negopc
= 0;
1833 TCGCond cond
= op
->args
[5];
1835 switch (ctx
->type
) {
1837 opc
= INDEX_op_setcond_i32
;
1838 if (TCG_TARGET_HAS_negsetcond_i32
) {
1839 negopc
= INDEX_op_negsetcond_i32
;
1845 opc
= INDEX_op_setcond_i64
;
1846 if (TCG_TARGET_HAS_negsetcond_i64
) {
1847 negopc
= INDEX_op_negsetcond_i64
;
1851 g_assert_not_reached();
1854 if (tv
== 1 && fv
== 0) {
1857 } else if (fv
== 1 && tv
== 0) {
1859 op
->args
[3] = tcg_invert_cond(cond
);
1860 } else if (negopc
) {
1861 if (tv
== -1 && fv
== 0) {
1864 } else if (fv
== -1 && tv
== 0) {
1866 op
->args
[3] = tcg_invert_cond(cond
);
1873 static bool fold_mul(OptContext
*ctx
, TCGOp
*op
)
1875 if (fold_const2(ctx
, op
) ||
1876 fold_xi_to_i(ctx
, op
, 0) ||
1877 fold_xi_to_x(ctx
, op
, 1)) {
1883 static bool fold_mul_highpart(OptContext
*ctx
, TCGOp
*op
)
1885 if (fold_const2_commutative(ctx
, op
) ||
1886 fold_xi_to_i(ctx
, op
, 0)) {
1892 static bool fold_multiply2(OptContext
*ctx
, TCGOp
*op
)
1894 swap_commutative(op
->args
[0], &op
->args
[2], &op
->args
[3]);
1896 if (arg_is_const(op
->args
[2]) && arg_is_const(op
->args
[3])) {
1897 uint64_t a
= arg_info(op
->args
[2])->val
;
1898 uint64_t b
= arg_info(op
->args
[3])->val
;
1904 case INDEX_op_mulu2_i32
:
1905 l
= (uint64_t)(uint32_t)a
* (uint32_t)b
;
1906 h
= (int32_t)(l
>> 32);
1909 case INDEX_op_muls2_i32
:
1910 l
= (int64_t)(int32_t)a
* (int32_t)b
;
1914 case INDEX_op_mulu2_i64
:
1915 mulu64(&l
, &h
, a
, b
);
1917 case INDEX_op_muls2_i64
:
1918 muls64(&l
, &h
, a
, b
);
1921 g_assert_not_reached();
1927 /* The proper opcode is supplied by tcg_opt_gen_mov. */
1928 op2
= tcg_op_insert_before(ctx
->tcg
, op
, 0, 2);
1930 tcg_opt_gen_movi(ctx
, op
, rl
, l
);
1931 tcg_opt_gen_movi(ctx
, op2
, rh
, h
);
1937 static bool fold_nand(OptContext
*ctx
, TCGOp
*op
)
1939 if (fold_const2_commutative(ctx
, op
) ||
1940 fold_xi_to_not(ctx
, op
, -1)) {
1944 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
1945 & arg_info(op
->args
[2])->s_mask
;
1949 static bool fold_neg(OptContext
*ctx
, TCGOp
*op
)
1953 if (fold_const1(ctx
, op
)) {
1957 /* Set to 1 all bits to the left of the rightmost. */
1958 z_mask
= arg_info(op
->args
[1])->z_mask
;
1959 ctx
->z_mask
= -(z_mask
& -z_mask
);
1962 * Because of fold_sub_to_neg, we want to always return true,
1963 * via finish_folding.
1965 finish_folding(ctx
, op
);
1969 static bool fold_nor(OptContext
*ctx
, TCGOp
*op
)
1971 if (fold_const2_commutative(ctx
, op
) ||
1972 fold_xi_to_not(ctx
, op
, 0)) {
1976 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
1977 & arg_info(op
->args
[2])->s_mask
;
1981 static bool fold_not(OptContext
*ctx
, TCGOp
*op
)
1983 if (fold_const1(ctx
, op
)) {
1987 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
;
1989 /* Because of fold_to_not, we want to always return true, via finish. */
1990 finish_folding(ctx
, op
);
1994 static bool fold_or(OptContext
*ctx
, TCGOp
*op
)
1996 if (fold_const2_commutative(ctx
, op
) ||
1997 fold_xi_to_x(ctx
, op
, 0) ||
1998 fold_xx_to_x(ctx
, op
)) {
2002 ctx
->z_mask
= arg_info(op
->args
[1])->z_mask
2003 | arg_info(op
->args
[2])->z_mask
;
2004 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
2005 & arg_info(op
->args
[2])->s_mask
;
2006 return fold_masks(ctx
, op
);
2009 static bool fold_orc(OptContext
*ctx
, TCGOp
*op
)
2011 if (fold_const2(ctx
, op
) ||
2012 fold_xx_to_i(ctx
, op
, -1) ||
2013 fold_xi_to_x(ctx
, op
, -1) ||
2014 fold_ix_to_not(ctx
, op
, 0)) {
2018 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
2019 & arg_info(op
->args
[2])->s_mask
;
2023 static bool fold_qemu_ld(OptContext
*ctx
, TCGOp
*op
)
2025 const TCGOpDef
*def
= &tcg_op_defs
[op
->opc
];
2026 MemOpIdx oi
= op
->args
[def
->nb_oargs
+ def
->nb_iargs
];
2027 MemOp mop
= get_memop(oi
);
2028 int width
= 8 * memop_size(mop
);
2031 ctx
->s_mask
= MAKE_64BIT_MASK(width
, 64 - width
);
2032 if (!(mop
& MO_SIGN
)) {
2033 ctx
->z_mask
= MAKE_64BIT_MASK(0, width
);
2038 /* Opcodes that touch guest memory stop the mb optimization. */
2039 ctx
->prev_mb
= NULL
;
2043 static bool fold_qemu_st(OptContext
*ctx
, TCGOp
*op
)
2045 /* Opcodes that touch guest memory stop the mb optimization. */
2046 ctx
->prev_mb
= NULL
;
2050 static bool fold_remainder(OptContext
*ctx
, TCGOp
*op
)
2052 if (fold_const2(ctx
, op
) ||
2053 fold_xx_to_i(ctx
, op
, 0)) {
2059 static void fold_setcond_tst_pow2(OptContext
*ctx
, TCGOp
*op
, bool neg
)
2061 TCGOpcode and_opc
, sub_opc
, xor_opc
, neg_opc
, shr_opc
, uext_opc
, sext_opc
;
2062 TCGCond cond
= op
->args
[3];
2063 TCGArg ret
, src1
, src2
;
2069 if (!is_tst_cond(cond
) || !arg_is_const(op
->args
[2])) {
2074 val
= arg_info(src2
)->val
;
2075 if (!is_power_of_2(val
)) {
2080 switch (ctx
->type
) {
2082 and_opc
= INDEX_op_and_i32
;
2083 sub_opc
= INDEX_op_sub_i32
;
2084 xor_opc
= INDEX_op_xor_i32
;
2085 shr_opc
= INDEX_op_shr_i32
;
2086 neg_opc
= INDEX_op_neg_i32
;
2087 if (TCG_TARGET_extract_i32_valid(sh
, 1)) {
2088 uext_opc
= TCG_TARGET_HAS_extract_i32
? INDEX_op_extract_i32
: 0;
2089 sext_opc
= TCG_TARGET_HAS_sextract_i32
? INDEX_op_sextract_i32
: 0;
2093 and_opc
= INDEX_op_and_i64
;
2094 sub_opc
= INDEX_op_sub_i64
;
2095 xor_opc
= INDEX_op_xor_i64
;
2096 shr_opc
= INDEX_op_shr_i64
;
2097 neg_opc
= INDEX_op_neg_i64
;
2098 if (TCG_TARGET_extract_i64_valid(sh
, 1)) {
2099 uext_opc
= TCG_TARGET_HAS_extract_i64
? INDEX_op_extract_i64
: 0;
2100 sext_opc
= TCG_TARGET_HAS_sextract_i64
? INDEX_op_sextract_i64
: 0;
2104 g_assert_not_reached();
2109 inv
= cond
== TCG_COND_TSTEQ
;
2111 if (sh
&& sext_opc
&& neg
&& !inv
) {
2117 } else if (sh
&& uext_opc
) {
2124 op2
= tcg_op_insert_before(ctx
->tcg
, op
, shr_opc
, 3);
2126 op2
->args
[1] = src1
;
2127 op2
->args
[2] = arg_new_constant(ctx
, sh
);
2132 op
->args
[2] = arg_new_constant(ctx
, 1);
2136 op2
= tcg_op_insert_after(ctx
->tcg
, op
, sub_opc
, 3);
2139 op2
->args
[2] = arg_new_constant(ctx
, 1);
2141 op2
= tcg_op_insert_after(ctx
->tcg
, op
, xor_opc
, 3);
2144 op2
->args
[2] = arg_new_constant(ctx
, 1);
2146 op2
= tcg_op_insert_after(ctx
->tcg
, op
, neg_opc
, 2);
2152 static bool fold_setcond(OptContext
*ctx
, TCGOp
*op
)
2154 int i
= do_constant_folding_cond1(ctx
, op
->args
[0], &op
->args
[1],
2155 &op
->args
[2], &op
->args
[3]);
2157 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], i
);
2159 fold_setcond_tst_pow2(ctx
, op
, false);
2162 ctx
->s_mask
= smask_from_zmask(1);
2166 static bool fold_negsetcond(OptContext
*ctx
, TCGOp
*op
)
2168 int i
= do_constant_folding_cond1(ctx
, op
->args
[0], &op
->args
[1],
2169 &op
->args
[2], &op
->args
[3]);
2171 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], -i
);
2173 fold_setcond_tst_pow2(ctx
, op
, true);
2175 /* Value is {0,-1} so all bits are repetitions of the sign. */
2180 static bool fold_setcond2(OptContext
*ctx
, TCGOp
*op
)
2185 i
= do_constant_folding_cond2(ctx
, &op
->args
[1]);
2188 goto do_setcond_const
;
2195 * Simplify LT/GE comparisons vs zero to a single compare
2196 * vs the high word of the input.
2198 if (arg_is_const_val(op
->args
[3], 0) &&
2199 arg_is_const_val(op
->args
[4], 0)) {
2200 goto do_setcond_high
;
2209 * Simplify EQ/NE comparisons where one of the pairs
2210 * can be simplified.
2212 i
= do_constant_folding_cond(TCG_TYPE_I32
, op
->args
[1],
2216 goto do_setcond_const
;
2218 goto do_setcond_high
;
2221 i
= do_constant_folding_cond(TCG_TYPE_I32
, op
->args
[2],
2225 goto do_setcond_const
;
2227 goto do_setcond_low
;
2231 case TCG_COND_TSTEQ
:
2232 case TCG_COND_TSTNE
:
2233 if (arg_is_const_val(op
->args
[2], 0)) {
2234 goto do_setcond_high
;
2236 if (arg_is_const_val(op
->args
[4], 0)) {
2237 goto do_setcond_low
;
2245 op
->args
[2] = op
->args
[3];
2247 op
->opc
= INDEX_op_setcond_i32
;
2248 return fold_setcond(ctx
, op
);
2251 op
->args
[1] = op
->args
[2];
2252 op
->args
[2] = op
->args
[4];
2254 op
->opc
= INDEX_op_setcond_i32
;
2255 return fold_setcond(ctx
, op
);
2259 ctx
->s_mask
= smask_from_zmask(1);
2263 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], i
);
2266 static bool fold_sextract(OptContext
*ctx
, TCGOp
*op
)
2268 uint64_t z_mask
, s_mask
, s_mask_old
;
2269 int pos
= op
->args
[2];
2270 int len
= op
->args
[3];
2272 if (arg_is_const(op
->args
[1])) {
2275 t
= arg_info(op
->args
[1])->val
;
2276 t
= sextract64(t
, pos
, len
);
2277 return tcg_opt_gen_movi(ctx
, op
, op
->args
[0], t
);
2280 z_mask
= arg_info(op
->args
[1])->z_mask
;
2281 z_mask
= sextract64(z_mask
, pos
, len
);
2282 ctx
->z_mask
= z_mask
;
2284 s_mask_old
= arg_info(op
->args
[1])->s_mask
;
2285 s_mask
= sextract64(s_mask_old
, pos
, len
);
2286 s_mask
|= MAKE_64BIT_MASK(len
, 64 - len
);
2287 ctx
->s_mask
= s_mask
;
2290 ctx
->a_mask
= s_mask
& ~s_mask_old
;
2293 return fold_masks(ctx
, op
);
2296 static bool fold_shift(OptContext
*ctx
, TCGOp
*op
)
2298 uint64_t s_mask
, z_mask
, sign
;
2300 if (fold_const2(ctx
, op
) ||
2301 fold_ix_to_i(ctx
, op
, 0) ||
2302 fold_xi_to_x(ctx
, op
, 0)) {
2306 s_mask
= arg_info(op
->args
[1])->s_mask
;
2307 z_mask
= arg_info(op
->args
[1])->z_mask
;
2309 if (arg_is_const(op
->args
[2])) {
2310 int sh
= arg_info(op
->args
[2])->val
;
2312 ctx
->z_mask
= do_constant_folding(op
->opc
, ctx
->type
, z_mask
, sh
);
2314 s_mask
= do_constant_folding(op
->opc
, ctx
->type
, s_mask
, sh
);
2315 ctx
->s_mask
= smask_from_smask(s_mask
);
2317 return fold_masks(ctx
, op
);
2323 * Arithmetic right shift will not reduce the number of
2324 * input sign repetitions.
2326 ctx
->s_mask
= s_mask
;
2330 * If the sign bit is known zero, then logical right shift
2331 * will not reduced the number of input sign repetitions.
2333 sign
= (s_mask
& -s_mask
) >> 1;
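        /*
         * s_mask is a left-aligned run of ones, so s_mask & -s_mask isolates
         * its lowest set bit, one position above the sign bit of the field
         * whose repetitions are known; shifting right by one yields that
         * sign bit.  E.g. s_mask 0xffffffff00000000 gives sign 0x80000000.
         */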
2334 if (!(z_mask
& sign
)) {
2335 ctx
->s_mask
= s_mask
;
2345 static bool fold_sub_to_neg(OptContext
*ctx
, TCGOp
*op
)
2350 if (!arg_is_const(op
->args
[1]) || arg_info(op
->args
[1])->val
!= 0) {
2354 switch (ctx
->type
) {
2356 neg_op
= INDEX_op_neg_i32
;
2360 neg_op
= INDEX_op_neg_i64
;
2366 neg_op
= INDEX_op_neg_vec
;
2367 have_neg
= (TCG_TARGET_HAS_neg_vec
&&
2368 tcg_can_emit_vec_op(neg_op
, ctx
->type
, TCGOP_VECE(op
)) > 0);
2371 g_assert_not_reached();
2375 op
->args
[1] = op
->args
[2];
2376 return fold_neg(ctx
, op
);
2381 /* We cannot as yet do_constant_folding with vectors. */
2382 static bool fold_sub_vec(OptContext
*ctx
, TCGOp
*op
)
2384 if (fold_xx_to_i(ctx
, op
, 0) ||
2385 fold_xi_to_x(ctx
, op
, 0) ||
2386 fold_sub_to_neg(ctx
, op
)) {
2392 static bool fold_sub(OptContext
*ctx
, TCGOp
*op
)
2394 if (fold_const2(ctx
, op
) || fold_sub_vec(ctx
, op
)) {
2398 /* Fold sub r,x,i to add r,x,-i */
2399 if (arg_is_const(op
->args
[2])) {
2400 uint64_t val
= arg_info(op
->args
[2])->val
;
2402 op
->opc
= (ctx
->type
== TCG_TYPE_I32
2403 ? INDEX_op_add_i32
: INDEX_op_add_i64
);
2404 op
->args
[2] = arg_new_constant(ctx
, -val
);
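        /*
         * For example, sub_i32 t0, t1, $16 becomes add_i32 t0, t1, $-16,
         * which hosts lacking a subtract-immediate can still encode as an
         * add with an immediate operand.
         */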
2409 static bool fold_sub2(OptContext
*ctx
, TCGOp
*op
)
2411 return fold_addsub2(ctx
, op
, false);
2414 static bool fold_tcg_ld(OptContext
*ctx
, TCGOp
*op
)
2416 /* We can't do any folding with a load, but we can record bits. */
2418 CASE_OP_32_64(ld8s
):
2419 ctx
->s_mask
= MAKE_64BIT_MASK(8, 56);
2421 CASE_OP_32_64(ld8u
):
2422 ctx
->z_mask
= MAKE_64BIT_MASK(0, 8);
2423 ctx
->s_mask
= MAKE_64BIT_MASK(9, 55);
2425 CASE_OP_32_64(ld16s
):
2426 ctx
->s_mask
= MAKE_64BIT_MASK(16, 48);
2428 CASE_OP_32_64(ld16u
):
2429 ctx
->z_mask
= MAKE_64BIT_MASK(0, 16);
2430 ctx
->s_mask
= MAKE_64BIT_MASK(17, 47);
2432 case INDEX_op_ld32s_i64
:
2433 ctx
->s_mask
= MAKE_64BIT_MASK(32, 32);
2435 case INDEX_op_ld32u_i64
:
2436 ctx
->z_mask
= MAKE_64BIT_MASK(0, 32);
2437 ctx
->s_mask
= MAKE_64BIT_MASK(33, 31);
2440 g_assert_not_reached();
2445 static bool fold_tcg_ld_memcopy(OptContext
*ctx
, TCGOp
*op
)
2451 if (op
->args
[1] != tcgv_ptr_arg(tcg_env
)) {
2457 dst
= arg_temp(op
->args
[0]);
2458 src
= find_mem_copy_for(ctx
, type
, ofs
);
2459 if (src
&& src
->base_type
== type
) {
2460 return tcg_opt_gen_mov(ctx
, op
, temp_arg(dst
), temp_arg(src
));
2464 record_mem_copy(ctx
, type
, dst
, ofs
, ofs
+ tcg_type_size(type
) - 1);
2468 static bool fold_tcg_st(OptContext
*ctx
, TCGOp
*op
)
2470 intptr_t ofs
= op
->args
[2];
2473 if (op
->args
[1] != tcgv_ptr_arg(tcg_env
)) {
2474 remove_mem_copy_all(ctx
);
2482 CASE_OP_32_64(st16
):
2485 case INDEX_op_st32_i64
:
2486 case INDEX_op_st_i32
:
2489 case INDEX_op_st_i64
:
2492 case INDEX_op_st_vec
:
2493 lm1
= tcg_type_size(ctx
->type
) - 1;
2496 g_assert_not_reached();
2498 remove_mem_copy_in(ctx
, ofs
, ofs
+ lm1
);
2502 static bool fold_tcg_st_memcopy(OptContext
*ctx
, TCGOp
*op
)
2508 if (op
->args
[1] != tcgv_ptr_arg(tcg_env
)) {
2509 fold_tcg_st(ctx
, op
);
2513 src
= arg_temp(op
->args
[0]);
    /*
     * Eliminate duplicate stores of a constant.
     * This happens frequently when the target ISA zero-extends.
     */
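    /*
     * Illustration: a guest ISA that zero-extends 32-bit results writes 0
     * to the high half of the 64-bit register slot over and over; once one
     * such constant store has been recorded, a later store of the same
     * constant to the same offset can be dropped by the check below.
     */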
2521 if (ts_is_const(src
)) {
2522 TCGTemp
*prev
= find_mem_copy_for(ctx
, type
, ofs
);
2524 tcg_op_remove(ctx
->tcg
, op
);
2529 last
= ofs
+ tcg_type_size(type
) - 1;
2530 remove_mem_copy_in(ctx
, ofs
, last
);
2531 record_mem_copy(ctx
, type
, src
, ofs
, last
);
2535 static bool fold_xor(OptContext
*ctx
, TCGOp
*op
)
2537 if (fold_const2_commutative(ctx
, op
) ||
2538 fold_xx_to_i(ctx
, op
, 0) ||
2539 fold_xi_to_x(ctx
, op
, 0) ||
2540 fold_xi_to_not(ctx
, op
, -1)) {
2544 ctx
->z_mask
= arg_info(op
->args
[1])->z_mask
2545 | arg_info(op
->args
[2])->z_mask
;
2546 ctx
->s_mask
= arg_info(op
->args
[1])->s_mask
2547 & arg_info(op
->args
[2])->s_mask
;
2548 return fold_masks(ctx
, op
);
2551 /* Propagate constants and copies, fold constant expressions. */
2552 void tcg_optimize(TCGContext
*s
)
2555 TCGOp
*op
, *op_next
;
2556 OptContext ctx
= { .tcg
= s
};
2558 QSIMPLEQ_INIT(&ctx
.mem_free
);
2560 /* Array VALS has an element for each temp.
2561 If this temp holds a constant then its value is kept in VALS' element.
2562 If this temp is a copy of other ones then the other copies are
2563 available through the doubly linked circular list. */
2565 nb_temps
= s
->nb_temps
;
2566 for (i
= 0; i
< nb_temps
; ++i
) {
2567 s
->temps
[i
].state_ptr
= NULL
;
2570 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2571 TCGOpcode opc
= op
->opc
;
2572 const TCGOpDef
*def
;
2575 /* Calls are special. */
2576 if (opc
== INDEX_op_call
) {
2577 fold_call(&ctx
, op
);
2581 def
= &tcg_op_defs
[opc
];
2582 init_arguments(&ctx
, op
, def
->nb_oargs
+ def
->nb_iargs
);
2583 copy_propagate(&ctx
, op
, def
->nb_oargs
, def
->nb_iargs
);
2585 /* Pre-compute the type of the operation. */
2586 if (def
->flags
& TCG_OPF_VECTOR
) {
2587 ctx
.type
= TCG_TYPE_V64
+ TCGOP_VECL(op
);
2588 } else if (def
->flags
& TCG_OPF_64BIT
) {
2589 ctx
.type
= TCG_TYPE_I64
;
2591 ctx
.type
= TCG_TYPE_I32
;
2594 /* Assume all bits affected, no bits known zero, no sign reps. */
2600 * Process each opcode.
2601 * Sorted alphabetically by opcode as much as possible.
2605 done
= fold_add(&ctx
, op
);
2607 case INDEX_op_add_vec
:
2608 done
= fold_add_vec(&ctx
, op
);
2610 CASE_OP_32_64(add2
):
2611 done
= fold_add2(&ctx
, op
);
2613 CASE_OP_32_64_VEC(and):
2614 done
= fold_and(&ctx
, op
);
2616 CASE_OP_32_64_VEC(andc
):
2617 done
= fold_andc(&ctx
, op
);
2619 CASE_OP_32_64(brcond
):
2620 done
= fold_brcond(&ctx
, op
);
2622 case INDEX_op_brcond2_i32
:
2623 done
= fold_brcond2(&ctx
, op
);
2625 CASE_OP_32_64(bswap16
):
2626 CASE_OP_32_64(bswap32
):
2627 case INDEX_op_bswap64_i64
:
2628 done
= fold_bswap(&ctx
, op
);
2632 done
= fold_count_zeros(&ctx
, op
);
2634 CASE_OP_32_64(ctpop
):
2635 done
= fold_ctpop(&ctx
, op
);
2637 CASE_OP_32_64(deposit
):
2638 done
= fold_deposit(&ctx
, op
);
2641 CASE_OP_32_64(divu
):
2642 done
= fold_divide(&ctx
, op
);
2644 case INDEX_op_dup_vec
:
2645 done
= fold_dup(&ctx
, op
);
2647 case INDEX_op_dup2_vec
:
2648 done
= fold_dup2(&ctx
, op
);
2650 CASE_OP_32_64_VEC(eqv
):
2651 done
= fold_eqv(&ctx
, op
);
2653 CASE_OP_32_64(extract
):
2654 done
= fold_extract(&ctx
, op
);
2656 CASE_OP_32_64(extract2
):
2657 done
= fold_extract2(&ctx
, op
);
2659 CASE_OP_32_64(ext8s
):
2660 CASE_OP_32_64(ext16s
):
2661 case INDEX_op_ext32s_i64
:
2662 case INDEX_op_ext_i32_i64
:
2663 done
= fold_exts(&ctx
, op
);
2665 CASE_OP_32_64(ext8u
):
2666 CASE_OP_32_64(ext16u
):
2667 case INDEX_op_ext32u_i64
:
2668 case INDEX_op_extu_i32_i64
:
2669 case INDEX_op_extrl_i64_i32
:
2670 case INDEX_op_extrh_i64_i32
:
2671 done
= fold_extu(&ctx
, op
);
2673 CASE_OP_32_64(ld8s
):
2674 CASE_OP_32_64(ld8u
):
2675 CASE_OP_32_64(ld16s
):
2676 CASE_OP_32_64(ld16u
):
2677 case INDEX_op_ld32s_i64
:
2678 case INDEX_op_ld32u_i64
:
2679 done
= fold_tcg_ld(&ctx
, op
);
2681 case INDEX_op_ld_i32
:
2682 case INDEX_op_ld_i64
:
2683 case INDEX_op_ld_vec
:
2684 done
= fold_tcg_ld_memcopy(&ctx
, op
);
2687 CASE_OP_32_64(st16
):
2688 case INDEX_op_st32_i64
:
2689 done
= fold_tcg_st(&ctx
, op
);
2691 case INDEX_op_st_i32
:
2692 case INDEX_op_st_i64
:
2693 case INDEX_op_st_vec
:
2694 done
= fold_tcg_st_memcopy(&ctx
, op
);
2697 done
= fold_mb(&ctx
, op
);
2699 CASE_OP_32_64_VEC(mov
):
2700 done
= fold_mov(&ctx
, op
);
2702 CASE_OP_32_64(movcond
):
2703 done
= fold_movcond(&ctx
, op
);
2706 done
= fold_mul(&ctx
, op
);
2708 CASE_OP_32_64(mulsh
):
2709 CASE_OP_32_64(muluh
):
2710 done
= fold_mul_highpart(&ctx
, op
);
2712 CASE_OP_32_64(muls2
):
2713 CASE_OP_32_64(mulu2
):
2714 done
= fold_multiply2(&ctx
, op
);
2716 CASE_OP_32_64_VEC(nand
):
2717 done
= fold_nand(&ctx
, op
);
2720 done
= fold_neg(&ctx
, op
);
2722 CASE_OP_32_64_VEC(nor
):
2723 done
= fold_nor(&ctx
, op
);
2725 CASE_OP_32_64_VEC(not):
2726 done
= fold_not(&ctx
, op
);
2728 CASE_OP_32_64_VEC(or):
2729 done
= fold_or(&ctx
, op
);
2731 CASE_OP_32_64_VEC(orc
):
2732 done
= fold_orc(&ctx
, op
);
2734 case INDEX_op_qemu_ld_a32_i32
:
2735 case INDEX_op_qemu_ld_a64_i32
:
2736 case INDEX_op_qemu_ld_a32_i64
:
2737 case INDEX_op_qemu_ld_a64_i64
:
2738 case INDEX_op_qemu_ld_a32_i128
:
2739 case INDEX_op_qemu_ld_a64_i128
:
2740 done
= fold_qemu_ld(&ctx
, op
);
2742 case INDEX_op_qemu_st8_a32_i32
:
2743 case INDEX_op_qemu_st8_a64_i32
:
2744 case INDEX_op_qemu_st_a32_i32
:
2745 case INDEX_op_qemu_st_a64_i32
:
2746 case INDEX_op_qemu_st_a32_i64
:
2747 case INDEX_op_qemu_st_a64_i64
:
2748 case INDEX_op_qemu_st_a32_i128
:
2749 case INDEX_op_qemu_st_a64_i128
:
2750 done
= fold_qemu_st(&ctx
, op
);
2753 CASE_OP_32_64(remu
):
2754 done
= fold_remainder(&ctx
, op
);
2756 CASE_OP_32_64(rotl
):
2757 CASE_OP_32_64(rotr
):
2761 done
= fold_shift(&ctx
, op
);
2763 CASE_OP_32_64(setcond
):
2764 done
= fold_setcond(&ctx
, op
);
2766 CASE_OP_32_64(negsetcond
):
2767 done
= fold_negsetcond(&ctx
, op
);
2769 case INDEX_op_setcond2_i32
:
2770 done
= fold_setcond2(&ctx
, op
);
2772 CASE_OP_32_64(sextract
):
2773 done
= fold_sextract(&ctx
, op
);
2776 done
= fold_sub(&ctx
, op
);
2778 case INDEX_op_sub_vec
:
2779 done
= fold_sub_vec(&ctx
, op
);
2781 CASE_OP_32_64(sub2
):
2782 done
= fold_sub2(&ctx
, op
);
2784 CASE_OP_32_64_VEC(xor):
2785 done
= fold_xor(&ctx
, op
);
2792 finish_folding(&ctx
, op
);