From 9794389d5d5188a140471d3b90df62b2b5b5a308 Mon Sep 17 00:00:00 2001 From: ktkachov Date: Thu, 17 Nov 2016 14:25:30 +0000 Subject: [PATCH] [AArch64] Expand DImode constant stores to two SImode stores when profitable * config/aarch64/aarch64.md (mov<mode>): Call aarch64_split_dimode_const_store on DImode constant stores. * config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store): New prototype. * config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New function. * gcc.target/aarch64/store_repeating_constant_1.c: New test. * gcc.target/aarch64/store_repeating_constant_2.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@242551 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 9 ++++ gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64.c | 57 ++++++++++++++++++++++ gcc/config/aarch64/aarch64.md | 5 ++ gcc/testsuite/ChangeLog | 5 ++ .../aarch64/store_repeating_constant_1.c | 11 +++++ .../aarch64/store_repeating_constant_2.c | 15 ++++++ 7 files changed, 103 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b9b3b8a8e73..f69f911e481 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-11-17 Kyrylo Tkachov + + * config/aarch64/aarch64.md (mov<mode>): Call + aarch64_split_dimode_const_store on DImode constant stores. + * config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store): + New prototype. + * config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New + function. 
+ 2016-11-17 Bill Schmidt Richard Biener diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4f4989d8b0d..b6ca3dfacb0 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -337,6 +337,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); bool aarch64_simd_valid_immediate (rtx, machine_mode, bool, struct simd_immediate_info *); +bool aarch64_split_dimode_const_store (rtx, rtx); bool aarch64_symbolic_address_p (rtx); bool aarch64_uimm12_shift (HOST_WIDE_INT); bool aarch64_use_return_insn_p (void); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 11d41cfe10c..e5ca5eb0ad7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13211,6 +13211,63 @@ aarch64_expand_movmem (rtx *operands) return true; } +/* Split a DImode store of a CONST_INT SRC to MEM DST as two + SImode stores. Handle the case when the constant has identical + bottom and top halves. This is beneficial when the two stores can be + merged into an STP and we avoid synthesising potentially expensive + immediates twice. Return true if such a split is possible. */ + +bool +aarch64_split_dimode_const_store (rtx dst, rtx src) +{ + rtx lo = gen_lowpart (SImode, src); + rtx hi = gen_highpart_mode (SImode, DImode, src); + + bool size_p = optimize_function_for_size_p (cfun); + + if (!rtx_equal_p (lo, hi)) + return false; + + unsigned int orig_cost + = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode); + unsigned int lo_cost + = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode); + + /* We want to transform: + MOV x1, 49370 + MOVK x1, 0x140, lsl 16 + MOVK x1, 0xc0da, lsl 32 + MOVK x1, 0x140, lsl 48 + STR x1, [x0] + into: + MOV w1, 49370 + MOVK w1, 0x140, lsl 16 + STP w1, w1, [x0] + So we want to perform this only when we save two instructions + or more. 
When optimizing for size, however, accept any code size + savings we can. */ + if (size_p && orig_cost <= lo_cost) + return false; + + if (!size_p + && (orig_cost <= lo_cost + 1)) + return false; + + rtx mem_lo = adjust_address (dst, SImode, 0); + if (!aarch64_mem_pair_operand (mem_lo, SImode)) + return false; + + rtx tmp_reg = gen_reg_rtx (SImode); + aarch64_expand_mov_immediate (tmp_reg, lo); + rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode)); + /* Don't emit an explicit store pair as this may not be always profitable. + Let the sched-fusion logic decide whether to merge them. */ + emit_move_insn (mem_lo, tmp_reg); + emit_move_insn (mem_hi, tmp_reg); + + return true; +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a652a7c12bd..5089ccf047c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1011,6 +1011,11 @@ (match_operand:GPI 1 "general_operand" ""))] "" " + if (MEM_P (operands[0]) && CONST_INT_P (operands[1]) + && <MODE>mode == DImode + && aarch64_split_dimode_const_store (operands[0], operands[1])) + DONE; + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) operands[1] = force_reg (<MODE>mode, operands[1]); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bd766209150..77fa771ab17 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-11-17 Kyrylo Tkachov + + * gcc.target/aarch64/store_repeating_constant_1.c: New test. + * gcc.target/aarch64/store_repeating_constant_2.c: Likewise. 
+ 2016-11-17 Bill Schmidt Richard Biener diff --git a/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c new file mode 100644 index 00000000000..50d456834bc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic" } */ + +void +foo (unsigned long long *a) +{ + a[0] = 0x0140c0da0140c0daULL; +} + +/* { dg-final { scan-assembler-times "movk\\tw.*" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c new file mode 100644 index 00000000000..c421277989a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ + +/* Check that for -Os we synthesize only the bottom half and then + store it twice with an STP rather than synthesizing it twice in each + half of an X-reg. */ + +void +foo (unsigned long long *a) +{ + a[0] = 0xc0da0000c0daULL; +} + +/* { dg-final { scan-assembler-times "mov\\tw.*" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */ -- 2.11.4.GIT