From 78e31c4cd1efb656908d73c255e04c69895b63ac Mon Sep 17 00:00:00 2001 From: wilco Date: Tue, 25 Oct 2016 10:25:28 +0000 Subject: [PATCH] With -fpu=neon DI mode shifts are expanded after reload. DI mode registers can either fully or partially overlap on both ARM and Thumb-2. However the shift expansion code can only deal with the full overlap case, and generates incorrect code for partial overlaps. The fix is to add new variants that support either full overlap or no overlap. gcc/ PR target/78041 * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants. Remove partial overlap check for shift by 1. (ashldi3_neon): Likewise. testsuite/ * gcc.target/arm/pr78041.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@241508 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 7 +++++ gcc/config/arm/neon.md | 48 ++++++++++++++++++---------------- gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.target/arm/pr78041.c | 20 ++++++++++++++ 4 files changed, 58 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/pr78041.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c1d8f946d0d..aaf07f436d7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2016-10-25 Wilco Dijkstra + + PR target/78041 + * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants. + Remove partial overlap check for shift by 1. + (ashldi3_neon): Likewise. + 2016-10-25 Thomas Preud'homme * config/arm/constraints.md (Q constraint): Document its use for diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 05323334ffd..59316de0041 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1143,12 +1143,12 @@ ) (define_insn_and_split "ashldi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") - (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) - (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X")) (clobber (reg:CC_C CC_REGNUM))] "TARGET_NEON" "#" @@ -1180,9 +1180,11 @@ } else { - if (operands[2] == CONST1_RTX (SImode) - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); else @@ -1191,8 +1193,8 @@ } DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) @@ -1241,12 +1243,12 @@ ;; ashrdi3_neon ;; lshrdi3_neon (define_insn_and_split "di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w") + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" "#" @@ -1282,9 +1284,11 @@ } else { - if (operands[2] == CONST1_RTX (SImode) - && (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1]))) + /* The shift expanders support either full overlap or no overlap. */ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) == REGNO (operands[1])); + + if (operands[2] == CONST1_RTX (SImode)) /* This clobbers CC. */ emit_insn (gen_arm_di3_1bit (operands[0], operands[1])); else @@ -1295,8 +1299,8 @@ DONE; }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,*,*") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,speed,*,*") (set_attr "type" "multiple")] ) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1dc07326565..02d8ac6303d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-10-25 Wilco Dijkstra + + PR target/78041 + * gcc.target/arm/pr78041.c: New test. + 2016-10-25 Jakub Jelinek * g++.dg/cpp1z/launder1.C: New test. diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c new file mode 100644 index 00000000000..340ab5cb433 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr78041.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ + +extern void abort (void); + +register long long x asm ("r1"); + +long long f (void) +{ + return x << 5; +} + +int main () +{ + x = 0x0100000001; + if (f () != 0x2000000020) + abort (); + return 0; +} -- 2.11.4.GIT