From 3f8879277725bf2b4208904012052f09f9262c64 Mon Sep 17 00:00:00 2001
From: rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 3 Jul 2018 10:03:44 +0000
Subject: [PATCH] [16/n] PR85694: Add detection of averaging operations

This patch adds detection of average instructions:

   a = (((wide) b + (wide) c) >> 1);
-->
   a = (wide) .AVG_FLOOR (b, c);

   a = (((wide) b + (wide) c + 1) >> 1);
-->
   a = (wide) .AVG_CEIL (b, c);

in cases where users of "a" need only the low half of the result,
making the cast to (wide) redundant.  The heavy lifting was done by
earlier patches.

This showed up another problem in vectorizable_call: if the call is a
pattern definition statement rather than the main pattern statement,
the type of the vectorised call might be different from the type of
the original statement.

2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/85694
	* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
	(uavgM3_ceil): Document new optabs.
	* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
	* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
	functions.
	* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
	(uavg_ceil_optab): New optabs.
	* tree-vect-patterns.c (vect_recog_average_pattern): New function.
	(vect_vect_recog_func_ptrs): Add it.
	* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
	constant directly from the associated lhs.

gcc/testsuite/
	PR tree-optimization/85694
	* lib/target-supports.exp (check_effective_target_vect_avg_qi): New
	proc.
	* gcc.dg/vect/vect-avg-1.c: New test.
	* gcc.dg/vect/vect-avg-2.c: Likewise.
	* gcc.dg/vect/vect-avg-3.c: Likewise.
	* gcc.dg/vect/vect-avg-4.c: Likewise.
	* gcc.dg/vect/vect-avg-5.c: Likewise.
	* gcc.dg/vect/vect-avg-6.c: Likewise.
	* gcc.dg/vect/vect-avg-7.c: Likewise.
	* gcc.dg/vect/vect-avg-8.c: Likewise.
	* gcc.dg/vect/vect-avg-9.c: Likewise.
	* gcc.dg/vect/vect-avg-10.c: Likewise.
	* gcc.dg/vect/vect-avg-11.c: Likewise.
	* gcc.dg/vect/vect-avg-12.c: Likewise.
	* gcc.dg/vect/vect-avg-13.c: Likewise.
	* gcc.dg/vect/vect-avg-14.c: Likewise.
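For illustration, here is a minimal C loop of the shape the new pattern
recognizes, modelled on the new tests (the function name avg_u8 is just
for this example, not part of the patch).  The usual C integer promotions
supply the "(wide)" casts, and the narrow store means only the low half
of the shifted sum is live:

   /* Recognized as .AVG_FLOOR; writing "b[i] + c[i] + 1" before the
      shift would give the .AVG_CEIL form instead.  */
   void
   avg_u8 (unsigned char *restrict a, unsigned char *restrict b,
           unsigned char *restrict c, int n)
   {
     for (int i = 0; i < n; ++i)
       a[i] = (b[i] + c[i]) >> 1;
   }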
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@262335 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                           |  15 ++++
 gcc/doc/md.texi                         |  28 ++++++
 gcc/doc/sourcebuild.texi                |   4 +
 gcc/internal-fn.def                     |   5 ++
 gcc/optabs.def                          |   4 +
 gcc/testsuite/ChangeLog                 |  20 +++++
 gcc/testsuite/gcc.dg/vect/vect-avg-1.c  |  47 ++++++++++
 gcc/testsuite/gcc.dg/vect/vect-avg-10.c |   8 ++
 gcc/testsuite/gcc.dg/vect/vect-avg-11.c |  57 ++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-avg-12.c |  10 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-13.c |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-14.c |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-2.c  |  10 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-3.c  |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-4.c  |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-5.c  |  51 +++++++++++
 gcc/testsuite/gcc.dg/vect/vect-avg-6.c  |  10 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-7.c  |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-8.c  |  11 +++
 gcc/testsuite/gcc.dg/vect/vect-avg-9.c  |   8 ++
 gcc/testsuite/lib/target-supports.exp   |   7 ++
 gcc/tree-vect-patterns.c                | 150 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.c                   |   5 +-
 23 files changed, 502 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-10.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-11.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-12.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-13.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-14.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-4.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-5.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-6.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-7.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-8.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-avg-9.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fd928b0a1fd..52b5e29bd26 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,20 @@
 2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>
 
+	PR tree-optimization/85694
+	* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
+	(uavgM3_ceil): Document new optabs.
+	* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
+	* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
+	functions.
+	* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
+	(uavg_ceil_optab): New optabs.
+	* tree-vect-patterns.c (vect_recog_average_pattern): New function.
+	(vect_vect_recog_func_ptrs): Add it.
+	* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
+	constant directly from the associated lhs.
+
+2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* tree-vect-patterns.c (vect_split_statement): New function.
 	(vect_convert_input): Use it to try to split an existing cast.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 22919e4310c..09d6e307c24 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5599,6 +5599,34 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{avg@var{m}3_floor} instruction pattern
+@cindex @code{uavg@var{m}3_floor} instruction pattern
+@item @samp{avg@var{m}3_floor}
+@itemx @samp{uavg@var{m}3_floor}
+Signed and unsigned average instructions.  These instructions add
+operands 1 and 2 without truncation, divide the result by 2,
+round towards -Inf, and store the result in operand 0.  This is
+equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2) >> 1);
+@end smallexample
+where the sign of @samp{narrow} determines whether this is a signed
+or unsigned operation.
+
+@cindex @code{avg@var{m}3_ceil} instruction pattern
+@cindex @code{uavg@var{m}3_ceil} instruction pattern
+@item @samp{avg@var{m}3_ceil}
+@itemx @samp{uavg@var{m}3_ceil}
+Like @samp{avg@var{m}3_floor} and @samp{uavg@var{m}3_floor}, but round
+towards +Inf.  This is equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
+@end smallexample
+
 @cindex @code{bswap@var{m}2} instruction pattern
 @item @samp{bswap@var{m}2}
 Reverse the order of bytes of operand 1 and store the result in operand 0.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index d52183d9c60..89157079ffb 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1417,6 +1417,10 @@ Target supports Fortran @code{real} kinds larger than @code{real(8)}.
 The target's ABI allows stack variables to be aligned to the preferred
 vector alignment.
 
+@item vect_avg_qi
+Target supports both signed and unsigned averaging operations on vectors
+of bytes.
+
 @item vect_condition
 Target supports vector conditional operations.
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 66336d8062b..6293ab36dc9 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -143,6 +143,11 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
+			      savg_floor, uavg_floor, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
+			      savg_ceil, uavg_ceil, binary)
+
 DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
 DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
 DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 11af7aaeb15..707d9696b4c 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -316,6 +316,10 @@ OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (savg_floor_optab, "avg$a3_floor")
+OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
+OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
+OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
 OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
 OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
 OPTAB_D (udot_prod_optab, "udot_prod$I$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 90aa4d7e22a..0ed116fd209 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,25 @@
 2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>
 
+	PR tree-optimization/85694
+	* lib/target-supports.exp (check_effective_target_vect_avg_qi): New
+	proc.
+	* gcc.dg/vect/vect-avg-1.c: New test.
+	* gcc.dg/vect/vect-avg-2.c: Likewise.
+	* gcc.dg/vect/vect-avg-3.c: Likewise.
+	* gcc.dg/vect/vect-avg-4.c: Likewise.
+	* gcc.dg/vect/vect-avg-5.c: Likewise.
+	* gcc.dg/vect/vect-avg-6.c: Likewise.
+	* gcc.dg/vect/vect-avg-7.c: Likewise.
+	* gcc.dg/vect/vect-avg-8.c: Likewise.
+	* gcc.dg/vect/vect-avg-9.c: Likewise.
+	* gcc.dg/vect/vect-avg-10.c: Likewise.
+	* gcc.dg/vect/vect-avg-11.c: Likewise.
+	* gcc.dg/vect/vect-avg-12.c: Likewise.
+	* gcc.dg/vect/vect-avg-13.c: Likewise.
+	* gcc.dg/vect/vect-avg-14.c: Likewise.
+
+2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.dg/vect/vect-over-widen-5.c: Test that the extensions get
 	split into two for use by the over-widening pattern.
 	* gcc.dg/vect/vect-over-widen-6.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-1.c b/gcc/testsuite/gcc.dg/vect/vect-avg-1.c
new file mode 100644
index 00000000000..a7bc7cc9096
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-1.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+   SIGNEDNESS char *restrict c)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    a[i] = (b[i] + c[i] + BIAS) >> 1;
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+  check_vect ();
+
+  SIGNEDNESS char a[N], b[N], c[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = BASE1 + i * 5;
+      c[i] = BASE2 + i * 4;
+      asm volatile ("" ::: "memory");
+    }
+  f (a, b, c);
+  for (int i = 0; i < N; ++i)
+    if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+      __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-10.c b/gcc/testsuite/gcc.dg/vect/vect-avg-10.c
new file mode 100644
index 00000000000..2630aeab913
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-10.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-11.c b/gcc/testsuite/gcc.dg/vect/vect-avg-11.c
new file mode 100644
index 00000000000..85292f1b824
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-11.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+   SIGNEDNESS char *restrict c)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int tmp = b[i];
+      tmp ^= 0x55;
+      tmp += BIAS;
+      tmp += c[i];
+      tmp >>= 1;
+      tmp |= 0x40;
+      a[i] = tmp;
+    }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+  check_vect ();
+
+  SIGNEDNESS char a[N], b[N], c[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = BASE1 + i * 5;
+      c[i] = BASE2 + i * 4;
+      asm volatile ("" ::: "memory");
+    }
+  f (a, b, c);
+  for (int i = 0; i < N; ++i)
+    if (a[i] != (((((BASE1 + i * 5) ^ 0x55)
+		   + (BASE2 + i * 4)
+		   + BIAS) >> 1) | 0x40))
+      __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-12.c b/gcc/testsuite/gcc.dg/vect/vect-avg-12.c
new file mode 100644
index 00000000000..f40331ea1bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-12.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-13.c b/gcc/testsuite/gcc.dg/vect/vect-avg-13.c
new file mode 100644
index 00000000000..7957c0e4adc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-13.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-14.c b/gcc/testsuite/gcc.dg/vect/vect-avg-14.c
new file mode 100644
index 00000000000..8ab11f74e94
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-14.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-2.c b/gcc/testsuite/gcc.dg/vect/vect-avg-2.c
new file mode 100644
index 00000000000..b5586b5f013
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-2.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-3.c b/gcc/testsuite/gcc.dg/vect/vect-avg-3.c
new file mode 100644
index 00000000000..104fe961393
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-3.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-4.c b/gcc/testsuite/gcc.dg/vect/vect-avg-4.c
new file mode 100644
index 00000000000..92181d7fc3d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-4.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-5.c b/gcc/testsuite/gcc.dg/vect/vect-avg-5.c
new file mode 100644
index 00000000000..6c43575f448
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-5.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+   SIGNEDNESS char *restrict c)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int tmp1 = b[i] + BIAS;
+      int tmp2 = tmp1 + c[i];
+      a[i] = tmp2 >> 1;
+    }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+  check_vect ();
+
+  SIGNEDNESS char a[N], b[N], c[N];
+  for (int i = 0; i < N; ++i)
+    {
+      b[i] = BASE1 + i * 5;
+      c[i] = BASE2 + i * 4;
+      asm volatile ("" ::: "memory");
+    }
+  f (a, b, c);
+  for (int i = 0; i < N; ++i)
+    if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+      __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-6.c b/gcc/testsuite/gcc.dg/vect/vect-avg-6.c
new file mode 100644
index 00000000000..efe97b8a5f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-6.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-7.c b/gcc/testsuite/gcc.dg/vect/vect-avg-7.c
new file mode 100644
index 00000000000..62a8474f690
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-7.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-8.c b/gcc/testsuite/gcc.dg/vect/vect-avg-8.c
new file mode 100644
index 00000000000..cc7c4cde6b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-8.c
@@ -0,0 +1,11 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-9.c b/gcc/testsuite/gcc.dg/vect/vect-avg-9.c
new file mode 100644
index 00000000000..80865b6661a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-avg-9.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ffbc882b07d..fc189f31b71 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6313,6 +6313,13 @@ proc check_effective_target_vect_usad_char { } {
     return $et_vect_usad_char_saved($et_index)
 }
 
+# Return 1 if the target plus current options supports both signed
+# and unsigned average operations on vectors of bytes.
+
+proc check_effective_target_vect_avg_qi {} {
+    return 0
+}
+
 # Return 1 if the target plus current options supports a vector
 # demotion (packing) of shorts (to chars) and ints (to shorts)
 # using modulo arithmetic, 0 otherwise.
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index a1649d8b0fe..51defa08627 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1721,6 +1721,153 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
   return pattern_stmt;
 }
 
+/* Recognize the patterns:
+
+	 ATYPE a;  // narrower than TYPE
+	 BTYPE b;  // narrower than TYPE
+    (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+ or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+   where only the bottom half of avg is used.  Try to transform them into:
+
+    (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+ or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+   followed by:
+
+	 TYPE avg = (TYPE) avg';
+
+   where NTYPE is no wider than half of TYPE.  Since only the bottom half
+   of avg is used, all or part of the cast of avg' should become redundant.  */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+  /* Check for a shift right by one bit.  */
+  gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+  if (!last_stmt
+      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+    return NULL;
+
+  stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+  vec_info *vinfo = last_stmt_info->vinfo;
+
+  /* Check that the shift result is wider than the users of the
+     result need (i.e. that narrowing would be a natural choice).  */
+  tree lhs = gimple_assign_lhs (last_stmt);
+  tree type = TREE_TYPE (lhs);
+  unsigned int target_precision
+    = vect_element_precision (last_stmt_info->min_output_precision);
+  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
+  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+  if (!plus_stmt_info)
+    return NULL;
+
+  /* Check whether the shift input can be seen as a tree of additions on
+     2 or 3 widened inputs.
+
+     Note that the pattern should be a win even if the result of one or
+     more additions is reused elsewhere: if the pattern matches, we'd be
+     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
+  internal_fn ifn = IFN_AVG_FLOOR;
+  vect_unpromoted_value unprom[3];
+  tree new_type;
+  unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+					    PLUS_EXPR, false, 3,
+					    unprom, &new_type);
+  if (nops == 0)
+    return NULL;
+  if (nops == 3)
+    {
+      /* Check that one operand is 1.  */
+      unsigned int i;
+      for (i = 0; i < 3; ++i)
+	if (integer_onep (unprom[i].op))
+	  break;
+      if (i == 3)
+	return NULL;
+      /* Throw away the 1 operand and keep the other two.  */
+      if (i < 2)
+	unprom[i] = unprom[2];
+      ifn = IFN_AVG_CEIL;
+    }
+
+  vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
+
+  /* We know that:
+
+     (a) the operation can be viewed as:
+
+	   TYPE widened0 = (TYPE) UNPROM[0];
+	   TYPE widened1 = (TYPE) UNPROM[1];
+	   TYPE tmp1 = widened0 + widened1 {+ 1};
+	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
+
+     (b) the first two statements are equivalent to:
+
+	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
+	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
+
+     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
+	 where sensible;
+
+     (d) all the operations can be performed correctly at twice the width of
+	 NEW_TYPE, due to the nature of the average operation; and
+
+     (e) users of the result of the right shift need only TARGET_PRECISION
+	 bits, where TARGET_PRECISION is no more than half of TYPE's
+	 precision.
+
+     Under these circumstances, the only situation in which NEW_TYPE
+     could be narrower than TARGET_PRECISION is if widened0, widened1
+     and an addition result are all used more than once.  Thus we can
+     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
+     as "free", whereas widening the result of the average instruction
+     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
+     therefore better not to go narrower than TARGET_PRECISION.  */
+  if (TYPE_PRECISION (new_type) < target_precision)
+    new_type = build_nonstandard_integer_type (target_precision,
+					       TYPE_UNSIGNED (new_type));
+
+  /* Check for target support.  */
+  tree new_vectype = get_vectype_for_scalar_type (new_type);
+  if (!new_vectype
+      || !direct_internal_fn_supported_p (ifn, new_vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  /* The IR requires a valid vector type for the cast result, even though
+     it's likely to be discarded.  */
+  *type_out = get_vectype_for_scalar_type (type);
+  if (!*type_out)
+    return NULL;
+
+  /* Generate the IFN_AVG* call.  */
+  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+  tree new_ops[2];
+  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+		       unprom, new_vectype);
+  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
+						    new_ops[1]);
+  gimple_call_set_lhs (average_stmt, new_var);
+  gimple_set_location (average_stmt, gimple_location (last_stmt));
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location,
+		       "created pattern stmt: ");
+      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
+    }
+
+  stmts->safe_push (last_stmt);
+  return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
+}
+
 /* Recognize cases in which the input to a cast is wider than its
    output, and the input is fed by a widening operation.  Fold this
    by removing the unnecessary intermediate widening.  E.g.:
@@ -4670,6 +4817,9 @@ struct vect_recog_func
    less comples onex (widen_sum only after dot_prod or sad for example).  */
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_over_widening_pattern, "over_widening" },
+  /* Must come after over_widening, which narrows the shift as much as
+     possible beforehand.  */
+  { vect_recog_average_pattern, "average" },
   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
   { vect_recog_widen_mult_pattern, "widen_mult" },
   { vect_recog_dot_prod_pattern, "dot_prod" },
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ae62fc36401..ea303bd7023 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3116,7 +3116,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   gcall *stmt;
   tree vec_dest;
   tree scalar_dest;
-  tree op, type;
+  tree op;
   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
   tree vectype_out, vectype_in;
@@ -3592,12 +3592,11 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   if (slp_node)
     return true;
 
-  type = TREE_TYPE (scalar_dest);
   if (is_pattern_stmt_p (stmt_info))
     stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
   lhs = gimple_get_lhs (stmt_info->stmt);
-  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
+  new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
   set_vinfo_for_stmt (new_stmt, stmt_info);
   set_vinfo_for_stmt (stmt_info->stmt, NULL);
   STMT_VINFO_STMT (stmt_info) = new_stmt;
-- 
2.11.4.GIT
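As a quick sanity check of point (d) in vect_recog_average_pattern's
comment (a standalone example, not part of the patch): for 8-bit inputs
the biased sum is at most 255 + 255 + 1 = 511, which fits in 9 bits, so
evaluating the average at twice the width of NEW_TYPE (16 bits here)
gives the same result as unbounded precision for every input pair:

   #include <assert.h>

   int
   main (void)
   {
     for (int b = 0; b < 256; ++b)
       for (int c = 0; c < 256; ++c)
         {
           int wide = (b + c + 1) >> 1;   /* reference, computed in int */
           /* Truncate the sum to 16 bits before shifting; the assert
              shows that nothing is lost for 8-bit inputs.  */
           unsigned short sum16 = (unsigned short) (b + c + 1);
           assert ((sum16 >> 1) == wide);
         }
     return 0;
   }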