From 3673dcf6d6baeb67bb70ff03d4cb3f92beed0075 Mon Sep 17 00:00:00 2001 From: Jiufu Guo Date: Wed, 7 Jul 2021 13:41:01 +0800 Subject: [PATCH] Analyze niter for until-wrap condition [PR101145] For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhance function number_of_iterations_cond/number_of_iterations_lt to analyze number of iterations for this kind of loop. gcc/ChangeLog: 2021-08-25 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. gcc/testsuite/ChangeLog: 2021-08-25 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. --- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr101145.inc | 65 +++++++++++ gcc/testsuite/gcc.dg/vect/pr101145_1.c | 13 +++ gcc/testsuite/gcc.dg/vect/pr101145_2.c | 13 +++ gcc/testsuite/gcc.dg/vect/pr101145_3.c | 13 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 ++++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 +++++ gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 ++++ gcc/tree-ssa-loop-niter.c | 157 ++++++++++++++----------- 9 files changed, 459 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 00000000000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) + *a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +int fail; + +int +main () +{ + unsigned l, n; + unsigned res; + /* l > n*/ + n = UINT_MAX - 64; + l = n + 32; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = n - 32; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = UINT_MAX; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_1 (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_2 (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 2; + res = foo_3 (a, b, l, n); + if (res != l + 1) + fail++; + + res = foo_5 (a, b, l, n); + if (res != 0) + fail++; + + n = 64; + l = n - 32; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n + 1; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 0; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 32; + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc new file mode 100644 index 00000000000..615d2e5e552 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc @@ -0,0 +1,65 @@ +TYPE __attribute__ ((noinline)) +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + TYPE i; + for (i = l; n < i; i += C) + *a++ = *b++ + 1; + return i; +} + +TYPE __attribute__ ((noinline)) +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + TYPE i; + for (i = l; i < n; i -= C) + *a++ = *b++ + 1; + return i; +} + +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } + +int a[1000], b[1000]; +int fail; + +int +main () +{ + TYPE res; + TYPE l; + TYPE n; + n = N_BASE; + l = n - C; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = foo_sign (a, b, l, n); + if (neq ((res - MIN) / C, 0)) + fail++; + + n = N_BASE_DOWN; + l = n - C; + res = bar_sign (a, b, l, n); + if (neq ((MAX - res) / C, 0)) + fail++; + + l = n; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c new file mode 100644 index 00000000000..8bc26e2436e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE signed char +#define MIN -128 +#define MAX 127 +#define N_BASE (MAX - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 3 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c new file mode 100644 index 00000000000..b14c4b4b519 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE unsigned char +#define MIN 0 +#define MAX 255 +#define N_BASE (MAX - 32 + 1) +#define N_BASE_DOWN (MIN + 32) + +#define C 2 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c new file mode 100644 index 00000000000..99289afec0b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE int * +#define MIN ((TYPE)0) +#define MAX ((TYPE)((long long)-1)) +#define N_BASE (MIN - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 1 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c b/gcc/testsuite/gcc.dg/vect/pr101145inf.c new file mode 100644 index 00000000000..ed49f5670b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c @@ -0,0 +1,25 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; val <= i; i+=16) + cnt++; + return cnt; +} + +void test_finite () +{ + unsigned n = foo (16, UINT_MAX - 32); + if (n != 3) + __builtin_abort (); +} + +void test_infinite () +{ + foo (15, UINT_MAX - 32); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc new file mode 100644 index 00000000000..4aa3d049187 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc @@ -0,0 +1,28 @@ +#include +#include +#include + +void test_finite (); +void test_infinite (); + +void do_exit (int i) +{ + exit (0); +} + +int main(void) +{ + test_finite (); + struct sigaction s; + sigemptyset (&s.sa_mask); + s.sa_handler = do_exit; + s.sa_flags = 0; + sigaction (SIGALRM, &s, NULL); + alarm (1); + + test_infinite (); + + __builtin_abort (); + return 1; +} + diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c new file mode 100644 index 00000000000..4ee3e31c7fe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c @@ -0,0 +1,23 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; i < val; i-=16) + cnt++; + return cnt; +} + +void test_finite () +{ + foo (UINT_MAX - 15, 32); +} + +void test_infinite () +{ + foo (UINT_MAX - 14, 32); +} diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 466158a5eb1..7af92d1c893 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -1474,6 +1474,93 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1, } /* Determines number of iterations of loop whose ending condition + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. + The number of iterations is stored to NITER. */ + +static bool +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0, + affine_iv *iv1, class tree_niter_desc *niter) +{ + tree niter_type = unsigned_type_for (type); + tree step, num, assumptions, may_be_zero; + wide_int high, low, max, min; + + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base); + if (integer_onep (may_be_zero)) + return false; + + int prec = TYPE_PRECISION (type); + signop sgn = TYPE_SIGN (type); + min = wi::min_value (prec, sgn); + max = wi::max_value (prec, sgn); + + /* n < {base, C}. */ + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step)) + { + step = iv1->step; + /* MIN + C - 1 <= n. */ + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - 1); + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max), + iv1->base); + high = max; + if (TREE_CODE (iv1->base) == INTEGER_CST) + low = wi::to_wide (iv1->base) - 1; + else if (TREE_CODE (iv0->base) == INTEGER_CST) + low = wi::to_wide (iv0->base); + else + low = min; + } + /* {base, -C} < n. */ + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step)) + { + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step); + /* MAX - C + 1 >= n. */ + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + 1); + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, + wide_int_to_tree (type, min)); + low = min; + if (TREE_CODE (iv0->base) == INTEGER_CST) + high = wi::to_wide (iv0->base) + 1; + else if (TREE_CODE (iv1->base) == INTEGER_CST) + high = wi::to_wide (iv1->base); + else + high = max; + } + else + return false; + + /* (delta + step - 1) / step */ + step = fold_convert (niter_type, step); + num = fold_convert (niter_type, num); + num = fold_build2 (PLUS_EXPR, niter_type, num, step); + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); + + widest_int delta, s; + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); + s = wi::to_widest (step); + delta = delta + s - 1; + niter->max = wi::udiv_floor (delta, s); + + niter->may_be_zero = may_be_zero; + + if (!integer_nonzerop (assumptions)) + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + niter->assumptions, assumptions); + + niter->control.no_overflow = false; + + return true; +} + +/* Determines number of iterations of loop whose ending condition is IV0 < IV1. TYPE is the type of the iv. The number of iterations is stored to NITER. BNDS bounds the difference IV1->base - IV0->base. EXIT_MUST_BE_TAKEN is true if we know @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0, niter->bound = iv0->base; } + /* {base, -C} < n, or n < {base, C} */ + if (tree_int_cst_sign_bit (iv0->step) + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter); + delta = fold_build2 (MINUS_EXPR, niter_type, fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv0->base)); @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) } } -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts - the condition for loop-until-wrap cases. For example: - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 - Return true if condition is successfully adjusted. */ - -static bool -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code, - affine_iv *iv1) -{ - /* Only support simple cases for the moment. */ - if (TREE_CODE (iv0->base) != INTEGER_CST - || TREE_CODE (iv1->base) != INTEGER_CST) - return false; - - tree niter_type = unsigned_type_for (type), high, low; - /* Case: i-- < 10. */ - if (integer_zerop (iv1->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_minus_onep (iv0->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) - return false; - high = fold_build2 (PLUS_EXPR, niter_type, - fold_convert (niter_type, iv0->base), - build_int_cst (niter_type, 1)); - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); - } - else if (integer_zerop (iv0->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_onep (iv1->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) - return false; - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); - low = fold_build2 (MINUS_EXPR, niter_type, - fold_convert (niter_type, iv1->base), - build_int_cst (niter_type, 1)); - } - else - gcc_unreachable (); - - iv0->base = low; - iv0->step = fold_convert (niter_type, integer_one_node); - iv1->base = high; - iv1->step = build_int_cst (niter_type, 0); - *code = NE_EXPR; - return true; -} - /* Determine the number of iterations according to condition (for staying inside loop) which compares two induction variables using comparison operator CODE. The induction variable on left side of the comparison @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, return true; } - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by - adjusting iv0, iv1 and code. */ - if (code != NE_EXPR - && (tree_int_cst_sign_bit (iv0->step) - || (!integer_zerop (iv1->step) - && !tree_int_cst_sign_bit (iv1->step))) - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) - return false; - /* OK, now we know we have a senseful loop. Handle several cases, depending on what comparison operator is used. */ bound_difference (loop, iv1->base, iv0->base, &bnds); -- 2.11.4.GIT