From 0bc530310a986fd39ebde3eb48bdf5107eecfc50 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Thu, 2 Jun 2011 14:58:15 +0200 Subject: [PATCH] update lower bounds during clast construction Stride detection was extended in 814dcc1 (detect strides even if the remainder of the lower bound depends on outer iterators, Sat Aug 14 15:51:04 2010 +0200) to include cases where the lower bounds depend on outer iterators. These lower bounds need to be updated to ensure that the initial value of the loop iterator satisfies the stride constraint. This update is currently performed on the iteration domains. This can sometimes be useful as the updated bounds can in some cases result in simplification. However, by definition, these updated bounds are redundant and so they may disappear during later simplifications. We therefore need to update the lower bounds also during the construction of the clasts. This update was already being performed for those cases where the lower bound is a constant expression. Now it is also performed in the other cases. Without this patch the inner loop of the new test case would be generated as c1 = 1; if (c1 <= 32) { S1(c0+g1-1,c1+g2-1); } instead of c1 = -32*floord(t1-1,32)+t1; if (c1 <= 32) { S1(c0+g1-1,c1+g2-1); } Signed-off-by: Sven Verdoolaege --- include/cloog/stride.h | 3 + source/clast.c | 35 ++++++++++-- source/isl/constraints.c | 83 +++++++++++++++++++++++++++ test/Makefile.am | 2 + test/isl/jacobi-shared.c | 11 ++++ test/isl/jacobi-shared.cloog | 129 ++++++++++++++++++++++++++++++++++++++++++ test/isl/jacobi-shared.good.c | 28 +++++++++ 7 files changed, 285 insertions(+), 6 deletions(-) create mode 100644 test/isl/jacobi-shared.c create mode 100644 test/isl/jacobi-shared.cloog create mode 100644 test/isl/jacobi-shared.good.c diff --git a/include/cloog/stride.h b/include/cloog/stride.h index 88fe1e6..f93dc4e 100644 --- a/include/cloog/stride.h +++ b/include/cloog/stride.h @@ -23,6 +23,9 @@ CloogStride *cloog_stride_alloc_from_constraint(cloog_int_t stride, CloogStride *cloog_stride_copy(CloogStride *stride); void cloog_stride_free(CloogStride *stride); +CloogConstraint *cloog_constraint_stride_lower_bound(CloogConstraint *c, + int level, CloogStride *stride); + #if defined(__cplusplus) } #endif diff --git a/source/clast.c b/source/clast.c index 33e0a7b..37e1593 100644 --- a/source/clast.c +++ b/source/clast.c @@ -793,13 +793,29 @@ static int count_bounds(CloogConstraint *c, void *user) } +/* Update the given lower bound based on stride information, + * for those cases where the stride offset is represented by + * a constraint. + * Note that cloog_loop_stride may have already performed a + * similar update of the lower bounds, but the updated lower + * bounds may have been eliminated because they are redundant + * by definition. On the other hand, performing the update + * on an already updated constraint is an identity operation + * and is therefore harmless. + */ +static CloogConstraint *update_lower_bound_c(CloogConstraint *c, int level, + CloogStride *stride) +{ + if (!stride->constraint) + return c; + return cloog_constraint_stride_lower_bound(c, level, stride); +} + + /* Update the given lower bound based on stride information. - * In some backends, the lower bounds are updated from within - * cloog_loop_stride, but other backends leave the updating to - * this function. In the later case, the original lower bound - * is known to be a constant. - * If the bound turns out not to be a constant, we know we - * are in the former case and nothing needs to be done. + * If the stride offset is represented by a constraint, + * then we have already performed the update in update_lower_bound_c. + * Otherwise, the original lower bound is known to be a constant. * If the bound has already been updated and it just happens * to be a constant, then this function performs an identity * operation on the constant. @@ -832,6 +848,11 @@ static int collect_bounds(CloogConstraint *c, void *user) if (!valid_bound(c, d)) return 0; + c = cloog_constraint_copy(c); + + if (d->lower_bound && d->infos->stride[d->level - 1]) + c = update_lower_bound_c(c, d->level, d->infos->stride[d->level - 1]); + d->r->elts[d->n] = clast_bound_from_constraint(c, d->level, d->infos->names); if (d->lower_bound && d->infos->stride[d->level - 1]) { @@ -839,6 +860,8 @@ static int collect_bounds(CloogConstraint *c, void *user) d->infos->stride[d->level - 1]); } + cloog_constraint_release(c); + d->n++; return 0; diff --git a/source/isl/constraints.c b/source/isl/constraints.c index f76312a..ae8a0fa 100644 --- a/source/isl/constraints.c +++ b/source/isl/constraints.c @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -823,3 +824,85 @@ CloogConstraint *cloog_equal_constraint(CloogEqualities *equal, int j) return cloog_constraint_from_isl_constraint( isl_basic_set_first_constraint(isl_basic_set_copy(bset))); } + +/* Given a stride constraint on iterator i (specified by level) of the form + * + * i = f(outer iterators) + stride * f(existentials) + * + * extract f as an isl_aff. + */ +static isl_aff *extract_stride_offset(__isl_keep isl_constraint *c, + int level, CloogStride *stride) +{ + int i; + isl_dim *dim = isl_constraint_get_dim(c); + isl_local_space *ls = isl_local_space_from_dim(dim); + isl_aff *offset = isl_aff_zero(ls); + isl_int u; + unsigned nparam, nvar; + + isl_int_init(u); + + nparam = isl_constraint_dim(c, isl_dim_param); + nvar = isl_constraint_dim(c, isl_dim_set); + + for (i = 0; i < nparam; ++i) { + isl_constraint_get_coefficient(c, isl_dim_param, i, &u); + isl_int_mul(u, u, stride->factor); + offset = isl_aff_set_coefficient(offset, isl_dim_param, i, u); + } + for (i = 0; i < nvar; ++i) { + if (i == level - 1) + continue; + isl_constraint_get_coefficient(c, isl_dim_set, i, &u); + isl_int_mul(u, u, stride->factor); + offset = isl_aff_set_coefficient(offset, isl_dim_set, i, u); + } + isl_constraint_get_constant(c, &u); + isl_int_mul(u, u, stride->factor); + offset = isl_aff_set_constant(offset, u); + + isl_int_clear(u); + + return offset; +} + +/* Update the given lower bound on level such that it satisfies the stride + * constraint. The computation performed here is essentially the same + * as that performed in constraint_stride_lower_c. + * + * We update the constraint + * + * a i + f >= 0 + * + * to + * + * i >= s * ceil((-f/a - d)/s) + d + * + * with s the stride and d the offset encoded in the stride constraint. + */ +CloogConstraint *cloog_constraint_stride_lower_bound(CloogConstraint *c, + int level, CloogStride *stride) +{ + isl_constraint *stride_c = &stride->constraint->isl; + isl_constraint *bound = &c->isl; + isl_aff *offset; + isl_aff *lower; + + lower = isl_constraint_get_bound(bound, isl_dim_set, level - 1); + isl_constraint_free(bound); + + offset = extract_stride_offset(stride_c, level, stride); + + lower = isl_aff_sub(lower, isl_aff_copy(offset)); + lower = isl_aff_scale_down(lower, stride->stride); + lower = isl_aff_ceil(lower); + lower = isl_aff_scale(lower, stride->stride); + lower = isl_aff_add(lower, offset); + lower = isl_aff_neg(lower); + lower = isl_aff_add_coefficient_si(lower, isl_dim_set, level - 1, 1); + + bound = isl_inequality_from_aff(lower); + + return cloog_constraint_from_isl_constraint(bound); +} diff --git a/test/Makefile.am b/test/Makefile.am index d31d9fc..be6995d 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -208,6 +208,7 @@ CLOOGTEST_STRIDED = \ $(CLOOG_ISL_TEST_STRIDED) SPECIAL_TESTS = \ + isl/jacobi-shared \ backtrack \ vasilache \ merge \ @@ -220,6 +221,7 @@ SPECIAL_TESTS = \ stride2 \ sor1d SPECIAL_OPTIONS = \ + 'isl/jacobi-shared -f 4 -l -1 -override -strides 1 -sh 1' \ 'backtrack -f 1 -backtrack' \ 'vasilache -f 8 -l 9' \ 'merge -f -1' \ diff --git a/test/isl/jacobi-shared.c b/test/isl/jacobi-shared.c new file mode 100644 index 0000000..3246227 --- /dev/null +++ b/test/isl/jacobi-shared.c @@ -0,0 +1,11 @@ +/* Generated from ../../../git/cloog/test/isl/jacobi-shared.cloog by CLooG 0.16.2-19-gfcd8fdc gmp bits in 1.65s. */ +if ((h0+1)%2 == 0) { + if ((16*floord(g1+t0-3,16) >= -N+g1+t0+1) && (16*floord(N+15*g1+15*t0+15,16) >= 16*g1+15*t0+17) && (floord(t1-1,32) <= floord(g2+t1-3,32)) && (32*floord(t1-1,32) >= -N+g2+t1+1)) { + for (c0=max(-16*floord(t0-1,16)+t0,-16*floord(g1+t0-3,16)+t0);c0<=min(32,N-g1-1);c0+=16) { + c1 = -32*floord(t1-1,32)+t1; + if (c1 <= 32) { + S1(c0+g1-1,c1+g2-1); + } + } + } +} diff --git a/test/isl/jacobi-shared.cloog b/test/isl/jacobi-shared.cloog new file mode 100644 index 0000000..25e4903 --- /dev/null +++ b/test/isl/jacobi-shared.cloog @@ -0,0 +1,129 @@ +# CLooG -> CLooG +# This is an automatic dump of a CLooG input file from a CloogInput data +# structure. + +# Language: C +c + +# Context: +1 + +22 16 0 0 2 12 +0 0 1024 0 0 0 0 32 0 0 -1 0 0 0 0 0 +0 2048 0 0 0 0 32 0 0 -1 0 0 0 0 0 0 +0 0 0 0 0 -1 0 0 1 0 0 0 0 0 0 0 +1 0 0 0 1 0 0 0 0 0 -1 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 29 +1 0 0 0 1 0 0 0 0 -1 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 29 +1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 +1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 31 +1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 63 +1 0 0 0 1 0 0 -32 0 0 0 0 0 0 0 -2 +1 0 0 0 1 0 -32 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -4 +1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +1 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 -1 +1 0 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 +1 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 15 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 31 + +1 # Parameter name(s) +T N h0 b0 b1 g0 g1 g2 g3 g4 t0 t1 + +# Statement number: +1 + +# Iteration domain of statement 1 (write_shared_A). +1 + +33 21 2 0 5 12 +0 0 1 0 0 0 0 32 0 0 0 0 0 0 0 0 0 0 0 -1 1 +0 1 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0 -1 0 1 +0 0 0 0 0 1024 0 0 0 0 0 0 992 0 0 1 0 0 0 0 0 +0 0 0 0 2048 0 0 0 0 0 0 2016 0 0 1 0 0 0 0 0 0 +0 0 0 2 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 +0 0 0 0 0 0 0 0 0 0 -1 0 0 1 0 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 +1 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 -1 +1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 +1 -1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -2 +1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 +1 0 -1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 31 +1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 63 +1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 31 +1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -4 +1 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 +1 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 31 +1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 -1 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 29 +1 0 0 0 0 0 0 0 0 1 0 0 0 0 -1 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 29 +1 0 0 0 0 0 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 1 0 -32 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 1 0 0 -32 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 15 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 31 + +0 0 0 # For future options. + + +0 # Iterator name(s) +# --------------------- SCATTERING -------------------- +1 # Scattering functions + +# Scattering of statement 1 (write_shared_A). +1 + +37 25 4 2 5 12 +0 0 0 0 0 0 1 0 0 0 0 32 0 0 0 0 0 0 0 0 0 0 0 -1 1 +0 0 0 0 0 1 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0 -1 0 1 +0 0 0 0 0 0 0 0 0 1024 0 0 0 0 0 0 992 0 0 1 0 0 0 0 0 +0 0 0 0 0 0 0 0 2048 0 0 0 0 0 0 2016 0 0 1 0 0 0 0 0 0 +0 0 0 0 0 0 0 2 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 +0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 +0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 -1 +0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 -1 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 1 0 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 +1 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 -1 +1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 -1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 -1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 31 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 63 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 31 +1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -4 +1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 +1 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 31 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 -1 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 29 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 -1 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 29 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -32 0 0 1 0 0 0 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -32 0 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 -32 0 0 0 0 0 0 0 -2 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 15 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 31 + +1 # Scattering dimension name(s) +c0 c1 c2 c3 diff --git a/test/isl/jacobi-shared.good.c b/test/isl/jacobi-shared.good.c new file mode 100644 index 0000000..fcd67dc --- /dev/null +++ b/test/isl/jacobi-shared.good.c @@ -0,0 +1,28 @@ +/* Generated from ../../../git/cloog/test/isl/jacobi-shared.cloog by CLooG 0.16.2-19-gfcd8fdc gmp bits in 1.65s. */ +extern void hash(int); + +/* Useful macros. */ +#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d)) +#define ceild(n,d) (((n)<0) ? -((-(n))/(d)) : ((n)+(d)-1)/(d)) +#define max(x,y) ((x) > (y) ? (x) : (y)) +#define min(x,y) ((x) < (y) ? (x) : (y)) + +#define S1(i,j) { hash(1); hash(i); hash(j); } + +void test(int T, int N, int h0, int b0, int b1, int g0, int g1, int g2, int g3, int g4, int t0, int t1) +{ + /* Scattering iterators. */ + int c0, c1, c2, c3; + /* Original iterators. */ + int i, j; + if ((h0+1)%2 == 0) { + if ((16*floord(g1+t0-3,16) >= -N+g1+t0+1) && (16*floord(N+15*g1+15*t0+15,16) >= 16*g1+15*t0+17) && (floord(t1-1,32) <= floord(g2+t1-3,32)) && (32*floord(t1-1,32) >= -N+g2+t1+1)) { + for (c0=max(-16*floord(t0-1,16)+t0,-16*floord(g1+t0-3,16)+t0);c0<=min(32,N-g1-1);c0+=16) { + c1 = -32*floord(t1-1,32)+t1; + if (c1 <= 32) { + S1(c0+g1-1,c1+g2-1); + } + } + } + } +} -- 2.11.4.GIT