From 814dcc193842e46aa7b5c7cb0e6ea7e70b091ffe Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Sat, 14 Aug 2010 15:51:04 +0200 Subject: [PATCH] detect strides even if the remainder of the lower bound depends on outer iterators The original implementation of stride detection only works if the remainder of the lower bound on division by a potential stride is a constant. The new implementation allows this remainder to vary with each iteration of the outer loop(s), while of course still remaining constant within the same iteration of the outer loop(s). Signed-off-by: Sven Verdoolaege --- include/cloog/domain.h | 14 +++ include/cloog/stride.h | 4 + source/clast.c | 77 +++++++++++- source/isl/domain.c | 297 +++++++++++++++++++++++++++++++++++++++++++++- source/loop.c | 46 +++++++ source/stride.c | 28 ++++- test/reservoir/bastoul3.c | 8 +- 7 files changed, 461 insertions(+), 13 deletions(-) diff --git a/include/cloog/domain.h b/include/cloog/domain.h index ac036c3..068beee 100644 --- a/include/cloog/domain.h +++ b/include/cloog/domain.h @@ -50,6 +50,17 @@ typedef struct cloogscattering CloogScattering; /** + * CloogDomainList structure: + * this structure reprensents a node of a linked list of CloogDomain structures. + */ +struct cloogdomainlist { + CloogDomain *domain; /**< An element of the list. */ + struct cloogdomainlist *next;/**< Pointer to the next element of the list.*/ +} ; +typedef struct cloogdomainlist CloogDomainList; + + +/** * CloogScatteringList structure: * this structure reprensents a node of a linked list of CloogScattering structures. */ @@ -93,6 +104,7 @@ void cloog_domain_print_structure(FILE *file, CloogDomain *domain, int level, /****************************************************************************** * Memory deallocation function * ******************************************************************************/ +void cloog_domain_list_free(CloogDomainList *); void cloog_scattering_list_free(CloogScatteringList *); @@ -146,6 +158,8 @@ CloogDomain * cloog_domain_scatter(CloogDomain *domain, CloogScattering *scatt); int cloog_scattering_fully_specified(CloogScattering *scattering, CloogDomain *domain); +CloogStride *cloog_domain_list_stride(CloogDomainList *list, int level); + #if defined(__cplusplus) } #endif diff --git a/include/cloog/stride.h b/include/cloog/stride.h index edd36e6..88fe1e6 100644 --- a/include/cloog/stride.h +++ b/include/cloog/stride.h @@ -12,10 +12,14 @@ struct cloogstride { int references; cloog_int_t stride; /**< The actual stride. */ cloog_int_t offset; /**< Offset of strided loop. */ + cloog_int_t factor; + CloogConstraint *constraint; }; typedef struct cloogstride CloogStride; CloogStride *cloog_stride_alloc(cloog_int_t stride, cloog_int_t offset); +CloogStride *cloog_stride_alloc_from_constraint(cloog_int_t stride, + CloogConstraint *constraint, cloog_int_t factor); CloogStride *cloog_stride_copy(CloogStride *stride); void cloog_stride_free(CloogStride *stride); diff --git a/source/clast.c b/source/clast.c index 683b24b..bd20c1e 100644 --- a/source/clast.c +++ b/source/clast.c @@ -800,6 +800,8 @@ static void update_lower_bound(struct clast_expr *expr, int level, CloogStride *stride) { struct clast_term *t; + if (stride->constraint) + return; if (expr->type != clast_expr_term) return; t = (struct clast_term *)expr; @@ -1156,6 +1158,72 @@ static void insert_computed_modulo_guard(struct clast_reduction *r, } +/* Try and eliminate coefficients from a modulo constraint based on + * stride information of an earlier level. + * The modulo of the constraint being constructed is "m". + * The stride information at level "level" is given by "stride" + * and indicated that the iterator i at level "level" is equal to + * some expression modulo stride->stride. + * If stride->stride is a multiple of "m' then i is also equal to + * the expression modulo m and so we can eliminate the coefficient of i. + * + * If stride->constraint is NULL, then i has a constant value modulo m, stored + * stride->offset. We simply multiply this constant with the coefficient + * of i and add the result to the constant term, reducing it modulo m. + * + * If stride->constraint is not NULL, then it is a constraint of the form + * + * e + k i = s a + * + * with s equal to stride->stride, e an expression in terms of the + * parameters and earlier iterators and a some arbitrary expression + * in terms of existentially quantified variables. + * stride->factor is a value f such that f * k = -1 mod s. + * Adding stride->constraint f * c times to the current modulo constraint, + * with c the coefficient of i eliminates i in favor of parameters and + * earlier variables. + */ +static void eliminate_using_stride_constraint(cloog_int_t *line, int len, + int nb_iter, CloogStride *stride, int level, cloog_int_t m) +{ + if (!stride) + return; + if (!cloog_int_is_divisible_by(stride->stride, m)) + return; + + if (stride->constraint) { + int i; + cloog_int_t t, v; + + cloog_int_init(t); + cloog_int_init(v); + cloog_int_mul(t, line[level], stride->factor); + for (i = 1; i < level; ++i) { + cloog_constraint_coefficient_get(stride->constraint, + i - 1, &v); + cloog_int_addmul(line[i], t, v); + cloog_int_fdiv_r(line[i], line[i], m); + } + for (i = nb_iter + 1; i <= len - 2; ++i) { + cloog_constraint_coefficient_get(stride->constraint, + i - nb_iter - 1 + level, &v); + cloog_int_addmul(line[i], t, v); + cloog_int_fdiv_r(line[i], line[i], m); + } + cloog_constraint_constant_get(stride->constraint, &v); + cloog_int_addmul(line[len - 1], t, v); + cloog_int_fdiv_r(line[len - 1], line[len - 1], m); + cloog_int_clear(v); + cloog_int_clear(t); + } else { + cloog_int_addmul(line[len - 1], line[level], stride->offset); + cloog_int_fdiv_r(line[len - 1], line[len - 1], m); + } + + cloog_int_set_si(line[level], 0); +} + + /* Temporary structure for communication between insert_modulo_guard and * its cloog_constraint_set_foreach_constraint callback function. */ @@ -1216,15 +1284,12 @@ static int insert_modulo_guard_constraint(CloogConstraint *c, void *user) nb_elts = 0; /* First, the modulo guard : the iterators... */ + for (i = level - 1; i >= 1; --i) + eliminate_using_stride_constraint(line, len, nb_iter, + infos->stride[i - 1], i, line[level]); for (i=1;i<=nb_iter;i++) { if (i == level || cloog_int_is_zero(line[i])) continue; - if (infos->stride[i - 1] && - cloog_int_is_divisible_by(infos->stride[i-1]->stride, line[level])) { - cloog_int_addmul(line[len-1], line[i], infos->stride[i-1]->offset); - cloog_int_fdiv_r(line[len-1], line[len-1], line[level]); - continue; - } name = cloog_names_name_at_level(infos->names, i); diff --git a/source/isl/domain.c b/source/isl/domain.c index b1e396b..93846cf 100644 --- a/source/isl/domain.c +++ b/source/isl/domain.c @@ -337,6 +337,18 @@ void cloog_domain_print_structure(FILE *file, CloogDomain *domain, int level, ******************************************************************************/ +void cloog_domain_list_free(CloogDomainList *list) +{ + CloogDomainList *next; + + for ( ; list; list = next) { + next = list->next; + cloog_domain_free(list->domain); + free(list); + } +} + + /** * cloog_scattering_list_free function: * This function frees the allocated memory for a CloogScatteringList structure. @@ -834,13 +846,109 @@ static int constraint_stride_lower(__isl_take isl_constraint *c, void *user) return 0; } +/* This functions performs essentially the same operation as + * constraint_stride_lower, the only difference being that the offset d + * is not a constant, but an affine expression in terms of the parameters + * and earlier variables. In particular the affine expression is equal + * to the coefficients of stride->constraint multiplied by stride->factor. + * As in constraint_stride_lower, we add an extra bound + * + * i + s * floor((f + a * d)/(a * s)) - d >= 0 + * + * for each lower bound + * + * a i + f >= 0 + * + * where d is not the aforementioned affine expression. + */ +static int constraint_stride_lower_c(__isl_take isl_constraint *c, void *user) +{ + struct cloog_stride_lower *csl = (struct cloog_stride_lower *)user; + int i; + isl_int v; + isl_int t, u; + isl_constraint *bound; + isl_constraint *csl_c; + isl_div *div; + int pos; + unsigned nparam, nvar; + + isl_int_init(v); + isl_constraint_get_coefficient(c, isl_dim_set, csl->level - 1, &v); + if (!isl_int_is_pos(v)) { + isl_int_clear(v); + isl_constraint_free(c); + + return 0; + } + + csl_c = &csl->stride->constraint->isl; + + isl_int_init(t); + isl_int_init(u); + + nparam = isl_constraint_dim(c, isl_dim_param); + nvar = isl_constraint_dim(c, isl_dim_set); + bound = isl_inequality_alloc(isl_basic_set_get_dim(csl->bounds)); + div = isl_div_alloc(isl_basic_set_get_dim(csl->bounds)); + isl_int_mul(t, v, csl->stride->stride); + isl_div_set_denominator(div, t); + for (i = 0; i < nparam; ++i) { + isl_constraint_get_coefficient(c, isl_dim_param, i, &t); + isl_constraint_get_coefficient(csl_c, isl_dim_param, i, &u); + isl_int_mul(u, u, csl->stride->factor); + isl_int_addmul(t, v, u); + isl_div_set_coefficient(div, isl_dim_param, i, t); + isl_int_neg(u, u); + isl_constraint_set_coefficient(bound, isl_dim_param, i, u); + } + for (i = 0; i < nvar; ++i) { + if (i == csl->level - 1) + continue; + isl_constraint_get_coefficient(c, isl_dim_set, i, &t); + isl_constraint_get_coefficient(csl_c, isl_dim_set, i, &u); + isl_int_mul(u, u, csl->stride->factor); + isl_int_addmul(t, v, u); + isl_div_set_coefficient(div, isl_dim_set, i, t); + isl_int_neg(u, u); + isl_constraint_set_coefficient(bound, isl_dim_set, i, u); + } + isl_constraint_get_constant(c, &t); + isl_constraint_get_constant(csl_c, &u); + isl_int_mul(u, u, csl->stride->factor); + isl_int_addmul(t, v, u); + isl_div_set_constant(div, t); + isl_int_neg(u, u); + isl_constraint_set_constant(bound, u); + + bound = isl_constraint_add_div(bound, div, &pos); + isl_int_set_si(t, 1); + isl_constraint_set_coefficient(bound, isl_dim_set, + csl->level - 1, t); + isl_constraint_set_coefficient(bound, isl_dim_div, pos, + csl->stride->stride); + csl->bounds = isl_basic_set_add_constraint(csl->bounds, bound); + + isl_int_clear(u); + isl_int_clear(t); + isl_int_clear(v); + isl_constraint_free(c); + + return 0; +} + static int basic_set_stride_lower(__isl_take isl_basic_set *bset, void *user) { struct cloog_stride_lower *csl = (struct cloog_stride_lower *)user; int r; csl->bounds = isl_basic_set_universe_like(bset); - r = isl_basic_set_foreach_constraint(bset, constraint_stride_lower, csl); + if (csl->stride->constraint) + r = isl_basic_set_foreach_constraint(bset, + &constraint_stride_lower_c, csl); + else + r = isl_basic_set_foreach_constraint(bset, + &constraint_stride_lower, csl); bset = isl_basic_set_intersect(bset, csl->bounds); csl->set = isl_set_union(csl->set, isl_set_from_basic_set(bset)); @@ -1273,3 +1381,190 @@ CloogUnionDomain *cloog_union_domain_from_isl_union_set( return ud; } + +/* Computes x, y and g such that g = gcd(a,b) and a*x+b*y = g */ +static void Euclid(cloog_int_t a, cloog_int_t b, + cloog_int_t *x, cloog_int_t *y, cloog_int_t *g) +{ + cloog_int_t c, d, e, f, tmp; + + cloog_int_init(c); + cloog_int_init(d); + cloog_int_init(e); + cloog_int_init(f); + cloog_int_init(tmp); + cloog_int_abs(c, a); + cloog_int_abs(d, b); + cloog_int_set_si(e, 1); + cloog_int_set_si(f, 0); + while (cloog_int_is_pos(d)) { + cloog_int_tdiv_q(tmp, c, d); + cloog_int_mul(tmp, tmp, f); + cloog_int_sub(e, e, tmp); + cloog_int_tdiv_q(tmp, c, d); + cloog_int_mul(tmp, tmp, d); + cloog_int_sub(c, c, tmp); + cloog_int_swap(c, d); + cloog_int_swap(e, f); + } + cloog_int_set(*g, c); + if (cloog_int_is_zero(a)) + cloog_int_set_si(*x, 0); + else if (cloog_int_is_pos(a)) + cloog_int_set(*x, e); + else cloog_int_neg(*x, e); + if (cloog_int_is_zero(b)) + cloog_int_set_si(*y, 0); + else { + cloog_int_mul(tmp, a, *x); + cloog_int_sub(tmp, c, tmp); + cloog_int_divexact(*y, tmp, b); + } + cloog_int_clear(c); + cloog_int_clear(d); + cloog_int_clear(e); + cloog_int_clear(f); + cloog_int_clear(tmp); +} + +/* Construct a CloogStride from the given constraint for the given level, + * if possible. + * We first compute the gcd of the coefficients of the existentially + * quantified variables and then remove any common factors it has + * with the coefficient at the given level. + * The result is the value of the stride and if it is not one, + * the it is possible to construct a CloogStride. + * The constraint leading to the stride is stored in the CloogStride + * as well a value (factor) such that the product of this value + * and the coefficient at the given level is equal to -1 modulo the stride. + */ +static CloogStride *construct_stride(isl_constraint *c, int level) +{ + int i, n, sign; + isl_int v, m, gcd, stride, factor; + CloogStride *s; + + if (!c) + return NULL; + + isl_int_init(v); + isl_int_init(m); + isl_int_init(gcd); + isl_int_init(factor); + isl_int_init(stride); + + isl_constraint_get_coefficient(c, isl_dim_set, level - 1, &v); + sign = isl_int_sgn(v); + isl_int_abs(m, v); + + isl_int_set_si(gcd, 0); + n = isl_constraint_dim(c, isl_dim_div); + for (i = 0; i < n; ++i) { + isl_constraint_get_coefficient(c, isl_dim_div, i, &v); + isl_int_gcd(gcd, gcd, v); + } + + isl_int_gcd(v, m, gcd); + isl_int_divexact(stride, gcd, v); + + if (isl_int_is_zero(stride) || isl_int_is_one(stride)) + s = NULL; + else { + Euclid(m, stride, &factor, &v, &gcd); + if (sign > 0) + isl_int_neg(factor, factor); + + c = isl_constraint_copy(c); + s = cloog_stride_alloc_from_constraint(stride, + cloog_constraint_from_isl_constraint(c), factor); + } + + isl_int_clear(stride); + isl_int_clear(factor); + isl_int_clear(gcd); + isl_int_clear(m); + isl_int_clear(v); + + return s; +} + +struct cloog_isl_find_stride_data { + int level; + CloogStride *stride; +}; + +/* Check if the given constraint can be used to derive + * a stride on the iterator identified by data->level. + * We first check that there are some existentially quantified variables + * and that the coefficient at data->level is non-zero. + * Then we call construct_stride for further checks and the actual + * construction of the CloogStride. + */ +static int find_stride(__isl_take isl_constraint *c, void *user) +{ + struct cloog_isl_find_stride_data *data; + int n; + isl_int v; + + data = (struct cloog_isl_find_stride_data *)user; + + if (data->stride) { + isl_constraint_free(c); + return 0; + } + + n = isl_constraint_dim(c, isl_dim_div); + if (n == 0) { + isl_constraint_free(c); + return 0; + } + + isl_int_init(v); + + isl_constraint_get_coefficient(c, isl_dim_set, data->level - 1, &v); + if (!isl_int_is_zero(v)) + data->stride = construct_stride(c, data->level); + + isl_int_clear(v); + + isl_constraint_free(c); + + return 0; +} + +/* Check if the given list of domains has a common stride on the given level. + * If so, return a pointer to a CloogStride object. If not, return NULL. + * + * We project out all later variables, take the union and compute + * the affine hull of the union. Then we check the (equality) + * constraints in this affine hull for imposing a stride. + */ +CloogStride *cloog_domain_list_stride(CloogDomainList *list, int level) +{ + struct cloog_isl_find_stride_data data = { level, NULL }; + isl_set *set; + isl_basic_set *aff; + int first = level; + int n; + int r; + + n = isl_set_dim(&list->domain->set, isl_dim_set) - first; + set = isl_set_project_out(isl_set_copy(&list->domain->set), + isl_dim_set, first, n); + + for (list = list->next; list; list = list->next) { + isl_set *set_i; + n = isl_set_dim(&list->domain->set, isl_dim_set) - first; + set_i = isl_set_project_out(isl_set_copy(&list->domain->set), + isl_dim_set, first, n); + set = isl_set_union(set, set_i); + } + aff = isl_set_affine_hull(set); + + r = isl_basic_set_foreach_constraint(aff, &find_stride, &data); + assert(r == 0); + + isl_basic_set_free(aff); + + return data.stride; +} diff --git a/source/loop.c b/source/loop.c index 8764b91..64123c7 100644 --- a/source/loop.c +++ b/source/loop.c @@ -41,6 +41,8 @@ # include # include "../include/cloog/cloog.h" +#define ALLOC(type) (type*)malloc(sizeof(type)) + /****************************************************************************** * Memory leaks hunting * @@ -1142,6 +1144,40 @@ CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level) } +/* Check if the domains of the inner loops impose a stride constraint + * on the given level. + * The core of the search is implemented in cloog_domain_list_stride. + * Here, we simply construct a list of domains to pass to this function + * and if a stride is found, we adjust the lower bounds by calling + * cloog_domain_stride_lower_bound. + */ +static int cloog_loop_variable_offset_stride(CloogLoop *loop, int level) +{ + CloogDomainList *list = NULL; + CloogLoop *inner; + CloogStride *stride; + + for (inner = loop->inner; inner; inner = inner->next) { + CloogDomainList *entry = ALLOC(CloogDomainList); + entry->domain = cloog_domain_copy(inner->domain); + entry->next = list; + list = entry; + } + + stride = cloog_domain_list_stride(list, level); + + cloog_domain_list_free(list); + + if (!stride) + return 0; + + loop->stride = stride; + loop->domain = cloog_domain_stride_lower_bound(loop->domain, level, stride); + + return 1; +} + + /** * cloog_loop_stride function: * This function will find the stride of a loop for the iterator at the column @@ -1155,6 +1191,13 @@ CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level) * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound * for i: the first value satisfying the common constraint: -3. At the end, the * iteration domain for i is -3<=i<=n and the stride for i is 3. + * + * The algorithm implemented in this function only allows for strides + * on loops with a lower bound that has a constant remainder on division + * by the stride. Before initiating this procedure, we first check + * if we can find a stride with a lower bound with a variable offset in + * cloog_loop_variable_offset_stride. + * * - loop is the loop including the iteration domain of the considered iterator, * - level is the column number of the iterator in the matrix of contraints. ** @@ -1170,6 +1213,9 @@ void cloog_loop_stride(CloogLoop * loop, int level) if (!cloog_domain_can_stride(loop->domain, level)) return; + if (cloog_loop_variable_offset_stride(loop, level)) + return; + cloog_int_init(stride); cloog_int_init(ref_offset); cloog_int_init(offset); diff --git a/source/stride.c b/source/stride.c index 8a613ce..e8073e4 100644 --- a/source/stride.c +++ b/source/stride.c @@ -2,7 +2,7 @@ #define ALLOC(type) (type*)malloc(sizeof(type)) -CloogStride *cloog_stride_alloc(cloog_int_t stride, cloog_int_t offset) +CloogStride *cloog_stride_malloc() { CloogStride *s; @@ -13,8 +13,32 @@ CloogStride *cloog_stride_alloc(cloog_int_t stride, cloog_int_t offset) s->references = 1; cloog_int_init(s->stride); cloog_int_init(s->offset); + cloog_int_init(s->factor); + s->constraint = cloog_constraint_invalid(); + + return s; +} + +CloogStride *cloog_stride_alloc(cloog_int_t stride, cloog_int_t offset) +{ + CloogStride *s = cloog_stride_malloc(); + cloog_int_set(s->stride, stride); cloog_int_set(s->offset, offset); + cloog_int_set_si(s->factor, 0); + + return s; +} + +CloogStride *cloog_stride_alloc_from_constraint(cloog_int_t stride, + CloogConstraint *constraint, cloog_int_t factor) +{ + CloogStride *s = cloog_stride_malloc(); + + cloog_int_set(s->stride, stride); + cloog_int_set(s->factor, factor); + cloog_int_set_si(s->offset, -1); + s->constraint = constraint; return s; } @@ -39,5 +63,7 @@ void cloog_stride_free(CloogStride *stride) cloog_int_clear(stride->stride); cloog_int_clear(stride->offset); + cloog_int_clear(stride->factor); + cloog_constraint_release(stride->constraint); free(stride); } diff --git a/test/reservoir/bastoul3.c b/test/reservoir/bastoul3.c index 7adb7a6..a89d28e 100644 --- a/test/reservoir/bastoul3.c +++ b/test/reservoir/bastoul3.c @@ -1,8 +1,6 @@ -/* Generated from ../../../git/cloog/test/reservoir/bastoul3.cloog by CLooG 0.14.0-136-gb91ef26 gmp bits in 0.00s. */ +/* Generated from ../../../git/cloog/test/reservoir/bastoul3.cloog by CLooG 0.14.0-308-g2713b64 gmp bits in 0.01s. */ for (i=3;i<=9;i++) { - for (j=max(1,i-6);j<=min(3,i-2);j++) { - if ((i+j)%2 == 0) { - S1(i,j,(i-j)/2) ; - } + for (j=max(i-6,i-2*floord(i-1,2));j<=min(3,i-2);j+=2) { + S1(i,j,(i-j)/2); } } -- 2.11.4.GIT