From 7f6868b2f20382cad4e6a11891f9be5eefe7e7f1 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Thu, 12 May 2011 16:22:49 +0200 Subject: [PATCH] add support for unrolling The user can specify which loop level to unroll. The loops at the specified level, along with all inner loops will be unrolled, provided they have a single lower bound and a number of iterations that is bounded by a constant. Other backends will have to implement their own cloog_domain_can_unroll and cloog_domain_fixed_offset. If cloog_domain_can_unroll always returns 0, then cloog_domain_fixed_offset will never be called. Signed-off-by: Sven Verdoolaege --- doc/cloog.texi | 14 +++++ include/cloog/domain.h | 4 ++ include/cloog/options.h | 1 + source/isl/domain.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++ source/loop.c | 103 +++++++++++++++++++++++++++++++++--- source/options.c | 3 ++ 6 files changed, 254 insertions(+), 7 deletions(-) diff --git a/doc/cloog.texi b/doc/cloog.texi index 1f81f5b..f53b3b7 100644 --- a/doc/cloog.texi +++ b/doc/cloog.texi @@ -826,6 +826,7 @@ input file @code{basic.cloog} with default options by typing: * First Level for Spreading:: * Statement Block:: * Loop Strides:: +* Unrolling:: * Compilable Code:: * Output:: * Help:: @@ -1107,6 +1108,17 @@ for (i=2;i<=n;i+=2) @{ @end group @end example + +@node Unrolling +@subsection First Depth to Unroll @code{-first-unroll } + + @code{-first-unroll }: this option sets the first loop depth + to unroll. Note that a loop is only unrolled when it is supported + by the backend. In case of the isl backend, a loop is unrolled + if it has a single lower bound and if it there is a fixed (non-parametric) + bound on the number of iterations. + + @node Compilable Code @subsection Compilable Code @code{-compilable } @@ -1802,6 +1814,7 @@ struct cloogoptions int f ; /* -f option. */ int strides ; /* -strides option. */ int sh ; /* -sh option. */ + int first_unroll; /* First level to unroll. */ int esp ; /* -esp option. */ int fsp ; /* -fsp option. */ int otl ; /* -otl option. */ @@ -1826,6 +1839,7 @@ As a reminder, the default values are: @item @math{f = 1} (optimize control from the outermost loops), @item @math{strides = 0} (use only unit strides), @item @math{sh = 0} (do not compute simple convex hulls), +@item @math{first_unroll = -1} (do not perform unrolling), @item @math{esp = 1} (spread complex equalities), @item @math{fsp = 1} (start to spread from the first iterators), @item @math{otl = 1} (simplify loops running only once). diff --git a/include/cloog/domain.h b/include/cloog/domain.h index 11b8930..1b50190 100644 --- a/include/cloog/domain.h +++ b/include/cloog/domain.h @@ -134,6 +134,10 @@ int cloog_domain_can_stride(CloogDomain *domain, int level); int cloog_domain_is_otl(CloogDomain *domain, int level); CloogDomain * cloog_domain_stride_lower_bound(CloogDomain *domain, int level, CloogStride *stride); +int cloog_domain_can_unroll(CloogDomain *domain, int level, + cloog_int_t *n, CloogConstraint **lb); +CloogDomain * cloog_domain_fixed_offset(CloogDomain *domain, int level, + CloogConstraint *lb, cloog_int_t offset); int cloog_domain_lazy_disjoint(CloogDomain *, CloogDomain *) ; int cloog_domain_lazy_equal(CloogDomain *, CloogDomain *) ; int cloog_scattering_lazy_block(CloogScattering *, CloogScattering *, diff --git a/include/cloog/options.h b/include/cloog/options.h index c42245a..97e482e 100644 --- a/include/cloog/options.h +++ b/include/cloog/options.h @@ -64,6 +64,7 @@ struct cloogoptions * increment can be something else than one), 0 otherwise. */ int sh; /* 1 for computing simple hulls */ + int first_unroll; /* OPTIONS FOR PRETTY PRINTING */ int esp ; /* 1 if user wants to spread all equalities, i.e. when there diff --git a/source/isl/domain.c b/source/isl/domain.c index 133ee06..aa4dcee 100644 --- a/source/isl/domain.c +++ b/source/isl/domain.c @@ -7,6 +7,7 @@ #include #include #include +#include CloogDomain *cloog_domain_from_isl_set(struct isl_set *set) { @@ -1704,3 +1705,138 @@ CloogStride *cloog_domain_list_stride(CloogDomainList *list, int level) return data.stride; } + +struct cloog_can_unroll { + int can_unroll; + int level; + isl_constraint *c; +}; + + +/* Check if we can still unroll given the presence of the given + * constraint. + * Only lower bounds on the current level have any impact. + * If such a lower bound involves any existentially quantified + * variables, we currently punt. + * Otherwise, if we haven't recorded any other lower bound, + * we record the current one. If we have already recorded another + * bound, then there are at least two lower bounds and we cannot unroll. + */ +static int constraint_can_unroll(__isl_take isl_constraint *c, void *user) +{ + struct cloog_can_unroll *ccu = (struct cloog_can_unroll *)user; + isl_int v; + unsigned n_div; + + isl_int_init(v); + isl_constraint_get_coefficient(c, isl_dim_set, ccu->level - 1, &v); + if (isl_int_is_pos(v)) { + n_div = isl_constraint_dim(c, isl_dim_div); + if (ccu->c || + isl_constraint_involves_dims(c, isl_dim_div, 0, n_div)) + ccu->can_unroll = 0; + else + ccu->c = isl_constraint_copy(c); + } + isl_int_clear(v); + isl_constraint_free(c); + + return 0; +} + + +/* Check if we can unroll the domain at the current level. + * If the domain is a union, we cannot. Otherwise, we check the + * constraints. + */ +static int basic_set_can_unroll(__isl_take isl_basic_set *bset, void *user) +{ + struct cloog_can_unroll *ccu = (struct cloog_can_unroll *)user; + int r = 0; + + if (ccu->c || !ccu->can_unroll) + ccu->can_unroll = 0; + else { + bset = isl_basic_set_remove_redundancies(bset); + r = isl_basic_set_foreach_constraint(bset, + &constraint_can_unroll, ccu); + } + isl_basic_set_free(bset); + return r; +} + + +/* Check if we can unroll the given domain at the given level, and + * if so, return the single lower bound in *lb and an upper bound + * on the number of iterations in *n. + * If we cannot unroll, return 0 and set *lb to NULL. + * + * We can unroll, if we can identify a single lower bound on level + * and if the number of iterations is bounded by a constant. + * We first look for the lower bound, say l, on the iterator i + * and then compute the maximal value of (i - ceil(l) + 1). + */ +int cloog_domain_can_unroll(CloogDomain *domain, int level, cloog_int_t *n, + CloogConstraint **lb) +{ + struct cloog_can_unroll ccu = { 1, level, NULL }; + isl_set *set = isl_set_from_cloog_domain(domain); + int r; + isl_aff *aff; + enum isl_lp_result res; + + *lb = NULL; + r = isl_set_foreach_basic_set(set, &basic_set_can_unroll, &ccu); + assert(r == 0); + if (!ccu.c) + ccu.can_unroll = 0; + if (!ccu.can_unroll) { + isl_constraint_free(ccu.c); + return 0; + } + + aff = isl_constraint_get_bound(ccu.c, isl_dim_set, level - 1); + aff = isl_aff_ceil(aff); + aff = isl_aff_neg(aff); + aff = isl_aff_add_coefficient_si(aff, isl_dim_set, level - 1, 1); + res = isl_set_max(set, aff, n); + isl_aff_free(aff); + + if (res == isl_lp_unbounded) { + isl_constraint_free(ccu.c); + return 0; + } + assert(res == isl_lp_ok); + + cloog_int_add_ui(*n, *n, 1); + + *lb = cloog_constraint_from_isl_constraint(ccu.c); + + return ccu.can_unroll; +} + + +/* Fix the iterator i at the given level to l + o, + * where l is prescribed by the constraint lb and o is equal to offset. + * In particular, if lb is the constraint + * + * a i >= f(j) + * + * then l = ceil(f(j)/a). + */ +CloogDomain *cloog_domain_fixed_offset(CloogDomain *domain, + int level, CloogConstraint *lb, cloog_int_t offset) +{ + isl_aff *aff; + isl_set *set = isl_set_from_cloog_domain(domain); + isl_constraint *eq; + + aff = isl_constraint_get_bound(&lb->isl, isl_dim_set, level - 1); + aff = isl_aff_ceil(aff); + aff = isl_aff_add_coefficient_si(aff, isl_dim_set, level - 1, -1); + aff = isl_aff_add_constant(aff, offset); + eq = isl_equality_from_aff(aff); + set = isl_set_add_constraint(set, eq); + + return cloog_domain_from_isl_set(set); +} diff --git a/source/loop.c b/source/loop.c index db7e54f..2832782 100644 --- a/source/loop.c +++ b/source/loop.c @@ -1617,12 +1617,88 @@ CloogLoop *cloog_loop_constant(CloogLoop *loop, int level) return res; } + + +/* Unroll the given loop at the given level, provided it is allowed + * by cloog_domain_can_unroll. + * If so, we return a list of loops, one for each iteration of the original + * loop. Otherwise, we simply return the original loop. + */ +static CloogLoop *loop_unroll(CloogLoop *loop, int level) +{ + int can_unroll; + cloog_int_t i; + cloog_int_t n; + CloogConstraint *lb; + CloogLoop *res = NULL; + CloogLoop **next_res = &res; + CloogDomain *domain; + CloogLoop *inner; + + cloog_int_init(n); + can_unroll = cloog_domain_can_unroll(loop->domain, level, &n, &lb); + if (!can_unroll) { + cloog_int_clear(n); + return loop; + } + + cloog_int_init(i); + + for (cloog_int_set_si(i, 0); cloog_int_lt(i, n); cloog_int_add_ui(i, i, 1)) { + domain = cloog_domain_copy(loop->domain); + domain = cloog_domain_fixed_offset(domain, level, lb, i); + inner = cloog_loop_copy(loop->inner); + inner = cloog_loop_restrict_all(inner, domain); + if (!inner) { + cloog_domain_free(domain); + continue; + } + *next_res = cloog_loop_alloc(loop->state, domain, 1, NULL, NULL, + inner, NULL); + next_res = &(*next_res)->next; + } + + cloog_int_clear(i); + cloog_int_clear(n); + cloog_constraint_release(lb); + + cloog_loop_free(loop); + + return res; +} + + +/* Unroll all loops in the given list at the given level, provided + * they can be unrolled. + */ +CloogLoop *cloog_loop_unroll(CloogLoop *loop, int level) +{ + CloogLoop *now, *next; + CloogLoop *res = NULL; + CloogLoop **next_res = &res; + + for (now = loop; now; now = next) { + next = now->next; + now->next = NULL; + + *next_res = loop_unroll(now, level); + + while (*next_res) + next_res = &(*next_res)->next; + } + + return res; +} CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop, CloogDomain *context, int level, int scalar, int *scaldims, int nb_scattdims, CloogOptions *options); +CloogLoop *cloog_loop_recurse(CloogLoop *loop, + int level, int scalar, int *scaldims, int nb_scattdims, + int constant, CloogOptions *options); + /** * Recurse on the inner loops of the given single loop. @@ -1632,17 +1708,27 @@ CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop, * - scalar is the current scalar dimension, * - scaldims is the boolean array saying whether a dimension is scalar or not, * - nb_scattdims is the size of the scaldims array, + * - constant is true if the loop is known to be executed at most once * - options are the general code generation options. */ static CloogLoop *loop_recurse(CloogLoop *loop, int level, int scalar, int *scaldims, int nb_scattdims, - CloogOptions *options) + int constant, CloogOptions *options) { CloogLoop *inner, *into, *end, *next, *l, *now; CloogDomain *domain; - if (level && options->strides) + if (level && options->strides && !constant) cloog_loop_stride(loop, level); + + if (!constant && + options->first_unroll >= 0 && level + scalar >= options->first_unroll) { + loop = cloog_loop_unroll(loop, level); + if (loop->next) + return cloog_loop_recurse(loop, level, scalar, scaldims, + nb_scattdims, 1, options); + } + if (level && options->otl) cloog_loop_otl(loop, level); inner = loop->inner; @@ -1686,11 +1772,12 @@ static CloogLoop *loop_recurse(CloogLoop *loop, * - scalar is the current scalar dimension, * - scaldims is the boolean array saying whether a dimension is scalar or not, * - nb_scattdims is the size of the scaldims array, + * - constant is true if the loop is known to be executed at most once * - options are the general code generation options. */ CloogLoop *cloog_loop_recurse(CloogLoop *loop, int level, int scalar, int *scaldims, int nb_scattdims, - CloogOptions *options) + int constant, CloogOptions *options) { CloogLoop *now, *next; CloogLoop *res = NULL; @@ -1701,7 +1788,7 @@ CloogLoop *cloog_loop_recurse(CloogLoop *loop, now->next = NULL; *next_res = loop_recurse(now, level, scalar, scaldims, nb_scattdims, - options); + constant, options); while (*next_res) next_res = &(*next_res)->next; @@ -1739,11 +1826,13 @@ CloogLoop *cloog_loop_generate_general(CloogLoop *loop, { CloogLoop *res, *now, *temp, *l, *new_loop, *next; int separate = 0; + int constant = 0; /* 3. Separate all projections into disjoint polyhedra. */ - if (level > 0 && cloog_loop_is_constant(loop, level)) + if (level > 0 && cloog_loop_is_constant(loop, level)) { res = cloog_loop_constant(loop, level); - else if ((options->f > level+scalar) || (options->f < 0)) + constant = 1; + } else if ((options->f > level+scalar) || (options->f < 0)) res = cloog_loop_merge(loop, level, options); else { res = cloog_loop_separate(loop); @@ -1763,7 +1852,7 @@ CloogLoop *cloog_loop_generate_general(CloogLoop *loop, res = NULL ; if (!level || (level+scalar < options->l) || (options->l < 0)) res = cloog_loop_recurse(temp, level, scalar, scaldims, nb_scattdims, - options); + constant, options); else while (temp != NULL) { next = temp->next ; diff --git a/source/options.c b/source/options.c index d11f65f..561b4f6 100644 --- a/source/options.c +++ b/source/options.c @@ -299,6 +299,7 @@ CloogOptions *cloog_options_malloc(CloogState *state) options->stop = -1 ; /* Generate all the code. */ options->strides = 0 ; /* Generate a code with unit strides. */ options->sh = 0; /* Compute actual convex hull. */ + options->first_unroll = -1; /* First level to unroll: none. */ options->name = ""; /* OPTIONS FOR PRETTY PRINTING */ options->esp = 1 ; /* We want Equality SPreading.*/ @@ -357,6 +358,8 @@ void cloog_options_read(CloogState *state, int argc, char **argv, cloog_options_set(&(*options)->strides,argc,argv,&i) ; else if (strcmp(argv[i],"-sh") == 0) cloog_options_set(&(*options)->sh,argc,argv,&i) ; + else if (!strcmp(argv[i], "-first-unroll")) + cloog_options_set(&(*options)->first_unroll, argc, argv, &i); else if (strcmp(argv[i],"-otl") == 0) cloog_options_set(&(*options)->otl,argc,argv,&i) ; -- 2.11.4.GIT