From b9e2954dfebada35ad03c5ef16f0ed67889a26fe Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Tue, 4 May 2010 16:45:20 +0200 Subject: [PATCH] cloog_loop_generate: optimize the loop domains after separation The separation phase may split a domain into parts with only a single inner loop. In this case we want to replace the iteration domain by the projection of the iteration domain of the inner loop. This can be seen as a light-weight backtracking phase. When backtracking is turned on, the effect of this change is small, but with backtracking turned off, it helps to recover some of the benefits of backtracking. Signed-off-by: Sven Verdoolaege --- source/loop.c | 132 ++++++++++++++++++++++- test/cholesky2.c | 8 +- test/classen.c | 260 ++++++++++++++++++++++----------------------- test/reservoir/cholesky2.c | 7 +- 4 files changed, 266 insertions(+), 141 deletions(-) rewrite test/classen.c (81%) diff --git a/source/loop.c b/source/loop.c index 85c93a4..befab93 100644 --- a/source/loop.c +++ b/source/loop.c @@ -734,6 +734,61 @@ CloogLoop *cloog_loop_combine(CloogLoop *loop) } /** + * Remove loops from list that have an empty domain. + */ +CloogLoop *cloog_loop_remove_empty_domain_loops(CloogLoop *loop) +{ + CloogLoop *l, *res, *next, **res_next; + + res = NULL; + res_next = &res; + for (l = loop; l; l = next) { + next = l->next; + if (cloog_domain_isempty(l->domain)) + cloog_loop_free_parts(l, 1, 1, 1, 0); + else { + *res_next = l; + res_next = &(*res_next)->next; + } + } + res_next = NULL; + + return res; +} + +CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop, + int level, int scalar, int *scaldims, int nb_scattdims); + +/* For each loop with only one inner loop, replace the domain + * of the loop with the projection of the domain of the inner + * loop. To increase the number of loops with a single inner + * we first decompose the inner loops into strongly connected + * components. + */ +CloogLoop *cloog_loop_specialize(CloogLoop *loop, + int level, int scalar, int *scaldims, int nb_scattdims) +{ + int dim; + CloogLoop *l; + + loop = cloog_loop_decompose_inner(loop, level, scalar, + scaldims, nb_scattdims); + + for (l = loop; l; l = l->next) { + if (l->inner->next) + continue; + if (!cloog_domain_isconvex(l->inner->domain)) + continue; + + dim = cloog_domain_dimension(l->domain); + cloog_domain_free(l->domain); + l->domain = cloog_domain_project(l->inner->domain, dim); + } + + return cloog_loop_remove_empty_domain_loops(loop); +} + +/** * cloog_loop_separate function: * This function implements the Quillere algorithm for separation of multiple * loops: for a given set of polyhedra (loop), it computes a set of disjoint @@ -1425,15 +1480,23 @@ CloogLoop *cloog_loop_generate_general(CloogLoop *loop, CloogLoop * res, * now, * temp, * l, * new_loop, * inner, * now2, * end, * next, * into ; CloogDomain * domain ; + int separate = 0; /* 3. Separate all projections into disjoint polyhedra. */ - res = ((options->f > level+scalar) || (options->f < 0)) ? - cloog_loop_merge(loop, level, options) : cloog_loop_separate(loop); + if ((options->f > level+scalar) || (options->f < 0)) + res = cloog_loop_merge(loop, level, options); + else { + res = cloog_loop_separate(loop); + separate = 1; + } /* 3b. -correction- sort the loops to determine their textual order. */ res = cloog_loop_sort(res, level); res = cloog_loop_restrict_inner(res); + + if (separate) + res = cloog_loop_specialize(res, level, scalar, scaldims, nb_scattdims); /* 4. Recurse for each loop with the current domain as context. */ temp = res ; @@ -2011,6 +2074,71 @@ CloogLoop *cloog_loop_generate_components(CloogLoop *loop, } +/* For each loop in the list "loop", decompose the list of + * inner loops into strongly connected components and put + * the components into separate loops at the top level. + */ +CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop, + int level, int scalar, int *scaldims, int nb_scattdims) +{ + CloogLoop *l, *tmp; + CloogLoop **loop_array; + int i, n_loops, max_loops = 0; + struct cloog_loop_sort *s; + + for (l = loop; l; l = l->next) { + n_loops = cloog_loop_count(l->inner); + if (max_loops < n_loops) + max_loops = n_loops; + } + + if (max_loops <= 1) + return loop; + + loop_array = (CloogLoop **)malloc(max_loops * sizeof(CloogLoop *)); + assert(loop_array); + + for (l = loop; l; l = l->next) { + int n; + + for (i = 0, tmp = l->inner; tmp; i++, tmp = tmp->next) + loop_array[i] = tmp; + n_loops = i; + if (n_loops <= 1) + continue; + + s = cloog_loop_sort_alloc(n_loops); + for (i = n_loops - 1; i >= 0; --i) { + if (s->node[i].index >= 0) + continue; + cloog_loop_components_tarjan(s, loop_array, i, level, scalar, + scaldims, nb_scattdims, &cloog_loop_follows); + } + + n = extract_component(loop_array, s->order, &l->inner); + n_loops -= n; + i = n + 1; + while (n_loops) { + CloogLoop *inner; + + n = extract_component(loop_array, &s->order[i], &inner); + n_loops -= n; + i += n + 1; + tmp = cloog_loop_alloc(l->state, cloog_domain_copy(l->domain), + l->stride, l->offset, l->block, inner, l->next); + l->next = tmp; + l = tmp; + } + + cloog_loop_sort_free(s); + } + + free(loop_array); + + return loop; +} + + CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop, int level, int scalar, int *scaldims, int nb_scattdims, CloogOptions *options) diff --git a/test/cholesky2.c b/test/cholesky2.c index e9aa474..20e1c93 100644 --- a/test/cholesky2.c +++ b/test/cholesky2.c @@ -1,4 +1,4 @@ -/* Generated from ../../../git/cloog/test/cholesky2.cloog by CLooG 0.14.0-238-gb1cb779 gmp bits in 0.04s. */ +/* Generated from ../../../git/cloog/test/cholesky2.cloog by CLooG 0.14.0-283-g7c18f7a gmp bits in 0.08s. */ if (M >= 1) { if (M >= 2) { for (c2=1;c2<=M-1;c2++) { @@ -40,12 +40,12 @@ if (M >= 1) { if (c1%3 == 0) { S2((c1+3)/3,c1/3); } - if (c1%3 == 0) { - S2((c1+6)/3,c1/3); - } if ((c1+1)%3 == 0) { S6((c1+1)/3,(c1+4)/3); } + if (c1%3 == 0) { + S2((c1+6)/3,c1/3); + } for (c2=ceild(c1+7,3);c2<=M;c2++) { if ((c1+1)%3 == 0) { S6((c1+1)/3,c2); diff --git a/test/classen.c b/test/classen.c dissimilarity index 81% index d33c93f..2d674ed 100644 --- a/test/classen.c +++ b/test/classen.c @@ -1,131 +1,129 @@ -/* Generated from ../../../git/cloog/test/classen.cloog by CLooG 0.14.0-136-gb91ef26 gmp bits in 0.98s. */ -if (m >= 1) { - if (m >= 2) { - S1(0,1,1,1) ; - S2(0,1,1,1,1,1,2,1) ; - S3(0,1,1,2,1,1,1,2) ; - S4(0,1,2,2,1,1,2,2) ; - S8(0,1) ; - } - if (m == 1) { - S1(0,1,1,1) ; - S8(0,1) ; - } - if (m >= 3) { - S5(0,1,1,1,1,1,2,1) ; - S1(1,1,2,1) ; - S2(1,1,2,1,2,1,3,1) ; - S3(1,1,2,2,2,1,2,2) ; - S4(1,1,3,2,2,1,3,2) ; - S6(0,1,1,2,1,1,1,2) ; - S7(0,1,2,2,1,1,2,2) ; - S1(1,2,1,2) ; - S2(1,2,2,2,1,2,2,2) ; - S3(1,2,2,3,1,2,1,3) ; - S4(1,2,3,3,1,2,2,3) ; - for (coordP1=1;coordP1<=2;coordP1++) { - S8(1,coordP1) ; - } - } - for (glT1=2;glT1<=m-2;glT1++) { - S5(glT1-1,1,glT1,1,glT1,1,glT1+1,1) ; - S1(glT1,1,glT1+1,1) ; - S2(glT1,1,glT1+1,1,glT1+1,1,glT1+2,1) ; - S3(glT1,1,glT1+1,2,glT1+1,1,glT1+1,2) ; - S4(glT1,1,glT1+2,2,glT1+1,1,glT1+2,2) ; - for (rp1=2;rp1<=glT1;rp1++) { - S5(glT1-1,rp1,glT1,rp1,glT1-rp1+1,rp1,glT1-rp1+2,rp1) ; - S6(glT1-1,rp1-1,glT1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+2,rp1) ; - S7(glT1-1,rp1-1,glT1+1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+3,rp1) ; - S1(glT1,rp1,glT1-rp1+2,rp1) ; - S2(glT1,rp1,glT1+1,rp1,glT1-rp1+2,rp1,glT1-rp1+3,rp1) ; - S3(glT1,rp1,glT1+1,rp1+1,glT1-rp1+2,rp1,glT1-rp1+2,rp1+1) ; - S4(glT1,rp1,glT1+2,rp1+1,glT1-rp1+2,rp1,glT1-rp1+3,rp1+1) ; - } - S6(glT1-1,glT1,glT1,glT1+1,1,glT1,1,glT1+1) ; - S7(glT1-1,glT1,glT1+1,glT1+1,1,glT1,2,glT1+1) ; - S1(glT1,glT1+1,1,glT1+1) ; - S2(glT1,glT1+1,glT1+1,glT1+1,1,glT1+1,2,glT1+1) ; - S3(glT1,glT1+1,glT1+1,glT1+2,1,glT1+1,1,glT1+2) ; - S4(glT1,glT1+1,glT1+2,glT1+2,1,glT1+1,2,glT1+2) ; - for (coordP1=1;coordP1<=glT1+1;coordP1++) { - S8(glT1,coordP1) ; - } - } - if (m >= 3) { - S5(m-2,1,m-1,1,m-1,1,m,1) ; - S1(m-1,1,m,1) ; - S3(m-1,1,m,2,m,1,m,2) ; - for (rp1=2;rp1<=m-1;rp1++) { - S5(m-2,rp1,m-1,rp1,-rp1+m,rp1,-rp1+m+1,rp1) ; - S6(m-2,rp1-1,m-1,rp1,-rp1+m+1,rp1-1,-rp1+m+1,rp1) ; - S7(m-2,rp1-1,m,rp1,-rp1+m+1,rp1-1,-rp1+m+2,rp1) ; - S1(m-1,rp1,-rp1+m+1,rp1) ; - S2(m-1,rp1,m,rp1,-rp1+m+1,rp1,-rp1+m+2,rp1) ; - S3(m-1,rp1,m,rp1+1,-rp1+m+1,rp1,-rp1+m+1,rp1+1) ; - S4(m-1,rp1,m+1,rp1+1,-rp1+m+1,rp1,-rp1+m+2,rp1+1) ; - } - S6(m-2,m-1,m-1,m,1,m-1,1,m) ; - S7(m-2,m-1,m,m,1,m-1,2,m) ; - S1(m-1,m,1,m) ; - S2(m-1,m,m,m,1,m,2,m) ; - for (coordP1=1;coordP1<=m;coordP1++) { - S8(m-1,coordP1) ; - } - } - for (glT1=m;glT1<=2*m-4;glT1++) { - S5(glT1-1,glT1-m+2,glT1,glT1-m+2,m-1,glT1-m+2,m,glT1-m+2) ; - S6(glT1-1,glT1-m+1,glT1,glT1-m+2,m,glT1-m+1,m,glT1-m+2) ; - S1(glT1,glT1-m+2,m,glT1-m+2) ; - S3(glT1,glT1-m+2,glT1+1,glT1-m+3,m,glT1-m+2,m,glT1-m+3) ; - for (rp1=glT1-m+3;rp1<=m-1;rp1++) { - S5(glT1-1,rp1,glT1,rp1,glT1-rp1+1,rp1,glT1-rp1+2,rp1) ; - S6(glT1-1,rp1-1,glT1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+2,rp1) ; - S7(glT1-1,rp1-1,glT1+1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+3,rp1) ; - S1(glT1,rp1,glT1-rp1+2,rp1) ; - S2(glT1,rp1,glT1+1,rp1,glT1-rp1+2,rp1,glT1-rp1+3,rp1) ; - S3(glT1,rp1,glT1+1,rp1+1,glT1-rp1+2,rp1,glT1-rp1+2,rp1+1) ; - S4(glT1,rp1,glT1+2,rp1+1,glT1-rp1+2,rp1,glT1-rp1+3,rp1+1) ; - } - S5(glT1-1,m,glT1,m,glT1-m+1,m,glT1-m+2,m) ; - S6(glT1-1,m-1,glT1,m,glT1-m+2,m-1,glT1-m+2,m) ; - S7(glT1-1,m-1,glT1+1,m,glT1-m+2,m-1,glT1-m+3,m) ; - S1(glT1,m,glT1-m+2,m) ; - S2(glT1,m,glT1+1,m,glT1-m+2,m,glT1-m+3,m) ; - for (coordP1=glT1-m+2;coordP1<=m;coordP1++) { - S8(glT1,coordP1) ; - } - } - if (m >= 3) { - S5(2*m-4,m-1,2*m-3,m-1,m-1,m-1,m,m-1) ; - S6(2*m-4,m-2,2*m-3,m-1,m,m-2,m,m-1) ; - S1(2*m-3,m-1,m,m-1) ; - S3(2*m-3,m-1,2*m-2,m,m,m-1,m,m) ; - S5(2*m-4,m,2*m-3,m,m-2,m,m-1,m) ; - S6(2*m-4,m-1,2*m-3,m,m-1,m-1,m-1,m) ; - S7(2*m-4,m-1,2*m-2,m,m-1,m-1,m,m) ; - S1(2*m-3,m,m-1,m) ; - S2(2*m-3,m,2*m-2,m,m-1,m,m,m) ; - for (coordP1=m-1;coordP1<=m;coordP1++) { - S8(2*m-3,coordP1) ; - } - } - if (m == 2) { - S5(0,1,1,1,1,1,2,1) ; - S1(1,1,2,1) ; - S3(1,1,2,2,2,1,2,2) ; - S6(0,1,1,2,1,1,1,2) ; - S7(0,1,2,2,1,1,2,2) ; - S1(1,2,1,2) ; - S2(1,2,2,2,1,2,2,2) ; - for (coordP1=1;coordP1<=2;coordP1++) { - S8(1,coordP1) ; - } - } - if (m >= 2) { - S5(2*m-3,m,2*m-2,m,m-1,m,m,m) ; - S6(2*m-3,m-1,2*m-2,m,m,m-1,m,m) ; - S1(2*m-2,m,m,m) ; - S8(2*m-2,m) ; - } -} +/* Generated from ../../../git/cloog/test/classen.cloog by CLooG 0.14.0-283-g7c18f7a gmp bits in 0.63s. */ +if (m >= 1) { + if (m >= 2) { + S1(0,1,1,1); + S2(0,1,1,1,1,1,2,1); + S3(0,1,1,2,1,1,1,2); + S4(0,1,2,2,1,1,2,2); + S8(0,1); + } + if (m == 1) { + S1(0,1,1,1); + S8(0,1); + } + if (m >= 3) { + S5(0,1,1,1,1,1,2,1); + S1(1,1,2,1); + S2(1,1,2,1,2,1,3,1); + S3(1,1,2,2,2,1,2,2); + S4(1,1,3,2,2,1,3,2); + S6(0,1,1,2,1,1,1,2); + S7(0,1,2,2,1,1,2,2); + S1(1,2,1,2); + S2(1,2,2,2,1,2,2,2); + S3(1,2,2,3,1,2,1,3); + S4(1,2,3,3,1,2,2,3); + for (coordP1=1;coordP1<=2;coordP1++) { + S8(1,coordP1); + } + } + for (glT1=2;glT1<=m-2;glT1++) { + S5(glT1-1,1,glT1,1,glT1,1,glT1+1,1); + S1(glT1,1,glT1+1,1); + S2(glT1,1,glT1+1,1,glT1+1,1,glT1+2,1); + S3(glT1,1,glT1+1,2,glT1+1,1,glT1+1,2); + S4(glT1,1,glT1+2,2,glT1+1,1,glT1+2,2); + for (rp1=2;rp1<=glT1;rp1++) { + S5(glT1-1,rp1,glT1,rp1,glT1-rp1+1,rp1,glT1-rp1+2,rp1); + S6(glT1-1,rp1-1,glT1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+2,rp1); + S7(glT1-1,rp1-1,glT1+1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+3,rp1); + S1(glT1,rp1,glT1-rp1+2,rp1); + S2(glT1,rp1,glT1+1,rp1,glT1-rp1+2,rp1,glT1-rp1+3,rp1); + S3(glT1,rp1,glT1+1,rp1+1,glT1-rp1+2,rp1,glT1-rp1+2,rp1+1); + S4(glT1,rp1,glT1+2,rp1+1,glT1-rp1+2,rp1,glT1-rp1+3,rp1+1); + } + S6(glT1-1,glT1,glT1,glT1+1,1,glT1,1,glT1+1); + S7(glT1-1,glT1,glT1+1,glT1+1,1,glT1,2,glT1+1); + S1(glT1,glT1+1,1,glT1+1); + S2(glT1,glT1+1,glT1+1,glT1+1,1,glT1+1,2,glT1+1); + S3(glT1,glT1+1,glT1+1,glT1+2,1,glT1+1,1,glT1+2); + S4(glT1,glT1+1,glT1+2,glT1+2,1,glT1+1,2,glT1+2); + for (coordP1=1;coordP1<=glT1+1;coordP1++) { + S8(glT1,coordP1); + } + } + if (m >= 3) { + S5(m-2,1,m-1,1,m-1,1,m,1); + S1(m-1,1,m,1); + S3(m-1,1,m,2,m,1,m,2); + for (rp1=2;rp1<=m-1;rp1++) { + S5(m-2,rp1,m-1,rp1,-rp1+m,rp1,-rp1+m+1,rp1); + S6(m-2,rp1-1,m-1,rp1,-rp1+m+1,rp1-1,-rp1+m+1,rp1); + S7(m-2,rp1-1,m,rp1,-rp1+m+1,rp1-1,-rp1+m+2,rp1); + S1(m-1,rp1,-rp1+m+1,rp1); + S2(m-1,rp1,m,rp1,-rp1+m+1,rp1,-rp1+m+2,rp1); + S3(m-1,rp1,m,rp1+1,-rp1+m+1,rp1,-rp1+m+1,rp1+1); + S4(m-1,rp1,m+1,rp1+1,-rp1+m+1,rp1,-rp1+m+2,rp1+1); + } + S6(m-2,m-1,m-1,m,1,m-1,1,m); + S7(m-2,m-1,m,m,1,m-1,2,m); + S1(m-1,m,1,m); + S2(m-1,m,m,m,1,m,2,m); + for (coordP1=1;coordP1<=m;coordP1++) { + S8(m-1,coordP1); + } + } + for (glT1=m;glT1<=2*m-4;glT1++) { + S5(glT1-1,glT1-m+2,glT1,glT1-m+2,m-1,glT1-m+2,m,glT1-m+2); + S6(glT1-1,glT1-m+1,glT1,glT1-m+2,m,glT1-m+1,m,glT1-m+2); + S1(glT1,glT1-m+2,m,glT1-m+2); + S3(glT1,glT1-m+2,glT1+1,glT1-m+3,m,glT1-m+2,m,glT1-m+3); + for (rp1=glT1-m+3;rp1<=m-1;rp1++) { + S5(glT1-1,rp1,glT1,rp1,glT1-rp1+1,rp1,glT1-rp1+2,rp1); + S6(glT1-1,rp1-1,glT1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+2,rp1); + S7(glT1-1,rp1-1,glT1+1,rp1,glT1-rp1+2,rp1-1,glT1-rp1+3,rp1); + S1(glT1,rp1,glT1-rp1+2,rp1); + S2(glT1,rp1,glT1+1,rp1,glT1-rp1+2,rp1,glT1-rp1+3,rp1); + S3(glT1,rp1,glT1+1,rp1+1,glT1-rp1+2,rp1,glT1-rp1+2,rp1+1); + S4(glT1,rp1,glT1+2,rp1+1,glT1-rp1+2,rp1,glT1-rp1+3,rp1+1); + } + S5(glT1-1,m,glT1,m,glT1-m+1,m,glT1-m+2,m); + S6(glT1-1,m-1,glT1,m,glT1-m+2,m-1,glT1-m+2,m); + S7(glT1-1,m-1,glT1+1,m,glT1-m+2,m-1,glT1-m+3,m); + S1(glT1,m,glT1-m+2,m); + S2(glT1,m,glT1+1,m,glT1-m+2,m,glT1-m+3,m); + for (coordP1=glT1-m+2;coordP1<=m;coordP1++) { + S8(glT1,coordP1); + } + } + if (m >= 3) { + S5(2*m-4,m-1,2*m-3,m-1,m-1,m-1,m,m-1); + S6(2*m-4,m-2,2*m-3,m-1,m,m-2,m,m-1); + S1(2*m-3,m-1,m,m-1); + S3(2*m-3,m-1,2*m-2,m,m,m-1,m,m); + S5(2*m-4,m,2*m-3,m,m-2,m,m-1,m); + S6(2*m-4,m-1,2*m-3,m,m-1,m-1,m-1,m); + S7(2*m-4,m-1,2*m-2,m,m-1,m-1,m,m); + S1(2*m-3,m,m-1,m); + } + if (m == 2) { + S5(0,1,1,1,1,1,2,1); + S1(1,1,2,1); + S3(1,1,2,2,2,1,2,2); + S6(0,1,1,2,1,1,1,2); + S7(0,1,2,2,1,1,2,2); + S1(1,2,1,2); + } + if (m >= 2) { + S2(2*m-3,m,2*m-2,m,m-1,m,m,m); + for (coordP1=m-1;coordP1<=m;coordP1++) { + S8(2*m-3,coordP1); + } + } + if (m >= 2) { + S5(2*m-3,m,2*m-2,m,m-1,m,m,m); + S6(2*m-3,m-1,2*m-2,m,m,m-1,m,m); + S1(2*m-2,m,m,m); + S8(2*m-2,m); + } +} diff --git a/test/reservoir/cholesky2.c b/test/reservoir/cholesky2.c index 1c80ba4..ee3c52e 100644 --- a/test/reservoir/cholesky2.c +++ b/test/reservoir/cholesky2.c @@ -1,11 +1,10 @@ -/* Generated from /home/skimo/git/cloog/test/./reservoir/cholesky2.cloog by CLooG 0.14.0-225-g6e2d019 gmp bits in 0.02s. */ +/* Generated from ../../../git/cloog/test/reservoir/cholesky2.cloog by CLooG 0.14.0-283-g7c18f7a gmp bits in 0.04s. */ if (M >= 1) { if (M >= 2) { S1(1); - S2(1,2); } - if (M >= 3) { - S2(1,3); + for (c2=2;c2<=min(3,M);c2++) { + S2(1,c2); } if (M == 1) { S1(1); -- 2.11.4.GIT