From 0d3d3ddd0ab714823e6e98ffecb875ca700a470b Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Wed, 3 May 2017 14:29:47 +0200 Subject: [PATCH] scheduler: solve for original schedule coefficients The Pluto-like scheduler needs to ensure that each row is linearly independent of any previously computed rows. It would therefore compute a transformed space of schedule coefficients and enforce at least one of the transformed schedule coefficients that correspond to linearly independent combinations to be non-zero. If the transformed schedule coefficients are anything other than a permutation of the original schedule coefficients, then the scheduler no longer has any direct control over these original schedule coefficients. For example, it would not necessarily pick the first original coefficient that can be made non-zero to form the next row. This is illustrated by the test case that is changed by this commit. The test case mainly checks that the outer schedule row is computed as "i + j" (by the Feautrier style scheduler), but the Pluto-like scheduler ends up getting used to complete the schedule and it would pick "j" for the second row, while the row "i" would also do and is selected now. As also reported by Oleksandr Zinenko, a related issue was that the sum of the schedule coefficients, which is used as one of the optimization criteria, was expressed in terms of the transformed schedule coefficients rather than the original schedule coefficients. For example, the newly added test case would produce the schedule S[t,i,j,k] -> [t, 2t + i, 2t + i + j, 2t + i + j + k] with sums of coefficients [1, 3, 4, 5] because it happens to have small sums of transformed coefficients, while a schedule S[t,i,j,k] -> [t, 2t + i, t + i + j, 2t + k] } with sums of coefficients [1, 3, 3, 3] is also available. This schedule is now produced for this test case. Express the ILP problem in terms of the original schedule coefficients. This simplifies the code and allows the optimizer to optimize for those original schedule coefficients. Signed-off-by: Sven Verdoolaege --- isl_scheduler.c | 119 +++++++++++++++++++------------------------------------- isl_test.c | 26 ++++++++++++- 2 files changed, 64 insertions(+), 81 deletions(-) diff --git a/isl_scheduler.c b/isl_scheduler.c index d15675b2..7ac88911 100644 --- a/isl_scheduler.c +++ b/isl_scheduler.c @@ -1694,10 +1694,6 @@ static __isl_give isl_basic_set *add_constraints_dim_map( * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-), * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. - * - * Actually, we do not construct constraints for the c_i_x themselves, - * but for the coefficients of c_i_x written as a linear combination - * of the columns in node->cmap. */ static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, struct isl_sched_edge *edge) @@ -1713,8 +1709,6 @@ static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, offset = coef_var_offset(coef); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset, isl_mat_copy(node->cmap)); if (!coef) return isl_stat_error; @@ -1736,10 +1730,6 @@ static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-), * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. * In graph->lp, the c_*^- appear before their c_*^+ counterpart. - * - * Actually, we do not construct constraints for the c_*_x themselves, - * but for the coefficients of c_*_x written as a linear combination - * of the columns in node->cmap. */ static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, struct isl_sched_edge *edge) @@ -1761,10 +1751,6 @@ static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, offset = coef_var_offset(coef); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset, isl_mat_copy(src->cmap)); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset + src->nvar, isl_mat_copy(dst->cmap)); if (!coef) return isl_stat_error; @@ -1804,10 +1790,6 @@ static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, * with each coefficient (except m_0) represented as a pair of non-negative * coefficients. * - * Actually, we do not construct constraints for the c_i_x themselves, - * but for the coefficients of c_i_x written as a linear combination - * of the columns in node->cmap. - * * * If "local" is set, then we add constraints * @@ -1838,8 +1820,6 @@ static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, offset = coef_var_offset(coef); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset, isl_mat_copy(node->cmap)); if (!coef) return isl_stat_error; @@ -1887,10 +1867,6 @@ static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, * with each coefficient (except m_0, c_*_0 and c_*_n) * represented as a pair of non-negative coefficients. * - * Actually, we do not construct constraints for the c_*_x themselves, - * but for the coefficients of c_*_x written as a linear combination - * of the columns in node->cmap. - * * * If "local" is set (and s = 1), then we add constraints * @@ -1923,10 +1899,6 @@ static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph, offset = coef_var_offset(coef); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset, isl_mat_copy(src->cmap)); - coef = isl_basic_set_transform_dims(coef, isl_dim_set, - offset + src->nvar, isl_mat_copy(dst->cmap)); if (!coef) return isl_stat_error; @@ -2282,21 +2254,20 @@ static int count_bound_coefficient_constraints(isl_ctx *ctx, * -c_n + max >= 0 * * The variables coefficients are, however, not represented directly. - * Instead, the variables coefficients c_x are written as a linear - * combination c_x = cmap c_z of some other coefficients c_z, - * which are in turn encoded as c_z = c_z^+ - c_z^-. - * Let a_j be the elements of row i of node->cmap, then + * Instead, the variable coefficients c_x are written as differences + * c_x = c_x^+ - c_x^-. + * That is, * * -max_i <= c_x_i <= max_i * * is encoded as * - * -max_i <= \sum_j a_j (c_z_j^+ - c_z_j^-) <= max_i + * -max_i <= c_x_i^+ - c_x_i^- <= max_i * * or * - * -\sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0 - * \sum_j a_j (c_z_j^+ - c_z_j^-) + max_i >= 0 + * -(c_x_i^+ - c_x_i^-) + max_i >= 0 + * c_x_i^+ - c_x_i^- + max_i >= 0 */ static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, struct isl_sched_node *node, int max) @@ -2327,17 +2298,13 @@ static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, if (!ineq) return isl_stat_error; for (i = 0; i < node->nvar; ++i) { - int pos = 1 + node_var_coef_offset(node); + int pos = 1 + node_var_coef_pos(node, i); if (isl_int_is_neg(node->max->el[i])) continue; - for (j = 0; j < node->nvar; ++j) { - int pos_j = 1 + node_var_coef_pos(node, j); - - isl_int_set(ineq->el[pos_j], node->cmap->row[i][j]); - isl_int_neg(ineq->el[pos_j], node->cmap->row[i][j]); - } + isl_int_set_si(ineq->el[pos], 1); + isl_int_set_si(ineq->el[pos + 1], -1); isl_int_set(ineq->el[0], node->max->el[i]); k = isl_basic_set_alloc_inequality(graph->lp); @@ -2345,7 +2312,7 @@ static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, goto error; isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total); - isl_seq_neg(ineq->el + pos, ineq->el + pos, 2 * node->nvar); + isl_seq_neg(ineq->el + pos, ineq->el + pos + 2 * i, 2); k = isl_basic_set_alloc_inequality(graph->lp); if (k < 0) goto error; @@ -2497,10 +2464,6 @@ static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph, * - c_i_n (if parametric) * - positive and negative parts of c_i_x, in opposite order * - * The c_i_x are not represented directly, but through the columns of - * node->cmap. That is, the computed values are for variable t_i_x - * such that c_i_x = Q t_i_x with Q equal to node->cmap. - * * The constraints are those from the edges plus two or three equalities * to express the sums. * @@ -2609,30 +2572,36 @@ static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) return node->nvar - node->rank >= graph->maxvar - graph->n_row; } -/* Construct a non-triviality region with "n" directions - * over "n_var" coefficients. - * Each direction corresponds to a schedule coefficient, - * where each schedule coefficient is encoded as the difference - * of two non-negative variables, c^+_i - c^-_i - * with c^-_i at position 2 * i and c^+_i at position 2 * i + 1. - * The order of the directions is the same as that of the node variables, - * but the pairs of non-negative variables representing the coefficients +/* Construct a non-triviality region with triviality directions + * corresponding to the rows of "indep". + * The rows of "indep" are expressed in terms of the schedule coefficients c_i, + * while the triviality directions are expressed in terms of + * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing + * before c^+_i. Furthermore, + * the pairs of non-negative variables representing the coefficients * are stored in the opposite order. - * The first direction therefore corresponds to the last such pair. - * Furthermore, if the number of variables is greater than the number - * of directions, then the directions correspond to the last node variables, - * i.e., the first pairs of non-negative variables. */ -static __isl_give isl_mat *construct_trivial(isl_ctx *ctx, int n, int n_var) +static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep) { + isl_ctx *ctx; isl_mat *mat; - int i, off; + int i, j, n, n_var; - off = n_var - n; - mat = isl_mat_zero(ctx, n, 2 * n_var); + if (!indep) + return NULL; + + ctx = isl_mat_get_ctx(indep); + n = isl_mat_rows(indep); + n_var = isl_mat_cols(indep); + mat = isl_mat_alloc(ctx, n, 2 * n_var); + if (!mat) + return NULL; for (i = 0; i < n; ++i) { - mat = isl_mat_set_element_si(mat, i, 2 * (n - 1 - i), -1); - mat = isl_mat_set_element_si(mat, i, 2 * (n - 1 - i) + 1, 1); + for (j = 0; j < n_var; ++j) { + int nj = n_var - 1 - j; + isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]); + isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]); + } } return mat; @@ -2642,13 +2611,8 @@ static __isl_give isl_mat *construct_trivial(isl_ctx *ctx, int n, int n_var) * For each node such that all the remaining rows of its schedule * need to be non-trivial, we construct a non-triviality region. * This region imposes that the next row is independent of previous rows. - * In particular the coefficients c_i_x are represented by t_i_x - * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that - * its first columns span the rows of the previously computed part - * of the schedule. The non-triviality region enforces that at least - * one of the remaining components of t_i_x is non-zero, i.e., - * that the new schedule row depends on at least one of the remaining - * columns of Q. + * In particular, the non-triviality region enforces that at least + * one of the linear combinations in the rows of node->indep is non-zero. */ static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) { @@ -2658,13 +2622,11 @@ static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) for (i = 0; i < graph->n; ++i) { struct isl_sched_node *node = &graph->node[i]; - int skip = node->rank; isl_mat *trivial; graph->region[i].pos = node_var_coef_offset(node); if (needs_row(graph, node)) - trivial = construct_trivial(ctx, node->nvar - skip, - node->nvar); + trivial = construct_trivial(node->indep); else trivial = isl_mat_zero(ctx, 0, 0); graph->region[i].trivial = trivial; @@ -4073,8 +4035,7 @@ error: * Return 1 if the solution is trivial, 0 if it is not and -1 on error. * * Each coefficient is represented as the difference between - * two non-negative values in "sol". "sol" has been computed - * in terms of the original iterators (i.e., without use of cmap). + * two non-negative values in "sol". * We construct the schedule row s and check if it is linearly * independent of previously computed schedule rows * by computing T s, with T the linear combinations that are zero @@ -4106,8 +4067,6 @@ static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) /* Is the schedule row "sol" trivial on any node where it should * not be trivial? - * "sol" has been computed in terms of the original iterators - * (i.e., without use of cmap). * Return 1 if any solution is trivial, 0 if they are not and -1 on error. */ static int is_any_trivial(struct isl_sched_graph *graph, @@ -5115,7 +5074,7 @@ static isl_stat compute_schedule_wcc_band(isl_ctx *ctx, return isl_stat_ok; } coincident = !has_coincidence || use_coincidence; - if (update_schedule(graph, sol, 1, coincident) < 0) + if (update_schedule(graph, sol, 0, coincident) < 0) return isl_stat_error; if (!check_conditional) diff --git a/isl_test.c b/isl_test.c index a1911db5..95f2f66c 100644 --- a/isl_test.c +++ b/isl_test.c @@ -4045,6 +4045,28 @@ static int test_coalescing_schedule(isl_ctx *ctx) return 0; } +/* Check that the scheduler does not perform any needless + * compound skewing. Earlier versions of isl would compute + * schedules in terms of transformed schedule coefficients and + * would not accurately keep track of the sum of the original + * schedule coefficients. It could then produce the schedule + * S[t,i,j,k] -> [t, 2t + i, 2t + i + j, 2t + i + j + k] + * for the input below instead of the schedule below. + */ +static int test_skewing_schedule(isl_ctx *ctx) +{ + const char *D, *V, *P, *S; + + D = "[n] -> { S[t,i,j,k] : 0 <= t,i,j,k < n }"; + V = "[n] -> { S[t,i,j,k] -> S[t+1,a,b,c] : 0 <= t,i,j,k,a,b,c < n and " + "-2 <= a-i <= 2 and -1 <= a-i + b-j <= 1 and " + "-1 <= a-i + b-j + c-k <= 1 }"; + P = "{ }"; + S = "{ S[t,i,j,k] -> [t, 2t + i, t + i + j, 2t + k] }"; + + return test_special_schedule(ctx, D, V, P, S); +} + int test_schedule(isl_ctx *ctx) { const char *D, *W, *R, *V, *P, *S; @@ -4272,7 +4294,7 @@ int test_schedule(isl_ctx *ctx) "S_0[i, j] -> S_0[1 + i, j] : i >= 1 and i <= 9 and " "j >= 1 and j <= 8 }"; P = "{ }"; - S = "{ S_0[i, j] -> [i + j, j] }"; + S = "{ S_0[i, j] -> [i + j, i] }"; ctx->opt->schedule_algorithm = ISL_SCHEDULE_ALGORITHM_FEAUTRIER; if (test_special_schedule(ctx, D, V, P, S) < 0) return -1; @@ -4352,6 +4374,8 @@ int test_schedule(isl_ctx *ctx) return -1; if (test_coalescing_schedule(ctx) < 0) return -1; + if (test_skewing_schedule(ctx) < 0) + return -1; return 0; } -- 2.11.4.GIT