From f0e0f1f47a4b17f750da687b783c021f603ce33e Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Tue, 18 Apr 2017 16:48:25 +0200 Subject: [PATCH] scheduler: try carrying only self-dependences in the Feautrier fallback A Feautrier style scheduler is used as a fallback for the Pluto-scheduler when the latter fails to make progress. This may happen, for example, when the --schedule-outer-coincidence option is set and it proves to be impossible to construct a band member satisfying the coincidence constraints. The fallback scheduler should then try to carry as many validity (and coincidence) constraints as possible in order to increase the chances that a subsequent nested call to the Pluto-scheduler will produce a solution. However, the Feautrier scheduler tends to carry a bit too many dependences for the purpose for which it is (ab)used. In particular, if it is possible to construct a valid schedule that results in several strongly connected components at the next level, then the Feautrier scheduler will scale the schedule by the number of components and assign each component a different offset in order to also carry the edges between the components. Such cases are already handled by split_scaled, but there are similar cases where it is not the entire schedule that is scaled by an integer, but rather a more complicated transformation that has a similar effect. In some cases, the Feautrier scheduler will also shift some statements only to be able to carry one more dependence. Such shifts are not only surprising to the user, they can also result in a deterioration of the entire schedule. One thing that most if not all of the bad side-effects of the Feautrier scheduler have in common is that they are introduced in order to be able to carry a dependence between different nodes. This commit therefore tries to first carry self-dependences only (while the other dependences are only required to be respected). This usually produces the same benefits of a Feautrier step that also tries to carry the other dependences, but without the unwanted side-effects. If the Feautrier scheduler is unable to carry any of the self-dependences, then a second attempt is performed with all the dependences. For example, for the PolyBench cholesky benchmark with --schedule-outer-coincidence set, trying to carry all dependences produces a schedule of the form { S_10[i] -> [(3i)]; S_5[i, j] -> [(1 + 3j)]; S_3[i, j, k] -> [(2j + k)]; S_8[i, k] -> [(2i + k)] } When only trying to carry self-dependences, the following schedule is produced: { S_10[i] -> [(i)]; S_5[i, j] -> [(j)]; S_3[i, j, k] -> [(k)]; S_8[i, k] -> [(k)] } Note that if there are any self-dependences to carry, then there will typically be no scaling in the schedule and split_scaled is not needed. However, it can still be useful in cases where there are no self-dependences. The rational solution of the LP problem constructed by the Feautrier scheduler may still be non-integral, even when only trying to carry self-dependences. It is therefore still useful to continue looking for an integral solution. Signed-off-by: Sven Verdoolaege --- doc/user.pod | 10 ++++++++ include/isl/schedule.h | 3 +++ isl_options.c | 7 ++++++ isl_options_private.h | 1 + isl_scheduler.c | 62 ++++++++++++++++++++++++++++++++++++++++---------- 5 files changed, 71 insertions(+), 12 deletions(-) diff --git a/doc/user.pod b/doc/user.pod index d78ce712..d192bc48 100644 --- a/doc/user.pod +++ b/doc/user.pod @@ -9299,6 +9299,10 @@ L. isl_ctx *ctx, int val); int isl_options_get_schedule_algorithm( isl_ctx *ctx); + isl_stat isl_options_set_schedule_carry_self_first( + isl_ctx *ctx, int val); + int isl_options_get_schedule_carry_self_first( + isl_ctx *ctx); isl_stat isl_options_set_schedule_separate_components( isl_ctx *ctx, int val); int isl_options_get_schedule_separate_components( @@ -9396,6 +9400,12 @@ For the Feautrier style scheduler, this option detects potentially coalescing schedules and then tries to adjust the schedule to avoid the coalescing. +=item * schedule_carry_self_first + +If this option is set, then the Feautrier style scheduler +(when used as a fallback for the Pluto-like scheduler) will +first try to only carry self-dependences. + =item * schedule_separate_components If this option is set then the function C diff --git a/include/isl/schedule.h b/include/isl/schedule.h index a11de979..895888e3 100644 --- a/include/isl/schedule.h +++ b/include/isl/schedule.h @@ -47,6 +47,9 @@ int isl_options_get_schedule_serialize_sccs(isl_ctx *ctx); isl_stat isl_options_set_schedule_whole_component(isl_ctx *ctx, int val); int isl_options_get_schedule_whole_component(isl_ctx *ctx); +isl_stat isl_options_set_schedule_carry_self_first(isl_ctx *ctx, int val); +int isl_options_get_schedule_carry_self_first(isl_ctx *ctx); + __isl_give isl_schedule_constraints *isl_schedule_constraints_copy( __isl_keep isl_schedule_constraints *sc); __isl_give isl_schedule_constraints *isl_schedule_constraints_on_domain( diff --git a/isl_options.c b/isl_options.c index ad6ce3dc..eb3dd02d 100644 --- a/isl_options.c +++ b/isl_options.c @@ -169,6 +169,8 @@ ISL_ARG_BOOL(struct isl_options, schedule_whole_component, 0, ISL_ARG_CHOICE(struct isl_options, schedule_algorithm, 0, "schedule-algorithm", isl_schedule_algorithm_choice, ISL_SCHEDULE_ALGORITHM_ISL, "scheduling algorithm to use") +ISL_ARG_BOOL(struct isl_options, schedule_carry_self_first, 0, + "schedule-carry-self-first", 1, "try and carry self-dependences first") ISL_ARG_BOOL(struct isl_options, schedule_serialize_sccs, 0, "schedule-serialize-sccs", 0, "serialize strongly connected components in dependence graph") @@ -295,6 +297,11 @@ ISL_CTX_GET_CHOICE_DEF(isl_options, struct isl_options, isl_options_args, schedule_algorithm) ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_carry_self_first) +ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + schedule_carry_self_first) + +ISL_CTX_SET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_serialize_sccs) ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, schedule_serialize_sccs) diff --git a/isl_options_private.h b/isl_options_private.h index 400ba0b1..13f21a7f 100644 --- a/isl_options_private.h +++ b/isl_options_private.h @@ -46,6 +46,7 @@ struct isl_options { int schedule_separate_components; int schedule_whole_component; unsigned schedule_algorithm; + int schedule_carry_self_first; int schedule_serialize_sccs; int tile_scale_tile_loops; diff --git a/isl_scheduler.c b/isl_scheduler.c index d1b1d2e8..0c825a6f 100644 --- a/isl_scheduler.c +++ b/isl_scheduler.c @@ -3600,7 +3600,8 @@ static isl_stat add_intra_constraints(struct isl_sched_graph *graph, /* Add the constraints "coef" derived from an edge from "src" to "dst" * to graph->lp in order to respect the dependences and to try and carry them. - * "pos" is the sequence number of the edge that needs to be carried. + * "pos" is the sequence number of the edge that needs to be carried or + * -1 if no attempt should be made to carry the dependences. * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y) * of valid constraints for (x, y) with x and y instances of "src" and "dst". * @@ -3608,9 +3609,15 @@ static isl_stat add_intra_constraints(struct isl_sched_graph *graph, * * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i * - * for each (x,y) in the dependence relation of the edge. + * for each (x,y) in the dependence relation of the edge or + * + * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0 + * + * if pos is -1. * That is, * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) + * or + * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) * needs to be plugged in for (c_0, c_n, c_x, c_y), * taking into account that each coefficient in c_j_x and c_k_x is represented * as a pair of non-negative coefficients. @@ -3629,7 +3636,8 @@ static isl_stat add_inter_constraints(struct isl_sched_graph *graph, ctx = isl_basic_set_get_ctx(coef); offset = coef_var_offset(coef); dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1); - isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); + if (pos >= 0) + isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map); return isl_stat_ok; @@ -3694,11 +3702,13 @@ static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx, * * "graph" is the schedule constraint graph for which an LP problem * is being constructed. + * "carry_inter" indicates whether inter-node edges should be carried. * "pos" is the position of the next edge that needs to be carried. */ struct isl_add_all_constraints_data { isl_ctx *ctx; struct isl_sched_graph *graph; + int carry_inter; int pos; }; @@ -3728,7 +3738,7 @@ static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user) /* Add the constraints "coef" derived from an edge from a node j * to a node k to data->graph->lp in order to respect the dependences and - * to try and carry them. + * to try and carry them (provided data->carry_inter is set). * * The space of "coef" is of the form * @@ -3742,6 +3752,7 @@ static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user) struct isl_add_all_constraints_data *data = user; isl_space *space, *dom; struct isl_sched_node *src, *dst; + int pos; space = isl_basic_set_get_space(coef); space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space))); @@ -3752,19 +3763,22 @@ static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user) dst = graph_find_compressed_node(data->ctx, data->graph, space); isl_space_free(space); - return add_inter_constraints(data->graph, src, dst, coef, data->pos++); + pos = data->carry_inter ? data->pos++ : -1; + return add_inter_constraints(data->graph, src, dst, coef, pos); } /* Add constraints to graph->lp that force all (conditional) validity * dependences to be respected and attempt to carry them. * "intra" is the sequence of coefficient constraints for intra-node edges. * "inter" is the sequence of coefficient constraints for inter-node edges. + * "carry_inter" indicates whether inter-node edges should be carried or + * only respected. */ static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, __isl_keep isl_basic_set_list *intra, - __isl_keep isl_basic_set_list *inter) + __isl_keep isl_basic_set_list *inter, int carry_inter) { - struct isl_add_all_constraints_data data = { ctx, graph }; + struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; data.pos = 0; if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0) @@ -3828,6 +3842,9 @@ static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra, * "intra" is the sequence of coefficient constraints for intra-node edges. * "inter" is the sequence of coefficient constraints for inter-node edges. * "n_edge" is the total number of edges. + * "carry_inter" indicates whether inter-node edges should be carried or + * only respected. That is, if "carry_inter" is not set, then + * no e_i variables are introduced for the inter-node edges. * * All variables of the LP are non-negative. The actual coefficients * may be negative, so each coefficient is represented as the difference @@ -3851,7 +3868,7 @@ static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra, */ static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, int n_edge, __isl_keep isl_basic_set_list *intra, - __isl_keep isl_basic_set_list *inter) + __isl_keep isl_basic_set_list *inter, int carry_inter) { int i; int k; @@ -3899,7 +3916,7 @@ static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, isl_int_set_si(graph->lp->ineq[k][0], 1); } - if (add_all_constraints(ctx, graph, intra, inter) < 0) + if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) return isl_stat_error; return isl_stat_ok; @@ -4445,6 +4462,8 @@ static __isl_give isl_basic_set_list *collect_inter_validity( * If "want_integral" is set, then compute an integral solution * for the coefficients rather than using the numerators * of a rational solution. + * "carry_inter" indicates whether inter-node edges should be carried or + * only respected. * * If none of the "n_edge" groups can be carried * then return an empty vector. @@ -4452,11 +4471,12 @@ static __isl_give isl_basic_set_list *collect_inter_validity( static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, struct isl_sched_graph *graph, int n_edge, __isl_keep isl_basic_set_list *intra, - __isl_keep isl_basic_set_list *inter, int want_integral) + __isl_keep isl_basic_set_list *inter, int want_integral, + int carry_inter) { isl_basic_set *lp; - if (setup_carry_lp(ctx, graph, n_edge, intra, inter) < 0) + if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0) return NULL; lp = isl_basic_set_copy(graph->lp); @@ -4480,6 +4500,12 @@ static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, * If a fallback solution is being computed, then compute an integral solution * for the coefficients rather than using the numerators * of a rational solution. + * + * If a fallback solution is being computed, if there are any intra-node + * dependences, and if requested by the user, then first try + * to only carry those intra-node dependences. + * If this fails to carry any dependences, then try again + * with the inter-node dependences included. */ static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, struct isl_sched_graph *graph, int fallback, int coincidence) @@ -4495,6 +4521,18 @@ static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, goto error; n_intra = isl_basic_set_list_n_basic_set(carry.intra); n_inter = isl_basic_set_list_n_basic_set(carry.inter); + + if (fallback && n_intra > 0 && + isl_options_get_schedule_carry_self_first(ctx)) { + sol = compute_carrying_sol_coef(ctx, graph, n_intra, + carry.intra, carry.inter, fallback, 0); + if (!sol || sol->size != 0 || n_inter == 0) { + isl_carry_clear(&carry); + return sol; + } + isl_vec_free(sol); + } + n_edge = n_intra + n_inter; if (n_edge == 0) { isl_carry_clear(&carry); @@ -4502,7 +4540,7 @@ static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, } sol = compute_carrying_sol_coef(ctx, graph, n_edge, - carry.intra, carry.inter, fallback); + carry.intra, carry.inter, fallback, 1); isl_carry_clear(&carry); return sol; error: -- 2.11.4.GIT