From 991a9d1d179dadee7c22a79619c1d38473598568 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Mon, 18 Aug 2014 12:00:00 +0200 Subject: [PATCH] gpu: generate host code from schedule tree In the end, we want to construct a single schedule tree for the host and kernel code combined. Start with a schedule tree for the host code so that we can introduce changes more gradually. Signed-off-by: Sven Verdoolaege --- gpu.c | 36 +++++++++++++++++++++--------------- gpu.h | 2 ++ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/gpu.c b/gpu.c index b1e9f74..02db1cb 100644 --- a/gpu.c +++ b/gpu.c @@ -3637,8 +3637,8 @@ error: return NULL; } -/* Use isl to generate code for the outer gen->tile_first loops - * of the global schedule in gen->sched, resulting in the host code. +/* Use isl to generate host code from gen->host_schedule, which corresponds to + * the outer gen->tile_first loops of the global schedule in gen->sched. * Within each iteration of this partial schedule, i.e., for each kernel * launch, create_host_leaf takes care of generating the kernel code. */ @@ -3646,22 +3646,17 @@ static __isl_give isl_ast_node *generate_host_code(struct gpu_gen *gen) { isl_ast_build *build; isl_ast_node *tree; - isl_union_map *sched; - isl_map *proj; + isl_schedule *schedule; isl_id_list *iterators; - sched = isl_union_map_copy(gen->sched); - proj = projection(isl_union_map_get_space(sched), - gen->untiled_len, gen->tile_first); - sched = isl_union_map_apply_range(sched, isl_union_map_from_map(proj)); - isl_options_set_ast_build_group_coscheduled(gen->ctx, 1); build = isl_ast_build_from_context(isl_set_copy(gen->prog->context)); iterators = ppcg_scop_generate_names(gen->prog->scop, gen->tile_first, "h"); build = isl_ast_build_set_iterators(build, iterators); build = isl_ast_build_set_create_leaf(build, &create_host_leaf, gen); - tree = isl_ast_build_node_from_schedule_map(build, sched); + schedule = isl_schedule_copy(gen->host_schedule); + tree = isl_ast_build_node_from_schedule(build, schedule); isl_ast_build_free(build); return tree; @@ -3722,7 +3717,8 @@ static __isl_give isl_schedule_node *select_outer_band(struct gpu_gen *gen, /* Check if this band node is tilable and has any parallel loops. If so, * take it as the outermost tilable band. If not, continue looking for the * outermost tilable band in the children of the current band. - * Return a pointer to the same node. + * Return a pointer to the same node in a tree where all outermost tilable + * bands in the current subtree have been removed. */ static __isl_give isl_schedule_node *band_select_outer_band(struct gpu_gen *gen, __isl_take isl_schedule_node *node, int pos, struct band_info *info) @@ -3750,6 +3746,8 @@ static __isl_give isl_schedule_node *band_select_outer_band(struct gpu_gen *gen, info->suffix = isl_schedule_node_get_subtree_schedule_union_map(node); isl_union_map_foreach_map(info->prefix, &set_stmt_tile_len, info); + node = isl_schedule_node_cut(node); + return node; } @@ -3840,7 +3838,8 @@ static void separate_bands(struct band_info *info, int n) * for tile_len and/or n_parallel. Finally, combine the resulting * prefix and suffix schedules into a single pair of prefix and * suffix schedules for the entire list. - * Return a pointer to the same node. + * Return a pointer to the same node in a tree where all outermost tilable + * bands in the current subtree have been removed. */ static __isl_give isl_schedule_node *list_select_outer_band( struct gpu_gen *gen, __isl_take isl_schedule_node *node, int pos, @@ -3913,7 +3912,8 @@ static __isl_give isl_schedule_node *list_select_outer_band( /* Select the outermost bands in the elements of the set node "node". * If the schedule_separate_components is set, then separate all bands. - * Return a pointer to the same node. + * Return a pointer to the same node in a tree where all outermost tilable + * bands in the current subtree have been removed. */ static __isl_give isl_schedule_node *set_select_outer_band( struct gpu_gen *gen, __isl_take isl_schedule_node *node, int pos, @@ -3928,7 +3928,8 @@ static __isl_give isl_schedule_node *set_select_outer_band( /* Select the outermost bands in the elements of the sequence node "node", * separating all bands. - * Return a pointer to the same node. + * Return a pointer to the same node in a tree where all outermost tilable + * bands in the current subtree have been removed. */ static __isl_give isl_schedule_node *sequence_select_outer_band( struct gpu_gen *gen, __isl_take isl_schedule_node *node, int pos, @@ -3956,7 +3957,8 @@ static __isl_give isl_schedule_node *leaf_select_outer_band(struct gpu_gen *gen, } /* Select the outermost tilable band in the subtree that "node" points to and - * return a pointer to the same node. + * return a pointer to the same node in a tree where all outermost tilable + * bands in the current subtree have been removed. */ static __isl_give isl_schedule_node *select_outer_band(struct gpu_gen *gen, __isl_take isl_schedule_node *node, int pos, struct band_info *info) @@ -4000,6 +4002,8 @@ static __isl_give isl_schedule_node *select_outer_band(struct gpu_gen *gen, * * Return the complete schedule, with the tilable bands aligned * at gen->tile_first and padded with zero, if needed. + * Store a schedule tree corresponding to the outer gen->tile_first + * dimensions in gen->host_schedule. */ static __isl_give isl_union_map *select_outer_tilable_band(struct gpu_gen *gen, __isl_keep isl_schedule *schedule) @@ -4012,6 +4016,7 @@ static __isl_give isl_union_map *select_outer_tilable_band(struct gpu_gen *gen, node = isl_schedule_get_root(schedule); node = select_outer_band(gen, node, 0, &info); + gen->host_schedule = isl_schedule_node_get_schedule(node); isl_schedule_node_free(node); gen->tile_first = info.tile_first; @@ -4495,6 +4500,7 @@ static __isl_give isl_printer *generate(__isl_take isl_printer *p, } isl_union_map_free(gen->sched); + isl_schedule_free(gen->host_schedule); gpu_prog_free(prog); diff --git a/gpu.h b/gpu.h index 9640fda..a0a3ef1 100644 --- a/gpu.h +++ b/gpu.h @@ -185,6 +185,8 @@ struct gpu_gen { /* Number of rows in schedule after tiling/wrapping over threads. */ int thread_tiled_len; + /* A schedule tree corresponding to the host code. */ + isl_schedule *host_schedule; /* Global untiled schedule. */ isl_union_map *sched; /* Local (per kernel launch) tiled schedule. */ -- 2.11.4.GIT