From 9479c2b5aa02a6e5f5093ac4df4261945dcbafdd Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Wed, 28 May 2014 10:33:31 +0200 Subject: [PATCH] optionally dump effectively used tile, grid and block sizes As explained in the README, it can be difficult to predict how the statements will be distributed over the kernels, but also what the dimension of the tilable band will be that is tiled and mapped to blocks and threads. The README recommends the user to first run PPCG with the defaults and to then examine the kernels. This can be difficult to automate, however. A dump of the effectively used tile, grid and block sizes is easier to analyze. Requested-by: "Betts, Adam" Tested-by: "Betts, Adam" Signed-off-by: Sven Verdoolaege --- README | 2 ++ gpu.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- ppcg_options.c | 3 +++ ppcg_options.h | 1 + 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/README b/README index 6dfee9b..b786f6c 100644 --- a/README +++ b/README @@ -104,6 +104,8 @@ Since PPCG performs some scheduling, it can be difficult to predict what exactly will end up in a kernel. If you want to specify tile, grid or block sizes, you may want to run PPCG first with the defaults, examine the kernels and then run PPCG again with the desired sizes. +Instead of examining the kernels, you can also specify the option +--dump-sizes on the first run to obtain the effectively used default sizes. Compiling the generated CUDA code with nvcc diff --git a/gpu.c b/gpu.c index dda8970..ac91303 100644 --- a/gpu.c +++ b/gpu.c @@ -131,9 +131,12 @@ struct gpu_gen { /* The sequence of types for which a definition has been printed. */ struct gpu_types types; - /* tile, grid and block sizes for each kernel */ + /* User specified tile, grid and block sizes for each kernel */ isl_union_map *sizes; + /* Effectively used tile, grid and block sizes for each kernel */ + isl_union_map *used_sizes; + /* Identifier of current kernel. 
*/ int kernel_id; /* Pointer to the current kernel. */ @@ -821,8 +824,38 @@ static void read_sizes_from_set(__isl_take isl_set *set, int *sizes, int *len) isl_set_free(set); } +/* Add the map { kernel[id] -> type[sizes] } to gen->used_sizes, + * if the option debug->dump_sizes is set. + */ +static void set_used_sizes(struct gpu_gen *gen, const char *type, int id, + int *sizes, int len) +{ + int i; + isl_space *space; + isl_map *map; + + if (!gen->options->debug->dump_sizes) + return; + + space = isl_union_map_get_space(gen->used_sizes); + space = isl_space_set_from_params(space); + space = isl_space_add_dims(space, isl_dim_set, 1); + space = isl_space_set_tuple_name(space, isl_dim_set, "kernel"); + space = isl_space_from_domain(space); + space = isl_space_add_dims(space, isl_dim_out, len); + space = isl_space_set_tuple_name(space, isl_dim_out, type); + + map = isl_map_universe(space); + map = isl_map_fix_si(map, isl_dim_in, 0, id); + for (i = 0; i < len; ++i) + map = isl_map_fix_si(map, isl_dim_out, i, sizes[i]); + + gen->used_sizes = isl_union_map_add_map(gen->used_sizes, map); +} + /* Extract user specified "tile" sizes from the "sizes" command line option, * defaulting to option->tile_size in each dimension. + * Add the effectively used sizes to gen->used_sizes. */ static void read_tile_sizes(struct gpu_gen *gen) { @@ -836,6 +869,8 @@ static void read_tile_sizes(struct gpu_gen *gen) size = extract_sizes(gen->sizes, "tile", gen->kernel_id); read_sizes_from_set(size, gen->tile_size, &gen->tile_len); + set_used_sizes(gen, "tile", gen->kernel_id, + gen->tile_size, gen->tile_len); if (gen->n_parallel > gen->tile_len) gen->n_parallel = gen->tile_len; @@ -843,6 +878,7 @@ static void read_tile_sizes(struct gpu_gen *gen) /* Extract user specified "block" sizes from the "sizes" command line option, * after filling in some potentially useful defaults. + * Add the effectively used sizes to gen->used_sizes. 
*/ static void read_block_sizes(struct gpu_gen *gen) { @@ -868,10 +904,13 @@ static void read_block_sizes(struct gpu_gen *gen) size = extract_sizes(gen->sizes, "block", gen->kernel_id); read_sizes_from_set(size, gen->block_dim, &gen->n_block); + set_used_sizes(gen, "block", gen->kernel_id, + gen->block_dim, gen->n_block); } /* Extract user specified "grid" sizes from the "sizes" command line option, * after filling in some potentially useful defaults. + * Add the effectively used sizes to gen->used_sizes. */ static void read_grid_sizes(struct gpu_gen *gen) { @@ -891,6 +930,7 @@ static void read_grid_sizes(struct gpu_gen *gen) size = extract_sizes(gen->sizes, "grid", gen->kernel_id); read_sizes_from_set(size, gen->grid_dim, &gen->n_grid); + set_used_sizes(gen, "grid", gen->kernel_id, gen->grid_dim, gen->n_grid); } /* Extract user specified sizes from the "sizes" command line option @@ -5899,8 +5939,18 @@ int generate_gpu(isl_ctx *ctx, const char *input, FILE *out, gen.types.n = 0; gen.types.name = NULL; + if (options->debug->dump_sizes) { + isl_space *space = isl_space_params_alloc(ctx, 0); + gen.used_sizes = isl_union_map_empty(space); + } + r = ppcg_transform(ctx, input, out, options, &generate_wrap, &gen); + if (options->debug->dump_sizes) { + isl_union_map_dump(gen.used_sizes); + isl_union_map_free(gen.used_sizes); + } + isl_union_map_free(gen.sizes); for (i = 0; i < gen.types.n; ++i) free(gen.types.name[i]); diff --git a/ppcg_options.c b/ppcg_options.c index c5d4c97..505ecb0 100644 --- a/ppcg_options.c +++ b/ppcg_options.c @@ -22,6 +22,9 @@ ISL_ARG_BOOL(struct ppcg_debug_options, dump_schedule_constraints, 0, "dump-schedule-constraints", 0, "dump schedule constraints") ISL_ARG_BOOL(struct ppcg_debug_options, dump_schedule, 0, "dump-schedule", 0, "dump isl computed schedule") +ISL_ARG_BOOL(struct ppcg_debug_options, dump_sizes, 0, + "dump-sizes", 0, + "dump effectively used per kernel tile, grid and block sizes") ISL_ARGS_END ISL_ARGS_START(struct ppcg_options, 
ppcg_opencl_options_args) diff --git a/ppcg_options.h b/ppcg_options.h index fd0dfa5..8fc3a49 100644 --- a/ppcg_options.h +++ b/ppcg_options.h @@ -6,6 +6,7 @@ struct ppcg_debug_options { int dump_schedule_constraints; int dump_schedule; + int dump_sizes; }; struct ppcg_options { -- 2.11.4.GIT