From 96a778ed697f2ca51f92ff737a624dd72440f65e Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Sat, 26 Nov 2011 16:20:03 +0100 Subject: [PATCH] allow specification of maximal amount of shared memory Signed-off-by: Sven Verdoolaege --- cuda.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ppcg_options.c | 2 ++ ppcg_options.h | 3 +++ 3 files changed, 71 insertions(+) diff --git a/cuda.c b/cuda.c index 9ad46c3..fbb9ffe 100644 --- a/cuda.c +++ b/cuda.c @@ -80,6 +80,8 @@ struct cuda_array_info { isl_space *dim; /* Element type. */ char *type; + /* Size of a single element, as recorded by extract_array_info. */ + int size; /* Name of the array. */ char *name; /* Number of indices. */ @@ -261,6 +263,7 @@ static int extract_array_info(__isl_take isl_set *array, void *user) assert(pa); gen->array[gen->n_array].type = strdup(pa->element_type); + gen->array[gen->n_array].size = pa->element_size; if (n_index == 0) { isl_set *space; @@ -3235,6 +3238,68 @@ static void compute_private_size(struct cuda_gen *gen) } } +/* Compute the size of the tile specified by the list "bound" of n_index + * cuda_array_bounds in number of elements and put the result in *size. *size is only assigned here (the caller is expected to have initialized it); if n_index is 0 the result is 1. + */ +static void tile_size(unsigned n_index, struct cuda_array_bound *bound, + isl_int *size) +{ + int i; + + isl_int_set_si(*size, 1); + + for (i = 0; i < n_index; ++i) + isl_int_mul(*size, *size, bound[i].size); +} + +/* If max_shared_memory is not set to infinity (-1, or any other negative value), then make + * sure that the total amount of shared memory required by the + * array reference groups mapped to shared memory is no larger + * than this maximum. + * + * We apply a greedy approach and discard (keep in global memory) + * those groups that would result in a total memory size that + * is larger than the maximum. Groups are visited in array order; a discarded group leaves the remaining budget untouched, so later groups may still fit. 
+ */ +static void check_shared_memory_bound(struct cuda_gen *gen) +{ + int i, j; + isl_int left, size; + + if (gen->options->max_shared_memory < 0) + return; + + isl_int_init(left); + isl_int_init(size); + isl_int_set_si(left, gen->options->max_shared_memory); /* remaining budget */ + + for (i = 0; i < gen->n_array; ++i) { + struct cuda_array_info *array = &gen->array[i]; + + for (j = 0; j < array->n_group; ++j) { + struct cuda_array_ref_group *group; + + group = array->groups[j]; + if (!group->shared_bound) /* no shared tile for this group */ + continue; + + tile_size(array->n_index, group->shared_bound, &size); + isl_int_mul_ui(size, size, array->size); /* number of elements times element size */ + + if (isl_int_le(size, left)) { /* fits in the remaining budget */ + isl_int_sub(left, left, size); + continue; + } + + free_bound_list(group->shared_bound, array->n_index); /* does not fit: drop the shared bounds */ + group->shared_bound = NULL; + } + } + + isl_int_clear(size); + isl_int_clear(left); +} + /* Fill up the groups array with singleton groups, i.e., one group * per reference, initializing the array, access, write and refs fields. * In particular the access field is initialized to the scheduled @@ -3801,6 +3866,7 @@ static void print_host_user(struct gpucode_info *code, gen->privatization = compute_privatization(gen); group_references(gen); compute_private_size(gen); + check_shared_memory_bound(gen); localize_bounds(gen, host_domain); gen->local_sched = interchange_for_unroll(gen, gen->local_sched); diff --git a/ppcg_options.c b/ppcg_options.c index e08508f..f5830d2 100644 --- a/ppcg_options.c +++ b/ppcg_options.c @@ -23,4 +23,6 @@ ISL_ARG_STR(struct ppcg_options, ctx, 0, "ctx", "context", NULL, ISL_ARG_INT(struct ppcg_options, tile_size, 'S', "tile-size", "size", 32, NULL) ISL_ARG_STR(struct ppcg_options, sizes, 0, "sizes", "sizes", NULL, "Per kernel tile, grid and block sizes") +ISL_ARG_INT(struct ppcg_options, max_shared_memory, 0, + "max-shared-memory", "size", 8192, "maximal amount of shared memory") ISL_ARGS_END diff --git a/ppcg_options.h b/ppcg_options.h index e3efa5a..1f21c1d 100644 --- a/ppcg_options.h +++ b/ppcg_options.h 
@@ -17,6 +17,9 @@ struct ppcg_options { /* Take advantage of shared memory. */ int use_shared_memory; + + /* Maximal amount of shared memory, in the same unit as the array element sizes (presumably bytes); a negative value means no limit. */ + int max_shared_memory; }; ISL_ARG_DECL(ppcg_options, struct ppcg_options, ppcg_options_args) -- 2.11.4.GIT