From eade72083db6e670b7d2bf04dd118542111dbdb7 Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege
Date: Wed, 9 Oct 2013 11:46:20 +0200
Subject: [PATCH] gpu: only use shared memory if kernel will be mapped to more than one thread

Originally, we could in theory end up mapping data to shared memory
if the kernel is going to be mapped to a single thread (in particular,
if the number of block dimensions is zero), because the check
for coalescing that we perform is meaningless in this case and
may return 0, making it look like it may be worthwhile to map
the data to shared memory to enable coalescing.

Simply do not even consider mapping anything to shared memory
for kernels that are going to be mapped to a single thread.

Signed-off-by: Sven Verdoolaege
---
 gpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gpu.c b/gpu.c
index 3854a81..5c0f80e 100644
--- a/gpu.c
+++ b/gpu.c
@@ -2233,7 +2233,8 @@ static __isl_give isl_map *next(__isl_take isl_space *domain_dim, int pos)
  * wrapped over the last thread index results in incrementing
  * the last array index.
  *
- * This function is only called for access relations without reuse.
+ * This function is only called for access relations without reuse and
+ * kernels with at least one block dimension.
  */
 static int access_is_coalesced(struct gpu_gen *gen,
 	__isl_keep isl_union_map *access)
@@ -2830,7 +2831,7 @@ static int compute_group_bounds_core(struct gpu_gen *gen,
 	int no_reuse;
 	isl_map *acc;
 	int force_private = group->array->force_private;
-	int use_shared = gen->options->use_shared_memory;
+	int use_shared = gen->options->use_shared_memory && gen->n_block > 0;
 	int use_private = force_private || gen->options->use_private_memory;
 
 	if (!use_shared && !use_private)
-- 
2.11.4.GIT