From 37be9977d492915ec551f20bfd40c630d0672f2e Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege
Date: Mon, 2 Jan 2017 15:48:22 +0100
Subject: [PATCH] gpu_group_references: do not construct shared tile on force_private

When an array is marked force_private, compute_group_bounds_core
always constructs a private tile. It may however also construct
a shared tile, even if the array is marked force_private.
This did not cause problems before because the private tile
would be selected instead of the shared tile.
However, since b926320 (gpu_array_ref_group_type: take tile with minimal
depth if both types exist, Mon Jun 29 11:17:53 2015 +0200), a shared tile
may end up getting selected if it has a smaller depth.
In case of a force_private array, the computed depth of a shared tile
may not be meaningful, so do not construct the shared tile
in the first place to avoid this problem.

Reported-by: Michael Kruse
Signed-off-by: Sven Verdoolaege
---
 gpu_group.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gpu_group.c b/gpu_group.c
index 2e6dd0d..f32e6a1 100644
--- a/gpu_group.c
+++ b/gpu_group.c
@@ -1082,7 +1082,7 @@ static __isl_give isl_map *shared_access(struct gpu_array_ref_group *group,
  * that are forcibly mapped to private memory.
  *
  * If the array is marked force_private, then we bypass all checks
- * and assume we can (and should) use registers.
+ * and assume we can (and should) use registers only.
  *
  * If it turns out we can (or have to) use registers, we compute
  * the private memory tile size using can_tile, after introducing a dependence
@@ -1097,7 +1097,7 @@ static int compute_group_bounds_core(struct ppcg_kernel *kernel,
 	int no_reuse, coalesced;
 	isl_map *acc;
 	int force_private = group->local_array->force_private;
-	int use_shared = kernel->options->use_shared_memory &&
+	int use_shared = !force_private && kernel->options->use_shared_memory &&
 				data->n_thread > 0;
 	int use_private = force_private || kernel->options->use_private_memory;
 	int r = 0;
-- 
2.11.4.GIT