From 564cb4d8df213595fbdfd905f44b1775e70fee37 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Thu, 3 Oct 2013 15:40:50 +0200 Subject: [PATCH] gpu: avoid private memory if unrolling is needed and any_force_private is set That is, do not even construct a private tile in this case. Previously, we would construct such a private tile but then discard it afterwards inside interchange_for_unroll. This original check is, however, performed after all the array reference grouping has finished. If mappings to both shared and private memory are possible, then this grouping would take into account the private tile, while in the end the shared tile would get used. Signed-off-by: Sven Verdoolaege --- gpu.c | 35 ----------------------------------- gpu_group.c | 17 +++++++++++++++-- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/gpu.c b/gpu.c index 63989aa..24b8212 100644 --- a/gpu.c +++ b/gpu.c @@ -1348,28 +1348,6 @@ static __isl_give isl_map *permutation(__isl_take isl_space *dim, return isl_map_from_basic_map(bmap); } -/* Remove the private tiles from all array reference groups, - * except for the groups of arrays that are marked force_private. - */ -static void remove_private_tiles(struct gpu_gen *gen) -{ - int i, j; - - for (i = 0; i < gen->kernel->n_array; ++i) { - struct gpu_local_array_info *local = &gen->kernel->array[i]; - - if (local->force_private) - continue; - - for (j = 0; j < local->n_group; ++j) { - struct gpu_array_ref_group *group = local->groups[j]; - - group->private_tile = - gpu_array_tile_free(group->private_tile); - } - } -} - /* Find all loops involved in any of the index expressions for any of * the private accesses that require unrolling, move them innermost * and then mark them as requiring unrolling by setting gen->first_unroll. @@ -1377,14 +1355,6 @@ static void remove_private_tiles(struct gpu_gen *gen) * we performed in check_private_group_access. Moving them innermost * is therefore a valid transformation. 
* - * If any of the arrays are marked force_private, however, then - * those loops may not be parallel with respect to the marked arrays. - * If any of the loops would have to be moved innermost for the - * (non forced) private accesses and if there are any force_private - * arrays, then we revert the decision to map the selected arrays - * to private memory. An alternative solution would be to expand - * the force_private arrays. - * * Loops up to gen->shared_len are generated before the mapping to * threads is applied. They should therefore be ignored. * @@ -1448,11 +1418,6 @@ static __isl_give isl_union_map *interchange_for_unroll(struct gpu_gen *gen, if (unroll[i]) return sched; - if (kernel->any_force_private) { - remove_private_tiles(gen); - return sched; - } - j = 0; for (i = 0; i < gen->shared_len; ++i) perm[i] = j++; diff --git a/gpu_group.c b/gpu_group.c index 17297a4..e27f3e5 100644 --- a/gpu_group.c +++ b/gpu_group.c @@ -807,6 +807,13 @@ static int check_requires_unroll(struct gpu_gen *gen, * Combining the injectivity of the first test with the single-valuedness * of the second test, we simply test for bijectivity. * + * If the use of the private tile requires unrolling, but some + * of the other arrays are forcibly mapped to private memory, + * then we do not allow the use of this private tile since + * we cannot move the schedule dimensions that need to be unrolled down + * without performing some kind of expansion on those arrays + * that are forcibly mapped to private memory. + * * If the array is marked force_private, then we bypass all checks * and assume we can (and should) use registers. 
* @@ -874,13 +881,19 @@ static int compute_group_bounds_core(struct gpu_gen *gen, return 0; } + acc = isl_map_apply_domain(acc, isl_map_copy(gen->privatization)); + requires_unroll = check_requires_unroll(gen, acc, force_private); + if (requires_unroll < 0 || + (requires_unroll && gen->kernel->any_force_private)) { + isl_map_free(acc); + return requires_unroll < 0 ? -1 : 0; + } + group->private_tile = gpu_array_tile_create(gen->ctx, n_index); if (!group->private_tile) { isl_map_free(acc); return -1; } - acc = isl_map_apply_domain(acc, isl_map_copy(gen->privatization)); - requires_unroll = check_requires_unroll(gen, acc, force_private); group->private_tile->requires_unroll = requires_unroll; if (!can_tile(acc, group->private_tile)) group->private_tile = gpu_array_tile_free(group->private_tile); -- 2.11.4.GIT