From b7955e683b79cfb09553c3dbab500367574071b2 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Fri, 3 Apr 2015 16:45:06 +0200 Subject: [PATCH] ppcg_kernel_requires_array_argument: check if kernel accesses global memory That is, only require a kernel argument for a given array if that kernel accesses the global memory corresponding to that array rather than if the kernel accesses the array at all. In particular, if the array is mapped to private or shared memory without getting copied in or out, then no corresponding kernel argument is needed. Signed-off-by: Sven Verdoolaege --- gpu.c | 42 ++++++++++++++++++++++++++++++------------ gpu.h | 3 +++ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/gpu.c b/gpu.c index 97e6d6b..ab0f45d 100644 --- a/gpu.c +++ b/gpu.c @@ -938,6 +938,29 @@ static void check_shared_memory_bound(struct ppcg_kernel *kernel) isl_val_free(left); } +/* Mark all arrays of "kernel" that have an array reference group + * that is not mapped to private or shared memory as + * accessing the corresponding global device memory. + */ +static void mark_global_arrays(struct ppcg_kernel *kernel) +{ + int i, j; + + for (i = 0; i < kernel->n_array; ++i) { + struct gpu_local_array_info *local = &kernel->array[i]; + + if (local->global) + continue; + for (j = 0; j < local->n_group; ++j) { + if (gpu_array_ref_group_tile(local->groups[j])) + continue; + + local->global = 1; + break; + } + } +} + /* Compute a tiling for all the array reference groups in "kernel". */ static void compute_group_tilings(struct ppcg_kernel *kernel) @@ -1321,21 +1344,11 @@ static struct ppcg_kernel *ppcg_kernel_create_local_arrays( /* Does "kernel" need to be passed an argument corresponding to array "i"? * - * If the array is not accessed by the kernel at all, then it does - * not need to be passed as an argument. + * The argument is only needed if the kernel accesses this device memory. 
*/ int ppcg_kernel_requires_array_argument(struct ppcg_kernel *kernel, int i) { - isl_space *space; - isl_set *arr; - int empty; - - space = isl_space_copy(kernel->array[i].array->space); - arr = isl_union_set_extract_set(kernel->arrays, space); - empty = isl_set_plain_is_empty(arr); - isl_set_free(arr); - - return empty < 0 ? empty : !empty; + return kernel->array[i].global; } /* Find the element in gen->stmt that has the given "id". @@ -3119,6 +3132,8 @@ static __isl_give isl_schedule_node *add_copies_group_private( return gpu_tree_move_up_to_kernel(node); } + group->local_array->global = 1; + from_access = create_from_access(kernel->ctx, group, read); space = isl_space_domain(isl_multi_aff_get_space(from_access)); access = isl_union_map_preimage_range_multi_aff(access, from_access); @@ -3265,6 +3280,8 @@ static __isl_give isl_schedule_node *add_copies_group_shared( return gpu_tree_move_up_to_kernel(node); } + group->local_array->global = 1; + from_access = create_from_access(kernel->ctx, group, read); tile = gpu_array_ref_group_tile(group); @@ -3666,6 +3683,7 @@ static __isl_give isl_schedule_node *create_kernel(struct gpu_gen *gen, isl_set_free(host_domain); check_shared_memory_bound(kernel); + mark_global_arrays(kernel); compute_group_tilings(kernel); node = gpu_tree_move_down_to_thread(node, kernel->core); diff --git a/gpu.h b/gpu.h index 094b1e3..bcac481 100644 --- a/gpu.h +++ b/gpu.h @@ -60,6 +60,8 @@ struct gpu_array_info { * The "n_group" "groups" are the reference groups associated to the array. * If "force_private" is set, then the array (in practice a scalar) * must be mapped to a register. + * "global" is set if the global device memory corresponding + * to this array is accessed by the kernel. * For each index i with 0 <= i < n_index, * bound[i] is equal to array->bound[i] specialized to the current kernel. 
*/ @@ -70,6 +72,7 @@ struct gpu_local_array_info { struct gpu_array_ref_group **groups; int force_private; + int global; unsigned n_index; isl_pw_aff_list *bound; -- 2.11.4.GIT