From 1fcf424ea6d22464d99e27ca87e9513c821a6052 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sun, 7 Aug 2011 23:55:24 +0100 Subject: [PATCH] Fix code generation, when scalars are used as zero dimensional arrays Scalars can be used not only to index an array or to store loop iterators, but also to store data of a calculation. When this happens and the data needs to be referenced, such scalars need special treatment. This change makes sure that the code compiles and is correct. It still copies the scalars into GPU memory and references them by pointer indirection. This is correct and needed if a scalar is not read-only, but its value is changed inside the scop and used afterwards. A possible optimization for read-only scalars is to pass them directly as function arguments. Signed-off-by: Tobias Grosser Signed-off-by: Sven Verdoolaege --- cuda.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/cuda.c b/cuda.c index 2e92500..2efa8b7 100644 --- a/cuda.c +++ b/cuda.c @@ -370,6 +370,15 @@ static void free_device_arrays(struct cuda_gen *gen) gen->array[i].name); } +/* Check if a cuda array is a scalar. A scalar is a value that is not stored + * as an array or through a pointer reference, but as a single data element. At + * the moment, scalars are represented as zero dimensional arrays. 
+ */ +static int cuda_array_is_scalar(struct cuda_array_info *array) +{ + return (array->n_index == 0); +} + static void copy_arrays_to_device(struct cuda_gen *gen) { int i; @@ -386,8 +395,15 @@ static void copy_arrays_to_device(struct cuda_gen *gen) if (empty) continue; - fprintf(gen->cuda.host_c, "cudaMemcpy(dev_%s, %s, ", - gen->array[i].name, gen->array[i].name); + fprintf(gen->cuda.host_c, "cudaMemcpy(dev_%s,", + gen->array[i].name); + + if (cuda_array_is_scalar(&(gen->array[i]))) + fprintf(gen->cuda.host_c, " &%s, ", + gen->array[i].name); + else + fprintf(gen->cuda.host_c, " %s, ", gen->array[i].name); + print_array_size(gen, gen->cuda.host_c, &gen->array[i]); fprintf(gen->cuda.host_c, ", cudaMemcpyHostToDevice);\n"); } @@ -1649,6 +1665,12 @@ static void print_access(struct cuda_gen *gen, __isl_take isl_map *access, bounds = group->shared_bound; print_array_name(gen->cuda.kernel_c, group); + + if (cuda_array_is_scalar(array)) { + isl_set_free(data_set); + return; + } + fprintf(gen->cuda.kernel_c, "["); } @@ -3399,6 +3421,11 @@ static void print_global_index(isl_ctx *ctx, FILE *out, int i; isl_printer *prn; + if (cuda_array_is_scalar(array)) { + fprintf(out, "*%s", array->name); + return; + } + fprintf(out, "%s[", array->name); for (i = 0; i + 1 < array->n_index; ++i) fprintf(out, "("); -- 2.11.4.GIT