From 1fcf424ea6d22464d99e27ca87e9513c821a6052 Mon Sep 17 00:00:00 2001
From: Tobias Grosser <tobias@grosser.es>
Date: Sun, 7 Aug 2011 23:55:24 +0100
Subject: [PATCH] Fix code generation, when scalars are used as zero
 dimensional arrays

Scalars can be used not only to index an array or to store loop
iterators, but also to store the data of a calculation. When this
happens and the data needs to be referenced, the scalars need special
treatment.

This change makes sure that the generated code compiles and is correct.
It still copies the scalars into GPU memory and references them by
pointer indirection. This is correct, and it is needed if a scalar is
not read-only, i.e. its value is changed inside the scop and used
afterwards. A possible optimization for read-only scalars is to pass
them directly as function arguments.

Signed-off-by: Tobias Grosser <tobias@grosser.es>
Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
---
 cuda.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/cuda.c b/cuda.c
index 2e92500..2efa8b7 100644
--- a/cuda.c
+++ b/cuda.c
@@ -370,6 +370,15 @@ static void free_device_arrays(struct cuda_gen *gen)
 			gen->array[i].name);
 }
 
+/* Check if a cuda array is a scalar, i.e. a value stored as a single data
+ * element rather than as an array or through a pointer reference.  Scalars
+ * are currently represented as zero-dimensional arrays (n_index == 0).
+ */
+static int cuda_array_is_scalar(struct cuda_array_info *array)
+{
+	return (array->n_index == 0);
+}
+
 static void copy_arrays_to_device(struct cuda_gen *gen)
 {
 	int i;
@@ -386,8 +395,15 @@ static void copy_arrays_to_device(struct cuda_gen *gen)
 		if (empty)
 			continue;
 
-		fprintf(gen->cuda.host_c, "cudaMemcpy(dev_%s, %s, ",
-			gen->array[i].name, gen->array[i].name);
+		fprintf(gen->cuda.host_c, "cudaMemcpy(dev_%s,",
+			gen->array[i].name);
+
+		if (cuda_array_is_scalar(&(gen->array[i])))
+			fprintf(gen->cuda.host_c, " &%s, ",
+				gen->array[i].name);
+		else
+			fprintf(gen->cuda.host_c, " %s, ", gen->array[i].name);
+
 		print_array_size(gen, gen->cuda.host_c, &gen->array[i]);
 		fprintf(gen->cuda.host_c, ", cudaMemcpyHostToDevice);\n");
 	}
@@ -1649,6 +1665,12 @@ static void print_access(struct cuda_gen *gen, __isl_take isl_map *access,
 			bounds = group->shared_bound;
 
 		print_array_name(gen->cuda.kernel_c, group);
+
+		if (cuda_array_is_scalar(array)) {
+			isl_set_free(data_set);
+			return;
+		}
+
 		fprintf(gen->cuda.kernel_c, "[");
 	}
 
@@ -3399,6 +3421,11 @@ static void print_global_index(isl_ctx *ctx, FILE *out,
 	int i;
 	isl_printer *prn;
 
+	if (cuda_array_is_scalar(array)) {
+		fprintf(out, "*%s", array->name);
+		return;
+	}
+
 	fprintf(out, "%s[", array->name);
 	for (i = 0; i + 1 < array->n_index; ++i)
 		fprintf(out, "(");
-- 
2.11.4.GIT