From c1615a36d5fceacb3f22951fb33aec719d968c88 Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege <skimo@kotnet.org>
Date: Mon, 22 Jul 2013 16:54:52 +0200
Subject: [PATCH] move copying of code before and after scop from print_cuda to
 generate_gpu

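This copying should not depend on the output format, so it is moved
out of cuda_open_files and cuda_close_files and into generate_gpu.
generate_gpu now takes the name of the input file, the output file
and a callback for printing the AST in the desired output format.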

Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
---
 cuda.c        | 63 +++++++++++++++++++++++++++++++++--------------------------
 cuda_common.c | 15 +++-----------
 cuda_common.h |  7 -------
 gpu.c         | 46 +++++++++++++++++++++++++++++++++++++------
 gpu.h         |  7 +++++--
 5 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/cuda.c b/cuda.c
index fd61c1c..77a1865 100644
--- a/cuda.c
+++ b/cuda.c
@@ -602,30 +602,15 @@ static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
 	return p;
 }
 
-int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
-	struct ppcg_options *options, const char *input)
+/* Given a gpu_prog "prog" and the corresponding transformed AST
+ * "tree", print the entire CUDA code to "p".
+ */
+static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
+	struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+	void *user)
 {
-	struct cuda_info cuda;
-	struct gpu_prog *prog;
-	isl_ast_node *tree;
-	isl_printer *p;
-
-	if (!scop)
-		return -1;
-
-	prog = gpu_prog_alloc(ctx, scop);
-	if (!prog)
-		return -1;
+	struct cuda_info *cuda = user;
 
-	tree = generate_gpu(ctx, prog, options);
-
-	cuda.start = scop->start;
-	cuda.end = scop->end;
-	cuda_open_files(&cuda, input);
-
-	p = isl_printer_to_file(ctx, cuda.host_c);
-	p = isl_printer_set_output_format(p, ISL_FORMAT_C);
-	p = ppcg_print_exposed_declarations(p, scop);
 	p = ppcg_start_block(p);
 
 	p = print_cuda_macros(p);
@@ -634,18 +619,40 @@ int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
 	p = allocate_device_arrays(p, prog);
 	p = copy_arrays_to_device(p, prog);
 
-	p = print_host_code(p, prog, tree, &cuda);
-	isl_ast_node_free(tree);
+	p = print_host_code(p, prog, tree, cuda);
 
 	p = copy_arrays_from_device(p, prog);
 	p = free_device_arrays(p, prog);
 
 	p = ppcg_end_block(p);
-	isl_printer_free(p);
 
-	cuda_close_files(&cuda);
+	return p;
+}
 
-	gpu_prog_free(prog);
+/* Generate CUDA code for the given "scop", with the given "options".
+ * The names of the output files are derived from "input".
+ *
+ * We let generate_gpu do all the hard work and then let it call
+ * us back for printing the AST in print_cuda.
+ *
+ * To prepare for this printing, we first open the output files
+ * and we close them after generate_gpu has finished.
+ */
+int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
+	struct ppcg_options *options, const char *input)
+{
+	struct cuda_info cuda;
+	int r;
+
+	if (!scop)
+		return -1;
+
+	cuda_open_files(&cuda, input);
+
+	r = generate_gpu(ctx, input, cuda.host_c, scop, options,
+			&print_cuda, &cuda);
+
+	cuda_close_files(&cuda);
 
-	return 0;
+	return r;
 }
diff --git a/cuda_common.c b/cuda_common.c
index f4f949d..7a1c22e 100644
--- a/cuda_common.c
+++ b/cuda_common.c
@@ -13,12 +13,9 @@
 #include <string.h>
 
 #include "cuda_common.h"
-#include "rewrite.h"
 
-/* Open the "input" file for reading and open the host .cu file
- * and the kernel .hu and .cu files for writing.
- * Add the necessary includes and copy all code from the input
- * file up to the openscop pragma to the host .cu file.
+/* Open the host .cu file and the kernel .hu and .cu files for writing.
+ * Add the necessary includes.
  */
 void cuda_open_files(struct cuda_info *info, const char *input)
 {
@@ -48,18 +45,12 @@ void cuda_open_files(struct cuda_info *info, const char *input)
     fprintf(info->host_c, "#include \"%s\"\n", name);
     fprintf(info->kernel_c, "#include \"%s\"\n", name);
     fprintf(info->kernel_h, "#include \"cuda.h\"\n\n");
-
-    info->input = fopen(input, "r");
-    copy(info->input, info->host_c, 0, info->start);
 }
 
-/* Copy all code starting at the endscop pragma from the input
- * file to the host .cu file and close all input and output files.
+/* Close all output files.
  */
 void cuda_close_files(struct cuda_info *info)
 {
-    copy(info->input, info->host_c, info->end, -1);
-    fclose(info->input);
     fclose(info->kernel_c);
     fclose(info->kernel_h);
     fclose(info->host_c);
diff --git a/cuda_common.h b/cuda_common.h
index 99f9bce..2a7db95 100644
--- a/cuda_common.h
+++ b/cuda_common.h
@@ -3,14 +3,7 @@
 
 #include <stdio.h>
 
-/* start and end are file offsets of the program text that corresponds
- * to the scop being transformed.
- */
 struct cuda_info {
-	unsigned start;
-	unsigned end;
-
-	FILE *input;
 	FILE *host_c;
 	FILE *kernel_c;
 	FILE *kernel_h;
diff --git a/gpu.c b/gpu.c
index d4dbf17..025dfa1 100644
--- a/gpu.c
+++ b/gpu.c
@@ -27,6 +27,8 @@
 #include "gpu.h"
 #include "schedule.h"
 #include "ppcg_options.h"
+#include "print.h"
+#include "rewrite.h"
 
 /* The fields stride, shift and shift_map only contain valid information
  * if shift != NULL.
@@ -4912,7 +4914,14 @@ static struct gpu_stmt *extract_stmts(isl_ctx *ctx, struct ppcg_scop *scop,
 }
 
 /* Replace the scop in the "input" file by equivalent code
- * that uses the GPU.  "scop" is assumed to correspond to this scop.
+ * that uses the GPU and print the result to "out".
+ * "scop" is assumed to correspond to this scop.
+ * The code before the scop is first copied to "out",
+ * then the transformed scop is printed and finally
+ * the code after the scop is copied to "out".
+ * After generating an AST for the transformed scop as explained below,
+ * we call "print" to print the AST in the desired output format
+ * to a printer hooked up to "out".
  *
  * We first compute a schedule that respects the dependences
  * of the original program and select the outermost band
@@ -4953,15 +4962,30 @@ static struct gpu_stmt *extract_stmts(isl_ctx *ctx, struct ppcg_scop *scop,
  * to h%d parameters and the T1P loops to the block dimensions.
  * Finally, we generate code for the remaining loops in a similar fashion.
  */
-__isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
-	struct ppcg_options *options)
+int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
+	struct ppcg_scop *scop, struct ppcg_options *options,
+	__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
+		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+		void *user), void *user)
 {
-	isl_union_map *sched;
 	struct gpu_gen gen;
+	struct gpu_prog *prog;
 	isl_ast_node *tree;
+	isl_printer *p;
+	FILE *in;
+
+	if (!scop)
+		return -1;
 
+	in = fopen(input, "r");
+	copy(in, out, 0, scop->start);
+
+	prog = gpu_prog_alloc(ctx, scop);
 	if (!prog)
-		return NULL;
+		return -1;
+
+	p = isl_printer_to_file(ctx, out);
+	p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 
 	gen.ctx = ctx;
 	gen.prog = prog;
@@ -4973,10 +4997,20 @@ __isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
 
 	gen.kernel_id = 0;
 	tree = generate_host_code(&gen);
+	p = ppcg_print_exposed_declarations(p, prog->scop);
+	p = print(p, prog, tree, user);
+	isl_ast_node_free(tree);
 
 	clear_gpu_gen(&gen);
 
-	return tree;
+	isl_printer_free(p);
+
+	gpu_prog_free(prog);
+
+	copy(in, out, scop->end, -1);
+	fclose(in);
+
+	return p ? 0 : -1;
 }
 
 struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop)
diff --git a/gpu.h b/gpu.h
index cd84466..f93c7da 100644
--- a/gpu.h
+++ b/gpu.h
@@ -206,7 +206,10 @@ int gpu_array_is_read_only_scalar(struct gpu_array_info *array);
 struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop);
 void *gpu_prog_free(struct gpu_prog *prog);
 
-__isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
-	struct ppcg_options *options);
+int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
+	struct ppcg_scop *scop, struct ppcg_options *options,
+	__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
+		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+		void *user), void *user);
 
 #endif
-- 
2.11.4.GIT