From c1615a36d5fceacb3f22951fb33aec719d968c88 Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege
Date: Mon, 22 Jul 2013 16:54:52 +0200
Subject: [PATCH] move copying of code before and after scop from print_cuda to generate_gpu

This copying should not depend on the output format.

Signed-off-by: Sven Verdoolaege
---
 cuda.c        | 63 +++++++++++++++++++++++++++++++++--------------------------
 cuda_common.c | 15 +++-----------
 cuda_common.h |  7 -------
 gpu.c         | 46 +++++++++++++++++++++++++++++++++++++------
 gpu.h         |  7 +++++--
 5 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/cuda.c b/cuda.c
index fd61c1c..77a1865 100644
--- a/cuda.c
+++ b/cuda.c
@@ -602,30 +602,15 @@ static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
 	return p;
 }
 
-int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
-	struct ppcg_options *options, const char *input)
+/* Given a gpu_prog "prog" and the corresponding transformed AST
+ * "tree", print the entire CUDA code to "p".
+ */
+static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
+	struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+	void *user)
 {
-	struct cuda_info cuda;
-	struct gpu_prog *prog;
-	isl_ast_node *tree;
-	isl_printer *p;
-
-	if (!scop)
-		return -1;
-
-	prog = gpu_prog_alloc(ctx, scop);
-	if (!prog)
-		return -1;
+	struct cuda_info *cuda = user;
 
-	tree = generate_gpu(ctx, prog, options);
-
-	cuda.start = scop->start;
-	cuda.end = scop->end;
-	cuda_open_files(&cuda, input);
-
-	p = isl_printer_to_file(ctx, cuda.host_c);
-	p = isl_printer_set_output_format(p, ISL_FORMAT_C);
-	p = ppcg_print_exposed_declarations(p, scop);
 	p = ppcg_start_block(p);
 
 	p = print_cuda_macros(p);
@@ -634,18 +619,40 @@ int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
 	p = allocate_device_arrays(p, prog);
 	p = copy_arrays_to_device(p, prog);
 
-	p = print_host_code(p, prog, tree, &cuda);
-	isl_ast_node_free(tree);
+	p = print_host_code(p, prog, tree, cuda);
 
 	p = copy_arrays_from_device(p, prog);
 	p = free_device_arrays(p, prog);
 
 	p = ppcg_end_block(p);
-	isl_printer_free(p);
 
-	cuda_close_files(&cuda);
+	return p;
+}
 
-	gpu_prog_free(prog);
+/* Generate CUDA code for the given "scop", with the given "options".
+ * The names of the output files are derived from "input".
+ *
+ * We let generate_gpu do all the hard work and then let it call
+ * us back for printing the AST in print_cuda.
+ *
+ * To prepare for this printing, we first open the output files
+ * and we close them after generate_gpu has finished.
+ */
+int generate_cuda(isl_ctx *ctx, struct ppcg_scop *scop,
+	struct ppcg_options *options, const char *input)
+{
+	struct cuda_info cuda;
+	int r;
+
+	if (!scop)
+		return -1;
+
+	cuda_open_files(&cuda, input);
+
+	r = generate_gpu(ctx, input, cuda.host_c, scop, options,
+				&print_cuda, &cuda);
+
+	cuda_close_files(&cuda);
 
-	return 0;
+	return r;
 }
diff --git a/cuda_common.c b/cuda_common.c
index f4f949d..7a1c22e 100644
--- a/cuda_common.c
+++ b/cuda_common.c
@@ -13,12 +13,9 @@
 #include
 
 #include "cuda_common.h"
-#include "rewrite.h"
 
-/* Open the "input" file for reading and open the host .cu file
- * and the kernel .hu and .cu files for writing.
- * Add the necessary includes and copy all code from the input
- * file up to the openscop pragma to the host .cu file.
+/* Open the host .cu file and the kernel .hu and .cu files for writing.
+ * Add the necessary includes.
  */
 void cuda_open_files(struct cuda_info *info, const char *input)
 {
@@ -48,18 +45,12 @@ void cuda_open_files(struct cuda_info *info, const char *input)
 	fprintf(info->host_c, "#include \"%s\"\n", name);
 	fprintf(info->kernel_c, "#include \"%s\"\n", name);
 	fprintf(info->kernel_h, "#include \"cuda.h\"\n\n");
-
-	info->input = fopen(input, "r");
-	copy(info->input, info->host_c, 0, info->start);
 }
 
-/* Copy all code starting at the endscop pragma from the input
- * file to the host .cu file and close all input and output files.
+/* Close all output files.
  */
 void cuda_close_files(struct cuda_info *info)
 {
-	copy(info->input, info->host_c, info->end, -1);
-	fclose(info->input);
 	fclose(info->kernel_c);
 	fclose(info->kernel_h);
 	fclose(info->host_c);
diff --git a/cuda_common.h b/cuda_common.h
index 99f9bce..2a7db95 100644
--- a/cuda_common.h
+++ b/cuda_common.h
@@ -3,14 +3,7 @@
 
 #include
 
-/* start and end are file offsets of the program text that corresponds
- * to the scop being transformed.
- */
 struct cuda_info {
-	unsigned start;
-	unsigned end;
-
-	FILE *input;
 	FILE *host_c;
 	FILE *kernel_c;
 	FILE *kernel_h;
diff --git a/gpu.c b/gpu.c
index d4dbf17..025dfa1 100644
--- a/gpu.c
+++ b/gpu.c
@@ -27,6 +27,8 @@
 #include "gpu.h"
 #include "schedule.h"
 #include "ppcg_options.h"
+#include "print.h"
+#include "rewrite.h"
 
 /* The fields stride, shift and shift_map only contain valid information
  * if shift != NULL.
@@ -4912,7 +4914,14 @@ static struct gpu_stmt *extract_stmts(isl_ctx *ctx, struct ppcg_scop *scop,
 }
 
 /* Replace the scop in the "input" file by equivalent code
- * that uses the GPU. "scop" is assumed to correspond to this scop.
+ * that uses the GPU and print the result to "out".
+ * "scop" is assumed to correspond to this scop.
+ * The code before the scop is first copied to "out",
+ * then the transformed scop is printed and finally
+ * the code after the scop is copied to "out".
+ * After generating an AST for the transformed scop as explained below,
+ * we call "print" to print the AST in the desired output format
+ * to a printer hooked up to "out".
  *
  * We first compute a schedule that respects the dependences
  * of the original program and select the outermost band
@@ -4953,15 +4962,30 @@ static struct gpu_stmt *extract_stmts(isl_ctx *ctx, struct ppcg_scop *scop,
  * to h%d parameters and the T1P loops to the block dimensions.
  * Finally, we generate code for the remaining loops in a similar fashion.
  */
-__isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
-	struct ppcg_options *options)
+int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
+	struct ppcg_scop *scop, struct ppcg_options *options,
+	__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
+		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+		void *user), void *user)
 {
-	isl_union_map *sched;
 	struct gpu_gen gen;
+	struct gpu_prog *prog;
 	isl_ast_node *tree;
+	isl_printer *p;
+	FILE *in;
+
+	if (!scop)
+		return -1;
 
+	in = fopen(input, "r");
+	copy(in, out, 0, scop->start);
+
+	prog = gpu_prog_alloc(ctx, scop);
 	if (!prog)
-		return NULL;
+		return -1;
+
+	p = isl_printer_to_file(ctx, out);
+	p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 
 	gen.ctx = ctx;
 	gen.prog = prog;
@@ -4973,10 +4997,20 @@ __isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
 	gen.kernel_id = 0;
 
 	tree = generate_host_code(&gen);
+	p = ppcg_print_exposed_declarations(p, prog->scop);
+	p = print(p, prog, tree, user);
+	isl_ast_node_free(tree);
 
 	clear_gpu_gen(&gen);
 
-	return tree;
+	isl_printer_free(p);
+
+	gpu_prog_free(prog);
+
+	copy(in, out, scop->end, -1);
+	fclose(in);
+
+	return p ? 0 : -1;
 }
 
 struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop)
diff --git a/gpu.h b/gpu.h
index cd84466..f93c7da 100644
--- a/gpu.h
+++ b/gpu.h
@@ -206,7 +206,10 @@ int gpu_array_is_read_only_scalar(struct gpu_array_info *array);
 struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop);
 void *gpu_prog_free(struct gpu_prog *prog);
 
-__isl_give isl_ast_node *generate_gpu(isl_ctx *ctx, struct gpu_prog *prog,
-	struct ppcg_options *options);
+int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
+	struct ppcg_scop *scop, struct ppcg_options *options,
+	__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
+		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
+		void *user), void *user);
 
 #endif
-- 
2.11.4.GIT