2 * Copyright 2012 Ecole Normale Superieure
4 * Use of this software is governed by the GNU LGPLv2.1 license
6 * Written by Sven Verdoolaege,
7 * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
13 #include "cuda_common.h"
16 #include "pet_printer.h"
20 static __isl_give isl_printer
*print_cuda_macros(__isl_take isl_printer
*p
)
23 "#define cudaCheckReturn(ret) \\\n"
25 " cudaError_t e = (ret); \\\n"
26 " if (e != cudaSuccess) { \\\n"
27 " fprintf(stderr, \"CUDA error: %s\\n\", "
28 "cudaGetErrorString(e)); \\\n"
29 " fflush(stderr); \\\n"
31 " assert(e == cudaSuccess); \\\n"
33 "#define cudaCheckKernel() \\\n"
35 " cudaError_t e = cudaGetLastError(); \\\n"
36 " cudaCheckReturn(e); \\\n"
39 p
= isl_printer_print_str(p
, macros
);
43 static __isl_give isl_printer
*print_array_size(__isl_take isl_printer
*prn
,
44 struct gpu_array_info
*array
)
48 for (i
= 0; i
< array
->n_index
; ++i
) {
49 prn
= isl_printer_print_str(prn
, "(");
50 prn
= isl_printer_print_pw_aff(prn
, array
->bound
[i
]);
51 prn
= isl_printer_print_str(prn
, ") * ");
53 prn
= isl_printer_print_str(prn
, "sizeof(");
54 prn
= isl_printer_print_str(prn
, array
->type
);
55 prn
= isl_printer_print_str(prn
, ")");
60 static __isl_give isl_printer
*declare_device_arrays(__isl_take isl_printer
*p
,
61 struct gpu_prog
*prog
)
65 for (i
= 0; i
< prog
->n_array
; ++i
) {
66 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
68 p
= isl_printer_start_line(p
);
69 p
= isl_printer_print_str(p
, prog
->array
[i
].type
);
70 p
= isl_printer_print_str(p
, " *dev_");
71 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
72 p
= isl_printer_print_str(p
, ";");
73 p
= isl_printer_end_line(p
);
75 p
= isl_printer_start_line(p
);
76 p
= isl_printer_end_line(p
);
80 static __isl_give isl_printer
*allocate_device_arrays(
81 __isl_take isl_printer
*p
, struct gpu_prog
*prog
)
85 for (i
= 0; i
< prog
->n_array
; ++i
) {
86 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
88 p
= isl_printer_start_line(p
);
89 p
= isl_printer_print_str(p
,
90 "cudaCheckReturn(cudaMalloc((void **) &dev_");
91 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
92 p
= isl_printer_print_str(p
, ", ");
93 p
= print_array_size(p
, &prog
->array
[i
]);
94 p
= isl_printer_print_str(p
, "));");
95 p
= isl_printer_end_line(p
);
97 p
= isl_printer_start_line(p
);
98 p
= isl_printer_end_line(p
);
102 static __isl_give isl_printer
*copy_arrays_to_device(__isl_take isl_printer
*p
,
103 struct gpu_prog
*prog
)
107 for (i
= 0; i
< prog
->n_array
; ++i
) {
112 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
115 dim
= isl_space_copy(prog
->array
[i
].dim
);
116 read_i
= isl_union_set_extract_set(prog
->copy_in
, dim
);
117 empty
= isl_set_fast_is_empty(read_i
);
118 isl_set_free(read_i
);
122 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaMemcpy(dev_");
123 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
124 p
= isl_printer_print_str(p
, ", ");
126 if (gpu_array_is_scalar(&prog
->array
[i
]))
127 p
= isl_printer_print_str(p
, "&");
128 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
129 p
= isl_printer_print_str(p
, ", ");
131 p
= print_array_size(p
, &prog
->array
[i
]);
132 p
= isl_printer_print_str(p
, ", cudaMemcpyHostToDevice));");
133 p
= isl_printer_end_line(p
);
135 p
= isl_printer_start_line(p
);
136 p
= isl_printer_end_line(p
);
140 static void print_reverse_list(FILE *out
, int len
, int *list
)
148 for (i
= 0; i
< len
; ++i
) {
151 fprintf(out
, "%d", list
[len
- 1 - i
]);
156 /* Print the effective grid size as a list of the sizes in each
157 * dimension, from innermost to outermost.
159 static __isl_give isl_printer
*print_grid_size(__isl_take isl_printer
*p
,
160 struct ppcg_kernel
*kernel
)
165 dim
= isl_multi_pw_aff_dim(kernel
->grid_size
, isl_dim_set
);
169 p
= isl_printer_print_str(p
, "(");
170 for (i
= dim
- 1; i
>= 0; --i
) {
173 bound
= isl_multi_pw_aff_get_pw_aff(kernel
->grid_size
, i
);
174 p
= isl_printer_print_pw_aff(p
, bound
);
175 isl_pw_aff_free(bound
);
178 p
= isl_printer_print_str(p
, ", ");
181 p
= isl_printer_print_str(p
, ")");
186 /* Print the grid definition.
188 static __isl_give isl_printer
*print_grid(__isl_take isl_printer
*p
,
189 struct ppcg_kernel
*kernel
)
191 p
= isl_printer_start_line(p
);
192 p
= isl_printer_print_str(p
, "dim3 k");
193 p
= isl_printer_print_int(p
, kernel
->id
);
194 p
= isl_printer_print_str(p
, "_dimGrid");
195 p
= print_grid_size(p
, kernel
);
196 p
= isl_printer_print_str(p
, ";");
197 p
= isl_printer_end_line(p
);
202 /* Print the arguments to a kernel declaration or call. If "types" is set,
203 * then print a declaration (including the types of the arguments).
205 * The arguments are printed in the following order
206 * - the arrays accessed by the kernel
208 * - the host loop iterators
210 static __isl_give isl_printer
*print_kernel_arguments(__isl_take isl_printer
*p
,
211 struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
, int types
)
219 for (i
= 0; i
< prog
->n_array
; ++i
) {
223 space
= isl_space_copy(prog
->array
[i
].dim
);
224 arr
= isl_union_set_extract_set(kernel
->arrays
, space
);
225 empty
= isl_set_fast_is_empty(arr
);
231 p
= isl_printer_print_str(p
, ", ");
234 p
= isl_printer_print_str(p
, prog
->array
[i
].type
);
235 p
= isl_printer_print_str(p
, " ");
238 if (gpu_array_is_read_only_scalar(&prog
->array
[i
])) {
239 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
242 p
= isl_printer_print_str(p
, "*");
244 p
= isl_printer_print_str(p
, "dev_");
245 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
251 space
= isl_union_set_get_space(kernel
->arrays
);
252 nparam
= isl_space_dim(space
, isl_dim_param
);
253 for (i
= 0; i
< nparam
; ++i
) {
256 name
= isl_space_get_dim_name(space
, isl_dim_param
, i
);
259 p
= isl_printer_print_str(p
, ", ");
261 p
= isl_printer_print_str(p
, "int ");
262 p
= isl_printer_print_str(p
, name
);
266 isl_space_free(space
);
268 n
= isl_space_dim(kernel
->space
, isl_dim_set
);
269 type
= isl_options_get_ast_iterator_type(prog
->ctx
);
270 for (i
= 0; i
< n
; ++i
) {
275 p
= isl_printer_print_str(p
, ", ");
276 name
= isl_space_get_dim_name(kernel
->space
, isl_dim_set
, i
);
278 p
= isl_printer_print_str(p
, type
);
279 p
= isl_printer_print_str(p
, " ");
281 p
= isl_printer_print_str(p
, name
);
289 /* Print the header of the given kernel.
291 static __isl_give isl_printer
*print_kernel_header(__isl_take isl_printer
*p
,
292 struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
)
294 p
= isl_printer_start_line(p
);
295 p
= isl_printer_print_str(p
, "__global__ void kernel");
296 p
= isl_printer_print_int(p
, kernel
->id
);
297 p
= isl_printer_print_str(p
, "(");
298 p
= print_kernel_arguments(p
, prog
, kernel
, 1);
299 p
= isl_printer_print_str(p
, ")");
304 /* Print the header of the given kernel to both gen->cuda.kernel_h
305 * and gen->cuda.kernel_c.
307 static void print_kernel_headers(struct gpu_prog
*prog
,
308 struct ppcg_kernel
*kernel
, struct cuda_info
*cuda
)
312 p
= isl_printer_to_file(prog
->ctx
, cuda
->kernel_h
);
313 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
314 p
= print_kernel_header(p
, prog
, kernel
);
315 p
= isl_printer_print_str(p
, ";");
316 p
= isl_printer_end_line(p
);
319 p
= isl_printer_to_file(prog
->ctx
, cuda
->kernel_c
);
320 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
321 p
= print_kernel_header(p
, prog
, kernel
);
322 p
= isl_printer_end_line(p
);
326 static void print_indent(FILE *dst
, int indent
)
328 fprintf(dst
, "%*s", indent
, "");
331 static void print_kernel_iterators(FILE *out
, struct ppcg_kernel
*kernel
)
334 const char *block_dims
[] = { "blockIdx.x", "blockIdx.y" };
335 const char *thread_dims
[] = { "threadIdx.x", "threadIdx.y",
338 if (kernel
->n_grid
> 0) {
339 print_indent(out
, 4);
340 fprintf(out
, "int ");
341 for (i
= 0; i
< kernel
->n_grid
; ++i
) {
344 fprintf(out
, "b%d = %s",
345 i
, block_dims
[kernel
->n_grid
- 1 - i
]);
350 if (kernel
->n_block
> 0) {
351 print_indent(out
, 4);
352 fprintf(out
, "int ");
353 for (i
= 0; i
< kernel
->n_block
; ++i
) {
356 fprintf(out
, "t%d = %s",
357 i
, thread_dims
[kernel
->n_block
- 1 - i
]);
363 static void print_kernel_var(FILE *out
, struct ppcg_kernel_var
*var
)
368 print_indent(out
, 4);
369 if (var
->type
== ppcg_access_shared
)
370 fprintf(out
, "__shared__ ");
371 fprintf(out
, "%s %s", var
->array
->type
, var
->name
);
373 for (j
= 0; j
< var
->array
->n_index
; ++j
) {
375 isl_vec_get_element(var
->size
, j
, &v
);
376 isl_int_print(out
, v
, 0);
383 static void print_kernel_vars(FILE *out
, struct ppcg_kernel
*kernel
)
387 for (i
= 0; i
< kernel
->n_var
; ++i
)
388 print_kernel_var(out
, &kernel
->var
[i
]);
391 /* Print an access to the element in the private/shared memory copy
392 * described by "stmt". The index of the copy is recorded in
393 * stmt->local_index as a "call" to the array.
395 static __isl_give isl_printer
*stmt_print_local_index(__isl_take isl_printer
*p
,
396 struct ppcg_kernel_stmt
*stmt
)
400 struct gpu_array_info
*array
= stmt
->u
.c
.array
;
402 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.local_index
, 0);
403 p
= isl_printer_print_ast_expr(p
, expr
);
404 isl_ast_expr_free(expr
);
406 for (i
= 0; i
< array
->n_index
; ++i
) {
407 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.local_index
, 1 + i
);
409 p
= isl_printer_print_str(p
, "[");
410 p
= isl_printer_print_ast_expr(p
, expr
);
411 p
= isl_printer_print_str(p
, "]");
413 isl_ast_expr_free(expr
);
419 /* Print an access to the element in the global memory copy
420 * described by "stmt". The index of the copy is recorded in
421 * stmt->index as a "call" to the array.
423 * The copy in global memory has been linearized, so we need to take
424 * the array size into account.
426 static __isl_give isl_printer
*stmt_print_global_index(
427 __isl_take isl_printer
*p
, struct ppcg_kernel_stmt
*stmt
)
430 struct gpu_array_info
*array
= stmt
->u
.c
.array
;
431 isl_pw_aff_list
*bound
= stmt
->u
.c
.local_array
->bound
;
433 if (gpu_array_is_scalar(array
)) {
434 if (!array
->read_only
)
435 p
= isl_printer_print_str(p
, "*");
436 p
= isl_printer_print_str(p
, array
->name
);
440 p
= isl_printer_print_str(p
, array
->name
);
441 p
= isl_printer_print_str(p
, "[");
442 for (i
= 0; i
+ 1 < array
->n_index
; ++i
)
443 p
= isl_printer_print_str(p
, "(");
444 for (i
= 0; i
< array
->n_index
; ++i
) {
446 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.index
, 1 + i
);
449 bound_i
= isl_pw_aff_list_get_pw_aff(bound
, i
);
450 p
= isl_printer_print_str(p
, ") * (");
451 p
= isl_printer_print_pw_aff(p
, bound_i
);
452 p
= isl_printer_print_str(p
, ") + ");
453 isl_pw_aff_free(bound_i
);
455 p
= isl_printer_print_ast_expr(p
, expr
);
456 isl_ast_expr_free(expr
);
458 p
= isl_printer_print_str(p
, "]");
463 /* Print a copy statement.
465 * A read copy statement is printed as
469 * while a write copy statement is printed as
473 static __isl_give isl_printer
*print_copy(__isl_take isl_printer
*p
,
474 struct ppcg_kernel_stmt
*stmt
)
476 p
= isl_printer_start_line(p
);
477 if (stmt
->u
.c
.read
) {
478 p
= stmt_print_local_index(p
, stmt
);
479 p
= isl_printer_print_str(p
, " = ");
480 p
= stmt_print_global_index(p
, stmt
);
482 p
= stmt_print_global_index(p
, stmt
);
483 p
= isl_printer_print_str(p
, " = ");
484 p
= stmt_print_local_index(p
, stmt
);
486 p
= isl_printer_print_str(p
, ";");
487 p
= isl_printer_end_line(p
);
492 /* Print a sync statement.
494 static __isl_give isl_printer
*print_sync(__isl_take isl_printer
*p
,
495 struct ppcg_kernel_stmt
*stmt
)
497 p
= isl_printer_start_line(p
);
498 p
= isl_printer_print_str(p
, "__syncthreads();");
499 p
= isl_printer_end_line(p
);
504 /* Print an access based on the information in "access".
505 * If this an access to global memory, then the index expression
508 * If access->array is NULL, then we are
509 * accessing an iterator in the original program.
511 static __isl_give isl_printer
*print_access(__isl_take isl_printer
*p
,
512 struct ppcg_kernel_access
*access
)
516 struct gpu_array_info
*array
;
517 isl_pw_aff_list
*bound
;
519 array
= access
->array
;
520 bound
= array
? access
->local_array
->bound
: NULL
;
522 p
= isl_printer_print_str(p
, "(");
524 if (access
->type
== ppcg_access_global
&&
525 gpu_array_is_scalar(array
) && !array
->read_only
)
526 p
= isl_printer_print_str(p
, "*");
527 p
= isl_printer_print_str(p
, access
->local_name
);
528 if (gpu_array_is_scalar(array
))
530 p
= isl_printer_print_str(p
, "[");
533 n_index
= isl_ast_expr_list_n_ast_expr(access
->index
);
534 if (access
->type
== ppcg_access_global
)
535 for (i
= 0; i
+ 1 < n_index
; ++i
)
536 p
= isl_printer_print_str(p
, "(");
538 for (i
= 0; i
< n_index
; ++i
) {
541 index
= isl_ast_expr_list_get_ast_expr(access
->index
, i
);
543 if (access
->type
== ppcg_access_global
) {
545 bound_i
= isl_pw_aff_list_get_pw_aff(bound
, i
);
546 p
= isl_printer_print_str(p
, ") * (");
547 p
= isl_printer_print_pw_aff(p
, bound_i
);
548 p
= isl_printer_print_str(p
, ") + ");
549 isl_pw_aff_free(bound_i
);
551 p
= isl_printer_print_str(p
, "][");
553 p
= isl_printer_print_ast_expr(p
, index
);
554 isl_ast_expr_free(index
);
557 p
= isl_printer_print_str(p
, ")");
559 p
= isl_printer_print_str(p
, "]");
564 struct cuda_access_print_info
{
566 struct ppcg_kernel_stmt
*stmt
;
569 /* To print the cuda accesses we walk the list of cuda accesses simultaneously
570 * with the pet printer. This means that whenever the pet printer prints a
571 * pet access expression we have the corresponding cuda access available and can
572 * print the modified access.
574 static __isl_give isl_printer
*print_cuda_access(__isl_take isl_printer
*p
,
575 struct pet_expr
*expr
, void *usr
)
577 struct cuda_access_print_info
*info
=
578 (struct cuda_access_print_info
*) usr
;
580 p
= print_access(p
, &info
->stmt
->u
.d
.access
[info
->i
]);
586 static __isl_give isl_printer
*print_stmt_body(__isl_take isl_printer
*p
,
587 struct ppcg_kernel_stmt
*stmt
)
589 struct cuda_access_print_info info
;
594 p
= isl_printer_start_line(p
);
595 p
= print_pet_expr(p
, stmt
->u
.d
.stmt
->body
, &print_cuda_access
, &info
);
596 p
= isl_printer_print_str(p
, ";");
597 p
= isl_printer_end_line(p
);
602 /* This function is called for each user statement in the AST,
603 * i.e., for each kernel body statement, copy statement or sync statement.
605 static __isl_give isl_printer
*print_kernel_stmt(__isl_take isl_printer
*p
,
606 __isl_take isl_ast_print_options
*print_options
,
607 __isl_keep isl_ast_node
*node
, void *user
)
610 struct ppcg_kernel_stmt
*stmt
;
612 id
= isl_ast_node_get_annotation(node
);
613 stmt
= isl_id_get_user(id
);
616 isl_ast_print_options_free(print_options
);
618 switch (stmt
->type
) {
619 case ppcg_kernel_copy
:
620 return print_copy(p
, stmt
);
621 case ppcg_kernel_sync
:
622 return print_sync(p
, stmt
);
623 case ppcg_kernel_domain
:
624 return print_stmt_body(p
, stmt
);
630 static int print_macro(enum isl_ast_op_type type
, void *user
)
632 isl_printer
**p
= user
;
634 if (type
== isl_ast_op_fdiv_q
)
637 *p
= isl_ast_op_type_print_macro(type
, *p
);
642 /* Print the required macros for "node", including one for floord.
643 * We always print a macro for floord as it may also appear in the statements.
645 static __isl_give isl_printer
*print_macros(
646 __isl_keep isl_ast_node
*node
, __isl_take isl_printer
*p
)
648 p
= isl_ast_op_type_print_macro(isl_ast_op_fdiv_q
, p
);
649 if (isl_ast_node_foreach_ast_op_type(node
, &print_macro
, &p
) < 0)
650 return isl_printer_free(p
);
654 static void print_kernel(struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
,
655 struct cuda_info
*cuda
)
657 isl_ctx
*ctx
= isl_ast_node_get_ctx(kernel
->tree
);
658 isl_ast_print_options
*print_options
;
661 print_kernel_headers(prog
, kernel
, cuda
);
662 fprintf(cuda
->kernel_c
, "{\n");
663 print_kernel_iterators(cuda
->kernel_c
, kernel
);
664 print_kernel_vars(cuda
->kernel_c
, kernel
);
665 fprintf(cuda
->kernel_c
, "\n");
667 print_options
= isl_ast_print_options_alloc(ctx
);
668 print_options
= isl_ast_print_options_set_print_user(print_options
,
669 &print_kernel_stmt
, NULL
);
671 p
= isl_printer_to_file(ctx
, cuda
->kernel_c
);
672 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
673 p
= isl_printer_indent(p
, 4);
674 p
= print_macros(kernel
->tree
, p
);
675 p
= isl_ast_node_print(kernel
->tree
, p
, print_options
);
678 fprintf(cuda
->kernel_c
, "}\n");
681 struct print_host_user_data
{
682 struct cuda_info
*cuda
;
683 struct gpu_prog
*prog
;
686 /* Print the user statement of the host code to "p".
688 * In particular, print a block of statements that defines the grid
689 * and the block and then launches the kernel.
691 static __isl_give isl_printer
*print_host_user(__isl_take isl_printer
*p
,
692 __isl_take isl_ast_print_options
*print_options
,
693 __isl_keep isl_ast_node
*node
, void *user
)
696 struct ppcg_kernel
*kernel
;
697 struct print_host_user_data
*data
;
699 id
= isl_ast_node_get_annotation(node
);
700 kernel
= isl_id_get_user(id
);
703 data
= (struct print_host_user_data
*) user
;
705 p
= isl_printer_start_line(p
);
706 p
= isl_printer_print_str(p
, "{");
707 p
= isl_printer_end_line(p
);
708 p
= isl_printer_indent(p
, 2);
710 p
= isl_printer_start_line(p
);
711 p
= isl_printer_print_str(p
, "dim3 k");
712 p
= isl_printer_print_int(p
, kernel
->id
);
713 p
= isl_printer_print_str(p
, "_dimBlock");
714 print_reverse_list(isl_printer_get_file(p
),
715 kernel
->n_block
, kernel
->block_dim
);
716 p
= isl_printer_print_str(p
, ";");
717 p
= isl_printer_end_line(p
);
719 p
= print_grid(p
, kernel
);
721 p
= isl_printer_start_line(p
);
722 p
= isl_printer_print_str(p
, "kernel");
723 p
= isl_printer_print_int(p
, kernel
->id
);
724 p
= isl_printer_print_str(p
, " <<<k");
725 p
= isl_printer_print_int(p
, kernel
->id
);
726 p
= isl_printer_print_str(p
, "_dimGrid, k");
727 p
= isl_printer_print_int(p
, kernel
->id
);
728 p
= isl_printer_print_str(p
, "_dimBlock>>> (");
729 p
= print_kernel_arguments(p
, data
->prog
, kernel
, 0);
730 p
= isl_printer_print_str(p
, ");");
731 p
= isl_printer_end_line(p
);
733 p
= isl_printer_start_line(p
);
734 p
= isl_printer_print_str(p
, "cudaCheckKernel();");
735 p
= isl_printer_end_line(p
);
737 p
= isl_printer_indent(p
, -2);
738 p
= isl_printer_start_line(p
);
739 p
= isl_printer_print_str(p
, "}");
740 p
= isl_printer_end_line(p
);
742 p
= isl_printer_start_line(p
);
743 p
= isl_printer_end_line(p
);
745 print_kernel(data
->prog
, kernel
, data
->cuda
);
747 isl_ast_print_options_free(print_options
);
752 static __isl_give isl_printer
*print_host_code(__isl_take isl_printer
*p
,
753 struct gpu_prog
*prog
, __isl_keep isl_ast_node
*tree
,
754 struct cuda_info
*cuda
)
756 isl_ast_print_options
*print_options
;
757 isl_ctx
*ctx
= isl_ast_node_get_ctx(tree
);
758 struct print_host_user_data data
= { cuda
, prog
};
760 print_options
= isl_ast_print_options_alloc(ctx
);
761 print_options
= isl_ast_print_options_set_print_user(print_options
,
762 &print_host_user
, &data
);
764 p
= print_macros(tree
, p
);
765 p
= isl_ast_node_print(tree
, p
, print_options
);
770 /* For each array that is written anywhere in the gpu_prog,
771 * copy the contents back from the GPU to the host.
773 * Arrays that are not visible outside the corresponding scop
774 * do not need to be copied back.
776 static __isl_give isl_printer
*copy_arrays_from_device(
777 __isl_take isl_printer
*p
, struct gpu_prog
*prog
)
780 isl_union_set
*write
;
781 write
= isl_union_map_range(isl_union_map_copy(prog
->write
));
783 for (i
= 0; i
< prog
->n_array
; ++i
) {
788 if (prog
->array
[i
].local
)
791 dim
= isl_space_copy(prog
->array
[i
].dim
);
792 write_i
= isl_union_set_extract_set(write
, dim
);
793 empty
= isl_set_fast_is_empty(write_i
);
794 isl_set_free(write_i
);
798 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaMemcpy(");
799 if (gpu_array_is_scalar(&prog
->array
[i
]))
800 p
= isl_printer_print_str(p
, "&");
801 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
802 p
= isl_printer_print_str(p
, ", dev_");
803 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
804 p
= isl_printer_print_str(p
, ", ");
805 p
= print_array_size(p
, &prog
->array
[i
]);
806 p
= isl_printer_print_str(p
, ", cudaMemcpyDeviceToHost));");
807 p
= isl_printer_end_line(p
);
810 isl_union_set_free(write
);
811 p
= isl_printer_start_line(p
);
812 p
= isl_printer_end_line(p
);
816 static __isl_give isl_printer
*free_device_arrays(__isl_take isl_printer
*p
,
817 struct gpu_prog
*prog
)
821 for (i
= 0; i
< prog
->n_array
; ++i
) {
822 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
824 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaFree(dev_");
825 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
826 p
= isl_printer_print_str(p
, "));");
827 p
= isl_printer_end_line(p
);
833 int generate_cuda(isl_ctx
*ctx
, struct ppcg_scop
*scop
,
834 struct ppcg_options
*options
, const char *input
)
836 struct cuda_info cuda
;
837 struct gpu_prog
*prog
;
844 scop
->context
= add_context_from_str(scop
->context
, options
->ctx
);
846 prog
= gpu_prog_alloc(ctx
, scop
);
848 tree
= generate_gpu(ctx
, prog
, options
);
850 cuda_open_files(&cuda
, input
);
852 p
= isl_printer_to_file(ctx
, cuda
.host_c
);
853 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
854 p
= ppcg_print_exposed_declarations(p
, scop
);
855 p
= ppcg_start_block(p
);
857 p
= print_cuda_macros(p
);
859 p
= declare_device_arrays(p
, prog
);
860 p
= allocate_device_arrays(p
, prog
);
861 p
= copy_arrays_to_device(p
, prog
);
863 p
= print_host_code(p
, prog
, tree
, &cuda
);
864 isl_ast_node_free(tree
);
866 p
= copy_arrays_from_device(p
, prog
);
867 p
= free_device_arrays(p
, prog
);
869 p
= ppcg_end_block(p
);
872 cuda_close_files(&cuda
);