2 * Copyright 2012 Ecole Normale Superieure
4 * Use of this software is governed by the GNU LGPLv2.1 license
6 * Written by Sven Verdoolaege,
7 * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
13 #include "cuda_common.h"
16 #include "pet_printer.h"
20 static __isl_give isl_printer
*print_cuda_macros(__isl_take isl_printer
*p
)
23 "#define cudaCheckReturn(ret) assert((ret) == cudaSuccess)\n"
24 "#define cudaCheckKernel()"
25 " assert(cudaGetLastError() == cudaSuccess)\n\n";
26 p
= isl_printer_print_str(p
, macros
);
30 static __isl_give isl_printer
*print_array_size(__isl_take isl_printer
*prn
,
31 struct gpu_array_info
*array
)
35 for (i
= 0; i
< array
->n_index
; ++i
) {
36 prn
= isl_printer_print_str(prn
, "(");
37 prn
= isl_printer_print_pw_aff(prn
, array
->bound
[i
]);
38 prn
= isl_printer_print_str(prn
, ") * ");
40 prn
= isl_printer_print_str(prn
, "sizeof(");
41 prn
= isl_printer_print_str(prn
, array
->type
);
42 prn
= isl_printer_print_str(prn
, ")");
47 static __isl_give isl_printer
*declare_device_arrays(__isl_take isl_printer
*p
,
48 struct gpu_prog
*prog
)
52 for (i
= 0; i
< prog
->n_array
; ++i
) {
53 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
55 p
= isl_printer_start_line(p
);
56 p
= isl_printer_print_str(p
, prog
->array
[i
].type
);
57 p
= isl_printer_print_str(p
, " *dev_");
58 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
59 p
= isl_printer_print_str(p
, ";");
60 p
= isl_printer_end_line(p
);
62 p
= isl_printer_start_line(p
);
63 p
= isl_printer_end_line(p
);
67 static __isl_give isl_printer
*allocate_device_arrays(
68 __isl_take isl_printer
*p
, struct gpu_prog
*prog
)
72 for (i
= 0; i
< prog
->n_array
; ++i
) {
73 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
75 p
= isl_printer_start_line(p
);
76 p
= isl_printer_print_str(p
,
77 "cudaCheckReturn(cudaMalloc((void **) &dev_");
78 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
79 p
= isl_printer_print_str(p
, ", ");
80 p
= print_array_size(p
, &prog
->array
[i
]);
81 p
= isl_printer_print_str(p
, "));");
82 p
= isl_printer_end_line(p
);
84 p
= isl_printer_start_line(p
);
85 p
= isl_printer_end_line(p
);
89 static __isl_give isl_printer
*copy_arrays_to_device(__isl_take isl_printer
*p
,
90 struct gpu_prog
*prog
)
94 for (i
= 0; i
< prog
->n_array
; ++i
) {
99 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
102 dim
= isl_space_copy(prog
->array
[i
].dim
);
103 read_i
= isl_union_set_extract_set(prog
->copy_in
, dim
);
104 empty
= isl_set_fast_is_empty(read_i
);
105 isl_set_free(read_i
);
109 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaMemcpy(dev_");
110 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
111 p
= isl_printer_print_str(p
, ", ");
113 if (gpu_array_is_scalar(&prog
->array
[i
]))
114 p
= isl_printer_print_str(p
, "&");
115 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
116 p
= isl_printer_print_str(p
, ", ");
118 p
= print_array_size(p
, &prog
->array
[i
]);
119 p
= isl_printer_print_str(p
, ", cudaMemcpyHostToDevice));");
120 p
= isl_printer_end_line(p
);
122 p
= isl_printer_start_line(p
);
123 p
= isl_printer_end_line(p
);
127 static void print_reverse_list(FILE *out
, int len
, int *list
)
135 for (i
= 0; i
< len
; ++i
) {
138 fprintf(out
, "%d", list
[len
- 1 - i
]);
143 /* Print the effective grid size as a list of the sizes in each
144 * dimension, from innermost to outermost.
146 static __isl_give isl_printer
*print_grid_size(__isl_take isl_printer
*p
,
147 struct ppcg_kernel
*kernel
)
152 dim
= isl_multi_pw_aff_dim(kernel
->grid_size
, isl_dim_set
);
156 p
= isl_printer_print_str(p
, "(");
157 for (i
= dim
- 1; i
>= 0; --i
) {
160 bound
= isl_multi_pw_aff_get_pw_aff(kernel
->grid_size
, i
);
161 p
= isl_printer_print_pw_aff(p
, bound
);
162 isl_pw_aff_free(bound
);
165 p
= isl_printer_print_str(p
, ", ");
168 p
= isl_printer_print_str(p
, ")");
173 /* Print the grid definition.
175 static __isl_give isl_printer
*print_grid(__isl_take isl_printer
*p
,
176 struct ppcg_kernel
*kernel
)
178 p
= isl_printer_start_line(p
);
179 p
= isl_printer_print_str(p
, "dim3 k");
180 p
= isl_printer_print_int(p
, kernel
->id
);
181 p
= isl_printer_print_str(p
, "_dimGrid");
182 p
= print_grid_size(p
, kernel
);
183 p
= isl_printer_print_str(p
, ";");
184 p
= isl_printer_end_line(p
);
189 /* Print the arguments to a kernel declaration or call. If "types" is set,
190 * then print a declaration (including the types of the arguments).
192 * The arguments are printed in the following order
193 * - the arrays accessed by the kernel
195 * - the host loop iterators
197 static __isl_give isl_printer
*print_kernel_arguments(__isl_take isl_printer
*p
,
198 struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
, int types
)
206 for (i
= 0; i
< prog
->n_array
; ++i
) {
210 space
= isl_space_copy(prog
->array
[i
].dim
);
211 arr
= isl_union_set_extract_set(kernel
->arrays
, space
);
212 empty
= isl_set_fast_is_empty(arr
);
218 p
= isl_printer_print_str(p
, ", ");
221 p
= isl_printer_print_str(p
, prog
->array
[i
].type
);
222 p
= isl_printer_print_str(p
, " ");
225 if (gpu_array_is_read_only_scalar(&prog
->array
[i
])) {
226 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
229 p
= isl_printer_print_str(p
, "*");
231 p
= isl_printer_print_str(p
, "dev_");
232 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
238 space
= isl_union_set_get_space(kernel
->arrays
);
239 nparam
= isl_space_dim(space
, isl_dim_param
);
240 for (i
= 0; i
< nparam
; ++i
) {
243 name
= isl_space_get_dim_name(space
, isl_dim_param
, i
);
246 p
= isl_printer_print_str(p
, ", ");
248 p
= isl_printer_print_str(p
, "int ");
249 p
= isl_printer_print_str(p
, name
);
253 isl_space_free(space
);
255 n
= isl_space_dim(kernel
->space
, isl_dim_set
);
256 type
= isl_options_get_ast_iterator_type(prog
->ctx
);
257 for (i
= 0; i
< n
; ++i
) {
262 p
= isl_printer_print_str(p
, ", ");
263 name
= isl_space_get_dim_name(kernel
->space
, isl_dim_set
, i
);
265 p
= isl_printer_print_str(p
, type
);
266 p
= isl_printer_print_str(p
, " ");
268 p
= isl_printer_print_str(p
, name
);
276 /* Print the header of the given kernel.
278 static __isl_give isl_printer
*print_kernel_header(__isl_take isl_printer
*p
,
279 struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
)
281 p
= isl_printer_start_line(p
);
282 p
= isl_printer_print_str(p
, "__global__ void kernel");
283 p
= isl_printer_print_int(p
, kernel
->id
);
284 p
= isl_printer_print_str(p
, "(");
285 p
= print_kernel_arguments(p
, prog
, kernel
, 1);
286 p
= isl_printer_print_str(p
, ")");
291 /* Print the header of the given kernel to both gen->cuda.kernel_h
292 * and gen->cuda.kernel_c.
294 static void print_kernel_headers(struct gpu_prog
*prog
,
295 struct ppcg_kernel
*kernel
, struct cuda_info
*cuda
)
299 p
= isl_printer_to_file(prog
->ctx
, cuda
->kernel_h
);
300 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
301 p
= print_kernel_header(p
, prog
, kernel
);
302 p
= isl_printer_print_str(p
, ";");
303 p
= isl_printer_end_line(p
);
306 p
= isl_printer_to_file(prog
->ctx
, cuda
->kernel_c
);
307 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
308 p
= print_kernel_header(p
, prog
, kernel
);
309 p
= isl_printer_end_line(p
);
313 static void print_indent(FILE *dst
, int indent
)
315 fprintf(dst
, "%*s", indent
, "");
318 static void print_kernel_iterators(FILE *out
, struct ppcg_kernel
*kernel
)
321 const char *block_dims
[] = { "blockIdx.x", "blockIdx.y" };
322 const char *thread_dims
[] = { "threadIdx.x", "threadIdx.y",
325 if (kernel
->n_grid
> 0) {
326 print_indent(out
, 4);
327 fprintf(out
, "int ");
328 for (i
= 0; i
< kernel
->n_grid
; ++i
) {
331 fprintf(out
, "b%d = %s",
332 i
, block_dims
[kernel
->n_grid
- 1 - i
]);
337 if (kernel
->n_block
> 0) {
338 print_indent(out
, 4);
339 fprintf(out
, "int ");
340 for (i
= 0; i
< kernel
->n_block
; ++i
) {
343 fprintf(out
, "t%d = %s",
344 i
, thread_dims
[kernel
->n_block
- 1 - i
]);
350 static void print_kernel_var(FILE *out
, struct ppcg_kernel_var
*var
)
355 print_indent(out
, 4);
356 if (var
->type
== ppcg_access_shared
)
357 fprintf(out
, "__shared__ ");
358 fprintf(out
, "%s %s", var
->array
->type
, var
->name
);
360 for (j
= 0; j
< var
->array
->n_index
; ++j
) {
362 isl_vec_get_element(var
->size
, j
, &v
);
363 isl_int_print(out
, v
, 0);
370 static void print_kernel_vars(FILE *out
, struct ppcg_kernel
*kernel
)
374 for (i
= 0; i
< kernel
->n_var
; ++i
)
375 print_kernel_var(out
, &kernel
->var
[i
]);
378 /* Print an access to the element in the private/shared memory copy
379 * described by "stmt". The index of the copy is recorded in
380 * stmt->local_index as a "call" to the array.
382 static __isl_give isl_printer
*stmt_print_local_index(__isl_take isl_printer
*p
,
383 struct ppcg_kernel_stmt
*stmt
)
387 struct gpu_array_info
*array
= stmt
->u
.c
.array
;
389 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.local_index
, 0);
390 p
= isl_printer_print_ast_expr(p
, expr
);
391 isl_ast_expr_free(expr
);
393 for (i
= 0; i
< array
->n_index
; ++i
) {
394 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.local_index
, 1 + i
);
396 p
= isl_printer_print_str(p
, "[");
397 p
= isl_printer_print_ast_expr(p
, expr
);
398 p
= isl_printer_print_str(p
, "]");
400 isl_ast_expr_free(expr
);
406 /* Print an access to the element in the global memory copy
407 * described by "stmt". The index of the copy is recorded in
408 * stmt->index as a "call" to the array.
410 * The copy in global memory has been linearized, so we need to take
411 * the array size into account.
413 static __isl_give isl_printer
*stmt_print_global_index(
414 __isl_take isl_printer
*p
, struct ppcg_kernel_stmt
*stmt
)
417 struct gpu_array_info
*array
= stmt
->u
.c
.array
;
418 isl_pw_aff_list
*bound
= stmt
->u
.c
.local_array
->bound
;
420 if (gpu_array_is_scalar(array
)) {
421 if (!array
->read_only
)
422 p
= isl_printer_print_str(p
, "*");
423 p
= isl_printer_print_str(p
, array
->name
);
427 p
= isl_printer_print_str(p
, array
->name
);
428 p
= isl_printer_print_str(p
, "[");
429 for (i
= 0; i
+ 1 < array
->n_index
; ++i
)
430 p
= isl_printer_print_str(p
, "(");
431 for (i
= 0; i
< array
->n_index
; ++i
) {
433 expr
= isl_ast_expr_get_op_arg(stmt
->u
.c
.index
, 1 + i
);
436 bound_i
= isl_pw_aff_list_get_pw_aff(bound
, i
);
437 p
= isl_printer_print_str(p
, ") * (");
438 p
= isl_printer_print_pw_aff(p
, bound_i
);
439 p
= isl_printer_print_str(p
, ") + ");
440 isl_pw_aff_free(bound_i
);
442 p
= isl_printer_print_ast_expr(p
, expr
);
443 isl_ast_expr_free(expr
);
445 p
= isl_printer_print_str(p
, "]");
450 /* Print a copy statement.
452 * A read copy statement is printed as
456 * while a write copy statement is printed as
460 static __isl_give isl_printer
*print_copy(__isl_take isl_printer
*p
,
461 struct ppcg_kernel_stmt
*stmt
)
463 p
= isl_printer_start_line(p
);
464 if (stmt
->u
.c
.read
) {
465 p
= stmt_print_local_index(p
, stmt
);
466 p
= isl_printer_print_str(p
, " = ");
467 p
= stmt_print_global_index(p
, stmt
);
469 p
= stmt_print_global_index(p
, stmt
);
470 p
= isl_printer_print_str(p
, " = ");
471 p
= stmt_print_local_index(p
, stmt
);
473 p
= isl_printer_print_str(p
, ";");
474 p
= isl_printer_end_line(p
);
479 /* Print a sync statement.
481 static __isl_give isl_printer
*print_sync(__isl_take isl_printer
*p
,
482 struct ppcg_kernel_stmt
*stmt
)
484 p
= isl_printer_start_line(p
);
485 p
= isl_printer_print_str(p
, "__syncthreads();");
486 p
= isl_printer_end_line(p
);
491 /* Print an access based on the information in "access".
492 * If this an access to global memory, then the index expression
495 * If access->array is NULL, then we are
496 * accessing an iterator in the original program.
498 static __isl_give isl_printer
*print_access(__isl_take isl_printer
*p
,
499 struct ppcg_kernel_access
*access
)
503 struct gpu_array_info
*array
;
504 isl_pw_aff_list
*bound
;
506 array
= access
->array
;
507 bound
= array
? access
->local_array
->bound
: NULL
;
509 p
= isl_printer_print_str(p
, "(");
511 if (access
->type
== ppcg_access_global
&&
512 gpu_array_is_scalar(array
) && !array
->read_only
)
513 p
= isl_printer_print_str(p
, "*");
514 p
= isl_printer_print_str(p
, access
->local_name
);
515 if (gpu_array_is_scalar(array
))
517 p
= isl_printer_print_str(p
, "[");
520 n_index
= isl_ast_expr_list_n_ast_expr(access
->index
);
521 if (access
->type
== ppcg_access_global
)
522 for (i
= 0; i
+ 1 < n_index
; ++i
)
523 p
= isl_printer_print_str(p
, "(");
525 for (i
= 0; i
< n_index
; ++i
) {
528 index
= isl_ast_expr_list_get_ast_expr(access
->index
, i
);
530 if (access
->type
== ppcg_access_global
) {
532 bound_i
= isl_pw_aff_list_get_pw_aff(bound
, i
);
533 p
= isl_printer_print_str(p
, ") * (");
534 p
= isl_printer_print_pw_aff(p
, bound_i
);
535 p
= isl_printer_print_str(p
, ") + ");
536 isl_pw_aff_free(bound_i
);
538 p
= isl_printer_print_str(p
, "][");
540 p
= isl_printer_print_ast_expr(p
, index
);
541 isl_ast_expr_free(index
);
544 p
= isl_printer_print_str(p
, ")");
546 p
= isl_printer_print_str(p
, "]");
551 struct cuda_access_print_info
{
553 struct ppcg_kernel_stmt
*stmt
;
556 /* To print the cuda accesses we walk the list of cuda accesses simultaneously
557 * with the pet printer. This means that whenever the pet printer prints a
558 * pet access expression we have the corresponding cuda access available and can
559 * print the modified access.
561 static __isl_give isl_printer
*print_cuda_access(__isl_take isl_printer
*p
,
562 struct pet_expr
*expr
, void *usr
)
564 struct cuda_access_print_info
*info
=
565 (struct cuda_access_print_info
*) usr
;
567 p
= print_access(p
, &info
->stmt
->u
.d
.access
[info
->i
]);
573 static __isl_give isl_printer
*print_stmt_body(__isl_take isl_printer
*p
,
574 struct ppcg_kernel_stmt
*stmt
)
576 struct cuda_access_print_info info
;
581 p
= isl_printer_start_line(p
);
582 p
= print_pet_expr(p
, stmt
->u
.d
.stmt
->body
, &print_cuda_access
, &info
);
583 p
= isl_printer_print_str(p
, ";");
584 p
= isl_printer_end_line(p
);
589 /* This function is called for each user statement in the AST,
590 * i.e., for each kernel body statement, copy statement or sync statement.
592 static __isl_give isl_printer
*print_kernel_stmt(__isl_take isl_printer
*p
,
593 __isl_keep isl_ast_node
*node
, void *user
)
596 struct ppcg_kernel_stmt
*stmt
;
598 id
= isl_ast_node_get_annotation(node
);
599 stmt
= isl_id_get_user(id
);
602 switch (stmt
->type
) {
603 case ppcg_kernel_copy
:
604 return print_copy(p
, stmt
);
605 case ppcg_kernel_sync
:
606 return print_sync(p
, stmt
);
607 case ppcg_kernel_domain
:
608 return print_stmt_body(p
, stmt
);
614 static int print_macro(enum isl_ast_op_type type
, void *user
)
616 isl_printer
**p
= user
;
618 if (type
== isl_ast_op_fdiv_q
)
621 *p
= isl_ast_op_type_print_macro(type
, *p
);
626 /* Print the required macros for "node", including one for floord.
627 * We always print a macro for floord as it may also appear in the statements.
629 static __isl_give isl_printer
*print_macros(
630 __isl_keep isl_ast_node
*node
, __isl_take isl_printer
*p
)
632 p
= isl_ast_op_type_print_macro(isl_ast_op_fdiv_q
, p
);
633 if (isl_ast_node_foreach_ast_op_type(node
, &print_macro
, &p
) < 0)
634 return isl_printer_free(p
);
638 static void print_kernel(struct gpu_prog
*prog
, struct ppcg_kernel
*kernel
,
639 struct cuda_info
*cuda
)
641 isl_ctx
*ctx
= isl_ast_node_get_ctx(kernel
->tree
);
642 isl_ast_print_options
*print_options
;
645 print_kernel_headers(prog
, kernel
, cuda
);
646 fprintf(cuda
->kernel_c
, "{\n");
647 print_kernel_iterators(cuda
->kernel_c
, kernel
);
648 print_kernel_vars(cuda
->kernel_c
, kernel
);
649 fprintf(cuda
->kernel_c
, "\n");
651 print_options
= isl_ast_print_options_alloc(ctx
);
652 print_options
= isl_ast_print_options_set_print_user(print_options
,
653 &print_kernel_stmt
, NULL
);
655 p
= isl_printer_to_file(ctx
, cuda
->kernel_c
);
656 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
657 p
= isl_printer_indent(p
, 4);
658 p
= print_macros(kernel
->tree
, p
);
659 p
= isl_ast_node_print(kernel
->tree
, p
, print_options
);
662 isl_ast_print_options_free(print_options
);
664 fprintf(cuda
->kernel_c
, "}\n");
667 struct print_host_user_data
{
668 struct cuda_info
*cuda
;
669 struct gpu_prog
*prog
;
672 /* Print the user statement of the host code to "p".
674 * In particular, print a block of statements that defines the grid
675 * and the block and then launches the kernel.
677 static __isl_give isl_printer
*print_host_user(__isl_take isl_printer
*p
,
678 __isl_keep isl_ast_node
*node
, void *user
)
681 struct ppcg_kernel
*kernel
;
682 struct print_host_user_data
*data
;
684 id
= isl_ast_node_get_annotation(node
);
685 kernel
= isl_id_get_user(id
);
688 data
= (struct print_host_user_data
*) user
;
690 p
= isl_printer_start_line(p
);
691 p
= isl_printer_print_str(p
, "{");
692 p
= isl_printer_end_line(p
);
693 p
= isl_printer_indent(p
, 2);
695 p
= isl_printer_start_line(p
);
696 p
= isl_printer_print_str(p
, "dim3 k");
697 p
= isl_printer_print_int(p
, kernel
->id
);
698 p
= isl_printer_print_str(p
, "_dimBlock");
699 print_reverse_list(isl_printer_get_file(p
),
700 kernel
->n_block
, kernel
->block_dim
);
701 p
= isl_printer_print_str(p
, ";");
702 p
= isl_printer_end_line(p
);
704 p
= print_grid(p
, kernel
);
706 p
= isl_printer_start_line(p
);
707 p
= isl_printer_print_str(p
, "kernel");
708 p
= isl_printer_print_int(p
, kernel
->id
);
709 p
= isl_printer_print_str(p
, " <<<k");
710 p
= isl_printer_print_int(p
, kernel
->id
);
711 p
= isl_printer_print_str(p
, "_dimGrid, k");
712 p
= isl_printer_print_int(p
, kernel
->id
);
713 p
= isl_printer_print_str(p
, "_dimBlock>>> (");
714 p
= print_kernel_arguments(p
, data
->prog
, kernel
, 0);
715 p
= isl_printer_print_str(p
, ");");
716 p
= isl_printer_end_line(p
);
718 p
= isl_printer_start_line(p
);
719 p
= isl_printer_print_str(p
, "cudaCheckKernel();");
720 p
= isl_printer_end_line(p
);
722 p
= isl_printer_indent(p
, -2);
723 p
= isl_printer_start_line(p
);
724 p
= isl_printer_print_str(p
, "}");
725 p
= isl_printer_end_line(p
);
727 p
= isl_printer_start_line(p
);
728 p
= isl_printer_end_line(p
);
730 print_kernel(data
->prog
, kernel
, data
->cuda
);
735 static __isl_give isl_printer
*print_host_code(__isl_take isl_printer
*p
,
736 struct gpu_prog
*prog
, __isl_keep isl_ast_node
*tree
,
737 struct cuda_info
*cuda
)
739 isl_ast_print_options
*print_options
;
740 isl_ctx
*ctx
= isl_ast_node_get_ctx(tree
);
741 struct print_host_user_data data
= { cuda
, prog
};
743 print_options
= isl_ast_print_options_alloc(ctx
);
744 print_options
= isl_ast_print_options_set_print_user(print_options
,
745 &print_host_user
, &data
);
747 p
= print_macros(tree
, p
);
748 p
= isl_ast_node_print(tree
, p
, print_options
);
750 isl_ast_print_options_free(print_options
);
755 static __isl_give isl_printer
*copy_arrays_from_device(
756 __isl_take isl_printer
*p
, struct gpu_prog
*prog
)
759 isl_union_set
*write
;
760 write
= isl_union_map_range(isl_union_map_copy(prog
->write
));
762 for (i
= 0; i
< prog
->n_array
; ++i
) {
767 dim
= isl_space_copy(prog
->array
[i
].dim
);
768 write_i
= isl_union_set_extract_set(write
, dim
);
769 empty
= isl_set_fast_is_empty(write_i
);
770 isl_set_free(write_i
);
774 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaMemcpy(");
775 if (gpu_array_is_scalar(&prog
->array
[i
]))
776 p
= isl_printer_print_str(p
, "&");
777 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
778 p
= isl_printer_print_str(p
, ", dev_");
779 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
780 p
= isl_printer_print_str(p
, ", ");
781 p
= print_array_size(p
, &prog
->array
[i
]);
782 p
= isl_printer_print_str(p
, ", cudaMemcpyDeviceToHost));");
783 p
= isl_printer_end_line(p
);
786 isl_union_set_free(write
);
787 p
= isl_printer_start_line(p
);
788 p
= isl_printer_end_line(p
);
792 static __isl_give isl_printer
*free_device_arrays(__isl_take isl_printer
*p
,
793 struct gpu_prog
*prog
)
797 for (i
= 0; i
< prog
->n_array
; ++i
) {
798 if (gpu_array_is_read_only_scalar(&prog
->array
[i
]))
800 p
= isl_printer_print_str(p
, "cudaCheckReturn(cudaFree(dev_");
801 p
= isl_printer_print_str(p
, prog
->array
[i
].name
);
802 p
= isl_printer_print_str(p
, "));");
803 p
= isl_printer_end_line(p
);
809 int generate_cuda(isl_ctx
*ctx
, struct ppcg_scop
*scop
,
810 struct ppcg_options
*options
, const char *input
)
812 struct cuda_info cuda
;
813 struct gpu_prog
*prog
;
820 scop
->context
= add_context_from_str(scop
->context
, options
->ctx
);
822 prog
= gpu_prog_alloc(ctx
, scop
);
824 tree
= generate_gpu(ctx
, prog
, options
);
826 cuda_open_files(&cuda
, input
);
828 p
= isl_printer_to_file(ctx
, cuda
.host_c
);
829 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
830 p
= ppcg_start_block(p
);
832 p
= print_cuda_macros(p
);
834 p
= declare_device_arrays(p
, prog
);
835 p
= allocate_device_arrays(p
, prog
);
836 p
= copy_arrays_to_device(p
, prog
);
838 p
= print_host_code(p
, prog
, tree
, &cuda
);
839 isl_ast_node_free(tree
);
841 p
= copy_arrays_from_device(p
, prog
);
842 p
= free_device_arrays(p
, prog
);
844 p
= ppcg_end_block(p
);
847 cuda_close_files(&cuda
);