cuda.c

   1 /*
   2  * Copyright 2012      Ecole Normale Superieure
   3  *
   4  * Use of this software is governed by the MIT license
   5  *
   6  * Written by Sven Verdoolaege,
   7  * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
   8  */
   9
  10 #include <isl/aff.h>
  11 #include <isl/ast.h>
  12
  13 #include "cuda_common.h"
  14 #include "cuda.h"
  15 #include "gpu.h"
  16 #include "gpu_print.h"
  17 #include "print.h"
  18 #include "util.h"
  19
  20 static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
  21 {
  22         const char *macros =
  23                 "#define cudaCheckReturn(ret) \\\n"
  24                 "  do { \\\n"
  25                 "    cudaError_t cudaCheckReturn_e = (ret); \\\n"
  26                 "    if (cudaCheckReturn_e != cudaSuccess) { \\\n"
  27                 "      fprintf(stderr, \"CUDA error: %s\\n\", "
  28                 "cudaGetErrorString(cudaCheckReturn_e)); \\\n"
  29                 "      fflush(stderr); \\\n"
  30                 "    } \\\n"
  31                 "    assert(cudaCheckReturn_e == cudaSuccess); \\\n"
  32                 "  } while(0)\n"
  33                 "#define cudaCheckKernel() \\\n"
  34                 "  do { \\\n"
  35                 "    cudaCheckReturn(cudaGetLastError()); \\\n"
  36                 "  } while(0)\n\n";
  37
  38         p = isl_printer_print_str(p, macros);
  39         return p;
  40 }
  41
  42 /* Print a declaration for the device array corresponding to "array" on "p".
  43  */
  44 static __isl_give isl_printer *declare_device_array(__isl_take isl_printer *p,
  45         struct gpu_array_info *array)
  46 {
  47         int i;
  48
  49         p = isl_printer_start_line(p);
  50         p = isl_printer_print_str(p, array->type);
  51         p = isl_printer_print_str(p, " ");
  52         if (!array->linearize && array->n_index > 1)
  53                 p = isl_printer_print_str(p, "(");
  54         p = isl_printer_print_str(p, "*dev_");
  55         p = isl_printer_print_str(p, array->name);
  56         if (!array->linearize && array->n_index > 1) {
  57                 p = isl_printer_print_str(p, ")");
  58                 for (i = 1; i < array->n_index; i++) {
  59                         p = isl_printer_print_str(p, "[");
  60                         p = isl_printer_print_pw_aff(p, array->bound[i]);
  61                         p = isl_printer_print_str(p, "]");
  62                 }
  63         }
  64         p = isl_printer_print_str(p, ";");
  65         p = isl_printer_end_line(p);
  66
  67         return p;
  68 }
  69
  70 static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
  71         struct gpu_prog *prog)
  72 {
  73         int i;
  74
  75         for (i = 0; i < prog->n_array; ++i) {
  76                 if (!gpu_array_requires_device_allocation(&prog->array[i]))
  77                         continue;
  78
  79                 p = declare_device_array(p, &prog->array[i]);
  80         }
  81         p = isl_printer_start_line(p);
  82         p = isl_printer_end_line(p);
  83         return p;
  84 }
  85
  86 static __isl_give isl_printer *allocate_device_arrays(
  87         __isl_take isl_printer *p, struct gpu_prog *prog)
  88 {
  89         int i;
  90
  91         for (i = 0; i < prog->n_array; ++i) {
  92                 if (!gpu_array_requires_device_allocation(&prog->array[i]))
  93                         continue;
  94                 p = isl_printer_start_line(p);
  95                 p = isl_printer_print_str(p,
  96                         "cudaCheckReturn(cudaMalloc((void **) &dev_");
  97                 p = isl_printer_print_str(p, prog->array[i].name);
  98                 p = isl_printer_print_str(p, ", ");
  99                 p = gpu_array_info_print_size(p, &prog->array[i]);
 100                 p = isl_printer_print_str(p, "));");
 101                 p = isl_printer_end_line(p);
 102         }
 103         p = isl_printer_start_line(p);
 104         p = isl_printer_end_line(p);
 105         return p;
 106 }
 107
 108 /* Print code to "p" for copying "array" from the host to the device
 109  * in its entirety.  The bounds on the extent of "array" have
 110  * been precomputed in extract_array_info and are used in
 111  * gpu_array_info_print_size.
 112  */
 113 static __isl_give isl_printer *copy_array_to_device(__isl_take isl_printer *p,
 114         struct gpu_array_info *array)
 115 {
 116         p = isl_printer_start_line(p);
 117         p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
 118         p = isl_printer_print_str(p, array->name);
 119         p = isl_printer_print_str(p, ", ");
 120
 121         if (gpu_array_is_scalar(array))
 122                 p = isl_printer_print_str(p, "&");
 123         p = isl_printer_print_str(p, array->name);
 124         p = isl_printer_print_str(p, ", ");
 125
 126         p = gpu_array_info_print_size(p, array);
 127         p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
 128         p = isl_printer_end_line(p);
 129
 130         return p;
 131 }
 132
 133 /* Print code to "p" for copying "array" back from the device to the host
 134  * in its entirety.  The bounds on the extent of "array" have
 135  * been precomputed in extract_array_info and are used in
 136  * gpu_array_info_print_size.
 137  */
 138 static __isl_give isl_printer *copy_array_from_device(
 139         __isl_take isl_printer *p, struct gpu_array_info *array)
 140 {
 141         p = isl_printer_start_line(p);
 142         p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
 143         if (gpu_array_is_scalar(array))
 144                 p = isl_printer_print_str(p, "&");
 145         p = isl_printer_print_str(p, array->name);
 146         p = isl_printer_print_str(p, ", dev_");
 147         p = isl_printer_print_str(p, array->name);
 148         p = isl_printer_print_str(p, ", ");
 149         p = gpu_array_info_print_size(p, array);
 150         p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
 151         p = isl_printer_end_line(p);
 152
 153         return p;
 154 }
 155
 156 static void print_reverse_list(FILE *out, int len, int *list)
 157 {
 158         int i;
 159
 160         if (len == 0)
 161                 return;
 162
 163         fprintf(out, "(");
 164         for (i = 0; i < len; ++i) {
 165                 if (i)
 166                         fprintf(out, ", ");
 167                 fprintf(out, "%d", list[len - 1 - i]);
 168         }
 169         fprintf(out, ")");
 170 }
 171
 172 /* Print the effective grid size as a list of the sizes in each
 173  * dimension, from innermost to outermost.
 174  */
 175 static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
 176         struct ppcg_kernel *kernel)
 177 {
 178         int i;
 179         int dim;
 180
 181         dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
 182         if (dim == 0)
 183                 return p;
 184
 185         p = isl_printer_print_str(p, "(");
 186         for (i = dim - 1; i >= 0; --i) {
 187                 isl_pw_aff *bound;
 188
 189                 bound = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i);
 190                 p = isl_printer_print_pw_aff(p, bound);
 191                 isl_pw_aff_free(bound);
 192
 193                 if (i > 0)
 194                         p = isl_printer_print_str(p, ", ");
 195         }
 196
 197         p = isl_printer_print_str(p, ")");
 198
 199         return p;
 200 }
 201
 202 /* Print the grid definition.
 203  */
 204 static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
 205         struct ppcg_kernel *kernel)
 206 {
 207         p = isl_printer_start_line(p);
 208         p = isl_printer_print_str(p, "dim3 k");
 209         p = isl_printer_print_int(p, kernel->id);
 210         p = isl_printer_print_str(p, "_dimGrid");
 211         p = print_grid_size(p, kernel);
 212         p = isl_printer_print_str(p, ";");
 213         p = isl_printer_end_line(p);
 214
 215         return p;
 216 }
 217
 218 /* Print the arguments to a kernel declaration or call.  If "types" is set,
 219  * then print a declaration (including the types of the arguments).
 220  *
 221  * The arguments are printed in the following order
 222  * - the arrays accessed by the kernel
 223  * - the parameters
 224  * - the host loop iterators
 225  */
 226 static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
 227         struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
 228 {
 229         int i, n;
 230         int first = 1;
 231         unsigned nparam;
 232         isl_space *space;
 233         const char *type;
 234
 235         for (i = 0; i < prog->n_array; ++i) {
 236                 int required;
 237
 238                 required = ppcg_kernel_requires_array_argument(kernel, i);
 239                 if (required < 0)
 240                         return isl_printer_free(p);
 241                 if (!required)
 242                         continue;
 243
 244                 if (!first)
 245                         p = isl_printer_print_str(p, ", ");
 246
 247                 if (types)
 248                         p = gpu_array_info_print_declaration_argument(p,
 249                                 &prog->array[i], NULL);
 250                 else
 251                         p = gpu_array_info_print_call_argument(p,
 252                                 &prog->array[i]);
 253
 254                 first = 0;
 255         }
 256
 257         space = isl_union_set_get_space(kernel->arrays);
 258         nparam = isl_space_dim(space, isl_dim_param);
 259         for (i = 0; i < nparam; ++i) {
 260                 const char *name;
 261
 262                 name = isl_space_get_dim_name(space, isl_dim_param, i);
 263
 264                 if (!first)
 265                         p = isl_printer_print_str(p, ", ");
 266                 if (types)
 267                         p = isl_printer_print_str(p, "int ");
 268                 p = isl_printer_print_str(p, name);
 269
 270                 first = 0;
 271         }
 272         isl_space_free(space);
 273
 274         n = isl_space_dim(kernel->space, isl_dim_set);
 275         type = isl_options_get_ast_iterator_type(prog->ctx);
 276         for (i = 0; i < n; ++i) {
 277                 const char *name;
 278
 279                 if (!first)
 280                         p = isl_printer_print_str(p, ", ");
 281                 name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
 282                 if (types) {
 283                         p = isl_printer_print_str(p, type);
 284                         p = isl_printer_print_str(p, " ");
 285                 }
 286                 p = isl_printer_print_str(p, name);
 287
 288                 first = 0;
 289         }
 290
 291         return p;
 292 }
 293
 294 /* Print the header of the given kernel.
 295  */
 296 static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
 297         struct gpu_prog *prog, struct ppcg_kernel *kernel)
 298 {
 299         p = isl_printer_start_line(p);
 300         p = isl_printer_print_str(p, "__global__ void kernel");
 301         p = isl_printer_print_int(p, kernel->id);
 302         p = isl_printer_print_str(p, "(");
 303         p = print_kernel_arguments(p, prog, kernel, 1);
 304         p = isl_printer_print_str(p, ")");
 305
 306         return p;
 307 }
 308
 309 /* Print the header of the given kernel to both gen->cuda.kernel_h
 310  * and gen->cuda.kernel_c.
 311  */
 312 static void print_kernel_headers(struct gpu_prog *prog,
 313         struct ppcg_kernel *kernel, struct cuda_info *cuda)
 314 {
 315         isl_printer *p;
 316
 317         p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
 318         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 319         p = print_kernel_header(p, prog, kernel);
 320         p = isl_printer_print_str(p, ";");
 321         p = isl_printer_end_line(p);
 322         isl_printer_free(p);
 323
 324         p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
 325         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 326         p = print_kernel_header(p, prog, kernel);
 327         p = isl_printer_end_line(p);
 328         isl_printer_free(p);
 329 }
 330
 331 static void print_indent(FILE *dst, int indent)
 332 {
 333         fprintf(dst, "%*s", indent, "");
 334 }
 335
 336 /* Print a list of iterators of type "type" with names "ids" to "out".
 337  * Each iterator is assigned one of the cuda identifiers in cuda_dims.
 338  * In particular, the last iterator is assigned the x identifier
 339  * (the first in the list of cuda identifiers).
 340  */
 341 static void print_iterators(FILE *out, const char *type,
 342         __isl_keep isl_id_list *ids, const char *cuda_dims[])
 343 {
 344         int i, n;
 345
 346         n = isl_id_list_n_id(ids);
 347         if (n <= 0)
 348                 return;
 349         print_indent(out, 4);
 350         fprintf(out, "%s ", type);
 351         for (i = 0; i < n; ++i) {
 352                 isl_id *id;
 353
 354                 if (i)
 355                         fprintf(out, ", ");
 356                 id = isl_id_list_get_id(ids, i);
 357                 fprintf(out, "%s = %s", isl_id_get_name(id),
 358                         cuda_dims[n - 1 - i]);
 359                 isl_id_free(id);
 360         }
 361         fprintf(out, ";\n");
 362 }
 363
 364 static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
 365 {
 366         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 367         const char *type;
 368         const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
 369         const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
 370                                         "threadIdx.z" };
 371
 372         type = isl_options_get_ast_iterator_type(ctx);
 373
 374         print_iterators(out, type, kernel->block_ids, block_dims);
 375         print_iterators(out, type, kernel->thread_ids, thread_dims);
 376 }
 377
 378 static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
 379         struct ppcg_kernel_var *var)
 380 {
 381         int j;
 382
 383         p = isl_printer_start_line(p);
 384         if (var->type == ppcg_access_shared)
 385                 p = isl_printer_print_str(p, "__shared__ ");
 386         p = isl_printer_print_str(p, var->array->type);
 387         p = isl_printer_print_str(p, " ");
 388         p = isl_printer_print_str(p,  var->name);
 389         for (j = 0; j < var->array->n_index; ++j) {
 390                 isl_val *v;
 391
 392                 p = isl_printer_print_str(p, "[");
 393                 v = isl_vec_get_element_val(var->size, j);
 394                 p = isl_printer_print_val(p, v);
 395                 isl_val_free(v);
 396                 p = isl_printer_print_str(p, "]");
 397         }
 398         p = isl_printer_print_str(p, ";");
 399         p = isl_printer_end_line(p);
 400
 401         return p;
 402 }
 403
 404 static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
 405         struct ppcg_kernel *kernel)
 406 {
 407         int i;
 408
 409         for (i = 0; i < kernel->n_var; ++i)
 410                 p = print_kernel_var(p, &kernel->var[i]);
 411
 412         return p;
 413 }
 414
 415 /* Print a sync statement.
 416  */
 417 static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
 418         struct ppcg_kernel_stmt *stmt)
 419 {
 420         p = isl_printer_start_line(p);
 421         p = isl_printer_print_str(p, "__syncthreads();");
 422         p = isl_printer_end_line(p);
 423
 424         return p;
 425 }
 426
 427 /* This function is called for each user statement in the AST,
 428  * i.e., for each kernel body statement, copy statement or sync statement.
 429  */
 430 static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
 431         __isl_take isl_ast_print_options *print_options,
 432         __isl_keep isl_ast_node *node, void *user)
 433 {
 434         isl_id *id;
 435         struct ppcg_kernel_stmt *stmt;
 436
 437         id = isl_ast_node_get_annotation(node);
 438         stmt = isl_id_get_user(id);
 439         isl_id_free(id);
 440
 441         isl_ast_print_options_free(print_options);
 442
 443         switch (stmt->type) {
 444         case ppcg_kernel_copy:
 445                 return ppcg_kernel_print_copy(p, stmt);
 446         case ppcg_kernel_sync:
 447                 return print_sync(p, stmt);
 448         case ppcg_kernel_domain:
 449                 return ppcg_kernel_print_domain(p, stmt);
 450         }
 451
 452         return p;
 453 }
 454
 455 static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
 456         struct cuda_info *cuda)
 457 {
 458         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 459         isl_ast_print_options *print_options;
 460         isl_printer *p;
 461
 462         print_kernel_headers(prog, kernel, cuda);
 463         fprintf(cuda->kernel_c, "{\n");
 464         print_kernel_iterators(cuda->kernel_c, kernel);
 465
 466         p = isl_printer_to_file(ctx, cuda->kernel_c);
 467         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 468         p = isl_printer_indent(p, 4);
 469
 470         p = print_kernel_vars(p, kernel);
 471         p = isl_printer_end_line(p);
 472         p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
 473         p = ppcg_print_macros(p, kernel->tree);
 474
 475         print_options = isl_ast_print_options_alloc(ctx);
 476         print_options = isl_ast_print_options_set_print_user(print_options,
 477                                                     &print_kernel_stmt, NULL);
 478         p = isl_ast_node_print(kernel->tree, p, print_options);
 479         isl_printer_free(p);
 480
 481         fprintf(cuda->kernel_c, "}\n");
 482 }
 483
 484 /* Print a statement for copying an array to or from the device.
 485  * The statement identifier is called "to_device_<array name>" or
 486  * "from_device_<array name>" and its user pointer points
 487  * to the gpu_array_info of the array that needs to be copied.
 488  *
 489  * Extract the array from the identifier and call
 490  * copy_array_to_device or copy_array_from_device.
 491  */
 492 static __isl_give isl_printer *print_to_from_device(__isl_take isl_printer *p,
 493         __isl_keep isl_ast_node *node, struct gpu_prog *prog)
 494 {
 495         isl_ast_expr *expr, *arg;
 496         isl_id *id;
 497         const char *name;
 498         struct gpu_array_info *array;
 499
 500         expr = isl_ast_node_user_get_expr(node);
 501         arg = isl_ast_expr_get_op_arg(expr, 0);
 502         id = isl_ast_expr_get_id(arg);
 503         name = isl_id_get_name(id);
 504         array = isl_id_get_user(id);
 505         isl_id_free(id);
 506         isl_ast_expr_free(arg);
 507         isl_ast_expr_free(expr);
 508
 509         if (!name)
 510                 array = NULL;
 511         if (!array)
 512                 return isl_printer_free(p);
 513
 514         if (!prefixcmp(name, "to_device"))
 515                 return copy_array_to_device(p, array);
 516         else
 517                 return copy_array_from_device(p, array);
 518 }
 519
 520 struct print_host_user_data {
 521         struct cuda_info *cuda;
 522         struct gpu_prog *prog;
 523 };
 524
 525 /* Print the user statement of the host code to "p".
 526  *
 527  * The host code may contain original user statements, kernel launches and
 528  * statements that copy data to/from the device.
 529  * The original user statements and the kernel launches have
 530  * an associated annotation, while the data copy statements do not.
 531  * The latter are handled by print_to_from_device.
 532  * The annotation on the user statements is called "user".
 533  *
 534  * In case of a kernel launch, print a block of statements that
 535  * defines the grid and the block and then launches the kernel.
 536  */
 537 static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
 538         __isl_take isl_ast_print_options *print_options,
 539         __isl_keep isl_ast_node *node, void *user)
 540 {
 541         isl_id *id;
 542         int is_user;
 543         struct ppcg_kernel *kernel;
 544         struct ppcg_kernel_stmt *stmt;
 545         struct print_host_user_data *data;
 546
 547         isl_ast_print_options_free(print_options);
 548
 549         data = (struct print_host_user_data *) user;
 550
 551         id = isl_ast_node_get_annotation(node);
 552         if (!id)
 553                 return print_to_from_device(p, node, data->prog);
 554
 555         is_user = !strcmp(isl_id_get_name(id), "user");
 556         kernel = is_user ? NULL : isl_id_get_user(id);
 557         stmt = is_user ? isl_id_get_user(id) : NULL;
 558         isl_id_free(id);
 559
 560         if (is_user)
 561                 return ppcg_kernel_print_domain(p, stmt);
 562
 563         p = isl_printer_start_line(p);
 564         p = isl_printer_print_str(p, "{");
 565         p = isl_printer_end_line(p);
 566         p = isl_printer_indent(p, 2);
 567
 568         p = isl_printer_start_line(p);
 569         p = isl_printer_print_str(p, "dim3 k");
 570         p = isl_printer_print_int(p, kernel->id);
 571         p = isl_printer_print_str(p, "_dimBlock");
 572         print_reverse_list(isl_printer_get_file(p),
 573                                 kernel->n_block, kernel->block_dim);
 574         p = isl_printer_print_str(p, ";");
 575         p = isl_printer_end_line(p);
 576
 577         p = print_grid(p, kernel);
 578
 579         p = isl_printer_start_line(p);
 580         p = isl_printer_print_str(p, "kernel");
 581         p = isl_printer_print_int(p, kernel->id);
 582         p = isl_printer_print_str(p, " <<<k");
 583         p = isl_printer_print_int(p, kernel->id);
 584         p = isl_printer_print_str(p, "_dimGrid, k");
 585         p = isl_printer_print_int(p, kernel->id);
 586         p = isl_printer_print_str(p, "_dimBlock>>> (");
 587         p = print_kernel_arguments(p, data->prog, kernel, 0);
 588         p = isl_printer_print_str(p, ");");
 589         p = isl_printer_end_line(p);
 590
 591         p = isl_printer_start_line(p);
 592         p = isl_printer_print_str(p, "cudaCheckKernel();");
 593         p = isl_printer_end_line(p);
 594
 595         p = isl_printer_indent(p, -2);
 596         p = isl_printer_start_line(p);
 597         p = isl_printer_print_str(p, "}");
 598         p = isl_printer_end_line(p);
 599
 600         p = isl_printer_start_line(p);
 601         p = isl_printer_end_line(p);
 602
 603         print_kernel(data->prog, kernel, data->cuda);
 604
 605         return p;
 606 }
 607
 608 static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
 609         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 610         struct cuda_info *cuda)
 611 {
 612         isl_ast_print_options *print_options;
 613         isl_ctx *ctx = isl_ast_node_get_ctx(tree);
 614         struct print_host_user_data data = { cuda, prog };
 615
 616         print_options = isl_ast_print_options_alloc(ctx);
 617         print_options = isl_ast_print_options_set_print_user(print_options,
 618                                                 &print_host_user, &data);
 619
 620         p = ppcg_print_macros(p, tree);
 621         p = isl_ast_node_print(tree, p, print_options);
 622
 623         return p;
 624 }
 625
 626 static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
 627         struct gpu_prog *prog)
 628 {
 629         int i;
 630
 631         for (i = 0; i < prog->n_array; ++i) {
 632                 if (!gpu_array_requires_device_allocation(&prog->array[i]))
 633                         continue;
 634                 p = isl_printer_start_line(p);
 635                 p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
 636                 p = isl_printer_print_str(p, prog->array[i].name);
 637                 p = isl_printer_print_str(p, "));");
 638                 p = isl_printer_end_line(p);
 639         }
 640
 641         return p;
 642 }
 643
 644 /* Given a gpu_prog "prog" and the corresponding transformed AST
 645  * "tree", print the entire CUDA code to "p".
 646  * "types" collects the types for which a definition has already
 647  * been printed.
 648  */
 649 static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
 650         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 651         struct gpu_types *types, void *user)
 652 {
 653         struct cuda_info *cuda = user;
 654         isl_printer *kernel;
 655
 656         kernel = isl_printer_to_file(isl_printer_get_ctx(p), cuda->kernel_c);
 657         kernel = isl_printer_set_output_format(kernel, ISL_FORMAT_C);
 658         kernel = gpu_print_types(kernel, types, prog);
 659         isl_printer_free(kernel);
 660
 661         if (!kernel)
 662                 return isl_printer_free(p);
 663
 664         p = ppcg_start_block(p);
 665
 666         p = print_cuda_macros(p);
 667
 668         p = gpu_print_local_declarations(p, prog);
 669         p = declare_device_arrays(p, prog);
 670         p = allocate_device_arrays(p, prog);
 671
 672         p = print_host_code(p, prog, tree, cuda);
 673
 674         p = free_device_arrays(p, prog);
 675
 676         p = ppcg_end_block(p);
 677
 678         return p;
 679 }
 680
 681 /* Transform the code in the file called "input" by replacing
 682  * all scops by corresponding CUDA code.
 683  * The names of the output files are derived from "input".
 684  *
 685  * We let generate_gpu do all the hard work and then let it call
 686  * us back for printing the AST in print_cuda.
 687  *
 688  * To prepare for this printing, we first open the output files
 689  * and we close them after generate_gpu has finished.
 690  */
 691 int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
 692         const char *input)
 693 {
 694         struct cuda_info cuda;
 695         int r;
 696
 697         cuda_open_files(&cuda, input);
 698
 699         r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
 700
 701         cuda_close_files(&cuda);
 702
 703         return r;
 704 }