cuda.c

   1 /*
   2  * Copyright 2012      Ecole Normale Superieure
   3  *
   4  * Use of this software is governed by the MIT license
   5  *
   6  * Written by Sven Verdoolaege,
   7  * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
   8  */
   9
  10 #include <isl/aff.h>
  11 #include <isl/ast.h>
  12
  13 #include "cuda_common.h"
  14 #include "cuda.h"
  15 #include "gpu.h"
  16 #include "gpu_print.h"
  17 #include "print.h"
  18 #include "util.h"
  19
  20 static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
  21 {
  22         const char *macros =
  23                 "#define cudaCheckReturn(ret) \\\n"
  24                 "  do { \\\n"
  25                 "    cudaError_t cudaCheckReturn_e = (ret); \\\n"
  26                 "    if (cudaCheckReturn_e != cudaSuccess) { \\\n"
  27                 "      fprintf(stderr, \"CUDA error: %s\\n\", "
  28                 "cudaGetErrorString(cudaCheckReturn_e)); \\\n"
  29                 "      fflush(stderr); \\\n"
  30                 "    } \\\n"
  31                 "    assert(cudaCheckReturn_e == cudaSuccess); \\\n"
  32                 "  } while(0)\n"
  33                 "#define cudaCheckKernel() \\\n"
  34                 "  do { \\\n"
  35                 "    cudaCheckReturn(cudaGetLastError()); \\\n"
  36                 "  } while(0)\n\n";
  37
  38         p = isl_printer_print_str(p, macros);
  39         return p;
  40 }
  41
  42 /* Print a declaration for the device array corresponding to "array" on "p".
  43  */
  44 static __isl_give isl_printer *declare_device_array(__isl_take isl_printer *p,
  45         struct gpu_array_info *array)
  46 {
  47         int i;
  48
  49         p = isl_printer_start_line(p);
  50         p = isl_printer_print_str(p, array->type);
  51         p = isl_printer_print_str(p, " ");
  52         if (!array->linearize && array->n_index > 1)
  53                 p = isl_printer_print_str(p, "(");
  54         p = isl_printer_print_str(p, "*dev_");
  55         p = isl_printer_print_str(p, array->name);
  56         if (!array->linearize && array->n_index > 1) {
  57                 p = isl_printer_print_str(p, ")");
  58                 for (i = 1; i < array->n_index; i++) {
  59                         p = isl_printer_print_str(p, "[");
  60                         p = isl_printer_print_pw_aff(p, array->bound[i]);
  61                         p = isl_printer_print_str(p, "]");
  62                 }
  63         }
  64         p = isl_printer_print_str(p, ";");
  65         p = isl_printer_end_line(p);
  66
  67         return p;
  68 }
  69
  70 static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
  71         struct gpu_prog *prog)
  72 {
  73         int i;
  74
  75         for (i = 0; i < prog->n_array; ++i) {
  76                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
  77                         continue;
  78                 if (!prog->array[i].accessed)
  79                         continue;
  80
  81                 p = declare_device_array(p, &prog->array[i]);
  82         }
  83         p = isl_printer_start_line(p);
  84         p = isl_printer_end_line(p);
  85         return p;
  86 }
  87
  88 static __isl_give isl_printer *allocate_device_arrays(
  89         __isl_take isl_printer *p, struct gpu_prog *prog)
  90 {
  91         int i;
  92
  93         for (i = 0; i < prog->n_array; ++i) {
  94                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
  95                         continue;
  96                 if (!prog->array[i].accessed)
  97                         continue;
  98                 p = isl_printer_start_line(p);
  99                 p = isl_printer_print_str(p,
 100                         "cudaCheckReturn(cudaMalloc((void **) &dev_");
 101                 p = isl_printer_print_str(p, prog->array[i].name);
 102                 p = isl_printer_print_str(p, ", ");
 103                 p = gpu_array_info_print_size(p, &prog->array[i]);
 104                 p = isl_printer_print_str(p, "));");
 105                 p = isl_printer_end_line(p);
 106         }
 107         p = isl_printer_start_line(p);
 108         p = isl_printer_end_line(p);
 109         return p;
 110 }
 111
 112 /* Print code to "p" for copying "array" from the host to the device
 113  * in its entirety.  The bounds on the extent of "array" have
 114  * been precomputed in extract_array_info and are used in
 115  * gpu_array_info_print_size.
 116  */
 117 static __isl_give isl_printer *copy_array_to_device(__isl_take isl_printer *p,
 118         struct gpu_array_info *array)
 119 {
 120         p = isl_printer_start_line(p);
 121         p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
 122         p = isl_printer_print_str(p, array->name);
 123         p = isl_printer_print_str(p, ", ");
 124
 125         if (gpu_array_is_scalar(array))
 126                 p = isl_printer_print_str(p, "&");
 127         p = isl_printer_print_str(p, array->name);
 128         p = isl_printer_print_str(p, ", ");
 129
 130         p = gpu_array_info_print_size(p, array);
 131         p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
 132         p = isl_printer_end_line(p);
 133
 134         return p;
 135 }
 136
 137 /* Print code to "p" for copying "array" back from the device to the host
 138  * in its entirety.  The bounds on the extent of "array" have
 139  * been precomputed in extract_array_info and are used in
 140  * gpu_array_info_print_size.
 141  */
 142 static __isl_give isl_printer *copy_array_from_device(
 143         __isl_take isl_printer *p, struct gpu_array_info *array)
 144 {
 145         p = isl_printer_start_line(p);
 146         p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
 147         if (gpu_array_is_scalar(array))
 148                 p = isl_printer_print_str(p, "&");
 149         p = isl_printer_print_str(p, array->name);
 150         p = isl_printer_print_str(p, ", dev_");
 151         p = isl_printer_print_str(p, array->name);
 152         p = isl_printer_print_str(p, ", ");
 153         p = gpu_array_info_print_size(p, array);
 154         p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
 155         p = isl_printer_end_line(p);
 156
 157         return p;
 158 }
 159
 160 static void print_reverse_list(FILE *out, int len, int *list)
 161 {
 162         int i;
 163
 164         if (len == 0)
 165                 return;
 166
 167         fprintf(out, "(");
 168         for (i = 0; i < len; ++i) {
 169                 if (i)
 170                         fprintf(out, ", ");
 171                 fprintf(out, "%d", list[len - 1 - i]);
 172         }
 173         fprintf(out, ")");
 174 }
 175
 176 /* Print the effective grid size as a list of the sizes in each
 177  * dimension, from innermost to outermost.
 178  */
 179 static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
 180         struct ppcg_kernel *kernel)
 181 {
 182         int i;
 183         int dim;
 184
 185         dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
 186         if (dim == 0)
 187                 return p;
 188
 189         p = isl_printer_print_str(p, "(");
 190         for (i = dim - 1; i >= 0; --i) {
 191                 isl_pw_aff *bound;
 192
 193                 bound = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i);
 194                 p = isl_printer_print_pw_aff(p, bound);
 195                 isl_pw_aff_free(bound);
 196
 197                 if (i > 0)
 198                         p = isl_printer_print_str(p, ", ");
 199         }
 200
 201         p = isl_printer_print_str(p, ")");
 202
 203         return p;
 204 }
 205
 206 /* Print the grid definition.
 207  */
 208 static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
 209         struct ppcg_kernel *kernel)
 210 {
 211         p = isl_printer_start_line(p);
 212         p = isl_printer_print_str(p, "dim3 k");
 213         p = isl_printer_print_int(p, kernel->id);
 214         p = isl_printer_print_str(p, "_dimGrid");
 215         p = print_grid_size(p, kernel);
 216         p = isl_printer_print_str(p, ";");
 217         p = isl_printer_end_line(p);
 218
 219         return p;
 220 }
 221
 222 /* Print the arguments to a kernel declaration or call.  If "types" is set,
 223  * then print a declaration (including the types of the arguments).
 224  *
 225  * The arguments are printed in the following order
 226  * - the arrays accessed by the kernel
 227  * - the parameters
 228  * - the host loop iterators
 229  */
 230 static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
 231         struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
 232 {
 233         int i, n;
 234         int first = 1;
 235         unsigned nparam;
 236         isl_space *space;
 237         const char *type;
 238
 239         for (i = 0; i < prog->n_array; ++i) {
 240                 int required;
 241
 242                 required = ppcg_kernel_requires_array_argument(kernel, i);
 243                 if (required < 0)
 244                         return isl_printer_free(p);
 245                 if (!required)
 246                         continue;
 247
 248                 if (!first)
 249                         p = isl_printer_print_str(p, ", ");
 250
 251                 if (types)
 252                         p = gpu_array_info_print_declaration_argument(p,
 253                                 &prog->array[i], NULL);
 254                 else
 255                         p = gpu_array_info_print_call_argument(p,
 256                                 &prog->array[i]);
 257
 258                 first = 0;
 259         }
 260
 261         space = isl_union_set_get_space(kernel->arrays);
 262         nparam = isl_space_dim(space, isl_dim_param);
 263         for (i = 0; i < nparam; ++i) {
 264                 const char *name;
 265
 266                 name = isl_space_get_dim_name(space, isl_dim_param, i);
 267
 268                 if (!first)
 269                         p = isl_printer_print_str(p, ", ");
 270                 if (types)
 271                         p = isl_printer_print_str(p, "int ");
 272                 p = isl_printer_print_str(p, name);
 273
 274                 first = 0;
 275         }
 276         isl_space_free(space);
 277
 278         n = isl_space_dim(kernel->space, isl_dim_set);
 279         type = isl_options_get_ast_iterator_type(prog->ctx);
 280         for (i = 0; i < n; ++i) {
 281                 const char *name;
 282
 283                 if (!first)
 284                         p = isl_printer_print_str(p, ", ");
 285                 name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
 286                 if (types) {
 287                         p = isl_printer_print_str(p, type);
 288                         p = isl_printer_print_str(p, " ");
 289                 }
 290                 p = isl_printer_print_str(p, name);
 291
 292                 first = 0;
 293         }
 294
 295         return p;
 296 }
 297
 298 /* Print the header of the given kernel.
 299  */
 300 static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
 301         struct gpu_prog *prog, struct ppcg_kernel *kernel)
 302 {
 303         p = isl_printer_start_line(p);
 304         p = isl_printer_print_str(p, "__global__ void kernel");
 305         p = isl_printer_print_int(p, kernel->id);
 306         p = isl_printer_print_str(p, "(");
 307         p = print_kernel_arguments(p, prog, kernel, 1);
 308         p = isl_printer_print_str(p, ")");
 309
 310         return p;
 311 }
 312
 313 /* Print the header of the given kernel to both gen->cuda.kernel_h
 314  * and gen->cuda.kernel_c.
 315  */
 316 static void print_kernel_headers(struct gpu_prog *prog,
 317         struct ppcg_kernel *kernel, struct cuda_info *cuda)
 318 {
 319         isl_printer *p;
 320
 321         p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
 322         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 323         p = print_kernel_header(p, prog, kernel);
 324         p = isl_printer_print_str(p, ";");
 325         p = isl_printer_end_line(p);
 326         isl_printer_free(p);
 327
 328         p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
 329         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 330         p = print_kernel_header(p, prog, kernel);
 331         p = isl_printer_end_line(p);
 332         isl_printer_free(p);
 333 }
 334
 335 static void print_indent(FILE *dst, int indent)
 336 {
 337         fprintf(dst, "%*s", indent, "");
 338 }
 339
 340 /* Print a list of iterators of type "type" with names "ids" to "out".
 341  * Each iterator is assigned one of the cuda identifiers in cuda_dims.
 342  * In particular, the last iterator is assigned the x identifier
 343  * (the first in the list of cuda identifiers).
 344  */
 345 static void print_iterators(FILE *out, const char *type,
 346         __isl_keep isl_id_list *ids, const char *cuda_dims[])
 347 {
 348         int i, n;
 349
 350         n = isl_id_list_n_id(ids);
 351         if (n <= 0)
 352                 return;
 353         print_indent(out, 4);
 354         fprintf(out, "%s ", type);
 355         for (i = 0; i < n; ++i) {
 356                 isl_id *id;
 357
 358                 if (i)
 359                         fprintf(out, ", ");
 360                 id = isl_id_list_get_id(ids, i);
 361                 fprintf(out, "%s = %s", isl_id_get_name(id),
 362                         cuda_dims[n - 1 - i]);
 363                 isl_id_free(id);
 364         }
 365         fprintf(out, ";\n");
 366 }
 367
 368 static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
 369 {
 370         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 371         const char *type;
 372         const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
 373         const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
 374                                         "threadIdx.z" };
 375
 376         type = isl_options_get_ast_iterator_type(ctx);
 377
 378         print_iterators(out, type, kernel->block_ids, block_dims);
 379         print_iterators(out, type, kernel->thread_ids, thread_dims);
 380 }
 381
 382 static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
 383         struct ppcg_kernel_var *var)
 384 {
 385         int j;
 386
 387         p = isl_printer_start_line(p);
 388         if (var->type == ppcg_access_shared)
 389                 p = isl_printer_print_str(p, "__shared__ ");
 390         p = isl_printer_print_str(p, var->array->type);
 391         p = isl_printer_print_str(p, " ");
 392         p = isl_printer_print_str(p,  var->name);
 393         for (j = 0; j < var->array->n_index; ++j) {
 394                 isl_val *v;
 395
 396                 p = isl_printer_print_str(p, "[");
 397                 v = isl_vec_get_element_val(var->size, j);
 398                 p = isl_printer_print_val(p, v);
 399                 isl_val_free(v);
 400                 p = isl_printer_print_str(p, "]");
 401         }
 402         p = isl_printer_print_str(p, ";");
 403         p = isl_printer_end_line(p);
 404
 405         return p;
 406 }
 407
 408 static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
 409         struct ppcg_kernel *kernel)
 410 {
 411         int i;
 412
 413         for (i = 0; i < kernel->n_var; ++i)
 414                 p = print_kernel_var(p, &kernel->var[i]);
 415
 416         return p;
 417 }
 418
 419 /* Print a sync statement.
 420  */
 421 static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
 422         struct ppcg_kernel_stmt *stmt)
 423 {
 424         p = isl_printer_start_line(p);
 425         p = isl_printer_print_str(p, "__syncthreads();");
 426         p = isl_printer_end_line(p);
 427
 428         return p;
 429 }
 430
 431 /* This function is called for each user statement in the AST,
 432  * i.e., for each kernel body statement, copy statement or sync statement.
 433  */
 434 static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
 435         __isl_take isl_ast_print_options *print_options,
 436         __isl_keep isl_ast_node *node, void *user)
 437 {
 438         isl_id *id;
 439         struct ppcg_kernel_stmt *stmt;
 440
 441         id = isl_ast_node_get_annotation(node);
 442         stmt = isl_id_get_user(id);
 443         isl_id_free(id);
 444
 445         isl_ast_print_options_free(print_options);
 446
 447         switch (stmt->type) {
 448         case ppcg_kernel_copy:
 449                 return ppcg_kernel_print_copy(p, stmt);
 450         case ppcg_kernel_sync:
 451                 return print_sync(p, stmt);
 452         case ppcg_kernel_domain:
 453                 return ppcg_kernel_print_domain(p, stmt);
 454         }
 455
 456         return p;
 457 }
 458
 459 static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
 460         struct cuda_info *cuda)
 461 {
 462         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 463         isl_ast_print_options *print_options;
 464         isl_printer *p;
 465
 466         print_kernel_headers(prog, kernel, cuda);
 467         fprintf(cuda->kernel_c, "{\n");
 468         print_kernel_iterators(cuda->kernel_c, kernel);
 469
 470         p = isl_printer_to_file(ctx, cuda->kernel_c);
 471         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 472         p = isl_printer_indent(p, 4);
 473
 474         p = print_kernel_vars(p, kernel);
 475         p = isl_printer_end_line(p);
 476         p = gpu_print_macros(p, kernel->tree);
 477
 478         print_options = isl_ast_print_options_alloc(ctx);
 479         print_options = isl_ast_print_options_set_print_user(print_options,
 480                                                     &print_kernel_stmt, NULL);
 481         p = isl_ast_node_print(kernel->tree, p, print_options);
 482         isl_printer_free(p);
 483
 484         fprintf(cuda->kernel_c, "}\n");
 485 }
 486
 487 /* Print a statement for copying an array to or from the device.
 488  * The statement identifier is called "to_device_<array name>" or
 489  * "from_device_<array name>" and its user pointer points
 490  * to the gpu_array_info of the array that needs to be copied.
 491  *
 492  * Extract the array from the identifier and call
 493  * copy_array_to_device or copy_array_from_device.
 494  */
 495 static __isl_give isl_printer *print_to_from_device(__isl_take isl_printer *p,
 496         __isl_keep isl_ast_node *node, struct gpu_prog *prog)
 497 {
 498         isl_ast_expr *expr, *arg;
 499         isl_id *id;
 500         const char *name;
 501         struct gpu_array_info *array;
 502
 503         expr = isl_ast_node_user_get_expr(node);
 504         arg = isl_ast_expr_get_op_arg(expr, 0);
 505         id = isl_ast_expr_get_id(arg);
 506         name = isl_id_get_name(id);
 507         array = isl_id_get_user(id);
 508         isl_id_free(id);
 509         isl_ast_expr_free(arg);
 510         isl_ast_expr_free(expr);
 511
 512         if (!name)
 513                 array = NULL;
 514         if (!array)
 515                 return isl_printer_free(p);
 516
 517         if (!prefixcmp(name, "to_device"))
 518                 return copy_array_to_device(p, array);
 519         else
 520                 return copy_array_from_device(p, array);
 521 }
 522
 523 struct print_host_user_data {
 524         struct cuda_info *cuda;
 525         struct gpu_prog *prog;
 526 };
 527
 528 /* Print the user statement of the host code to "p".
 529  *
 530  * The host code may contain original user statements, kernel launches and
 531  * statements that copy data to/from the device.
 532  * The original user statements and the kernel launches have
 533  * an associated annotation, while the data copy statements do not.
 534  * The latter are handled by print_to_from_device.
 535  * The annotation on the user statements is called "user".
 536  *
 537  * In case of a kernel launch, print a block of statements that
 538  * defines the grid and the block and then launches the kernel.
 539  */
 540 static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
 541         __isl_take isl_ast_print_options *print_options,
 542         __isl_keep isl_ast_node *node, void *user)
 543 {
 544         isl_id *id;
 545         int is_user;
 546         struct ppcg_kernel *kernel;
 547         struct ppcg_kernel_stmt *stmt;
 548         struct print_host_user_data *data;
 549
 550         isl_ast_print_options_free(print_options);
 551
 552         data = (struct print_host_user_data *) user;
 553
 554         id = isl_ast_node_get_annotation(node);
 555         if (!id)
 556                 return print_to_from_device(p, node, data->prog);
 557
 558         is_user = !strcmp(isl_id_get_name(id), "user");
 559         kernel = is_user ? NULL : isl_id_get_user(id);
 560         stmt = is_user ? isl_id_get_user(id) : NULL;
 561         isl_id_free(id);
 562
 563         if (is_user)
 564                 return ppcg_kernel_print_domain(p, stmt);
 565
 566         p = isl_printer_start_line(p);
 567         p = isl_printer_print_str(p, "{");
 568         p = isl_printer_end_line(p);
 569         p = isl_printer_indent(p, 2);
 570
 571         p = isl_printer_start_line(p);
 572         p = isl_printer_print_str(p, "dim3 k");
 573         p = isl_printer_print_int(p, kernel->id);
 574         p = isl_printer_print_str(p, "_dimBlock");
 575         print_reverse_list(isl_printer_get_file(p),
 576                                 kernel->n_block, kernel->block_dim);
 577         p = isl_printer_print_str(p, ";");
 578         p = isl_printer_end_line(p);
 579
 580         p = print_grid(p, kernel);
 581
 582         p = isl_printer_start_line(p);
 583         p = isl_printer_print_str(p, "kernel");
 584         p = isl_printer_print_int(p, kernel->id);
 585         p = isl_printer_print_str(p, " <<<k");
 586         p = isl_printer_print_int(p, kernel->id);
 587         p = isl_printer_print_str(p, "_dimGrid, k");
 588         p = isl_printer_print_int(p, kernel->id);
 589         p = isl_printer_print_str(p, "_dimBlock>>> (");
 590         p = print_kernel_arguments(p, data->prog, kernel, 0);
 591         p = isl_printer_print_str(p, ");");
 592         p = isl_printer_end_line(p);
 593
 594         p = isl_printer_start_line(p);
 595         p = isl_printer_print_str(p, "cudaCheckKernel();");
 596         p = isl_printer_end_line(p);
 597
 598         p = isl_printer_indent(p, -2);
 599         p = isl_printer_start_line(p);
 600         p = isl_printer_print_str(p, "}");
 601         p = isl_printer_end_line(p);
 602
 603         p = isl_printer_start_line(p);
 604         p = isl_printer_end_line(p);
 605
 606         print_kernel(data->prog, kernel, data->cuda);
 607
 608         return p;
 609 }
 610
 611 static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
 612         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 613         struct cuda_info *cuda)
 614 {
 615         isl_ast_print_options *print_options;
 616         isl_ctx *ctx = isl_ast_node_get_ctx(tree);
 617         struct print_host_user_data data = { cuda, prog };
 618
 619         print_options = isl_ast_print_options_alloc(ctx);
 620         print_options = isl_ast_print_options_set_print_user(print_options,
 621                                                 &print_host_user, &data);
 622
 623         p = gpu_print_macros(p, tree);
 624         p = isl_ast_node_print(tree, p, print_options);
 625
 626         return p;
 627 }
 628
 629 static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
 630         struct gpu_prog *prog)
 631 {
 632         int i;
 633
 634         for (i = 0; i < prog->n_array; ++i) {
 635                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
 636                         continue;
 637                 if (!prog->array[i].accessed)
 638                         continue;
 639                 p = isl_printer_start_line(p);
 640                 p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
 641                 p = isl_printer_print_str(p, prog->array[i].name);
 642                 p = isl_printer_print_str(p, "));");
 643                 p = isl_printer_end_line(p);
 644         }
 645
 646         return p;
 647 }
 648
 649 /* Given a gpu_prog "prog" and the corresponding transformed AST
 650  * "tree", print the entire CUDA code to "p".
 651  * "types" collects the types for which a definition has already
 652  * been printed.
 653  */
 654 static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
 655         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 656         struct gpu_types *types, void *user)
 657 {
 658         struct cuda_info *cuda = user;
 659         isl_printer *kernel;
 660
 661         kernel = isl_printer_to_file(isl_printer_get_ctx(p), cuda->kernel_c);
 662         kernel = isl_printer_set_output_format(kernel, ISL_FORMAT_C);
 663         kernel = gpu_print_types(kernel, types, prog);
 664         isl_printer_free(kernel);
 665
 666         if (!kernel)
 667                 return isl_printer_free(p);
 668
 669         p = ppcg_start_block(p);
 670
 671         p = print_cuda_macros(p);
 672
 673         p = gpu_print_local_declarations(p, prog);
 674         p = declare_device_arrays(p, prog);
 675         p = allocate_device_arrays(p, prog);
 676
 677         p = print_host_code(p, prog, tree, cuda);
 678
 679         p = free_device_arrays(p, prog);
 680
 681         p = ppcg_end_block(p);
 682
 683         return p;
 684 }
 685
 686 /* Transform the code in the file called "input" by replacing
 687  * all scops by corresponding CUDA code.
 688  * The names of the output files are derived from "input".
 689  *
 690  * We let generate_gpu do all the hard work and then let it call
 691  * us back for printing the AST in print_cuda.
 692  *
 693  * To prepare for this printing, we first open the output files
 694  * and we close them after generate_gpu has finished.
 695  */
 696 int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
 697         const char *input)
 698 {
 699         struct cuda_info cuda;
 700         int r;
 701
 702         cuda_open_files(&cuda, input);
 703
 704         r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
 705
 706         cuda_close_files(&cuda);
 707
 708         return r;
 709 }