cuda.c

   1 /*
   2  * Copyright 2012      Ecole Normale Superieure
   3  *
   4  * Use of this software is governed by the MIT license
   5  *
   6  * Written by Sven Verdoolaege,
   7  * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
   8  */
   9
  10 #include <isl/aff.h>
  11 #include <isl/ast.h>
  12
  13 #include "cuda_common.h"
  14 #include "cuda.h"
  15 #include "gpu.h"
  16 #include "gpu_print.h"
  17 #include "print.h"
  18
  19 static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
  20 {
  21         const char *macros =
  22                 "#define cudaCheckReturn(ret) \\\n"
  23                 "  do { \\\n"
  24                 "    cudaError_t cudaCheckReturn_e = (ret); \\\n"
  25                 "    if (cudaCheckReturn_e != cudaSuccess) { \\\n"
  26                 "      fprintf(stderr, \"CUDA error: %s\\n\", "
  27                 "cudaGetErrorString(cudaCheckReturn_e)); \\\n"
  28                 "      fflush(stderr); \\\n"
  29                 "    } \\\n"
  30                 "    assert(cudaCheckReturn_e == cudaSuccess); \\\n"
  31                 "  } while(0)\n"
  32                 "#define cudaCheckKernel() \\\n"
  33                 "  do { \\\n"
  34                 "    cudaCheckReturn(cudaGetLastError()); \\\n"
  35                 "  } while(0)\n\n";
  36
  37         p = isl_printer_print_str(p, macros);
  38         return p;
  39 }
  40
  41 static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
  42         struct gpu_prog *prog)
  43 {
  44         int i;
  45
  46         for (i = 0; i < prog->n_array; ++i) {
  47                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
  48                         continue;
  49                 p = isl_printer_start_line(p);
  50                 p = isl_printer_print_str(p, prog->array[i].type);
  51                 p = isl_printer_print_str(p, " *dev_");
  52                 p = isl_printer_print_str(p, prog->array[i].name);
  53                 p = isl_printer_print_str(p, ";");
  54                 p = isl_printer_end_line(p);
  55         }
  56         p = isl_printer_start_line(p);
  57         p = isl_printer_end_line(p);
  58         return p;
  59 }
  60
  61 static __isl_give isl_printer *allocate_device_arrays(
  62         __isl_take isl_printer *p, struct gpu_prog *prog)
  63 {
  64         int i;
  65
  66         for (i = 0; i < prog->n_array; ++i) {
  67                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
  68                         continue;
  69                 p = isl_printer_start_line(p);
  70                 p = isl_printer_print_str(p,
  71                         "cudaCheckReturn(cudaMalloc((void **) &dev_");
  72                 p = isl_printer_print_str(p, prog->array[i].name);
  73                 p = isl_printer_print_str(p, ", ");
  74                 p = gpu_array_info_print_size(p, &prog->array[i]);
  75                 p = isl_printer_print_str(p, "));");
  76                 p = isl_printer_end_line(p);
  77         }
  78         p = isl_printer_start_line(p);
  79         p = isl_printer_end_line(p);
  80         return p;
  81 }
  82
  83 static __isl_give isl_printer *copy_arrays_to_device(__isl_take isl_printer *p,
  84         struct gpu_prog *prog)
  85 {
  86         int i;
  87
  88         for (i = 0; i < prog->n_array; ++i) {
  89                 isl_space *dim;
  90                 isl_set *read_i;
  91                 int empty;
  92
  93                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
  94                         continue;
  95
  96                 dim = isl_space_copy(prog->array[i].space);
  97                 read_i = isl_union_set_extract_set(prog->copy_in, dim);
  98                 empty = isl_set_fast_is_empty(read_i);
  99                 isl_set_free(read_i);
 100                 if (empty)
 101                         continue;
 102
 103                 p = isl_printer_start_line(p);
 104                 p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
 105                 p = isl_printer_print_str(p, prog->array[i].name);
 106                 p = isl_printer_print_str(p, ", ");
 107
 108                 if (gpu_array_is_scalar(&prog->array[i]))
 109                         p = isl_printer_print_str(p, "&");
 110                 p = isl_printer_print_str(p, prog->array[i].name);
 111                 p = isl_printer_print_str(p, ", ");
 112
 113                 p = gpu_array_info_print_size(p, &prog->array[i]);
 114                 p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
 115                 p = isl_printer_end_line(p);
 116         }
 117         p = isl_printer_start_line(p);
 118         p = isl_printer_end_line(p);
 119         return p;
 120 }
 121
 122 static void print_reverse_list(FILE *out, int len, int *list)
 123 {
 124         int i;
 125
 126         if (len == 0)
 127                 return;
 128
 129         fprintf(out, "(");
 130         for (i = 0; i < len; ++i) {
 131                 if (i)
 132                         fprintf(out, ", ");
 133                 fprintf(out, "%d", list[len - 1 - i]);
 134         }
 135         fprintf(out, ")");
 136 }
 137
 138 /* Print the effective grid size as a list of the sizes in each
 139  * dimension, from innermost to outermost.
 140  */
 141 static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
 142         struct ppcg_kernel *kernel)
 143 {
 144         int i;
 145         int dim;
 146
 147         dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
 148         if (dim == 0)
 149                 return p;
 150
 151         p = isl_printer_print_str(p, "(");
 152         for (i = dim - 1; i >= 0; --i) {
 153                 isl_pw_aff *bound;
 154
 155                 bound = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i);
 156                 p = isl_printer_print_pw_aff(p, bound);
 157                 isl_pw_aff_free(bound);
 158
 159                 if (i > 0)
 160                         p = isl_printer_print_str(p, ", ");
 161         }
 162
 163         p = isl_printer_print_str(p, ")");
 164
 165         return p;
 166 }
 167
 168 /* Print the grid definition.
 169  */
 170 static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
 171         struct ppcg_kernel *kernel)
 172 {
 173         p = isl_printer_start_line(p);
 174         p = isl_printer_print_str(p, "dim3 k");
 175         p = isl_printer_print_int(p, kernel->id);
 176         p = isl_printer_print_str(p, "_dimGrid");
 177         p = print_grid_size(p, kernel);
 178         p = isl_printer_print_str(p, ";");
 179         p = isl_printer_end_line(p);
 180
 181         return p;
 182 }
 183
 184 /* Print the arguments to a kernel declaration or call.  If "types" is set,
 185  * then print a declaration (including the types of the arguments).
 186  *
 187  * The arguments are printed in the following order
 188  * - the arrays accessed by the kernel
 189  * - the parameters
 190  * - the host loop iterators
 191  */
 192 static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
 193         struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
 194 {
 195         int i, n;
 196         int first = 1;
 197         unsigned nparam;
 198         isl_space *space;
 199         const char *type;
 200
 201         for (i = 0; i < prog->n_array; ++i) {
 202                 isl_set *arr;
 203                 int empty;
 204
 205                 space = isl_space_copy(prog->array[i].space);
 206                 arr = isl_union_set_extract_set(kernel->arrays, space);
 207                 empty = isl_set_fast_is_empty(arr);
 208                 isl_set_free(arr);
 209                 if (empty)
 210                         continue;
 211
 212                 if (!first)
 213                         p = isl_printer_print_str(p, ", ");
 214
 215                 if (types) {
 216                         p = isl_printer_print_str(p, prog->array[i].type);
 217                         p = isl_printer_print_str(p, " ");
 218                 }
 219
 220                 if (gpu_array_is_read_only_scalar(&prog->array[i])) {
 221                         p = isl_printer_print_str(p, prog->array[i].name);
 222                 } else {
 223                         if (types)
 224                                 p = isl_printer_print_str(p, "*");
 225                         else
 226                                 p = isl_printer_print_str(p, "dev_");
 227                         p = isl_printer_print_str(p, prog->array[i].name);
 228                 }
 229
 230                 first = 0;
 231         }
 232
 233         space = isl_union_set_get_space(kernel->arrays);
 234         nparam = isl_space_dim(space, isl_dim_param);
 235         for (i = 0; i < nparam; ++i) {
 236                 const char *name;
 237
 238                 name = isl_space_get_dim_name(space, isl_dim_param, i);
 239
 240                 if (!first)
 241                         p = isl_printer_print_str(p, ", ");
 242                 if (types)
 243                         p = isl_printer_print_str(p, "int ");
 244                 p = isl_printer_print_str(p, name);
 245
 246                 first = 0;
 247         }
 248         isl_space_free(space);
 249
 250         n = isl_space_dim(kernel->space, isl_dim_set);
 251         type = isl_options_get_ast_iterator_type(prog->ctx);
 252         for (i = 0; i < n; ++i) {
 253                 const char *name;
 254                 isl_id *id;
 255
 256                 if (!first)
 257                         p = isl_printer_print_str(p, ", ");
 258                 name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
 259                 if (types) {
 260                         p = isl_printer_print_str(p, type);
 261                         p = isl_printer_print_str(p, " ");
 262                 }
 263                 p = isl_printer_print_str(p, name);
 264
 265                 first = 0;
 266         }
 267
 268         return p;
 269 }
 270
 271 /* Print the header of the given kernel.
 272  */
 273 static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
 274         struct gpu_prog *prog, struct ppcg_kernel *kernel)
 275 {
 276         p = isl_printer_start_line(p);
 277         p = isl_printer_print_str(p, "__global__ void kernel");
 278         p = isl_printer_print_int(p, kernel->id);
 279         p = isl_printer_print_str(p, "(");
 280         p = print_kernel_arguments(p, prog, kernel, 1);
 281         p = isl_printer_print_str(p, ")");
 282
 283         return p;
 284 }
 285
 286 /* Print the header of the given kernel to both gen->cuda.kernel_h
 287  * and gen->cuda.kernel_c.
 288  */
 289 static void print_kernel_headers(struct gpu_prog *prog,
 290         struct ppcg_kernel *kernel, struct cuda_info *cuda)
 291 {
 292         isl_printer *p;
 293
 294         p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
 295         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 296         p = print_kernel_header(p, prog, kernel);
 297         p = isl_printer_print_str(p, ";");
 298         p = isl_printer_end_line(p);
 299         isl_printer_free(p);
 300
 301         p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
 302         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 303         p = print_kernel_header(p, prog, kernel);
 304         p = isl_printer_end_line(p);
 305         isl_printer_free(p);
 306 }
 307
 308 static void print_indent(FILE *dst, int indent)
 309 {
 310         fprintf(dst, "%*s", indent, "");
 311 }
 312
 313 static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
 314 {
 315         int i, n_grid;
 316         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 317         const char *type;
 318         const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
 319         const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
 320                                         "threadIdx.z" };
 321
 322         type = isl_options_get_ast_iterator_type(ctx);
 323
 324         n_grid = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
 325         if (n_grid > 0) {
 326                 print_indent(out, 4);
 327                 fprintf(out, "%s ", type);
 328                 for (i = 0; i < n_grid; ++i) {
 329                         if (i)
 330                                 fprintf(out, ", ");
 331                         fprintf(out, "b%d = %s",
 332                                 i, block_dims[n_grid - 1 - i]);
 333                 }
 334                 fprintf(out, ";\n");
 335         }
 336
 337         if (kernel->n_block > 0) {
 338                 print_indent(out, 4);
 339                 fprintf(out, "%s ", type);
 340                 for (i = 0; i < kernel->n_block; ++i) {
 341                         if (i)
 342                                 fprintf(out, ", ");
 343                         fprintf(out, "t%d = %s",
 344                                 i, thread_dims[kernel->n_block - 1 - i]);
 345                 }
 346                 fprintf(out, ";\n");
 347         }
 348 }
 349
 350 static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
 351         struct ppcg_kernel_var *var)
 352 {
 353         int j;
 354
 355         p = isl_printer_start_line(p);
 356         if (var->type == ppcg_access_shared)
 357                 p = isl_printer_print_str(p, "__shared__ ");
 358         p = isl_printer_print_str(p, var->array->type);
 359         p = isl_printer_print_str(p, " ");
 360         p = isl_printer_print_str(p,  var->name);
 361         for (j = 0; j < var->array->n_index; ++j) {
 362                 isl_val *v;
 363
 364                 p = isl_printer_print_str(p, "[");
 365                 v = isl_vec_get_element_val(var->size, j);
 366                 p = isl_printer_print_val(p, v);
 367                 isl_val_free(v);
 368                 p = isl_printer_print_str(p, "]");
 369         }
 370         p = isl_printer_print_str(p, ";");
 371         p = isl_printer_end_line(p);
 372
 373         return p;
 374 }
 375
 376 static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
 377         struct ppcg_kernel *kernel)
 378 {
 379         int i;
 380
 381         for (i = 0; i < kernel->n_var; ++i)
 382                 p = print_kernel_var(p, &kernel->var[i]);
 383
 384         return p;
 385 }
 386
 387 /* Print a sync statement.
 388  */
 389 static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
 390         struct ppcg_kernel_stmt *stmt)
 391 {
 392         p = isl_printer_start_line(p);
 393         p = isl_printer_print_str(p, "__syncthreads();");
 394         p = isl_printer_end_line(p);
 395
 396         return p;
 397 }
 398
 399 /* This function is called for each user statement in the AST,
 400  * i.e., for each kernel body statement, copy statement or sync statement.
 401  */
 402 static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
 403         __isl_take isl_ast_print_options *print_options,
 404         __isl_keep isl_ast_node *node, void *user)
 405 {
 406         isl_id *id;
 407         struct ppcg_kernel_stmt *stmt;
 408
 409         id = isl_ast_node_get_annotation(node);
 410         stmt = isl_id_get_user(id);
 411         isl_id_free(id);
 412
 413         isl_ast_print_options_free(print_options);
 414
 415         switch (stmt->type) {
 416         case ppcg_kernel_copy:
 417                 return ppcg_kernel_print_copy(p, stmt);
 418         case ppcg_kernel_sync:
 419                 return print_sync(p, stmt);
 420         case ppcg_kernel_domain:
 421                 return ppcg_kernel_print_domain(p, stmt);
 422         }
 423
 424         return p;
 425 }
 426
 427 static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
 428         struct cuda_info *cuda)
 429 {
 430         isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
 431         isl_ast_print_options *print_options;
 432         isl_printer *p;
 433
 434         print_kernel_headers(prog, kernel, cuda);
 435         fprintf(cuda->kernel_c, "{\n");
 436         print_kernel_iterators(cuda->kernel_c, kernel);
 437
 438         p = isl_printer_to_file(ctx, cuda->kernel_c);
 439         p = isl_printer_set_output_format(p, ISL_FORMAT_C);
 440         p = isl_printer_indent(p, 4);
 441
 442         p = print_kernel_vars(p, kernel);
 443         p = isl_printer_end_line(p);
 444         p = gpu_print_macros(p, kernel->tree);
 445
 446         print_options = isl_ast_print_options_alloc(ctx);
 447         print_options = isl_ast_print_options_set_print_user(print_options,
 448                                                     &print_kernel_stmt, NULL);
 449         p = isl_ast_node_print(kernel->tree, p, print_options);
 450         isl_printer_free(p);
 451
 452         fprintf(cuda->kernel_c, "}\n");
 453 }
 454
 455 struct print_host_user_data {
 456         struct cuda_info *cuda;
 457         struct gpu_prog *prog;
 458 };
 459
 460 /* Print the user statement of the host code to "p".
 461  *
 462  * In particular, print a block of statements that defines the grid
 463  * and the block and then launches the kernel.
 464  */
 465 static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
 466         __isl_take isl_ast_print_options *print_options,
 467         __isl_keep isl_ast_node *node, void *user)
 468 {
 469         isl_id *id;
 470         struct ppcg_kernel *kernel;
 471         struct print_host_user_data *data;
 472
 473         id = isl_ast_node_get_annotation(node);
 474         kernel = isl_id_get_user(id);
 475         isl_id_free(id);
 476
 477         data = (struct print_host_user_data *) user;
 478
 479         p = isl_printer_start_line(p);
 480         p = isl_printer_print_str(p, "{");
 481         p = isl_printer_end_line(p);
 482         p = isl_printer_indent(p, 2);
 483
 484         p = isl_printer_start_line(p);
 485         p = isl_printer_print_str(p, "dim3 k");
 486         p = isl_printer_print_int(p, kernel->id);
 487         p = isl_printer_print_str(p, "_dimBlock");
 488         print_reverse_list(isl_printer_get_file(p),
 489                                 kernel->n_block, kernel->block_dim);
 490         p = isl_printer_print_str(p, ";");
 491         p = isl_printer_end_line(p);
 492
 493         p = print_grid(p, kernel);
 494
 495         p = isl_printer_start_line(p);
 496         p = isl_printer_print_str(p, "kernel");
 497         p = isl_printer_print_int(p, kernel->id);
 498         p = isl_printer_print_str(p, " <<<k");
 499         p = isl_printer_print_int(p, kernel->id);
 500         p = isl_printer_print_str(p, "_dimGrid, k");
 501         p = isl_printer_print_int(p, kernel->id);
 502         p = isl_printer_print_str(p, "_dimBlock>>> (");
 503         p = print_kernel_arguments(p, data->prog, kernel, 0);
 504         p = isl_printer_print_str(p, ");");
 505         p = isl_printer_end_line(p);
 506
 507         p = isl_printer_start_line(p);
 508         p = isl_printer_print_str(p, "cudaCheckKernel();");
 509         p = isl_printer_end_line(p);
 510
 511         p = isl_printer_indent(p, -2);
 512         p = isl_printer_start_line(p);
 513         p = isl_printer_print_str(p, "}");
 514         p = isl_printer_end_line(p);
 515
 516         p = isl_printer_start_line(p);
 517         p = isl_printer_end_line(p);
 518
 519         print_kernel(data->prog, kernel, data->cuda);
 520
 521         isl_ast_print_options_free(print_options);
 522
 523         return p;
 524 }
 525
 526 static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
 527         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 528         struct cuda_info *cuda)
 529 {
 530         isl_ast_print_options *print_options;
 531         isl_ctx *ctx = isl_ast_node_get_ctx(tree);
 532         struct print_host_user_data data = { cuda, prog };
 533
 534         print_options = isl_ast_print_options_alloc(ctx);
 535         print_options = isl_ast_print_options_set_print_user(print_options,
 536                                                 &print_host_user, &data);
 537
 538         p = gpu_print_macros(p, tree);
 539         p = isl_ast_node_print(tree, p, print_options);
 540
 541         return p;
 542 }
 543
 544 /* For each array that needs to be copied out (based on prog->copy_out),
 545  * copy the contents back from the GPU to the host.
 546  *
 547  * If any element of a given array appears in prog->copy_out, then its
 548  * entire extent is in prog->copy_out.  The bounds on this extent have
 549  * been precomputed in extract_array_info and are used in
 550  * gpu_array_info_print_size.
 551  */
 552 static __isl_give isl_printer *copy_arrays_from_device(
 553         __isl_take isl_printer *p, struct gpu_prog *prog)
 554 {
 555         int i;
 556         isl_union_set *copy_out;
 557         copy_out = isl_union_set_copy(prog->copy_out);
 558
 559         for (i = 0; i < prog->n_array; ++i) {
 560                 isl_space *dim;
 561                 isl_set *copy_out_i;
 562                 int empty;
 563
 564                 dim = isl_space_copy(prog->array[i].space);
 565                 copy_out_i = isl_union_set_extract_set(copy_out, dim);
 566                 empty = isl_set_fast_is_empty(copy_out_i);
 567                 isl_set_free(copy_out_i);
 568                 if (empty)
 569                         continue;
 570
 571                 p = isl_printer_start_line(p);
 572                 p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
 573                 if (gpu_array_is_scalar(&prog->array[i]))
 574                         p = isl_printer_print_str(p, "&");
 575                 p = isl_printer_print_str(p, prog->array[i].name);
 576                 p = isl_printer_print_str(p, ", dev_");
 577                 p = isl_printer_print_str(p, prog->array[i].name);
 578                 p = isl_printer_print_str(p, ", ");
 579                 p = gpu_array_info_print_size(p, &prog->array[i]);
 580                 p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
 581                 p = isl_printer_end_line(p);
 582         }
 583
 584         isl_union_set_free(copy_out);
 585         p = isl_printer_start_line(p);
 586         p = isl_printer_end_line(p);
 587         return p;
 588 }
 589
 590 static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
 591         struct gpu_prog *prog)
 592 {
 593         int i;
 594
 595         for (i = 0; i < prog->n_array; ++i) {
 596                 if (gpu_array_is_read_only_scalar(&prog->array[i]))
 597                         continue;
 598                 p = isl_printer_start_line(p);
 599                 p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
 600                 p = isl_printer_print_str(p, prog->array[i].name);
 601                 p = isl_printer_print_str(p, "));");
 602                 p = isl_printer_end_line(p);
 603         }
 604
 605         return p;
 606 }
 607
 608 /* Given a gpu_prog "prog" and the corresponding transformed AST
 609  * "tree", print the entire CUDA code to "p".
 610  */
 611 static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
 612         struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 613         void *user)
 614 {
 615         struct cuda_info *cuda = user;
 616
 617         p = ppcg_start_block(p);
 618
 619         p = print_cuda_macros(p);
 620
 621         p = declare_device_arrays(p, prog);
 622         p = allocate_device_arrays(p, prog);
 623         p = copy_arrays_to_device(p, prog);
 624
 625         p = print_host_code(p, prog, tree, cuda);
 626
 627         p = copy_arrays_from_device(p, prog);
 628         p = free_device_arrays(p, prog);
 629
 630         p = ppcg_end_block(p);
 631
 632         return p;
 633 }
 634
 635 /* Transform the code in the file called "input" by replacing
 636  * all scops by corresponding CUDA code.
 637  * The names of the output files are derived from "input".
 638  *
 639  * We let generate_gpu do all the hard work and then let it call
 640  * us back for printing the AST in print_cuda.
 641  *
 642  * To prepare for this printing, we first open the output files
 643  * and we close them after generate_gpu has finished.
 644  */
 645 int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
 646         const char *input)
 647 {
 648         struct cuda_info cuda;
 649         int r;
 650
 651         cuda_open_files(&cuda, input);
 652
 653         r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
 654
 655         cuda_close_files(&cuda);
 656
 657         return r;
 658 }