/* ppcg.git / cuda.h (initial version of ppcg) */
#ifndef _CUDA_H
#define _CUDA_H

#include "clan/clan.h"
#include "cuda_common.h"
#include "gpucode.h"
#include "ppcg_options.h"

struct cuda_gen {
	struct cuda_info cuda;
	struct gpucode_info code;
	struct gpucode_info kernel_code;
	struct gpucode_info stmt_code;

	isl_ctx *ctx;
	struct ppcg_options *options;
	CloogState *state;

	/* Set of parameter values */
	isl_set *context;

	/* Uninitialized data elements (or an overapproximation) */
	isl_union_set *copy_in;

	/* All read accesses in the entire program */
	isl_union_map *read;

	/* All write accesses in the entire program */
	isl_union_map *write;

	/* Array of statements */
	int n_stmts;
	struct cuda_stmt *stmts;

	int n_array;
	struct cuda_array_info *array;

	/* Identifier of current kernel. */
	int kernel_id;

	/* First tile dimension. */
	int tile_first;
	/* Number of tile dimensions. */
	int tile_len;
	/* Number of initial parallel loops among tile dimensions. */
	int n_parallel;

	/* Number of dimensions determining shared memory. */
	int shared_len;

	/* Number of rows in the untiled schedule. */
	int untiled_len;
	/* Number of rows in the tiled schedule. */
	int tiled_len;
	/* Number of rows in schedule after tiling/wrapping over threads. */
	int thread_tiled_len;

	/* Global untiled schedule. */
	isl_union_map *sched;
	/* Local (per kernel launch) tiled schedule. */
	isl_union_map *tiled_sched;
	/* Local schedule per shared memory tile loop iteration. */
	isl_union_map *local_sched;
	/* Domain of the current statement (within print_statement). */
	isl_set *stmt_domain;

	/* Position of first parameter corresponding to shared tile loop
	 * in shared_sched.
	 */
	unsigned first_shared;
	/* Local tiled schedule projected onto the shared tile loops and
	 * the loops that will be wrapped over the threads,
	 * with all shared tile loops parametrized.
	 */
	isl_union_map *shared_sched;
	/* Projects out the loops that will be wrapped over the threads
	 * from shared_sched.
	 */
	isl_union_map *shared_proj;

	/* A map that takes the range of shared_sched as input,
	 * wraps the appropriate loops over the threads and then projects
	 * out these loops.
	 */
	isl_map *privatization;
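	/* One way to picture the wrapping (an illustration only, not the
	 * literal map that ppcg constructs): wrapping a schedule dimension c
	 * over, say, 16 threads splits it as c = 16 * c_outer + t, where
	 * t = c % 16 plays the role of the thread index; the map then
	 * projects the t position out again.
	 */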
	/* A map from the shared memory tile loops and the thread indices
	 * (as parameters) to the set of accessed memory elements that
	 * will be accessed through private copies.
	 */
	isl_union_map *private_access;

	/* The schedule for the current private access
	 * (within print_private_access).
	 */
	isl_map *private_sched;
	/* The array reference group corresponding to private_sched. */
	struct cuda_array_ref_group *private_group;

	/* First loop to unroll (or -1 if none). */
	int first_unroll;

	int n_grid;
	int n_block;
	/* Note: in the input file, the sizes of the grid and the blocks
	 * are specified in the order x, y, z, but internally, the sizes
	 * are stored in reverse order, so that the last element always
	 * refers to the x dimension.
	 */
	int grid_dim[2];
	int block_dim[3];
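	/* For example (illustrative values only): with n_grid == 2 and grid
	 * sizes x = 32 and y = 16 in the input, the reversed internal order
	 * gives grid_dim[0] == 16 and grid_dim[1] == 32, so the last element
	 * holds the x size.
	 */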
	int *tile_size;
};

__isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
	const char *str);
void collect_array_info(struct cuda_gen *gen);
void print_host_code(struct cuda_gen *gen);
void clear_cuda_gen(struct cuda_gen *gen);

int cuda_scop(isl_ctx *ctx, scoplib_scop_p scop, struct ppcg_options *options,
	const char *input);
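
/* A minimal sketch of how a driver might call cuda_scop(), assuming the scop
 * is extracted with clan and that "options" has already been filled in
 * elsewhere (e.g. by ppcg's argument parsing); an illustration, not ppcg's
 * actual driver code:
 *
 *	isl_ctx *ctx = isl_ctx_alloc();
 *	clan_options_p clan_opt = clan_options_malloc();
 *	FILE *in = fopen(input, "r");
 *	scoplib_scop_p scop = clan_scop_extract(in, clan_opt);
 *	fclose(in);
 *	cuda_scop(ctx, scop, options, input);
 *	scoplib_scop_free(scop);
 *	clan_options_free(clan_opt);
 *	isl_ctx_free(ctx);
 */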

#endif