cuda.h

   1 #ifndef _CUDA_H
   2 #define _CUDA_H
   3
   4 #include <pet.h>
   5 #include "cuda_common.h"
   6 #include "gpucode.h"
   7 #include "ppcg_options.h"
   8
   9 struct cuda_gen {	/* State handed by pointer to collect_array_info(), print_host_code() and clear_cuda_gen(). */
  10         struct cuda_info cuda;	/* NOTE(review): presumably output file state -- confirm in cuda_common.h */
  11         struct gpucode_info code;	/* NOTE(review): presumably printing state for host code -- confirm in gpucode.h */
  12         struct gpucode_info kernel_code;	/* NOTE(review): presumably printing state for kernel code -- confirm */
  13         struct gpucode_info stmt_code;	/* NOTE(review): presumably printing state for statement code -- confirm */
  14
  15         isl_ctx *ctx;	/* NOTE(review): presumably the isl_ctx passed to cuda_pet() -- confirm */
  16         struct ppcg_options *options;	/* NOTE(review): presumably the options passed to cuda_pet() -- confirm */
  17         CloogState *state;	/* NOTE(review): CLooG state handle; its header is presumably included transitively -- confirm */
  18
  19         struct pet_scop *scop;	/* NOTE(review): presumably the scop passed to cuda_pet() -- confirm */
  20
  21         /* Set of parameter values. */
  22         isl_set *context;
  23
  24         /* Tile, grid and block sizes for each kernel. */
  25         isl_union_map *sizes;
  26
  27         /* Uninitialized data elements (or an overapproximation). */
  28         isl_union_set *copy_in;
  29
  30         /* All read accesses in the entire program. */
  31         isl_union_map *read;
  32
  33         /* All write accesses in the entire program. */
  34         isl_union_map *write;
  35
  36         /* Array of statements (NOTE(review): n_stmts presumably counts the entries of stmts -- confirm). */
  37         int n_stmts;
  38         struct cuda_stmt *stmts;
  39
  40         int n_array;	/* NOTE(review): presumably the number of entries in array -- confirm */
  41         struct cuda_array_info *array;
  42
  43         /* Identifier of current kernel. */
  44         int kernel_id;
  45
  46         /* First tile dimension. */
  47         int tile_first;
  48         /* Number of tile dimensions. */
  49         int tile_len;
  50         /* Number of initial parallel loops among tile dimensions. */
  51         int n_parallel;
  52
  53         /* Number of dimensions determining shared memory. */
  54         int shared_len;
  55
  56         /* Number of rows in the untiled schedule. */
  57         int untiled_len;
  58         /* Number of rows in the tiled schedule. */
  59         int tiled_len;
  60         /* Number of rows in schedule after tiling/wrapping over threads. */
  61         int thread_tiled_len;
  62
  63         /* Global untiled schedule. */
  64         isl_union_map *sched;
  65         /* Local (per kernel launch) tiled schedule. */
  66         isl_union_map *tiled_sched;
  67         /* Local schedule per shared memory tile loop iteration. */
  68         isl_union_map *local_sched;
  69         /* Domain of the current statement (within print_statement). */
  70         isl_set *stmt_domain;
  71
  72         /* Position of first parameter corresponding to shared tile loop
  73          * in shared_sched.
  74          */
  75         unsigned first_shared;
  76         /* Local tiled schedule projected onto the shared tile loops and
  77          * the loops that will be wrapped over the threads,
  78          * with all shared tile loops parametrized.
  79          */
  80         isl_union_map *shared_sched;
  81         /* Projects out the loops that will be wrapped over the threads
  82          * from shared_sched.
  83          */
  84         isl_union_map *shared_proj;
  85
  86         /* A map that takes the range of shared_sched as input,
  87          * wraps the appropriate loops over the threads and then projects
  88          * out these loops.
  89          */
  90         isl_map *privatization;
  91
  92         /* A map from the shared memory tile loops and the thread indices
  93          * (as parameters) to the set of accessed memory elements that
  94          * will be accessed through private copies.
  95          */
  96         isl_union_map *private_access;
  97
  98         /* The schedule for the current private/shared access
  99          * (within print_private_access or print_shared_access).
 100          */
 101         isl_map *copy_sched;
 102         /* The array reference group corresponding to copy_sched. */
 103         struct cuda_array_ref_group *copy_group;
 104         /* Either copy_group->private_bound or copy_group->shared_bound. */
 105         struct cuda_array_bound *copy_bound;
 106
 107         /* First loop to unroll (or -1 if none). */
 108         int first_unroll;
 109
 110         int n_grid;	/* NOTE(review): presumably the number of grid_dim entries in use -- confirm */
 111         int n_block;	/* NOTE(review): presumably the number of block_dim entries in use -- confirm */
 112         /* Note: in the input file, the sizes of the grid and the blocks
 113          * are specified in the order x, y, z, but internally, the sizes
 114          * are stored in reverse order, so that the last element always
 115          * refers to the x dimension.
 116          */
 117         int grid_dim[2];
 118         int block_dim[3];
 119         int *tile_size;	/* NOTE(review): presumably one size per tile dimension (tile_len entries) -- confirm */
 120 };
 121
 122 __isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
 123         const char *str);	/* Per the isl annotations: consumes "set" and returns a set owned by the caller. NOTE(review): format of "str" is not visible here. */
 124 void collect_array_info(struct cuda_gen *gen);	/* NOTE(review): presumably fills gen->array and gen->n_array -- confirm in the definition */
 125 void print_host_code(struct cuda_gen *gen);	/* NOTE(review): presumably emits the host side of the generated program -- confirm */
 126 void clear_cuda_gen(struct cuda_gen *gen);	/* NOTE(review): presumably releases what gen holds; whether gen itself is freed is not visible here */
 127
 128 int cuda_pet(isl_ctx *ctx, struct pet_scop *scop, struct ppcg_options *options,
 129         const char *input);	/* NOTE(review): meaning of the int result and of "input" is not visible here -- confirm */
 130
 131 #endif