#ifndef _CUDA_H
#define _CUDA_H

#include <pet.h>
#include "cuda_common.h"
#include "gpucode.h"
#include "ppcg_options.h"

struct cuda_gen {
	struct cuda_info cuda;
	struct gpucode_info code;
	struct gpucode_info kernel_code;
	struct gpucode_info stmt_code;

	isl_ctx *ctx;
	struct ppcg_options *options;
	CloogState *state;

	struct pet_scop *scop;

	/* Set of parameter values */
	isl_set *context;
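
	/* For illustration only: a context constraining a hypothetical
	 * parameter N to, say, N >= 32 would be written in isl notation as
	 *
	 *	[N] -> { : N >= 32 }
	 */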

	/* Uninitialized data elements (or an overapproximation) */
	isl_union_set *copy_in;

	/* All read accesses in the entire program */
	isl_union_map *read;

	/* All write accesses in the entire program */
	isl_union_map *write;

	/* Array of statements */
	int n_stmts;
	struct cuda_stmt *stmts;

	int n_array;
	struct cuda_array_info *array;

	/* Identifier of current kernel. */
	int kernel_id;

	/* First tile dimension. */
	int tile_first;
	/* Number of tile dimensions. */
	int tile_len;
	/* Number of initial parallel loops among tile dimensions. */
	int n_parallel;
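
	/* For illustration only, a hypothetical configuration: in a
	 * five-dimensional schedule where dimensions 1, 2 and 3 are tiled
	 * and the first two of those carry parallel loops, one would have
	 * tile_first = 1, tile_len = 3 and n_parallel = 2.
	 */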

	/* Number of dimensions determining shared memory. */
	int shared_len;

	/* Number of rows in the untiled schedule. */
	int untiled_len;
	/* Number of rows in the tiled schedule. */
	int tiled_len;
	/* Number of rows in schedule after tiling/wrapping over threads. */
	int thread_tiled_len;

	/* Global untiled schedule. */
	isl_union_map *sched;
	/* Local (per kernel launch) tiled schedule. */
	isl_union_map *tiled_sched;
	/* Local schedule per shared memory tile loop iteration. */
	isl_union_map *local_sched;
	/* Domain of the current statement (within print_statement). */
	isl_set *stmt_domain;

	/* Position of first parameter corresponding to shared tile loop
	 * in shared_sched.
	 */
	unsigned first_shared;
	/* Local tiled schedule projected onto the shared tile loops and
	 * the loops that will be wrapped over the threads,
	 * with all shared tile loops parametrized.
	 */
	isl_union_map *shared_sched;
	/* Projects out the loops that will be wrapped over the threads
	 * from shared_sched.
	 */
	isl_union_map *shared_proj;

	/* A map that takes the range of shared_sched as input,
	 * wraps the appropriate loops over the threads and then projects
	 * out these loops.
	 */
	isl_map *privatization;
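
	/* For illustration only: with a hypothetical block size of 32 in a
	 * single thread dimension, wrapping a loop over the threads and then
	 * projecting it out could be sketched in isl notation as
	 *
	 *	{ [a] -> [tid] : tid = a mod 32 }
	 *
	 * (a simplified sketch, not the exact map constructed here).
	 */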

	/* A map from the shared memory tile loops and the thread indices
	 * (as parameters) to the set of accessed memory elements that
	 * will be accessed through private copies.
	 */
	isl_union_map *private_access;

	/* The schedule for the current private access
	 * (within print_private_access).
	 */
	isl_map *private_sched;
	/* The array reference group corresponding to private_sched. */
	struct cuda_array_ref_group *private_group;

	/* First loop to unroll (or -1 if none). */
	int first_unroll;

	int n_grid;
	int n_block;
	/* Note: in the input file, the sizes of the grid and the blocks
	 * are specified in the order x, y, z, but internally, the sizes
	 * are stored in reverse order, so that the last element always
	 * refers to the x dimension.
	 */
	int grid_dim[2];
	int block_dim[3];
	int *tile_size;
};
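
/* A minimal sketch (hypothetical helper, not part of ppcg) of the reversal
 * convention documented for grid_dim and block_dim above: sizes read in
 * x, y, z order are stored in reverse, so that the last element always
 * corresponds to the x dimension.
 */
static inline void reverse_dims(int *dim, int n)
{
	int i;

	for (i = 0; i < n / 2; ++i) {
		int t = dim[i];

		dim[i] = dim[n - 1 - i];
		dim[n - 1 - i] = t;
	}
}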

__isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
		const char *str);
void collect_array_info(struct cuda_gen *gen);
void print_host_code(struct cuda_gen *gen);
void clear_cuda_gen(struct cuda_gen *gen);

int cuda_pet(isl_ctx *ctx, struct pet_scop *scop, struct ppcg_options *options,
		const char *input);

#endif
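
/* A minimal usage sketch, assuming pet's pet_scop_extract_from_C_source()
 * entry point; the input file name and function name are hypothetical,
 * and option setup is elided:
 *
 *	isl_ctx *ctx = isl_ctx_alloc();
 *	struct ppcg_options options = { 0 };
 *	struct pet_scop *scop =
 *		pet_scop_extract_from_C_source(ctx, "input.c", "kernel");
 *	cuda_pet(ctx, scop, &options, "input.c");
 *	isl_ctx_free(ctx);
 */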