gpu.c: make local array information available through ppcg_kernel
[ppcg.git] / gpu.h
#ifndef _GPU_H
#define _GPU_H

#include <pet.h>
#include "cuda_common.h"
#include "ppcg_options.h"

/* For each index i, array->bound[i] specialized to the current kernel. */
struct gpu_local_array_info {
	isl_pw_aff_list *bound;
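	/* For example, for a two-dimensional array, the list would hold two
	 * elements, each the corresponding array->bound[i] specialized to
	 * the context of the current kernel.
	 */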
};

struct gpu_gen {
	struct cuda_info cuda;

	isl_ctx *ctx;
	struct ppcg_options *options;

	struct pet_scop *scop;

	/* Set of parameter values */
	isl_set *context;

	/* tile, grid and block sizes for each kernel */
	isl_union_map *sizes;

	/* Uninitialized data elements (or an overapproximation) */
	isl_union_set *copy_in;

	/* All read accesses in the entire program */
	isl_union_map *read;

	/* All write accesses in the entire program */
	isl_union_map *write;

	/* Array of statements */
	int n_stmts;
	struct gpu_stmt *stmts;

	int n_array;
	struct gpu_array_info *array;

	/* Identifier of current kernel. */
	int kernel_id;
	/* Pointer to the current kernel. */
	struct ppcg_kernel *kernel;

	/* First tile dimension. */
	int tile_first;
	/* Number of tile dimensions. */
	int tile_len;
	/* Number of initial parallel loops among tile dimensions. */
	int n_parallel;

	/* Number of dimensions determining shared memory. */
	int shared_len;

	/* Number of rows in the untiled schedule. */
	int untiled_len;
	/* Number of rows in the tiled schedule. */
	int tiled_len;
	/* Number of rows in schedule after tiling/wrapping over threads. */
	int thread_tiled_len;

	/* Global untiled schedule. */
	isl_union_map *sched;
	/* Local (per kernel launch) tiled schedule. */
	isl_union_map *tiled_sched;
	/* Local schedule per shared memory tile loop iteration. */
	isl_union_map *local_sched;

	/* Local tiled schedule projected onto the shared tile loops and
	 * the loops that will be wrapped over the threads,
	 * with all shared tile loops parametrized.
	 */
	isl_union_map *shared_sched;
	/* Projects out the loops that will be wrapped over the threads
	 * from shared_sched.
	 */
	isl_union_map *shared_proj;

	/* A map that takes the range of shared_sched as input,
	 * wraps the appropriate loops over the threads and then projects
	 * out these loops.
	 */
	isl_map *privatization;

	/* A map from the shared memory tile loops and the thread indices
	 * (as parameters) to the set of accessed memory elements that
	 * will be accessed through private copies.
	 */
	isl_union_map *private_access;

	/* The schedule for the current private/shared access
	 * (within print_private_access or print_shared_access).
	 */
	isl_map *copy_sched;
	/* The array reference group corresponding to copy_sched. */
	struct gpu_array_ref_group *copy_group;
	/* copy_group->private_bound or copy_group->shared_bound */
	struct gpu_array_bound *copy_bound;

	/* First loop to unroll (or -1 if none) in the current part of the
	 * schedule.
	 */
	int first_unroll;

	int n_grid;
	int n_block;
	/* Note: in the input file, the sizes of the grid and the blocks
	 * are specified in the order x, y, z, but internally, the sizes
	 * are stored in reverse order, so that the last element always
	 * refers to the x dimension.
	 */
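	/* For example, a block size given as "16 8 4" (x, y, z) in the
	 * input would be stored as block_dim[] = { 4, 8, 16 }, so that
	 * block_dim[2] holds the size of the x dimension.
	 */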
	int grid_dim[2];
	int block_dim[3];
	int *tile_size;
};

__isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
	const char *str);
void collect_array_info(struct gpu_gen *gen);
void print_host_code(struct gpu_gen *gen);
void clear_gpu_gen(struct gpu_gen *gen);

int generate_cuda(isl_ctx *ctx, struct pet_scop *scop,
	struct ppcg_options *options, const char *input);
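/* A rough usage sketch: generate_cuda() takes a pet_scop together with the
 * name of the input file.  The pet call below is only one possible way of
 * obtaining such a scop, and "options" is assumed to have been set up
 * elsewhere:
 *
 *	struct pet_scop *scop =
 *		pet_scop_extract_from_C_source(ctx, input, NULL);
 *	if (scop)
 *		generate_cuda(ctx, scop, options, input);
 */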

#endif