/* ppcg.git / cuda.h (initial version of ppcg) */
#ifndef _CUDA_H
#define _CUDA_H

#include "clan/clan.h"
#include "cuda_common.h"
#include "gpucode.h"
#include "ppcg_options.h"

struct cuda_gen {
	struct cuda_info cuda;
	struct gpucode_info code;
	struct gpucode_info kernel_code;
	struct gpucode_info stmt_code;

	isl_ctx *ctx;
	struct ppcg_options *options;
	CloogState *state;

	/* Set of parameter values */
	isl_set *context;

	/* Uninitialized data elements (or an overapproximation) */
	isl_union_set *copy_in;

	/* All read accesses in the entire program */
	isl_union_map *read;

	/* All write accesses in the entire program */
	isl_union_map *write;

	/* Array of statements */
	int n_stmts;
	struct cuda_stmt *stmts;

	int n_array;
	struct cuda_array_info *array;

	/* Identifier of current kernel. */
	int kernel_id;

	/* First tile dimension. */
	int tile_first;
	/* Number of tile dimensions. */
	int tile_len;
	/* Number of initial parallel loops among tile dimensions. */
	int n_parallel;

	/* Number of dimensions determining shared memory. */
	int shared_len;

	/* Number of rows in the untiled schedule. */
	int untiled_len;
	/* Number of rows in the tiled schedule. */
	int tiled_len;
	/* Number of rows in schedule after tiling/wrapping over threads. */
	int thread_tiled_len;

	/* Global untiled schedule. */
	isl_union_map *sched;
	/* Local (per kernel launch) tiled schedule. */
	isl_union_map *tiled_sched;
	/* Local schedule per shared memory tile loop iteration. */
	isl_union_map *local_sched;
	/* Domain of the current statement (within print_statement). */
	isl_set *stmt_domain;

	/* Position of first parameter corresponding to shared tile loop
	 * in shared_sched.
	 */
	unsigned first_shared;
	/* Local tiled schedule projected onto the shared tile loops and
	 * the loops that will be wrapped over the threads,
	 * with all shared tile loops parametrized.
	 */
	isl_union_map *shared_sched;
	/* Projects out the loops that will be wrapped over the threads
	 * from shared_sched.
	 */
	isl_union_map *shared_proj;

	/* A map that takes the range of shared_sched as input,
	 * wraps the appropriate loops over the threads and then projects
	 * out these loops.
	 */
	isl_map *privatization;
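	/* One way to picture the wrapping (an illustration only, not the
	 * literal map that ppcg constructs): wrapping a schedule dimension c
	 * over, say, 16 threads splits it as c = 16 * c_outer + t, where
	 * t = c % 16 plays the role of the thread index; the map then
	 * projects the t position out again.
	 */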
	/* A map from the shared memory tile loops and the thread indices
	 * (as parameters) to the set of accessed memory elements that
	 * will be accessed through private copies.
	 */
	isl_union_map *private_access;

	/* The schedule for the current private access
	 * (within print_private_access).
	 */
	isl_map *private_sched;
	/* The array reference group corresponding to private_sched. */
	struct cuda_array_ref_group *private_group;

	/* First loop to unroll (or -1 if none). */
	int first_unroll;

	int n_grid;
	int n_block;
	/* Note: in the input file, the sizes of the grid and the blocks
	 * are specified in the order x, y, z, but internally, the sizes
	 * are stored in reverse order, so that the last element always
	 * refers to the x dimension.
	 */
	int grid_dim[2];
	int block_dim[3];
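	/* For example (illustrative values only): with n_grid == 2 and grid
	 * sizes x = 32 and y = 16 in the input, the reversed internal order
	 * gives grid_dim[0] == 16 and grid_dim[1] == 32, so the last element
	 * holds the x size.
	 */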
	int *tile_size;
};

__isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
	const char *str);
void collect_array_info(struct cuda_gen *gen);
void print_host_code(struct cuda_gen *gen);
void clear_cuda_gen(struct cuda_gen *gen);

int cuda_scop(isl_ctx *ctx, scoplib_scop_p scop, struct ppcg_options *options,
	const char *input);
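
/* A minimal sketch of how a driver might call cuda_scop(), assuming the scop
 * is extracted with clan and that "options" has already been filled in
 * elsewhere (e.g. by ppcg's argument parsing); an illustration, not ppcg's
 * actual driver code:
 *
 *	isl_ctx *ctx = isl_ctx_alloc();
 *	clan_options_p clan_opt = clan_options_malloc();
 *	FILE *in = fopen(input, "r");
 *	scoplib_scop_p scop = clan_scop_extract(in, clan_opt);
 *	fclose(in);
 *	cuda_scop(ctx, scop, options, input);
 *	scoplib_scop_free(scop);
 *	clan_options_free(clan_opt);
 *	isl_ctx_free(ctx);
 */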

#endif