Extract tool independent file rewriting into its own file
[ppcg.git] / cuda.h
blob 8d92f15638e163812fa532a4b9a432e85032b317
1 #ifndef _CUDA_H
2 #define _CUDA_H
4 #include <pet.h>
5 #include "cuda_common.h"
6 #include "gpucode.h"
7 #include "ppcg_options.h"
9 struct cuda_gen {
10 struct cuda_info cuda;
11 struct gpucode_info code;
12 struct gpucode_info kernel_code;
13 struct gpucode_info stmt_code;
15 isl_ctx *ctx;
16 struct ppcg_options *options;
17 CloogState *state;
19 struct pet_scop *scop;
21 /* Set of parameter values */
22 isl_set *context;
24 /* tile, grid and block sizes for each kernel */
25 isl_union_map *sizes;
27 /* Uninitialized data elements (or an overapproximation) */
28 isl_union_set *copy_in;
30 /* All read accesses in the entire program */
31 isl_union_map *read;
33 /* All write accesses in the entire program */
34 isl_union_map *write;
36 /* Array of statements */
37 int n_stmts;
38 struct cuda_stmt *stmts;
40 int n_array;
41 struct cuda_array_info *array;
43 /* Identifier of current kernel. */
44 int kernel_id;
46 /* First tile dimension. */
47 int tile_first;
48 /* Number of tile dimensions. */
49 int tile_len;
50 /* Number of initial parallel loops among tile dimensions. */
51 int n_parallel;
53 /* Number of dimensions determining shared memory. */
54 int shared_len;
56 /* Number of rows in the untiled schedule. */
57 int untiled_len;
58 /* Number of rows in the tiled schedule. */
59 int tiled_len;
60 /* Number of rows in schedule after tiling/wrapping over threads. */
61 int thread_tiled_len;
63 /* Global untiled schedule. */
64 isl_union_map *sched;
65 /* Local (per kernel launch) tiled schedule. */
66 isl_union_map *tiled_sched;
67 /* Local schedule per shared memory tile loop iteration. */
68 isl_union_map *local_sched;
69 /* Domain of the current statement (within print_statement). */
70 isl_set *stmt_domain;
72 /* Position of first parameter corresponding to shared tile loop
73 * in shared_sched.
75 unsigned first_shared;
76 /* Local tiled schedule projected onto the shared tile loops and
77 * the loops that will be wrapped over the threads,
78 * with all shared tile loops parametrized.
80 isl_union_map *shared_sched;
81 /* Projects out the loops that will be wrapped over the threads
82 * from shared_sched.
84 isl_union_map *shared_proj;
86 /* A map that takes the range of shared_sched as input,
87 * wraps the appropriate loops over the threads and then projects
88 * out these loops.
90 isl_map *privatization;
92 /* A map from the shared memory tile loops and the thread indices
93 * (as parameters) to the set of accessed memory elements that
94 * will be accessed through private copies.
96 isl_union_map *private_access;
98 /* The schedule for the current private/shared access
99 * (within print_private_access or print_shared_access).
101 isl_map *copy_sched;
102 /* The array reference group corresponding to copy_sched. */
103 struct cuda_array_ref_group *copy_group;
104 /* copy_group->private_bound or copy_group->shared_bound */
105 struct cuda_array_bound *copy_bound;
107 /* First loop to unroll (or -1 if none). */
108 int first_unroll;
110 int n_grid;
111 int n_block;
112 /* Note: in the input file, the sizes of the grid and the blocks
113 * are specified in the order x, y, z, but internally, the sizes
114 * are stored in reverse order, so that the last element always
115 * refers to the x dimension.
117 int grid_dim[2];
118 int block_dim[3];
119 int *tile_size;
122 __isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
123 const char *str);
124 void collect_array_info(struct cuda_gen *gen);
125 void print_host_code(struct cuda_gen *gen);
126 void clear_cuda_gen(struct cuda_gen *gen);
128 int cuda_pet(isl_ctx *ctx, struct pet_scop *scop, struct ppcg_options *options,
129 const char *input);
131 #endif