gpu.c: make local array information available through ppcg_kernel
[ppcg.git] / gpu.h
#ifndef _GPU_H
#define _GPU_H

#include <pet.h>
#include "cuda_common.h"
#include "ppcg_options.h"

/* For each index i, array->bound[i] specialized to the current kernel. */
struct gpu_local_array_info {
	isl_pw_aff_list *bound;
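	/* For example, for a two-dimensional array, the list would hold two
	 * elements, each the corresponding array->bound[i] specialized to
	 * the context of the current kernel.
	 */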
};

struct gpu_gen {
	struct cuda_info cuda;

	isl_ctx *ctx;
	struct ppcg_options *options;

	struct pet_scop *scop;

	/* Set of parameter values */
	isl_set *context;

	/* tile, grid and block sizes for each kernel */
	isl_union_map *sizes;

	/* Uninitialized data elements (or an overapproximation) */
	isl_union_set *copy_in;

	/* All read accesses in the entire program */
	isl_union_map *read;

	/* All write accesses in the entire program */
	isl_union_map *write;

	/* Array of statements */
	int n_stmts;
	struct gpu_stmt *stmts;

	int n_array;
	struct gpu_array_info *array;

	/* Identifier of current kernel. */
	int kernel_id;
	/* Pointer to the current kernel. */
	struct ppcg_kernel *kernel;

	/* First tile dimension. */
	int tile_first;
	/* Number of tile dimensions. */
	int tile_len;
	/* Number of initial parallel loops among tile dimensions. */
	int n_parallel;

	/* Number of dimensions determining shared memory. */
	int shared_len;

	/* Number of rows in the untiled schedule. */
	int untiled_len;
	/* Number of rows in the tiled schedule. */
	int tiled_len;
	/* Number of rows in schedule after tiling/wrapping over threads. */
	int thread_tiled_len;

	/* Global untiled schedule. */
	isl_union_map *sched;
	/* Local (per kernel launch) tiled schedule. */
	isl_union_map *tiled_sched;
	/* Local schedule per shared memory tile loop iteration. */
	isl_union_map *local_sched;

	/* Local tiled schedule projected onto the shared tile loops and
	 * the loops that will be wrapped over the threads,
	 * with all shared tile loops parametrized.
	 */
	isl_union_map *shared_sched;
	/* Projects out the loops that will be wrapped over the threads
	 * from shared_sched.
	 */
	isl_union_map *shared_proj;

	/* A map that takes the range of shared_sched as input,
	 * wraps the appropriate loops over the threads and then projects
	 * out these loops.
	 */
	isl_map *privatization;

	/* A map from the shared memory tile loops and the thread indices
	 * (as parameters) to the set of accessed memory elements that
	 * will be accessed through private copies.
	 */
	isl_union_map *private_access;

	/* The schedule for the current private/shared access
	 * (within print_private_access or print_shared_access).
	 */
	isl_map *copy_sched;
	/* The array reference group corresponding to copy_sched. */
	struct gpu_array_ref_group *copy_group;
	/* copy_group->private_bound or copy_group->shared_bound */
	struct gpu_array_bound *copy_bound;

	/* First loop to unroll (or -1 if none) in the current part of the
	 * schedule.
	 */
	int first_unroll;

	int n_grid;
	int n_block;
	/* Note: in the input file, the sizes of the grid and the blocks
	 * are specified in the order x, y, z, but internally, the sizes
	 * are stored in reverse order, so that the last element always
	 * refers to the x dimension.
	 */
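	/* For example, a block size given as "16 8 4" (x, y, z) in the
	 * input would be stored as block_dim[] = { 4, 8, 16 }, so that
	 * block_dim[2] holds the size of the x dimension.
	 */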
	int grid_dim[2];
	int block_dim[3];
	int *tile_size;
};

__isl_give isl_set *add_context_from_str(__isl_take isl_set *set,
	const char *str);
void collect_array_info(struct gpu_gen *gen);
void print_host_code(struct gpu_gen *gen);
void clear_gpu_gen(struct gpu_gen *gen);

int generate_cuda(isl_ctx *ctx, struct pet_scop *scop,
	struct ppcg_options *options, const char *input);
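/* A rough usage sketch: generate_cuda() takes a pet_scop together with the
 * name of the input file.  The pet call below is only one possible way of
 * obtaining such a scop, and "options" is assumed to have been set up
 * elsewhere:
 *
 *	struct pet_scop *scop =
 *		pet_scop_extract_from_C_source(ctx, input, NULL);
 *	if (scop)
 *		generate_cuda(ctx, scop, options, input);
 */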

#endif