5 #include "cuda_common.h"
6 #include "ppcg_options.h"
8 /* For each index i, array->bound[i] specialized to the current kernel. */
9 struct gpu_local_array_info
{
10 isl_pw_aff_list
*bound
;
14 struct cuda_info cuda
;
17 struct ppcg_options
*options
;
19 struct pet_scop
*scop
;
21 /* Set of parameter values */
24 /* tile, grid and block sizes for each kernel */
27 /* Uninitialized data elements (or an overapproximation) */
28 isl_union_set
*copy_in
;
30 /* All read accesses in the entire program */
33 /* All write accesses in the entire program */
36 /* Array of statements */
38 struct gpu_stmt
*stmts
;
41 struct gpu_array_info
*array
;
43 /* Identifier of current kernel. */
45 /* Pointer to the current kernel. */
46 struct ppcg_kernel
*kernel
;
48 /* First tile dimension. */
50 /* Number of tile dimensions. */
52 /* Number of initial parallel loops among tile dimensions. */
55 /* Number of dimensions determining shared memory. */
58 /* Number of rows in the untiled schedule. */
60 /* Number of rows in the tiled schedule. */
62 /* Number of rows in schedule after tiling/wrapping over threads. */
65 /* Global untiled schedule. */
67 /* Local (per kernel launch) tiled schedule. */
68 isl_union_map
*tiled_sched
;
69 /* Local schedule per shared memory tile loop iteration. */
70 isl_union_map
*local_sched
;
72 /* Local tiled schedule projected onto the shared tile loops and
73 * the loops that will be wrapped over the threads,
74 * with all shared tile loops parametrized.
 */
76 isl_union_map
*shared_sched
;
77 /* Projects out the loops that will be wrapped over the threads
80 isl_union_map
*shared_proj
;
82 /* A map that takes the range of shared_sched as input,
83 * wraps the appropriate loops over the threads and then projects
86 isl_map
*privatization
;
88 /* A map from the shared memory tile loops and the thread indices
89 * (as parameters) to the set of accessed memory elements that
90 * will be accessed through private copies.
 */
92 isl_union_map
*private_access
;
94 /* The schedule for the current private/shared access
95 * (within print_private_access or print_shared_access).
 */
98 /* The array reference group corresponding to copy_sched. */
99 struct gpu_array_ref_group
*copy_group
;
100 /* copy_group->private_bound or copy_group->shared_bound */
101 struct gpu_array_bound
*copy_bound
;
103 /* First loop to unroll (or -1 if none) in the current part of the
110 /* Note: in the input file, the sizes of the grid and the blocks
111 * are specified in the order x, y, z, but internally, the sizes
112 * are stored in reverse order, so that the last element always
113 * refers to the x dimension.
 */
120 __isl_give isl_set
*add_context_from_str(__isl_take isl_set
*set
,
/* Prototype only; the definition is not part of this header.
 * Takes the code generator state "gen" and returns nothing.
 * NOTE(review): judging by the name, this presumably fills in the
 * per-array information stored in "gen" -- confirm against the definition.
 */
122 void collect_array_info(struct gpu_gen
*gen
);
/* Prototype only; the definition is not part of this header.
 * Takes the code generator state "gen" and returns nothing.
 * NOTE(review): presumably emits the host-side part of the generated
 * program -- the output destination is not visible here; confirm.
 */
123 void print_host_code(struct gpu_gen
*gen
);
/* Prototype only; the definition is not part of this header.
 * Takes the code generator state "gen" and returns nothing.
 * NOTE(review): presumably releases the objects owned by "gen";
 * whether "gen" itself is freed cannot be told from here -- confirm.
 */
124 void clear_gpu_gen(struct gpu_gen
*gen
);
/* Prototype only; the definition is not part of this header.
 * Arguments:
 *   ctx     - isl context
 *   scop    - pet representation of the program fragment
 *   options - ppcg options
 *   input   - input string, not modified by the callee (const)
 *             NOTE(review): presumably the input file name -- confirm.
 * Returns an int; the meaning of the return value is not visible
 * here (NOTE(review): confirm the success/failure convention).
 */
126 int generate_cuda(isl_ctx
*ctx
, struct pet_scop
*scop
,
127 struct ppcg_options
*options
, const char *input
);