1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
30 #include <isl/options.h>
34 #include "coretypes.h"
37 #include "tree-ssa-loop.h"
40 #include "tree-chrec.h"
41 #include "tree-data-ref.h"
42 #include "tree-scalar-evolution.h"
46 #include "graphite-poly.h"
48 static isl_union_set
*
49 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED
)
53 isl_space
*space
= isl_set_get_space (scop
->context
);
54 isl_union_set
*res
= isl_union_set_empty (space
);
56 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
57 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
62 static isl_union_map
*
63 scop_get_dependences (scop_p scop
)
65 isl_union_map
*dependences
;
68 compute_deps (scop
, SCOP_BBS (scop
),
69 &scop
->must_raw
, &scop
->may_raw
,
70 &scop
->must_raw_no_source
, &scop
->may_raw_no_source
,
71 &scop
->must_war
, &scop
->may_war
,
72 &scop
->must_war_no_source
, &scop
->may_war_no_source
,
73 &scop
->must_waw
, &scop
->may_waw
,
74 &scop
->must_waw_no_source
, &scop
->may_waw_no_source
);
76 dependences
= isl_union_map_copy (scop
->must_raw
);
77 dependences
= isl_union_map_union (dependences
,
78 isl_union_map_copy (scop
->must_war
));
79 dependences
= isl_union_map_union (dependences
,
80 isl_union_map_copy (scop
->must_waw
));
81 dependences
= isl_union_map_union (dependences
,
82 isl_union_map_copy (scop
->may_raw
));
83 dependences
= isl_union_map_union (dependences
,
84 isl_union_map_copy (scop
->may_war
));
85 dependences
= isl_union_map_union (dependences
,
86 isl_union_map_copy (scop
->may_waw
));
91 /* getTileMap - Create a map that describes a n-dimensonal tiling.
93 getTileMap creates a map from a n-dimensional scattering space into an
94 2*n-dimensional scattering space. The map describes a rectangular tiling.
97 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
99 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
100 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
101 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
105 for (i = 0; i < N; i++)
106 for (j = 0; j < M; j++)
111 for (t_i = 0; t_i < N; i+=32)
112 for (t_j = 0; t_j < M; j+=32)
113 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
114 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
118 static isl_basic_map
*
119 getTileMap (isl_ctx
*ctx
, int scheduleDimensions
, int tileSize
)
124 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
125 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
126 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
128 and project out the auxilary dimensions a0 and a1. */
129 isl_space
*Space
= isl_space_alloc (ctx
, 0, scheduleDimensions
,
130 scheduleDimensions
* 3);
131 isl_basic_map
*tileMap
= isl_basic_map_universe (isl_space_copy (Space
));
133 isl_local_space
*LocalSpace
= isl_local_space_from_space (Space
);
135 for (x
= 0; x
< scheduleDimensions
; x
++)
139 int pX
= scheduleDimensions
+ x
;
140 int aX
= 2 * scheduleDimensions
+ x
;
144 /* sX = aX * tileSize; */
145 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
146 isl_constraint_set_coefficient_si (c
, isl_dim_out
, sX
, 1);
147 isl_constraint_set_coefficient_si (c
, isl_dim_out
, aX
, -tileSize
);
148 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
151 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
152 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
153 isl_constraint_set_coefficient_si (c
, isl_dim_in
, sX
, -1);
154 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
157 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
158 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
159 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, -1);
160 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
162 /* pX <= tX + (tileSize - 1) */
163 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
164 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, 1);
165 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, -1);
166 isl_constraint_set_constant_si (c
, tileSize
- 1);
167 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
170 /* Project out auxiliary dimensions.
172 The auxiliary dimensions are transformed into existentially quantified ones.
173 This reduces the number of visible scattering dimensions and allows Cloog
174 to produces better code. */
175 tileMap
= isl_basic_map_project_out (tileMap
, isl_dim_out
,
176 2 * scheduleDimensions
,
178 isl_local_space_free (LocalSpace
);
182 /* getScheduleForBand - Get the schedule for this band.
184 Polly applies transformations like tiling on top of the isl calculated value.
185 This can influence the number of scheduling dimension. The number of
186 schedule dimensions is returned in the parameter 'Dimension'. */
187 static bool DisableTiling
= false;
189 static isl_union_map
*
190 getScheduleForBand (isl_band
*Band
, int *Dimensions
)
192 isl_union_map
*PartialSchedule
;
195 isl_basic_map
*TileMap
;
196 isl_union_map
*TileUMap
;
198 PartialSchedule
= isl_band_get_partial_schedule (Band
);
199 *Dimensions
= isl_band_n_member (Band
);
202 return PartialSchedule
;
204 /* It does not make any sense to tile a band with just one dimension. */
205 if (*Dimensions
== 1)
206 return PartialSchedule
;
208 ctx
= isl_union_map_get_ctx (PartialSchedule
);
209 Space
= isl_union_map_get_space (PartialSchedule
);
211 TileMap
= getTileMap (ctx
, *Dimensions
, 32);
212 TileUMap
= isl_union_map_from_map (isl_map_from_basic_map (TileMap
));
213 TileUMap
= isl_union_map_align_params (TileUMap
, Space
);
214 *Dimensions
= 2 * *Dimensions
;
216 return isl_union_map_apply_range (PartialSchedule
, TileUMap
);
219 /* Create a map that pre-vectorizes one scheduling dimension.
221 getPrevectorMap creates a map that maps each input dimension to the same
222 output dimension, except for the dimension DimToVectorize. DimToVectorize is
223 strip mined by 'VectorWidth' and the newly created point loop of
224 DimToVectorize is moved to the innermost level.
226 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
228 | Before transformation
232 | for (i = 0; i < 128; i++)
233 | for (j = 0; j < 128; j++)
237 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
239 | After transformation:
241 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
243 | for (it = 0; it < 128; it+=4)
244 | for (j = 0; j < 128; j++)
245 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
248 The goal of this transformation is to create a trivially vectorizable loop.
249 This means a parallel loop at the innermost level that has a constant number
250 of iterations corresponding to the target vector width.
252 This transformation creates a loop at the innermost level. The loop has a
253 constant number of iterations, if the number of loop iterations at
254 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
255 currently constant and not yet target specific. This function does not reason
256 about parallelism. */
258 getPrevectorMap (isl_ctx
*ctx
, int DimToVectorize
,
259 int ScheduleDimensions
,
263 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
268 int PointDimension
; /* ip */
269 int TileDimension
; /* it */
270 isl_int VectorWidthMP
;
273 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
275 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
276 TilingMap
= isl_map_universe (isl_space_copy (Space
));
277 LocalSpace
= isl_local_space_from_space (Space
);
278 PointDimension
= ScheduleDimensions
;
279 TileDimension
= DimToVectorize
;
281 /* Create an identity map for everything except DimToVectorize and map
282 DimToVectorize to the point loop at the innermost dimension. */
283 for (i
= 0; i
< ScheduleDimensions
; i
++)
285 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
286 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
288 if (i
== DimToVectorize
)
289 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
291 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
293 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
296 /* it % 'VectorWidth' = 0 */
297 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
298 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
299 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
300 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
301 isl_int_init (VectorWidthMP
);
302 isl_int_set_si (VectorWidthMP
, VectorWidth
);
303 Aff
= isl_aff_mod (Aff
, VectorWidthMP
);
304 isl_int_clear (VectorWidthMP
);
305 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
306 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
309 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
310 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
311 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
312 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
314 /* ip <= it + ('VectorWidth' - 1) */
315 c
= isl_inequality_alloc (LocalSpace
);
316 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
317 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
318 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
319 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
321 isl_map_dump (TilingMap
);
326 static bool EnablePollyVector
= false;
328 /* getScheduleForBandList - Get the scheduling map for a list of bands.
330 We walk recursively the forest of bands to combine the schedules of the
331 individual bands to the overall schedule. In case tiling is requested,
332 the individual bands are tiled. */
333 static isl_union_map
*
334 getScheduleForBandList (isl_band_list
*BandList
)
337 isl_union_map
*Schedule
;
340 ctx
= isl_band_list_get_ctx (BandList
);
341 NumBands
= isl_band_list_n_band (BandList
);
342 Schedule
= isl_union_map_empty (isl_space_params_alloc (ctx
, 0));
344 for (i
= 0; i
< NumBands
; i
++)
347 isl_union_map
*PartialSchedule
;
348 int ScheduleDimensions
;
351 Band
= isl_band_list_get_band (BandList
, i
);
352 PartialSchedule
= getScheduleForBand (Band
, &ScheduleDimensions
);
353 Space
= isl_union_map_get_space (PartialSchedule
);
355 if (isl_band_has_children (Band
))
357 isl_band_list
*Children
;
358 isl_union_map
*SuffixSchedule
;
360 Children
= isl_band_get_children (Band
);
361 SuffixSchedule
= getScheduleForBandList (Children
);
362 PartialSchedule
= isl_union_map_flat_range_product (PartialSchedule
,
364 isl_band_list_free (Children
);
366 else if (EnablePollyVector
)
368 for (i
= ScheduleDimensions
- 1 ; i
>= 0 ; i
--)
370 if (isl_band_member_is_zero_distance (Band
, i
))
373 isl_union_map
*TileUMap
;
375 TileMap
= getPrevectorMap (ctx
, i
, ScheduleDimensions
, 4);
376 TileUMap
= isl_union_map_from_map (TileMap
);
377 TileUMap
= isl_union_map_align_params
378 (TileUMap
, isl_space_copy (Space
));
379 PartialSchedule
= isl_union_map_apply_range
380 (PartialSchedule
, TileUMap
);
386 Schedule
= isl_union_map_union (Schedule
, PartialSchedule
);
388 isl_band_free (Band
);
389 isl_space_free (Space
);
395 static isl_union_map
*
396 getScheduleMap (isl_schedule
*Schedule
)
398 isl_band_list
*BandList
= isl_schedule_get_band_forest (Schedule
);
399 isl_union_map
*ScheduleMap
= getScheduleForBandList (BandList
);
400 isl_band_list_free (BandList
);
405 getSingleMap (__isl_take isl_map
*map
, void *user
)
407 isl_map
**singleMap
= (isl_map
**) user
;
414 apply_schedule_map_to_scop (scop_p scop
, isl_union_map
*schedule_map
)
419 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
421 isl_set
*domain
= isl_set_copy (pbb
->domain
);
422 isl_union_map
*stmtBand
;
423 isl_map
*stmtSchedule
;
425 stmtBand
= isl_union_map_intersect_domain
426 (isl_union_map_copy (schedule_map
),
427 isl_union_set_from_set (domain
));
428 isl_union_map_foreach_map (stmtBand
, getSingleMap
, &stmtSchedule
);
429 isl_map_free (pbb
->transformed
);
430 pbb
->transformed
= stmtSchedule
;
431 isl_union_map_free (stmtBand
);
435 static const int CONSTANT_BOUND
= 20;
438 optimize_isl (scop_p scop
)
441 isl_schedule
*schedule
;
442 isl_union_set
*domain
;
443 isl_union_map
*validity
, *proximity
, *dependences
;
444 isl_union_map
*schedule_map
;
446 domain
= scop_get_domains (scop
);
447 dependences
= scop_get_dependences (scop
);
448 dependences
= isl_union_map_gist_domain (dependences
,
449 isl_union_set_copy (domain
));
450 dependences
= isl_union_map_gist_range (dependences
,
451 isl_union_set_copy (domain
));
452 validity
= dependences
;
454 proximity
= isl_union_map_copy (validity
);
456 isl_options_set_schedule_max_constant_term (scop
->ctx
, CONSTANT_BOUND
);
457 isl_options_set_schedule_maximize_band_depth (scop
->ctx
, 1);
458 isl_options_set_schedule_fuse (scop
->ctx
, ISL_SCHEDULE_FUSE_MIN
);
459 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_CONTINUE
);
460 schedule
= isl_union_set_compute_schedule (domain
, validity
, proximity
);
461 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_ABORT
);
466 schedule_map
= getScheduleMap (schedule
);
468 apply_schedule_map_to_scop (scop
, schedule_map
);
470 isl_schedule_free (schedule
);
471 isl_union_map_free (schedule_map
);