1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
30 #include <isl/options.h>
34 #include "coretypes.h"
36 #include "basic-block.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "gimple-expr.h"
42 #include "gimple-iterator.h"
43 #include "tree-ssa-loop.h"
46 #include "tree-chrec.h"
47 #include "tree-data-ref.h"
48 #include "tree-scalar-evolution.h"
52 #include "graphite-poly.h"
54 static isl_union_set
*
55 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED
)
59 isl_space
*space
= isl_set_get_space (scop
->context
);
60 isl_union_set
*res
= isl_union_set_empty (space
);
62 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
63 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
68 /* getTileMap - Create a map that describes a n-dimensonal tiling.
70 getTileMap creates a map from a n-dimensional scattering space into an
71 2*n-dimensional scattering space. The map describes a rectangular tiling.
74 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
76 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
77 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
78 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
82 for (i = 0; i < N; i++)
83 for (j = 0; j < M; j++)
88 for (t_i = 0; t_i < N; i+=32)
89 for (t_j = 0; t_j < M; j+=32)
90 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
91 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
95 static isl_basic_map
*
96 getTileMap (isl_ctx
*ctx
, int scheduleDimensions
, int tileSize
)
101 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
102 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
103 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
105 and project out the auxilary dimensions a0 and a1. */
106 isl_space
*Space
= isl_space_alloc (ctx
, 0, scheduleDimensions
,
107 scheduleDimensions
* 3);
108 isl_basic_map
*tileMap
= isl_basic_map_universe (isl_space_copy (Space
));
110 isl_local_space
*LocalSpace
= isl_local_space_from_space (Space
);
112 for (x
= 0; x
< scheduleDimensions
; x
++)
116 int pX
= scheduleDimensions
+ x
;
117 int aX
= 2 * scheduleDimensions
+ x
;
121 /* sX = aX * tileSize; */
122 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
123 isl_constraint_set_coefficient_si (c
, isl_dim_out
, sX
, 1);
124 isl_constraint_set_coefficient_si (c
, isl_dim_out
, aX
, -tileSize
);
125 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
128 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
129 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
130 isl_constraint_set_coefficient_si (c
, isl_dim_in
, sX
, -1);
131 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
134 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
135 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
136 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, -1);
137 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
139 /* pX <= tX + (tileSize - 1) */
140 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
141 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, 1);
142 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, -1);
143 isl_constraint_set_constant_si (c
, tileSize
- 1);
144 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
147 /* Project out auxiliary dimensions.
149 The auxiliary dimensions are transformed into existentially quantified ones.
150 This reduces the number of visible scattering dimensions and allows Cloog
151 to produces better code. */
152 tileMap
= isl_basic_map_project_out (tileMap
, isl_dim_out
,
153 2 * scheduleDimensions
,
155 isl_local_space_free (LocalSpace
);
159 /* getScheduleForBand - Get the schedule for this band.
161 Polly applies transformations like tiling on top of the isl calculated value.
162 This can influence the number of scheduling dimension. The number of
163 schedule dimensions is returned in the parameter 'Dimension'. */
164 static bool DisableTiling
= false;
166 static isl_union_map
*
167 getScheduleForBand (isl_band
*Band
, int *Dimensions
)
169 isl_union_map
*PartialSchedule
;
172 isl_basic_map
*TileMap
;
173 isl_union_map
*TileUMap
;
175 PartialSchedule
= isl_band_get_partial_schedule (Band
);
176 *Dimensions
= isl_band_n_member (Band
);
179 return PartialSchedule
;
181 /* It does not make any sense to tile a band with just one dimension. */
182 if (*Dimensions
== 1)
183 return PartialSchedule
;
185 ctx
= isl_union_map_get_ctx (PartialSchedule
);
186 Space
= isl_union_map_get_space (PartialSchedule
);
188 TileMap
= getTileMap (ctx
, *Dimensions
, 32);
189 TileUMap
= isl_union_map_from_map (isl_map_from_basic_map (TileMap
));
190 TileUMap
= isl_union_map_align_params (TileUMap
, Space
);
191 *Dimensions
= 2 * *Dimensions
;
193 return isl_union_map_apply_range (PartialSchedule
, TileUMap
);
196 /* Create a map that pre-vectorizes one scheduling dimension.
198 getPrevectorMap creates a map that maps each input dimension to the same
199 output dimension, except for the dimension DimToVectorize. DimToVectorize is
200 strip mined by 'VectorWidth' and the newly created point loop of
201 DimToVectorize is moved to the innermost level.
203 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
205 | Before transformation
209 | for (i = 0; i < 128; i++)
210 | for (j = 0; j < 128; j++)
214 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
216 | After transformation:
218 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
220 | for (it = 0; it < 128; it+=4)
221 | for (j = 0; j < 128; j++)
222 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
225 The goal of this transformation is to create a trivially vectorizable loop.
226 This means a parallel loop at the innermost level that has a constant number
227 of iterations corresponding to the target vector width.
229 This transformation creates a loop at the innermost level. The loop has a
230 constant number of iterations, if the number of loop iterations at
231 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
232 currently constant and not yet target specific. This function does not reason
233 about parallelism. */
235 getPrevectorMap (isl_ctx
*ctx
, int DimToVectorize
,
236 int ScheduleDimensions
,
240 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
245 int PointDimension
; /* ip */
246 int TileDimension
; /* it */
247 isl_val
*VectorWidthMP
;
250 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
252 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
253 TilingMap
= isl_map_universe (isl_space_copy (Space
));
254 LocalSpace
= isl_local_space_from_space (Space
);
255 PointDimension
= ScheduleDimensions
;
256 TileDimension
= DimToVectorize
;
258 /* Create an identity map for everything except DimToVectorize and map
259 DimToVectorize to the point loop at the innermost dimension. */
260 for (i
= 0; i
< ScheduleDimensions
; i
++)
262 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
263 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
265 if (i
== DimToVectorize
)
266 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
268 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
270 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
273 /* it % 'VectorWidth' = 0 */
274 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
275 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
276 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
277 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
279 VectorWidthMP
= isl_val_int_from_si (ctx
, VectorWidth
);
280 Aff
= isl_aff_mod_val (Aff
, VectorWidthMP
);
281 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
282 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
285 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
286 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
287 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
288 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
290 /* ip <= it + ('VectorWidth' - 1) */
291 c
= isl_inequality_alloc (LocalSpace
);
292 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
293 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
294 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
295 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
297 isl_map_dump (TilingMap
);
302 static bool EnablePollyVector
= false;
304 /* getScheduleForBandList - Get the scheduling map for a list of bands.
306 We walk recursively the forest of bands to combine the schedules of the
307 individual bands to the overall schedule. In case tiling is requested,
308 the individual bands are tiled. */
309 static isl_union_map
*
310 getScheduleForBandList (isl_band_list
*BandList
)
313 isl_union_map
*Schedule
;
316 ctx
= isl_band_list_get_ctx (BandList
);
317 NumBands
= isl_band_list_n_band (BandList
);
318 Schedule
= isl_union_map_empty (isl_space_params_alloc (ctx
, 0));
320 for (i
= 0; i
< NumBands
; i
++)
323 isl_union_map
*PartialSchedule
;
324 int ScheduleDimensions
;
327 Band
= isl_band_list_get_band (BandList
, i
);
328 PartialSchedule
= getScheduleForBand (Band
, &ScheduleDimensions
);
329 Space
= isl_union_map_get_space (PartialSchedule
);
331 if (isl_band_has_children (Band
))
333 isl_band_list
*Children
;
334 isl_union_map
*SuffixSchedule
;
336 Children
= isl_band_get_children (Band
);
337 SuffixSchedule
= getScheduleForBandList (Children
);
338 PartialSchedule
= isl_union_map_flat_range_product (PartialSchedule
,
340 isl_band_list_free (Children
);
342 else if (EnablePollyVector
)
344 for (i
= ScheduleDimensions
- 1 ; i
>= 0 ; i
--)
346 if (isl_band_member_is_zero_distance (Band
, i
))
349 isl_union_map
*TileUMap
;
351 TileMap
= getPrevectorMap (ctx
, i
, ScheduleDimensions
, 4);
352 TileUMap
= isl_union_map_from_map (TileMap
);
353 TileUMap
= isl_union_map_align_params
354 (TileUMap
, isl_space_copy (Space
));
355 PartialSchedule
= isl_union_map_apply_range
356 (PartialSchedule
, TileUMap
);
362 Schedule
= isl_union_map_union (Schedule
, PartialSchedule
);
364 isl_band_free (Band
);
365 isl_space_free (Space
);
371 static isl_union_map
*
372 getScheduleMap (isl_schedule
*Schedule
)
374 isl_band_list
*BandList
= isl_schedule_get_band_forest (Schedule
);
375 isl_union_map
*ScheduleMap
= getScheduleForBandList (BandList
);
376 isl_band_list_free (BandList
);
381 getSingleMap (__isl_take isl_map
*map
, void *user
)
383 isl_map
**singleMap
= (isl_map
**) user
;
390 apply_schedule_map_to_scop (scop_p scop
, isl_union_map
*schedule_map
)
395 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
397 isl_set
*domain
= isl_set_copy (pbb
->domain
);
398 isl_union_map
*stmtBand
;
399 isl_map
*stmtSchedule
;
401 stmtBand
= isl_union_map_intersect_domain
402 (isl_union_map_copy (schedule_map
),
403 isl_union_set_from_set (domain
));
404 isl_union_map_foreach_map (stmtBand
, getSingleMap
, &stmtSchedule
);
405 isl_map_free (pbb
->transformed
);
406 pbb
->transformed
= stmtSchedule
;
407 isl_union_map_free (stmtBand
);
411 static const int CONSTANT_BOUND
= 20;
414 optimize_isl (scop_p scop
)
417 isl_schedule
*schedule
;
418 isl_union_set
*domain
;
419 isl_union_map
*validity
, *proximity
, *dependences
;
420 isl_union_map
*schedule_map
;
422 domain
= scop_get_domains (scop
);
423 dependences
= scop_get_dependences (scop
);
424 dependences
= isl_union_map_gist_domain (dependences
,
425 isl_union_set_copy (domain
));
426 dependences
= isl_union_map_gist_range (dependences
,
427 isl_union_set_copy (domain
));
428 validity
= dependences
;
430 proximity
= isl_union_map_copy (validity
);
432 isl_options_set_schedule_max_constant_term (scop
->ctx
, CONSTANT_BOUND
);
433 isl_options_set_schedule_maximize_band_depth (scop
->ctx
, 1);
434 isl_options_set_schedule_fuse (scop
->ctx
, ISL_SCHEDULE_FUSE_MIN
);
435 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_CONTINUE
);
436 schedule
= isl_union_set_compute_schedule (domain
, validity
, proximity
);
437 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_ABORT
);
442 schedule_map
= getScheduleMap (schedule
);
444 apply_schedule_map_to_scop (scop
, schedule_map
);
446 isl_schedule_free (schedule
);
447 isl_union_map_free (schedule_map
);