1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
30 #include <isl/options.h>
34 #include "coretypes.h"
40 #include "fold-const.h"
43 #include "hard-reg-set.h"
46 #include "dominance.h"
48 #include "basic-block.h"
49 #include "tree-ssa-alias.h"
50 #include "internal-fn.h"
51 #include "gimple-expr.h"
54 #include "gimple-iterator.h"
55 #include "tree-ssa-loop.h"
58 #include "tree-chrec.h"
59 #include "tree-data-ref.h"
60 #include "tree-scalar-evolution.h"
64 #include "graphite-poly.h"
66 static isl_union_set
*
67 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED
)
71 isl_space
*space
= isl_set_get_space (scop
->context
);
72 isl_union_set
*res
= isl_union_set_empty (space
);
74 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
75 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
80 /* getTileMap - Create a map that describes a n-dimensonal tiling.
82 getTileMap creates a map from a n-dimensional scattering space into an
83 2*n-dimensional scattering space. The map describes a rectangular tiling.
86 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
88 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
89 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
90 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
94 for (i = 0; i < N; i++)
95 for (j = 0; j < M; j++)
100 for (t_i = 0; t_i < N; i+=32)
101 for (t_j = 0; t_j < M; j+=32)
102 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
103 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
107 static isl_basic_map
*
108 getTileMap (isl_ctx
*ctx
, int scheduleDimensions
, int tileSize
)
113 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
114 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
115 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
117 and project out the auxilary dimensions a0 and a1. */
118 isl_space
*Space
= isl_space_alloc (ctx
, 0, scheduleDimensions
,
119 scheduleDimensions
* 3);
120 isl_basic_map
*tileMap
= isl_basic_map_universe (isl_space_copy (Space
));
122 isl_local_space
*LocalSpace
= isl_local_space_from_space (Space
);
124 for (x
= 0; x
< scheduleDimensions
; x
++)
128 int pX
= scheduleDimensions
+ x
;
129 int aX
= 2 * scheduleDimensions
+ x
;
133 /* sX = aX * tileSize; */
134 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
135 isl_constraint_set_coefficient_si (c
, isl_dim_out
, sX
, 1);
136 isl_constraint_set_coefficient_si (c
, isl_dim_out
, aX
, -tileSize
);
137 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
140 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
141 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
142 isl_constraint_set_coefficient_si (c
, isl_dim_in
, sX
, -1);
143 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
146 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
147 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
148 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, -1);
149 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
151 /* pX <= tX + (tileSize - 1) */
152 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
153 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, 1);
154 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, -1);
155 isl_constraint_set_constant_si (c
, tileSize
- 1);
156 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
159 /* Project out auxiliary dimensions.
161 The auxiliary dimensions are transformed into existentially quantified ones.
162 This reduces the number of visible scattering dimensions and allows Cloog
163 to produces better code. */
164 tileMap
= isl_basic_map_project_out (tileMap
, isl_dim_out
,
165 2 * scheduleDimensions
,
167 isl_local_space_free (LocalSpace
);
171 /* getScheduleForBand - Get the schedule for this band.
173 Polly applies transformations like tiling on top of the isl calculated value.
174 This can influence the number of scheduling dimension. The number of
175 schedule dimensions is returned in the parameter 'Dimension'. */
176 static bool DisableTiling
= false;
178 static isl_union_map
*
179 getScheduleForBand (isl_band
*Band
, int *Dimensions
)
181 isl_union_map
*PartialSchedule
;
184 isl_basic_map
*TileMap
;
185 isl_union_map
*TileUMap
;
187 PartialSchedule
= isl_band_get_partial_schedule (Band
);
188 *Dimensions
= isl_band_n_member (Band
);
190 if (DisableTiling
|| flag_loop_unroll_jam
)
191 return PartialSchedule
;
193 /* It does not make any sense to tile a band with just one dimension. */
194 if (*Dimensions
== 1)
195 return PartialSchedule
;
197 ctx
= isl_union_map_get_ctx (PartialSchedule
);
198 Space
= isl_union_map_get_space (PartialSchedule
);
200 TileMap
= getTileMap (ctx
, *Dimensions
, 32);
201 TileUMap
= isl_union_map_from_map (isl_map_from_basic_map (TileMap
));
202 TileUMap
= isl_union_map_align_params (TileUMap
, Space
);
203 *Dimensions
= 2 * *Dimensions
;
205 return isl_union_map_apply_range (PartialSchedule
, TileUMap
);
208 /* Create a map that pre-vectorizes one scheduling dimension.
210 getPrevectorMap creates a map that maps each input dimension to the same
211 output dimension, except for the dimension DimToVectorize. DimToVectorize is
212 strip mined by 'VectorWidth' and the newly created point loop of
213 DimToVectorize is moved to the innermost level.
215 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
217 | Before transformation
221 | for (i = 0; i < 128; i++)
222 | for (j = 0; j < 128; j++)
226 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
228 | After transformation:
230 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
232 | for (it = 0; it < 128; it+=4)
233 | for (j = 0; j < 128; j++)
234 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
237 The goal of this transformation is to create a trivially vectorizable loop.
238 This means a parallel loop at the innermost level that has a constant number
239 of iterations corresponding to the target vector width.
241 This transformation creates a loop at the innermost level. The loop has a
242 constant number of iterations, if the number of loop iterations at
243 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
244 currently constant and not yet target specific. This function does not reason
249 getPrevectorMap (isl_ctx
*ctx
, int DimToVectorize
,
250 int ScheduleDimensions
,
254 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
259 int PointDimension
; /* ip */
260 int TileDimension
; /* it */
261 isl_val
*VectorWidthMP
;
264 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
266 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
267 TilingMap
= isl_map_universe (isl_space_copy (Space
));
268 LocalSpace
= isl_local_space_from_space (Space
);
269 PointDimension
= ScheduleDimensions
;
270 TileDimension
= DimToVectorize
;
272 /* Create an identity map for everything except DimToVectorize and map
273 DimToVectorize to the point loop at the innermost dimension. */
274 for (i
= 0; i
< ScheduleDimensions
; i
++)
276 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
277 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
279 if (i
== DimToVectorize
)
280 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
282 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
284 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
287 /* it % 'VectorWidth' = 0 */
288 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
289 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
290 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
291 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
293 VectorWidthMP
= isl_val_int_from_si (ctx
, VectorWidth
);
294 Aff
= isl_aff_mod_val (Aff
, VectorWidthMP
);
295 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
296 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
299 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
300 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
301 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
302 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
304 /* ip <= it + ('VectorWidth' - 1) */
305 c
= isl_inequality_alloc (LocalSpace
);
306 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
307 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
308 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
309 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
314 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
315 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
316 corresponding option for AST build.
318 The map (for VectorWidth=4):
320 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
323 The image of this map is the separation class. The range of this map includes
324 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
328 getPrevectorMap_full (isl_ctx
*ctx
, int DimToVectorize
,
329 int ScheduleDimensions
,
333 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
338 int PointDimension
; /* ip */
339 int TileDimension
; /* it */
340 isl_val
*VectorWidthMP
;
343 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
345 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
346 TilingMap
= isl_map_universe (isl_space_copy (Space
));
347 LocalSpace
= isl_local_space_from_space (Space
);
348 PointDimension
= ScheduleDimensions
;
349 TileDimension
= DimToVectorize
;
351 /* Create an identity map for everything except DimToVectorize and the
353 for (i
= 0; i
< ScheduleDimensions
; i
++)
355 if (i
== DimToVectorize
)
358 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
360 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
361 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
363 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
366 /* it % 'VectorWidth' = 0 */
367 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
368 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
369 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
370 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
372 VectorWidthMP
= isl_val_int_from_si (ctx
, VectorWidth
);
373 Aff
= isl_aff_mod_val (Aff
, VectorWidthMP
);
374 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
375 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
377 /* it + ('VectorWidth' - 1) = i0 */
378 c
= isl_equality_alloc (isl_local_space_copy(LocalSpace
));
379 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
,-1);
380 isl_constraint_set_coefficient_si (c
, isl_dim_in
, TileDimension
, 1);
381 isl_constraint_set_constant_si (c
, -VectorWidth
+ 1);
382 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
385 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
386 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
387 isl_constraint_set_constant_si (c
, 0);
388 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
391 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
392 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
393 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
394 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
396 /* ip <= it + ('VectorWidth' - 1) */
397 c
= isl_inequality_alloc (LocalSpace
);
398 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
399 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
400 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
401 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
406 static bool EnablePollyVector
= false;
408 /* getScheduleForBandList - Get the scheduling map for a list of bands.
410 We walk recursively the forest of bands to combine the schedules of the
411 individual bands to the overall schedule. In case tiling is requested,
412 the individual bands are tiled.
413 For unroll and jam the map the schedule for full tiles of the unrolled
414 dimnesion is computed. */
415 static isl_union_map
*
416 getScheduleForBandList (isl_band_list
*BandList
, isl_union_map
**map_sepcl
)
419 isl_union_map
*Schedule
;
422 ctx
= isl_band_list_get_ctx (BandList
);
423 NumBands
= isl_band_list_n_band (BandList
);
424 Schedule
= isl_union_map_empty (isl_space_params_alloc (ctx
, 0));
426 for (i
= 0; i
< NumBands
; i
++)
429 isl_union_map
*PartialSchedule
;
430 int ScheduleDimensions
;
433 isl_union_map
*PartialSchedule_f
;
435 Band
= isl_band_list_get_band (BandList
, i
);
436 PartialSchedule
= getScheduleForBand (Band
, &ScheduleDimensions
);
437 Space
= isl_union_map_get_space (PartialSchedule
);
439 PartialSchedule_f
= NULL
;
441 if (isl_band_has_children (Band
))
443 isl_band_list
*Children
;
444 isl_union_map
*SuffixSchedule
;
446 Children
= isl_band_get_children (Band
);
447 SuffixSchedule
= getScheduleForBandList (Children
, map_sepcl
);
448 PartialSchedule
= isl_union_map_flat_range_product (PartialSchedule
,
450 isl_band_list_free (Children
);
452 else if (EnablePollyVector
|| flag_loop_unroll_jam
)
457 depth
= PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH
);
459 for (i
= ScheduleDimensions
- 1 ; i
>= 0 ; i
--)
461 if (flag_loop_unroll_jam
&& (i
!= (ScheduleDimensions
- depth
)))
464 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
465 if (isl_band_member_is_coincident (Band
, i
))
467 if (isl_band_member_is_zero_distance (Band
, i
))
471 isl_union_map
*TileUMap
;
474 stride
= PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE
);
476 TileMap
= getPrevectorMap_full (ctx
, i
, ScheduleDimensions
,
478 TileUMap
= isl_union_map_from_map (TileMap
);
479 TileUMap
= isl_union_map_align_params
480 (TileUMap
, isl_space_copy (Space
));
481 PartialSchedule_f
= isl_union_map_apply_range
482 (isl_union_map_copy (PartialSchedule
), TileUMap
);
484 TileMap
= getPrevectorMap (ctx
, i
, ScheduleDimensions
, stride
);
485 TileUMap
= isl_union_map_from_map (TileMap
);
486 TileUMap
= isl_union_map_align_params
487 (TileUMap
, isl_space_copy (Space
));
488 PartialSchedule
= isl_union_map_apply_range
489 (PartialSchedule
, TileUMap
);
494 Schedule
= isl_union_map_union (Schedule
,
495 isl_union_map_copy(PartialSchedule
));
497 isl_band_free (Band
);
498 isl_space_free (Space
);
500 if (!flag_loop_unroll_jam
)
502 isl_union_map_free (PartialSchedule
);
506 if (PartialSchedule_f
)
508 *map_sepcl
= isl_union_map_union (*map_sepcl
, PartialSchedule_f
);
509 isl_union_map_free (PartialSchedule
);
512 *map_sepcl
= isl_union_map_union (*map_sepcl
, PartialSchedule
);
518 static isl_union_map
*
519 getScheduleMap (isl_schedule
*Schedule
, isl_union_map
**map_sepcl
)
521 isl_band_list
*BandList
= isl_schedule_get_band_forest (Schedule
);
522 isl_union_map
*ScheduleMap
= getScheduleForBandList (BandList
, map_sepcl
);
523 isl_band_list_free (BandList
);
528 getSingleMap (__isl_take isl_map
*map
, void *user
)
530 isl_map
**singleMap
= (isl_map
**) user
;
537 apply_schedule_map_to_scop (scop_p scop
, isl_union_map
*schedule_map
, bool sepcl
)
542 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
544 isl_set
*domain
= isl_set_copy (pbb
->domain
);
545 isl_union_map
*stmtBand
;
546 isl_map
*stmtSchedule
;
548 stmtBand
= isl_union_map_intersect_domain
549 (isl_union_map_copy (schedule_map
),
550 isl_union_set_from_set (domain
));
551 isl_union_map_foreach_map (stmtBand
, getSingleMap
, &stmtSchedule
);
555 isl_map_free (pbb
->transformed
);
556 pbb
->transformed
= stmtSchedule
;
559 pbb
->map_sepclass
= stmtSchedule
;
561 isl_union_map_free (stmtBand
);
565 static const int CONSTANT_BOUND
= 20;
568 optimize_isl (scop_p scop
)
571 isl_schedule
*schedule
;
572 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
573 isl_schedule_constraints
*schedule_constraints
;
575 isl_union_set
*domain
;
576 isl_union_map
*validity
, *proximity
, *dependences
;
577 isl_union_map
*schedule_map
;
578 isl_union_map
*schedule_map_f
;
580 domain
= scop_get_domains (scop
);
581 dependences
= scop_get_dependences (scop
);
582 dependences
= isl_union_map_gist_domain (dependences
,
583 isl_union_set_copy (domain
));
584 dependences
= isl_union_map_gist_range (dependences
,
585 isl_union_set_copy (domain
));
586 validity
= dependences
;
588 proximity
= isl_union_map_copy (validity
);
590 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
591 schedule_constraints
= isl_schedule_constraints_on_domain (domain
);
593 = isl_schedule_constraints_set_proximity (schedule_constraints
,
596 = isl_schedule_constraints_set_validity (schedule_constraints
,
597 isl_union_map_copy (validity
));
599 = isl_schedule_constraints_set_coincidence (schedule_constraints
,
603 isl_options_set_schedule_max_constant_term (scop
->ctx
, CONSTANT_BOUND
);
604 isl_options_set_schedule_maximize_band_depth (scop
->ctx
, 1);
605 isl_options_set_schedule_fuse (scop
->ctx
, ISL_SCHEDULE_FUSE_MIN
);
606 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_CONTINUE
);
608 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
609 schedule
= isl_schedule_constraints_compute_schedule(schedule_constraints
);
611 schedule
= isl_union_set_compute_schedule (domain
, validity
, proximity
);
614 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_ABORT
);
619 schedule_map_f
= isl_union_map_empty (isl_space_params_alloc (scop
->ctx
, 0));
620 schedule_map
= getScheduleMap (schedule
, &schedule_map_f
);
622 apply_schedule_map_to_scop (scop
, schedule_map
, false);
623 if (!isl_union_map_is_empty (schedule_map_f
))
624 apply_schedule_map_to_scop (scop
, schedule_map_f
, true);
625 isl_union_map_free (schedule_map_f
);
627 isl_schedule_free (schedule
);
628 isl_union_map_free (schedule_map
);