1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
30 #include <isl/options.h>
34 #include "coretypes.h"
38 #include "double-int.h"
46 #include "fold-const.h"
49 #include "hard-reg-set.h"
52 #include "dominance.h"
54 #include "basic-block.h"
55 #include "tree-ssa-alias.h"
56 #include "internal-fn.h"
57 #include "gimple-expr.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa-loop.h"
64 #include "tree-chrec.h"
65 #include "tree-data-ref.h"
66 #include "tree-scalar-evolution.h"
70 #include "graphite-poly.h"
72 static isl_union_set
*
73 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED
)
77 isl_space
*space
= isl_set_get_space (scop
->context
);
78 isl_union_set
*res
= isl_union_set_empty (space
);
80 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
81 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
86 /* getTileMap - Create a map that describes a n-dimensonal tiling.
88 getTileMap creates a map from a n-dimensional scattering space into an
89 2*n-dimensional scattering space. The map describes a rectangular tiling.
92 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
94 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
95 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
96 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
100 for (i = 0; i < N; i++)
101 for (j = 0; j < M; j++)
106 for (t_i = 0; t_i < N; i+=32)
107 for (t_j = 0; t_j < M; j+=32)
108 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
109 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
113 static isl_basic_map
*
114 getTileMap (isl_ctx
*ctx
, int scheduleDimensions
, int tileSize
)
119 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
120 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
121 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
123 and project out the auxilary dimensions a0 and a1. */
124 isl_space
*Space
= isl_space_alloc (ctx
, 0, scheduleDimensions
,
125 scheduleDimensions
* 3);
126 isl_basic_map
*tileMap
= isl_basic_map_universe (isl_space_copy (Space
));
128 isl_local_space
*LocalSpace
= isl_local_space_from_space (Space
);
130 for (x
= 0; x
< scheduleDimensions
; x
++)
134 int pX
= scheduleDimensions
+ x
;
135 int aX
= 2 * scheduleDimensions
+ x
;
139 /* sX = aX * tileSize; */
140 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
141 isl_constraint_set_coefficient_si (c
, isl_dim_out
, sX
, 1);
142 isl_constraint_set_coefficient_si (c
, isl_dim_out
, aX
, -tileSize
);
143 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
146 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
147 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
148 isl_constraint_set_coefficient_si (c
, isl_dim_in
, sX
, -1);
149 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
152 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
153 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
154 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, -1);
155 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
157 /* pX <= tX + (tileSize - 1) */
158 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
159 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, 1);
160 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, -1);
161 isl_constraint_set_constant_si (c
, tileSize
- 1);
162 tileMap
= isl_basic_map_add_constraint (tileMap
, c
);
165 /* Project out auxiliary dimensions.
167 The auxiliary dimensions are transformed into existentially quantified ones.
168 This reduces the number of visible scattering dimensions and allows Cloog
169 to produces better code. */
170 tileMap
= isl_basic_map_project_out (tileMap
, isl_dim_out
,
171 2 * scheduleDimensions
,
173 isl_local_space_free (LocalSpace
);
177 /* getScheduleForBand - Get the schedule for this band.
179 Polly applies transformations like tiling on top of the isl calculated value.
180 This can influence the number of scheduling dimension. The number of
181 schedule dimensions is returned in the parameter 'Dimension'. */
182 static bool DisableTiling
= false;
184 static isl_union_map
*
185 getScheduleForBand (isl_band
*Band
, int *Dimensions
)
187 isl_union_map
*PartialSchedule
;
190 isl_basic_map
*TileMap
;
191 isl_union_map
*TileUMap
;
193 PartialSchedule
= isl_band_get_partial_schedule (Band
);
194 *Dimensions
= isl_band_n_member (Band
);
196 if (DisableTiling
|| flag_loop_unroll_jam
)
197 return PartialSchedule
;
199 /* It does not make any sense to tile a band with just one dimension. */
200 if (*Dimensions
== 1)
201 return PartialSchedule
;
203 ctx
= isl_union_map_get_ctx (PartialSchedule
);
204 Space
= isl_union_map_get_space (PartialSchedule
);
206 TileMap
= getTileMap (ctx
, *Dimensions
, 32);
207 TileUMap
= isl_union_map_from_map (isl_map_from_basic_map (TileMap
));
208 TileUMap
= isl_union_map_align_params (TileUMap
, Space
);
209 *Dimensions
= 2 * *Dimensions
;
211 return isl_union_map_apply_range (PartialSchedule
, TileUMap
);
214 /* Create a map that pre-vectorizes one scheduling dimension.
216 getPrevectorMap creates a map that maps each input dimension to the same
217 output dimension, except for the dimension DimToVectorize. DimToVectorize is
218 strip mined by 'VectorWidth' and the newly created point loop of
219 DimToVectorize is moved to the innermost level.
221 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
223 | Before transformation
227 | for (i = 0; i < 128; i++)
228 | for (j = 0; j < 128; j++)
232 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
234 | After transformation:
236 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
238 | for (it = 0; it < 128; it+=4)
239 | for (j = 0; j < 128; j++)
240 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
243 The goal of this transformation is to create a trivially vectorizable loop.
244 This means a parallel loop at the innermost level that has a constant number
245 of iterations corresponding to the target vector width.
247 This transformation creates a loop at the innermost level. The loop has a
248 constant number of iterations, if the number of loop iterations at
249 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
250 currently constant and not yet target specific. This function does not reason
255 getPrevectorMap (isl_ctx
*ctx
, int DimToVectorize
,
256 int ScheduleDimensions
,
260 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
265 int PointDimension
; /* ip */
266 int TileDimension
; /* it */
267 isl_val
*VectorWidthMP
;
270 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
272 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
273 TilingMap
= isl_map_universe (isl_space_copy (Space
));
274 LocalSpace
= isl_local_space_from_space (Space
);
275 PointDimension
= ScheduleDimensions
;
276 TileDimension
= DimToVectorize
;
278 /* Create an identity map for everything except DimToVectorize and map
279 DimToVectorize to the point loop at the innermost dimension. */
280 for (i
= 0; i
< ScheduleDimensions
; i
++)
282 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
283 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
285 if (i
== DimToVectorize
)
286 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
288 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
290 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
293 /* it % 'VectorWidth' = 0 */
294 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
295 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
296 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
297 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
299 VectorWidthMP
= isl_val_int_from_si (ctx
, VectorWidth
);
300 Aff
= isl_aff_mod_val (Aff
, VectorWidthMP
);
301 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
302 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
305 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
306 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
307 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
308 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
310 /* ip <= it + ('VectorWidth' - 1) */
311 c
= isl_inequality_alloc (LocalSpace
);
312 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
313 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
314 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
315 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
320 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
321 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
322 corresponding option for AST build.
324 The map (for VectorWidth=4):
326 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
329 The image of this map is the separation class. The range of this map includes
330 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
334 getPrevectorMap_full (isl_ctx
*ctx
, int DimToVectorize
,
335 int ScheduleDimensions
,
339 isl_local_space
*LocalSpace
, *LocalSpaceRange
;
344 int PointDimension
; /* ip */
345 int TileDimension
; /* it */
346 isl_val
*VectorWidthMP
;
349 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
351 Space
= isl_space_alloc (ctx
, 0, ScheduleDimensions
, ScheduleDimensions
+ 1);
352 TilingMap
= isl_map_universe (isl_space_copy (Space
));
353 LocalSpace
= isl_local_space_from_space (Space
);
354 PointDimension
= ScheduleDimensions
;
355 TileDimension
= DimToVectorize
;
357 /* Create an identity map for everything except DimToVectorize and the
359 for (i
= 0; i
< ScheduleDimensions
; i
++)
361 if (i
== DimToVectorize
)
364 c
= isl_equality_alloc (isl_local_space_copy (LocalSpace
));
366 isl_constraint_set_coefficient_si (c
, isl_dim_in
, i
, -1);
367 isl_constraint_set_coefficient_si (c
, isl_dim_out
, i
, 1);
369 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
372 /* it % 'VectorWidth' = 0 */
373 LocalSpaceRange
= isl_local_space_range (isl_local_space_copy (LocalSpace
));
374 Aff
= isl_aff_zero_on_domain (LocalSpaceRange
);
375 Aff
= isl_aff_set_constant_si (Aff
, VectorWidth
);
376 Aff
= isl_aff_set_coefficient_si (Aff
, isl_dim_in
, TileDimension
, 1);
378 VectorWidthMP
= isl_val_int_from_si (ctx
, VectorWidth
);
379 Aff
= isl_aff_mod_val (Aff
, VectorWidthMP
);
380 Modulo
= isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff
));
381 TilingMap
= isl_map_intersect_range (TilingMap
, Modulo
);
383 /* it + ('VectorWidth' - 1) = i0 */
384 c
= isl_equality_alloc (isl_local_space_copy(LocalSpace
));
385 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
,-1);
386 isl_constraint_set_coefficient_si (c
, isl_dim_in
, TileDimension
, 1);
387 isl_constraint_set_constant_si (c
, -VectorWidth
+ 1);
388 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
391 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
392 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
393 isl_constraint_set_constant_si (c
, 0);
394 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
397 c
= isl_inequality_alloc (isl_local_space_copy (LocalSpace
));
398 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, -1);
399 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, 1);
400 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
402 /* ip <= it + ('VectorWidth' - 1) */
403 c
= isl_inequality_alloc (LocalSpace
);
404 isl_constraint_set_coefficient_si (c
, isl_dim_out
, TileDimension
, 1);
405 isl_constraint_set_coefficient_si (c
, isl_dim_out
, PointDimension
, -1);
406 isl_constraint_set_constant_si (c
, VectorWidth
- 1);
407 TilingMap
= isl_map_add_constraint (TilingMap
, c
);
412 static bool EnablePollyVector
= false;
414 /* getScheduleForBandList - Get the scheduling map for a list of bands.
416 We walk recursively the forest of bands to combine the schedules of the
417 individual bands to the overall schedule. In case tiling is requested,
418 the individual bands are tiled.
419 For unroll and jam the map the schedule for full tiles of the unrolled
420 dimnesion is computed. */
421 static isl_union_map
*
422 getScheduleForBandList (isl_band_list
*BandList
, isl_union_map
**map_sepcl
)
425 isl_union_map
*Schedule
;
428 ctx
= isl_band_list_get_ctx (BandList
);
429 NumBands
= isl_band_list_n_band (BandList
);
430 Schedule
= isl_union_map_empty (isl_space_params_alloc (ctx
, 0));
432 for (i
= 0; i
< NumBands
; i
++)
435 isl_union_map
*PartialSchedule
;
436 int ScheduleDimensions
;
439 isl_union_map
*PartialSchedule_f
;
441 Band
= isl_band_list_get_band (BandList
, i
);
442 PartialSchedule
= getScheduleForBand (Band
, &ScheduleDimensions
);
443 Space
= isl_union_map_get_space (PartialSchedule
);
445 PartialSchedule_f
= NULL
;
447 if (isl_band_has_children (Band
))
449 isl_band_list
*Children
;
450 isl_union_map
*SuffixSchedule
;
452 Children
= isl_band_get_children (Band
);
453 SuffixSchedule
= getScheduleForBandList (Children
, map_sepcl
);
454 PartialSchedule
= isl_union_map_flat_range_product (PartialSchedule
,
456 isl_band_list_free (Children
);
458 else if (EnablePollyVector
|| flag_loop_unroll_jam
)
463 depth
= PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH
);
465 for (i
= ScheduleDimensions
- 1 ; i
>= 0 ; i
--)
467 if (flag_loop_unroll_jam
&& (i
!= (ScheduleDimensions
- depth
)))
470 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
471 if (isl_band_member_is_coincident (Band
, i
))
473 if (isl_band_member_is_zero_distance (Band
, i
))
477 isl_union_map
*TileUMap
;
480 stride
= PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE
);
482 TileMap
= getPrevectorMap_full (ctx
, i
, ScheduleDimensions
,
484 TileUMap
= isl_union_map_from_map (TileMap
);
485 TileUMap
= isl_union_map_align_params
486 (TileUMap
, isl_space_copy (Space
));
487 PartialSchedule_f
= isl_union_map_apply_range
488 (isl_union_map_copy (PartialSchedule
), TileUMap
);
490 TileMap
= getPrevectorMap (ctx
, i
, ScheduleDimensions
, stride
);
491 TileUMap
= isl_union_map_from_map (TileMap
);
492 TileUMap
= isl_union_map_align_params
493 (TileUMap
, isl_space_copy (Space
));
494 PartialSchedule
= isl_union_map_apply_range
495 (PartialSchedule
, TileUMap
);
500 Schedule
= isl_union_map_union (Schedule
,
501 isl_union_map_copy(PartialSchedule
));
503 isl_band_free (Band
);
504 isl_space_free (Space
);
506 if (!flag_loop_unroll_jam
)
508 isl_union_map_free (PartialSchedule
);
512 if (PartialSchedule_f
)
514 *map_sepcl
= isl_union_map_union (*map_sepcl
, PartialSchedule_f
);
515 isl_union_map_free (PartialSchedule
);
518 *map_sepcl
= isl_union_map_union (*map_sepcl
, PartialSchedule
);
524 static isl_union_map
*
525 getScheduleMap (isl_schedule
*Schedule
, isl_union_map
**map_sepcl
)
527 isl_band_list
*BandList
= isl_schedule_get_band_forest (Schedule
);
528 isl_union_map
*ScheduleMap
= getScheduleForBandList (BandList
, map_sepcl
);
529 isl_band_list_free (BandList
);
534 getSingleMap (__isl_take isl_map
*map
, void *user
)
536 isl_map
**singleMap
= (isl_map
**) user
;
543 apply_schedule_map_to_scop (scop_p scop
, isl_union_map
*schedule_map
, bool sepcl
)
548 FOR_EACH_VEC_ELT (scop
->bbs
, i
, pbb
)
550 isl_set
*domain
= isl_set_copy (pbb
->domain
);
551 isl_union_map
*stmtBand
;
552 isl_map
*stmtSchedule
;
554 stmtBand
= isl_union_map_intersect_domain
555 (isl_union_map_copy (schedule_map
),
556 isl_union_set_from_set (domain
));
557 isl_union_map_foreach_map (stmtBand
, getSingleMap
, &stmtSchedule
);
561 isl_map_free (pbb
->transformed
);
562 pbb
->transformed
= stmtSchedule
;
565 pbb
->map_sepclass
= stmtSchedule
;
567 isl_union_map_free (stmtBand
);
571 static const int CONSTANT_BOUND
= 20;
574 optimize_isl (scop_p scop
)
577 isl_schedule
*schedule
;
578 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
579 isl_schedule_constraints
*schedule_constraints
;
581 isl_union_set
*domain
;
582 isl_union_map
*validity
, *proximity
, *dependences
;
583 isl_union_map
*schedule_map
;
584 isl_union_map
*schedule_map_f
;
586 domain
= scop_get_domains (scop
);
587 dependences
= scop_get_dependences (scop
);
588 dependences
= isl_union_map_gist_domain (dependences
,
589 isl_union_set_copy (domain
));
590 dependences
= isl_union_map_gist_range (dependences
,
591 isl_union_set_copy (domain
));
592 validity
= dependences
;
594 proximity
= isl_union_map_copy (validity
);
596 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
597 schedule_constraints
= isl_schedule_constraints_on_domain (domain
);
599 = isl_schedule_constraints_set_proximity (schedule_constraints
,
602 = isl_schedule_constraints_set_validity (schedule_constraints
,
603 isl_union_map_copy (validity
));
605 = isl_schedule_constraints_set_coincidence (schedule_constraints
,
609 isl_options_set_schedule_max_constant_term (scop
->ctx
, CONSTANT_BOUND
);
610 isl_options_set_schedule_maximize_band_depth (scop
->ctx
, 1);
611 isl_options_set_schedule_fuse (scop
->ctx
, ISL_SCHEDULE_FUSE_MIN
);
612 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_CONTINUE
);
614 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
615 schedule
= isl_schedule_constraints_compute_schedule(schedule_constraints
);
617 schedule
= isl_union_set_compute_schedule (domain
, validity
, proximity
);
620 isl_options_set_on_error (scop
->ctx
, ISL_ON_ERROR_ABORT
);
625 schedule_map_f
= isl_union_map_empty (isl_space_params_alloc (scop
->ctx
, 0));
626 schedule_map
= getScheduleMap (schedule
, &schedule_map_f
);
628 apply_schedule_map_to_scop (scop
, schedule_map
, false);
629 if (!isl_union_map_is_empty (schedule_map_f
))
630 apply_schedule_map_to_scop (scop
, schedule_map_f
, true);
631 isl_union_map_free (schedule_map_f
);
633 isl_schedule_free (schedule
);
634 isl_union_map_free (schedule_map
);