PR tree-optimization/66718
[official-gcc.git] / gcc / graphite-optimize-isl.c
blobb87e645dd57a1f4cb16635b86d08261edc40b564
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_isl
24 /* Workaround for GMP 5.1.3 bug, see PR56019. */
25 #include <stddef.h>
27 #include <isl/set.h>
28 #include <isl/map.h>
29 #include <isl/union_map.h>
30 #include <isl/schedule.h>
31 #include <isl/band.h>
32 #include <isl/aff.h>
33 #include <isl/options.h>
34 #endif
36 #include "system.h"
37 #include "coretypes.h"
38 #include "alias.h"
39 #include "backend.h"
40 #include "tree.h"
41 #include "gimple.h"
42 #include "hard-reg-set.h"
43 #include "options.h"
44 #include "fold-const.h"
45 #include "internal-fn.h"
46 #include "gimple-iterator.h"
47 #include "tree-ssa-loop.h"
48 #include "dumpfile.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "sese.h"
55 #ifdef HAVE_isl
56 #include "graphite-poly.h"
58 static isl_union_set *
59 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
61 int i;
62 poly_bb_p pbb;
63 isl_space *space = isl_set_get_space (scop->context);
64 isl_union_set *res = isl_union_set_empty (space);
66 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
67 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
69 return res;
72 /* getTileMap - Create a map that describes a n-dimensonal tiling.
74 getTileMap creates a map from a n-dimensional scattering space into an
75 2*n-dimensional scattering space. The map describes a rectangular tiling.
77 Example:
78 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
80 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
81 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
82 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
84 Before tiling:
86 for (i = 0; i < N; i++)
87 for (j = 0; j < M; j++)
88 S(i,j)
90 After tiling:
92 for (t_i = 0; t_i < N; i+=32)
93 for (t_j = 0; t_j < M; j+=32)
94 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
95 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
96 S(i,j)
99 static isl_basic_map *
100 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
102 int x;
103 /* We construct
105 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
106 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
107 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
109 and project out the auxilary dimensions a0 and a1. */
110 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
111 scheduleDimensions * 3);
112 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
114 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
116 for (x = 0; x < scheduleDimensions; x++)
118 int sX = x;
119 int tX = x;
120 int pX = scheduleDimensions + x;
121 int aX = 2 * scheduleDimensions + x;
123 isl_constraint *c;
125 /* sX = aX * tileSize; */
126 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
127 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
128 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
129 tileMap = isl_basic_map_add_constraint (tileMap, c);
131 /* pX = sX; */
132 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
133 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
134 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
135 tileMap = isl_basic_map_add_constraint (tileMap, c);
137 /* tX <= pX */
138 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
139 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
140 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
141 tileMap = isl_basic_map_add_constraint (tileMap, c);
143 /* pX <= tX + (tileSize - 1) */
144 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
145 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
146 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
147 isl_constraint_set_constant_si (c, tileSize - 1);
148 tileMap = isl_basic_map_add_constraint (tileMap, c);
151 /* Project out auxiliary dimensions.
153 The auxiliary dimensions are transformed into existentially quantified ones.
154 This reduces the number of visible scattering dimensions and allows Cloog
155 to produces better code. */
156 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
157 2 * scheduleDimensions,
158 scheduleDimensions);
159 isl_local_space_free (LocalSpace);
160 return tileMap;
163 /* getScheduleForBand - Get the schedule for this band.
165 Polly applies transformations like tiling on top of the isl calculated value.
166 This can influence the number of scheduling dimension. The number of
167 schedule dimensions is returned in the parameter 'Dimension'. */
168 static bool DisableTiling = false;
170 static isl_union_map *
171 getScheduleForBand (isl_band *Band, int *Dimensions)
173 isl_union_map *PartialSchedule;
174 isl_ctx *ctx;
175 isl_space *Space;
176 isl_basic_map *TileMap;
177 isl_union_map *TileUMap;
179 PartialSchedule = isl_band_get_partial_schedule (Band);
180 *Dimensions = isl_band_n_member (Band);
182 if (DisableTiling || flag_loop_unroll_jam)
183 return PartialSchedule;
185 /* It does not make any sense to tile a band with just one dimension. */
186 if (*Dimensions == 1)
187 return PartialSchedule;
189 ctx = isl_union_map_get_ctx (PartialSchedule);
190 Space = isl_union_map_get_space (PartialSchedule);
192 TileMap = getTileMap (ctx, *Dimensions, 32);
193 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
194 TileUMap = isl_union_map_align_params (TileUMap, Space);
195 *Dimensions = 2 * *Dimensions;
197 return isl_union_map_apply_range (PartialSchedule, TileUMap);
200 /* Create a map that pre-vectorizes one scheduling dimension.
202 getPrevectorMap creates a map that maps each input dimension to the same
203 output dimension, except for the dimension DimToVectorize. DimToVectorize is
204 strip mined by 'VectorWidth' and the newly created point loop of
205 DimToVectorize is moved to the innermost level.
207 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
209 | Before transformation
211 | A[i,j] -> [i,j]
213 | for (i = 0; i < 128; i++)
214 | for (j = 0; j < 128; j++)
215 | A(i,j);
217 Prevector map:
218 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
220 | After transformation:
222 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
224 | for (it = 0; it < 128; it+=4)
225 | for (j = 0; j < 128; j++)
226 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
227 | A(ip,j);
229 The goal of this transformation is to create a trivially vectorizable loop.
230 This means a parallel loop at the innermost level that has a constant number
231 of iterations corresponding to the target vector width.
233 This transformation creates a loop at the innermost level. The loop has a
234 constant number of iterations, if the number of loop iterations at
235 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
236 currently constant and not yet target specific. This function does not reason
237 about parallelism.
240 static isl_map *
241 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
242 int ScheduleDimensions,
243 int VectorWidth)
245 isl_space *Space;
246 isl_local_space *LocalSpace, *LocalSpaceRange;
247 isl_set *Modulo;
248 isl_map *TilingMap;
249 isl_constraint *c;
250 isl_aff *Aff;
251 int PointDimension; /* ip */
252 int TileDimension; /* it */
253 isl_val *VectorWidthMP;
254 int i;
256 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
258 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
259 TilingMap = isl_map_universe (isl_space_copy (Space));
260 LocalSpace = isl_local_space_from_space (Space);
261 PointDimension = ScheduleDimensions;
262 TileDimension = DimToVectorize;
264 /* Create an identity map for everything except DimToVectorize and map
265 DimToVectorize to the point loop at the innermost dimension. */
266 for (i = 0; i < ScheduleDimensions; i++)
268 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
269 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
271 if (i == DimToVectorize)
272 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
273 else
274 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
276 TilingMap = isl_map_add_constraint (TilingMap, c);
279 /* it % 'VectorWidth' = 0 */
280 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
281 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
282 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
283 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
285 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
286 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
287 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
288 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
290 /* it <= ip */
291 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
292 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
293 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
294 TilingMap = isl_map_add_constraint (TilingMap, c);
296 /* ip <= it + ('VectorWidth' - 1) */
297 c = isl_inequality_alloc (LocalSpace);
298 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
299 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
300 isl_constraint_set_constant_si (c, VectorWidth - 1);
301 TilingMap = isl_map_add_constraint (TilingMap, c);
303 return TilingMap;
306 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
307 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
308 corresponding option for AST build.
310 The map (for VectorWidth=4):
312 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
313 ip >= 0
315 The image of this map is the separation class. The range of this map includes
316 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
319 static isl_map *
320 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
321 int ScheduleDimensions,
322 int VectorWidth)
324 isl_space *Space;
325 isl_local_space *LocalSpace, *LocalSpaceRange;
326 isl_set *Modulo;
327 isl_map *TilingMap;
328 isl_constraint *c;
329 isl_aff *Aff;
330 int PointDimension; /* ip */
331 int TileDimension; /* it */
332 isl_val *VectorWidthMP;
333 int i;
335 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
337 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
338 TilingMap = isl_map_universe (isl_space_copy (Space));
339 LocalSpace = isl_local_space_from_space (Space);
340 PointDimension = ScheduleDimensions;
341 TileDimension = DimToVectorize;
343 /* Create an identity map for everything except DimToVectorize and the
344 point loop. */
345 for (i = 0; i < ScheduleDimensions; i++)
347 if (i == DimToVectorize)
348 continue;
350 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
352 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
353 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
355 TilingMap = isl_map_add_constraint (TilingMap, c);
358 /* it % 'VectorWidth' = 0 */
359 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
360 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
361 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
362 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
364 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
365 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
366 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
367 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
369 /* it + ('VectorWidth' - 1) = i0 */
370 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
371 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
372 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
373 isl_constraint_set_constant_si (c, -VectorWidth + 1);
374 TilingMap = isl_map_add_constraint (TilingMap, c);
376 /* ip >= 0 */
377 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
378 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
379 isl_constraint_set_constant_si (c, 0);
380 TilingMap = isl_map_add_constraint (TilingMap, c);
382 /* it <= ip */
383 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
384 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
385 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
386 TilingMap = isl_map_add_constraint (TilingMap, c);
388 /* ip <= it + ('VectorWidth' - 1) */
389 c = isl_inequality_alloc (LocalSpace);
390 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
391 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
392 isl_constraint_set_constant_si (c, VectorWidth - 1);
393 TilingMap = isl_map_add_constraint (TilingMap, c);
395 return TilingMap;
398 static bool EnablePollyVector = false;
400 /* getScheduleForBandList - Get the scheduling map for a list of bands.
402 We walk recursively the forest of bands to combine the schedules of the
403 individual bands to the overall schedule. In case tiling is requested,
404 the individual bands are tiled.
405 For unroll and jam the map the schedule for full tiles of the unrolled
406 dimnesion is computed. */
407 static isl_union_map *
408 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
410 int NumBands, i;
411 isl_union_map *Schedule;
412 isl_ctx *ctx;
414 ctx = isl_band_list_get_ctx (BandList);
415 NumBands = isl_band_list_n_band (BandList);
416 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
418 for (i = 0; i < NumBands; i++)
420 isl_band *Band;
421 isl_union_map *PartialSchedule;
422 int ScheduleDimensions;
423 isl_space *Space;
425 isl_union_map *PartialSchedule_f;
427 Band = isl_band_list_get_band (BandList, i);
428 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
429 Space = isl_union_map_get_space (PartialSchedule);
431 PartialSchedule_f = NULL;
433 if (isl_band_has_children (Band))
435 isl_band_list *Children;
436 isl_union_map *SuffixSchedule;
438 Children = isl_band_get_children (Band);
439 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
440 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
441 SuffixSchedule);
442 isl_band_list_free (Children);
444 else if (EnablePollyVector || flag_loop_unroll_jam)
446 int i;
447 int depth;
449 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
451 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
453 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
454 continue;
456 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
457 if (isl_band_member_is_coincident (Band, i))
458 #else
459 if (isl_band_member_is_zero_distance (Band, i))
460 #endif
462 isl_map *TileMap;
463 isl_union_map *TileUMap;
464 int stride;
466 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
468 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
469 stride);
470 TileUMap = isl_union_map_from_map (TileMap);
471 TileUMap = isl_union_map_align_params
472 (TileUMap, isl_space_copy (Space));
473 PartialSchedule_f = isl_union_map_apply_range
474 (isl_union_map_copy (PartialSchedule), TileUMap);
476 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
477 TileUMap = isl_union_map_from_map (TileMap);
478 TileUMap = isl_union_map_align_params
479 (TileUMap, isl_space_copy (Space));
480 PartialSchedule = isl_union_map_apply_range
481 (PartialSchedule, TileUMap);
482 break;
486 Schedule = isl_union_map_union (Schedule,
487 isl_union_map_copy(PartialSchedule));
489 isl_band_free (Band);
490 isl_space_free (Space);
492 if (!flag_loop_unroll_jam)
494 isl_union_map_free (PartialSchedule);
495 continue;
498 if (PartialSchedule_f)
500 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
501 isl_union_map_free (PartialSchedule);
503 else
504 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
507 return Schedule;
510 static isl_union_map *
511 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
513 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
514 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
515 isl_band_list_free (BandList);
516 return ScheduleMap;
519 static int
520 getSingleMap (__isl_take isl_map *map, void *user)
522 isl_map **singleMap = (isl_map **) user;
523 *singleMap = map;
525 return 0;
528 static void
529 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
531 int i;
532 poly_bb_p pbb;
534 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
536 isl_set *domain = isl_set_copy (pbb->domain);
537 isl_union_map *stmtBand;
538 isl_map *stmtSchedule;
540 stmtBand = isl_union_map_intersect_domain
541 (isl_union_map_copy (schedule_map),
542 isl_union_set_from_set (domain));
543 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
545 if (!sepcl)
547 isl_map_free (pbb->transformed);
548 pbb->transformed = stmtSchedule;
550 else
551 pbb->map_sepclass = stmtSchedule;
553 isl_union_map_free (stmtBand);
557 static const int CONSTANT_BOUND = 20;
559 bool
560 optimize_isl (scop_p scop)
563 isl_schedule *schedule;
564 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
565 isl_schedule_constraints *schedule_constraints;
566 #endif
567 isl_union_set *domain;
568 isl_union_map *validity, *proximity, *dependences;
569 isl_union_map *schedule_map;
570 isl_union_map *schedule_map_f;
572 domain = scop_get_domains (scop);
573 dependences = scop_get_dependences (scop);
574 dependences = isl_union_map_gist_domain (dependences,
575 isl_union_set_copy (domain));
576 dependences = isl_union_map_gist_range (dependences,
577 isl_union_set_copy (domain));
578 validity = dependences;
580 proximity = isl_union_map_copy (validity);
582 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
583 schedule_constraints = isl_schedule_constraints_on_domain (domain);
584 schedule_constraints
585 = isl_schedule_constraints_set_proximity (schedule_constraints,
586 proximity);
587 schedule_constraints
588 = isl_schedule_constraints_set_validity (schedule_constraints,
589 isl_union_map_copy (validity));
590 schedule_constraints
591 = isl_schedule_constraints_set_coincidence (schedule_constraints,
592 validity);
593 #endif
595 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
596 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
597 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
598 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
600 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
601 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
602 #else
603 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
604 #endif
606 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
608 if (!schedule)
609 return false;
611 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
612 schedule_map = getScheduleMap (schedule, &schedule_map_f);
614 apply_schedule_map_to_scop (scop, schedule_map, false);
615 if (!isl_union_map_is_empty (schedule_map_f))
616 apply_schedule_map_to_scop (scop, schedule_map_f, true);
617 isl_union_map_free (schedule_map_f);
619 isl_schedule_free (schedule);
620 isl_union_map_free (schedule_map);
622 return true;
625 #endif