PR c/64423
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob4cce70093f10082712083c22a7ca2441581ace93
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_isl
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree.h"
36 #include "predict.h"
37 #include "vec.h"
38 #include "hashtab.h"
39 #include "hash-set.h"
40 #include "machmode.h"
41 #include "tm.h"
42 #include "hard-reg-set.h"
43 #include "input.h"
44 #include "function.h"
45 #include "dominance.h"
46 #include "cfg.h"
47 #include "basic-block.h"
48 #include "tree-ssa-alias.h"
49 #include "internal-fn.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimple-iterator.h"
54 #include "tree-ssa-loop.h"
55 #include "dumpfile.h"
56 #include "cfgloop.h"
57 #include "tree-chrec.h"
58 #include "tree-data-ref.h"
59 #include "tree-scalar-evolution.h"
60 #include "sese.h"
62 #ifdef HAVE_isl
63 #include "graphite-poly.h"
65 static isl_union_set *
66 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
68 int i;
69 poly_bb_p pbb;
70 isl_space *space = isl_set_get_space (scop->context);
71 isl_union_set *res = isl_union_set_empty (space);
73 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
74 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
76 return res;
79 /* getTileMap - Create a map that describes a n-dimensonal tiling.
81 getTileMap creates a map from a n-dimensional scattering space into an
82 2*n-dimensional scattering space. The map describes a rectangular tiling.
84 Example:
85 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
87 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
88 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
89 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
91 Before tiling:
93 for (i = 0; i < N; i++)
94 for (j = 0; j < M; j++)
95 S(i,j)
97 After tiling:
99 for (t_i = 0; t_i < N; i+=32)
100 for (t_j = 0; t_j < M; j+=32)
101 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
102 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
103 S(i,j)
106 static isl_basic_map *
107 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
109 int x;
110 /* We construct
112 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
113 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
114 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
116 and project out the auxilary dimensions a0 and a1. */
117 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
118 scheduleDimensions * 3);
119 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
121 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
123 for (x = 0; x < scheduleDimensions; x++)
125 int sX = x;
126 int tX = x;
127 int pX = scheduleDimensions + x;
128 int aX = 2 * scheduleDimensions + x;
130 isl_constraint *c;
132 /* sX = aX * tileSize; */
133 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
134 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
135 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
136 tileMap = isl_basic_map_add_constraint (tileMap, c);
138 /* pX = sX; */
139 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
140 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
141 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
142 tileMap = isl_basic_map_add_constraint (tileMap, c);
144 /* tX <= pX */
145 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
146 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
147 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
148 tileMap = isl_basic_map_add_constraint (tileMap, c);
150 /* pX <= tX + (tileSize - 1) */
151 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
152 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
153 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
154 isl_constraint_set_constant_si (c, tileSize - 1);
155 tileMap = isl_basic_map_add_constraint (tileMap, c);
158 /* Project out auxiliary dimensions.
160 The auxiliary dimensions are transformed into existentially quantified ones.
161 This reduces the number of visible scattering dimensions and allows Cloog
162 to produces better code. */
163 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
164 2 * scheduleDimensions,
165 scheduleDimensions);
166 isl_local_space_free (LocalSpace);
167 return tileMap;
170 /* getScheduleForBand - Get the schedule for this band.
172 Polly applies transformations like tiling on top of the isl calculated value.
173 This can influence the number of scheduling dimension. The number of
174 schedule dimensions is returned in the parameter 'Dimension'. */
175 static bool DisableTiling = false;
177 static isl_union_map *
178 getScheduleForBand (isl_band *Band, int *Dimensions)
180 isl_union_map *PartialSchedule;
181 isl_ctx *ctx;
182 isl_space *Space;
183 isl_basic_map *TileMap;
184 isl_union_map *TileUMap;
186 PartialSchedule = isl_band_get_partial_schedule (Band);
187 *Dimensions = isl_band_n_member (Band);
189 if (DisableTiling || flag_loop_unroll_jam)
190 return PartialSchedule;
192 /* It does not make any sense to tile a band with just one dimension. */
193 if (*Dimensions == 1)
194 return PartialSchedule;
196 ctx = isl_union_map_get_ctx (PartialSchedule);
197 Space = isl_union_map_get_space (PartialSchedule);
199 TileMap = getTileMap (ctx, *Dimensions, 32);
200 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
201 TileUMap = isl_union_map_align_params (TileUMap, Space);
202 *Dimensions = 2 * *Dimensions;
204 return isl_union_map_apply_range (PartialSchedule, TileUMap);
207 /* Create a map that pre-vectorizes one scheduling dimension.
209 getPrevectorMap creates a map that maps each input dimension to the same
210 output dimension, except for the dimension DimToVectorize. DimToVectorize is
211 strip mined by 'VectorWidth' and the newly created point loop of
212 DimToVectorize is moved to the innermost level.
214 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
216 | Before transformation
218 | A[i,j] -> [i,j]
220 | for (i = 0; i < 128; i++)
221 | for (j = 0; j < 128; j++)
222 | A(i,j);
224 Prevector map:
225 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
227 | After transformation:
229 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
231 | for (it = 0; it < 128; it+=4)
232 | for (j = 0; j < 128; j++)
233 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
234 | A(ip,j);
236 The goal of this transformation is to create a trivially vectorizable loop.
237 This means a parallel loop at the innermost level that has a constant number
238 of iterations corresponding to the target vector width.
240 This transformation creates a loop at the innermost level. The loop has a
241 constant number of iterations, if the number of loop iterations at
242 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
243 currently constant and not yet target specific. This function does not reason
244 about parallelism.
247 static isl_map *
248 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
249 int ScheduleDimensions,
250 int VectorWidth)
252 isl_space *Space;
253 isl_local_space *LocalSpace, *LocalSpaceRange;
254 isl_set *Modulo;
255 isl_map *TilingMap;
256 isl_constraint *c;
257 isl_aff *Aff;
258 int PointDimension; /* ip */
259 int TileDimension; /* it */
260 isl_val *VectorWidthMP;
261 int i;
263 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
265 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
266 TilingMap = isl_map_universe (isl_space_copy (Space));
267 LocalSpace = isl_local_space_from_space (Space);
268 PointDimension = ScheduleDimensions;
269 TileDimension = DimToVectorize;
271 /* Create an identity map for everything except DimToVectorize and map
272 DimToVectorize to the point loop at the innermost dimension. */
273 for (i = 0; i < ScheduleDimensions; i++)
275 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
276 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
278 if (i == DimToVectorize)
279 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
280 else
281 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
283 TilingMap = isl_map_add_constraint (TilingMap, c);
286 /* it % 'VectorWidth' = 0 */
287 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
288 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
289 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
290 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
292 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
293 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
294 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
295 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
297 /* it <= ip */
298 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
299 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
300 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
301 TilingMap = isl_map_add_constraint (TilingMap, c);
303 /* ip <= it + ('VectorWidth' - 1) */
304 c = isl_inequality_alloc (LocalSpace);
305 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
306 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
307 isl_constraint_set_constant_si (c, VectorWidth - 1);
308 TilingMap = isl_map_add_constraint (TilingMap, c);
310 return TilingMap;
313 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
314 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
315 corresponding option for AST build.
317 The map (for VectorWidth=4):
319 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
320 ip >= 0
322 The image of this map is the separation class. The range of this map includes
323 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
326 static isl_map *
327 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
328 int ScheduleDimensions,
329 int VectorWidth)
331 isl_space *Space;
332 isl_local_space *LocalSpace, *LocalSpaceRange;
333 isl_set *Modulo;
334 isl_map *TilingMap;
335 isl_constraint *c;
336 isl_aff *Aff;
337 int PointDimension; /* ip */
338 int TileDimension; /* it */
339 isl_val *VectorWidthMP;
340 int i;
342 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
344 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
345 TilingMap = isl_map_universe (isl_space_copy (Space));
346 LocalSpace = isl_local_space_from_space (Space);
347 PointDimension = ScheduleDimensions;
348 TileDimension = DimToVectorize;
350 /* Create an identity map for everything except DimToVectorize and the
351 point loop. */
352 for (i = 0; i < ScheduleDimensions; i++)
354 if (i == DimToVectorize)
355 continue;
357 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
359 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
360 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
362 TilingMap = isl_map_add_constraint (TilingMap, c);
365 /* it % 'VectorWidth' = 0 */
366 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
367 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
368 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
369 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
371 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
372 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
373 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
374 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
376 /* it + ('VectorWidth' - 1) = i0 */
377 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
378 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
379 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
380 isl_constraint_set_constant_si (c, -VectorWidth + 1);
381 TilingMap = isl_map_add_constraint (TilingMap, c);
383 /* ip >= 0 */
384 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
385 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
386 isl_constraint_set_constant_si (c, 0);
387 TilingMap = isl_map_add_constraint (TilingMap, c);
389 /* it <= ip */
390 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
391 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
392 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
393 TilingMap = isl_map_add_constraint (TilingMap, c);
395 /* ip <= it + ('VectorWidth' - 1) */
396 c = isl_inequality_alloc (LocalSpace);
397 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
398 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
399 isl_constraint_set_constant_si (c, VectorWidth - 1);
400 TilingMap = isl_map_add_constraint (TilingMap, c);
402 return TilingMap;
405 static bool EnablePollyVector = false;
407 /* getScheduleForBandList - Get the scheduling map for a list of bands.
409 We walk recursively the forest of bands to combine the schedules of the
410 individual bands to the overall schedule. In case tiling is requested,
411 the individual bands are tiled.
412 For unroll and jam the map the schedule for full tiles of the unrolled
413 dimnesion is computed. */
414 static isl_union_map *
415 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
417 int NumBands, i;
418 isl_union_map *Schedule;
419 isl_ctx *ctx;
421 ctx = isl_band_list_get_ctx (BandList);
422 NumBands = isl_band_list_n_band (BandList);
423 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
425 for (i = 0; i < NumBands; i++)
427 isl_band *Band;
428 isl_union_map *PartialSchedule;
429 int ScheduleDimensions;
430 isl_space *Space;
432 isl_union_map *PartialSchedule_f;
434 Band = isl_band_list_get_band (BandList, i);
435 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
436 Space = isl_union_map_get_space (PartialSchedule);
438 PartialSchedule_f = NULL;
440 if (isl_band_has_children (Band))
442 isl_band_list *Children;
443 isl_union_map *SuffixSchedule;
445 Children = isl_band_get_children (Band);
446 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
447 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
448 SuffixSchedule);
449 isl_band_list_free (Children);
451 else if (EnablePollyVector || flag_loop_unroll_jam)
453 int i;
454 int depth;
456 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
458 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
460 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
461 continue;
463 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
464 if (isl_band_member_is_coincident (Band, i))
465 #else
466 if (isl_band_member_is_zero_distance (Band, i))
467 #endif
469 isl_map *TileMap;
470 isl_union_map *TileUMap;
471 int stride;
473 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
475 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
476 stride);
477 TileUMap = isl_union_map_from_map (TileMap);
478 TileUMap = isl_union_map_align_params
479 (TileUMap, isl_space_copy (Space));
480 PartialSchedule_f = isl_union_map_apply_range
481 (isl_union_map_copy (PartialSchedule), TileUMap);
483 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
484 TileUMap = isl_union_map_from_map (TileMap);
485 TileUMap = isl_union_map_align_params
486 (TileUMap, isl_space_copy (Space));
487 PartialSchedule = isl_union_map_apply_range
488 (PartialSchedule, TileUMap);
489 break;
493 Schedule = isl_union_map_union (Schedule,
494 isl_union_map_copy(PartialSchedule));
496 isl_band_free (Band);
497 isl_space_free (Space);
499 if (!flag_loop_unroll_jam)
501 isl_union_map_free (PartialSchedule);
502 continue;
505 if (PartialSchedule_f)
507 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
508 isl_union_map_free (PartialSchedule);
510 else
511 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
514 return Schedule;
517 static isl_union_map *
518 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
520 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
521 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
522 isl_band_list_free (BandList);
523 return ScheduleMap;
526 static int
527 getSingleMap (__isl_take isl_map *map, void *user)
529 isl_map **singleMap = (isl_map **) user;
530 *singleMap = map;
532 return 0;
535 static void
536 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
538 int i;
539 poly_bb_p pbb;
541 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
543 isl_set *domain = isl_set_copy (pbb->domain);
544 isl_union_map *stmtBand;
545 isl_map *stmtSchedule;
547 stmtBand = isl_union_map_intersect_domain
548 (isl_union_map_copy (schedule_map),
549 isl_union_set_from_set (domain));
550 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
552 if (!sepcl)
554 isl_map_free (pbb->transformed);
555 pbb->transformed = stmtSchedule;
557 else
558 pbb->map_sepclass = stmtSchedule;
560 isl_union_map_free (stmtBand);
564 static const int CONSTANT_BOUND = 20;
566 bool
567 optimize_isl (scop_p scop)
570 isl_schedule *schedule;
571 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
572 isl_schedule_constraints *schedule_constraints;
573 #endif
574 isl_union_set *domain;
575 isl_union_map *validity, *proximity, *dependences;
576 isl_union_map *schedule_map;
577 isl_union_map *schedule_map_f;
579 domain = scop_get_domains (scop);
580 dependences = scop_get_dependences (scop);
581 dependences = isl_union_map_gist_domain (dependences,
582 isl_union_set_copy (domain));
583 dependences = isl_union_map_gist_range (dependences,
584 isl_union_set_copy (domain));
585 validity = dependences;
587 proximity = isl_union_map_copy (validity);
589 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
590 schedule_constraints = isl_schedule_constraints_on_domain (domain);
591 schedule_constraints
592 = isl_schedule_constraints_set_proximity (schedule_constraints,
593 proximity);
594 schedule_constraints
595 = isl_schedule_constraints_set_validity (schedule_constraints,
596 isl_union_map_copy (validity));
597 schedule_constraints
598 = isl_schedule_constraints_set_coincidence (schedule_constraints,
599 validity);
600 #endif
602 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
603 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
604 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
605 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
607 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
608 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
609 #else
610 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
611 #endif
613 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
615 if (!schedule)
616 return false;
618 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
619 schedule_map = getScheduleMap (schedule, &schedule_map_f);
621 apply_schedule_map_to_scop (scop, schedule_map, false);
622 if (!isl_union_map_is_empty (schedule_map_f))
623 apply_schedule_map_to_scop (scop, schedule_map_f, true);
624 isl_union_map_free (schedule_map_f);
626 isl_schedule_free (schedule);
627 isl_union_map_free (schedule_map);
629 return true;
632 #endif