2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob74602c71473f8d11c4af8d1564362f293ca5603a
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_isl
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "input.h"
36 #include "alias.h"
37 #include "symtab.h"
38 #include "options.h"
39 #include "tree.h"
40 #include "fold-const.h"
41 #include "predict.h"
42 #include "tm.h"
43 #include "hard-reg-set.h"
44 #include "input.h"
45 #include "function.h"
46 #include "dominance.h"
47 #include "cfg.h"
48 #include "basic-block.h"
49 #include "tree-ssa-alias.h"
50 #include "internal-fn.h"
51 #include "gimple-expr.h"
52 #include "is-a.h"
53 #include "gimple.h"
54 #include "gimple-iterator.h"
55 #include "tree-ssa-loop.h"
56 #include "dumpfile.h"
57 #include "cfgloop.h"
58 #include "tree-chrec.h"
59 #include "tree-data-ref.h"
60 #include "tree-scalar-evolution.h"
61 #include "sese.h"
63 #ifdef HAVE_isl
64 #include "graphite-poly.h"
66 static isl_union_set *
67 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
69 int i;
70 poly_bb_p pbb;
71 isl_space *space = isl_set_get_space (scop->context);
72 isl_union_set *res = isl_union_set_empty (space);
74 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
75 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
77 return res;
80 /* getTileMap - Create a map that describes a n-dimensonal tiling.
82 getTileMap creates a map from a n-dimensional scattering space into an
83 2*n-dimensional scattering space. The map describes a rectangular tiling.
85 Example:
86 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
88 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
89 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
90 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
92 Before tiling:
94 for (i = 0; i < N; i++)
95 for (j = 0; j < M; j++)
96 S(i,j)
98 After tiling:
100 for (t_i = 0; t_i < N; i+=32)
101 for (t_j = 0; t_j < M; j+=32)
102 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
103 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
104 S(i,j)
107 static isl_basic_map *
108 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
110 int x;
111 /* We construct
113 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
114 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
115 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
117 and project out the auxilary dimensions a0 and a1. */
118 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
119 scheduleDimensions * 3);
120 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
122 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
124 for (x = 0; x < scheduleDimensions; x++)
126 int sX = x;
127 int tX = x;
128 int pX = scheduleDimensions + x;
129 int aX = 2 * scheduleDimensions + x;
131 isl_constraint *c;
133 /* sX = aX * tileSize; */
134 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
135 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
136 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
137 tileMap = isl_basic_map_add_constraint (tileMap, c);
139 /* pX = sX; */
140 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
141 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
142 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
143 tileMap = isl_basic_map_add_constraint (tileMap, c);
145 /* tX <= pX */
146 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
147 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
148 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
149 tileMap = isl_basic_map_add_constraint (tileMap, c);
151 /* pX <= tX + (tileSize - 1) */
152 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
153 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
154 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
155 isl_constraint_set_constant_si (c, tileSize - 1);
156 tileMap = isl_basic_map_add_constraint (tileMap, c);
159 /* Project out auxiliary dimensions.
161 The auxiliary dimensions are transformed into existentially quantified ones.
162 This reduces the number of visible scattering dimensions and allows Cloog
163 to produces better code. */
164 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
165 2 * scheduleDimensions,
166 scheduleDimensions);
167 isl_local_space_free (LocalSpace);
168 return tileMap;
171 /* getScheduleForBand - Get the schedule for this band.
173 Polly applies transformations like tiling on top of the isl calculated value.
174 This can influence the number of scheduling dimension. The number of
175 schedule dimensions is returned in the parameter 'Dimension'. */
176 static bool DisableTiling = false;
178 static isl_union_map *
179 getScheduleForBand (isl_band *Band, int *Dimensions)
181 isl_union_map *PartialSchedule;
182 isl_ctx *ctx;
183 isl_space *Space;
184 isl_basic_map *TileMap;
185 isl_union_map *TileUMap;
187 PartialSchedule = isl_band_get_partial_schedule (Band);
188 *Dimensions = isl_band_n_member (Band);
190 if (DisableTiling || flag_loop_unroll_jam)
191 return PartialSchedule;
193 /* It does not make any sense to tile a band with just one dimension. */
194 if (*Dimensions == 1)
195 return PartialSchedule;
197 ctx = isl_union_map_get_ctx (PartialSchedule);
198 Space = isl_union_map_get_space (PartialSchedule);
200 TileMap = getTileMap (ctx, *Dimensions, 32);
201 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
202 TileUMap = isl_union_map_align_params (TileUMap, Space);
203 *Dimensions = 2 * *Dimensions;
205 return isl_union_map_apply_range (PartialSchedule, TileUMap);
208 /* Create a map that pre-vectorizes one scheduling dimension.
210 getPrevectorMap creates a map that maps each input dimension to the same
211 output dimension, except for the dimension DimToVectorize. DimToVectorize is
212 strip mined by 'VectorWidth' and the newly created point loop of
213 DimToVectorize is moved to the innermost level.
215 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
217 | Before transformation
219 | A[i,j] -> [i,j]
221 | for (i = 0; i < 128; i++)
222 | for (j = 0; j < 128; j++)
223 | A(i,j);
225 Prevector map:
226 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
228 | After transformation:
230 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
232 | for (it = 0; it < 128; it+=4)
233 | for (j = 0; j < 128; j++)
234 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
235 | A(ip,j);
237 The goal of this transformation is to create a trivially vectorizable loop.
238 This means a parallel loop at the innermost level that has a constant number
239 of iterations corresponding to the target vector width.
241 This transformation creates a loop at the innermost level. The loop has a
242 constant number of iterations, if the number of loop iterations at
243 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
244 currently constant and not yet target specific. This function does not reason
245 about parallelism.
248 static isl_map *
249 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
250 int ScheduleDimensions,
251 int VectorWidth)
253 isl_space *Space;
254 isl_local_space *LocalSpace, *LocalSpaceRange;
255 isl_set *Modulo;
256 isl_map *TilingMap;
257 isl_constraint *c;
258 isl_aff *Aff;
259 int PointDimension; /* ip */
260 int TileDimension; /* it */
261 isl_val *VectorWidthMP;
262 int i;
264 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
266 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
267 TilingMap = isl_map_universe (isl_space_copy (Space));
268 LocalSpace = isl_local_space_from_space (Space);
269 PointDimension = ScheduleDimensions;
270 TileDimension = DimToVectorize;
272 /* Create an identity map for everything except DimToVectorize and map
273 DimToVectorize to the point loop at the innermost dimension. */
274 for (i = 0; i < ScheduleDimensions; i++)
276 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
277 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
279 if (i == DimToVectorize)
280 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
281 else
282 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
284 TilingMap = isl_map_add_constraint (TilingMap, c);
287 /* it % 'VectorWidth' = 0 */
288 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
289 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
290 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
291 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
293 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
294 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
295 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
296 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
298 /* it <= ip */
299 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
300 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
301 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
302 TilingMap = isl_map_add_constraint (TilingMap, c);
304 /* ip <= it + ('VectorWidth' - 1) */
305 c = isl_inequality_alloc (LocalSpace);
306 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
307 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
308 isl_constraint_set_constant_si (c, VectorWidth - 1);
309 TilingMap = isl_map_add_constraint (TilingMap, c);
311 return TilingMap;
314 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
315 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
316 corresponding option for AST build.
318 The map (for VectorWidth=4):
320 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
321 ip >= 0
323 The image of this map is the separation class. The range of this map includes
324 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
327 static isl_map *
328 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
329 int ScheduleDimensions,
330 int VectorWidth)
332 isl_space *Space;
333 isl_local_space *LocalSpace, *LocalSpaceRange;
334 isl_set *Modulo;
335 isl_map *TilingMap;
336 isl_constraint *c;
337 isl_aff *Aff;
338 int PointDimension; /* ip */
339 int TileDimension; /* it */
340 isl_val *VectorWidthMP;
341 int i;
343 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
345 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
346 TilingMap = isl_map_universe (isl_space_copy (Space));
347 LocalSpace = isl_local_space_from_space (Space);
348 PointDimension = ScheduleDimensions;
349 TileDimension = DimToVectorize;
351 /* Create an identity map for everything except DimToVectorize and the
352 point loop. */
353 for (i = 0; i < ScheduleDimensions; i++)
355 if (i == DimToVectorize)
356 continue;
358 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
360 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
361 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
363 TilingMap = isl_map_add_constraint (TilingMap, c);
366 /* it % 'VectorWidth' = 0 */
367 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
368 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
369 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
370 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
372 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
373 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
374 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
375 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
377 /* it + ('VectorWidth' - 1) = i0 */
378 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
379 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
380 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
381 isl_constraint_set_constant_si (c, -VectorWidth + 1);
382 TilingMap = isl_map_add_constraint (TilingMap, c);
384 /* ip >= 0 */
385 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
386 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
387 isl_constraint_set_constant_si (c, 0);
388 TilingMap = isl_map_add_constraint (TilingMap, c);
390 /* it <= ip */
391 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
392 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
393 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
394 TilingMap = isl_map_add_constraint (TilingMap, c);
396 /* ip <= it + ('VectorWidth' - 1) */
397 c = isl_inequality_alloc (LocalSpace);
398 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
399 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
400 isl_constraint_set_constant_si (c, VectorWidth - 1);
401 TilingMap = isl_map_add_constraint (TilingMap, c);
403 return TilingMap;
406 static bool EnablePollyVector = false;
408 /* getScheduleForBandList - Get the scheduling map for a list of bands.
410 We walk recursively the forest of bands to combine the schedules of the
411 individual bands to the overall schedule. In case tiling is requested,
412 the individual bands are tiled.
413 For unroll and jam the map the schedule for full tiles of the unrolled
414 dimnesion is computed. */
415 static isl_union_map *
416 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
418 int NumBands, i;
419 isl_union_map *Schedule;
420 isl_ctx *ctx;
422 ctx = isl_band_list_get_ctx (BandList);
423 NumBands = isl_band_list_n_band (BandList);
424 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
426 for (i = 0; i < NumBands; i++)
428 isl_band *Band;
429 isl_union_map *PartialSchedule;
430 int ScheduleDimensions;
431 isl_space *Space;
433 isl_union_map *PartialSchedule_f;
435 Band = isl_band_list_get_band (BandList, i);
436 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
437 Space = isl_union_map_get_space (PartialSchedule);
439 PartialSchedule_f = NULL;
441 if (isl_band_has_children (Band))
443 isl_band_list *Children;
444 isl_union_map *SuffixSchedule;
446 Children = isl_band_get_children (Band);
447 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
448 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
449 SuffixSchedule);
450 isl_band_list_free (Children);
452 else if (EnablePollyVector || flag_loop_unroll_jam)
454 int i;
455 int depth;
457 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
459 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
461 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
462 continue;
464 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
465 if (isl_band_member_is_coincident (Band, i))
466 #else
467 if (isl_band_member_is_zero_distance (Band, i))
468 #endif
470 isl_map *TileMap;
471 isl_union_map *TileUMap;
472 int stride;
474 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
476 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
477 stride);
478 TileUMap = isl_union_map_from_map (TileMap);
479 TileUMap = isl_union_map_align_params
480 (TileUMap, isl_space_copy (Space));
481 PartialSchedule_f = isl_union_map_apply_range
482 (isl_union_map_copy (PartialSchedule), TileUMap);
484 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
485 TileUMap = isl_union_map_from_map (TileMap);
486 TileUMap = isl_union_map_align_params
487 (TileUMap, isl_space_copy (Space));
488 PartialSchedule = isl_union_map_apply_range
489 (PartialSchedule, TileUMap);
490 break;
494 Schedule = isl_union_map_union (Schedule,
495 isl_union_map_copy(PartialSchedule));
497 isl_band_free (Band);
498 isl_space_free (Space);
500 if (!flag_loop_unroll_jam)
502 isl_union_map_free (PartialSchedule);
503 continue;
506 if (PartialSchedule_f)
508 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
509 isl_union_map_free (PartialSchedule);
511 else
512 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
515 return Schedule;
518 static isl_union_map *
519 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
521 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
522 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
523 isl_band_list_free (BandList);
524 return ScheduleMap;
527 static int
528 getSingleMap (__isl_take isl_map *map, void *user)
530 isl_map **singleMap = (isl_map **) user;
531 *singleMap = map;
533 return 0;
536 static void
537 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
539 int i;
540 poly_bb_p pbb;
542 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
544 isl_set *domain = isl_set_copy (pbb->domain);
545 isl_union_map *stmtBand;
546 isl_map *stmtSchedule;
548 stmtBand = isl_union_map_intersect_domain
549 (isl_union_map_copy (schedule_map),
550 isl_union_set_from_set (domain));
551 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
553 if (!sepcl)
555 isl_map_free (pbb->transformed);
556 pbb->transformed = stmtSchedule;
558 else
559 pbb->map_sepclass = stmtSchedule;
561 isl_union_map_free (stmtBand);
565 static const int CONSTANT_BOUND = 20;
567 bool
568 optimize_isl (scop_p scop)
571 isl_schedule *schedule;
572 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
573 isl_schedule_constraints *schedule_constraints;
574 #endif
575 isl_union_set *domain;
576 isl_union_map *validity, *proximity, *dependences;
577 isl_union_map *schedule_map;
578 isl_union_map *schedule_map_f;
580 domain = scop_get_domains (scop);
581 dependences = scop_get_dependences (scop);
582 dependences = isl_union_map_gist_domain (dependences,
583 isl_union_set_copy (domain));
584 dependences = isl_union_map_gist_range (dependences,
585 isl_union_set_copy (domain));
586 validity = dependences;
588 proximity = isl_union_map_copy (validity);
590 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
591 schedule_constraints = isl_schedule_constraints_on_domain (domain);
592 schedule_constraints
593 = isl_schedule_constraints_set_proximity (schedule_constraints,
594 proximity);
595 schedule_constraints
596 = isl_schedule_constraints_set_validity (schedule_constraints,
597 isl_union_map_copy (validity));
598 schedule_constraints
599 = isl_schedule_constraints_set_coincidence (schedule_constraints,
600 validity);
601 #endif
603 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
604 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
605 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
606 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
608 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
609 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
610 #else
611 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
612 #endif
614 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
616 if (!schedule)
617 return false;
619 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
620 schedule_map = getScheduleMap (schedule, &schedule_map_f);
622 apply_schedule_map_to_scop (scop, schedule_map, false);
623 if (!isl_union_map_is_empty (schedule_map_f))
624 apply_schedule_map_to_scop (scop, schedule_map_f, true);
625 isl_union_map_free (schedule_map_f);
627 isl_schedule_free (schedule);
628 isl_union_map_free (schedule_map);
630 return true;
633 #endif