/cp
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob624cc87df9b1b30fcc402289b43b03288f0a3f9e
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_isl
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "alias.h"
36 #include "symtab.h"
37 #include "options.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "predict.h"
41 #include "tm.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "basic-block.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "gimple-expr.h"
50 #include "gimple.h"
51 #include "gimple-iterator.h"
52 #include "tree-ssa-loop.h"
53 #include "dumpfile.h"
54 #include "cfgloop.h"
55 #include "tree-chrec.h"
56 #include "tree-data-ref.h"
57 #include "tree-scalar-evolution.h"
58 #include "sese.h"
60 #ifdef HAVE_isl
61 #include "graphite-poly.h"
63 static isl_union_set *
64 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
66 int i;
67 poly_bb_p pbb;
68 isl_space *space = isl_set_get_space (scop->context);
69 isl_union_set *res = isl_union_set_empty (space);
71 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
72 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
74 return res;
77 /* getTileMap - Create a map that describes a n-dimensonal tiling.
79 getTileMap creates a map from a n-dimensional scattering space into an
80 2*n-dimensional scattering space. The map describes a rectangular tiling.
82 Example:
83 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
85 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
86 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
87 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
89 Before tiling:
91 for (i = 0; i < N; i++)
92 for (j = 0; j < M; j++)
93 S(i,j)
95 After tiling:
97 for (t_i = 0; t_i < N; i+=32)
98 for (t_j = 0; t_j < M; j+=32)
99 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
100 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
101 S(i,j)
104 static isl_basic_map *
105 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
107 int x;
108 /* We construct
110 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
111 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
112 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
114 and project out the auxilary dimensions a0 and a1. */
115 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
116 scheduleDimensions * 3);
117 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
119 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
121 for (x = 0; x < scheduleDimensions; x++)
123 int sX = x;
124 int tX = x;
125 int pX = scheduleDimensions + x;
126 int aX = 2 * scheduleDimensions + x;
128 isl_constraint *c;
130 /* sX = aX * tileSize; */
131 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
132 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
133 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
134 tileMap = isl_basic_map_add_constraint (tileMap, c);
136 /* pX = sX; */
137 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
138 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
139 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
140 tileMap = isl_basic_map_add_constraint (tileMap, c);
142 /* tX <= pX */
143 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
144 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
145 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
146 tileMap = isl_basic_map_add_constraint (tileMap, c);
148 /* pX <= tX + (tileSize - 1) */
149 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
150 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
151 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
152 isl_constraint_set_constant_si (c, tileSize - 1);
153 tileMap = isl_basic_map_add_constraint (tileMap, c);
156 /* Project out auxiliary dimensions.
158 The auxiliary dimensions are transformed into existentially quantified ones.
159 This reduces the number of visible scattering dimensions and allows Cloog
160 to produces better code. */
161 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
162 2 * scheduleDimensions,
163 scheduleDimensions);
164 isl_local_space_free (LocalSpace);
165 return tileMap;
168 /* getScheduleForBand - Get the schedule for this band.
170 Polly applies transformations like tiling on top of the isl calculated value.
171 This can influence the number of scheduling dimension. The number of
172 schedule dimensions is returned in the parameter 'Dimension'. */
173 static bool DisableTiling = false;
175 static isl_union_map *
176 getScheduleForBand (isl_band *Band, int *Dimensions)
178 isl_union_map *PartialSchedule;
179 isl_ctx *ctx;
180 isl_space *Space;
181 isl_basic_map *TileMap;
182 isl_union_map *TileUMap;
184 PartialSchedule = isl_band_get_partial_schedule (Band);
185 *Dimensions = isl_band_n_member (Band);
187 if (DisableTiling || flag_loop_unroll_jam)
188 return PartialSchedule;
190 /* It does not make any sense to tile a band with just one dimension. */
191 if (*Dimensions == 1)
192 return PartialSchedule;
194 ctx = isl_union_map_get_ctx (PartialSchedule);
195 Space = isl_union_map_get_space (PartialSchedule);
197 TileMap = getTileMap (ctx, *Dimensions, 32);
198 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
199 TileUMap = isl_union_map_align_params (TileUMap, Space);
200 *Dimensions = 2 * *Dimensions;
202 return isl_union_map_apply_range (PartialSchedule, TileUMap);
205 /* Create a map that pre-vectorizes one scheduling dimension.
207 getPrevectorMap creates a map that maps each input dimension to the same
208 output dimension, except for the dimension DimToVectorize. DimToVectorize is
209 strip mined by 'VectorWidth' and the newly created point loop of
210 DimToVectorize is moved to the innermost level.
212 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
214 | Before transformation
216 | A[i,j] -> [i,j]
218 | for (i = 0; i < 128; i++)
219 | for (j = 0; j < 128; j++)
220 | A(i,j);
222 Prevector map:
223 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
225 | After transformation:
227 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
229 | for (it = 0; it < 128; it+=4)
230 | for (j = 0; j < 128; j++)
231 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
232 | A(ip,j);
234 The goal of this transformation is to create a trivially vectorizable loop.
235 This means a parallel loop at the innermost level that has a constant number
236 of iterations corresponding to the target vector width.
238 This transformation creates a loop at the innermost level. The loop has a
239 constant number of iterations, if the number of loop iterations at
240 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
241 currently constant and not yet target specific. This function does not reason
242 about parallelism.
245 static isl_map *
246 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
247 int ScheduleDimensions,
248 int VectorWidth)
250 isl_space *Space;
251 isl_local_space *LocalSpace, *LocalSpaceRange;
252 isl_set *Modulo;
253 isl_map *TilingMap;
254 isl_constraint *c;
255 isl_aff *Aff;
256 int PointDimension; /* ip */
257 int TileDimension; /* it */
258 isl_val *VectorWidthMP;
259 int i;
261 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
263 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
264 TilingMap = isl_map_universe (isl_space_copy (Space));
265 LocalSpace = isl_local_space_from_space (Space);
266 PointDimension = ScheduleDimensions;
267 TileDimension = DimToVectorize;
269 /* Create an identity map for everything except DimToVectorize and map
270 DimToVectorize to the point loop at the innermost dimension. */
271 for (i = 0; i < ScheduleDimensions; i++)
273 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
274 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
276 if (i == DimToVectorize)
277 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
278 else
279 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
281 TilingMap = isl_map_add_constraint (TilingMap, c);
284 /* it % 'VectorWidth' = 0 */
285 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
286 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
287 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
288 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
290 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
291 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
292 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
293 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
295 /* it <= ip */
296 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
297 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
298 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
299 TilingMap = isl_map_add_constraint (TilingMap, c);
301 /* ip <= it + ('VectorWidth' - 1) */
302 c = isl_inequality_alloc (LocalSpace);
303 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
304 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
305 isl_constraint_set_constant_si (c, VectorWidth - 1);
306 TilingMap = isl_map_add_constraint (TilingMap, c);
308 return TilingMap;
311 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
312 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
313 corresponding option for AST build.
315 The map (for VectorWidth=4):
317 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
318 ip >= 0
320 The image of this map is the separation class. The range of this map includes
321 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
324 static isl_map *
325 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
326 int ScheduleDimensions,
327 int VectorWidth)
329 isl_space *Space;
330 isl_local_space *LocalSpace, *LocalSpaceRange;
331 isl_set *Modulo;
332 isl_map *TilingMap;
333 isl_constraint *c;
334 isl_aff *Aff;
335 int PointDimension; /* ip */
336 int TileDimension; /* it */
337 isl_val *VectorWidthMP;
338 int i;
340 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
342 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
343 TilingMap = isl_map_universe (isl_space_copy (Space));
344 LocalSpace = isl_local_space_from_space (Space);
345 PointDimension = ScheduleDimensions;
346 TileDimension = DimToVectorize;
348 /* Create an identity map for everything except DimToVectorize and the
349 point loop. */
350 for (i = 0; i < ScheduleDimensions; i++)
352 if (i == DimToVectorize)
353 continue;
355 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
357 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
358 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
360 TilingMap = isl_map_add_constraint (TilingMap, c);
363 /* it % 'VectorWidth' = 0 */
364 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
365 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
366 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
367 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
369 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
370 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
371 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
372 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
374 /* it + ('VectorWidth' - 1) = i0 */
375 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
376 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
377 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
378 isl_constraint_set_constant_si (c, -VectorWidth + 1);
379 TilingMap = isl_map_add_constraint (TilingMap, c);
381 /* ip >= 0 */
382 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
383 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
384 isl_constraint_set_constant_si (c, 0);
385 TilingMap = isl_map_add_constraint (TilingMap, c);
387 /* it <= ip */
388 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
389 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
390 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
391 TilingMap = isl_map_add_constraint (TilingMap, c);
393 /* ip <= it + ('VectorWidth' - 1) */
394 c = isl_inequality_alloc (LocalSpace);
395 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
396 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
397 isl_constraint_set_constant_si (c, VectorWidth - 1);
398 TilingMap = isl_map_add_constraint (TilingMap, c);
400 return TilingMap;
403 static bool EnablePollyVector = false;
405 /* getScheduleForBandList - Get the scheduling map for a list of bands.
407 We walk recursively the forest of bands to combine the schedules of the
408 individual bands to the overall schedule. In case tiling is requested,
409 the individual bands are tiled.
410 For unroll and jam the map the schedule for full tiles of the unrolled
411 dimnesion is computed. */
412 static isl_union_map *
413 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
415 int NumBands, i;
416 isl_union_map *Schedule;
417 isl_ctx *ctx;
419 ctx = isl_band_list_get_ctx (BandList);
420 NumBands = isl_band_list_n_band (BandList);
421 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
423 for (i = 0; i < NumBands; i++)
425 isl_band *Band;
426 isl_union_map *PartialSchedule;
427 int ScheduleDimensions;
428 isl_space *Space;
430 isl_union_map *PartialSchedule_f;
432 Band = isl_band_list_get_band (BandList, i);
433 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
434 Space = isl_union_map_get_space (PartialSchedule);
436 PartialSchedule_f = NULL;
438 if (isl_band_has_children (Band))
440 isl_band_list *Children;
441 isl_union_map *SuffixSchedule;
443 Children = isl_band_get_children (Band);
444 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
445 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
446 SuffixSchedule);
447 isl_band_list_free (Children);
449 else if (EnablePollyVector || flag_loop_unroll_jam)
451 int i;
452 int depth;
454 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
456 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
458 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
459 continue;
461 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
462 if (isl_band_member_is_coincident (Band, i))
463 #else
464 if (isl_band_member_is_zero_distance (Band, i))
465 #endif
467 isl_map *TileMap;
468 isl_union_map *TileUMap;
469 int stride;
471 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
473 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
474 stride);
475 TileUMap = isl_union_map_from_map (TileMap);
476 TileUMap = isl_union_map_align_params
477 (TileUMap, isl_space_copy (Space));
478 PartialSchedule_f = isl_union_map_apply_range
479 (isl_union_map_copy (PartialSchedule), TileUMap);
481 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
482 TileUMap = isl_union_map_from_map (TileMap);
483 TileUMap = isl_union_map_align_params
484 (TileUMap, isl_space_copy (Space));
485 PartialSchedule = isl_union_map_apply_range
486 (PartialSchedule, TileUMap);
487 break;
491 Schedule = isl_union_map_union (Schedule,
492 isl_union_map_copy(PartialSchedule));
494 isl_band_free (Band);
495 isl_space_free (Space);
497 if (!flag_loop_unroll_jam)
499 isl_union_map_free (PartialSchedule);
500 continue;
503 if (PartialSchedule_f)
505 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
506 isl_union_map_free (PartialSchedule);
508 else
509 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
512 return Schedule;
515 static isl_union_map *
516 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
518 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
519 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
520 isl_band_list_free (BandList);
521 return ScheduleMap;
524 static int
525 getSingleMap (__isl_take isl_map *map, void *user)
527 isl_map **singleMap = (isl_map **) user;
528 *singleMap = map;
530 return 0;
533 static void
534 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
536 int i;
537 poly_bb_p pbb;
539 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
541 isl_set *domain = isl_set_copy (pbb->domain);
542 isl_union_map *stmtBand;
543 isl_map *stmtSchedule;
545 stmtBand = isl_union_map_intersect_domain
546 (isl_union_map_copy (schedule_map),
547 isl_union_set_from_set (domain));
548 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
550 if (!sepcl)
552 isl_map_free (pbb->transformed);
553 pbb->transformed = stmtSchedule;
555 else
556 pbb->map_sepclass = stmtSchedule;
558 isl_union_map_free (stmtBand);
562 static const int CONSTANT_BOUND = 20;
564 bool
565 optimize_isl (scop_p scop)
568 isl_schedule *schedule;
569 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
570 isl_schedule_constraints *schedule_constraints;
571 #endif
572 isl_union_set *domain;
573 isl_union_map *validity, *proximity, *dependences;
574 isl_union_map *schedule_map;
575 isl_union_map *schedule_map_f;
577 domain = scop_get_domains (scop);
578 dependences = scop_get_dependences (scop);
579 dependences = isl_union_map_gist_domain (dependences,
580 isl_union_set_copy (domain));
581 dependences = isl_union_map_gist_range (dependences,
582 isl_union_set_copy (domain));
583 validity = dependences;
585 proximity = isl_union_map_copy (validity);
587 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
588 schedule_constraints = isl_schedule_constraints_on_domain (domain);
589 schedule_constraints
590 = isl_schedule_constraints_set_proximity (schedule_constraints,
591 proximity);
592 schedule_constraints
593 = isl_schedule_constraints_set_validity (schedule_constraints,
594 isl_union_map_copy (validity));
595 schedule_constraints
596 = isl_schedule_constraints_set_coincidence (schedule_constraints,
597 validity);
598 #endif
600 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
601 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
602 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
603 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
605 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
606 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
607 #else
608 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
609 #endif
611 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
613 if (!schedule)
614 return false;
616 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
617 schedule_map = getScheduleMap (schedule, &schedule_map_f);
619 apply_schedule_map_to_scop (scop, schedule_map, false);
620 if (!isl_union_map_is_empty (schedule_map_f))
621 apply_schedule_map_to_scop (scop, schedule_map_f, true);
622 isl_union_map_free (schedule_map_f);
624 isl_schedule_free (schedule);
625 isl_union_map_free (schedule_map);
627 return true;
630 #endif