PR go/67101
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob1b57c6cc6d23c57859b30061547fa4b4c7c70d16
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_isl
24 /* Workaround for GMP 5.1.3 bug, see PR56019. */
25 #include <stddef.h>
27 #include <isl/constraint.h>
28 #include <isl/set.h>
29 #include <isl/union_set.h>
30 #include <isl/map.h>
31 #include <isl/union_map.h>
32 #include <isl/schedule.h>
33 #include <isl/band.h>
34 #include <isl/aff.h>
35 #include <isl/options.h>
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "cfghooks.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "fold-const.h"
44 #include "gimple-iterator.h"
45 #include "tree-ssa-loop.h"
46 #include "cfgloop.h"
47 #include "tree-data-ref.h"
48 #include "graphite-poly.h"
49 #include "params.h"
51 static isl_union_set *
52 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
54 int i;
55 poly_bb_p pbb;
56 isl_space *space = isl_set_get_space (scop->context);
57 isl_union_set *res = isl_union_set_empty (space);
59 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
60 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
62 return res;
65 /* getTileMap - Create a map that describes a n-dimensonal tiling.
67 getTileMap creates a map from a n-dimensional scattering space into an
68 2*n-dimensional scattering space. The map describes a rectangular tiling.
70 Example:
71 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
73 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
74 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
75 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
77 Before tiling:
79 for (i = 0; i < N; i++)
80 for (j = 0; j < M; j++)
81 S(i,j)
83 After tiling:
85 for (t_i = 0; t_i < N; i+=32)
86 for (t_j = 0; t_j < M; j+=32)
87 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
88 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
89 S(i,j)
92 static isl_basic_map *
93 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
95 int x;
96 /* We construct
98 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
99 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
100 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
102 and project out the auxilary dimensions a0 and a1. */
103 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
104 scheduleDimensions * 3);
105 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
107 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
109 for (x = 0; x < scheduleDimensions; x++)
111 int sX = x;
112 int tX = x;
113 int pX = scheduleDimensions + x;
114 int aX = 2 * scheduleDimensions + x;
116 isl_constraint *c;
118 /* sX = aX * tileSize; */
119 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
120 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
121 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
122 tileMap = isl_basic_map_add_constraint (tileMap, c);
124 /* pX = sX; */
125 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
126 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
127 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
128 tileMap = isl_basic_map_add_constraint (tileMap, c);
130 /* tX <= pX */
131 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
132 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
133 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
134 tileMap = isl_basic_map_add_constraint (tileMap, c);
136 /* pX <= tX + (tileSize - 1) */
137 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
138 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
139 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
140 isl_constraint_set_constant_si (c, tileSize - 1);
141 tileMap = isl_basic_map_add_constraint (tileMap, c);
144 /* Project out auxiliary dimensions.
146 The auxiliary dimensions are transformed into existentially quantified ones.
147 This reduces the number of visible scattering dimensions and allows Cloog
148 to produces better code. */
149 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
150 2 * scheduleDimensions,
151 scheduleDimensions);
152 isl_local_space_free (LocalSpace);
153 return tileMap;
156 /* getScheduleForBand - Get the schedule for this band.
158 Polly applies transformations like tiling on top of the isl calculated value.
159 This can influence the number of scheduling dimension. The number of
160 schedule dimensions is returned in the parameter 'Dimension'. */
161 static bool DisableTiling = false;
163 static isl_union_map *
164 getScheduleForBand (isl_band *Band, int *Dimensions)
166 isl_union_map *PartialSchedule;
167 isl_ctx *ctx;
168 isl_space *Space;
169 isl_basic_map *TileMap;
170 isl_union_map *TileUMap;
172 PartialSchedule = isl_band_get_partial_schedule (Band);
173 *Dimensions = isl_band_n_member (Band);
175 if (DisableTiling || flag_loop_unroll_jam)
176 return PartialSchedule;
178 /* It does not make any sense to tile a band with just one dimension. */
179 if (*Dimensions == 1)
180 return PartialSchedule;
182 ctx = isl_union_map_get_ctx (PartialSchedule);
183 Space = isl_union_map_get_space (PartialSchedule);
185 TileMap = getTileMap (ctx, *Dimensions, 32);
186 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
187 TileUMap = isl_union_map_align_params (TileUMap, Space);
188 *Dimensions = 2 * *Dimensions;
190 return isl_union_map_apply_range (PartialSchedule, TileUMap);
193 /* Create a map that pre-vectorizes one scheduling dimension.
195 getPrevectorMap creates a map that maps each input dimension to the same
196 output dimension, except for the dimension DimToVectorize. DimToVectorize is
197 strip mined by 'VectorWidth' and the newly created point loop of
198 DimToVectorize is moved to the innermost level.
200 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
202 | Before transformation
204 | A[i,j] -> [i,j]
206 | for (i = 0; i < 128; i++)
207 | for (j = 0; j < 128; j++)
208 | A(i,j);
210 Prevector map:
211 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
213 | After transformation:
215 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
217 | for (it = 0; it < 128; it+=4)
218 | for (j = 0; j < 128; j++)
219 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
220 | A(ip,j);
222 The goal of this transformation is to create a trivially vectorizable loop.
223 This means a parallel loop at the innermost level that has a constant number
224 of iterations corresponding to the target vector width.
226 This transformation creates a loop at the innermost level. The loop has a
227 constant number of iterations, if the number of loop iterations at
228 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
229 currently constant and not yet target specific. This function does not reason
230 about parallelism.
233 static isl_map *
234 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
235 int ScheduleDimensions,
236 int VectorWidth)
238 isl_space *Space;
239 isl_local_space *LocalSpace, *LocalSpaceRange;
240 isl_set *Modulo;
241 isl_map *TilingMap;
242 isl_constraint *c;
243 isl_aff *Aff;
244 int PointDimension; /* ip */
245 int TileDimension; /* it */
246 isl_val *VectorWidthMP;
247 int i;
249 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
251 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
252 TilingMap = isl_map_universe (isl_space_copy (Space));
253 LocalSpace = isl_local_space_from_space (Space);
254 PointDimension = ScheduleDimensions;
255 TileDimension = DimToVectorize;
257 /* Create an identity map for everything except DimToVectorize and map
258 DimToVectorize to the point loop at the innermost dimension. */
259 for (i = 0; i < ScheduleDimensions; i++)
261 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
262 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
264 if (i == DimToVectorize)
265 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
266 else
267 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
269 TilingMap = isl_map_add_constraint (TilingMap, c);
272 /* it % 'VectorWidth' = 0 */
273 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
274 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
275 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
276 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
278 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
279 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
280 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
281 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
283 /* it <= ip */
284 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
285 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
286 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
287 TilingMap = isl_map_add_constraint (TilingMap, c);
289 /* ip <= it + ('VectorWidth' - 1) */
290 c = isl_inequality_alloc (LocalSpace);
291 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
292 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
293 isl_constraint_set_constant_si (c, VectorWidth - 1);
294 TilingMap = isl_map_add_constraint (TilingMap, c);
296 return TilingMap;
299 /* Compute an auxiliary map to getPrevectorMap, for computing the separating
300 class defined by full tiles. Used in graphite_isl_ast_to_gimple.c to set the
301 corresponding option for AST build.
303 The map (for VectorWidth=4):
305 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i and
306 ip >= 0
308 The image of this map is the separation class. The range of this map includes
309 all the i multiple of 4 in the domain such as i + 3 is in the domain too.
312 static isl_map *
313 getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
314 int ScheduleDimensions,
315 int VectorWidth)
317 isl_space *Space;
318 isl_local_space *LocalSpace, *LocalSpaceRange;
319 isl_set *Modulo;
320 isl_map *TilingMap;
321 isl_constraint *c;
322 isl_aff *Aff;
323 int PointDimension; /* ip */
324 int TileDimension; /* it */
325 isl_val *VectorWidthMP;
326 int i;
328 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
330 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
331 TilingMap = isl_map_universe (isl_space_copy (Space));
332 LocalSpace = isl_local_space_from_space (Space);
333 PointDimension = ScheduleDimensions;
334 TileDimension = DimToVectorize;
336 /* Create an identity map for everything except DimToVectorize and the
337 point loop. */
338 for (i = 0; i < ScheduleDimensions; i++)
340 if (i == DimToVectorize)
341 continue;
343 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
345 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
346 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
348 TilingMap = isl_map_add_constraint (TilingMap, c);
351 /* it % 'VectorWidth' = 0 */
352 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
353 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
354 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
355 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
357 VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
358 Aff = isl_aff_mod_val (Aff, VectorWidthMP);
359 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
360 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
362 /* it + ('VectorWidth' - 1) = i0 */
363 c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
364 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
365 isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
366 isl_constraint_set_constant_si (c, -VectorWidth + 1);
367 TilingMap = isl_map_add_constraint (TilingMap, c);
369 /* ip >= 0 */
370 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
371 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
372 isl_constraint_set_constant_si (c, 0);
373 TilingMap = isl_map_add_constraint (TilingMap, c);
375 /* it <= ip */
376 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
377 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
378 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
379 TilingMap = isl_map_add_constraint (TilingMap, c);
381 /* ip <= it + ('VectorWidth' - 1) */
382 c = isl_inequality_alloc (LocalSpace);
383 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
384 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
385 isl_constraint_set_constant_si (c, VectorWidth - 1);
386 TilingMap = isl_map_add_constraint (TilingMap, c);
388 return TilingMap;
391 static bool EnablePollyVector = false;
393 /* getScheduleForBandList - Get the scheduling map for a list of bands.
395 We walk recursively the forest of bands to combine the schedules of the
396 individual bands to the overall schedule. In case tiling is requested,
397 the individual bands are tiled.
398 For unroll and jam the map the schedule for full tiles of the unrolled
399 dimnesion is computed. */
400 static isl_union_map *
401 getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
403 int NumBands, i;
404 isl_union_map *Schedule;
405 isl_ctx *ctx;
407 ctx = isl_band_list_get_ctx (BandList);
408 NumBands = isl_band_list_n_band (BandList);
409 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
411 for (i = 0; i < NumBands; i++)
413 isl_band *Band;
414 isl_union_map *PartialSchedule;
415 int ScheduleDimensions;
416 isl_space *Space;
418 isl_union_map *PartialSchedule_f;
420 Band = isl_band_list_get_band (BandList, i);
421 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
422 Space = isl_union_map_get_space (PartialSchedule);
424 PartialSchedule_f = NULL;
426 if (isl_band_has_children (Band))
428 isl_band_list *Children;
429 isl_union_map *SuffixSchedule;
431 Children = isl_band_get_children (Band);
432 SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
433 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
434 SuffixSchedule);
435 isl_band_list_free (Children);
437 else if (EnablePollyVector || flag_loop_unroll_jam)
439 int i;
440 int depth;
442 depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
444 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
446 if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
447 continue;
449 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
450 if (isl_band_member_is_coincident (Band, i))
451 #else
452 if (isl_band_member_is_zero_distance (Band, i))
453 #endif
455 isl_map *TileMap;
456 isl_union_map *TileUMap;
457 int stride;
459 stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
461 TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions,
462 stride);
463 TileUMap = isl_union_map_from_map (TileMap);
464 TileUMap = isl_union_map_align_params
465 (TileUMap, isl_space_copy (Space));
466 PartialSchedule_f = isl_union_map_apply_range
467 (isl_union_map_copy (PartialSchedule), TileUMap);
469 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
470 TileUMap = isl_union_map_from_map (TileMap);
471 TileUMap = isl_union_map_align_params
472 (TileUMap, isl_space_copy (Space));
473 PartialSchedule = isl_union_map_apply_range
474 (PartialSchedule, TileUMap);
475 break;
479 Schedule = isl_union_map_union (Schedule,
480 isl_union_map_copy(PartialSchedule));
482 isl_band_free (Band);
483 isl_space_free (Space);
485 if (!flag_loop_unroll_jam)
487 isl_union_map_free (PartialSchedule);
488 continue;
491 if (PartialSchedule_f)
493 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule_f);
494 isl_union_map_free (PartialSchedule);
496 else
497 *map_sepcl = isl_union_map_union (*map_sepcl, PartialSchedule);
500 return Schedule;
503 static isl_union_map *
504 getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
506 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
507 isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
508 isl_band_list_free (BandList);
509 return ScheduleMap;
512 static isl_stat
513 getSingleMap (__isl_take isl_map *map, void *user)
515 isl_map **singleMap = (isl_map **) user;
516 *singleMap = map;
518 return isl_stat_ok;
521 static void
522 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
524 int i;
525 poly_bb_p pbb;
527 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
529 isl_set *domain = isl_set_copy (pbb->domain);
530 isl_union_map *stmtBand;
531 isl_map *stmtSchedule;
533 stmtBand = isl_union_map_intersect_domain
534 (isl_union_map_copy (schedule_map),
535 isl_union_set_from_set (domain));
536 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
538 if (!sepcl)
540 isl_map_free (pbb->transformed);
541 pbb->transformed = stmtSchedule;
543 else
544 pbb->map_sepclass = stmtSchedule;
546 isl_union_map_free (stmtBand);
550 static const int CONSTANT_BOUND = 20;
552 bool
553 optimize_isl (scop_p scop)
556 isl_schedule *schedule;
557 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
558 isl_schedule_constraints *schedule_constraints;
559 #endif
560 isl_union_set *domain;
561 isl_union_map *validity, *proximity, *dependences;
562 isl_union_map *schedule_map;
563 isl_union_map *schedule_map_f;
565 domain = scop_get_domains (scop);
566 dependences = scop_get_dependences (scop);
567 dependences = isl_union_map_gist_domain (dependences,
568 isl_union_set_copy (domain));
569 dependences = isl_union_map_gist_range (dependences,
570 isl_union_set_copy (domain));
571 validity = dependences;
573 proximity = isl_union_map_copy (validity);
575 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
576 schedule_constraints = isl_schedule_constraints_on_domain (domain);
577 schedule_constraints
578 = isl_schedule_constraints_set_proximity (schedule_constraints,
579 proximity);
580 schedule_constraints
581 = isl_schedule_constraints_set_validity (schedule_constraints,
582 isl_union_map_copy (validity));
583 schedule_constraints
584 = isl_schedule_constraints_set_coincidence (schedule_constraints,
585 validity);
586 #endif
588 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
589 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
590 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
591 isl_options_set_schedule_serialize_sccs (scop->ctx, 1);
592 #else
593 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
594 #endif
595 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
597 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
598 schedule = isl_schedule_constraints_compute_schedule(schedule_constraints);
599 #else
600 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
601 #endif
603 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
605 if (!schedule)
606 return false;
608 schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
609 schedule_map = getScheduleMap (schedule, &schedule_map_f);
611 apply_schedule_map_to_scop (scop, schedule_map, false);
612 if (!isl_union_map_is_empty (schedule_map_f))
613 apply_schedule_map_to_scop (scop, schedule_map_f, true);
614 isl_union_map_free (schedule_map_f);
616 isl_schedule_free (schedule);
617 isl_union_map_free (schedule_map);
619 return true;
622 #endif /* HAVE_isl */