gcc-defs.exp (dg-additional-files-options): Extend regsub for dg-additional-files...
[official-gcc.git] / gcc / graphite-optimize-isl.c
blobd039c1b74e1acbd37bdfa41fd7f32276f856fe09
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "tree-ssa-loop.h"
38 #include "dumpfile.h"
39 #include "cfgloop.h"
40 #include "tree-chrec.h"
41 #include "tree-data-ref.h"
42 #include "tree-scalar-evolution.h"
43 #include "sese.h"
45 #ifdef HAVE_cloog
46 #include "graphite-poly.h"
48 static isl_union_set *
49 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
51 int i;
52 poly_bb_p pbb;
53 isl_space *space = isl_set_get_space (scop->context);
54 isl_union_set *res = isl_union_set_empty (space);
56 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
57 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
59 return res;
62 static isl_union_map *
63 scop_get_dependences (scop_p scop)
65 isl_union_map *dependences;
67 if (!scop->must_raw)
68 compute_deps (scop, SCOP_BBS (scop),
69 &scop->must_raw, &scop->may_raw,
70 &scop->must_raw_no_source, &scop->may_raw_no_source,
71 &scop->must_war, &scop->may_war,
72 &scop->must_war_no_source, &scop->may_war_no_source,
73 &scop->must_waw, &scop->may_waw,
74 &scop->must_waw_no_source, &scop->may_waw_no_source);
76 dependences = isl_union_map_copy (scop->must_raw);
77 dependences = isl_union_map_union (dependences,
78 isl_union_map_copy (scop->must_war));
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->must_waw));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->may_raw));
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->may_war));
85 dependences = isl_union_map_union (dependences,
86 isl_union_map_copy (scop->may_waw));
88 return dependences;
91 /* getTileMap - Create a map that describes a n-dimensonal tiling.
93 getTileMap creates a map from a n-dimensional scattering space into an
94 2*n-dimensional scattering space. The map describes a rectangular tiling.
96 Example:
97 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
99 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
100 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
101 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
103 Before tiling:
105 for (i = 0; i < N; i++)
106 for (j = 0; j < M; j++)
107 S(i,j)
109 After tiling:
111 for (t_i = 0; t_i < N; i+=32)
112 for (t_j = 0; t_j < M; j+=32)
113 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
114 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
115 S(i,j)
118 static isl_basic_map *
119 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
121 int x;
122 /* We construct
124 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
125 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
126 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
128 and project out the auxilary dimensions a0 and a1. */
129 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
130 scheduleDimensions * 3);
131 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
133 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
135 for (x = 0; x < scheduleDimensions; x++)
137 int sX = x;
138 int tX = x;
139 int pX = scheduleDimensions + x;
140 int aX = 2 * scheduleDimensions + x;
142 isl_constraint *c;
144 /* sX = aX * tileSize; */
145 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
146 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
147 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
148 tileMap = isl_basic_map_add_constraint (tileMap, c);
150 /* pX = sX; */
151 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
152 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
153 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
154 tileMap = isl_basic_map_add_constraint (tileMap, c);
156 /* tX <= pX */
157 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
158 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
159 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
160 tileMap = isl_basic_map_add_constraint (tileMap, c);
162 /* pX <= tX + (tileSize - 1) */
163 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
164 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
165 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
166 isl_constraint_set_constant_si (c, tileSize - 1);
167 tileMap = isl_basic_map_add_constraint (tileMap, c);
170 /* Project out auxiliary dimensions.
172 The auxiliary dimensions are transformed into existentially quantified ones.
173 This reduces the number of visible scattering dimensions and allows Cloog
174 to produces better code. */
175 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
176 2 * scheduleDimensions,
177 scheduleDimensions);
178 isl_local_space_free (LocalSpace);
179 return tileMap;
182 /* getScheduleForBand - Get the schedule for this band.
184 Polly applies transformations like tiling on top of the isl calculated value.
185 This can influence the number of scheduling dimension. The number of
186 schedule dimensions is returned in the parameter 'Dimension'. */
187 static bool DisableTiling = false;
189 static isl_union_map *
190 getScheduleForBand (isl_band *Band, int *Dimensions)
192 isl_union_map *PartialSchedule;
193 isl_ctx *ctx;
194 isl_space *Space;
195 isl_basic_map *TileMap;
196 isl_union_map *TileUMap;
198 PartialSchedule = isl_band_get_partial_schedule (Band);
199 *Dimensions = isl_band_n_member (Band);
201 if (DisableTiling)
202 return PartialSchedule;
204 /* It does not make any sense to tile a band with just one dimension. */
205 if (*Dimensions == 1)
206 return PartialSchedule;
208 ctx = isl_union_map_get_ctx (PartialSchedule);
209 Space = isl_union_map_get_space (PartialSchedule);
211 TileMap = getTileMap (ctx, *Dimensions, 32);
212 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
213 TileUMap = isl_union_map_align_params (TileUMap, Space);
214 *Dimensions = 2 * *Dimensions;
216 return isl_union_map_apply_range (PartialSchedule, TileUMap);
219 /* Create a map that pre-vectorizes one scheduling dimension.
221 getPrevectorMap creates a map that maps each input dimension to the same
222 output dimension, except for the dimension DimToVectorize. DimToVectorize is
223 strip mined by 'VectorWidth' and the newly created point loop of
224 DimToVectorize is moved to the innermost level.
226 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
228 | Before transformation
230 | A[i,j] -> [i,j]
232 | for (i = 0; i < 128; i++)
233 | for (j = 0; j < 128; j++)
234 | A(i,j);
236 Prevector map:
237 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
239 | After transformation:
241 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
243 | for (it = 0; it < 128; it+=4)
244 | for (j = 0; j < 128; j++)
245 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
246 | A(ip,j);
248 The goal of this transformation is to create a trivially vectorizable loop.
249 This means a parallel loop at the innermost level that has a constant number
250 of iterations corresponding to the target vector width.
252 This transformation creates a loop at the innermost level. The loop has a
253 constant number of iterations, if the number of loop iterations at
254 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
255 currently constant and not yet target specific. This function does not reason
256 about parallelism. */
257 static isl_map *
258 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
259 int ScheduleDimensions,
260 int VectorWidth)
262 isl_space *Space;
263 isl_local_space *LocalSpace, *LocalSpaceRange;
264 isl_set *Modulo;
265 isl_map *TilingMap;
266 isl_constraint *c;
267 isl_aff *Aff;
268 int PointDimension; /* ip */
269 int TileDimension; /* it */
270 isl_int VectorWidthMP;
271 int i;
273 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
275 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
276 TilingMap = isl_map_universe (isl_space_copy (Space));
277 LocalSpace = isl_local_space_from_space (Space);
278 PointDimension = ScheduleDimensions;
279 TileDimension = DimToVectorize;
281 /* Create an identity map for everything except DimToVectorize and map
282 DimToVectorize to the point loop at the innermost dimension. */
283 for (i = 0; i < ScheduleDimensions; i++)
285 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
286 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
288 if (i == DimToVectorize)
289 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
290 else
291 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
293 TilingMap = isl_map_add_constraint (TilingMap, c);
296 /* it % 'VectorWidth' = 0 */
297 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
298 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
299 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
300 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
301 isl_int_init (VectorWidthMP);
302 isl_int_set_si (VectorWidthMP, VectorWidth);
303 Aff = isl_aff_mod (Aff, VectorWidthMP);
304 isl_int_clear (VectorWidthMP);
305 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
306 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
308 /* it <= ip */
309 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
310 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
311 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
312 TilingMap = isl_map_add_constraint (TilingMap, c);
314 /* ip <= it + ('VectorWidth' - 1) */
315 c = isl_inequality_alloc (LocalSpace);
316 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
317 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
318 isl_constraint_set_constant_si (c, VectorWidth - 1);
319 TilingMap = isl_map_add_constraint (TilingMap, c);
321 isl_map_dump (TilingMap);
323 return TilingMap;
326 static bool EnablePollyVector = false;
328 /* getScheduleForBandList - Get the scheduling map for a list of bands.
330 We walk recursively the forest of bands to combine the schedules of the
331 individual bands to the overall schedule. In case tiling is requested,
332 the individual bands are tiled. */
333 static isl_union_map *
334 getScheduleForBandList (isl_band_list *BandList)
336 int NumBands, i;
337 isl_union_map *Schedule;
338 isl_ctx *ctx;
340 ctx = isl_band_list_get_ctx (BandList);
341 NumBands = isl_band_list_n_band (BandList);
342 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
344 for (i = 0; i < NumBands; i++)
346 isl_band *Band;
347 isl_union_map *PartialSchedule;
348 int ScheduleDimensions;
349 isl_space *Space;
351 Band = isl_band_list_get_band (BandList, i);
352 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
353 Space = isl_union_map_get_space (PartialSchedule);
355 if (isl_band_has_children (Band))
357 isl_band_list *Children;
358 isl_union_map *SuffixSchedule;
360 Children = isl_band_get_children (Band);
361 SuffixSchedule = getScheduleForBandList (Children);
362 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
363 SuffixSchedule);
364 isl_band_list_free (Children);
366 else if (EnablePollyVector)
368 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
370 if (isl_band_member_is_zero_distance (Band, i))
372 isl_map *TileMap;
373 isl_union_map *TileUMap;
375 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
376 TileUMap = isl_union_map_from_map (TileMap);
377 TileUMap = isl_union_map_align_params
378 (TileUMap, isl_space_copy (Space));
379 PartialSchedule = isl_union_map_apply_range
380 (PartialSchedule, TileUMap);
381 break;
386 Schedule = isl_union_map_union (Schedule, PartialSchedule);
388 isl_band_free (Band);
389 isl_space_free (Space);
392 return Schedule;
395 static isl_union_map *
396 getScheduleMap (isl_schedule *Schedule)
398 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
399 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
400 isl_band_list_free (BandList);
401 return ScheduleMap;
404 static int
405 getSingleMap (__isl_take isl_map *map, void *user)
407 isl_map **singleMap = (isl_map **) user;
408 *singleMap = map;
410 return 0;
413 static void
414 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
416 int i;
417 poly_bb_p pbb;
419 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
421 isl_set *domain = isl_set_copy (pbb->domain);
422 isl_union_map *stmtBand;
423 isl_map *stmtSchedule;
425 stmtBand = isl_union_map_intersect_domain
426 (isl_union_map_copy (schedule_map),
427 isl_union_set_from_set (domain));
428 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
429 isl_map_free (pbb->transformed);
430 pbb->transformed = stmtSchedule;
431 isl_union_map_free (stmtBand);
435 static const int CONSTANT_BOUND = 20;
437 bool
438 optimize_isl (scop_p scop)
441 isl_schedule *schedule;
442 isl_union_set *domain;
443 isl_union_map *validity, *proximity, *dependences;
444 isl_union_map *schedule_map;
446 domain = scop_get_domains (scop);
447 dependences = scop_get_dependences (scop);
448 dependences = isl_union_map_gist_domain (dependences,
449 isl_union_set_copy (domain));
450 dependences = isl_union_map_gist_range (dependences,
451 isl_union_set_copy (domain));
452 validity = dependences;
454 proximity = isl_union_map_copy (validity);
456 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
457 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
458 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
459 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
460 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
461 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
463 if (!schedule)
464 return false;
466 schedule_map = getScheduleMap (schedule);
468 apply_schedule_map_to_scop (scop, schedule_map);
470 isl_schedule_free (schedule);
471 isl_union_map_free (schedule_map);
473 return true;
476 #endif