Mark ChangeLog
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob6a94cbdf88cd6b8f6104adfd246f95a1451d9bd9
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
32 #include <isl/deprecated/int.h>
33 #include <isl/deprecated/aff_int.h>
34 #endif
35 #endif
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree-flow.h"
40 #include "dumpfile.h"
41 #include "cfgloop.h"
42 #include "tree-chrec.h"
43 #include "tree-data-ref.h"
44 #include "tree-scalar-evolution.h"
45 #include "sese.h"
47 #ifdef HAVE_cloog
48 #include "graphite-poly.h"
50 static isl_union_set *
51 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
53 int i;
54 poly_bb_p pbb;
55 isl_space *space = isl_set_get_space (scop->context);
56 isl_union_set *res = isl_union_set_empty (space);
58 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
59 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
61 return res;
64 static isl_union_map *
65 scop_get_dependences (scop_p scop)
67 isl_union_map *dependences;
69 if (!scop->must_raw)
70 compute_deps (scop, SCOP_BBS (scop),
71 &scop->must_raw, &scop->may_raw,
72 &scop->must_raw_no_source, &scop->may_raw_no_source,
73 &scop->must_war, &scop->may_war,
74 &scop->must_war_no_source, &scop->may_war_no_source,
75 &scop->must_waw, &scop->may_waw,
76 &scop->must_waw_no_source, &scop->may_waw_no_source);
78 dependences = isl_union_map_copy (scop->must_raw);
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->must_war));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->must_waw));
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->may_raw));
85 dependences = isl_union_map_union (dependences,
86 isl_union_map_copy (scop->may_war));
87 dependences = isl_union_map_union (dependences,
88 isl_union_map_copy (scop->may_waw));
90 return dependences;
93 /* getTileMap - Create a map that describes a n-dimensonal tiling.
95 getTileMap creates a map from a n-dimensional scattering space into an
96 2*n-dimensional scattering space. The map describes a rectangular tiling.
98 Example:
99 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
101 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
102 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
103 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
105 Before tiling:
107 for (i = 0; i < N; i++)
108 for (j = 0; j < M; j++)
109 S(i,j)
111 After tiling:
113 for (t_i = 0; t_i < N; i+=32)
114 for (t_j = 0; t_j < M; j+=32)
115 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
116 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
117 S(i,j)
120 static isl_basic_map *
121 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
123 int x;
124 /* We construct
126 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
127 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
128 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
130 and project out the auxilary dimensions a0 and a1. */
131 isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
132 scheduleDimensions * 3);
133 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
135 isl_local_space *LocalSpace = isl_local_space_from_space(Space);
137 for (x = 0; x < scheduleDimensions; x++)
139 int sX = x;
140 int tX = x;
141 int pX = scheduleDimensions + x;
142 int aX = 2 * scheduleDimensions + x;
144 isl_constraint *c;
146 /* sX = aX * tileSize; */
147 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
148 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
149 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
150 tileMap = isl_basic_map_add_constraint(tileMap, c);
152 /* pX = sX; */
153 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
154 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
155 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
156 tileMap = isl_basic_map_add_constraint(tileMap, c);
158 /* tX <= pX */
159 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
160 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
161 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
162 tileMap = isl_basic_map_add_constraint(tileMap, c);
164 /* pX <= tX + (tileSize - 1) */
165 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
166 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
167 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
168 isl_constraint_set_constant_si(c, tileSize - 1);
169 tileMap = isl_basic_map_add_constraint(tileMap, c);
172 /* Project out auxilary dimensions.
174 The auxilary dimensions are transformed into existentially quantified ones.
175 This reduces the number of visible scattering dimensions and allows Cloog
176 to produces better code. */
177 tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
178 2 * scheduleDimensions,
179 scheduleDimensions);
180 isl_local_space_free(LocalSpace);
181 return tileMap;
184 /* getScheduleForBand - Get the schedule for this band.
186 Polly applies transformations like tiling on top of the isl calculated value.
187 This can influence the number of scheduling dimension. The number of
188 schedule dimensions is returned in the parameter 'Dimension'. */
189 static bool DisableTiling = false;
191 static isl_union_map *
192 getScheduleForBand(isl_band *Band, int *Dimensions)
194 isl_union_map *PartialSchedule;
195 isl_ctx *ctx;
196 isl_space *Space;
197 isl_basic_map *TileMap;
198 isl_union_map *TileUMap;
200 PartialSchedule = isl_band_get_partial_schedule(Band);
201 *Dimensions = isl_band_n_member(Band);
203 if (DisableTiling)
204 return PartialSchedule;
206 /* It does not make any sense to tile a band with just one dimension. */
207 if (*Dimensions == 1)
208 return PartialSchedule;
210 ctx = isl_union_map_get_ctx(PartialSchedule);
211 Space = isl_union_map_get_space(PartialSchedule);
213 TileMap = getTileMap(ctx, *Dimensions, 32);
214 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
215 TileUMap = isl_union_map_align_params(TileUMap, Space);
216 *Dimensions = 2 * *Dimensions;
218 return isl_union_map_apply_range(PartialSchedule, TileUMap);
221 /* Create a map that pre-vectorizes one scheduling dimension.
223 getPrevectorMap creates a map that maps each input dimension to the same
224 output dimension, except for the dimension DimToVectorize. DimToVectorize is
225 strip mined by 'VectorWidth' and the newly created point loop of
226 DimToVectorize is moved to the innermost level.
228 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
230 | Before transformation
232 | A[i,j] -> [i,j]
234 | for (i = 0; i < 128; i++)
235 | for (j = 0; j < 128; j++)
236 | A(i,j);
238 Prevector map:
239 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
241 | After transformation:
243 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
245 | for (it = 0; it < 128; it+=4)
246 | for (j = 0; j < 128; j++)
247 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
248 | A(ip,j);
250 The goal of this transformation is to create a trivially vectorizable loop.
251 This means a parallel loop at the innermost level that has a constant number
252 of iterations corresponding to the target vector width.
254 This transformation creates a loop at the innermost level. The loop has a
255 constant number of iterations, if the number of loop iterations at
256 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
257 currently constant and not yet target specific. This function does not reason
258 about parallelism. */
259 static isl_map *
260 getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
261 int ScheduleDimensions,
262 int VectorWidth)
264 isl_space *Space;
265 isl_local_space *LocalSpace, *LocalSpaceRange;
266 isl_set *Modulo;
267 isl_map *TilingMap;
268 isl_constraint *c;
269 isl_aff *Aff;
270 int PointDimension; /* ip */
271 int TileDimension; /* it */
272 isl_int VectorWidthMP;
273 int i;
275 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
277 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
278 TilingMap = isl_map_universe(isl_space_copy(Space));
279 LocalSpace = isl_local_space_from_space(Space);
280 PointDimension = ScheduleDimensions;
281 TileDimension = DimToVectorize;
283 /* Create an identity map for everything except DimToVectorize and map
284 DimToVectorize to the point loop at the innermost dimension. */
285 for (i = 0; i < ScheduleDimensions; i++)
287 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
288 isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
290 if (i == DimToVectorize)
291 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
292 else
293 isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
295 TilingMap = isl_map_add_constraint(TilingMap, c);
298 /* it % 'VectorWidth' = 0 */
299 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
300 Aff = isl_aff_zero_on_domain(LocalSpaceRange);
301 Aff = isl_aff_set_constant_si(Aff, VectorWidth);
302 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
303 isl_int_init(VectorWidthMP);
304 isl_int_set_si(VectorWidthMP, VectorWidth);
305 Aff = isl_aff_mod(Aff, VectorWidthMP);
306 isl_int_clear(VectorWidthMP);
307 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
308 TilingMap = isl_map_intersect_range(TilingMap, Modulo);
310 /* it <= ip */
311 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
312 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
313 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
314 TilingMap = isl_map_add_constraint(TilingMap, c);
316 /* ip <= it + ('VectorWidth' - 1) */
317 c = isl_inequality_alloc(LocalSpace);
318 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
319 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
320 isl_constraint_set_constant_si(c, VectorWidth - 1);
321 TilingMap = isl_map_add_constraint(TilingMap, c);
323 isl_map_dump(TilingMap);
325 return TilingMap;
328 static bool EnablePollyVector = false;
330 /* getScheduleForBandList - Get the scheduling map for a list of bands.
332 We walk recursively the forest of bands to combine the schedules of the
333 individual bands to the overall schedule. In case tiling is requested,
334 the individual bands are tiled. */
335 static isl_union_map *
336 getScheduleForBandList(isl_band_list *BandList)
338 int NumBands, i;
339 isl_union_map *Schedule;
340 isl_ctx *ctx;
342 ctx = isl_band_list_get_ctx(BandList);
343 NumBands = isl_band_list_n_band(BandList);
344 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
346 for (i = 0; i < NumBands; i++)
348 isl_band *Band;
349 isl_union_map *PartialSchedule;
350 int ScheduleDimensions;
351 isl_space *Space;
353 Band = isl_band_list_get_band(BandList, i);
354 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
355 Space = isl_union_map_get_space(PartialSchedule);
357 if (isl_band_has_children(Band))
359 isl_band_list *Children;
360 isl_union_map *SuffixSchedule;
362 Children = isl_band_get_children(Band);
363 SuffixSchedule = getScheduleForBandList(Children);
364 PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
365 SuffixSchedule);
366 isl_band_list_free(Children);
368 else if (EnablePollyVector)
370 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
372 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
373 if (isl_band_member_is_coincident (Band, i))
374 #else
375 if (isl_band_member_is_zero_distance(Band, i))
376 #endif
378 isl_map *TileMap;
379 isl_union_map *TileUMap;
381 TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
382 TileUMap = isl_union_map_from_map(TileMap);
383 TileUMap = isl_union_map_align_params(TileUMap,
384 isl_space_copy(Space));
385 PartialSchedule = isl_union_map_apply_range(PartialSchedule,
386 TileUMap);
387 break;
392 Schedule = isl_union_map_union(Schedule, PartialSchedule);
394 isl_band_free(Band);
395 isl_space_free(Space);
398 return Schedule;
401 static isl_union_map *
402 getScheduleMap(isl_schedule *Schedule)
404 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
405 isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
406 isl_band_list_free(BandList);
407 return ScheduleMap;
410 static int
411 getSingleMap(__isl_take isl_map *map, void *user)
413 isl_map **singleMap = (isl_map **) user;
414 *singleMap = map;
416 return 0;
419 static void
420 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
422 int i;
423 poly_bb_p pbb;
425 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
427 isl_set *domain = isl_set_copy (pbb->domain);
428 isl_union_map *stmtBand;
429 isl_map *stmtSchedule;
431 stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
432 isl_union_set_from_set(domain));
433 isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
434 isl_map_free(pbb->transformed);
435 pbb->transformed = stmtSchedule;
436 isl_union_map_free(stmtBand);
440 static const int CONSTANT_BOUND = 20;
442 bool
443 optimize_isl (scop_p scop)
446 isl_schedule *schedule;
447 isl_union_set *domain;
448 isl_union_map *validity, *proximity, *dependences;
449 isl_union_map *schedule_map;
451 domain = scop_get_domains (scop);
452 dependences = scop_get_dependences (scop);
453 dependences = isl_union_map_gist_domain(dependences,
454 isl_union_set_copy(domain));
455 dependences = isl_union_map_gist_range(dependences,
456 isl_union_set_copy(domain));
457 validity = dependences;
459 proximity = isl_union_map_copy (validity);
461 isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
462 isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
463 isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
464 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
465 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
466 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
468 if (!schedule)
469 return false;
471 schedule_map = getScheduleMap (schedule);
473 apply_schedule_map_to_scop (scop, schedule_map);
475 isl_schedule_free (schedule);
476 isl_union_map_free (schedule_map);
478 return true;
481 #endif