Merge from trunk: 215733-215743
[official-gcc.git] / gcc-4_9 / gcc / graphite-optimize-isl.c
blobfc12eebbf109204bd77dba8f8ab0091fcfad4c10
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
32 #include <isl/deprecated/int.h>
33 #include <isl/deprecated/aff_int.h>
34 #endif
35 #endif
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "basic-block.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "gimple-expr.h"
44 #include "is-a.h"
45 #include "gimple.h"
46 #include "gimple-iterator.h"
47 #include "tree-ssa-loop.h"
48 #include "dumpfile.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "sese.h"
55 #ifdef HAVE_cloog
56 #include "graphite-poly.h"
58 static isl_union_set *
59 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
61 int i;
62 poly_bb_p pbb;
63 isl_space *space = isl_set_get_space (scop->context);
64 isl_union_set *res = isl_union_set_empty (space);
66 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
67 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
69 return res;
72 static isl_union_map *
73 scop_get_dependences (scop_p scop)
75 isl_union_map *dependences;
77 if (!scop->must_raw)
78 compute_deps (scop, SCOP_BBS (scop),
79 &scop->must_raw, &scop->may_raw,
80 &scop->must_raw_no_source, &scop->may_raw_no_source,
81 &scop->must_war, &scop->may_war,
82 &scop->must_war_no_source, &scop->may_war_no_source,
83 &scop->must_waw, &scop->may_waw,
84 &scop->must_waw_no_source, &scop->may_waw_no_source);
86 dependences = isl_union_map_copy (scop->must_raw);
87 dependences = isl_union_map_union (dependences,
88 isl_union_map_copy (scop->must_war));
89 dependences = isl_union_map_union (dependences,
90 isl_union_map_copy (scop->must_waw));
91 dependences = isl_union_map_union (dependences,
92 isl_union_map_copy (scop->may_raw));
93 dependences = isl_union_map_union (dependences,
94 isl_union_map_copy (scop->may_war));
95 dependences = isl_union_map_union (dependences,
96 isl_union_map_copy (scop->may_waw));
98 return dependences;
101 /* getTileMap - Create a map that describes a n-dimensonal tiling.
103 getTileMap creates a map from a n-dimensional scattering space into an
104 2*n-dimensional scattering space. The map describes a rectangular tiling.
106 Example:
107 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
109 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
110 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
111 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
113 Before tiling:
115 for (i = 0; i < N; i++)
116 for (j = 0; j < M; j++)
117 S(i,j)
119 After tiling:
121 for (t_i = 0; t_i < N; i+=32)
122 for (t_j = 0; t_j < M; j+=32)
123 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
124 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
125 S(i,j)
128 static isl_basic_map *
129 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
131 int x;
132 /* We construct
134 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
135 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
136 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
138 and project out the auxilary dimensions a0 and a1. */
139 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
140 scheduleDimensions * 3);
141 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
143 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
145 for (x = 0; x < scheduleDimensions; x++)
147 int sX = x;
148 int tX = x;
149 int pX = scheduleDimensions + x;
150 int aX = 2 * scheduleDimensions + x;
152 isl_constraint *c;
154 /* sX = aX * tileSize; */
155 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
156 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
157 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
158 tileMap = isl_basic_map_add_constraint (tileMap, c);
160 /* pX = sX; */
161 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
162 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
163 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
164 tileMap = isl_basic_map_add_constraint (tileMap, c);
166 /* tX <= pX */
167 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
168 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
169 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
170 tileMap = isl_basic_map_add_constraint (tileMap, c);
172 /* pX <= tX + (tileSize - 1) */
173 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
174 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
175 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
176 isl_constraint_set_constant_si (c, tileSize - 1);
177 tileMap = isl_basic_map_add_constraint (tileMap, c);
180 /* Project out auxiliary dimensions.
182 The auxiliary dimensions are transformed into existentially quantified ones.
183 This reduces the number of visible scattering dimensions and allows Cloog
184 to produces better code. */
185 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
186 2 * scheduleDimensions,
187 scheduleDimensions);
188 isl_local_space_free (LocalSpace);
189 return tileMap;
192 /* getScheduleForBand - Get the schedule for this band.
194 Polly applies transformations like tiling on top of the isl calculated value.
195 This can influence the number of scheduling dimension. The number of
196 schedule dimensions is returned in the parameter 'Dimension'. */
197 static bool DisableTiling = false;
199 static isl_union_map *
200 getScheduleForBand (isl_band *Band, int *Dimensions)
202 isl_union_map *PartialSchedule;
203 isl_ctx *ctx;
204 isl_space *Space;
205 isl_basic_map *TileMap;
206 isl_union_map *TileUMap;
208 PartialSchedule = isl_band_get_partial_schedule (Band);
209 *Dimensions = isl_band_n_member (Band);
211 if (DisableTiling)
212 return PartialSchedule;
214 /* It does not make any sense to tile a band with just one dimension. */
215 if (*Dimensions == 1)
216 return PartialSchedule;
218 ctx = isl_union_map_get_ctx (PartialSchedule);
219 Space = isl_union_map_get_space (PartialSchedule);
221 TileMap = getTileMap (ctx, *Dimensions, 32);
222 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
223 TileUMap = isl_union_map_align_params (TileUMap, Space);
224 *Dimensions = 2 * *Dimensions;
226 return isl_union_map_apply_range (PartialSchedule, TileUMap);
229 /* Create a map that pre-vectorizes one scheduling dimension.
231 getPrevectorMap creates a map that maps each input dimension to the same
232 output dimension, except for the dimension DimToVectorize. DimToVectorize is
233 strip mined by 'VectorWidth' and the newly created point loop of
234 DimToVectorize is moved to the innermost level.
236 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
238 | Before transformation
240 | A[i,j] -> [i,j]
242 | for (i = 0; i < 128; i++)
243 | for (j = 0; j < 128; j++)
244 | A(i,j);
246 Prevector map:
247 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
249 | After transformation:
251 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
253 | for (it = 0; it < 128; it+=4)
254 | for (j = 0; j < 128; j++)
255 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
256 | A(ip,j);
258 The goal of this transformation is to create a trivially vectorizable loop.
259 This means a parallel loop at the innermost level that has a constant number
260 of iterations corresponding to the target vector width.
262 This transformation creates a loop at the innermost level. The loop has a
263 constant number of iterations, if the number of loop iterations at
264 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
265 currently constant and not yet target specific. This function does not reason
266 about parallelism. */
267 static isl_map *
268 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
269 int ScheduleDimensions,
270 int VectorWidth)
272 isl_space *Space;
273 isl_local_space *LocalSpace, *LocalSpaceRange;
274 isl_set *Modulo;
275 isl_map *TilingMap;
276 isl_constraint *c;
277 isl_aff *Aff;
278 int PointDimension; /* ip */
279 int TileDimension; /* it */
280 isl_int VectorWidthMP;
281 int i;
283 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
285 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
286 TilingMap = isl_map_universe (isl_space_copy (Space));
287 LocalSpace = isl_local_space_from_space (Space);
288 PointDimension = ScheduleDimensions;
289 TileDimension = DimToVectorize;
291 /* Create an identity map for everything except DimToVectorize and map
292 DimToVectorize to the point loop at the innermost dimension. */
293 for (i = 0; i < ScheduleDimensions; i++)
295 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
296 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
298 if (i == DimToVectorize)
299 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
300 else
301 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
303 TilingMap = isl_map_add_constraint (TilingMap, c);
306 /* it % 'VectorWidth' = 0 */
307 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
308 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
309 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
310 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
311 isl_int_init (VectorWidthMP);
312 isl_int_set_si (VectorWidthMP, VectorWidth);
313 Aff = isl_aff_mod (Aff, VectorWidthMP);
314 isl_int_clear (VectorWidthMP);
315 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
316 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
318 /* it <= ip */
319 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
320 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
321 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
322 TilingMap = isl_map_add_constraint (TilingMap, c);
324 /* ip <= it + ('VectorWidth' - 1) */
325 c = isl_inequality_alloc (LocalSpace);
326 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
327 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
328 isl_constraint_set_constant_si (c, VectorWidth - 1);
329 TilingMap = isl_map_add_constraint (TilingMap, c);
331 isl_map_dump (TilingMap);
333 return TilingMap;
336 static bool EnablePollyVector = false;
338 /* getScheduleForBandList - Get the scheduling map for a list of bands.
340 We walk recursively the forest of bands to combine the schedules of the
341 individual bands to the overall schedule. In case tiling is requested,
342 the individual bands are tiled. */
343 static isl_union_map *
344 getScheduleForBandList (isl_band_list *BandList)
346 int NumBands, i;
347 isl_union_map *Schedule;
348 isl_ctx *ctx;
350 ctx = isl_band_list_get_ctx (BandList);
351 NumBands = isl_band_list_n_band (BandList);
352 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
354 for (i = 0; i < NumBands; i++)
356 isl_band *Band;
357 isl_union_map *PartialSchedule;
358 int ScheduleDimensions;
359 isl_space *Space;
361 Band = isl_band_list_get_band (BandList, i);
362 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
363 Space = isl_union_map_get_space (PartialSchedule);
365 if (isl_band_has_children (Band))
367 isl_band_list *Children;
368 isl_union_map *SuffixSchedule;
370 Children = isl_band_get_children (Band);
371 SuffixSchedule = getScheduleForBandList (Children);
372 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
373 SuffixSchedule);
374 isl_band_list_free (Children);
376 else if (EnablePollyVector)
378 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
380 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
381 if (isl_band_member_is_coincident (Band, i))
382 #else
383 if (isl_band_member_is_zero_distance (Band, i))
384 #endif
386 isl_map *TileMap;
387 isl_union_map *TileUMap;
389 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
390 TileUMap = isl_union_map_from_map (TileMap);
391 TileUMap = isl_union_map_align_params
392 (TileUMap, isl_space_copy (Space));
393 PartialSchedule = isl_union_map_apply_range
394 (PartialSchedule, TileUMap);
395 break;
400 Schedule = isl_union_map_union (Schedule, PartialSchedule);
402 isl_band_free (Band);
403 isl_space_free (Space);
406 return Schedule;
409 static isl_union_map *
410 getScheduleMap (isl_schedule *Schedule)
412 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
413 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
414 isl_band_list_free (BandList);
415 return ScheduleMap;
418 static int
419 getSingleMap (__isl_take isl_map *map, void *user)
421 isl_map **singleMap = (isl_map **) user;
422 *singleMap = map;
424 return 0;
427 static void
428 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
430 int i;
431 poly_bb_p pbb;
433 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
435 isl_set *domain = isl_set_copy (pbb->domain);
436 isl_union_map *stmtBand;
437 isl_map *stmtSchedule;
439 stmtBand = isl_union_map_intersect_domain
440 (isl_union_map_copy (schedule_map),
441 isl_union_set_from_set (domain));
442 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
443 isl_map_free (pbb->transformed);
444 pbb->transformed = stmtSchedule;
445 isl_union_map_free (stmtBand);
449 static const int CONSTANT_BOUND = 20;
451 bool
452 optimize_isl (scop_p scop)
455 isl_schedule *schedule;
456 isl_union_set *domain;
457 isl_union_map *validity, *proximity, *dependences;
458 isl_union_map *schedule_map;
460 domain = scop_get_domains (scop);
461 dependences = scop_get_dependences (scop);
462 dependences = isl_union_map_gist_domain (dependences,
463 isl_union_set_copy (domain));
464 dependences = isl_union_map_gist_range (dependences,
465 isl_union_set_copy (domain));
466 validity = dependences;
468 proximity = isl_union_map_copy (validity);
470 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
471 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
472 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
473 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
474 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
475 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
477 if (!schedule)
478 return false;
480 schedule_map = getScheduleMap (schedule);
482 apply_schedule_map_to_scop (scop, schedule_map);
484 isl_schedule_free (schedule);
485 isl_union_map_free (schedule_map);
487 return true;
490 #endif