* ipa/devirt9.C: Fix previous change.
[official-gcc.git] / gcc / graphite-optimize-isl.c
blob2260c507f8603bae21af868a5a351df67a359a62
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "gimple-iterator.h"
38 #include "tree-ssa-loop.h"
39 #include "dumpfile.h"
40 #include "cfgloop.h"
41 #include "tree-chrec.h"
42 #include "tree-data-ref.h"
43 #include "tree-scalar-evolution.h"
44 #include "sese.h"
46 #ifdef HAVE_cloog
47 #include "graphite-poly.h"
49 static isl_union_set *
50 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
52 int i;
53 poly_bb_p pbb;
54 isl_space *space = isl_set_get_space (scop->context);
55 isl_union_set *res = isl_union_set_empty (space);
57 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
58 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
60 return res;
63 static isl_union_map *
64 scop_get_dependences (scop_p scop)
66 isl_union_map *dependences;
68 if (!scop->must_raw)
69 compute_deps (scop, SCOP_BBS (scop),
70 &scop->must_raw, &scop->may_raw,
71 &scop->must_raw_no_source, &scop->may_raw_no_source,
72 &scop->must_war, &scop->may_war,
73 &scop->must_war_no_source, &scop->may_war_no_source,
74 &scop->must_waw, &scop->may_waw,
75 &scop->must_waw_no_source, &scop->may_waw_no_source);
77 dependences = isl_union_map_copy (scop->must_raw);
78 dependences = isl_union_map_union (dependences,
79 isl_union_map_copy (scop->must_war));
80 dependences = isl_union_map_union (dependences,
81 isl_union_map_copy (scop->must_waw));
82 dependences = isl_union_map_union (dependences,
83 isl_union_map_copy (scop->may_raw));
84 dependences = isl_union_map_union (dependences,
85 isl_union_map_copy (scop->may_war));
86 dependences = isl_union_map_union (dependences,
87 isl_union_map_copy (scop->may_waw));
89 return dependences;
92 /* getTileMap - Create a map that describes a n-dimensonal tiling.
94 getTileMap creates a map from a n-dimensional scattering space into an
95 2*n-dimensional scattering space. The map describes a rectangular tiling.
97 Example:
98 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
100 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
101 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
102 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
104 Before tiling:
106 for (i = 0; i < N; i++)
107 for (j = 0; j < M; j++)
108 S(i,j)
110 After tiling:
112 for (t_i = 0; t_i < N; i+=32)
113 for (t_j = 0; t_j < M; j+=32)
114 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
115 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
116 S(i,j)
119 static isl_basic_map *
120 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
122 int x;
123 /* We construct
125 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
126 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
127 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
129 and project out the auxilary dimensions a0 and a1. */
130 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
131 scheduleDimensions * 3);
132 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
134 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
136 for (x = 0; x < scheduleDimensions; x++)
138 int sX = x;
139 int tX = x;
140 int pX = scheduleDimensions + x;
141 int aX = 2 * scheduleDimensions + x;
143 isl_constraint *c;
145 /* sX = aX * tileSize; */
146 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
147 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
148 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
149 tileMap = isl_basic_map_add_constraint (tileMap, c);
151 /* pX = sX; */
152 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
153 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
154 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
155 tileMap = isl_basic_map_add_constraint (tileMap, c);
157 /* tX <= pX */
158 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
159 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
160 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
161 tileMap = isl_basic_map_add_constraint (tileMap, c);
163 /* pX <= tX + (tileSize - 1) */
164 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
165 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
166 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
167 isl_constraint_set_constant_si (c, tileSize - 1);
168 tileMap = isl_basic_map_add_constraint (tileMap, c);
171 /* Project out auxiliary dimensions.
173 The auxiliary dimensions are transformed into existentially quantified ones.
174 This reduces the number of visible scattering dimensions and allows Cloog
175 to produces better code. */
176 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
177 2 * scheduleDimensions,
178 scheduleDimensions);
179 isl_local_space_free (LocalSpace);
180 return tileMap;
183 /* getScheduleForBand - Get the schedule for this band.
185 Polly applies transformations like tiling on top of the isl calculated value.
186 This can influence the number of scheduling dimension. The number of
187 schedule dimensions is returned in the parameter 'Dimension'. */
188 static bool DisableTiling = false;
190 static isl_union_map *
191 getScheduleForBand (isl_band *Band, int *Dimensions)
193 isl_union_map *PartialSchedule;
194 isl_ctx *ctx;
195 isl_space *Space;
196 isl_basic_map *TileMap;
197 isl_union_map *TileUMap;
199 PartialSchedule = isl_band_get_partial_schedule (Band);
200 *Dimensions = isl_band_n_member (Band);
202 if (DisableTiling)
203 return PartialSchedule;
205 /* It does not make any sense to tile a band with just one dimension. */
206 if (*Dimensions == 1)
207 return PartialSchedule;
209 ctx = isl_union_map_get_ctx (PartialSchedule);
210 Space = isl_union_map_get_space (PartialSchedule);
212 TileMap = getTileMap (ctx, *Dimensions, 32);
213 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
214 TileUMap = isl_union_map_align_params (TileUMap, Space);
215 *Dimensions = 2 * *Dimensions;
217 return isl_union_map_apply_range (PartialSchedule, TileUMap);
220 /* Create a map that pre-vectorizes one scheduling dimension.
222 getPrevectorMap creates a map that maps each input dimension to the same
223 output dimension, except for the dimension DimToVectorize. DimToVectorize is
224 strip mined by 'VectorWidth' and the newly created point loop of
225 DimToVectorize is moved to the innermost level.
227 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
229 | Before transformation
231 | A[i,j] -> [i,j]
233 | for (i = 0; i < 128; i++)
234 | for (j = 0; j < 128; j++)
235 | A(i,j);
237 Prevector map:
238 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240 | After transformation:
242 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
244 | for (it = 0; it < 128; it+=4)
245 | for (j = 0; j < 128; j++)
246 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
247 | A(ip,j);
249 The goal of this transformation is to create a trivially vectorizable loop.
250 This means a parallel loop at the innermost level that has a constant number
251 of iterations corresponding to the target vector width.
253 This transformation creates a loop at the innermost level. The loop has a
254 constant number of iterations, if the number of loop iterations at
255 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
256 currently constant and not yet target specific. This function does not reason
257 about parallelism. */
258 static isl_map *
259 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
260 int ScheduleDimensions,
261 int VectorWidth)
263 isl_space *Space;
264 isl_local_space *LocalSpace, *LocalSpaceRange;
265 isl_set *Modulo;
266 isl_map *TilingMap;
267 isl_constraint *c;
268 isl_aff *Aff;
269 int PointDimension; /* ip */
270 int TileDimension; /* it */
271 isl_int VectorWidthMP;
272 int i;
274 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
276 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
277 TilingMap = isl_map_universe (isl_space_copy (Space));
278 LocalSpace = isl_local_space_from_space (Space);
279 PointDimension = ScheduleDimensions;
280 TileDimension = DimToVectorize;
282 /* Create an identity map for everything except DimToVectorize and map
283 DimToVectorize to the point loop at the innermost dimension. */
284 for (i = 0; i < ScheduleDimensions; i++)
286 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
287 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
289 if (i == DimToVectorize)
290 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
291 else
292 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
294 TilingMap = isl_map_add_constraint (TilingMap, c);
297 /* it % 'VectorWidth' = 0 */
298 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
299 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
300 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
301 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
302 isl_int_init (VectorWidthMP);
303 isl_int_set_si (VectorWidthMP, VectorWidth);
304 Aff = isl_aff_mod (Aff, VectorWidthMP);
305 isl_int_clear (VectorWidthMP);
306 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
307 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
309 /* it <= ip */
310 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
311 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
312 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
313 TilingMap = isl_map_add_constraint (TilingMap, c);
315 /* ip <= it + ('VectorWidth' - 1) */
316 c = isl_inequality_alloc (LocalSpace);
317 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
318 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
319 isl_constraint_set_constant_si (c, VectorWidth - 1);
320 TilingMap = isl_map_add_constraint (TilingMap, c);
322 isl_map_dump (TilingMap);
324 return TilingMap;
327 static bool EnablePollyVector = false;
329 /* getScheduleForBandList - Get the scheduling map for a list of bands.
331 We walk recursively the forest of bands to combine the schedules of the
332 individual bands to the overall schedule. In case tiling is requested,
333 the individual bands are tiled. */
334 static isl_union_map *
335 getScheduleForBandList (isl_band_list *BandList)
337 int NumBands, i;
338 isl_union_map *Schedule;
339 isl_ctx *ctx;
341 ctx = isl_band_list_get_ctx (BandList);
342 NumBands = isl_band_list_n_band (BandList);
343 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
345 for (i = 0; i < NumBands; i++)
347 isl_band *Band;
348 isl_union_map *PartialSchedule;
349 int ScheduleDimensions;
350 isl_space *Space;
352 Band = isl_band_list_get_band (BandList, i);
353 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
354 Space = isl_union_map_get_space (PartialSchedule);
356 if (isl_band_has_children (Band))
358 isl_band_list *Children;
359 isl_union_map *SuffixSchedule;
361 Children = isl_band_get_children (Band);
362 SuffixSchedule = getScheduleForBandList (Children);
363 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
364 SuffixSchedule);
365 isl_band_list_free (Children);
367 else if (EnablePollyVector)
369 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
371 if (isl_band_member_is_zero_distance (Band, i))
373 isl_map *TileMap;
374 isl_union_map *TileUMap;
376 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
377 TileUMap = isl_union_map_from_map (TileMap);
378 TileUMap = isl_union_map_align_params
379 (TileUMap, isl_space_copy (Space));
380 PartialSchedule = isl_union_map_apply_range
381 (PartialSchedule, TileUMap);
382 break;
387 Schedule = isl_union_map_union (Schedule, PartialSchedule);
389 isl_band_free (Band);
390 isl_space_free (Space);
393 return Schedule;
396 static isl_union_map *
397 getScheduleMap (isl_schedule *Schedule)
399 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
400 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
401 isl_band_list_free (BandList);
402 return ScheduleMap;
405 static int
406 getSingleMap (__isl_take isl_map *map, void *user)
408 isl_map **singleMap = (isl_map **) user;
409 *singleMap = map;
411 return 0;
414 static void
415 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
417 int i;
418 poly_bb_p pbb;
420 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
422 isl_set *domain = isl_set_copy (pbb->domain);
423 isl_union_map *stmtBand;
424 isl_map *stmtSchedule;
426 stmtBand = isl_union_map_intersect_domain
427 (isl_union_map_copy (schedule_map),
428 isl_union_set_from_set (domain));
429 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
430 isl_map_free (pbb->transformed);
431 pbb->transformed = stmtSchedule;
432 isl_union_map_free (stmtBand);
436 static const int CONSTANT_BOUND = 20;
438 bool
439 optimize_isl (scop_p scop)
442 isl_schedule *schedule;
443 isl_union_set *domain;
444 isl_union_map *validity, *proximity, *dependences;
445 isl_union_map *schedule_map;
447 domain = scop_get_domains (scop);
448 dependences = scop_get_dependences (scop);
449 dependences = isl_union_map_gist_domain (dependences,
450 isl_union_set_copy (domain));
451 dependences = isl_union_map_gist_range (dependences,
452 isl_union_set_copy (domain));
453 validity = dependences;
455 proximity = isl_union_map_copy (validity);
457 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
458 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
459 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
460 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
461 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
462 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
464 if (!schedule)
465 return false;
467 schedule_map = getScheduleMap (schedule);
469 apply_schedule_map_to_scop (scop, schedule_map);
471 isl_schedule_free (schedule);
472 isl_union_map_free (schedule_map);
474 return true;
477 #endif