2013-10-11 Marc Glisse <marc.glisse@inria.fr>
[official-gcc.git] / gcc / graphite-optimize-isl.c
blobb9b21566fb52b6378076b09bd15d6a5e93ba70e6
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree-ssa.h"
36 #include "dumpfile.h"
37 #include "cfgloop.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "sese.h"
43 #ifdef HAVE_cloog
44 #include "graphite-poly.h"
46 static isl_union_set *
47 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
49 int i;
50 poly_bb_p pbb;
51 isl_space *space = isl_set_get_space (scop->context);
52 isl_union_set *res = isl_union_set_empty (space);
54 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
55 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
57 return res;
60 static isl_union_map *
61 scop_get_dependences (scop_p scop)
63 isl_union_map *dependences;
65 if (!scop->must_raw)
66 compute_deps (scop, SCOP_BBS (scop),
67 &scop->must_raw, &scop->may_raw,
68 &scop->must_raw_no_source, &scop->may_raw_no_source,
69 &scop->must_war, &scop->may_war,
70 &scop->must_war_no_source, &scop->may_war_no_source,
71 &scop->must_waw, &scop->may_waw,
72 &scop->must_waw_no_source, &scop->may_waw_no_source);
74 dependences = isl_union_map_copy (scop->must_raw);
75 dependences = isl_union_map_union (dependences,
76 isl_union_map_copy (scop->must_war));
77 dependences = isl_union_map_union (dependences,
78 isl_union_map_copy (scop->must_waw));
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->may_raw));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->may_war));
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->may_waw));
86 return dependences;
89 /* getTileMap - Create a map that describes a n-dimensonal tiling.
91 getTileMap creates a map from a n-dimensional scattering space into an
92 2*n-dimensional scattering space. The map describes a rectangular tiling.
94 Example:
95 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
97 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
98 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
99 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
101 Before tiling:
103 for (i = 0; i < N; i++)
104 for (j = 0; j < M; j++)
105 S(i,j)
107 After tiling:
109 for (t_i = 0; t_i < N; i+=32)
110 for (t_j = 0; t_j < M; j+=32)
111 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
112 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
113 S(i,j)
116 static isl_basic_map *
117 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
119 int x;
120 /* We construct
122 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
123 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
124 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
126 and project out the auxilary dimensions a0 and a1. */
127 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
128 scheduleDimensions * 3);
129 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
131 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
133 for (x = 0; x < scheduleDimensions; x++)
135 int sX = x;
136 int tX = x;
137 int pX = scheduleDimensions + x;
138 int aX = 2 * scheduleDimensions + x;
140 isl_constraint *c;
142 /* sX = aX * tileSize; */
143 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
144 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
145 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
146 tileMap = isl_basic_map_add_constraint (tileMap, c);
148 /* pX = sX; */
149 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
150 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
151 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
152 tileMap = isl_basic_map_add_constraint (tileMap, c);
154 /* tX <= pX */
155 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
156 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
157 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
158 tileMap = isl_basic_map_add_constraint (tileMap, c);
160 /* pX <= tX + (tileSize - 1) */
161 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
162 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
163 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
164 isl_constraint_set_constant_si (c, tileSize - 1);
165 tileMap = isl_basic_map_add_constraint (tileMap, c);
168 /* Project out auxiliary dimensions.
170 The auxiliary dimensions are transformed into existentially quantified ones.
171 This reduces the number of visible scattering dimensions and allows Cloog
172 to produces better code. */
173 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
174 2 * scheduleDimensions,
175 scheduleDimensions);
176 isl_local_space_free (LocalSpace);
177 return tileMap;
180 /* getScheduleForBand - Get the schedule for this band.
182 Polly applies transformations like tiling on top of the isl calculated value.
183 This can influence the number of scheduling dimension. The number of
184 schedule dimensions is returned in the parameter 'Dimension'. */
185 static bool DisableTiling = false;
187 static isl_union_map *
188 getScheduleForBand (isl_band *Band, int *Dimensions)
190 isl_union_map *PartialSchedule;
191 isl_ctx *ctx;
192 isl_space *Space;
193 isl_basic_map *TileMap;
194 isl_union_map *TileUMap;
196 PartialSchedule = isl_band_get_partial_schedule (Band);
197 *Dimensions = isl_band_n_member (Band);
199 if (DisableTiling)
200 return PartialSchedule;
202 /* It does not make any sense to tile a band with just one dimension. */
203 if (*Dimensions == 1)
204 return PartialSchedule;
206 ctx = isl_union_map_get_ctx (PartialSchedule);
207 Space = isl_union_map_get_space (PartialSchedule);
209 TileMap = getTileMap (ctx, *Dimensions, 32);
210 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
211 TileUMap = isl_union_map_align_params (TileUMap, Space);
212 *Dimensions = 2 * *Dimensions;
214 return isl_union_map_apply_range (PartialSchedule, TileUMap);
217 /* Create a map that pre-vectorizes one scheduling dimension.
219 getPrevectorMap creates a map that maps each input dimension to the same
220 output dimension, except for the dimension DimToVectorize. DimToVectorize is
221 strip mined by 'VectorWidth' and the newly created point loop of
222 DimToVectorize is moved to the innermost level.
224 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
226 | Before transformation
228 | A[i,j] -> [i,j]
230 | for (i = 0; i < 128; i++)
231 | for (j = 0; j < 128; j++)
232 | A(i,j);
234 Prevector map:
235 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
237 | After transformation:
239 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
241 | for (it = 0; it < 128; it+=4)
242 | for (j = 0; j < 128; j++)
243 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
244 | A(ip,j);
246 The goal of this transformation is to create a trivially vectorizable loop.
247 This means a parallel loop at the innermost level that has a constant number
248 of iterations corresponding to the target vector width.
250 This transformation creates a loop at the innermost level. The loop has a
251 constant number of iterations, if the number of loop iterations at
252 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
253 currently constant and not yet target specific. This function does not reason
254 about parallelism. */
255 static isl_map *
256 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
257 int ScheduleDimensions,
258 int VectorWidth)
260 isl_space *Space;
261 isl_local_space *LocalSpace, *LocalSpaceRange;
262 isl_set *Modulo;
263 isl_map *TilingMap;
264 isl_constraint *c;
265 isl_aff *Aff;
266 int PointDimension; /* ip */
267 int TileDimension; /* it */
268 isl_int VectorWidthMP;
269 int i;
271 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
273 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
274 TilingMap = isl_map_universe (isl_space_copy (Space));
275 LocalSpace = isl_local_space_from_space (Space);
276 PointDimension = ScheduleDimensions;
277 TileDimension = DimToVectorize;
279 /* Create an identity map for everything except DimToVectorize and map
280 DimToVectorize to the point loop at the innermost dimension. */
281 for (i = 0; i < ScheduleDimensions; i++)
283 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
284 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
286 if (i == DimToVectorize)
287 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
288 else
289 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
291 TilingMap = isl_map_add_constraint (TilingMap, c);
294 /* it % 'VectorWidth' = 0 */
295 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
296 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
297 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
298 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
299 isl_int_init (VectorWidthMP);
300 isl_int_set_si (VectorWidthMP, VectorWidth);
301 Aff = isl_aff_mod (Aff, VectorWidthMP);
302 isl_int_clear (VectorWidthMP);
303 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
304 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
306 /* it <= ip */
307 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
308 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
309 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
310 TilingMap = isl_map_add_constraint (TilingMap, c);
312 /* ip <= it + ('VectorWidth' - 1) */
313 c = isl_inequality_alloc (LocalSpace);
314 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
315 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
316 isl_constraint_set_constant_si (c, VectorWidth - 1);
317 TilingMap = isl_map_add_constraint (TilingMap, c);
319 isl_map_dump (TilingMap);
321 return TilingMap;
324 static bool EnablePollyVector = false;
326 /* getScheduleForBandList - Get the scheduling map for a list of bands.
328 We walk recursively the forest of bands to combine the schedules of the
329 individual bands to the overall schedule. In case tiling is requested,
330 the individual bands are tiled. */
331 static isl_union_map *
332 getScheduleForBandList (isl_band_list *BandList)
334 int NumBands, i;
335 isl_union_map *Schedule;
336 isl_ctx *ctx;
338 ctx = isl_band_list_get_ctx (BandList);
339 NumBands = isl_band_list_n_band (BandList);
340 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
342 for (i = 0; i < NumBands; i++)
344 isl_band *Band;
345 isl_union_map *PartialSchedule;
346 int ScheduleDimensions;
347 isl_space *Space;
349 Band = isl_band_list_get_band (BandList, i);
350 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
351 Space = isl_union_map_get_space (PartialSchedule);
353 if (isl_band_has_children (Band))
355 isl_band_list *Children;
356 isl_union_map *SuffixSchedule;
358 Children = isl_band_get_children (Band);
359 SuffixSchedule = getScheduleForBandList (Children);
360 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
361 SuffixSchedule);
362 isl_band_list_free (Children);
364 else if (EnablePollyVector)
366 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
368 if (isl_band_member_is_zero_distance (Band, i))
370 isl_map *TileMap;
371 isl_union_map *TileUMap;
373 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
374 TileUMap = isl_union_map_from_map (TileMap);
375 TileUMap = isl_union_map_align_params
376 (TileUMap, isl_space_copy (Space));
377 PartialSchedule = isl_union_map_apply_range
378 (PartialSchedule, TileUMap);
379 break;
384 Schedule = isl_union_map_union (Schedule, PartialSchedule);
386 isl_band_free (Band);
387 isl_space_free (Space);
390 return Schedule;
393 static isl_union_map *
394 getScheduleMap (isl_schedule *Schedule)
396 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
397 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
398 isl_band_list_free (BandList);
399 return ScheduleMap;
402 static int
403 getSingleMap (__isl_take isl_map *map, void *user)
405 isl_map **singleMap = (isl_map **) user;
406 *singleMap = map;
408 return 0;
411 static void
412 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
414 int i;
415 poly_bb_p pbb;
417 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
419 isl_set *domain = isl_set_copy (pbb->domain);
420 isl_union_map *stmtBand;
421 isl_map *stmtSchedule;
423 stmtBand = isl_union_map_intersect_domain
424 (isl_union_map_copy (schedule_map),
425 isl_union_set_from_set (domain));
426 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
427 isl_map_free (pbb->transformed);
428 pbb->transformed = stmtSchedule;
429 isl_union_map_free (stmtBand);
433 static const int CONSTANT_BOUND = 20;
435 bool
436 optimize_isl (scop_p scop)
439 isl_schedule *schedule;
440 isl_union_set *domain;
441 isl_union_map *validity, *proximity, *dependences;
442 isl_union_map *schedule_map;
444 domain = scop_get_domains (scop);
445 dependences = scop_get_dependences (scop);
446 dependences = isl_union_map_gist_domain (dependences,
447 isl_union_set_copy (domain));
448 dependences = isl_union_map_gist_range (dependences,
449 isl_union_set_copy (domain));
450 validity = dependences;
452 proximity = isl_union_map_copy (validity);
454 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
455 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
456 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
457 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
458 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
459 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
461 if (!schedule)
462 return false;
464 schedule_map = getScheduleMap (schedule);
466 apply_schedule_map_to_scop (scop, schedule_map);
468 isl_schedule_free (schedule);
469 isl_union_map_free (schedule_map);
471 return true;
474 #endif