1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2016 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
28 #include "coretypes.h"
33 #include "fold-const.h"
34 #include "gimple-iterator.h"
35 #include "tree-ssa-loop.h"
37 #include "tree-data-ref.h"
42 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
43 /* isl 0.15 or later. */
45 /* get_schedule_for_node_st - Improve schedule for the schedule node.
46 Only Simple loop tiling is considered. */
48 static __isl_give isl_schedule_node
*
49 get_schedule_for_node_st (__isl_take isl_schedule_node
*node
, void *user
)
54 if (isl_schedule_node_get_type (node
) != isl_schedule_node_band
55 || isl_schedule_node_n_children (node
) != 1)
58 isl_space
*space
= isl_schedule_node_band_get_space (node
);
59 unsigned dims
= isl_space_dim (space
, isl_dim_set
);
60 isl_schedule_node
*child
= isl_schedule_node_get_child (node
, 0);
61 isl_schedule_node_type type
= isl_schedule_node_get_type (child
);
62 isl_space_free (space
);
63 isl_schedule_node_free (child
);
65 if (type
!= isl_schedule_node_leaf
)
68 if (dims
<= 1 || !isl_schedule_node_band_get_permutable (node
))
70 if (dump_file
&& dump_flags
)
71 fprintf (dump_file
, "not tiled\n");
76 space
= isl_schedule_node_band_get_space (node
);
77 isl_multi_val
*sizes
= isl_multi_val_zero (space
);
78 long tile_size
= PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE
);
79 isl_ctx
*ctx
= isl_schedule_node_get_ctx (node
);
81 for (unsigned i
= 0; i
< dims
; i
++)
83 sizes
= isl_multi_val_set_val (sizes
, i
,
84 isl_val_int_from_si (ctx
, tile_size
));
85 if (dump_file
&& dump_flags
)
86 fprintf (dump_file
, "tiled by %ld\n", tile_size
);
89 node
= isl_schedule_node_band_tile (node
, sizes
);
90 node
= isl_schedule_node_child (node
, 0);
95 static isl_union_set
*
96 scop_get_domains (scop_p scop
)
100 isl_space
*space
= isl_set_get_space (scop
->param_context
);
101 isl_union_set
*res
= isl_union_set_empty (space
);
103 FOR_EACH_VEC_ELT (scop
->pbbs
, i
, pbb
)
104 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
109 /* Compute the schedule for SCOP based on its parameters, domain and set of
110 constraints. Then apply the schedule to SCOP. */
113 optimize_isl (scop_p scop
)
115 int old_max_operations
= isl_ctx_get_max_operations (scop
->isl_context
);
116 int max_operations
= PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS
);
118 isl_ctx_set_max_operations (scop
->isl_context
, max_operations
);
119 isl_options_set_on_error (scop
->isl_context
, ISL_ON_ERROR_CONTINUE
);
121 isl_union_set
*domain
= scop_get_domains (scop
);
123 /* Simplify the dependences on the domain. */
124 scop_get_dependences (scop
);
125 isl_union_map
*dependences
126 = isl_union_map_gist_domain (isl_union_map_copy (scop
->dependence
),
127 isl_union_set_copy (domain
));
128 isl_union_map
*validity
129 = isl_union_map_gist_range (dependences
, isl_union_set_copy (domain
));
131 /* FIXME: proximity should not be validity. */
132 isl_union_map
*proximity
= isl_union_map_copy (validity
);
134 isl_schedule_constraints
*sc
= isl_schedule_constraints_on_domain (domain
);
135 sc
= isl_schedule_constraints_set_proximity (sc
, proximity
);
136 sc
= isl_schedule_constraints_set_validity (sc
, isl_union_map_copy (validity
));
137 sc
= isl_schedule_constraints_set_coincidence (sc
, validity
);
139 isl_options_set_schedule_serialize_sccs (scop
->isl_context
, 0);
140 isl_options_set_schedule_maximize_band_depth (scop
->isl_context
, 1);
141 isl_options_set_schedule_max_constant_term (scop
->isl_context
, 20);
142 isl_options_set_schedule_max_coefficient (scop
->isl_context
, 20);
143 isl_options_set_tile_scale_tile_loops (scop
->isl_context
, 0);
144 /* Generate loop upper bounds that consist of the current loop iterator, an
145 operator (< or <=) and an expression not involving the iterator. If this
146 option is not set, then the current loop iterator may appear several times
147 in the upper bound. See the isl manual for more details. */
148 isl_options_set_ast_build_atomic_upper_bound (scop
->isl_context
, 1);
150 scop
->transformed_schedule
= isl_schedule_constraints_compute_schedule (sc
);
151 scop
->transformed_schedule
=
152 isl_schedule_map_schedule_node_bottom_up (scop
->transformed_schedule
,
153 get_schedule_for_node_st
, NULL
);
154 isl_options_set_on_error (scop
->isl_context
, ISL_ON_ERROR_ABORT
);
156 isl_ctx_reset_operations (scop
->isl_context
);
157 isl_ctx_set_max_operations (scop
->isl_context
, old_max_operations
);
158 if (!scop
->transformed_schedule
159 || isl_ctx_last_error (scop
->isl_context
) == isl_error_quota
)
161 if (dump_file
&& dump_flags
)
162 fprintf (dump_file
, "isl timed out --param max-isl-operations=%d\n",
167 gcc_assert (scop
->original_schedule
);
168 isl_union_map
*original
= isl_schedule_get_map (scop
->original_schedule
);
169 isl_union_map
*transformed
= isl_schedule_get_map (scop
->transformed_schedule
);
170 bool same_schedule
= isl_union_map_is_equal (original
, transformed
);
171 isl_union_map_free (original
);
172 isl_union_map_free (transformed
);
178 fprintf (dump_file
, "[scheduler] isl optimized schedule is "
179 "identical to the original schedule.\n");
180 print_schedule_ast (dump_file
, scop
->original_schedule
, scop
);
182 isl_schedule_free (scop
->transformed_schedule
);
183 scop
->transformed_schedule
= isl_schedule_copy (scop
->original_schedule
);
190 /* Apply graphite transformations to all the basic blocks of SCOP. */
193 apply_poly_transforms (scop_p scop
)
195 if (flag_loop_nest_optimize
)
196 return optimize_isl (scop
);
198 if (!flag_graphite_identity
&& !flag_loop_parallelize_all
)
201 /* Generate code even if we did not apply any real transformation.
202 This also allows to check the performance for the identity
203 transformation: GIMPLE -> GRAPHITE -> GIMPLE. */
204 gcc_assert (scop
->original_schedule
);
205 scop
->transformed_schedule
= isl_schedule_copy (scop
->original_schedule
);
211 /* get_tile_map - Create a map that describes a n-dimensonal tiling.
213 get_tile_map creates a map from a n-dimensional scattering space into an
214 2*n-dimensional scattering space. The map describes a rectangular tiling.
217 SCHEDULE_DIMENSIONS = 2, PARAMETER_DIMENSIONS = 1, TILE_SIZE = 32
219 tile_map := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
220 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
221 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
225 for (i = 0; i < N; i++)
226 for (j = 0; j < M; j++)
231 for (t_i = 0; t_i < N; i+=32)
232 for (t_j = 0; t_j < M; j+=32)
233 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
234 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
238 static isl_basic_map
*
239 get_tile_map (isl_ctx
*ctx
, int schedule_dimensions
, int tile_size
)
243 tile_map := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
244 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
245 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
247 and project out the auxilary dimensions a0 and a1. */
249 = isl_space_alloc (ctx
, 0, schedule_dimensions
, schedule_dimensions
* 3);
250 isl_basic_map
*tile_map
= isl_basic_map_universe (isl_space_copy (space
));
252 isl_local_space
*local_space
= isl_local_space_from_space (space
);
254 for (int x
= 0; x
< schedule_dimensions
; x
++)
258 int pX
= schedule_dimensions
+ x
;
259 int aX
= 2 * schedule_dimensions
+ x
;
263 /* sX = aX * tile_size; */
264 c
= isl_equality_alloc (isl_local_space_copy (local_space
));
265 isl_constraint_set_coefficient_si (c
, isl_dim_out
, sX
, 1);
266 isl_constraint_set_coefficient_si (c
, isl_dim_out
, aX
, -tile_size
);
267 tile_map
= isl_basic_map_add_constraint (tile_map
, c
);
270 c
= isl_equality_alloc (isl_local_space_copy (local_space
));
271 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
272 isl_constraint_set_coefficient_si (c
, isl_dim_in
, sX
, -1);
273 tile_map
= isl_basic_map_add_constraint (tile_map
, c
);
276 c
= isl_inequality_alloc (isl_local_space_copy (local_space
));
277 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, 1);
278 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, -1);
279 tile_map
= isl_basic_map_add_constraint (tile_map
, c
);
281 /* pX <= tX + (tile_size - 1) */
282 c
= isl_inequality_alloc (isl_local_space_copy (local_space
));
283 isl_constraint_set_coefficient_si (c
, isl_dim_out
, tX
, 1);
284 isl_constraint_set_coefficient_si (c
, isl_dim_out
, pX
, -1);
285 isl_constraint_set_constant_si (c
, tile_size
- 1);
286 tile_map
= isl_basic_map_add_constraint (tile_map
, c
);
289 /* Project out auxiliary dimensions.
291 The auxiliary dimensions are transformed into existentially quantified
293 This reduces the number of visible scattering dimensions and allows isl
294 to produces better code. */
296 isl_basic_map_project_out (tile_map
, isl_dim_out
,
297 2 * schedule_dimensions
, schedule_dimensions
);
298 isl_local_space_free (local_space
);
302 /* get_schedule_for_band - Get the schedule for this BAND.
304 Polly applies transformations like tiling on top of the isl calculated
306 This can influence the number of scheduling dimension. The number of
307 schedule dimensions is returned in DIMENSIONS. */
309 static isl_union_map
*
310 get_schedule_for_band (isl_band
*band
, int *dimensions
)
312 isl_union_map
*partial_schedule
;
315 isl_basic_map
*tile_map
;
316 isl_union_map
*tile_umap
;
318 partial_schedule
= isl_band_get_partial_schedule (band
);
319 *dimensions
= isl_band_n_member (band
);
321 /* It does not make any sense to tile a band with just one dimension. */
322 if (*dimensions
== 1)
324 if (dump_file
&& dump_flags
)
325 fprintf (dump_file
, "not tiled\n");
326 return partial_schedule
;
329 if (dump_file
&& dump_flags
)
330 fprintf (dump_file
, "tiled by %d\n",
331 PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE
));
333 ctx
= isl_union_map_get_ctx (partial_schedule
);
334 space
= isl_union_map_get_space (partial_schedule
);
336 tile_map
= get_tile_map (ctx
, *dimensions
,
337 PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE
));
338 tile_umap
= isl_union_map_from_map (isl_map_from_basic_map (tile_map
));
339 tile_umap
= isl_union_map_align_params (tile_umap
, space
);
340 tile_umap
= isl_union_map_coalesce (tile_umap
);
341 *dimensions
= 2 * *dimensions
;
343 return isl_union_map_apply_range (partial_schedule
, tile_umap
);
347 /* get_schedule_for_band_list - Get the scheduling map for a list of bands.
349 We walk recursively the forest of bands to combine the schedules of the
350 individual bands to the overall schedule. In case tiling is requested,
351 the individual bands are tiled. */
353 static isl_union_map
*
354 get_schedule_for_band_list (isl_band_list
*band_list
)
357 isl_union_map
*schedule
;
360 ctx
= isl_band_list_get_ctx (band_list
);
361 num_bands
= isl_band_list_n_band (band_list
);
362 schedule
= isl_union_map_empty (isl_space_params_alloc (ctx
, 0));
364 for (i
= 0; i
< num_bands
; i
++)
367 isl_union_map
*partial_schedule
;
368 int schedule_dimensions
;
371 band
= isl_band_list_get_band (band_list
, i
);
372 partial_schedule
= get_schedule_for_band (band
, &schedule_dimensions
);
373 space
= isl_union_map_get_space (partial_schedule
);
375 if (isl_band_has_children (band
))
377 isl_band_list
*children
= isl_band_get_children (band
);
378 isl_union_map
*suffixSchedule
379 = get_schedule_for_band_list (children
);
381 = isl_union_map_flat_range_product (partial_schedule
,
383 isl_band_list_free (children
);
386 schedule
= isl_union_map_union (schedule
, partial_schedule
);
388 isl_band_free (band
);
389 isl_space_free (space
);
392 return isl_union_map_coalesce (schedule
);
395 static isl_union_map
*
396 get_schedule_map (isl_schedule
*schedule
)
398 isl_band_list
*band_list
= isl_schedule_get_band_forest (schedule
);
399 isl_union_map
*schedule_map
= get_schedule_for_band_list (band_list
);
400 isl_band_list_free (band_list
);
405 get_single_map (__isl_take isl_map
*map
, void *user
)
407 isl_map
**single_map
= (isl_map
**)user
;
413 apply_schedule_map_to_scop (scop_p scop
, isl_union_map
*schedule_map
)
418 FOR_EACH_VEC_ELT (scop
->pbbs
, i
, pbb
)
420 isl_set
*domain
= isl_set_copy (pbb
->domain
);
421 isl_map
*stmt_schedule
;
423 isl_union_map
*stmt_band
424 = isl_union_map_intersect_domain (isl_union_map_copy (schedule_map
),
425 isl_union_set_from_set (domain
));
426 stmt_band
= isl_union_map_coalesce (stmt_band
);
427 isl_union_map_foreach_map (stmt_band
, get_single_map
, &stmt_schedule
);
428 isl_map_free (pbb
->transformed
);
429 pbb
->transformed
= isl_map_coalesce (stmt_schedule
);
430 isl_union_map_free (stmt_band
);
434 static isl_union_set
*
435 scop_get_domains (scop_p scop
)
439 isl_space
*space
= isl_set_get_space (scop
->param_context
);
440 isl_union_set
*res
= isl_union_set_empty (space
);
442 FOR_EACH_VEC_ELT (scop
->pbbs
, i
, pbb
)
443 res
= isl_union_set_add_set (res
, isl_set_copy (pbb
->domain
));
448 /* Compute the schedule for SCOP based on its parameters, domain and set of
449 constraints. Then apply the schedule to SCOP. */
452 optimize_isl (scop_p scop
)
454 int old_max_operations
= isl_ctx_get_max_operations (scop
->isl_context
);
455 int max_operations
= PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS
);
457 isl_ctx_set_max_operations (scop
->isl_context
, max_operations
);
458 isl_options_set_on_error (scop
->isl_context
, ISL_ON_ERROR_CONTINUE
);
460 isl_union_set
*domain
= scop_get_domains (scop
);
461 scop_get_dependences (scop
);
463 = isl_union_map_gist_domain (scop
->dependence
, isl_union_set_copy (domain
));
465 = isl_union_map_gist_range (scop
->dependence
, isl_union_set_copy (domain
));
466 isl_union_map
*validity
= isl_union_map_copy (scop
->dependence
);
467 isl_union_map
*proximity
= isl_union_map_copy (validity
);
469 isl_options_set_schedule_fuse (scop
->isl_context
, ISL_SCHEDULE_FUSE_MIN
);
470 isl_schedule
*schedule
471 = isl_union_set_compute_schedule (domain
, validity
, proximity
);
473 isl_options_set_on_error (scop
->isl_context
, ISL_ON_ERROR_ABORT
);
475 isl_ctx_reset_operations (scop
->isl_context
);
476 isl_ctx_set_max_operations (scop
->isl_context
, old_max_operations
);
477 if (!schedule
|| isl_ctx_last_error (scop
->isl_context
) == isl_error_quota
)
479 if (dump_file
&& dump_flags
)
482 fprintf (dump_file
, "isl did not return any schedule.\n");
484 fprintf (dump_file
, "isl timed out --param max-isl-operations=%d\n",
489 isl_schedule_free (schedule
);
493 scop
->schedule
= schedule
;
495 isl_union_map
*schedule_map
= get_schedule_map (schedule
);
496 apply_schedule_map_to_scop (scop
, schedule_map
);
497 isl_union_map_free (schedule_map
);
501 fprintf (dump_file
, "isl end schedule:\n");
502 print_isl_schedule (dump_file
, scop
->schedule
);
508 /* Apply graphite transformations to all the basic blocks of SCOP. */
511 apply_poly_transforms (scop_p scop
)
513 if (flag_loop_nest_optimize
)
514 return optimize_isl (scop
);
516 if (!flag_graphite_identity
&& !flag_loop_parallelize_all
)
519 /* Generate code even if we did not apply any real transformation.
520 This also allows to check the performance for the identity
521 transformation: GIMPLE -> GRAPHITE -> GIMPLE. */
525 #endif /* HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS */
527 #endif /* HAVE_isl */