1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
33 #include "tree-dump.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-pass.h"
41 #include "value-prof.h"
42 #include "pointer-set.h"
49 #include "graphite-ppl.h"
51 #include "graphite-poly.h"
53 /* The loop flattening pass transforms loop nests into a single loop,
54 removing the loop nesting structure. The auto-vectorization can
55 then apply on the full loop body, without needing the outer-loop
58 The loop flattening pass that has been described in a very Fortran
59 specific way in the 1992 paper by Reinhard von Hanxleden and Ken
60 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
61 Transformations" available from
62 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
64 The canonical example is as follows: suppose that we have a loop
65 nest with known iteration counts
67 | for (i = 1; i <= 6; i++)
68 | for (j = 1; j <= 6; j++)
71 The loop flattening is performed by linearizing the iteration space
72 using the function "f (x) = 6 * i + j". In this case, CLooG would
75 | for (c1=7;c1<=42;c1++) {
80 There are several limitations for loop flattening that are linked
81 to the expressivity of the polyhedral model. One has to take an
82 upper bound approximation to deal with the parametric case of loop
83 flattening. For example, in the loop nest:
85 | for (i = 1; i <= N; i++)
86 | for (j = 1; j <= M; j++)
89 One would like to flatten this loop using a linearization function
90 like this "f (x) = M * i + j". However CLooG's schedules are not
91 expressive enough to deal with this case, and so the parameter M
92 has to be replaced by an integer upper bound approximation. If we
93 further know in the context of the scop that "M <= 6", then it is
94 possible to linearize the loop with "f (x) = 6 * i + j". In this
95 case, CLooG would produce this code:
97 | for (c1=7;c1<=6*M+N;c1++) {
99 | if (i <= floord(c1-1,6)) {
104 For an arbitrarily complex loop nest the algorithm proceeds in two
105 steps. First, the LST is flattened by removing the loops structure
106 and by inserting the statements in the order they appear in
107 depth-first order. Then, the scattering of each statement is
108 transformed accordingly.
110 Supposing that the original program is represented by the following
117 | (loop_4 stmt_5 stmt_6)
123 Loop flattening traverses the LST in depth-first order, and
124 flattens pairs of loops successively by projecting the inner loops
125 in the iteration domain of the outer loops:
127 lst_project_loop (loop_2, loop_3, stride)
131 | (loop_2 stmt_3 stmt_4
132 | (loop_4 stmt_5 stmt_6)
138 lst_project_loop (loop_2, loop_4, stride)
142 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
146 lst_project_loop (loop_1, loop_2, stride)
149 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
152 At each step, the iteration domain of the outer loop is enlarged to
153 contain enough points to iterate over the inner loop domain. */
155 /* Initializes RES to the number of iterations of the linearized loop
156 LST. RES is the cardinal of the iteration domain of LST. */
159 lst_linearized_niter (lst_p lst
, mpz_t res
)
168 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (lst
), i
, l
)
171 lst_linearized_niter (l
, n
);
172 mpz_add (res
, res
, n
);
175 if (LST_LOOP_P (lst
))
177 lst_niter_for_loop (lst
, n
);
179 if (mpz_cmp_si (res
, 0) != 0)
180 mpz_mul (res
, res
, n
);
188 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
192 lst_offset (lst_p stmt
, mpz_t offset
)
194 lst_p inner
= LST_LOOP_FATHER (stmt
);
195 poly_bb_p pbb
= LST_PBB (stmt
);
196 ppl_Polyhedron_t poly
= PBB_TRANSFORMED_SCATTERING (pbb
);
197 int inner_depth
= lst_depth (inner
);
198 ppl_dimension_type inner_dim
= psct_dynamic_dim (pbb
, inner_depth
);
199 ppl_Linear_Expression_t expr
;
200 ppl_dimension_type dim
;
201 ppl_Coefficient_t one
;
206 ppl_new_Coefficient (&one
);
207 ppl_assign_Coefficient_from_mpz_t (one
, x
);
209 ppl_Polyhedron_space_dimension (poly
, &dim
);
210 ppl_new_Linear_Expression_with_dimension (&expr
, dim
);
212 ppl_set_coef (expr
, inner_dim
, 1);
213 ppl_set_inhomogeneous_gmp (expr
, offset
);
214 ppl_Polyhedron_affine_image (poly
, inner_dim
, expr
, one
);
215 ppl_delete_Linear_Expression (expr
);
216 ppl_delete_Coefficient (one
);
219 /* Scale by FACTOR the loop LST containing STMT. */
222 lst_scale (lst_p lst
, lst_p stmt
, mpz_t factor
)
225 ppl_Coefficient_t one
;
226 int outer_depth
= lst_depth (lst
);
227 poly_bb_p pbb
= LST_PBB (stmt
);
228 ppl_Polyhedron_t poly
= PBB_TRANSFORMED_SCATTERING (pbb
);
229 ppl_dimension_type outer_dim
= psct_dynamic_dim (pbb
, outer_depth
);
230 ppl_Linear_Expression_t expr
;
231 ppl_dimension_type dim
;
235 ppl_new_Coefficient (&one
);
236 ppl_assign_Coefficient_from_mpz_t (one
, x
);
238 ppl_Polyhedron_space_dimension (poly
, &dim
);
239 ppl_new_Linear_Expression_with_dimension (&expr
, dim
);
241 /* outer_dim = factor * outer_dim. */
242 ppl_set_coef_gmp (expr
, outer_dim
, factor
);
243 ppl_Polyhedron_affine_image (poly
, outer_dim
, expr
, one
);
244 ppl_delete_Linear_Expression (expr
);
247 ppl_delete_Coefficient (one
);
250 /* Project the INNER loop into the iteration domain of the OUTER loop.
251 STRIDE is the number of iterations between two iterations of the
255 lst_project_loop (lst_p outer
, lst_p inner
, mpz_t stride
)
260 ppl_Coefficient_t one
;
261 int outer_depth
= lst_depth (outer
);
262 int inner_depth
= lst_depth (inner
);
266 ppl_new_Coefficient (&one
);
267 ppl_assign_Coefficient_from_mpz_t (one
, x
);
269 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (inner
), i
, stmt
)
271 poly_bb_p pbb
= LST_PBB (stmt
);
272 ppl_Polyhedron_t poly
= PBB_TRANSFORMED_SCATTERING (pbb
);
273 ppl_dimension_type outer_dim
= psct_dynamic_dim (pbb
, outer_depth
);
274 ppl_dimension_type inner_dim
= psct_dynamic_dim (pbb
, inner_depth
);
275 ppl_Linear_Expression_t expr
;
276 ppl_dimension_type dim
;
277 ppl_dimension_type
*ds
;
279 /* There should be no loops under INNER. */
280 gcc_assert (!LST_LOOP_P (stmt
));
281 ppl_Polyhedron_space_dimension (poly
, &dim
);
282 ppl_new_Linear_Expression_with_dimension (&expr
, dim
);
284 /* outer_dim = outer_dim * stride + inner_dim. */
285 ppl_set_coef (expr
, inner_dim
, 1);
286 ppl_set_coef_gmp (expr
, outer_dim
, stride
);
287 ppl_Polyhedron_affine_image (poly
, outer_dim
, expr
, one
);
288 ppl_delete_Linear_Expression (expr
);
290 /* Project on inner_dim. */
291 ppl_new_Linear_Expression_with_dimension (&expr
, dim
- 1);
292 ppl_Polyhedron_affine_image (poly
, inner_dim
, expr
, one
);
293 ppl_delete_Linear_Expression (expr
);
295 /* Remove inner loop and the static schedule of its body. */
296 ds
= XNEWVEC (ppl_dimension_type
, 2);
298 ds
[1] = inner_dim
+ 1;
299 ppl_Polyhedron_remove_space_dimensions (poly
, ds
, 2);
300 PBB_NB_SCATTERING_TRANSFORM (pbb
) -= 2;
305 ppl_delete_Coefficient (one
);
308 /* Flattens the loop nest LST. Return true when something changed.
309 OFFSET is used to compute the number of iterations of the outermost
310 loop before the current LST is executed. */
313 lst_flatten_loop (lst_p lst
, mpz_t init_offset
)
318 mpz_t n
, one
, offset
, stride
;
324 mpz_set (offset
, init_offset
);
327 lst_linearized_niter (lst
, stride
);
328 lst_niter_for_loop (lst
, n
);
329 mpz_tdiv_q (stride
, stride
, n
);
331 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (lst
), i
, l
)
336 lst_flatten_loop (l
, offset
);
337 lst_niter_for_loop (l
, n
);
339 lst_project_loop (lst
, l
, stride
);
341 /* The offset is the number of iterations minus 1, as we want
342 to execute the next statements at the same iteration as the
343 last iteration of the loop. */
345 mpz_add (offset
, offset
, n
);
349 lst_scale (lst
, l
, stride
);
350 if (mpz_cmp_si (offset
, 0) != 0)
351 lst_offset (l
, offset
);
354 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (lst
), i
, l
)
356 lst_remove_loop_and_inline_stmts_in_loop_father (l
);
365 /* Remove all but the first 3 dimensions of the scattering:
366 - dim0: the static schedule for the loop
367 - dim1: the dynamic schedule of the loop
368 - dim2: the static schedule for the loop body. */
371 remove_unused_scattering_dimensions (lst_p lst
)
376 ppl_Coefficient_t one
;
380 ppl_new_Coefficient (&one
);
381 ppl_assign_Coefficient_from_mpz_t (one
, x
);
383 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (lst
), i
, stmt
)
385 poly_bb_p pbb
= LST_PBB (stmt
);
386 ppl_Polyhedron_t poly
= PBB_TRANSFORMED_SCATTERING (pbb
);
387 int j
, nb_dims_to_remove
= PBB_NB_SCATTERING_TRANSFORM (pbb
) - 3;
388 ppl_dimension_type
*ds
;
390 /* There should be no loops inside LST after flattening. */
391 gcc_assert (!LST_LOOP_P (stmt
));
393 if (!nb_dims_to_remove
)
396 ds
= XNEWVEC (ppl_dimension_type
, nb_dims_to_remove
);
397 for (j
= 0; j
< nb_dims_to_remove
; j
++)
400 ppl_Polyhedron_remove_space_dimensions (poly
, ds
, nb_dims_to_remove
);
401 PBB_NB_SCATTERING_TRANSFORM (pbb
) -= nb_dims_to_remove
;
406 ppl_delete_Coefficient (one
);
409 /* Flattens all the loop nests of LST. Return true when something
413 lst_do_flatten (lst_p lst
)
421 || !LST_LOOP_P (lst
))
425 mpz_set_si (zero
, 0);
427 FOR_EACH_VEC_ELT (lst_p
, LST_SEQ (lst
), i
, l
)
430 res
|= lst_flatten_loop (l
, zero
);
431 remove_unused_scattering_dimensions (l
);
434 lst_update_scattering (lst
);
439 /* Flatten all the loop nests in SCOP. Returns true when something
443 flatten_all_loops (scop_p scop
)
445 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop
));