PR target/44750
[official-gcc.git] / gcc / graphite-flattening.c
blobc026ffc227ae3ee4dd73221d01dceb357c727d2a
1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "output.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "toplev.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-pass.h"
40 #include "domwalk.h"
41 #include "value-prof.h"
42 #include "pointer-set.h"
43 #include "gimple.h"
44 #include "params.h"
46 #ifdef HAVE_cloog
47 #include "ppl_c.h"
48 #include "sese.h"
49 #include "graphite-ppl.h"
50 #include "graphite.h"
51 #include "graphite-poly.h"
53 /* The loop flattening pass transforms loop nests into a single loop,
54 removing the loop nesting structure. The auto-vectorization can
55 then apply on the full loop body, without needing the outer-loop
56 vectorization.
58 The loop flattening pass that has been described in a very Fortran
59 specific way in the 1992 paper by Reinhard von Hanxleden and Ken
60 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
61 Transformations" available from
62 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
64 The canonical example is as follows: suppose that we have a loop
65 nest with known iteration counts
67 | for (i = 1; i <= 6; i++)
68 | for (j = 1; j <= 6; j++)
69 | S1(i,j);
71 The loop flattening is performed by linearizing the iteration space
72 using the function "f (x) = 6 * i + j". In this case, CLooG would
73 produce this code:
75 | for (c1=7;c1<=42;c1++) {
76 | i = floord(c1-1,6);
77 | S1(i,c1-6*i);
78 | }
80 There are several limitations for loop flattening that are linked
81 to the expressivity of the polyhedral model. One has to take an
82 upper bound approximation to deal with the parametric case of loop
83 flattening. For example, in the loop nest:
85 | for (i = 1; i <= N; i++)
86 | for (j = 1; j <= M; j++)
87 | S1(i,j);
89 One would like to flatten this loop using a linearization function
90 like this "f (x) = M * i + j". However CLooG's schedules are not
91 expressive enough to deal with this case, and so the parameter M
92 has to be replaced by an integer upper bound approximation. If we
93 further know in the context of the scop that "M <= 6", then it is
94 possible to linearize the loop with "f (x) = 6 * i + j". In this
95 case, CLooG would produce this code:
97 | for (c1=7;c1<=6*M+N;c1++) {
98 | i = ceild(c1-N,6);
99 | if (i <= floord(c1-1,6)) {
100 | S1(i,c1-6*i);
104 For an arbitrarily complex loop nest the algorithm proceeds in two
105 steps. First, the LST is flattened by removing the loops structure
106 and by inserting the statements in the order they appear in
107 depth-first order. Then, the scattering of each statement is
108 transformed accordingly.
110 Supposing that the original program is represented by the following
111 LST:
113 | (loop_1
114 | stmt_1
115 | (loop_2 stmt_3
116 | (loop_3 stmt_4)
117 | (loop_4 stmt_5 stmt_6)
118 | stmt_7
120 | stmt_2
123 Loop flattening traverses the LST in depth-first order, and
124 flattens pairs of loops successively by projecting the inner loops
125 in the iteration domain of the outer loops:
127 lst_project_loop (loop_2, loop_3, stride)
129 | (loop_1
130 | stmt_1
131 | (loop_2 stmt_3 stmt_4
132 | (loop_4 stmt_5 stmt_6)
133 | stmt_7
135 | stmt_2
138 lst_project_loop (loop_2, loop_4, stride)
140 | (loop_1
141 | stmt_1
142 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
143 | stmt_2
146 lst_project_loop (loop_1, loop_2, stride)
148 | (loop_1
149 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
152 At each step, the iteration domain of the outer loop is enlarged to
153 contain enough points to iterate over the inner loop domain. */
155 /* Initializes RES to the number of iterations of the linearized loop
156 LST. RES is the cardinal of the iteration domain of LST. */
158 static void
159 lst_linearized_niter (lst_p lst, mpz_t res)
161 int i;
162 lst_p l;
163 mpz_t n;
165 mpz_init (n);
166 mpz_set_si (res, 0);
168 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
169 if (LST_LOOP_P (l))
171 lst_linearized_niter (l, n);
172 mpz_add (res, res, n);
175 if (LST_LOOP_P (lst))
177 lst_niter_for_loop (lst, n);
179 if (mpz_cmp_si (res, 0) != 0)
180 mpz_mul (res, res, n);
181 else
182 mpz_set (res, n);
185 mpz_clear (n);
188 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
189 STMT. */
191 static void
192 lst_offset (lst_p stmt, mpz_t offset)
194 lst_p inner = LST_LOOP_FATHER (stmt);
195 poly_bb_p pbb = LST_PBB (stmt);
196 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
197 int inner_depth = lst_depth (inner);
198 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
199 ppl_Linear_Expression_t expr;
200 ppl_dimension_type dim;
201 ppl_Coefficient_t one;
202 mpz_t x;
204 mpz_init (x);
205 mpz_set_si (x, 1);
206 ppl_new_Coefficient (&one);
207 ppl_assign_Coefficient_from_mpz_t (one, x);
209 ppl_Polyhedron_space_dimension (poly, &dim);
210 ppl_new_Linear_Expression_with_dimension (&expr, dim);
212 ppl_set_coef (expr, inner_dim, 1);
213 ppl_set_inhomogeneous_gmp (expr, offset);
214 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
215 ppl_delete_Linear_Expression (expr);
216 ppl_delete_Coefficient (one);
219 /* Scale by FACTOR the loop LST containing STMT. */
221 static void
222 lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
224 mpz_t x;
225 ppl_Coefficient_t one;
226 int outer_depth = lst_depth (lst);
227 poly_bb_p pbb = LST_PBB (stmt);
228 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
229 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
230 ppl_Linear_Expression_t expr;
231 ppl_dimension_type dim;
233 mpz_init (x);
234 mpz_set_si (x, 1);
235 ppl_new_Coefficient (&one);
236 ppl_assign_Coefficient_from_mpz_t (one, x);
238 ppl_Polyhedron_space_dimension (poly, &dim);
239 ppl_new_Linear_Expression_with_dimension (&expr, dim);
241 /* outer_dim = factor * outer_dim. */
242 ppl_set_coef_gmp (expr, outer_dim, factor);
243 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
244 ppl_delete_Linear_Expression (expr);
246 mpz_clear (x);
247 ppl_delete_Coefficient (one);
250 /* Project the INNER loop into the iteration domain of the OUTER loop.
251 STRIDE is the number of iterations between two iterations of the
252 outer loop. */
254 static void
255 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
257 int i;
258 lst_p stmt;
259 mpz_t x;
260 ppl_Coefficient_t one;
261 int outer_depth = lst_depth (outer);
262 int inner_depth = lst_depth (inner);
264 mpz_init (x);
265 mpz_set_si (x, 1);
266 ppl_new_Coefficient (&one);
267 ppl_assign_Coefficient_from_mpz_t (one, x);
269 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
271 poly_bb_p pbb = LST_PBB (stmt);
272 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
273 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
274 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
275 ppl_Linear_Expression_t expr;
276 ppl_dimension_type dim;
277 ppl_dimension_type *ds;
279 /* There should be no loops under INNER. */
280 gcc_assert (!LST_LOOP_P (stmt));
281 ppl_Polyhedron_space_dimension (poly, &dim);
282 ppl_new_Linear_Expression_with_dimension (&expr, dim);
284 /* outer_dim = outer_dim * stride + inner_dim. */
285 ppl_set_coef (expr, inner_dim, 1);
286 ppl_set_coef_gmp (expr, outer_dim, stride);
287 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
288 ppl_delete_Linear_Expression (expr);
290 /* Project on inner_dim. */
291 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
292 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
293 ppl_delete_Linear_Expression (expr);
295 /* Remove inner loop and the static schedule of its body. */
296 ds = XNEWVEC (ppl_dimension_type, 2);
297 ds[0] = inner_dim;
298 ds[1] = inner_dim + 1;
299 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
300 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
301 free (ds);
304 mpz_clear (x);
305 ppl_delete_Coefficient (one);
308 /* Flattens the loop nest LST. Return true when something changed.
309 OFFSET is used to compute the number of iterations of the outermost
310 loop before the current LST is executed. */
312 static bool
313 lst_flatten_loop (lst_p lst, mpz_t init_offset)
315 int i;
316 lst_p l;
317 bool res = false;
318 mpz_t n, one, offset, stride;
320 mpz_init (n);
321 mpz_init (one);
322 mpz_init (offset);
323 mpz_init (stride);
324 mpz_set (offset, init_offset);
325 mpz_set_si (one, 1);
327 lst_linearized_niter (lst, stride);
328 lst_niter_for_loop (lst, n);
329 mpz_tdiv_q (stride, stride, n);
331 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
332 if (LST_LOOP_P (l))
334 res = true;
336 lst_flatten_loop (l, offset);
337 lst_niter_for_loop (l, n);
339 lst_project_loop (lst, l, stride);
341 /* The offset is the number of iterations minus 1, as we want
342 to execute the next statements at the same iteration as the
343 last iteration of the loop. */
344 mpz_sub (n, n, one);
345 mpz_add (offset, offset, n);
347 else
349 lst_scale (lst, l, stride);
350 if (mpz_cmp_si (offset, 0) != 0)
351 lst_offset (l, offset);
354 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
355 if (LST_LOOP_P (l))
356 lst_remove_loop_and_inline_stmts_in_loop_father (l);
358 mpz_clear (n);
359 mpz_clear (one);
360 mpz_clear (offset);
361 mpz_clear (stride);
362 return res;
365 /* Remove all but the first 3 dimensions of the scattering:
366 - dim0: the static schedule for the loop
367 - dim1: the dynamic schedule of the loop
368 - dim2: the static schedule for the loop body. */
370 static void
371 remove_unused_scattering_dimensions (lst_p lst)
373 int i;
374 lst_p stmt;
375 mpz_t x;
376 ppl_Coefficient_t one;
378 mpz_init (x);
379 mpz_set_si (x, 1);
380 ppl_new_Coefficient (&one);
381 ppl_assign_Coefficient_from_mpz_t (one, x);
383 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
385 poly_bb_p pbb = LST_PBB (stmt);
386 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
387 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
388 ppl_dimension_type *ds;
390 /* There should be no loops inside LST after flattening. */
391 gcc_assert (!LST_LOOP_P (stmt));
393 if (!nb_dims_to_remove)
394 continue;
396 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
397 for (j = 0; j < nb_dims_to_remove; j++)
398 ds[j] = j + 3;
400 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
401 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
402 free (ds);
405 mpz_clear (x);
406 ppl_delete_Coefficient (one);
409 /* Flattens all the loop nests of LST. Return true when something
410 changed. */
412 static bool
413 lst_do_flatten (lst_p lst)
415 int i;
416 lst_p l;
417 bool res = false;
418 mpz_t zero;
420 if (!lst
421 || !LST_LOOP_P (lst))
422 return false;
424 mpz_init (zero);
425 mpz_set_si (zero, 0);
427 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
428 if (LST_LOOP_P (l))
430 res |= lst_flatten_loop (l, zero);
431 remove_unused_scattering_dimensions (l);
434 lst_update_scattering (lst);
435 mpz_clear (zero);
436 return res;
439 /* Flatten all the loop nests in SCOP. Returns true when something
440 changed. */
442 bool
443 flatten_all_loops (scop_p scop)
445 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
448 #endif