In libobjc/: 2010-12-21 Nicola Pero <nicola.pero@meta-innovation.com>
[official-gcc.git] / gcc / graphite-flattening.c
blob6e341f4f3f81188d19d78c2fba2c1cae2080a9f5
1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "output.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "cfgloop.h"
35 #include "tree-chrec.h"
36 #include "tree-data-ref.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-pass.h"
39 #include "domwalk.h"
40 #include "value-prof.h"
41 #include "pointer-set.h"
42 #include "gimple.h"
43 #include "params.h"
45 #ifdef HAVE_cloog
46 #include "ppl_c.h"
47 #include "sese.h"
48 #include "graphite-ppl.h"
49 #include "graphite.h"
50 #include "graphite-poly.h"
52 /* The loop flattening pass transforms loop nests into a single loop,
53 removing the loop nesting structure. The auto-vectorization can
54 then apply on the full loop body, without needing the outer-loop
55 vectorization.
57 The loop flattening pass that has been described in a very Fortran
58 specific way in the 1992 paper by Reinhard von Hanxleden and Ken
59 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
60 Transformations" available from
61 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
63 The canonical example is as follows: suppose that we have a loop
64 nest with known iteration counts
66 | for (i = 1; i <= 6; i++)
67 | for (j = 1; j <= 6; j++)
68 | S1(i,j);
70 The loop flattening is performed by linearizing the iteration space
71 using the function "f (x) = 6 * i + j". In this case, CLooG would
72 produce this code:
74 | for (c1=7;c1<=42;c1++) {
75 | i = floord(c1-1,6);
76 | S1(i,c1-6*i);
77 | }
79 There are several limitations for loop flattening that are linked
80 to the expressivity of the polyhedral model. One has to take an
81 upper bound approximation to deal with the parametric case of loop
82 flattening. For example, in the loop nest:
84 | for (i = 1; i <= N; i++)
85 | for (j = 1; j <= M; j++)
86 | S1(i,j);
88 One would like to flatten this loop using a linearization function
89 like this "f (x) = M * i + j". However CLooG's schedules are not
90 expressive enough to deal with this case, and so the parameter M
91 has to be replaced by an integer upper bound approximation. If we
92 further know in the context of the scop that "M <= 6", then it is
93 possible to linearize the loop with "f (x) = 6 * i + j". In this
94 case, CLooG would produce this code:
96 | for (c1=7;c1<=6*M+N;c1++) {
97 | i = ceild(c1-N,6);
98 | if (i <= floord(c1-1,6)) {
99 | S1(i,c1-6*i);
103 For an arbitrarily complex loop nest the algorithm proceeds in two
104 steps. First, the LST is flattened by removing the loops structure
105 and by inserting the statements in the order they appear in
106 depth-first order. Then, the scattering of each statement is
107 transformed accordingly.
109 Supposing that the original program is represented by the following
110 LST:
112 | (loop_1
113 | stmt_1
114 | (loop_2 stmt_3
115 | (loop_3 stmt_4)
116 | (loop_4 stmt_5 stmt_6)
117 | stmt_7
119 | stmt_2
122 Loop flattening traverses the LST in depth-first order, and
123 flattens pairs of loops successively by projecting the inner loops
124 in the iteration domain of the outer loops:
126 lst_project_loop (loop_2, loop_3, stride)
128 | (loop_1
129 | stmt_1
130 | (loop_2 stmt_3 stmt_4
131 | (loop_4 stmt_5 stmt_6)
132 | stmt_7
134 | stmt_2
137 lst_project_loop (loop_2, loop_4, stride)
139 | (loop_1
140 | stmt_1
141 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
142 | stmt_2
145 lst_project_loop (loop_1, loop_2, stride)
147 | (loop_1
148 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
151 At each step, the iteration domain of the outer loop is enlarged to
152 contain enough points to iterate over the inner loop domain. */
154 /* Initializes RES to the number of iterations of the linearized loop
155 LST. RES is the cardinal of the iteration domain of LST. */
157 static void
158 lst_linearized_niter (lst_p lst, mpz_t res)
160 int i;
161 lst_p l;
162 mpz_t n;
164 mpz_init (n);
165 mpz_set_si (res, 0);
167 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
168 if (LST_LOOP_P (l))
170 lst_linearized_niter (l, n);
171 mpz_add (res, res, n);
174 if (LST_LOOP_P (lst))
176 lst_niter_for_loop (lst, n);
178 if (mpz_cmp_si (res, 0) != 0)
179 mpz_mul (res, res, n);
180 else
181 mpz_set (res, n);
184 mpz_clear (n);
187 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
188 STMT. */
190 static void
191 lst_offset (lst_p stmt, mpz_t offset)
193 lst_p inner = LST_LOOP_FATHER (stmt);
194 poly_bb_p pbb = LST_PBB (stmt);
195 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
196 int inner_depth = lst_depth (inner);
197 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
198 ppl_Linear_Expression_t expr;
199 ppl_dimension_type dim;
200 ppl_Coefficient_t one;
201 mpz_t x;
203 mpz_init (x);
204 mpz_set_si (x, 1);
205 ppl_new_Coefficient (&one);
206 ppl_assign_Coefficient_from_mpz_t (one, x);
208 ppl_Polyhedron_space_dimension (poly, &dim);
209 ppl_new_Linear_Expression_with_dimension (&expr, dim);
211 ppl_set_coef (expr, inner_dim, 1);
212 ppl_set_inhomogeneous_gmp (expr, offset);
213 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
214 ppl_delete_Linear_Expression (expr);
215 ppl_delete_Coefficient (one);
218 /* Scale by FACTOR the loop LST containing STMT. */
220 static void
221 lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
223 mpz_t x;
224 ppl_Coefficient_t one;
225 int outer_depth = lst_depth (lst);
226 poly_bb_p pbb = LST_PBB (stmt);
227 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
228 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
229 ppl_Linear_Expression_t expr;
230 ppl_dimension_type dim;
232 mpz_init (x);
233 mpz_set_si (x, 1);
234 ppl_new_Coefficient (&one);
235 ppl_assign_Coefficient_from_mpz_t (one, x);
237 ppl_Polyhedron_space_dimension (poly, &dim);
238 ppl_new_Linear_Expression_with_dimension (&expr, dim);
240 /* outer_dim = factor * outer_dim. */
241 ppl_set_coef_gmp (expr, outer_dim, factor);
242 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
243 ppl_delete_Linear_Expression (expr);
245 mpz_clear (x);
246 ppl_delete_Coefficient (one);
249 /* Project the INNER loop into the iteration domain of the OUTER loop.
250 STRIDE is the number of iterations between two iterations of the
251 outer loop. */
253 static void
254 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
256 int i;
257 lst_p stmt;
258 mpz_t x;
259 ppl_Coefficient_t one;
260 int outer_depth = lst_depth (outer);
261 int inner_depth = lst_depth (inner);
263 mpz_init (x);
264 mpz_set_si (x, 1);
265 ppl_new_Coefficient (&one);
266 ppl_assign_Coefficient_from_mpz_t (one, x);
268 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
270 poly_bb_p pbb = LST_PBB (stmt);
271 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
272 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
273 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
274 ppl_Linear_Expression_t expr;
275 ppl_dimension_type dim;
276 ppl_dimension_type *ds;
278 /* There should be no loops under INNER. */
279 gcc_assert (!LST_LOOP_P (stmt));
280 ppl_Polyhedron_space_dimension (poly, &dim);
281 ppl_new_Linear_Expression_with_dimension (&expr, dim);
283 /* outer_dim = outer_dim * stride + inner_dim. */
284 ppl_set_coef (expr, inner_dim, 1);
285 ppl_set_coef_gmp (expr, outer_dim, stride);
286 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
287 ppl_delete_Linear_Expression (expr);
289 /* Project on inner_dim. */
290 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
291 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
292 ppl_delete_Linear_Expression (expr);
294 /* Remove inner loop and the static schedule of its body. */
295 ds = XNEWVEC (ppl_dimension_type, 2);
296 ds[0] = inner_dim;
297 ds[1] = inner_dim + 1;
298 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
299 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
300 free (ds);
303 mpz_clear (x);
304 ppl_delete_Coefficient (one);
307 /* Flattens the loop nest LST. Return true when something changed.
308 OFFSET is used to compute the number of iterations of the outermost
309 loop before the current LST is executed. */
311 static bool
312 lst_flatten_loop (lst_p lst, mpz_t init_offset)
314 int i;
315 lst_p l;
316 bool res = false;
317 mpz_t n, one, offset, stride;
319 mpz_init (n);
320 mpz_init (one);
321 mpz_init (offset);
322 mpz_init (stride);
323 mpz_set (offset, init_offset);
324 mpz_set_si (one, 1);
326 lst_linearized_niter (lst, stride);
327 lst_niter_for_loop (lst, n);
328 mpz_tdiv_q (stride, stride, n);
330 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
331 if (LST_LOOP_P (l))
333 res = true;
335 lst_flatten_loop (l, offset);
336 lst_niter_for_loop (l, n);
338 lst_project_loop (lst, l, stride);
340 /* The offset is the number of iterations minus 1, as we want
341 to execute the next statements at the same iteration as the
342 last iteration of the loop. */
343 mpz_sub (n, n, one);
344 mpz_add (offset, offset, n);
346 else
348 lst_scale (lst, l, stride);
349 if (mpz_cmp_si (offset, 0) != 0)
350 lst_offset (l, offset);
353 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
354 if (LST_LOOP_P (l))
355 lst_remove_loop_and_inline_stmts_in_loop_father (l);
357 mpz_clear (n);
358 mpz_clear (one);
359 mpz_clear (offset);
360 mpz_clear (stride);
361 return res;
364 /* Remove all but the first 3 dimensions of the scattering:
365 - dim0: the static schedule for the loop
366 - dim1: the dynamic schedule of the loop
367 - dim2: the static schedule for the loop body. */
369 static void
370 remove_unused_scattering_dimensions (lst_p lst)
372 int i;
373 lst_p stmt;
374 mpz_t x;
375 ppl_Coefficient_t one;
377 mpz_init (x);
378 mpz_set_si (x, 1);
379 ppl_new_Coefficient (&one);
380 ppl_assign_Coefficient_from_mpz_t (one, x);
382 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
384 poly_bb_p pbb = LST_PBB (stmt);
385 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
386 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
387 ppl_dimension_type *ds;
389 /* There should be no loops inside LST after flattening. */
390 gcc_assert (!LST_LOOP_P (stmt));
392 if (!nb_dims_to_remove)
393 continue;
395 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
396 for (j = 0; j < nb_dims_to_remove; j++)
397 ds[j] = j + 3;
399 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
400 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
401 free (ds);
404 mpz_clear (x);
405 ppl_delete_Coefficient (one);
408 /* Flattens all the loop nests of LST. Return true when something
409 changed. */
411 static bool
412 lst_do_flatten (lst_p lst)
414 int i;
415 lst_p l;
416 bool res = false;
417 mpz_t zero;
419 if (!lst
420 || !LST_LOOP_P (lst))
421 return false;
423 mpz_init (zero);
424 mpz_set_si (zero, 0);
426 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
427 if (LST_LOOP_P (l))
429 res |= lst_flatten_loop (l, zero);
430 remove_unused_scattering_dimensions (l);
433 lst_update_scattering (lst);
434 mpz_clear (zero);
435 return res;
438 /* Flatten all the loop nests in SCOP. Returns true when something
439 changed. */
441 bool
442 flatten_all_loops (scop_p scop)
444 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
447 #endif