gcc/
[official-gcc.git] / gcc / graphite-interchange.c
blob8d691dbc7d50dd7f781c415d129bad400cb9db88
1 /* Interchange heuristics and transform for loop interchange on
2 polyhedral representation.
4 Copyright (C) 2009-2014 Free Software Foundation, Inc.
5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6 Harsha Jagasia <harsha.jagasia@amd.com>.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
26 #ifdef HAVE_isl
27 #include <isl/aff.h>
28 #include <isl/set.h>
29 #include <isl/map.h>
30 #include <isl/union_map.h>
31 #include <isl/ilp.h>
32 #include <isl/val.h>
33 #if defined(__cplusplus)
34 extern "C" {
35 #endif
36 #include <isl/val_gmp.h>
37 #if defined(__cplusplus)
39 #endif
40 #ifdef HAVE_cloog
41 #include <cloog/cloog.h>
42 #include <cloog/isl/domain.h>
43 #endif
44 #endif
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tree.h"
49 #include "basic-block.h"
50 #include "tree-ssa-alias.h"
51 #include "internal-fn.h"
52 #include "gimple-expr.h"
53 #include "is-a.h"
54 #include "gimple.h"
55 #include "gimple-iterator.h"
56 #include "tree-ssa-loop.h"
57 #include "dumpfile.h"
58 #include "cfgloop.h"
59 #include "tree-chrec.h"
60 #include "tree-data-ref.h"
61 #include "tree-scalar-evolution.h"
62 #include "sese.h"
64 #ifdef HAVE_isl
65 #include "graphite-poly.h"
67 /* XXX isl rewrite following comment */
68 /* Builds a linear expression, of dimension DIM, representing PDR's
69 memory access:
71 L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
73 For an array A[10][20] with two subscript locations s0 and s1, the
74 linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
75 corresponds to a memory stride of 20.
77 OFFSET is a number of dimensions to prepend before the
78 subscript dimensions: s_0, s_1, ..., s_n.
80 Thus, the final linear expression has the following format:
81 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
82 where the expression itself is:
83 c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */
85 static isl_constraint *
86 build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
88 isl_constraint *res;
89 isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
90 unsigned offset, nsubs;
91 int i;
92 isl_ctx *ctx;
94 isl_val *size, *subsize, *size1;
96 res = isl_equality_alloc (ls);
97 ctx = isl_local_space_get_ctx (ls);
98 size = isl_val_int_from_ui (ctx, 1);
100 nsubs = isl_set_dim (pdr->extent, isl_dim_set);
101 /* -1 for the already included L dimension. */
102 offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
103 res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
104 /* Go through all subscripts from last to first. First dimension
105 is the alias set, ignore it. */
106 for (i = nsubs - 1; i >= 1; i--)
108 isl_space *dc;
109 isl_aff *aff;
111 size1 = isl_val_copy (size);
112 res = isl_constraint_set_coefficient_val (res, isl_dim_out, offset + i, size);
113 dc = isl_set_get_space (pdr->extent);
114 aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
115 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
116 subsize = isl_set_max_val (pdr->extent, aff);
117 isl_aff_free (aff);
118 size = isl_val_mul (size1, subsize);
121 isl_val_free (size);
123 return res;
126 /* Set STRIDE to the stride of PDR in memory by advancing by one in
127 the loop at DEPTH. */
129 static void
130 pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
132 poly_bb_p pbb = PDR_PBB (pdr);
133 isl_map *map;
134 isl_set *set;
135 isl_aff *aff;
136 isl_space *dc;
137 isl_constraint *lma, *c;
138 isl_val *islstride;
139 graphite_dim_t time_depth;
140 unsigned offset, nt;
141 unsigned i;
142 /* XXX isl rewrite following comments. */
143 /* Builds a partial difference equations and inserts them
144 into pointset powerset polyhedron P. Polyhedron is assumed
145 to have the format: T|I|T'|I'|G|S|S'|l1|l2.
147 TIME_DEPTH is the time dimension w.r.t. which we are
148 differentiating.
149 OFFSET represents the number of dimensions between
150 columns t_{time_depth} and t'_{time_depth}.
151 DIM_SCTR is the number of scattering dimensions. It is
152 essentially the dimensionality of the T vector.
154 The following equations are inserted into the polyhedron P:
155 | t_1 = t_1'
156 | ...
157 | t_{time_depth-1} = t'_{time_depth-1}
158 | t_{time_depth} = t'_{time_depth} + 1
159 | t_{time_depth+1} = t'_{time_depth + 1}
160 | ...
161 | t_{dim_sctr} = t'_{dim_sctr}. */
163 /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
164 This is the core part of this alogrithm, since this
165 constraint asks for the memory access stride (difference)
166 between two consecutive points in time dimensions. */
168 /* Add equalities:
169 | t1 = t1'
170 | ...
171 | t_{time_depth-1} = t'_{time_depth-1}
172 | t_{time_depth+1} = t'_{time_depth+1}
173 | ...
174 | t_{dim_sctr} = t'_{dim_sctr}
176 This means that all the time dimensions are equal except for
177 time_depth, where the constraint is t_{depth} = t'_{depth} + 1
178 step. More to this: we should be careful not to add equalities
179 to the 'coupled' dimensions, which happens when the one dimension
180 is stripmined dimension, and the other dimension corresponds
181 to the point loop inside stripmined dimension. */
183 /* pdr->accesses: [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
184 ??? [P] not used for PDRs?
185 pdr->extent: [a,S1..nb_subscript]
186 pbb->domain: [P1..nb_param,I1..nb_domain]
187 pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
188 [T] includes local vars (currently unused)
190 First we create [P,I] -> [T,a,S]. */
192 map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
193 isl_map_copy (pdr->accesses));
194 /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
195 map = isl_map_add_dims (map, isl_dim_out, 1);
196 /* Build a constraint for "lma[S] - L == 0", effectively calculating
197 L in terms of subscripts. */
198 lma = build_linearized_memory_access (map, pdr);
199 /* And add it to the map, so we now have:
200 [P,I] -> [T,a,S,L] : lma([S]) == L. */
201 map = isl_map_add_constraint (map, lma);
203 /* Then we create [P,I,P',I'] -> [T,a,S,L,T',a',S',L']. */
204 map = isl_map_flat_product (map, isl_map_copy (map));
206 /* Now add the equality T[time_depth] == T'[time_depth]+1. This will
207 force L' to be the linear address at T[time_depth] + 1. */
208 time_depth = psct_dynamic_dim (pbb, depth);
209 /* Length of [a,S] plus [L] ... */
210 offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
211 /* ... plus [T]. */
212 offset += isl_map_dim (pbb->transformed, isl_dim_out);
214 c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
215 c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
216 c = isl_constraint_set_coefficient_si (c, isl_dim_out,
217 offset + time_depth, -1);
218 c = isl_constraint_set_constant_si (c, 1);
219 map = isl_map_add_constraint (map, c);
221 /* Now we equate most of the T/T' elements (making PITaSL nearly
222 the same is (PITaSL)', except for one dimension, namely for 'depth'
223 (an index into [I]), after translating to index into [T]. Take care
224 to not produce an empty map, which indicates we wanted to equate
225 two dimensions that are already coupled via the above time_depth
226 dimension. Happens with strip mining where several scatter dimension
227 are interdependend. */
228 /* Length of [T]. */
229 nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
230 for (i = 0; i < nt; i++)
231 if (i != time_depth)
233 isl_map *temp = isl_map_equate (isl_map_copy (map),
234 isl_dim_out, i,
235 isl_dim_out, offset + i);
236 if (isl_map_is_empty (temp))
237 isl_map_free (temp);
238 else
240 isl_map_free (map);
241 map = temp;
245 /* Now maximize the expression L' - L. */
246 set = isl_map_range (map);
247 dc = isl_set_get_space (set);
248 aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
249 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
250 aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
251 islstride = isl_set_max_val (set, aff);
252 isl_val_get_num_gmp (islstride, stride);
253 isl_val_free (islstride);
254 isl_aff_free (aff);
255 isl_set_free (set);
257 if (dump_file && (dump_flags & TDF_DETAILS))
259 gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d: %Zd ",
260 pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
264 /* Sets STRIDES to the sum of all the strides of the data references
265 accessed in LOOP at DEPTH. */
267 static void
268 memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
270 int i, j;
271 lst_p l;
272 poly_dr_p pdr;
273 mpz_t s, n;
275 mpz_init (s);
276 mpz_init (n);
278 FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
279 if (LST_LOOP_P (l))
280 memory_strides_in_loop_1 (l, depth, strides);
281 else
282 FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
284 pdr_stride_in_loop (s, depth, pdr);
285 mpz_set_si (n, PDR_NB_REFS (pdr));
286 mpz_mul (s, s, n);
287 mpz_add (strides, strides, s);
290 mpz_clear (s);
291 mpz_clear (n);
294 /* Sets STRIDES to the sum of all the strides of the data references
295 accessed in LOOP at DEPTH. */
297 static void
298 memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
300 if (mpz_cmp_si (loop->memory_strides, -1) == 0)
302 mpz_set_si (strides, 0);
303 memory_strides_in_loop_1 (loop, depth, strides);
305 else
306 mpz_set (strides, loop->memory_strides);
309 /* Return true when the interchange of loops LOOP1 and LOOP2 is
310 profitable.
312 Example:
314 | int a[100][100];
316 | int
317 | foo (int N)
319 | int j;
320 | int i;
322 | for (i = 0; i < N; i++)
323 | for (j = 0; j < N; j++)
324 | a[j][2 * i] += 1;
326 | return a[N][12];
329 The data access A[j][i] is described like this:
331 | i j N a s0 s1 1
332 | 0 0 0 1 0 0 -5 = 0
333 | 0 -1 0 0 1 0 0 = 0
334 |-2 0 0 0 0 1 0 = 0
335 | 0 0 0 0 1 0 0 >= 0
336 | 0 0 0 0 0 1 0 >= 0
337 | 0 0 0 0 -1 0 100 >= 0
338 | 0 0 0 0 0 -1 100 >= 0
340 The linearized memory access L to A[100][100] is:
342 | i j N a s0 s1 1
343 | 0 0 0 0 100 1 0
345 TODO: the shown format is not valid as it does not show the fact
346 that the iteration domain "i j" is transformed using the scattering.
348 Next, to measure the impact of iterating once in loop "i", we build
349 a maximization problem: first, we add to DR accesses the dimensions
350 k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
351 L1 and L2 are the linearized memory access functions.
353 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
354 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
355 | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j
356 |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i
357 | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0
358 | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0
359 | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0
360 | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0
361 | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1
363 Then, we generate the polyhedron P2 by interchanging the dimensions
364 (s0, s2), (s1, s3), (L1, L2), (k, i)
366 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
367 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
368 | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j
369 | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k
370 | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0
371 | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0
372 | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0
373 | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0
374 | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3
376 then we add to P2 the equality k = i + 1:
378 |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1
380 and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
382 Similarly, to determine the impact of one iteration on loop "j", we
383 interchange (k, j), we add "k = j + 1", and we compute D2 the
384 maximal value of the difference.
386 Finally, the profitability test is D1 < D2: if in the outer loop
387 the strides are smaller than in the inner loop, then it is
388 profitable to interchange the loops at DEPTH1 and DEPTH2. */
390 static bool
391 lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
393 mpz_t d1, d2;
394 bool res;
396 gcc_assert (depth1 < depth2);
398 mpz_init (d1);
399 mpz_init (d2);
401 memory_strides_in_loop (nest, depth1, d1);
402 memory_strides_in_loop (nest, depth2, d2);
404 res = mpz_cmp (d1, d2) < 0;
406 mpz_clear (d1);
407 mpz_clear (d2);
409 return res;
412 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
413 scattering and assigns the resulting polyhedron to the transformed
414 scattering. */
416 static void
417 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
418 poly_bb_p pbb)
420 unsigned i;
421 unsigned dim1 = psct_dynamic_dim (pbb, depth1);
422 unsigned dim2 = psct_dynamic_dim (pbb, depth2);
423 isl_space *d = isl_map_get_space (pbb->transformed);
424 isl_space *d1 = isl_space_range (d);
425 unsigned n = isl_space_dim (d1, isl_dim_out);
426 isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
427 isl_map *x = isl_map_universe (d2);
429 x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
430 x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
432 for (i = 0; i < n; i++)
433 if (i != dim1 && i != dim2)
434 x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
436 pbb->transformed = isl_map_apply_range (pbb->transformed, x);
439 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
440 the statements below LST. */
442 static void
443 lst_apply_interchange (lst_p lst, int depth1, int depth2)
445 if (!lst)
446 return;
448 if (LST_LOOP_P (lst))
450 int i;
451 lst_p l;
453 FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
454 lst_apply_interchange (l, depth1, depth2);
456 else
457 pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
460 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
461 perfect: i.e. there are no sequence of statements. */
463 static bool
464 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
466 if (loop1 == loop2)
467 return true;
469 if (!LST_LOOP_P (loop1))
470 return false;
472 return LST_SEQ (loop1).length () == 1
473 && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
476 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
477 nest. To continue the naming tradition, this function is called
478 after perfect_nestify. NEST is set to the perfectly nested loop
479 that is created. BEFORE/AFTER are set to the loops distributed
480 before/after the loop NEST. */
482 static void
483 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
484 lst_p *nest, lst_p *after)
486 poly_bb_p first, last;
488 gcc_assert (loop1 && loop2
489 && loop1 != loop2
490 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
492 first = LST_PBB (lst_find_first_pbb (loop2));
493 last = LST_PBB (lst_find_last_pbb (loop2));
495 *before = copy_lst (loop1);
496 *nest = copy_lst (loop1);
497 *after = copy_lst (loop1);
499 lst_remove_all_before_including_pbb (*before, first, false);
500 lst_remove_all_before_including_pbb (*after, last, true);
502 lst_remove_all_before_excluding_pbb (*nest, first, true);
503 lst_remove_all_before_excluding_pbb (*nest, last, false);
505 if (lst_empty_p (*before))
507 free_lst (*before);
508 *before = NULL;
510 if (lst_empty_p (*after))
512 free_lst (*after);
513 *after = NULL;
515 if (lst_empty_p (*nest))
517 free_lst (*nest);
518 *nest = NULL;
522 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
523 body of LOOP2. LOOP1 contains LOOP2. Return true if it did the
524 interchange. */
526 static bool
527 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
529 int depth1 = lst_depth (loop1);
530 int depth2 = lst_depth (loop2);
531 lst_p transformed;
533 lst_p before = NULL, nest = NULL, after = NULL;
535 if (!lst_perfectly_nested_p (loop1, loop2))
536 lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
538 if (!lst_interchange_profitable_p (loop2, depth1, depth2))
539 return false;
541 lst_apply_interchange (loop2, depth1, depth2);
543 /* Sync the transformed LST information and the PBB scatterings
544 before using the scatterings in the data dependence analysis. */
545 if (before || nest || after)
547 transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
548 before, nest, after);
549 lst_update_scattering (transformed);
550 free_lst (transformed);
553 if (graphite_legal_transform (scop))
555 if (dump_file && (dump_flags & TDF_DETAILS))
556 fprintf (dump_file,
557 "Loops at depths %d and %d will be interchanged.\n",
558 depth1, depth2);
560 /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */
561 lst_insert_in_sequence (before, loop1, true);
562 lst_insert_in_sequence (after, loop1, false);
564 if (nest)
566 lst_replace (loop1, nest);
567 free_lst (loop1);
570 return true;
573 /* Undo the transform. */
574 free_lst (before);
575 free_lst (nest);
576 free_lst (after);
577 lst_apply_interchange (loop2, depth2, depth1);
578 return false;
581 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
582 with the loop OUTER in LST_SEQ (OUTER_FATHER). */
584 static bool
585 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
586 lst_p inner_father)
588 int inner;
589 lst_p loop1, loop2;
591 gcc_assert (outer_father
592 && LST_LOOP_P (outer_father)
593 && LST_LOOP_P (LST_SEQ (outer_father)[outer])
594 && inner_father
595 && LST_LOOP_P (inner_father));
597 loop1 = LST_SEQ (outer_father)[outer];
599 FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
600 if (LST_LOOP_P (loop2)
601 && (lst_try_interchange_loops (scop, loop1, loop2)
602 || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
603 return true;
605 return false;
608 /* Interchanges all the loops of LOOP and the loops of its body that
609 are considered profitable to interchange. Return the number of
610 interchanged loops. OUTER is the index in LST_SEQ (LOOP) that
611 points to the next outer loop to be considered for interchange. */
613 static int
614 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
616 lst_p l;
617 int res = 0;
618 int i = 0;
619 lst_p father;
621 if (!loop || !LST_LOOP_P (loop))
622 return 0;
624 father = LST_LOOP_FATHER (loop);
625 if (father)
627 while (lst_interchange_select_inner (scop, father, outer, loop))
629 res++;
630 loop = LST_SEQ (father)[outer];
634 if (LST_LOOP_P (loop))
635 FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
636 if (LST_LOOP_P (l))
637 res += lst_interchange_select_outer (scop, l, i);
639 return res;
642 /* Interchanges all the loop depths that are considered profitable for
643 SCOP. Return the number of interchanged loops. */
646 scop_do_interchange (scop_p scop)
648 int res = lst_interchange_select_outer
649 (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
651 lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
653 return res;
657 #endif