1 /* Linear Loop transforms
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dberlin@dberlin.org>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
28 #include "basic-block.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
34 #include "tree-chrec.h"
35 #include "tree-data-ref.h"
36 #include "tree-scalar-evolution.h"
37 #include "tree-pass.h"
40 /* Linear loop transforms include any composition of interchange,
41 scaling, skewing, and reversal. They are used to change the
42 iteration order of loop nests in order to optimize data locality of
43 traversals, or remove dependences that prevent
44 parallelization/vectorization/etc.
46 TODO: Determine reuse vectors/matrix and use it to determine optimal
47 transform matrix for locality purposes.
48 TODO: Completion of partial transforms. */
50 /* Gather statistics for loop interchange. LOOP is the loop being
51 considered. The first loop in the considered loop nest is
52 FIRST_LOOP, and consequently, the index of the considered loop is
53 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
56 - DEPENDENCE_STEPS the sum of all the data dependence distances
59 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
60 for which the loop LOOP is not carrying any dependence,
62 - ACCESS_STRIDES the sum of all the strides in LOOP.
64 Example: for the following loop,
66 | loop_1 runs 1335 times
67 | loop_2 runs 1335 times
68 | A[{{0, +, 1}_1, +, 1335}_2]
69 | B[{{0, +, 1}_1, +, 1335}_2]
74 gather_interchange_stats (in loop_1) will return
75 DEPENDENCE_STEPS = 3002
76 NB_DEPS_NOT_CARRIED_BY_LOOP = 5
77 ACCESS_STRIDES = 10694
79 gather_interchange_stats (in loop_2) will return
80 DEPENDENCE_STEPS = 3000
81 NB_DEPS_NOT_CARRIED_BY_LOOP = 7
86 gather_interchange_stats (VEC (ddr_p
, heap
) *dependence_relations ATTRIBUTE_UNUSED
,
87 VEC (data_reference_p
, heap
) *datarefs ATTRIBUTE_UNUSED
,
88 struct loop
*loop ATTRIBUTE_UNUSED
,
89 struct loop
*first_loop ATTRIBUTE_UNUSED
,
90 unsigned int *dependence_steps ATTRIBUTE_UNUSED
,
91 unsigned int *nb_deps_not_carried_by_loop ATTRIBUTE_UNUSED
,
92 double_int
*access_strides ATTRIBUTE_UNUSED
)
95 struct data_dependence_relation
*ddr
;
96 struct data_reference
*dr
;
98 *dependence_steps
= 0;
99 *nb_deps_not_carried_by_loop
= 0;
100 *access_strides
= double_int_zero
;
102 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
104 /* If we don't know anything about this dependence, or the distance
105 vector is NULL, or there is no dependence, then there is no reuse of
107 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
108 || DDR_ARE_DEPENDENT (ddr
) == chrec_known
109 || DDR_NUM_DIST_VECTS (ddr
) == 0)
112 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
114 int dist
= DDR_DIST_VECT (ddr
, j
)[loop_depth (loop
) - loop_depth (first_loop
)];
117 (*nb_deps_not_carried_by_loop
) += 1;
120 (*dependence_steps
) += -dist
;
123 (*dependence_steps
) += dist
;
127 /* Compute the access strides. */
128 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
131 tree ref
= DR_REF (dr
);
132 gimple stmt
= DR_STMT (dr
);
133 struct loop
*stmt_loop
= loop_containing_stmt (stmt
);
134 struct loop
*inner_loop
= first_loop
->inner
;
136 if (inner_loop
!= stmt_loop
137 && !flow_loop_nested_p (inner_loop
, stmt_loop
))
140 for (it
= 0; it
< DR_NUM_DIMENSIONS (dr
);
141 it
++, ref
= TREE_OPERAND (ref
, 0))
143 int num
= am_vector_index_for_loop (DR_ACCESS_MATRIX (dr
), loop
->num
);
144 int istride
= AM_GET_ACCESS_MATRIX_ELEMENT (DR_ACCESS_MATRIX (dr
), it
, num
);
145 tree array_size
= TYPE_SIZE (TREE_TYPE (ref
));
148 if (array_size
== NULL_TREE
149 || TREE_CODE (array_size
) != INTEGER_CST
)
152 dstride
= double_int_mul (tree_to_double_int (array_size
),
153 shwi_to_double_int (istride
));
154 (*access_strides
) = double_int_add (*access_strides
, dstride
);
159 /* Attempt to apply interchange transformations to TRANS to maximize the
160 spatial and temporal locality of the loop.
161 Returns the new transform matrix. The smaller the reuse vector
162 distances in the inner loops, the fewer the cache misses.
163 FIRST_LOOP is the loop->num of the first loop in the analyzed loop
167 static lambda_trans_matrix
168 try_interchange_loops (lambda_trans_matrix trans
,
170 VEC (ddr_p
, heap
) *dependence_relations
,
171 VEC (data_reference_p
, heap
) *datarefs
,
172 struct loop
*first_loop
)
177 unsigned int dependence_steps_i
, dependence_steps_j
;
178 double_int access_strides_i
, access_strides_j
;
179 double_int small
, large
, nb_iter
;
180 double_int l1_cache_size
, l2_cache_size
;
182 unsigned int nb_deps_not_carried_by_i
, nb_deps_not_carried_by_j
;
183 struct data_dependence_relation
*ddr
;
185 if (VEC_length (ddr_p
, dependence_relations
) == 0)
188 /* When there is an unknown relation in the dependence_relations, we
189 know that it is no worth looking at this loop nest: give up. */
190 ddr
= VEC_index (ddr_p
, dependence_relations
, 0);
191 if (ddr
== NULL
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
194 l1_cache_size
= uhwi_to_double_int (L1_CACHE_SIZE
* 1024);
195 l2_cache_size
= uhwi_to_double_int (L2_CACHE_SIZE
* 1024);
197 /* LOOP_I is always the outer loop. */
198 for (loop_j
= first_loop
->inner
;
200 loop_j
= loop_j
->inner
)
201 for (loop_i
= first_loop
;
202 loop_depth (loop_i
) < loop_depth (loop_j
);
203 loop_i
= loop_i
->inner
)
205 gather_interchange_stats (dependence_relations
, datarefs
,
208 &nb_deps_not_carried_by_i
,
210 gather_interchange_stats (dependence_relations
, datarefs
,
213 &nb_deps_not_carried_by_j
,
216 /* Heuristics for loop interchange profitability:
218 0. Don't transform if the smallest stride is larger than
219 the L2 cache, or if the largest stride multiplied by the
220 number of iterations is smaller than the L1 cache.
222 1. (spatial locality) Inner loops should have smallest
225 2. (spatial locality) Inner loops should contain more
226 dependence relations not carried by the loop.
228 3. (temporal locality) Inner loops should have smallest
229 array access strides.
232 cmp
= double_int_ucmp (access_strides_i
, access_strides_j
);
233 small
= cmp
< 0 ? access_strides_i
: access_strides_j
;
234 large
= cmp
< 0 ? access_strides_j
: access_strides_i
;
236 if (double_int_ucmp (small
, l2_cache_size
) > 0)
240 estimated_loop_iterations (loop_j
, false, &nb_iter
):
241 estimated_loop_iterations (loop_i
, false, &nb_iter
);
244 && double_int_ucmp (double_int_mul (large
, nb_iter
),
248 if (dependence_steps_i
< dependence_steps_j
249 || nb_deps_not_carried_by_i
> nb_deps_not_carried_by_j
252 lambda_matrix_row_exchange (LTM_MATRIX (trans
),
253 loop_depth (loop_i
) - loop_depth (first_loop
),
254 loop_depth (loop_j
) - loop_depth (first_loop
));
255 /* Validate the resulting matrix. When the transformation
256 is not valid, reverse to the previous transformation. */
257 if (!lambda_transform_legal_p (trans
, depth
, dependence_relations
))
258 lambda_matrix_row_exchange (LTM_MATRIX (trans
),
259 loop_depth (loop_i
) - loop_depth (first_loop
),
260 loop_depth (loop_j
) - loop_depth (first_loop
));
267 /* Return the number of nested loops in LOOP_NEST, or 0 if the loops
268 are not perfectly nested. */
271 perfect_loop_nest_depth (struct loop
*loop_nest
)
274 unsigned int depth
= 1;
276 /* If it's not a loop nest, we don't want it. We also don't handle
277 sibling loops properly, which are loops of the following form:
279 | for (i = 0; i < 50; i++)
281 | for (j = 0; j < 50; j++)
285 | for (j = 0; j < 50; j++)
292 if (!loop_nest
->inner
|| !single_exit (loop_nest
))
295 for (temp
= loop_nest
->inner
; temp
; temp
= temp
->inner
)
297 /* If we have a sibling loop or multiple exit edges, jump ship. */
298 if (temp
->next
|| !single_exit (temp
))
307 /* Perform a set of linear transforms on loops. */
310 linear_transform_loops (void)
312 bool modified
= false;
314 VEC(tree
,heap
) *oldivs
= NULL
;
315 VEC(tree
,heap
) *invariants
= NULL
;
316 VEC(tree
,heap
) *lambda_parameters
= NULL
;
317 VEC(gimple
,heap
) *remove_ivs
= VEC_alloc (gimple
, heap
, 3);
318 struct loop
*loop_nest
;
322 FOR_EACH_LOOP (li
, loop_nest
, 0)
324 unsigned int depth
= 0;
325 VEC (ddr_p
, heap
) *dependence_relations
;
326 VEC (data_reference_p
, heap
) *datarefs
;
328 lambda_loopnest before
, after
;
329 lambda_trans_matrix trans
;
330 struct obstack lambda_obstack
;
332 VEC(loop_p
,heap
) *nest
;
334 depth
= perfect_loop_nest_depth (loop_nest
);
338 nest
= VEC_alloc (loop_p
, heap
, 3);
339 for (loop
= loop_nest
; loop
; loop
= loop
->inner
)
340 VEC_safe_push (loop_p
, heap
, nest
, loop
);
342 gcc_obstack_init (&lambda_obstack
);
343 VEC_truncate (tree
, oldivs
, 0);
344 VEC_truncate (tree
, invariants
, 0);
345 VEC_truncate (tree
, lambda_parameters
, 0);
347 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
348 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
349 if (!compute_data_dependences_for_loop (loop_nest
, true, &datarefs
,
350 &dependence_relations
))
351 goto free_and_continue
;
353 lambda_collect_parameters (datarefs
, &lambda_parameters
);
354 if (!lambda_compute_access_matrices (datarefs
, lambda_parameters
,
355 nest
, &lambda_obstack
))
356 goto free_and_continue
;
358 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
359 dump_ddrs (dump_file
, dependence_relations
);
361 /* Build the transformation matrix. */
362 trans
= lambda_trans_matrix_new (depth
, depth
, &lambda_obstack
);
363 lambda_matrix_id (LTM_MATRIX (trans
), depth
);
364 trans
= try_interchange_loops (trans
, depth
, dependence_relations
,
365 datarefs
, loop_nest
);
367 if (lambda_trans_matrix_id_p (trans
))
370 fprintf (dump_file
, "Won't transform loop. Optimal transform is the identity transform\n");
371 goto free_and_continue
;
374 /* Check whether the transformation is legal. */
375 if (!lambda_transform_legal_p (trans
, depth
, dependence_relations
))
378 fprintf (dump_file
, "Can't transform loop, transform is illegal:\n");
379 goto free_and_continue
;
382 before
= gcc_loopnest_to_lambda_loopnest (loop_nest
, &oldivs
,
383 &invariants
, &lambda_obstack
);
386 goto free_and_continue
;
390 fprintf (dump_file
, "Before:\n");
391 print_lambda_loopnest (dump_file
, before
, 'i');
394 after
= lambda_loopnest_transform (before
, trans
, &lambda_obstack
);
398 fprintf (dump_file
, "After:\n");
399 print_lambda_loopnest (dump_file
, after
, 'u');
402 lambda_loopnest_to_gcc_loopnest (loop_nest
, oldivs
, invariants
,
404 after
, trans
, &lambda_obstack
);
408 fprintf (dump_file
, "Successfully transformed loop.\n");
411 obstack_free (&lambda_obstack
, NULL
);
412 free_dependence_relations (dependence_relations
);
413 free_data_refs (datarefs
);
414 VEC_free (loop_p
, heap
, nest
);
417 FOR_EACH_VEC_ELT (gimple
, remove_ivs
, i
, oldiv_stmt
)
418 remove_iv (oldiv_stmt
);
420 VEC_free (tree
, heap
, oldivs
);
421 VEC_free (tree
, heap
, invariants
);
422 VEC_free (gimple
, heap
, remove_ivs
);
426 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa_full_phi
);