1 /* Linear Loop transforms
2 Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-pass.h"
45 /* Linear loop transforms include any composition of interchange,
46 scaling, skewing, and reversal. They are used to change the
47 iteration order of loop nests in order to optimize data locality of
48 traversals, or remove dependences that prevent
49 parallelization/vectorization/etc.
51 TODO: Determine reuse vectors/matrix and use it to determine optimal
52 transform matrix for locality purposes.
53 TODO: Completion of partial transforms. */
55 /* Gather statistics for loop interchange. LOOP is the loop being
56 considered. The first loop in the considered loop nest is
57 FIRST_LOOP, and consequently, the index of the considered loop is
58 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
61 - DEPENDENCE_STEPS the sum of all the data dependence distances
64 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
65 for which the loop LOOP is not carrying any dependence,
67 - ACCESS_STRIDES the sum of all the strides in LOOP.
69 Example: for the following loop,
71 | loop_1 runs 1335 times
72 | loop_2 runs 1335 times
73 | A[{{0, +, 1}_1, +, 1335}_2]
74 | B[{{0, +, 1}_1, +, 1335}_2]
79 gather_interchange_stats (in loop_1) will return
80 DEPENDENCE_STEPS = 3002
81 NB_DEPS_NOT_CARRIED_BY_LOOP = 5
82 ACCESS_STRIDES = 10694
84 gather_interchange_stats (in loop_2) will return
85 DEPENDENCE_STEPS = 3000
86 NB_DEPS_NOT_CARRIED_BY_LOOP = 7
91 gather_interchange_stats (VEC (ddr_p
, heap
) *dependence_relations
,
92 VEC (data_reference_p
, heap
) *datarefs
,
94 struct loop
*first_loop
,
95 unsigned int *dependence_steps
,
96 unsigned int *nb_deps_not_carried_by_loop
,
97 unsigned int *access_strides
)
100 struct data_dependence_relation
*ddr
;
101 struct data_reference
*dr
;
103 *dependence_steps
= 0;
104 *nb_deps_not_carried_by_loop
= 0;
107 for (i
= 0; VEC_iterate (ddr_p
, dependence_relations
, i
, ddr
); i
++)
109 /* If we don't know anything about this dependence, or the distance
110 vector is NULL, or there is no dependence, then there is no reuse of
112 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
113 || DDR_ARE_DEPENDENT (ddr
) == chrec_known
114 || DDR_NUM_DIST_VECTS (ddr
) == 0)
117 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
119 int dist
= DDR_DIST_VECT (ddr
, j
)[loop
->depth
- first_loop
->depth
];
122 (*nb_deps_not_carried_by_loop
) += 1;
125 (*dependence_steps
) += -dist
;
128 (*dependence_steps
) += dist
;
132 /* Compute the access strides. */
133 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
136 tree stmt
= DR_STMT (dr
);
137 struct loop
*stmt_loop
= loop_containing_stmt (stmt
);
138 struct loop
*inner_loop
= first_loop
->inner
;
140 if (inner_loop
!= stmt_loop
141 && !flow_loop_nested_p (inner_loop
, stmt_loop
))
143 for (it
= 0; it
< DR_NUM_DIMENSIONS (dr
); it
++)
145 tree chrec
= DR_ACCESS_FN (dr
, it
);
146 tree tstride
= evolution_part_in_loop_num
149 if (tstride
== NULL_TREE
150 || TREE_CODE (tstride
) != INTEGER_CST
)
153 (*access_strides
) += int_cst_value (tstride
);
158 /* Attempt to apply interchange transformations to TRANS to maximize the
159 spatial and temporal locality of the loop.
160 Returns the new transform matrix. The smaller the reuse vector
161 distances in the inner loops, the fewer the cache misses.
162 FIRST_LOOP is the loop->num of the first loop in the analyzed loop
166 static lambda_trans_matrix
167 try_interchange_loops (lambda_trans_matrix trans
,
169 VEC (ddr_p
, heap
) *dependence_relations
,
170 VEC (data_reference_p
, heap
) *datarefs
,
171 struct loop
*first_loop
)
175 unsigned int dependence_steps_i
, dependence_steps_j
;
176 unsigned int access_strides_i
, access_strides_j
;
177 unsigned int nb_deps_not_carried_by_i
, nb_deps_not_carried_by_j
;
178 struct data_dependence_relation
*ddr
;
180 if (VEC_length (ddr_p
, dependence_relations
) == 0)
183 /* When there is an unknown relation in the dependence_relations, we
184 know that it is no worth looking at this loop nest: give up. */
185 ddr
= VEC_index (ddr_p
, dependence_relations
, 0);
186 if (ddr
== NULL
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
189 /* LOOP_I is always the outer loop. */
190 for (loop_j
= first_loop
->inner
;
192 loop_j
= loop_j
->inner
)
193 for (loop_i
= first_loop
;
194 loop_i
->depth
< loop_j
->depth
;
195 loop_i
= loop_i
->inner
)
197 gather_interchange_stats (dependence_relations
, datarefs
,
200 &nb_deps_not_carried_by_i
,
202 gather_interchange_stats (dependence_relations
, datarefs
,
205 &nb_deps_not_carried_by_j
,
208 /* Heuristics for loop interchange profitability:
210 1. (spatial locality) Inner loops should have smallest
213 2. (spatial locality) Inner loops should contain more
214 dependence relations not carried by the loop.
216 3. (temporal locality) Inner loops should have smallest
217 array access strides.
219 if (dependence_steps_i
< dependence_steps_j
220 || nb_deps_not_carried_by_i
> nb_deps_not_carried_by_j
221 || access_strides_i
< access_strides_j
)
223 lambda_matrix_row_exchange (LTM_MATRIX (trans
),
224 loop_i
->depth
- first_loop
->depth
,
225 loop_j
->depth
- first_loop
->depth
);
226 /* Validate the resulting matrix. When the transformation
227 is not valid, reverse to the previous transformation. */
228 if (!lambda_transform_legal_p (trans
, depth
, dependence_relations
))
229 lambda_matrix_row_exchange (LTM_MATRIX (trans
),
230 loop_i
->depth
- first_loop
->depth
,
231 loop_j
->depth
- first_loop
->depth
);
238 /* Perform a set of linear transforms on LOOPS. */
241 linear_transform_loops (struct loops
*loops
)
243 bool modified
= false;
245 VEC(tree
,heap
) *oldivs
= NULL
;
246 VEC(tree
,heap
) *invariants
= NULL
;
248 for (i
= 1; i
< loops
->num
; i
++)
250 unsigned int depth
= 0;
251 VEC (ddr_p
, heap
) *dependence_relations
;
252 VEC (data_reference_p
, heap
) *datarefs
;
253 struct loop
*loop_nest
= loops
->parray
[i
];
255 lambda_loopnest before
, after
;
256 lambda_trans_matrix trans
;
257 bool problem
= false;
258 /* If it's not a loop nest, we don't want it.
259 We also don't handle sibling loops properly,
260 which are loops of the following form:
261 for (i = 0; i < 50; i++)
263 for (j = 0; j < 50; j++)
267 for (j = 0; j < 50; j++)
272 if (!loop_nest
|| !loop_nest
->inner
|| !loop_nest
->single_exit
)
274 VEC_truncate (tree
, oldivs
, 0);
275 VEC_truncate (tree
, invariants
, 0);
277 for (temp
= loop_nest
->inner
; temp
; temp
= temp
->inner
)
279 /* If we have a sibling loop or multiple exit edges, jump ship. */
280 if (temp
->next
|| !temp
->single_exit
)
290 /* Analyze data references and dependence relations using scev. */
292 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
293 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
294 compute_data_dependences_for_loop (loop_nest
, true, &datarefs
,
295 &dependence_relations
);
297 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
298 dump_ddrs (dump_file
, dependence_relations
);
300 /* Build the transformation matrix. */
301 trans
= lambda_trans_matrix_new (depth
, depth
);
302 lambda_matrix_id (LTM_MATRIX (trans
), depth
);
303 trans
= try_interchange_loops (trans
, depth
, dependence_relations
,
304 datarefs
, loop_nest
);
306 if (lambda_trans_matrix_id_p (trans
))
309 fprintf (dump_file
, "Won't transform loop. Optimal transform is the identity transform\n");
310 goto free_and_continue
;
313 /* Check whether the transformation is legal. */
314 if (!lambda_transform_legal_p (trans
, depth
, dependence_relations
))
317 fprintf (dump_file
, "Can't transform loop, transform is illegal:\n");
318 goto free_and_continue
;
321 before
= gcc_loopnest_to_lambda_loopnest (loops
, loop_nest
, &oldivs
,
325 goto free_and_continue
;
329 fprintf (dump_file
, "Before:\n");
330 print_lambda_loopnest (dump_file
, before
, 'i');
333 after
= lambda_loopnest_transform (before
, trans
);
337 fprintf (dump_file
, "After:\n");
338 print_lambda_loopnest (dump_file
, after
, 'u');
341 lambda_loopnest_to_gcc_loopnest (loop_nest
, oldivs
, invariants
,
346 fprintf (dump_file
, "Successfully transformed loop.\n");
349 free_dependence_relations (dependence_relations
);
350 free_data_refs (datarefs
);
353 VEC_free (tree
, heap
, oldivs
);
354 VEC_free (tree
, heap
, invariants
);
358 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa_full_phi
);