Mark as release
[official-gcc.git] / gcc / tree-loop-linear.c
blobd27fa2aa375ff2ed2e2d09e6072f48414d44e165
1 /* Linear Loop transforms
2 Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-pass.h"
43 #include "lambda.h"
45 /* Linear loop transforms include any composition of interchange,
46 scaling, skewing, and reversal. They are used to change the
47 iteration order of loop nests in order to optimize data locality of
48 traversals, or remove dependences that prevent
49 parallelization/vectorization/etc.
51 TODO: Determine reuse vectors/matrix and use it to determine optimal
52 transform matrix for locality purposes.
53 TODO: Completion of partial transforms. */
55 /* Gather statistics for loop interchange. LOOP is the loop being
56 considered. The first loop in the considered loop nest is
57 FIRST_LOOP, and consequently, the index of the considered loop is
58 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
60 Initializes:
61 - DEPENDENCE_STEPS the sum of all the data dependence distances
62 carried by loop LOOP,
64 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
65 for which the loop LOOP is not carrying any dependence,
67 - ACCESS_STRIDES the sum of all the strides in LOOP.
69 Example: for the following loop,
71 | loop_1 runs 1335 times
72 | loop_2 runs 1335 times
73 | A[{{0, +, 1}_1, +, 1335}_2]
74 | B[{{0, +, 1}_1, +, 1335}_2]
75 | endloop_2
76 | A[{0, +, 1336}_1]
77 | endloop_1
79 gather_interchange_stats (in loop_1) will return
80 DEPENDENCE_STEPS = 3002
81 NB_DEPS_NOT_CARRIED_BY_LOOP = 5
82 ACCESS_STRIDES = 10694
84 gather_interchange_stats (in loop_2) will return
85 DEPENDENCE_STEPS = 3000
86 NB_DEPS_NOT_CARRIED_BY_LOOP = 7
87 ACCESS_STRIDES = 8010
90 static void
91 gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations,
92 VEC (data_reference_p, heap) *datarefs,
93 struct loop *loop,
94 struct loop *first_loop,
95 unsigned int *dependence_steps,
96 unsigned int *nb_deps_not_carried_by_loop,
97 unsigned int *access_strides)
99 unsigned int i, j;
100 struct data_dependence_relation *ddr;
101 struct data_reference *dr;
103 *dependence_steps = 0;
104 *nb_deps_not_carried_by_loop = 0;
105 *access_strides = 0;
107 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
109 /* If we don't know anything about this dependence, or the distance
110 vector is NULL, or there is no dependence, then there is no reuse of
111 data. */
112 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
113 || DDR_ARE_DEPENDENT (ddr) == chrec_known
114 || DDR_NUM_DIST_VECTS (ddr) == 0)
115 continue;
117 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
119 int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
121 if (dist == 0)
122 (*nb_deps_not_carried_by_loop) += 1;
124 else if (dist < 0)
125 (*dependence_steps) += -dist;
127 else
128 (*dependence_steps) += dist;
132 /* Compute the access strides. */
133 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
135 unsigned int it;
136 tree stmt = DR_STMT (dr);
137 struct loop *stmt_loop = loop_containing_stmt (stmt);
138 struct loop *inner_loop = first_loop->inner;
140 if (inner_loop != stmt_loop
141 && !flow_loop_nested_p (inner_loop, stmt_loop))
142 continue;
143 for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
145 tree chrec = DR_ACCESS_FN (dr, it);
146 tree tstride = evolution_part_in_loop_num
147 (chrec, loop->num);
149 if (tstride == NULL_TREE
150 || TREE_CODE (tstride) != INTEGER_CST)
151 continue;
153 (*access_strides) += int_cst_value (tstride);
158 /* Attempt to apply interchange transformations to TRANS to maximize the
159 spatial and temporal locality of the loop.
160 Returns the new transform matrix. The smaller the reuse vector
161 distances in the inner loops, the fewer the cache misses.
162 FIRST_LOOP is the loop->num of the first loop in the analyzed loop
163 nest. */
166 static lambda_trans_matrix
167 try_interchange_loops (lambda_trans_matrix trans,
168 unsigned int depth,
169 VEC (ddr_p, heap) *dependence_relations,
170 VEC (data_reference_p, heap) *datarefs,
171 struct loop *first_loop)
173 struct loop *loop_i;
174 struct loop *loop_j;
175 unsigned int dependence_steps_i, dependence_steps_j;
176 unsigned int access_strides_i, access_strides_j;
177 unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
178 struct data_dependence_relation *ddr;
180 if (VEC_length (ddr_p, dependence_relations) == 0)
181 return trans;
183 /* When there is an unknown relation in the dependence_relations, we
184 know that it is no worth looking at this loop nest: give up. */
185 ddr = VEC_index (ddr_p, dependence_relations, 0);
186 if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
187 return trans;
189 /* LOOP_I is always the outer loop. */
190 for (loop_j = first_loop->inner;
191 loop_j;
192 loop_j = loop_j->inner)
193 for (loop_i = first_loop;
194 loop_i->depth < loop_j->depth;
195 loop_i = loop_i->inner)
197 gather_interchange_stats (dependence_relations, datarefs,
198 loop_i, first_loop,
199 &dependence_steps_i,
200 &nb_deps_not_carried_by_i,
201 &access_strides_i);
202 gather_interchange_stats (dependence_relations, datarefs,
203 loop_j, first_loop,
204 &dependence_steps_j,
205 &nb_deps_not_carried_by_j,
206 &access_strides_j);
208 /* Heuristics for loop interchange profitability:
210 1. (spatial locality) Inner loops should have smallest
211 dependence steps.
213 2. (spatial locality) Inner loops should contain more
214 dependence relations not carried by the loop.
216 3. (temporal locality) Inner loops should have smallest
217 array access strides.
219 if (dependence_steps_i < dependence_steps_j
220 || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
221 || access_strides_i < access_strides_j)
223 lambda_matrix_row_exchange (LTM_MATRIX (trans),
224 loop_i->depth - first_loop->depth,
225 loop_j->depth - first_loop->depth);
226 /* Validate the resulting matrix. When the transformation
227 is not valid, reverse to the previous transformation. */
228 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
229 lambda_matrix_row_exchange (LTM_MATRIX (trans),
230 loop_i->depth - first_loop->depth,
231 loop_j->depth - first_loop->depth);
235 return trans;
238 /* Perform a set of linear transforms on LOOPS. */
240 void
241 linear_transform_loops (struct loops *loops)
243 bool modified = false;
244 unsigned int i;
245 VEC(tree,heap) *oldivs = NULL;
246 VEC(tree,heap) *invariants = NULL;
248 for (i = 1; i < loops->num; i++)
250 unsigned int depth = 0;
251 VEC (ddr_p, heap) *dependence_relations;
252 VEC (data_reference_p, heap) *datarefs;
253 struct loop *loop_nest = loops->parray[i];
254 struct loop *temp;
255 lambda_loopnest before, after;
256 lambda_trans_matrix trans;
257 bool problem = false;
258 /* If it's not a loop nest, we don't want it.
259 We also don't handle sibling loops properly,
260 which are loops of the following form:
261 for (i = 0; i < 50; i++)
263 for (j = 0; j < 50; j++)
267 for (j = 0; j < 50; j++)
271 } */
272 if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit)
273 continue;
274 VEC_truncate (tree, oldivs, 0);
275 VEC_truncate (tree, invariants, 0);
276 depth = 1;
277 for (temp = loop_nest->inner; temp; temp = temp->inner)
279 /* If we have a sibling loop or multiple exit edges, jump ship. */
280 if (temp->next || !temp->single_exit)
282 problem = true;
283 break;
285 depth ++;
287 if (problem)
288 continue;
290 /* Analyze data references and dependence relations using scev. */
292 datarefs = VEC_alloc (data_reference_p, heap, 10);
293 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
294 compute_data_dependences_for_loop (loop_nest, true, &datarefs,
295 &dependence_relations);
297 if (dump_file && (dump_flags & TDF_DETAILS))
298 dump_ddrs (dump_file, dependence_relations);
300 /* Build the transformation matrix. */
301 trans = lambda_trans_matrix_new (depth, depth);
302 lambda_matrix_id (LTM_MATRIX (trans), depth);
303 trans = try_interchange_loops (trans, depth, dependence_relations,
304 datarefs, loop_nest);
306 if (lambda_trans_matrix_id_p (trans))
308 if (dump_file)
309 fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
310 goto free_and_continue;
313 /* Check whether the transformation is legal. */
314 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
316 if (dump_file)
317 fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
318 goto free_and_continue;
321 before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
322 &invariants);
324 if (!before)
325 goto free_and_continue;
327 if (dump_file)
329 fprintf (dump_file, "Before:\n");
330 print_lambda_loopnest (dump_file, before, 'i');
333 after = lambda_loopnest_transform (before, trans);
335 if (dump_file)
337 fprintf (dump_file, "After:\n");
338 print_lambda_loopnest (dump_file, after, 'u');
341 lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
342 after, trans);
343 modified = true;
345 if (dump_file)
346 fprintf (dump_file, "Successfully transformed loop.\n");
348 free_and_continue:
349 free_dependence_relations (dependence_relations);
350 free_data_refs (datarefs);
353 VEC_free (tree, heap, oldivs);
354 VEC_free (tree, heap, invariants);
355 scev_reset ();
357 if (modified)
358 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);