EnumSet*.class: Regenerate
[official-gcc.git] / gcc / tree-loop-linear.c
blob3fb4d05dbe1d2c1ac0e3542d69dd25225f8153e2
1 /* Linear Loop transforms
2 Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-pass.h"
43 #include "lambda.h"
45 /* Linear loop transforms include any composition of interchange,
46 scaling, skewing, and reversal. They are used to change the
47 iteration order of loop nests in order to optimize data locality of
48 traversals, or remove dependences that prevent
49 parallelization/vectorization/etc.
51 TODO: Determine reuse vectors/matrix and use it to determine optimal
52 transform matrix for locality purposes.
53 TODO: Completion of partial transforms. */
55 /* Gather statistics for loop interchange. LOOP is the loop being
56 considered. The first loop in the considered loop nest is
57 FIRST_LOOP, and consequently, the index of the considered loop is
58 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
60 Initializes:
61 - DEPENDENCE_STEPS the sum of all the data dependence distances
62 carried by loop LOOP,
64 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
65 for which the loop LOOP is not carrying any dependence,
67 - ACCESS_STRIDES the sum of all the strides in LOOP.
69 Example: for the following loop,
71 | loop_1 runs 1335 times
72 | loop_2 runs 1335 times
73 | A[{{0, +, 1}_1, +, 1335}_2]
74 | B[{{0, +, 1}_1, +, 1335}_2]
75 | endloop_2
76 | A[{0, +, 1336}_1]
77 | endloop_1
79 gather_interchange_stats (in loop_1) will return
80 DEPENDENCE_STEPS = 3002
81 NB_DEPS_NOT_CARRIED_BY_LOOP = 5
82 ACCESS_STRIDES = 10694
84 gather_interchange_stats (in loop_2) will return
85 DEPENDENCE_STEPS = 3000
86 NB_DEPS_NOT_CARRIED_BY_LOOP = 7
87 ACCESS_STRIDES = 8010
90 static void
91 gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations,
92 VEC (data_reference_p, heap) *datarefs,
93 struct loop *loop,
94 struct loop *first_loop,
95 unsigned int *dependence_steps,
96 unsigned int *nb_deps_not_carried_by_loop,
97 double_int *access_strides)
99 unsigned int i, j;
100 struct data_dependence_relation *ddr;
101 struct data_reference *dr;
103 *dependence_steps = 0;
104 *nb_deps_not_carried_by_loop = 0;
105 *access_strides = double_int_zero;
107 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
109 /* If we don't know anything about this dependence, or the distance
110 vector is NULL, or there is no dependence, then there is no reuse of
111 data. */
112 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
113 || DDR_ARE_DEPENDENT (ddr) == chrec_known
114 || DDR_NUM_DIST_VECTS (ddr) == 0)
115 continue;
117 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
119 int dist = DDR_DIST_VECT (ddr, j)[loop_depth (loop) - loop_depth (first_loop)];
121 if (dist == 0)
122 (*nb_deps_not_carried_by_loop) += 1;
124 else if (dist < 0)
125 (*dependence_steps) += -dist;
127 else
128 (*dependence_steps) += dist;
132 /* Compute the access strides. */
133 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
135 unsigned int it;
136 tree ref = DR_REF (dr);
137 tree stmt = DR_STMT (dr);
138 struct loop *stmt_loop = loop_containing_stmt (stmt);
139 struct loop *inner_loop = first_loop->inner;
141 if (inner_loop != stmt_loop
142 && !flow_loop_nested_p (inner_loop, stmt_loop))
143 continue;
145 for (it = 0; it < DR_NUM_DIMENSIONS (dr);
146 it++, ref = TREE_OPERAND (ref, 0))
148 tree chrec = DR_ACCESS_FN (dr, it);
149 tree tstride = evolution_part_in_loop_num (chrec, loop->num);
150 tree array_size = TYPE_SIZE (TREE_TYPE (ref));
151 double_int dstride;
153 if (tstride == NULL_TREE
154 || array_size == NULL_TREE
155 || TREE_CODE (tstride) != INTEGER_CST
156 || TREE_CODE (array_size) != INTEGER_CST)
157 continue;
159 dstride = double_int_mul (tree_to_double_int (array_size),
160 tree_to_double_int (tstride));
161 (*access_strides) = double_int_add (*access_strides, dstride);
166 /* Attempt to apply interchange transformations to TRANS to maximize the
167 spatial and temporal locality of the loop.
168 Returns the new transform matrix. The smaller the reuse vector
169 distances in the inner loops, the fewer the cache misses.
170 FIRST_LOOP is the loop->num of the first loop in the analyzed loop
171 nest. */
174 static lambda_trans_matrix
175 try_interchange_loops (lambda_trans_matrix trans,
176 unsigned int depth,
177 VEC (ddr_p, heap) *dependence_relations,
178 VEC (data_reference_p, heap) *datarefs,
179 struct loop *first_loop)
181 struct loop *loop_i;
182 struct loop *loop_j;
183 unsigned int dependence_steps_i, dependence_steps_j;
184 double_int access_strides_i, access_strides_j;
185 unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
186 struct data_dependence_relation *ddr;
188 if (VEC_length (ddr_p, dependence_relations) == 0)
189 return trans;
191 /* When there is an unknown relation in the dependence_relations, we
192 know that it is no worth looking at this loop nest: give up. */
193 ddr = VEC_index (ddr_p, dependence_relations, 0);
194 if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
195 return trans;
197 /* LOOP_I is always the outer loop. */
198 for (loop_j = first_loop->inner;
199 loop_j;
200 loop_j = loop_j->inner)
201 for (loop_i = first_loop;
202 loop_depth (loop_i) < loop_depth (loop_j);
203 loop_i = loop_i->inner)
205 gather_interchange_stats (dependence_relations, datarefs,
206 loop_i, first_loop,
207 &dependence_steps_i,
208 &nb_deps_not_carried_by_i,
209 &access_strides_i);
210 gather_interchange_stats (dependence_relations, datarefs,
211 loop_j, first_loop,
212 &dependence_steps_j,
213 &nb_deps_not_carried_by_j,
214 &access_strides_j);
216 /* Heuristics for loop interchange profitability:
218 1. (spatial locality) Inner loops should have smallest
219 dependence steps.
221 2. (spatial locality) Inner loops should contain more
222 dependence relations not carried by the loop.
224 3. (temporal locality) Inner loops should have smallest
225 array access strides.
227 if (dependence_steps_i < dependence_steps_j
228 || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
229 || double_int_ucmp (access_strides_i, access_strides_j) < 0)
231 lambda_matrix_row_exchange (LTM_MATRIX (trans),
232 loop_depth (loop_i) - loop_depth (first_loop),
233 loop_depth (loop_j) - loop_depth (first_loop));
234 /* Validate the resulting matrix. When the transformation
235 is not valid, reverse to the previous transformation. */
236 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
237 lambda_matrix_row_exchange (LTM_MATRIX (trans),
238 loop_depth (loop_i) - loop_depth (first_loop),
239 loop_depth (loop_j) - loop_depth (first_loop));
243 return trans;
246 /* Perform a set of linear transforms on loops. */
248 void
249 linear_transform_loops (void)
251 bool modified = false;
252 loop_iterator li;
253 VEC(tree,heap) *oldivs = NULL;
254 VEC(tree,heap) *invariants = NULL;
255 struct loop *loop_nest;
257 FOR_EACH_LOOP (li, loop_nest, 0)
259 unsigned int depth = 0;
260 VEC (ddr_p, heap) *dependence_relations;
261 VEC (data_reference_p, heap) *datarefs;
262 struct loop *temp;
263 lambda_loopnest before, after;
264 lambda_trans_matrix trans;
265 bool problem = false;
266 /* If it's not a loop nest, we don't want it.
267 We also don't handle sibling loops properly,
268 which are loops of the following form:
269 for (i = 0; i < 50; i++)
271 for (j = 0; j < 50; j++)
275 for (j = 0; j < 50; j++)
279 } */
280 if (!loop_nest->inner || !single_exit (loop_nest))
281 continue;
282 VEC_truncate (tree, oldivs, 0);
283 VEC_truncate (tree, invariants, 0);
284 depth = 1;
285 for (temp = loop_nest->inner; temp; temp = temp->inner)
287 /* If we have a sibling loop or multiple exit edges, jump ship. */
288 if (temp->next || !single_exit (temp))
290 problem = true;
291 break;
293 depth ++;
295 if (problem)
296 continue;
298 /* Analyze data references and dependence relations using scev. */
300 datarefs = VEC_alloc (data_reference_p, heap, 10);
301 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
302 compute_data_dependences_for_loop (loop_nest, true, &datarefs,
303 &dependence_relations);
305 if (dump_file && (dump_flags & TDF_DETAILS))
306 dump_ddrs (dump_file, dependence_relations);
308 /* Build the transformation matrix. */
309 trans = lambda_trans_matrix_new (depth, depth);
310 lambda_matrix_id (LTM_MATRIX (trans), depth);
311 trans = try_interchange_loops (trans, depth, dependence_relations,
312 datarefs, loop_nest);
314 if (lambda_trans_matrix_id_p (trans))
316 if (dump_file)
317 fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
318 goto free_and_continue;
321 /* Check whether the transformation is legal. */
322 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
324 if (dump_file)
325 fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
326 goto free_and_continue;
329 before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
330 &invariants);
332 if (!before)
333 goto free_and_continue;
335 if (dump_file)
337 fprintf (dump_file, "Before:\n");
338 print_lambda_loopnest (dump_file, before, 'i');
341 after = lambda_loopnest_transform (before, trans);
343 if (dump_file)
345 fprintf (dump_file, "After:\n");
346 print_lambda_loopnest (dump_file, after, 'u');
349 lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
350 after, trans);
351 modified = true;
353 if (dump_file)
354 fprintf (dump_file, "Successfully transformed loop.\n");
356 free_and_continue:
357 free_dependence_relations (dependence_relations);
358 free_data_refs (datarefs);
361 VEC_free (tree, heap, oldivs);
362 VEC_free (tree, heap, invariants);
363 scev_reset ();
365 if (modified)
366 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);