PR tree-optimization/17468
[official-gcc.git] / gcc / tree-loop-linear.c
blob856867e9b668a10dc6d6c9a35c87dacec426bb42
1 /* Linear Loop transforms
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "errors.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "target.h"
32 #include "rtl.h"
33 #include "basic-block.h"
34 #include "diagnostic.h"
35 #include "tree-flow.h"
36 #include "tree-dump.h"
37 #include "timevar.h"
38 #include "cfgloop.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "tree-chrec.h"
42 #include "tree-data-ref.h"
43 #include "tree-scalar-evolution.h"
44 #include "tree-pass.h"
45 #include "varray.h"
46 #include "lambda.h"
48 /* Linear loop transforms include any composition of interchange,
49 scaling, skewing, and reversal. They are used to change the
50 iteration order of loop nests in order to optimize data locality of
51 traversals, or remove dependences that prevent
52 parallelization/vectorization/etc.
54 TODO: Determine reuse vectors/matrix and use it to determine optimal
55 transform matrix for locality purposes.
56 TODO: Completion of partial transforms. */
58 /* Gather statistics for loop interchange. Initializes SUM the sum of
59 all the data dependence distances carried by loop LOOP_NUMBER.
60 NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
61 dependence relations for which the loop LOOP_NUMBER is not carrying
62 any dependence. */
64 static void
65 gather_interchange_stats (varray_type dependence_relations,
66 unsigned int loop_number,
67 unsigned int *sum,
68 unsigned int *nb_deps_not_carried_by_loop)
70 unsigned int i;
72 *sum = 0;
73 *nb_deps_not_carried_by_loop = 0;
74 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
76 int dist;
77 struct data_dependence_relation *ddr =
78 (struct data_dependence_relation *)
79 VARRAY_GENERIC_PTR (dependence_relations, i);
81 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
83 /* Some constants will need tweaking, but not something that should
84 be user-accessible. Thus, no --param. */
85 *sum += 100;
86 continue;
89 /* When we know that there is no dependence, we know that there
90 is no reuse of the data. */
91 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
93 /* Ditto on the no --param here */
94 *sum += 1000;
95 continue;
98 dist = DDR_DIST_VECT (ddr)[loop_number];
99 if (dist == 0)
100 *nb_deps_not_carried_by_loop++;
101 else if (dist < 0)
102 *sum += -dist;
103 else
104 *sum += dist;
108 /* Apply to TRANS any loop interchange that minimize inner loop steps.
109 DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
110 of dependence relations.
111 Returns the new transform matrix. The smaller the reuse vector
112 distances in the inner loops, the fewer the cache misses. */
114 static lambda_trans_matrix
115 try_interchange_loops (lambda_trans_matrix trans,
116 unsigned int depth,
117 varray_type dependence_relations)
119 unsigned int loop_i, loop_j;
120 unsigned int steps_i, steps_j;
121 unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
122 struct data_dependence_relation *ddr;
124 /* When there is an unknown relation in the dependence_relations, we
125 know that it is no worth looking at this loop nest: give up. */
126 ddr = (struct data_dependence_relation *)
127 VARRAY_GENERIC_PTR (dependence_relations, 0);
128 if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
129 return trans;
131 /* LOOP_I is always the outer loop. */
132 for (loop_j = 1; loop_j < depth; loop_j++)
133 for (loop_i = 0; loop_i < loop_j; loop_i++)
135 gather_interchange_stats (dependence_relations, loop_i, &steps_i,
136 &nb_deps_not_carried_by_i);
137 gather_interchange_stats (dependence_relations, loop_j, &steps_j,
138 &nb_deps_not_carried_by_j);
140 /* Heuristics for loop interchange profitability:
141 1. Inner loops should have smallest steps.
142 2. Inner loops should contain more dependence relations not
143 carried by the loop.
145 if (steps_i < steps_j
146 || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
148 lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
150 /* Validate the resulting matrix. When the transformation
151 is not valid, reverse to the previous matrix.
153 FIXME: In this case of transformation it could be
154 faster to verify the validity of the interchange
155 without applying the transform to the matrix. But for
156 the moment do it cleanly: this is just a prototype. */
157 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
158 lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
162 return trans;
165 /* Perform a set of linear transforms on LOOPS. */
167 void
168 linear_transform_loops (struct loops *loops)
170 unsigned int i;
172 for (i = 1; i < loops->num; i++)
174 unsigned int depth = 0;
175 varray_type datarefs;
176 varray_type dependence_relations;
177 struct loop *loop_nest = loops->parray[i];
178 struct loop *temp;
179 VEC (tree) *oldivs;
180 VEC (tree) *invariants;
181 lambda_loopnest before, after;
182 lambda_trans_matrix trans;
183 bool problem = false;
184 /* If it's not a loop nest, we don't want it.
185 We also don't handle sibling loops properly,
186 which are loops of the following form:
187 for (i = 0; i < 50; i++)
189 for (j = 0; j < 50; j++)
193 for (j = 0; j < 50; j++)
197 } */
198 if (!loop_nest->inner)
199 continue;
200 for (temp = loop_nest; temp; temp = temp->inner)
202 flow_loop_scan (temp, LOOP_ALL);
203 /* If we have a sibling loop or multiple exit edges, jump ship. */
204 if (temp->next || temp->num_exits != 1)
206 problem = true;
207 break;
209 depth ++;
211 if (problem)
212 continue;
214 /* Analyze data references and dependence relations using scev. */
216 VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
217 VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
218 "dependence_relations");
221 compute_data_dependences_for_loop (depth, loop_nest,
222 &datarefs, &dependence_relations);
223 if (dump_file && (dump_flags & TDF_DETAILS))
225 unsigned int j;
226 for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
228 struct data_dependence_relation *ddr =
229 (struct data_dependence_relation *)
230 VARRAY_GENERIC_PTR (dependence_relations, j);
232 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
234 fprintf (dump_file, "DISTANCE_V (");
235 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr),
236 loops->num);
237 fprintf (dump_file, ")\n");
238 fprintf (dump_file, "DIRECTION_V (");
239 print_lambda_vector (dump_file, DDR_DIR_VECT (ddr),
240 loops->num);
241 fprintf (dump_file, ")\n");
244 fprintf (dump_file, "\n\n");
246 /* Build the transformation matrix. */
247 trans = lambda_trans_matrix_new (depth, depth);
248 lambda_matrix_id (LTM_MATRIX (trans), depth);
249 trans = try_interchange_loops (trans, depth, dependence_relations);
251 /* Check whether the transformation is legal. */
252 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
254 if (dump_file)
255 fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
256 continue;
258 before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
259 &invariants);
260 if (!before)
261 continue;
263 if (dump_file)
265 fprintf (dump_file, "Before:\n");
266 print_lambda_loopnest (dump_file, before, 'i');
269 after = lambda_loopnest_transform (before, trans);
270 if (dump_file)
272 fprintf (dump_file, "After:\n");
273 print_lambda_loopnest (dump_file, after, 'u');
275 lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
276 after, trans);
277 oldivs = NULL;
278 invariants = NULL;
279 free_dependence_relations (dependence_relations);
280 free_data_refs (datarefs);