/* From official-gcc.git: gcc/cfgloopanal.c
   (blob 36e0d152265fb91a44ea11ce67e6c9ee424215c0, 2009-07-17,
   Richard Guenther <rguenther@suse.de>).  */
/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "obstack.h"
28 #include "basic-block.h"
29 #include "cfgloop.h"
30 #include "expr.h"
31 #include "output.h"
32 #include "graphds.h"
33 #include "params.h"
35 /* Checks whether BB is executed exactly once in each LOOP iteration. */
37 bool
38 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
40 /* It must be executed at least once each iteration. */
41 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
42 return false;
44 /* And just once. */
45 if (bb->loop_father != loop)
46 return false;
48 /* But this was not enough. We might have some irreducible loop here. */
49 if (bb->flags & BB_IRREDUCIBLE_LOOP)
50 return false;
52 return true;
55 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
56 throw away all latch edges and mark blocks inside any remaining cycle.
57 Everything is a bit complicated due to fact we do not want to do this
58 for parts of cycles that only "pass" through some loop -- i.e. for
59 each cycle, we want to mark blocks that belong directly to innermost
60 loop containing the whole cycle.
62 LOOPS is the loop tree. */
64 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
65 #define BB_REPR(BB) ((BB)->index + 1)
67 bool
68 mark_irreducible_loops (void)
70 basic_block act;
71 struct graph_edge *ge;
72 edge e;
73 edge_iterator ei;
74 int src, dest;
75 unsigned depth;
76 struct graph *g;
77 int num = number_of_loops ();
78 struct loop *cloop;
79 bool irred_loop_found = false;
80 int i;
82 gcc_assert (current_loops != NULL);
84 /* Reset the flags. */
85 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
87 act->flags &= ~BB_IRREDUCIBLE_LOOP;
88 FOR_EACH_EDGE (e, ei, act->succs)
89 e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
92 /* Create the edge lists. */
93 g = new_graph (last_basic_block + num);
95 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
96 FOR_EACH_EDGE (e, ei, act->succs)
98 /* Ignore edges to exit. */
99 if (e->dest == EXIT_BLOCK_PTR)
100 continue;
102 src = BB_REPR (act);
103 dest = BB_REPR (e->dest);
105 /* Ignore latch edges. */
106 if (e->dest->loop_father->header == e->dest
107 && e->dest->loop_father->latch == act)
108 continue;
110 /* Edges inside a single loop should be left where they are. Edges
111 to subloop headers should lead to representative of the subloop,
112 but from the same place.
114 Edges exiting loops should lead from representative
115 of the son of nearest common ancestor of the loops in that
116 act lays. */
118 if (e->dest->loop_father->header == e->dest)
119 dest = LOOP_REPR (e->dest->loop_father);
121 if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
123 depth = 1 + loop_depth (find_common_loop (act->loop_father,
124 e->dest->loop_father));
125 if (depth == loop_depth (act->loop_father))
126 cloop = act->loop_father;
127 else
128 cloop = VEC_index (loop_p, act->loop_father->superloops, depth);
130 src = LOOP_REPR (cloop);
133 add_edge (g, src, dest)->data = e;
136 /* Find the strongly connected components. */
137 graphds_scc (g, NULL);
139 /* Mark the irreducible loops. */
140 for (i = 0; i < g->n_vertices; i++)
141 for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
143 edge real = (edge) ge->data;
144 /* edge E in graph G is irreducible if it connects two vertices in the
145 same scc. */
147 /* All edges should lead from a component with higher number to the
148 one with lower one. */
149 gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
151 if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
152 continue;
154 real->flags |= EDGE_IRREDUCIBLE_LOOP;
155 irred_loop_found = true;
156 if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
157 real->src->flags |= BB_IRREDUCIBLE_LOOP;
160 free_graph (g);
162 loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
163 return irred_loop_found;
166 /* Counts number of insns inside LOOP. */
168 num_loop_insns (const struct loop *loop)
170 basic_block *bbs, bb;
171 unsigned i, ninsns = 0;
172 rtx insn;
174 bbs = get_loop_body (loop);
175 for (i = 0; i < loop->num_nodes; i++)
177 bb = bbs[i];
178 ninsns++;
179 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
180 if (INSN_P (insn))
181 ninsns++;
183 free(bbs);
185 return ninsns;
188 /* Counts number of insns executed on average per iteration LOOP. */
190 average_num_loop_insns (const struct loop *loop)
192 basic_block *bbs, bb;
193 unsigned i, binsns, ninsns, ratio;
194 rtx insn;
196 ninsns = 0;
197 bbs = get_loop_body (loop);
198 for (i = 0; i < loop->num_nodes; i++)
200 bb = bbs[i];
202 binsns = 1;
203 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
204 if (INSN_P (insn))
205 binsns++;
207 ratio = loop->header->frequency == 0
208 ? BB_FREQ_MAX
209 : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
210 ninsns += binsns * ratio;
212 free(bbs);
214 ninsns /= BB_FREQ_MAX;
215 if (!ninsns)
216 ninsns = 1; /* To avoid division by zero. */
218 return ninsns;
221 /* Returns expected number of iterations of LOOP, according to
222 measured or guessed profile. No bounding is done on the
223 value. */
225 gcov_type
226 expected_loop_iterations_unbounded (const struct loop *loop)
228 edge e;
229 edge_iterator ei;
231 if (loop->latch->count || loop->header->count)
233 gcov_type count_in, count_latch, expected;
235 count_in = 0;
236 count_latch = 0;
238 FOR_EACH_EDGE (e, ei, loop->header->preds)
239 if (e->src == loop->latch)
240 count_latch = e->count;
241 else
242 count_in += e->count;
244 if (count_in == 0)
245 expected = count_latch * 2;
246 else
247 expected = (count_latch + count_in - 1) / count_in;
249 return expected;
251 else
253 int freq_in, freq_latch;
255 freq_in = 0;
256 freq_latch = 0;
258 FOR_EACH_EDGE (e, ei, loop->header->preds)
259 if (e->src == loop->latch)
260 freq_latch = EDGE_FREQUENCY (e);
261 else
262 freq_in += EDGE_FREQUENCY (e);
264 if (freq_in == 0)
265 return freq_latch * 2;
267 return (freq_latch + freq_in - 1) / freq_in;
271 /* Returns expected number of LOOP iterations. The returned value is bounded
272 by REG_BR_PROB_BASE. */
274 unsigned
275 expected_loop_iterations (const struct loop *loop)
277 gcov_type expected = expected_loop_iterations_unbounded (loop);
278 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
281 /* Returns the maximum level of nesting of subloops of LOOP. */
283 unsigned
284 get_loop_level (const struct loop *loop)
286 const struct loop *ploop;
287 unsigned mx = 0, l;
289 for (ploop = loop->inner; ploop; ploop = ploop->next)
291 l = get_loop_level (ploop);
292 if (l >= mx)
293 mx = l + 1;
295 return mx;
298 /* Returns estimate on cost of computing SEQ. */
300 static unsigned
301 seq_cost (const_rtx seq, bool speed)
303 unsigned cost = 0;
304 rtx set;
306 for (; seq; seq = NEXT_INSN (seq))
308 set = single_set (seq);
309 if (set)
310 cost += rtx_cost (set, SET, speed);
311 else
312 cost++;
315 return cost;
/* The properties of the target.  Filled in by init_set_costs and consumed
   by estimate_reg_pressure_cost.  */

unsigned target_avail_regs;	/* Number of available registers.  */
unsigned target_res_regs;	/* Number of registers reserved for temporary
				   expressions.  */
unsigned target_reg_cost[2];	/* The cost for register when there still
				   is some reserve, but we are approaching
				   the number of available registers.
				   Indexed by the speed/size flag.  */
unsigned target_spill_cost[2];	/* The cost for register when we need
				   to spill.  Indexed likewise.  */
/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
  /* Scratch registers and a memory location used to materialize move
     sequences; numbered from FIRST_PSEUDO_REGISTER so they cannot clash
     with any hard register.  */
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  /* Count the non-fixed hard registers in GENERAL_REGS.  */
  target_avail_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  /* Compute each cost once per value of the speed/size flag.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      /* Cost of a register-to-register move.  */
      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      /* Cost of a spill: store to memory plus reload.  */
      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  /* Restore the default profile state changed via crtl above.  */
  default_rtl_profile ();
}
378 /* Estimates cost of increased register pressure caused by making N_NEW new
379 registers live around the loop. N_OLD is the number of registers live
380 around the loop. */
382 unsigned
383 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
385 unsigned cost;
386 unsigned regs_needed = n_new + n_old;
388 /* If we have enough registers, we should use them and not restrict
389 the transformations unnecessarily. */
390 if (regs_needed + target_res_regs <= target_avail_regs)
391 return 0;
393 if (regs_needed <= target_avail_regs)
394 /* If we are close to running out of registers, try to preserve
395 them. */
396 cost = target_reg_cost [speed] * n_new;
397 else
398 /* If we run out of registers, it is very expensive to add another
399 one. */
400 cost = target_spill_cost [speed] * n_new;
402 if (optimize && (flag_ira_region == IRA_REGION_ALL
403 || flag_ira_region == IRA_REGION_MIXED)
404 && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
405 /* IRA regional allocation deals with high register pressure
406 better. So decrease the cost (to do more accurate the cost
407 calculation for IRA, we need to know how many registers lives
408 through the loop transparently). */
409 cost /= 2;
411 return cost;
414 /* Sets EDGE_LOOP_EXIT flag for all loop exits. */
416 void
417 mark_loop_exit_edges (void)
419 basic_block bb;
420 edge e;
422 if (number_of_loops () <= 1)
423 return;
425 FOR_EACH_BB (bb)
427 edge_iterator ei;
429 FOR_EACH_EDGE (e, ei, bb->succs)
431 if (loop_outer (bb->loop_father)
432 && loop_exit_edge_p (bb->loop_father, e))
433 e->flags |= EDGE_LOOP_EXIT;
434 else
435 e->flags &= ~EDGE_LOOP_EXIT;