gcc/ChangeLog:
[official-gcc.git] / gcc / tracer.c
blobec9b9bac5aecdbd29bb0c51f1d5498076bce86b4
1 /* The tracer pass for the GNU compiler.
2 Contributed by Jan Hubicka, SuSE Labs.
3 Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This pass performs the tail duplication needed for superblock formation.
23 For more information see:
25 Design and Analysis of Profile-Based Optimization in Compaq's
26 Compilation Tools for Alpha; Journal of Instruction-Level
27 Parallelism 3 (2000) 1-25
29 Unlike Compaq's implementation we don't do the loop peeling as most
30 probably a better job can be done by a special pass and we don't
31 need to worry too much about the code size implications as the tail
32 duplicates are crossjumped again if optimizations are not
33 performed. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "hard-reg-set.h"
43 #include "basic-block.h"
44 #include "output.h"
45 #include "cfglayout.h"
46 #include "fibheap.h"
47 #include "flags.h"
48 #include "params.h"
49 #include "coverage.h"
51 static int count_insns PARAMS ((basic_block));
52 static bool ignore_bb_p PARAMS ((basic_block));
53 static bool better_p PARAMS ((edge, edge));
54 static edge find_best_successor PARAMS ((basic_block));
55 static edge find_best_predecessor PARAMS ((basic_block));
56 static int find_trace PARAMS ((basic_block, basic_block *));
57 static void tail_duplicate PARAMS ((void));
58 static void layout_superblocks PARAMS ((void));
60 /* Minimal outgoing edge probability considered for superblock formation. */
61 static int probability_cutoff;
62 static int branch_ratio_cutoff;
64 /* Return true if BB has been seen - it is connected to some trace
65 already. */
67 #define seen(bb) (RBI (bb)->visited || RBI (bb)->next)
69 /* Return true if we should ignore the basic block for purposes of tracing. */
70 static bool
71 ignore_bb_p (bb)
72 basic_block bb;
74 if (bb->index < 0)
75 return true;
76 if (!maybe_hot_bb_p (bb))
77 return true;
78 return false;
81 /* Return number of instructions in the block. */
83 static int
84 count_insns (bb)
85 basic_block bb;
87 rtx insn;
88 int n = 0;
90 for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
91 if (active_insn_p (insn))
92 n++;
93 return n;
96 /* Return true if E1 is more frequent than E2. */
97 static bool
98 better_p (e1, e2)
99 edge e1, e2;
101 if (e1->count != e2->count)
102 return e1->count > e2->count;
103 if (e1->src->frequency * e1->probability !=
104 e2->src->frequency * e2->probability)
105 return (e1->src->frequency * e1->probability
106 > e2->src->frequency * e2->probability);
107 /* This is needed to avoid changes in the decision after
108 CFG is modified. */
109 if (e1->src != e2->src)
110 return e1->src->index > e2->src->index;
111 return e1->dest->index > e2->dest->index;
114 /* Return most frequent successor of basic block BB. */
116 static edge
117 find_best_successor (bb)
118 basic_block bb;
120 edge e;
121 edge best = NULL;
123 for (e = bb->succ; e; e = e->succ_next)
124 if (!best || better_p (e, best))
125 best = e;
126 if (!best || ignore_bb_p (best->dest))
127 return NULL;
128 if (best->probability <= probability_cutoff)
129 return NULL;
130 return best;
133 /* Return most frequent predecessor of basic block BB. */
135 static edge
136 find_best_predecessor (bb)
137 basic_block bb;
139 edge e;
140 edge best = NULL;
142 for (e = bb->pred; e; e = e->pred_next)
143 if (!best || better_p (e, best))
144 best = e;
145 if (!best || ignore_bb_p (best->src))
146 return NULL;
147 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
148 < bb->frequency * branch_ratio_cutoff)
149 return NULL;
150 return best;
153 /* Find the trace using bb and record it in the TRACE array.
154 Return number of basic blocks recorded. */
156 static int
157 find_trace (bb, trace)
158 basic_block bb;
159 basic_block *trace;
161 int i = 0;
162 edge e;
164 if (rtl_dump_file)
165 fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
167 while ((e = find_best_predecessor (bb)) != NULL)
169 basic_block bb2 = e->src;
170 if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
171 || find_best_successor (bb2) != e)
172 break;
173 if (rtl_dump_file)
174 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
175 bb = bb2;
177 if (rtl_dump_file)
178 fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency);
179 trace[i++] = bb;
181 /* Follow the trace in forward direction. */
182 while ((e = find_best_successor (bb)) != NULL)
184 bb = e->dest;
185 if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
186 || find_best_predecessor (bb) != e)
187 break;
188 if (rtl_dump_file)
189 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
190 trace[i++] = bb;
192 if (rtl_dump_file)
193 fprintf (rtl_dump_file, "\n");
194 return i;
197 /* Look for basic blocks in frequency order, construct traces and tail duplicate
198 if profitable. */
200 static void
201 tail_duplicate ()
203 fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
204 basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
205 int *counts = xmalloc (sizeof (int) * last_basic_block);
206 int ninsns = 0, nduplicated = 0;
207 gcov_type weighted_insns = 0, traced_insns = 0;
208 fibheap_t heap = fibheap_new ();
209 gcov_type cover_insns;
210 int max_dup_insns;
211 basic_block bb;
213 if (profile_info && flag_branch_probabilities)
214 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
215 else
216 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
217 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
219 branch_ratio_cutoff =
220 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
222 FOR_EACH_BB (bb)
224 int n = count_insns (bb);
225 if (!ignore_bb_p (bb))
226 blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
227 bb);
229 counts [bb->index] = n;
230 ninsns += n;
231 weighted_insns += n * bb->frequency;
234 if (profile_info && flag_branch_probabilities)
235 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
236 else
237 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
238 cover_insns = (weighted_insns * cover_insns + 50) / 100;
239 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
241 while (traced_insns < cover_insns && nduplicated < max_dup_insns
242 && !fibheap_empty (heap))
244 basic_block bb = fibheap_extract_min (heap);
245 int n, pos;
247 if (!bb)
248 break;
250 blocks[bb->index] = NULL;
252 if (ignore_bb_p (bb))
253 continue;
254 if (seen (bb))
255 abort ();
257 n = find_trace (bb, trace);
259 bb = trace[0];
260 traced_insns += bb->frequency * counts [bb->index];
261 if (blocks[bb->index])
263 fibheap_delete_node (heap, blocks[bb->index]);
264 blocks[bb->index] = NULL;
267 for (pos = 1; pos < n; pos++)
269 basic_block bb2 = trace[pos];
271 if (blocks[bb2->index])
273 fibheap_delete_node (heap, blocks[bb2->index]);
274 blocks[bb2->index] = NULL;
276 traced_insns += bb2->frequency * counts [bb2->index];
277 if (bb2->pred && bb2->pred->pred_next
278 && cfg_layout_can_duplicate_bb_p (bb2))
280 edge e = bb2->pred;
281 basic_block old = bb2;
283 while (e->src != bb)
284 e = e->pred_next;
285 nduplicated += counts [bb2->index];
286 bb2 = cfg_layout_duplicate_bb (bb2, e);
288 /* Reconsider the original copy of block we've duplicated.
289 Removing the most common predecessor may make it to be
290 head. */
291 blocks[old->index] =
292 fibheap_insert (heap, -old->frequency, old);
294 if (rtl_dump_file)
295 fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n",
296 old->index, bb2->index, bb2->frequency);
298 RBI (bb)->next = bb2;
299 RBI (bb2)->visited = 1;
300 bb = bb2;
301 /* In case the trace became infrequent, stop duplicating. */
302 if (ignore_bb_p (bb))
303 break;
305 if (rtl_dump_file)
306 fprintf (rtl_dump_file, " covered now %.1f\n\n",
307 traced_insns * 100.0 / weighted_insns);
309 if (rtl_dump_file)
310 fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
311 nduplicated * 100 / ninsns);
313 free (blocks);
314 free (trace);
315 free (counts);
316 fibheap_delete (heap);
319 /* Connect the superblocks into linear seuqence. At the moment we attempt to keep
320 the original order as much as possible, but the algorithm may be made smarter
321 later if needed. BB reordering pass should void most of the benefits of such
322 change though. */
324 static void
325 layout_superblocks ()
327 basic_block end = ENTRY_BLOCK_PTR->succ->dest;
328 basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb;
330 while (bb != EXIT_BLOCK_PTR)
332 edge e, best = NULL;
333 while (RBI (end)->next)
334 end = RBI (end)->next;
336 for (e = end->succ; e; e = e->succ_next)
337 if (e->dest != EXIT_BLOCK_PTR
338 && e->dest != ENTRY_BLOCK_PTR->succ->dest
339 && !RBI (e->dest)->visited
340 && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
341 best = e;
343 if (best)
345 RBI (end)->next = best->dest;
346 RBI (best->dest)->visited = 1;
348 else
349 for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
351 if (!RBI (bb)->visited)
353 RBI (end)->next = bb;
354 RBI (bb)->visited = 1;
355 break;
361 /* Main entry point to this file. */
363 void
364 tracer ()
366 if (n_basic_blocks <= 1)
367 return;
368 cfg_layout_initialize (NULL);
369 mark_dfs_back_edges ();
370 if (rtl_dump_file)
371 dump_flow_info (rtl_dump_file);
372 tail_duplicate ();
373 layout_superblocks ();
374 if (rtl_dump_file)
375 dump_flow_info (rtl_dump_file);
376 cfg_layout_finalize ();
377 /* Merge basic blocks in duplicated traces. */
378 cleanup_cfg (CLEANUP_EXPENSIVE);