PR c++/8795
[official-gcc.git] / gcc / tracer.c
blobeb311b9c7ef6dd2c1bb0c87fc67555914a20f6cc
1 /* The tracer pass for the GNU compiler.
2 Contributed by Jan Hubicka, SuSE Labs.
3 Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This pass performs the tail duplication needed for superblock formation.
23 For more information see:
25 Design and Analysis of Profile-Based Optimization in Compaq's
26 Compilation Tools for Alpha; Journal of Instruction-Level
27 Parallelism 3 (2000) 1-25
29 Unlike Compaq's implementation we don't do the loop peeling as most
30 probably a better job can be done by a special pass and we don't
31 need to worry too much about the code size implications as the tail
32 duplicates are crossjumped again if optimizations are not
33 performed. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "hard-reg-set.h"
43 #include "basic-block.h"
44 #include "output.h"
45 #include "cfglayout.h"
46 #include "fibheap.h"
47 #include "flags.h"
48 #include "params.h"
49 #include "coverage.h"
51 static int count_insns (basic_block);
52 static bool ignore_bb_p (basic_block);
53 static bool better_p (edge, edge);
54 static edge find_best_successor (basic_block);
55 static edge find_best_predecessor (basic_block);
56 static int find_trace (basic_block, basic_block *);
57 static void tail_duplicate (void);
58 static void layout_superblocks (void);
60 /* Minimal outgoing edge probability considered for superblock formation. */
61 static int probability_cutoff;
62 static int branch_ratio_cutoff;
64 /* Return true if BB has been seen - it is connected to some trace
65 already. */
67 #define seen(bb) (bb->rbi->visited || bb->rbi->next)
69 /* Return true if we should ignore the basic block for purposes of tracing. */
70 static bool
71 ignore_bb_p (basic_block bb)
73 if (bb->index < 0)
74 return true;
75 if (!maybe_hot_bb_p (bb))
76 return true;
77 return false;
80 /* Return number of instructions in the block. */
82 static int
83 count_insns (basic_block bb)
85 rtx insn;
86 int n = 0;
88 for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
89 if (active_insn_p (insn))
90 n++;
91 return n;
94 /* Return true if E1 is more frequent than E2. */
95 static bool
96 better_p (edge e1, edge e2)
98 if (e1->count != e2->count)
99 return e1->count > e2->count;
100 if (e1->src->frequency * e1->probability !=
101 e2->src->frequency * e2->probability)
102 return (e1->src->frequency * e1->probability
103 > e2->src->frequency * e2->probability);
104 /* This is needed to avoid changes in the decision after
105 CFG is modified. */
106 if (e1->src != e2->src)
107 return e1->src->index > e2->src->index;
108 return e1->dest->index > e2->dest->index;
111 /* Return most frequent successor of basic block BB. */
113 static edge
114 find_best_successor (basic_block bb)
116 edge e;
117 edge best = NULL;
119 for (e = bb->succ; e; e = e->succ_next)
120 if (!best || better_p (e, best))
121 best = e;
122 if (!best || ignore_bb_p (best->dest))
123 return NULL;
124 if (best->probability <= probability_cutoff)
125 return NULL;
126 return best;
129 /* Return most frequent predecessor of basic block BB. */
131 static edge
132 find_best_predecessor (basic_block bb)
134 edge e;
135 edge best = NULL;
137 for (e = bb->pred; e; e = e->pred_next)
138 if (!best || better_p (e, best))
139 best = e;
140 if (!best || ignore_bb_p (best->src))
141 return NULL;
142 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
143 < bb->frequency * branch_ratio_cutoff)
144 return NULL;
145 return best;
148 /* Find the trace using bb and record it in the TRACE array.
149 Return number of basic blocks recorded. */
151 static int
152 find_trace (basic_block bb, basic_block *trace)
154 int i = 0;
155 edge e;
157 if (rtl_dump_file)
158 fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
160 while ((e = find_best_predecessor (bb)) != NULL)
162 basic_block bb2 = e->src;
163 if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
164 || find_best_successor (bb2) != e)
165 break;
166 if (rtl_dump_file)
167 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
168 bb = bb2;
170 if (rtl_dump_file)
171 fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency);
172 trace[i++] = bb;
174 /* Follow the trace in forward direction. */
175 while ((e = find_best_successor (bb)) != NULL)
177 bb = e->dest;
178 if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
179 || find_best_predecessor (bb) != e)
180 break;
181 if (rtl_dump_file)
182 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
183 trace[i++] = bb;
185 if (rtl_dump_file)
186 fprintf (rtl_dump_file, "\n");
187 return i;
190 /* Look for basic blocks in frequency order, construct traces and tail duplicate
191 if profitable. */
193 static void
194 tail_duplicate (void)
196 fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
197 basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
198 int *counts = xmalloc (sizeof (int) * last_basic_block);
199 int ninsns = 0, nduplicated = 0;
200 gcov_type weighted_insns = 0, traced_insns = 0;
201 fibheap_t heap = fibheap_new ();
202 gcov_type cover_insns;
203 int max_dup_insns;
204 basic_block bb;
206 if (profile_info && flag_branch_probabilities)
207 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
208 else
209 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
210 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
212 branch_ratio_cutoff =
213 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
215 FOR_EACH_BB (bb)
217 int n = count_insns (bb);
218 if (!ignore_bb_p (bb))
219 blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
220 bb);
222 counts [bb->index] = n;
223 ninsns += n;
224 weighted_insns += n * bb->frequency;
227 if (profile_info && flag_branch_probabilities)
228 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
229 else
230 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
231 cover_insns = (weighted_insns * cover_insns + 50) / 100;
232 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
234 while (traced_insns < cover_insns && nduplicated < max_dup_insns
235 && !fibheap_empty (heap))
237 basic_block bb = fibheap_extract_min (heap);
238 int n, pos;
240 if (!bb)
241 break;
243 blocks[bb->index] = NULL;
245 if (ignore_bb_p (bb))
246 continue;
247 if (seen (bb))
248 abort ();
250 n = find_trace (bb, trace);
252 bb = trace[0];
253 traced_insns += bb->frequency * counts [bb->index];
254 if (blocks[bb->index])
256 fibheap_delete_node (heap, blocks[bb->index]);
257 blocks[bb->index] = NULL;
260 for (pos = 1; pos < n; pos++)
262 basic_block bb2 = trace[pos];
264 if (blocks[bb2->index])
266 fibheap_delete_node (heap, blocks[bb2->index]);
267 blocks[bb2->index] = NULL;
269 traced_insns += bb2->frequency * counts [bb2->index];
270 if (bb2->pred && bb2->pred->pred_next
271 && cfg_layout_can_duplicate_bb_p (bb2))
273 edge e = bb2->pred;
274 basic_block old = bb2;
276 while (e->src != bb)
277 e = e->pred_next;
278 nduplicated += counts [bb2->index];
279 bb2 = cfg_layout_duplicate_bb (bb2, e);
281 /* Reconsider the original copy of block we've duplicated.
282 Removing the most common predecessor may make it to be
283 head. */
284 blocks[old->index] =
285 fibheap_insert (heap, -old->frequency, old);
287 if (rtl_dump_file)
288 fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n",
289 old->index, bb2->index, bb2->frequency);
291 bb->rbi->next = bb2;
292 bb2->rbi->visited = 1;
293 bb = bb2;
294 /* In case the trace became infrequent, stop duplicating. */
295 if (ignore_bb_p (bb))
296 break;
298 if (rtl_dump_file)
299 fprintf (rtl_dump_file, " covered now %.1f\n\n",
300 traced_insns * 100.0 / weighted_insns);
302 if (rtl_dump_file)
303 fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
304 nduplicated * 100 / ninsns);
306 free (blocks);
307 free (trace);
308 free (counts);
309 fibheap_delete (heap);
312 /* Connect the superblocks into linear sequence. At the moment we attempt to keep
313 the original order as much as possible, but the algorithm may be made smarter
314 later if needed. BB reordering pass should void most of the benefits of such
315 change though. */
317 static void
318 layout_superblocks (void)
320 basic_block end = ENTRY_BLOCK_PTR->succ->dest;
321 basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb;
323 while (bb != EXIT_BLOCK_PTR)
325 edge e, best = NULL;
326 while (end->rbi->next)
327 end = end->rbi->next;
329 for (e = end->succ; e; e = e->succ_next)
330 if (e->dest != EXIT_BLOCK_PTR
331 && e->dest != ENTRY_BLOCK_PTR->succ->dest
332 && !e->dest->rbi->visited
333 && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
334 best = e;
336 if (best)
338 end->rbi->next = best->dest;
339 best->dest->rbi->visited = 1;
341 else
342 for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
344 if (!bb->rbi->visited)
346 end->rbi->next = bb;
347 bb->rbi->visited = 1;
348 break;
354 /* Main entry point to this file. */
356 void
357 tracer (void)
359 if (n_basic_blocks <= 1)
360 return;
361 cfg_layout_initialize ();
362 mark_dfs_back_edges ();
363 if (rtl_dump_file)
364 dump_flow_info (rtl_dump_file);
365 tail_duplicate ();
366 layout_superblocks ();
367 if (rtl_dump_file)
368 dump_flow_info (rtl_dump_file);
369 cfg_layout_finalize ();
370 /* Merge basic blocks in duplicated traces. */
371 cleanup_cfg (CLEANUP_EXPENSIVE);