* config/arm/elf.h (ASM_OUTPUT_ALIGNED_COMMON): Remove definition.
[official-gcc.git] / gcc / tracer.c
blob701c16a93953fa5c2de652ef538780d91eb930a3
1 /* The tracer pass for the GNU compiler.
2 Contributed by Jan Hubicka, SuSE Labs.
3 Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This pass performs the tail duplication needed for superblock formation.
23 For more information see:
25 Design and Analysis of Profile-Based Optimization in Compaq's
26 Compilation Tools for Alpha; Journal of Instruction-Level
27 Parallelism 3 (2000) 1-25
29 Unlike Compaq's implementation we don't do the loop peeling as most
30 probably a better job can be done by a special pass and we don't
31 need to worry too much about the code size implications as the tail
32 duplicates are crossjumped again if optimizations are not
33 performed. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "hard-reg-set.h"
43 #include "basic-block.h"
44 #include "output.h"
45 #include "cfglayout.h"
46 #include "fibheap.h"
47 #include "flags.h"
48 #include "params.h"
49 #include "coverage.h"
51 static int count_insns PARAMS ((basic_block));
52 static bool ignore_bb_p PARAMS ((basic_block));
53 static bool better_p PARAMS ((edge, edge));
54 static edge find_best_successor PARAMS ((basic_block));
55 static edge find_best_predecessor PARAMS ((basic_block));
56 static int find_trace PARAMS ((basic_block, basic_block *));
57 static void tail_duplicate PARAMS ((void));
58 static void layout_superblocks PARAMS ((void));
59 static bool ignore_bb_p PARAMS ((basic_block));
61 /* Minimal outgoing edge probability considered for superblock formation. */
62 static int probability_cutoff;
63 static int branch_ratio_cutoff;
65 /* Return true if BB has been seen - it is connected to some trace
66 already. */
68 #define seen(bb) (RBI (bb)->visited || RBI (bb)->next)
70 /* Return true if we should ignore the basic block for purposes of tracing. */
71 static bool
72 ignore_bb_p (bb)
73 basic_block bb;
75 if (bb->index < 0)
76 return true;
77 if (!maybe_hot_bb_p (bb))
78 return true;
79 return false;
82 /* Return number of instructions in the block. */
84 static int
85 count_insns (bb)
86 basic_block bb;
88 rtx insn;
89 int n = 0;
91 for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
92 if (active_insn_p (insn))
93 n++;
94 return n;
97 /* Return true if E1 is more frequent than E2. */
98 static bool
99 better_p (e1, e2)
100 edge e1, e2;
102 if (e1->count != e2->count)
103 return e1->count > e2->count;
104 if (e1->src->frequency * e1->probability !=
105 e2->src->frequency * e2->probability)
106 return (e1->src->frequency * e1->probability
107 > e2->src->frequency * e2->probability);
108 /* This is needed to avoid changes in the decision after
109 CFG is modified. */
110 if (e1->src != e2->src)
111 return e1->src->index > e2->src->index;
112 return e1->dest->index > e2->dest->index;
115 /* Return most frequent successor of basic block BB. */
117 static edge
118 find_best_successor (bb)
119 basic_block bb;
121 edge e;
122 edge best = NULL;
124 for (e = bb->succ; e; e = e->succ_next)
125 if (!best || better_p (e, best))
126 best = e;
127 if (!best || ignore_bb_p (best->dest))
128 return NULL;
129 if (best->probability <= probability_cutoff)
130 return NULL;
131 return best;
134 /* Return most frequent predecessor of basic block BB. */
136 static edge
137 find_best_predecessor (bb)
138 basic_block bb;
140 edge e;
141 edge best = NULL;
143 for (e = bb->pred; e; e = e->pred_next)
144 if (!best || better_p (e, best))
145 best = e;
146 if (!best || ignore_bb_p (best->src))
147 return NULL;
148 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
149 < bb->frequency * branch_ratio_cutoff)
150 return NULL;
151 return best;
154 /* Find the trace using bb and record it in the TRACE array.
155 Return number of basic blocks recorded. */
157 static int
158 find_trace (bb, trace)
159 basic_block bb;
160 basic_block *trace;
162 int i = 0;
163 edge e;
165 if (rtl_dump_file)
166 fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
168 while ((e = find_best_predecessor (bb)) != NULL)
170 basic_block bb2 = e->src;
171 if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
172 || find_best_successor (bb2) != e)
173 break;
174 if (rtl_dump_file)
175 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
176 bb = bb2;
178 if (rtl_dump_file)
179 fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency);
180 trace[i++] = bb;
182 /* Follow the trace in forward direction. */
183 while ((e = find_best_successor (bb)) != NULL)
185 bb = e->dest;
186 if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
187 || find_best_predecessor (bb) != e)
188 break;
189 if (rtl_dump_file)
190 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
191 trace[i++] = bb;
193 if (rtl_dump_file)
194 fprintf (rtl_dump_file, "\n");
195 return i;
198 /* Look for basic blocks in frequency order, construct traces and tail duplicate
199 if profitable. */
201 static void
202 tail_duplicate ()
204 fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
205 basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
206 int *counts = xmalloc (sizeof (int) * last_basic_block);
207 int ninsns = 0, nduplicated = 0;
208 gcov_type weighted_insns = 0, traced_insns = 0;
209 fibheap_t heap = fibheap_new ();
210 gcov_type cover_insns;
211 int max_dup_insns;
212 basic_block bb;
214 if (profile_info && flag_branch_probabilities)
215 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
216 else
217 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
218 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
220 branch_ratio_cutoff =
221 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
223 FOR_EACH_BB (bb)
225 int n = count_insns (bb);
226 if (!ignore_bb_p (bb))
227 blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
228 bb);
230 counts [bb->index] = n;
231 ninsns += n;
232 weighted_insns += n * bb->frequency;
235 if (profile_info && flag_branch_probabilities)
236 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
237 else
238 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
239 cover_insns = (weighted_insns * cover_insns + 50) / 100;
240 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
242 while (traced_insns < cover_insns && nduplicated < max_dup_insns
243 && !fibheap_empty (heap))
245 basic_block bb = fibheap_extract_min (heap);
246 int n, pos;
248 if (!bb)
249 break;
251 blocks[bb->index] = NULL;
253 if (ignore_bb_p (bb))
254 continue;
255 if (seen (bb))
256 abort ();
258 n = find_trace (bb, trace);
260 bb = trace[0];
261 traced_insns += bb->frequency * counts [bb->index];
262 if (blocks[bb->index])
264 fibheap_delete_node (heap, blocks[bb->index]);
265 blocks[bb->index] = NULL;
268 for (pos = 1; pos < n; pos++)
270 basic_block bb2 = trace[pos];
272 if (blocks[bb2->index])
274 fibheap_delete_node (heap, blocks[bb2->index]);
275 blocks[bb2->index] = NULL;
277 traced_insns += bb2->frequency * counts [bb2->index];
278 if (bb2->pred && bb2->pred->pred_next
279 && cfg_layout_can_duplicate_bb_p (bb2))
281 edge e = bb2->pred;
282 basic_block old = bb2;
284 while (e->src != bb)
285 e = e->pred_next;
286 nduplicated += counts [bb2->index];
287 bb2 = cfg_layout_duplicate_bb (bb2, e);
289 /* Reconsider the original copy of block we've duplicated.
290 Removing the most common predecessor may make it to be
291 head. */
292 blocks[old->index] =
293 fibheap_insert (heap, -old->frequency, old);
295 if (rtl_dump_file)
296 fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n",
297 old->index, bb2->index, bb2->frequency);
299 RBI (bb)->next = bb2;
300 RBI (bb2)->visited = 1;
301 bb = bb2;
302 /* In case the trace became infrequent, stop duplicating. */
303 if (ignore_bb_p (bb))
304 break;
306 if (rtl_dump_file)
307 fprintf (rtl_dump_file, " covered now %.1f\n\n",
308 traced_insns * 100.0 / weighted_insns);
310 if (rtl_dump_file)
311 fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
312 nduplicated * 100 / ninsns);
314 free (blocks);
315 free (trace);
316 free (counts);
317 fibheap_delete (heap);
320 /* Connect the superblocks into linear seuqence. At the moment we attempt to keep
321 the original order as much as possible, but the algorithm may be made smarter
322 later if needed. BB reordering pass should void most of the benefits of such
323 change though. */
325 static void
326 layout_superblocks ()
328 basic_block end = ENTRY_BLOCK_PTR->succ->dest;
329 basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb;
331 while (bb != EXIT_BLOCK_PTR)
333 edge e, best = NULL;
334 while (RBI (end)->next)
335 end = RBI (end)->next;
337 for (e = end->succ; e; e = e->succ_next)
338 if (e->dest != EXIT_BLOCK_PTR
339 && e->dest != ENTRY_BLOCK_PTR->succ->dest
340 && !RBI (e->dest)->visited
341 && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
342 best = e;
344 if (best)
346 RBI (end)->next = best->dest;
347 RBI (best->dest)->visited = 1;
349 else
350 for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
352 if (!RBI (bb)->visited)
354 RBI (end)->next = bb;
355 RBI (bb)->visited = 1;
356 break;
362 /* Main entry point to this file. */
364 void
365 tracer ()
367 if (n_basic_blocks <= 1)
368 return;
369 cfg_layout_initialize (NULL);
370 mark_dfs_back_edges ();
371 if (rtl_dump_file)
372 dump_flow_info (rtl_dump_file);
373 tail_duplicate ();
374 layout_superblocks ();
375 if (rtl_dump_file)
376 dump_flow_info (rtl_dump_file);
377 cfg_layout_finalize ();
378 /* Merge basic blocks in duplicated traces. */
379 cleanup_cfg (CLEANUP_EXPENSIVE);