1 /* The tracer pass for the GNU compiler.
2 Contributed by Jan Hubicka, SuSE Labs.
3 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* This pass performs the tail duplication needed for superblock formation.
23 For more information see:
25 Design and Analysis of Profile-Based Optimization in Compaq's
26 Compilation Tools for Alpha; Journal of Instruction-Level
27 Parallelism 3 (2000) 1-25
29 Unlike Compaq's implementation we don't do the loop peeling as most
30 probably a better job can be done by a special pass and we don't
31 need to worry too much about the code size implications as the tail
32 duplicates are crossjumped again if optimizations are not
38 #include "coretypes.h"
42 #include "hard-reg-set.h"
43 #include "basic-block.h"
45 #include "cfglayout.h"
51 static int count_insns
PARAMS ((basic_block
));
52 static bool ignore_bb_p
PARAMS ((basic_block
));
53 static bool better_p
PARAMS ((edge
, edge
));
54 static edge find_best_successor
PARAMS ((basic_block
));
55 static edge find_best_predecessor
PARAMS ((basic_block
));
56 static int find_trace
PARAMS ((basic_block
, basic_block
*));
57 static void tail_duplicate
PARAMS ((void));
58 static void layout_superblocks
PARAMS ((void));
59 static bool ignore_bb_p
PARAMS ((basic_block
));
61 /* Minimal outgoing edge probability considered for superblock formation. */
62 static int probability_cutoff
;
63 static int branch_ratio_cutoff
;
65 /* Return true if BB has been seen - it is connected to some trace
68 #define seen(bb) (RBI (bb)->visited || RBI (bb)->next)
70 /* Return true if we should ignore the basic block for purposes of tracing. */
77 if (!maybe_hot_bb_p (bb
))
82 /* Return number of instructions in the block. */
91 for (insn
= bb
->head
; insn
!= NEXT_INSN (bb
->end
); insn
= NEXT_INSN (insn
))
92 if (active_insn_p (insn
))
97 /* Return true if E1 is more frequent than E2. */
102 if (e1
->count
!= e2
->count
)
103 return e1
->count
> e2
->count
;
104 if (e1
->src
->frequency
* e1
->probability
!=
105 e2
->src
->frequency
* e2
->probability
)
106 return (e1
->src
->frequency
* e1
->probability
107 > e2
->src
->frequency
* e2
->probability
);
108 /* This is needed to avoid changes in the decision after
110 if (e1
->src
!= e2
->src
)
111 return e1
->src
->index
> e2
->src
->index
;
112 return e1
->dest
->index
> e2
->dest
->index
;
115 /* Return most frequent successor of basic block BB. */
118 find_best_successor (bb
)
124 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
125 if (!best
|| better_p (e
, best
))
127 if (!best
|| ignore_bb_p (best
->dest
))
129 if (best
->probability
<= probability_cutoff
)
134 /* Return most frequent predecessor of basic block BB. */
137 find_best_predecessor (bb
)
143 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
144 if (!best
|| better_p (e
, best
))
146 if (!best
|| ignore_bb_p (best
->src
))
148 if (EDGE_FREQUENCY (best
) * REG_BR_PROB_BASE
149 < bb
->frequency
* branch_ratio_cutoff
)
154 /* Find the trace using bb and record it in the TRACE array.
155 Return number of basic blocks recorded. */
158 find_trace (bb
, trace
)
166 fprintf (rtl_dump_file
, "Trace seed %i [%i]", bb
->index
, bb
->frequency
);
168 while ((e
= find_best_predecessor (bb
)) != NULL
)
170 basic_block bb2
= e
->src
;
171 if (seen (bb2
) || (e
->flags
& (EDGE_DFS_BACK
| EDGE_COMPLEX
))
172 || find_best_successor (bb2
) != e
)
175 fprintf (rtl_dump_file
, ",%i [%i]", bb
->index
, bb
->frequency
);
179 fprintf (rtl_dump_file
, " forward %i [%i]", bb
->index
, bb
->frequency
);
182 /* Follow the trace in forward direction. */
183 while ((e
= find_best_successor (bb
)) != NULL
)
186 if (seen (bb
) || (e
->flags
& (EDGE_DFS_BACK
| EDGE_COMPLEX
))
187 || find_best_predecessor (bb
) != e
)
190 fprintf (rtl_dump_file
, ",%i [%i]", bb
->index
, bb
->frequency
);
194 fprintf (rtl_dump_file
, "\n");
198 /* Look for basic blocks in frequency order, construct traces and tail duplicate
204 fibnode_t
*blocks
= xcalloc (last_basic_block
, sizeof (fibnode_t
));
205 basic_block
*trace
= xmalloc (sizeof (basic_block
) * n_basic_blocks
);
206 int *counts
= xmalloc (sizeof (int) * last_basic_block
);
207 int ninsns
= 0, nduplicated
= 0;
208 gcov_type weighted_insns
= 0, traced_insns
= 0;
209 fibheap_t heap
= fibheap_new ();
210 gcov_type cover_insns
;
214 if (profile_info
.count_profiles_merged
&& flag_branch_probabilities
)
215 probability_cutoff
= PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK
);
217 probability_cutoff
= PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY
);
218 probability_cutoff
= REG_BR_PROB_BASE
/ 100 * probability_cutoff
;
220 branch_ratio_cutoff
=
221 (REG_BR_PROB_BASE
/ 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO
));
225 int n
= count_insns (bb
);
226 if (!ignore_bb_p (bb
))
227 blocks
[bb
->index
] = fibheap_insert (heap
, -bb
->frequency
,
230 counts
[bb
->index
] = n
;
232 weighted_insns
+= n
* bb
->frequency
;
235 if (profile_info
.count_profiles_merged
&& flag_branch_probabilities
)
236 cover_insns
= PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK
);
238 cover_insns
= PARAM_VALUE (TRACER_DYNAMIC_COVERAGE
);
239 cover_insns
= (weighted_insns
* cover_insns
+ 50) / 100;
240 max_dup_insns
= (ninsns
* PARAM_VALUE (TRACER_MAX_CODE_GROWTH
) + 50) / 100;
242 while (traced_insns
< cover_insns
&& nduplicated
< max_dup_insns
243 && !fibheap_empty (heap
))
245 basic_block bb
= fibheap_extract_min (heap
);
251 blocks
[bb
->index
] = NULL
;
253 if (ignore_bb_p (bb
))
258 n
= find_trace (bb
, trace
);
261 traced_insns
+= bb
->frequency
* counts
[bb
->index
];
262 if (blocks
[bb
->index
])
264 fibheap_delete_node (heap
, blocks
[bb
->index
]);
265 blocks
[bb
->index
] = NULL
;
268 for (pos
= 1; pos
< n
; pos
++)
270 basic_block bb2
= trace
[pos
];
272 if (blocks
[bb2
->index
])
274 fibheap_delete_node (heap
, blocks
[bb2
->index
]);
275 blocks
[bb2
->index
] = NULL
;
277 traced_insns
+= bb2
->frequency
* counts
[bb2
->index
];
278 if (bb2
->pred
&& bb2
->pred
->pred_next
279 && cfg_layout_can_duplicate_bb_p (bb2
))
282 basic_block old
= bb2
;
286 nduplicated
+= counts
[bb2
->index
];
287 bb2
= cfg_layout_duplicate_bb (bb2
, e
);
289 /* Reconsider the original copy of block we've duplicated.
290 Removing the most common predecessor may make it to be
293 fibheap_insert (heap
, -old
->frequency
, old
);
296 fprintf (rtl_dump_file
, "Duplicated %i as %i [%i]\n",
297 old
->index
, bb2
->index
, bb2
->frequency
);
299 RBI (bb
)->next
= bb2
;
300 RBI (bb2
)->visited
= 1;
302 /* In case the trace became infrequent, stop duplicating. */
303 if (ignore_bb_p (bb
))
307 fprintf (rtl_dump_file
, " covered now %.1f\n\n",
308 traced_insns
* 100.0 / weighted_insns
);
311 fprintf (rtl_dump_file
, "Duplicated %i insns (%i%%)\n", nduplicated
,
312 nduplicated
* 100 / ninsns
);
317 fibheap_delete (heap
);
320 /* Connect the superblocks into linear seuqence. At the moment we attempt to keep
321 the original order as much as possible, but the algorithm may be made smarter
322 later if needed. BB reordering pass should void most of the benefits of such
326 layout_superblocks ()
328 basic_block end
= ENTRY_BLOCK_PTR
->succ
->dest
;
329 basic_block bb
= ENTRY_BLOCK_PTR
->succ
->dest
->next_bb
;
331 while (bb
!= EXIT_BLOCK_PTR
)
334 while (RBI (end
)->next
)
335 end
= RBI (end
)->next
;
337 for (e
= end
->succ
; e
; e
= e
->succ_next
)
338 if (e
->dest
!= EXIT_BLOCK_PTR
339 && e
->dest
!= ENTRY_BLOCK_PTR
->succ
->dest
340 && !RBI (e
->dest
)->visited
341 && (!best
|| EDGE_FREQUENCY (e
) > EDGE_FREQUENCY (best
)))
346 RBI (end
)->next
= best
->dest
;
347 RBI (best
->dest
)->visited
= 1;
350 for (; bb
!= EXIT_BLOCK_PTR
; bb
= bb
->next_bb
)
352 if (!RBI (bb
)->visited
)
354 RBI (end
)->next
= bb
;
355 RBI (bb
)->visited
= 1;
362 /* Main entry point to this file. */
367 if (n_basic_blocks
<= 1)
369 cfg_layout_initialize (NULL
);
370 mark_dfs_back_edges ();
372 dump_flow_info (rtl_dump_file
);
374 layout_superblocks ();
376 dump_flow_info (rtl_dump_file
);
377 cfg_layout_finalize ();
378 /* Merge basic blocks in duplicated traces. */
379 cleanup_cfg (CLEANUP_EXPENSIVE
);