2004-08-18 David Daney <ddaney@avtrex.com>
[official-gcc.git] / gcc / rtl-profile.c
bloba53a00464a951cbcf1f819a6b1bf763d23deb1cc
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
42 The auxiliary file generated is <dumpbase>.bbg. The format is
43 described in full in gcov-io.h. */
45 /* ??? Register allocation should use basic block execution counts to
46 give preference to the most commonly executed blocks. */
48 /* ??? Should calculate branch probabilities before instrumenting code, since
49 then we can use arc counts to help decide which arcs to instrument. */
51 #include "config.h"
52 #include "system.h"
53 #include "coretypes.h"
54 #include "tm.h"
55 #include "rtl.h"
56 #include "flags.h"
57 #include "output.h"
58 #include "regs.h"
59 #include "expr.h"
60 #include "function.h"
61 #include "toplev.h"
62 #include "coverage.h"
63 #include "value-prof.h"
64 #include "tree.h"
66 /* Output instructions as RTL to increment the edge execution count. */
68 static void
69 rtl_gen_edge_profiler (int edgeno, edge e)
71 rtx ref = rtl_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
72 rtx tmp;
73 enum machine_mode mode = GET_MODE (ref);
74 rtx sequence;
76 start_sequence ();
77 ref = validize_mem (ref);
79 tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
80 ref, 0, OPTAB_WIDEN);
82 if (tmp != ref)
83 emit_move_insn (copy_rtx (ref), tmp);
85 sequence = get_insns ();
86 end_sequence ();
87 insert_insn_on_edge (sequence, e);
88 rebuild_jump_labels (e->insns.r);
91 /* Output instructions as RTL to increment the interval histogram counter.
92 VALUE is the expression whose value is profiled. TAG is the tag of the
93 section for counters, BASE is offset of the counter position. */
95 static void
96 rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag,
97 unsigned base)
99 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
100 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
101 rtx mem_ref, tmp, tmp1, mr, val;
102 rtx sequence;
103 rtx more_label = gen_label_rtx ();
104 rtx less_label = gen_label_rtx ();
105 rtx end_of_code_label = gen_label_rtx ();
106 int per_counter = gcov_size / BITS_PER_UNIT;
107 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
108 PREV_INSN ((rtx)value->insn));
110 start_sequence ();
112 if (value->seq)
113 emit_insn (value->seq);
115 mr = gen_reg_rtx (Pmode);
117 tmp = rtl_coverage_counter_ref (tag, base);
118 tmp = force_reg (Pmode, XEXP (tmp, 0));
120 val = expand_simple_binop (value->mode, MINUS,
121 copy_rtx (value->value),
122 GEN_INT (value->hdata.intvl.int_start),
123 NULL_RTX, 0, OPTAB_WIDEN);
125 if (value->hdata.intvl.may_be_more)
126 do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
127 GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
128 if (value->hdata.intvl.may_be_less)
129 do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
130 NULL_RTX, NULL_RTX, less_label);
132 /* We are in range. */
133 tmp1 = expand_simple_binop (value->mode, MULT,
134 copy_rtx (val), GEN_INT (per_counter),
135 NULL_RTX, 0, OPTAB_WIDEN);
136 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
137 0, OPTAB_WIDEN);
138 if (tmp1 != mr)
139 emit_move_insn (copy_rtx (mr), tmp1);
141 if (value->hdata.intvl.may_be_more
142 || value->hdata.intvl.may_be_less)
144 emit_jump_insn (gen_jump (end_of_code_label));
145 emit_barrier ();
148 /* Above the interval. */
149 if (value->hdata.intvl.may_be_more)
151 emit_label (more_label);
152 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
153 GEN_INT (per_counter * value->hdata.intvl.steps),
154 mr, 0, OPTAB_WIDEN);
155 if (tmp1 != mr)
156 emit_move_insn (copy_rtx (mr), tmp1);
157 if (value->hdata.intvl.may_be_less)
159 emit_jump_insn (gen_jump (end_of_code_label));
160 emit_barrier ();
164 /* Below the interval. */
165 if (value->hdata.intvl.may_be_less)
167 emit_label (less_label);
168 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
169 GEN_INT (per_counter * (value->hdata.intvl.steps
170 + (value->hdata.intvl.may_be_more ? 1 : 0))),
171 mr, 0, OPTAB_WIDEN);
172 if (tmp1 != mr)
173 emit_move_insn (copy_rtx (mr), tmp1);
176 if (value->hdata.intvl.may_be_more
177 || value->hdata.intvl.may_be_less)
178 emit_label (end_of_code_label);
180 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
182 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
183 mem_ref, 0, OPTAB_WIDEN);
185 if (tmp != mem_ref)
186 emit_move_insn (copy_rtx (mem_ref), tmp);
188 sequence = get_insns ();
189 end_sequence ();
190 rebuild_jump_labels (sequence);
191 safe_insert_insn_on_edge (sequence, e);
194 /* Output instructions as RTL to increment the power of two histogram counter.
195 VALUE is the expression whose value is profiled. TAG is the tag of the
196 section for counters, BASE is offset of the counter position. */
198 static void
199 rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag,
200 unsigned base)
202 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
203 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
204 rtx mem_ref, tmp, mr, uval;
205 rtx sequence;
206 rtx end_of_code_label = gen_label_rtx ();
207 rtx loop_label = gen_label_rtx ();
208 int per_counter = gcov_size / BITS_PER_UNIT;
209 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
210 PREV_INSN ((rtx)value->insn));
212 start_sequence ();
214 if (value->seq)
215 emit_insn (value->seq);
217 mr = gen_reg_rtx (Pmode);
218 tmp = rtl_coverage_counter_ref (tag, base);
219 tmp = force_reg (Pmode, XEXP (tmp, 0));
220 emit_move_insn (mr, tmp);
222 uval = gen_reg_rtx (value->mode);
223 emit_move_insn (uval, copy_rtx (value->value));
225 /* Check for non-power of 2. */
226 if (value->hdata.pow2.may_be_other)
228 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
229 NULL_RTX, NULL_RTX, end_of_code_label);
230 tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
231 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
232 tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
233 NULL_RTX, 0, OPTAB_WIDEN);
234 do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
235 NULL_RTX, end_of_code_label);
238 /* Count log_2(value). */
239 emit_label (loop_label);
241 tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
242 if (tmp != mr)
243 emit_move_insn (copy_rtx (mr), tmp);
245 tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
246 uval, 0, OPTAB_WIDEN);
247 if (tmp != uval)
248 emit_move_insn (copy_rtx (uval), tmp);
250 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
251 NULL_RTX, NULL_RTX, loop_label);
253 /* Increase the counter. */
254 emit_label (end_of_code_label);
256 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
258 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
259 mem_ref, 0, OPTAB_WIDEN);
261 if (tmp != mem_ref)
262 emit_move_insn (copy_rtx (mem_ref), tmp);
264 sequence = get_insns ();
265 end_sequence ();
266 rebuild_jump_labels (sequence);
267 safe_insert_insn_on_edge (sequence, e);
270 /* Output instructions as RTL for code to find the most common value.
271 VALUE is the expression whose value is profiled. TAG is the tag of the
272 section for counters, BASE is offset of the counter position. */
274 static rtx
275 rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value,
276 unsigned tag, unsigned base)
278 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
279 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
280 rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
281 rtx tmp, uval;
282 rtx sequence;
283 rtx same_label = gen_label_rtx ();
284 rtx zero_label = gen_label_rtx ();
285 rtx end_of_code_label = gen_label_rtx ();
287 start_sequence ();
289 if (value->seq)
290 emit_insn (value->seq);
292 stored_value_ref = rtl_coverage_counter_ref (tag, base);
293 counter_ref = rtl_coverage_counter_ref (tag, base + 1);
294 all_ref = rtl_coverage_counter_ref (tag, base + 2);
295 stored_value = validize_mem (stored_value_ref);
296 counter = validize_mem (counter_ref);
297 all = validize_mem (all_ref);
299 uval = gen_reg_rtx (mode);
300 convert_move (uval, copy_rtx (value->value), 0);
302 /* Check if the stored value matches. */
303 do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
304 0, mode, NULL_RTX, NULL_RTX, same_label);
306 /* Does not match; check whether the counter is zero. */
307 do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
308 NULL_RTX, NULL_RTX, zero_label);
310 /* The counter is not zero yet. */
311 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
312 counter, 0, OPTAB_WIDEN);
314 if (tmp != counter)
315 emit_move_insn (copy_rtx (counter), tmp);
317 emit_jump_insn (gen_jump (end_of_code_label));
318 emit_barrier ();
320 emit_label (zero_label);
321 /* Set new value. */
322 emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));
324 emit_label (same_label);
325 /* Increase the counter. */
326 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
327 counter, 0, OPTAB_WIDEN);
329 if (tmp != counter)
330 emit_move_insn (copy_rtx (counter), tmp);
332 emit_label (end_of_code_label);
334 /* Increase the counter of all executions; this seems redundant given
335 that ve have counts for edges in cfg, but it may happen that some
336 optimization will change the counts for the block (either because
337 it is unable to update them correctly, or because it will duplicate
338 the block or its part). */
339 tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
340 all, 0, OPTAB_WIDEN);
342 if (tmp != all)
343 emit_move_insn (copy_rtx (all), tmp);
344 sequence = get_insns ();
345 end_sequence ();
346 return sequence;
349 /* Output instructions as RTL for code to find the most common value.
350 VALUE is the expression whose value is profiled. TAG is the tag of the
351 section for counters, BASE is offset of the counter position. */
353 static void
354 rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag,
355 unsigned base)
357 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
358 PREV_INSN ((rtx)value->insn));
359 rtx sequence = rtl_gen_one_value_profiler_no_edge_manipulation (value,
360 tag, base);
361 rebuild_jump_labels (sequence);
362 safe_insert_insn_on_edge (sequence, e);
365 /* Output instructions as RTL for code to find the most common value of
366 a difference between two evaluations of an expression.
367 VALUE is the expression whose value is profiled. TAG is the tag of the
368 section for counters, BASE is offset of the counter position. */
370 static void
371 rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
372 unsigned base)
374 struct histogram_value one_value_delta;
375 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
376 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
377 rtx stored_value_ref, stored_value, tmp, uval;
378 rtx sequence;
379 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
380 PREV_INSN ((rtx)value->insn));
382 start_sequence ();
384 if (value->seq)
385 emit_insn (value->seq);
387 stored_value_ref = rtl_coverage_counter_ref (tag, base);
388 stored_value = validize_mem (stored_value_ref);
390 uval = gen_reg_rtx (mode);
391 convert_move (uval, copy_rtx (value->value), 0);
392 tmp = expand_simple_binop (mode, MINUS,
393 copy_rtx (uval), copy_rtx (stored_value),
394 NULL_RTX, 0, OPTAB_WIDEN);
396 one_value_delta.value = tmp;
397 one_value_delta.mode = mode;
398 one_value_delta.seq = NULL_RTX;
399 one_value_delta.insn = value->insn;
400 one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
401 emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (&one_value_delta,
402 tag, base + 1));
403 emit_move_insn (copy_rtx (stored_value), uval);
404 sequence = get_insns ();
405 end_sequence ();
406 rebuild_jump_labels (sequence);
407 safe_insert_insn_on_edge (sequence, e);
410 /* Return the file on which profile dump output goes, if any. */
412 static FILE *rtl_profile_dump_file (void) {
413 return dump_file;
416 struct profile_hooks rtl_profile_hooks =
418 rtl_gen_edge_profiler,
419 rtl_gen_interval_profiler,
420 rtl_gen_pow2_profiler,
421 rtl_gen_one_value_profiler,
422 rtl_gen_const_delta_profiler,
423 rtl_profile_dump_file