Merge from mainline (gomp-merge-2005-02-26).
[official-gcc.git] / gcc / rtl-profile.c
blob3439f472b7bd37d5c044e0e1412fd4e12ea045f1
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
42 The auxiliary file generated is <dumpbase>.bbg. The format is
43 described in full in gcov-io.h. */
45 /* ??? Register allocation should use basic block execution counts to
46 give preference to the most commonly executed blocks. */
48 /* ??? Should calculate branch probabilities before instrumenting code, since
49 then we can use arc counts to help decide which arcs to instrument. */
51 #include "config.h"
52 #include "system.h"
53 #include "coretypes.h"
54 #include "tm.h"
55 #include "rtl.h"
56 #include "flags.h"
57 #include "output.h"
58 #include "regs.h"
59 #include "expr.h"
60 #include "function.h"
61 #include "toplev.h"
62 #include "coverage.h"
63 #include "value-prof.h"
64 #include "tree.h"
65 #include "ggc.h"
67 /* Do initialization work for the edge profiler. */
69 static void
70 rtl_init_edge_profiler (void)
72 /* gen_edge_profiler calls safe_insert_insn_on_edge which needs
73 register liveness data to be available. */
74 life_analysis (NULL, 0);
77 /* Output instructions as RTL to increment the edge execution count. */
79 static void
80 rtl_gen_edge_profiler (int edgeno, edge e)
82 rtx ref = rtl_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
83 rtx tmp;
84 enum machine_mode mode = GET_MODE (ref);
85 rtx sequence;
87 start_sequence ();
88 ref = validize_mem (ref);
90 tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
91 ref, 0, OPTAB_WIDEN);
93 if (tmp != ref)
94 emit_move_insn (copy_rtx (ref), tmp);
96 sequence = get_insns ();
97 end_sequence ();
98 safe_insert_insn_on_edge (sequence, e);
99 rebuild_jump_labels (e->insns.r);
102 /* Output instructions as RTL to increment the interval histogram counter.
103 VALUE is the expression whose value is profiled. TAG is the tag of the
104 section for counters, BASE is offset of the counter position. */
106 static void
107 rtl_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base)
109 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
110 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
111 rtx mem_ref, tmp, tmp1, mr, val;
112 rtx sequence;
113 rtx more_label = gen_label_rtx ();
114 rtx less_label = gen_label_rtx ();
115 rtx end_of_code_label = gen_label_rtx ();
116 int per_counter = gcov_size / BITS_PER_UNIT;
117 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
118 PREV_INSN ((rtx)value->insn));
120 start_sequence ();
122 if (value->seq)
123 emit_insn (value->seq);
125 mr = gen_reg_rtx (Pmode);
127 tmp = rtl_coverage_counter_ref (tag, base);
128 tmp = force_reg (Pmode, XEXP (tmp, 0));
130 val = expand_simple_binop (value->mode, MINUS,
131 copy_rtx (value->value),
132 GEN_INT (value->hdata.intvl.int_start),
133 NULL_RTX, 0, OPTAB_WIDEN);
135 if (value->hdata.intvl.may_be_more)
136 do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
137 GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
138 if (value->hdata.intvl.may_be_less)
139 do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
140 NULL_RTX, NULL_RTX, less_label);
142 /* We are in range. */
143 tmp1 = expand_simple_binop (value->mode, MULT,
144 copy_rtx (val), GEN_INT (per_counter),
145 NULL_RTX, 0, OPTAB_WIDEN);
146 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
147 0, OPTAB_WIDEN);
148 if (tmp1 != mr)
149 emit_move_insn (copy_rtx (mr), tmp1);
151 if (value->hdata.intvl.may_be_more
152 || value->hdata.intvl.may_be_less)
154 emit_jump_insn (gen_jump (end_of_code_label));
155 emit_barrier ();
158 /* Above the interval. */
159 if (value->hdata.intvl.may_be_more)
161 emit_label (more_label);
162 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
163 GEN_INT (per_counter * value->hdata.intvl.steps),
164 mr, 0, OPTAB_WIDEN);
165 if (tmp1 != mr)
166 emit_move_insn (copy_rtx (mr), tmp1);
167 if (value->hdata.intvl.may_be_less)
169 emit_jump_insn (gen_jump (end_of_code_label));
170 emit_barrier ();
174 /* Below the interval. */
175 if (value->hdata.intvl.may_be_less)
177 emit_label (less_label);
178 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
179 GEN_INT (per_counter * (value->hdata.intvl.steps
180 + (value->hdata.intvl.may_be_more ? 1 : 0))),
181 mr, 0, OPTAB_WIDEN);
182 if (tmp1 != mr)
183 emit_move_insn (copy_rtx (mr), tmp1);
186 if (value->hdata.intvl.may_be_more
187 || value->hdata.intvl.may_be_less)
188 emit_label (end_of_code_label);
190 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
192 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
193 mem_ref, 0, OPTAB_WIDEN);
195 if (tmp != mem_ref)
196 emit_move_insn (copy_rtx (mem_ref), tmp);
198 sequence = get_insns ();
199 end_sequence ();
200 rebuild_jump_labels (sequence);
201 safe_insert_insn_on_edge (sequence, e);
204 /* Output instructions as RTL to increment the power of two histogram counter.
205 VALUE is the expression whose value is profiled. TAG is the tag of the
206 section for counters, BASE is offset of the counter position. */
208 static void
209 rtl_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base)
211 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
212 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
213 rtx mem_ref, tmp, mr, uval;
214 rtx sequence;
215 rtx end_of_code_label = gen_label_rtx ();
216 rtx loop_label = gen_label_rtx ();
217 int per_counter = gcov_size / BITS_PER_UNIT;
218 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
219 PREV_INSN ((rtx)value->insn));
221 start_sequence ();
223 if (value->seq)
224 emit_insn (value->seq);
226 mr = gen_reg_rtx (Pmode);
227 tmp = rtl_coverage_counter_ref (tag, base);
228 tmp = force_reg (Pmode, XEXP (tmp, 0));
229 emit_move_insn (mr, tmp);
231 uval = gen_reg_rtx (value->mode);
232 emit_move_insn (uval, copy_rtx (value->value));
234 /* Check for non-power of 2. */
235 if (value->hdata.pow2.may_be_other)
237 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
238 NULL_RTX, NULL_RTX, end_of_code_label);
239 tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
240 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
241 tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
242 NULL_RTX, 0, OPTAB_WIDEN);
243 do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
244 NULL_RTX, end_of_code_label);
247 /* Count log_2(value). */
248 emit_label (loop_label);
250 tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
251 if (tmp != mr)
252 emit_move_insn (copy_rtx (mr), tmp);
254 tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
255 uval, 0, OPTAB_WIDEN);
256 if (tmp != uval)
257 emit_move_insn (copy_rtx (uval), tmp);
259 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
260 NULL_RTX, NULL_RTX, loop_label);
262 /* Increase the counter. */
263 emit_label (end_of_code_label);
265 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
267 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
268 mem_ref, 0, OPTAB_WIDEN);
270 if (tmp != mem_ref)
271 emit_move_insn (copy_rtx (mem_ref), tmp);
273 sequence = get_insns ();
274 end_sequence ();
275 rebuild_jump_labels (sequence);
276 safe_insert_insn_on_edge (sequence, e);
279 /* Output instructions as RTL for code to find the most common value.
280 VALUE is the expression whose value is profiled. TAG is the tag of the
281 section for counters, BASE is offset of the counter position. */
283 static rtx
284 rtl_gen_one_value_profiler_no_edge_manipulation (histogram_value value,
285 unsigned tag, unsigned base)
287 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
288 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
289 rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
290 rtx tmp, uval;
291 rtx sequence;
292 rtx same_label = gen_label_rtx ();
293 rtx zero_label = gen_label_rtx ();
294 rtx end_of_code_label = gen_label_rtx ();
296 start_sequence ();
298 if (value->seq)
299 emit_insn (value->seq);
301 stored_value_ref = rtl_coverage_counter_ref (tag, base);
302 counter_ref = rtl_coverage_counter_ref (tag, base + 1);
303 all_ref = rtl_coverage_counter_ref (tag, base + 2);
304 stored_value = validize_mem (stored_value_ref);
305 counter = validize_mem (counter_ref);
306 all = validize_mem (all_ref);
308 uval = gen_reg_rtx (mode);
309 convert_move (uval, copy_rtx (value->value), 0);
311 /* Check if the stored value matches. */
312 do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
313 0, mode, NULL_RTX, NULL_RTX, same_label);
315 /* Does not match; check whether the counter is zero. */
316 do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
317 NULL_RTX, NULL_RTX, zero_label);
319 /* The counter is not zero yet. */
320 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
321 counter, 0, OPTAB_WIDEN);
323 if (tmp != counter)
324 emit_move_insn (copy_rtx (counter), tmp);
326 emit_jump_insn (gen_jump (end_of_code_label));
327 emit_barrier ();
329 emit_label (zero_label);
330 /* Set new value. */
331 emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));
333 emit_label (same_label);
334 /* Increase the counter. */
335 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
336 counter, 0, OPTAB_WIDEN);
338 if (tmp != counter)
339 emit_move_insn (copy_rtx (counter), tmp);
341 emit_label (end_of_code_label);
343 /* Increase the counter of all executions; this seems redundant given
344 that ve have counts for edges in cfg, but it may happen that some
345 optimization will change the counts for the block (either because
346 it is unable to update them correctly, or because it will duplicate
347 the block or its part). */
348 tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
349 all, 0, OPTAB_WIDEN);
351 if (tmp != all)
352 emit_move_insn (copy_rtx (all), tmp);
353 sequence = get_insns ();
354 end_sequence ();
355 return sequence;
358 /* Output instructions as RTL for code to find the most common value.
359 VALUE is the expression whose value is profiled. TAG is the tag of the
360 section for counters, BASE is offset of the counter position. */
362 static void
363 rtl_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base)
365 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
366 PREV_INSN ((rtx)value->insn));
367 rtx sequence = rtl_gen_one_value_profiler_no_edge_manipulation (value,
368 tag, base);
369 rebuild_jump_labels (sequence);
370 safe_insert_insn_on_edge (sequence, e);
373 /* Output instructions as RTL for code to find the most common value of
374 a difference between two evaluations of an expression.
375 VALUE is the expression whose value is profiled. TAG is the tag of the
376 section for counters, BASE is offset of the counter position. */
378 static void
379 rtl_gen_const_delta_profiler (histogram_value value, unsigned tag, unsigned base)
381 histogram_value one_value_delta;
382 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
383 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
384 rtx stored_value_ref, stored_value, tmp, uval;
385 rtx sequence;
386 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
387 PREV_INSN ((rtx)value->insn));
389 start_sequence ();
391 if (value->seq)
392 emit_insn (value->seq);
394 stored_value_ref = rtl_coverage_counter_ref (tag, base);
395 stored_value = validize_mem (stored_value_ref);
397 uval = gen_reg_rtx (mode);
398 convert_move (uval, copy_rtx (value->value), 0);
399 tmp = expand_simple_binop (mode, MINUS,
400 copy_rtx (uval), copy_rtx (stored_value),
401 NULL_RTX, 0, OPTAB_WIDEN);
403 one_value_delta = ggc_alloc (sizeof (*one_value_delta));
404 one_value_delta->value = tmp;
405 one_value_delta->mode = mode;
406 one_value_delta->seq = NULL_RTX;
407 one_value_delta->insn = value->insn;
408 one_value_delta->type = HIST_TYPE_SINGLE_VALUE;
409 emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (one_value_delta,
410 tag, base + 1));
411 emit_move_insn (copy_rtx (stored_value), uval);
412 sequence = get_insns ();
413 end_sequence ();
414 rebuild_jump_labels (sequence);
415 safe_insert_insn_on_edge (sequence, e);
418 /* Return the file on which profile dump output goes, if any. */
420 static FILE *rtl_profile_dump_file (void) {
421 return dump_file;
424 struct profile_hooks rtl_profile_hooks =
426 rtl_init_edge_profiler,
427 rtl_gen_edge_profiler,
428 rtl_gen_interval_profiler,
429 rtl_gen_pow2_profiler,
430 rtl_gen_one_value_profiler,
431 rtl_gen_const_delta_profiler,
432 rtl_profile_dump_file