PR tree-optimization/17468
[official-gcc.git] / gcc / rtl-profile.c
blob2d0c69ccb9162bfdf9fcd8930f334489836ad6a8
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
42 The auxiliary file generated is <dumpbase>.bbg. The format is
43 described in full in gcov-io.h. */
45 /* ??? Register allocation should use basic block execution counts to
46 give preference to the most commonly executed blocks. */
48 /* ??? Should calculate branch probabilities before instrumenting code, since
49 then we can use arc counts to help decide which arcs to instrument. */
51 #include "config.h"
52 #include "system.h"
53 #include "coretypes.h"
54 #include "tm.h"
55 #include "rtl.h"
56 #include "flags.h"
57 #include "output.h"
58 #include "regs.h"
59 #include "expr.h"
60 #include "function.h"
61 #include "toplev.h"
62 #include "coverage.h"
63 #include "value-prof.h"
64 #include "tree.h"
65 #include "ggc.h"
67 /* Output instructions as RTL to increment the edge execution count. */
69 static void
70 rtl_gen_edge_profiler (int edgeno, edge e)
72 rtx ref = rtl_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
73 rtx tmp;
74 enum machine_mode mode = GET_MODE (ref);
75 rtx sequence;
77 start_sequence ();
78 ref = validize_mem (ref);
80 tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
81 ref, 0, OPTAB_WIDEN);
83 if (tmp != ref)
84 emit_move_insn (copy_rtx (ref), tmp);
86 sequence = get_insns ();
87 end_sequence ();
88 insert_insn_on_edge (sequence, e);
89 rebuild_jump_labels (e->insns.r);
92 /* Output instructions as RTL to increment the interval histogram counter.
93 VALUE is the expression whose value is profiled. TAG is the tag of the
94 section for counters, BASE is offset of the counter position. */
96 static void
97 rtl_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base)
99 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
100 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
101 rtx mem_ref, tmp, tmp1, mr, val;
102 rtx sequence;
103 rtx more_label = gen_label_rtx ();
104 rtx less_label = gen_label_rtx ();
105 rtx end_of_code_label = gen_label_rtx ();
106 int per_counter = gcov_size / BITS_PER_UNIT;
107 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
108 PREV_INSN ((rtx)value->insn));
110 start_sequence ();
112 if (value->seq)
113 emit_insn (value->seq);
115 mr = gen_reg_rtx (Pmode);
117 tmp = rtl_coverage_counter_ref (tag, base);
118 tmp = force_reg (Pmode, XEXP (tmp, 0));
120 val = expand_simple_binop (value->mode, MINUS,
121 copy_rtx (value->value),
122 GEN_INT (value->hdata.intvl.int_start),
123 NULL_RTX, 0, OPTAB_WIDEN);
125 if (value->hdata.intvl.may_be_more)
126 do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
127 GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
128 if (value->hdata.intvl.may_be_less)
129 do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
130 NULL_RTX, NULL_RTX, less_label);
132 /* We are in range. */
133 tmp1 = expand_simple_binop (value->mode, MULT,
134 copy_rtx (val), GEN_INT (per_counter),
135 NULL_RTX, 0, OPTAB_WIDEN);
136 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
137 0, OPTAB_WIDEN);
138 if (tmp1 != mr)
139 emit_move_insn (copy_rtx (mr), tmp1);
141 if (value->hdata.intvl.may_be_more
142 || value->hdata.intvl.may_be_less)
144 emit_jump_insn (gen_jump (end_of_code_label));
145 emit_barrier ();
148 /* Above the interval. */
149 if (value->hdata.intvl.may_be_more)
151 emit_label (more_label);
152 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
153 GEN_INT (per_counter * value->hdata.intvl.steps),
154 mr, 0, OPTAB_WIDEN);
155 if (tmp1 != mr)
156 emit_move_insn (copy_rtx (mr), tmp1);
157 if (value->hdata.intvl.may_be_less)
159 emit_jump_insn (gen_jump (end_of_code_label));
160 emit_barrier ();
164 /* Below the interval. */
165 if (value->hdata.intvl.may_be_less)
167 emit_label (less_label);
168 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
169 GEN_INT (per_counter * (value->hdata.intvl.steps
170 + (value->hdata.intvl.may_be_more ? 1 : 0))),
171 mr, 0, OPTAB_WIDEN);
172 if (tmp1 != mr)
173 emit_move_insn (copy_rtx (mr), tmp1);
176 if (value->hdata.intvl.may_be_more
177 || value->hdata.intvl.may_be_less)
178 emit_label (end_of_code_label);
180 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
182 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
183 mem_ref, 0, OPTAB_WIDEN);
185 if (tmp != mem_ref)
186 emit_move_insn (copy_rtx (mem_ref), tmp);
188 sequence = get_insns ();
189 end_sequence ();
190 rebuild_jump_labels (sequence);
191 safe_insert_insn_on_edge (sequence, e);
194 /* Output instructions as RTL to increment the power of two histogram counter.
195 VALUE is the expression whose value is profiled. TAG is the tag of the
196 section for counters, BASE is offset of the counter position. */
198 static void
199 rtl_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base)
201 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
202 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
203 rtx mem_ref, tmp, mr, uval;
204 rtx sequence;
205 rtx end_of_code_label = gen_label_rtx ();
206 rtx loop_label = gen_label_rtx ();
207 int per_counter = gcov_size / BITS_PER_UNIT;
208 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
209 PREV_INSN ((rtx)value->insn));
211 start_sequence ();
213 if (value->seq)
214 emit_insn (value->seq);
216 mr = gen_reg_rtx (Pmode);
217 tmp = rtl_coverage_counter_ref (tag, base);
218 tmp = force_reg (Pmode, XEXP (tmp, 0));
219 emit_move_insn (mr, tmp);
221 uval = gen_reg_rtx (value->mode);
222 emit_move_insn (uval, copy_rtx (value->value));
224 /* Check for non-power of 2. */
225 if (value->hdata.pow2.may_be_other)
227 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
228 NULL_RTX, NULL_RTX, end_of_code_label);
229 tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
230 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
231 tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
232 NULL_RTX, 0, OPTAB_WIDEN);
233 do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
234 NULL_RTX, end_of_code_label);
237 /* Count log_2(value). */
238 emit_label (loop_label);
240 tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
241 if (tmp != mr)
242 emit_move_insn (copy_rtx (mr), tmp);
244 tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
245 uval, 0, OPTAB_WIDEN);
246 if (tmp != uval)
247 emit_move_insn (copy_rtx (uval), tmp);
249 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
250 NULL_RTX, NULL_RTX, loop_label);
252 /* Increase the counter. */
253 emit_label (end_of_code_label);
255 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
257 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
258 mem_ref, 0, OPTAB_WIDEN);
260 if (tmp != mem_ref)
261 emit_move_insn (copy_rtx (mem_ref), tmp);
263 sequence = get_insns ();
264 end_sequence ();
265 rebuild_jump_labels (sequence);
266 safe_insert_insn_on_edge (sequence, e);
269 /* Output instructions as RTL for code to find the most common value.
270 VALUE is the expression whose value is profiled. TAG is the tag of the
271 section for counters, BASE is offset of the counter position. */
273 static rtx
274 rtl_gen_one_value_profiler_no_edge_manipulation (histogram_value value,
275 unsigned tag, unsigned base)
277 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
278 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
279 rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
280 rtx tmp, uval;
281 rtx sequence;
282 rtx same_label = gen_label_rtx ();
283 rtx zero_label = gen_label_rtx ();
284 rtx end_of_code_label = gen_label_rtx ();
286 start_sequence ();
288 if (value->seq)
289 emit_insn (value->seq);
291 stored_value_ref = rtl_coverage_counter_ref (tag, base);
292 counter_ref = rtl_coverage_counter_ref (tag, base + 1);
293 all_ref = rtl_coverage_counter_ref (tag, base + 2);
294 stored_value = validize_mem (stored_value_ref);
295 counter = validize_mem (counter_ref);
296 all = validize_mem (all_ref);
298 uval = gen_reg_rtx (mode);
299 convert_move (uval, copy_rtx (value->value), 0);
301 /* Check if the stored value matches. */
302 do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
303 0, mode, NULL_RTX, NULL_RTX, same_label);
305 /* Does not match; check whether the counter is zero. */
306 do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
307 NULL_RTX, NULL_RTX, zero_label);
309 /* The counter is not zero yet. */
310 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
311 counter, 0, OPTAB_WIDEN);
313 if (tmp != counter)
314 emit_move_insn (copy_rtx (counter), tmp);
316 emit_jump_insn (gen_jump (end_of_code_label));
317 emit_barrier ();
319 emit_label (zero_label);
320 /* Set new value. */
321 emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));
323 emit_label (same_label);
324 /* Increase the counter. */
325 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
326 counter, 0, OPTAB_WIDEN);
328 if (tmp != counter)
329 emit_move_insn (copy_rtx (counter), tmp);
331 emit_label (end_of_code_label);
333 /* Increase the counter of all executions; this seems redundant given
334 that ve have counts for edges in cfg, but it may happen that some
335 optimization will change the counts for the block (either because
336 it is unable to update them correctly, or because it will duplicate
337 the block or its part). */
338 tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
339 all, 0, OPTAB_WIDEN);
341 if (tmp != all)
342 emit_move_insn (copy_rtx (all), tmp);
343 sequence = get_insns ();
344 end_sequence ();
345 return sequence;
348 /* Output instructions as RTL for code to find the most common value.
349 VALUE is the expression whose value is profiled. TAG is the tag of the
350 section for counters, BASE is offset of the counter position. */
352 static void
353 rtl_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base)
355 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
356 PREV_INSN ((rtx)value->insn));
357 rtx sequence = rtl_gen_one_value_profiler_no_edge_manipulation (value,
358 tag, base);
359 rebuild_jump_labels (sequence);
360 safe_insert_insn_on_edge (sequence, e);
363 /* Output instructions as RTL for code to find the most common value of
364 a difference between two evaluations of an expression.
365 VALUE is the expression whose value is profiled. TAG is the tag of the
366 section for counters, BASE is offset of the counter position. */
368 static void
369 rtl_gen_const_delta_profiler (histogram_value value, unsigned tag, unsigned base)
371 histogram_value one_value_delta;
372 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
373 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
374 rtx stored_value_ref, stored_value, tmp, uval;
375 rtx sequence;
376 edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
377 PREV_INSN ((rtx)value->insn));
379 start_sequence ();
381 if (value->seq)
382 emit_insn (value->seq);
384 stored_value_ref = rtl_coverage_counter_ref (tag, base);
385 stored_value = validize_mem (stored_value_ref);
387 uval = gen_reg_rtx (mode);
388 convert_move (uval, copy_rtx (value->value), 0);
389 tmp = expand_simple_binop (mode, MINUS,
390 copy_rtx (uval), copy_rtx (stored_value),
391 NULL_RTX, 0, OPTAB_WIDEN);
393 one_value_delta = ggc_alloc (sizeof (*one_value_delta));
394 one_value_delta->value = tmp;
395 one_value_delta->mode = mode;
396 one_value_delta->seq = NULL_RTX;
397 one_value_delta->insn = value->insn;
398 one_value_delta->type = HIST_TYPE_SINGLE_VALUE;
399 emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (one_value_delta,
400 tag, base + 1));
401 emit_move_insn (copy_rtx (stored_value), uval);
402 sequence = get_insns ();
403 end_sequence ();
404 rebuild_jump_labels (sequence);
405 safe_insert_insn_on_edge (sequence, e);
408 /* Return the file on which profile dump output goes, if any. */
410 static FILE *rtl_profile_dump_file (void) {
411 return dump_file;
414 struct profile_hooks rtl_profile_hooks =
416 rtl_gen_edge_profiler,
417 rtl_gen_interval_profiler,
418 rtl_gen_pow2_profiler,
419 rtl_gen_one_value_profiler,
420 rtl_gen_const_delta_profiler,
421 rtl_profile_dump_file