1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2015 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Generate basic block profile instrumentation and auxiliary files.
24 Profile generation is optimized, so that not all arcs in the basic
25 block graph need instrumenting. First, the BB graph is closed with
26 one entry (function start), and one exit (function exit). Any
27 ABNORMAL_EDGE cannot be instrumented (because there is no control
28 path to place the code). We close the graph by inserting fake
29 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
30 edges that do not go to the exit_block. We ignore such abnormal
31 edges. Naturally these fake edges are never directly traversed,
32 and so *cannot* be directly instrumented. Some other graph
33 massaging is done. To optimize the instrumentation we generate the
34 BB minimal span tree, only edges that are not on the span tree
35 (plus the entry point) need instrumenting. From that information
36 all other edge counts can be deduced. By construction all fake
37 edges must be on the spanning tree. We also attempt to place
38 EDGE_CRITICAL edges on the spanning tree.
40 The auxiliary files generated are <dumpbase>.gcno (at compile time)
41 and <dumpbase>.gcda (at run time). The format is
42 described in full in gcov-io.h. */
44 /* ??? Register allocation should use basic block execution counts to
45 give preference to the most commonly executed blocks. */
47 /* ??? Should calculate branch probabilities before instrumenting code, since
48 then we can use arc counts to help decide which arcs to instrument. */
52 #include "coretypes.h"
62 #include "hard-reg-set.h"
65 #include "statistics.h"
66 #include "double-int.h"
68 #include "fixed-value.h"
73 #include "insn-config.h"
83 #include "dominance.h"
86 #include "basic-block.h"
87 #include "diagnostic-core.h"
89 #include "value-prof.h"
90 #include "fold-const.h"
91 #include "tree-ssa-alias.h"
92 #include "internal-fn.h"
93 #include "gimple-expr.h"
96 #include "gimple-iterator.h"
100 #include "hash-map.h"
101 #include "plugin-api.h"
107 struct bb_profile_info
{
108 unsigned int count_valid
: 1;
110 /* Number of successor and predecessor edges. */
111 gcov_type succ_count
;
112 gcov_type pred_count
;
115 #define BB_INFO(b) ((struct bb_profile_info *) (b)->aux)
118 /* Counter summary from the last set of coverage counts read. */
120 const struct gcov_ctr_summary
*profile_info
;
122 /* Counter working set information computed from the current counter
123 summary. Not initialized unless profile_info summary is non-NULL. */
124 static gcov_working_set_t gcov_working_sets
[NUM_GCOV_WORKING_SETS
];
126 /* Collect statistics on the performance of this pass for the entire source
129 static int total_num_blocks
;
130 static int total_num_edges
;
131 static int total_num_edges_ignored
;
132 static int total_num_edges_instrumented
;
133 static int total_num_blocks_created
;
134 static int total_num_passes
;
135 static int total_num_times_called
;
136 static int total_hist_br_prob
[20];
137 static int total_num_branches
;
139 /* Helper function to update gcov_working_sets. */
141 void add_working_set (gcov_working_set_t
*set
) {
143 for (; i
< NUM_GCOV_WORKING_SETS
; i
++)
144 gcov_working_sets
[i
] = set
[i
];
147 /* Forward declarations. */
148 static void find_spanning_tree (struct edge_list
*);
150 /* Add edge instrumentation code to the entire insn chain.
152 F is the first insn of the chain.
153 NUM_BLOCKS is the number of basic blocks found in F. */
156 instrument_edges (struct edge_list
*el
)
158 unsigned num_instr_edges
= 0;
159 int num_edges
= NUM_EDGES (el
);
162 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
167 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
169 struct edge_profile_info
*inf
= EDGE_INFO (e
);
171 if (!inf
->ignore
&& !inf
->on_tree
)
173 gcc_assert (!(e
->flags
& EDGE_ABNORMAL
));
175 fprintf (dump_file
, "Edge %d to %d instrumented%s\n",
176 e
->src
->index
, e
->dest
->index
,
177 EDGE_CRITICAL_P (e
) ? " (and split)" : "");
178 gimple_gen_edge_profiler (num_instr_edges
++, e
);
183 total_num_blocks_created
+= num_edges
;
185 fprintf (dump_file
, "%d edges instrumented\n", num_instr_edges
);
186 return num_instr_edges
;
189 /* Add code to measure histograms for values in list VALUES. */
191 instrument_values (histogram_values values
)
195 /* Emit code to generate the histograms before the insns. */
197 for (i
= 0; i
< values
.length (); i
++)
199 histogram_value hist
= values
[i
];
200 unsigned t
= COUNTER_FOR_HIST_TYPE (hist
->type
);
202 if (!coverage_counter_alloc (t
, hist
->n_counters
))
207 case HIST_TYPE_INTERVAL
:
208 gimple_gen_interval_profiler (hist
, t
, 0);
212 gimple_gen_pow2_profiler (hist
, t
, 0);
215 case HIST_TYPE_SINGLE_VALUE
:
216 gimple_gen_one_value_profiler (hist
, t
, 0);
219 case HIST_TYPE_CONST_DELTA
:
220 gimple_gen_const_delta_profiler (hist
, t
, 0);
223 case HIST_TYPE_INDIR_CALL
:
224 case HIST_TYPE_INDIR_CALL_TOPN
:
225 gimple_gen_ic_profiler (hist
, t
, 0);
228 case HIST_TYPE_AVERAGE
:
229 gimple_gen_average_profiler (hist
, t
, 0);
233 gimple_gen_ior_profiler (hist
, t
, 0);
236 case HIST_TYPE_TIME_PROFILE
:
239 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
240 gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
242 gimple_gen_time_profiler (t
, 0, gsi
);
253 /* Fill the working set information into the profile_info structure. */
256 get_working_sets (void)
258 unsigned ws_ix
, pctinc
, pct
;
259 gcov_working_set_t
*ws_info
;
264 compute_working_sets (profile_info
, gcov_working_sets
);
268 fprintf (dump_file
, "Counter working sets:\n");
269 /* Multiply the percentage by 100 to avoid float. */
270 pctinc
= 100 * 100 / NUM_GCOV_WORKING_SETS
;
271 for (ws_ix
= 0, pct
= pctinc
; ws_ix
< NUM_GCOV_WORKING_SETS
;
272 ws_ix
++, pct
+= pctinc
)
274 if (ws_ix
== NUM_GCOV_WORKING_SETS
- 1)
276 ws_info
= &gcov_working_sets
[ws_ix
];
277 /* Print out the percentage using int arithmatic to avoid float. */
278 fprintf (dump_file
, "\t\t%u.%02u%%: num counts=%u, min counter="
280 pct
/ 100, pct
- (pct
/ 100 * 100),
281 ws_info
->num_counters
,
282 (int64_t)ws_info
->min_counter
);
287 /* Given a the desired percentage of the full profile (sum_all from the
288 summary), multiplied by 10 to avoid float in PCT_TIMES_10, returns
289 the corresponding working set information. If an exact match for
290 the percentage isn't found, the closest value is used. */
293 find_working_set (unsigned pct_times_10
)
298 gcc_assert (pct_times_10
<= 1000);
299 if (pct_times_10
>= 999)
300 return &gcov_working_sets
[NUM_GCOV_WORKING_SETS
- 1];
301 i
= pct_times_10
* NUM_GCOV_WORKING_SETS
/ 1000;
303 return &gcov_working_sets
[0];
304 return &gcov_working_sets
[i
- 1];
307 /* Computes hybrid profile for all matching entries in da_file.
309 CFG_CHECKSUM is the precomputed checksum for the CFG. */
312 get_exec_counts (unsigned cfg_checksum
, unsigned lineno_checksum
)
314 unsigned num_edges
= 0;
318 /* Count the edges to be (possibly) instrumented. */
319 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
324 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
325 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
329 counts
= get_coverage_counts (GCOV_COUNTER_ARCS
, num_edges
, cfg_checksum
,
330 lineno_checksum
, &profile_info
);
336 if (dump_file
&& profile_info
)
337 fprintf (dump_file
, "Merged %u profiles with maximal count %u.\n",
338 profile_info
->runs
, (unsigned) profile_info
->sum_max
);
345 is_edge_inconsistent (vec
<edge
, va_gc
> *edges
)
349 FOR_EACH_EDGE (e
, ei
, edges
)
351 if (!EDGE_INFO (e
)->ignore
)
354 && (!(e
->flags
& EDGE_FAKE
)
355 || !block_ends_with_call_p (e
->src
)))
360 "Edge %i->%i is inconsistent, count%"PRId64
,
361 e
->src
->index
, e
->dest
->index
, e
->count
);
362 dump_bb (dump_file
, e
->src
, 0, TDF_DETAILS
);
363 dump_bb (dump_file
, e
->dest
, 0, TDF_DETAILS
);
373 correct_negative_edge_counts (void)
379 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
381 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
389 /* Check consistency.
390 Return true if inconsistency is found. */
392 is_inconsistent (void)
395 bool inconsistent
= false;
396 FOR_EACH_BB_FN (bb
, cfun
)
398 inconsistent
|= is_edge_inconsistent (bb
->preds
);
399 if (!dump_file
&& inconsistent
)
401 inconsistent
|= is_edge_inconsistent (bb
->succs
);
402 if (!dump_file
&& inconsistent
)
408 fprintf (dump_file
, "BB %i count is negative "
412 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
416 if (bb
->count
!= sum_edge_counts (bb
->preds
))
420 fprintf (dump_file
, "BB %i count does not match sum of incoming edges "
421 "%"PRId64
" should be %"PRId64
,
424 sum_edge_counts (bb
->preds
));
425 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
429 if (bb
->count
!= sum_edge_counts (bb
->succs
) &&
430 ! (find_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
)) != NULL
431 && block_ends_with_call_p (bb
)))
435 fprintf (dump_file
, "BB %i count does not match sum of outgoing edges "
436 "%"PRId64
" should be %"PRId64
,
439 sum_edge_counts (bb
->succs
));
440 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
444 if (!dump_file
&& inconsistent
)
451 /* Set each basic block count to the sum of its outgoing edge counts */
456 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
458 bb
->count
= sum_edge_counts (bb
->succs
);
459 gcc_assert (bb
->count
>= 0);
463 /* Reads profile data and returns total number of edge counts read */
465 read_profile_edge_counts (gcov_type
*exec_counts
)
469 int exec_counts_pos
= 0;
470 /* For each edge not on the spanning tree, set its execution count from
472 /* The first count in the .da file is the number of times that the function
473 was entered. This is the exec_count for block zero. */
475 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
480 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
481 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
486 e
->count
= exec_counts
[exec_counts_pos
++];
487 if (e
->count
> profile_info
->sum_max
)
489 if (flag_profile_correction
)
491 static bool informed
= 0;
492 if (dump_enabled_p () && !informed
)
493 dump_printf_loc (MSG_NOTE
, input_location
,
494 "corrupted profile info: edge count"
495 " exceeds maximal count\n");
499 error ("corrupted profile info: edge from %i to %i exceeds maximal count",
500 bb
->index
, e
->dest
->index
);
506 EDGE_INFO (e
)->count_valid
= 1;
507 BB_INFO (bb
)->succ_count
--;
508 BB_INFO (e
->dest
)->pred_count
--;
511 fprintf (dump_file
, "\nRead edge from %i to %i, count:",
512 bb
->index
, e
->dest
->index
);
513 fprintf (dump_file
, "%"PRId64
,
522 #define OVERLAP_BASE 10000
524 /* Compare the static estimated profile to the actual profile, and
525 return the "degree of overlap" measure between them.
527 Degree of overlap is a number between 0 and OVERLAP_BASE. It is
528 the sum of each basic block's minimum relative weights between
529 two profiles. And overlap of OVERLAP_BASE means two profiles are
533 compute_frequency_overlap (void)
535 gcov_type count_total
= 0, freq_total
= 0;
539 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
541 count_total
+= bb
->count
;
542 freq_total
+= bb
->frequency
;
545 if (count_total
== 0 || freq_total
== 0)
548 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
549 overlap
+= MIN (bb
->count
* OVERLAP_BASE
/ count_total
,
550 bb
->frequency
* OVERLAP_BASE
/ freq_total
);
555 /* Compute the branch probabilities for the various branches.
556 Annotate them accordingly.
558 CFG_CHECKSUM is the precomputed checksum for the CFG. */
561 compute_branch_probabilities (unsigned cfg_checksum
, unsigned lineno_checksum
)
568 int hist_br_prob
[20];
570 gcov_type
*exec_counts
= get_exec_counts (cfg_checksum
, lineno_checksum
);
571 int inconsistent
= 0;
573 /* Very simple sanity checks so we catch bugs in our profiling code. */
577 if (profile_info
->sum_all
< profile_info
->sum_max
)
579 error ("corrupted profile info: sum_all is smaller than sum_max");
583 /* Attach extra info block to each bb. */
584 alloc_aux_for_blocks (sizeof (struct bb_profile_info
));
585 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
590 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
591 if (!EDGE_INFO (e
)->ignore
)
592 BB_INFO (bb
)->succ_count
++;
593 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
594 if (!EDGE_INFO (e
)->ignore
)
595 BB_INFO (bb
)->pred_count
++;
598 /* Avoid predicting entry on exit nodes. */
599 BB_INFO (EXIT_BLOCK_PTR_FOR_FN (cfun
))->succ_count
= 2;
600 BB_INFO (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->pred_count
= 2;
602 num_edges
= read_profile_edge_counts (exec_counts
);
605 fprintf (dump_file
, "\n%d edge counts read\n", num_edges
);
607 /* For every block in the file,
608 - if every exit/entrance edge has a known count, then set the block count
609 - if the block count is known, and every exit/entrance edge but one has
610 a known execution count, then set the count of the remaining edge
612 As edge counts are set, decrement the succ/pred count, but don't delete
613 the edge, that way we can easily tell when all edges are known, or only
614 one edge is unknown. */
616 /* The order that the basic blocks are iterated through is important.
617 Since the code that finds spanning trees starts with block 0, low numbered
618 edges are put on the spanning tree in preference to high numbered edges.
619 Hence, most instrumented edges are at the end. Graph solving works much
620 faster if we propagate numbers from the end to the start.
622 This takes an average of slightly more than 3 passes. */
630 FOR_BB_BETWEEN (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), NULL
, prev_bb
)
632 struct bb_profile_info
*bi
= BB_INFO (bb
);
633 if (! bi
->count_valid
)
635 if (bi
->succ_count
== 0)
641 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
647 else if (bi
->pred_count
== 0)
653 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
662 if (bi
->succ_count
== 1)
668 /* One of the counts will be invalid, but it is zero,
669 so adding it in also doesn't hurt. */
670 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
673 /* Search for the invalid edge, and set its count. */
674 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
675 if (! EDGE_INFO (e
)->count_valid
&& ! EDGE_INFO (e
)->ignore
)
678 /* Calculate count for remaining edge by conservation. */
679 total
= bb
->count
- total
;
682 EDGE_INFO (e
)->count_valid
= 1;
686 BB_INFO (e
->dest
)->pred_count
--;
689 if (bi
->pred_count
== 1)
695 /* One of the counts will be invalid, but it is zero,
696 so adding it in also doesn't hurt. */
697 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
700 /* Search for the invalid edge, and set its count. */
701 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
702 if (!EDGE_INFO (e
)->count_valid
&& !EDGE_INFO (e
)->ignore
)
705 /* Calculate count for remaining edge by conservation. */
706 total
= bb
->count
- total
+ e
->count
;
709 EDGE_INFO (e
)->count_valid
= 1;
713 BB_INFO (e
->src
)->succ_count
--;
721 int overlap
= compute_frequency_overlap ();
722 gimple_dump_cfg (dump_file
, dump_flags
);
723 fprintf (dump_file
, "Static profile overlap: %d.%d%%\n",
724 overlap
/ (OVERLAP_BASE
/ 100),
725 overlap
% (OVERLAP_BASE
/ 100));
728 total_num_passes
+= passes
;
730 fprintf (dump_file
, "Graph solving took %d passes.\n\n", passes
);
732 /* If the graph has been correctly solved, every block will have a
733 succ and pred count of zero. */
734 FOR_EACH_BB_FN (bb
, cfun
)
736 gcc_assert (!BB_INFO (bb
)->succ_count
&& !BB_INFO (bb
)->pred_count
);
739 /* Check for inconsistent basic block counts */
740 inconsistent
= is_inconsistent ();
744 if (flag_profile_correction
)
746 /* Inconsistency detected. Make it flow-consistent. */
747 static int informed
= 0;
748 if (dump_enabled_p () && informed
== 0)
751 dump_printf_loc (MSG_NOTE
, input_location
,
752 "correcting inconsistent profile data\n");
754 correct_negative_edge_counts ();
755 /* Set bb counts to the sum of the outgoing edge counts */
758 fprintf (dump_file
, "\nCalling mcf_smooth_cfg\n");
762 error ("corrupted profile info: profile data is not flow-consistent");
765 /* For every edge, calculate its branch probability and add a reg_note
766 to the branch insn to indicate this. */
768 for (i
= 0; i
< 20; i
++)
772 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
779 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
780 bb
->index
, (int)bb
->count
);
783 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
785 /* Function may return twice in the cased the called function is
786 setjmp or calls fork, but we can't represent this by extra
787 edge from the entry, since extra edge from the exit is
788 already present. We get negative frequency from the entry
791 && e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
792 || (e
->count
> bb
->count
793 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)))
795 if (block_ends_with_call_p (bb
))
796 e
->count
= e
->count
< 0 ? 0 : bb
->count
;
798 if (e
->count
< 0 || e
->count
> bb
->count
)
800 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
801 e
->src
->index
, e
->dest
->index
,
803 e
->count
= bb
->count
/ 2;
808 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
809 e
->probability
= GCOV_COMPUTE_SCALE (e
->count
, bb
->count
);
810 if (bb
->index
>= NUM_FIXED_BLOCKS
811 && block_ends_with_condjump_p (bb
)
812 && EDGE_COUNT (bb
->succs
) >= 2)
818 /* Find the branch edge. It is possible that we do have fake
820 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
821 if (!(e
->flags
& (EDGE_FAKE
| EDGE_FALLTHRU
)))
824 prob
= e
->probability
;
825 index
= prob
* 20 / REG_BR_PROB_BASE
;
829 hist_br_prob
[index
]++;
834 /* As a last resort, distribute the probabilities evenly.
835 Use simple heuristics that if there are normal edges,
836 give all abnormals frequency of 0, otherwise distribute the
837 frequency over abnormals (this is the case of noreturn
839 else if (profile_status_for_fn (cfun
) == PROFILE_ABSENT
)
843 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
844 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
848 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
849 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
850 e
->probability
= REG_BR_PROB_BASE
/ total
;
856 total
+= EDGE_COUNT (bb
->succs
);
857 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
858 e
->probability
= REG_BR_PROB_BASE
/ total
;
860 if (bb
->index
>= NUM_FIXED_BLOCKS
861 && block_ends_with_condjump_p (bb
)
862 && EDGE_COUNT (bb
->succs
) >= 2)
867 profile_status_for_fn (cfun
) = PROFILE_READ
;
868 compute_function_frequency ();
872 fprintf (dump_file
, "%d branches\n", num_branches
);
874 for (i
= 0; i
< 10; i
++)
875 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
876 (hist_br_prob
[i
] + hist_br_prob
[19-i
]) * 100 / num_branches
,
879 total_num_branches
+= num_branches
;
880 for (i
= 0; i
< 20; i
++)
881 total_hist_br_prob
[i
] += hist_br_prob
[i
];
883 fputc ('\n', dump_file
);
884 fputc ('\n', dump_file
);
887 free_aux_for_blocks ();
890 /* Load value histograms values whose description is stored in VALUES array
893 CFG_CHECKSUM is the precomputed checksum for the CFG. */
896 compute_value_histograms (histogram_values values
, unsigned cfg_checksum
,
897 unsigned lineno_checksum
)
899 unsigned i
, j
, t
, any
;
900 unsigned n_histogram_counters
[GCOV_N_VALUE_COUNTERS
];
901 gcov_type
*histogram_counts
[GCOV_N_VALUE_COUNTERS
];
902 gcov_type
*act_count
[GCOV_N_VALUE_COUNTERS
];
903 gcov_type
*aact_count
;
904 struct cgraph_node
*node
;
906 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
907 n_histogram_counters
[t
] = 0;
909 for (i
= 0; i
< values
.length (); i
++)
911 histogram_value hist
= values
[i
];
912 n_histogram_counters
[(int) hist
->type
] += hist
->n_counters
;
916 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
918 if (!n_histogram_counters
[t
])
920 histogram_counts
[t
] = NULL
;
924 histogram_counts
[t
] =
925 get_coverage_counts (COUNTER_FOR_HIST_TYPE (t
),
926 n_histogram_counters
[t
], cfg_checksum
,
927 lineno_checksum
, NULL
);
928 if (histogram_counts
[t
])
930 act_count
[t
] = histogram_counts
[t
];
935 for (i
= 0; i
< values
.length (); i
++)
937 histogram_value hist
= values
[i
];
938 gimple stmt
= hist
->hvalue
.stmt
;
940 t
= (int) hist
->type
;
942 aact_count
= act_count
[t
];
945 act_count
[t
] += hist
->n_counters
;
947 gimple_add_histogram_value (cfun
, stmt
, hist
);
948 hist
->hvalue
.counters
= XNEWVEC (gcov_type
, hist
->n_counters
);
949 for (j
= 0; j
< hist
->n_counters
; j
++)
951 hist
->hvalue
.counters
[j
] = aact_count
[j
];
953 hist
->hvalue
.counters
[j
] = 0;
955 /* Time profiler counter is not related to any statement,
956 so that we have to read the counter and set the value to
957 the corresponding call graph node. */
958 if (hist
->type
== HIST_TYPE_TIME_PROFILE
)
960 node
= cgraph_node::get (hist
->fun
->decl
);
961 node
->tp_first_run
= hist
->hvalue
.counters
[0];
964 fprintf (dump_file
, "Read tp_first_run: %d\n", node
->tp_first_run
);
968 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
969 free (histogram_counts
[t
]);
972 /* When passed NULL as file_name, initialize.
973 When passed something else, output the necessary commands to change
974 line to LINE and offset to FILE_NAME. */
976 output_location (char const *file_name
, int line
,
977 gcov_position_t
*offset
, basic_block bb
)
979 static char const *prev_file_name
;
980 static int prev_line
;
981 bool name_differs
, line_differs
;
985 prev_file_name
= NULL
;
990 name_differs
= !prev_file_name
|| filename_cmp (file_name
, prev_file_name
);
991 line_differs
= prev_line
!= line
;
993 if (name_differs
|| line_differs
)
997 *offset
= gcov_write_tag (GCOV_TAG_LINES
);
998 gcov_write_unsigned (bb
->index
);
999 name_differs
= line_differs
=true;
1002 /* If this is a new source file, then output the
1003 file's name to the .bb file. */
1006 prev_file_name
= file_name
;
1007 gcov_write_unsigned (0);
1008 gcov_write_string (prev_file_name
);
1012 gcov_write_unsigned (line
);
1018 /* Instrument and/or analyze program behavior based on program the CFG.
1020 This function creates a representation of the control flow graph (of
1021 the function being compiled) that is suitable for the instrumentation
1022 of edges and/or converting measured edge counts to counts on the
1025 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
1026 the flow graph that are needed to reconstruct the dynamic behavior of the
1027 flow graph. This data is written to the gcno file for gcov.
1029 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
1030 information from the gcda file containing edge count information from
1031 previous executions of the function being compiled. In this case, the
1032 control flow graph is annotated with actual execution counts by
1033 compute_branch_probabilities().
1035 Main entry point of this file. */
1042 unsigned num_edges
, ignored_edges
;
1043 unsigned num_instrumented
;
1044 struct edge_list
*el
;
1045 histogram_values values
= histogram_values ();
1046 unsigned cfg_checksum
, lineno_checksum
;
1048 total_num_times_called
++;
1050 flow_call_edges_add (NULL
);
1051 add_noreturn_fake_exit_edges ();
1053 /* We can't handle cyclic regions constructed using abnormal edges.
1054 To avoid these we replace every source of abnormal edge by a fake
1055 edge from entry node and every destination by fake edge to exit.
1056 This keeps graph acyclic and our calculation exact for all normal
1057 edges except for exit and entrance ones.
1059 We also add fake exit edges for each call and asm statement in the
1060 basic, since it may not return. */
1062 FOR_EACH_BB_FN (bb
, cfun
)
1064 int need_exit_edge
= 0, need_entry_edge
= 0;
1065 int have_exit_edge
= 0, have_entry_edge
= 0;
1069 /* Functions returning multiple times are not handled by extra edges.
1070 Instead we simply allow negative counts on edges from exit to the
1071 block past call and corresponding probabilities. We can't go
1072 with the extra edges because that would result in flowgraph that
1073 needs to have fake edges outside the spanning tree. */
1075 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1077 gimple_stmt_iterator gsi
;
1080 /* It may happen that there are compiler generated statements
1081 without a locus at all. Go through the basic block from the
1082 last to the first statement looking for a locus. */
1083 for (gsi
= gsi_last_nondebug_bb (bb
);
1085 gsi_prev_nondebug (&gsi
))
1087 last
= gsi_stmt (gsi
);
1088 if (gimple_has_location (last
))
1092 /* Edge with goto locus might get wrong coverage info unless
1093 it is the only edge out of BB.
1094 Don't do that when the locuses match, so
1095 if (blah) goto something;
1096 is not computed twice. */
1098 && gimple_has_location (last
)
1099 && LOCATION_LOCUS (e
->goto_locus
) != UNKNOWN_LOCATION
1100 && !single_succ_p (bb
)
1101 && (LOCATION_FILE (e
->goto_locus
)
1102 != LOCATION_FILE (gimple_location (last
))
1103 || (LOCATION_LINE (e
->goto_locus
)
1104 != LOCATION_LINE (gimple_location (last
)))))
1106 basic_block new_bb
= split_edge (e
);
1107 edge ne
= single_succ_edge (new_bb
);
1108 ne
->goto_locus
= e
->goto_locus
;
1110 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1111 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1113 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1116 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1118 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1119 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1120 need_entry_edge
= 1;
1121 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1122 have_entry_edge
= 1;
1125 if (need_exit_edge
&& !have_exit_edge
)
1128 fprintf (dump_file
, "Adding fake exit edge to bb %i\n",
1130 make_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), EDGE_FAKE
);
1132 if (need_entry_edge
&& !have_entry_edge
)
1135 fprintf (dump_file
, "Adding fake entry edge to bb %i\n",
1137 make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
), bb
, EDGE_FAKE
);
1138 /* Avoid bbs that have both fake entry edge and also some
1139 exit edge. One of those edges wouldn't be added to the
1140 spanning tree, but we can't instrument any of them. */
1141 if (have_exit_edge
|| need_exit_edge
)
1143 gimple_stmt_iterator gsi
;
1146 gsi
= gsi_start_nondebug_after_labels_bb (bb
);
1147 gcc_checking_assert (!gsi_end_p (gsi
));
1148 first
= gsi_stmt (gsi
);
1149 /* Don't split the bbs containing __builtin_setjmp_receiver
1150 or ABNORMAL_DISPATCHER calls. These are very
1151 special and don't expect anything to be inserted before
1153 if (is_gimple_call (first
)
1154 && (gimple_call_builtin_p (first
, BUILT_IN_SETJMP_RECEIVER
)
1155 || (gimple_call_flags (first
) & ECF_RETURNS_TWICE
)
1156 || (gimple_call_internal_p (first
)
1157 && (gimple_call_internal_fn (first
)
1158 == IFN_ABNORMAL_DISPATCHER
))))
1162 fprintf (dump_file
, "Splitting bb %i after labels\n",
1164 split_block_after_labels (bb
);
1169 el
= create_edge_list ();
1170 num_edges
= NUM_EDGES (el
);
1171 alloc_aux_for_edges (sizeof (struct edge_profile_info
));
1173 /* The basic blocks are expected to be numbered sequentially. */
1177 for (i
= 0 ; i
< num_edges
; i
++)
1179 edge e
= INDEX_EDGE (el
, i
);
1182 /* Mark edges we've replaced by fake edges above as ignored. */
1183 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1184 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1185 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1187 EDGE_INFO (e
)->ignore
= 1;
1192 /* Create spanning tree from basic block graph, mark each edge that is
1193 on the spanning tree. We insert as many abnormal and critical edges
1194 as possible to minimize number of edge splits necessary. */
1196 find_spanning_tree (el
);
1198 /* Fake edges that are not on the tree will not be instrumented, so
1199 mark them ignored. */
1200 for (num_instrumented
= i
= 0; i
< num_edges
; i
++)
1202 edge e
= INDEX_EDGE (el
, i
);
1203 struct edge_profile_info
*inf
= EDGE_INFO (e
);
1205 if (inf
->ignore
|| inf
->on_tree
)
1207 else if (e
->flags
& EDGE_FAKE
)
1216 total_num_blocks
+= n_basic_blocks_for_fn (cfun
);
1218 fprintf (dump_file
, "%d basic blocks\n", n_basic_blocks_for_fn (cfun
));
1220 total_num_edges
+= num_edges
;
1222 fprintf (dump_file
, "%d edges\n", num_edges
);
1224 total_num_edges_ignored
+= ignored_edges
;
1226 fprintf (dump_file
, "%d ignored edges\n", ignored_edges
);
1228 total_num_edges_instrumented
+= num_instrumented
;
1230 fprintf (dump_file
, "%d instrumentation edges\n", num_instrumented
);
1232 /* Compute two different checksums. Note that we want to compute
1233 the checksum in only once place, since it depends on the shape
1234 of the control flow which can change during
1235 various transformations. */
1236 cfg_checksum
= coverage_compute_cfg_checksum (cfun
);
1237 lineno_checksum
= coverage_compute_lineno_checksum ();
1239 /* Write the data from which gcov can reconstruct the basic block
1240 graph and function line numbers (the gcno file). */
1241 if (coverage_begin_function (lineno_checksum
, cfg_checksum
))
1243 gcov_position_t offset
;
1245 /* Basic block flags */
1246 offset
= gcov_write_tag (GCOV_TAG_BLOCKS
);
1247 for (i
= 0; i
!= (unsigned) (n_basic_blocks_for_fn (cfun
)); i
++)
1248 gcov_write_unsigned (0);
1249 gcov_write_length (offset
);
1252 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
),
1253 EXIT_BLOCK_PTR_FOR_FN (cfun
), next_bb
)
1258 offset
= gcov_write_tag (GCOV_TAG_ARCS
);
1259 gcov_write_unsigned (bb
->index
);
1261 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1263 struct edge_profile_info
*i
= EDGE_INFO (e
);
1266 unsigned flag_bits
= 0;
1269 flag_bits
|= GCOV_ARC_ON_TREE
;
1270 if (e
->flags
& EDGE_FAKE
)
1271 flag_bits
|= GCOV_ARC_FAKE
;
1272 if (e
->flags
& EDGE_FALLTHRU
)
1273 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1274 /* On trees we don't have fallthru flags, but we can
1275 recompute them from CFG shape. */
1276 if (e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)
1277 && e
->src
->next_bb
== e
->dest
)
1278 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1280 gcov_write_unsigned (e
->dest
->index
);
1281 gcov_write_unsigned (flag_bits
);
1285 gcov_write_length (offset
);
1289 /* Initialize the output. */
1290 output_location (NULL
, 0, NULL
, NULL
);
1292 FOR_EACH_BB_FN (bb
, cfun
)
1294 gimple_stmt_iterator gsi
;
1295 gcov_position_t offset
= 0;
1297 if (bb
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
)
1299 expanded_location curr_location
=
1300 expand_location (DECL_SOURCE_LOCATION (current_function_decl
));
1301 output_location (curr_location
.file
, curr_location
.line
,
1305 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1307 gimple stmt
= gsi_stmt (gsi
);
1308 if (gimple_has_location (stmt
))
1309 output_location (gimple_filename (stmt
), gimple_lineno (stmt
),
1313 /* Notice GOTO expressions eliminated while constructing the CFG. */
1314 if (single_succ_p (bb
)
1315 && LOCATION_LOCUS (single_succ_edge (bb
)->goto_locus
)
1316 != UNKNOWN_LOCATION
)
1318 expanded_location curr_location
1319 = expand_location (single_succ_edge (bb
)->goto_locus
);
1320 output_location (curr_location
.file
, curr_location
.line
,
1326 /* A file of NULL indicates the end of run. */
1327 gcov_write_unsigned (0);
1328 gcov_write_string (NULL
);
1329 gcov_write_length (offset
);
1334 if (flag_profile_values
)
1335 gimple_find_values_to_profile (&values
);
1337 if (flag_branch_probabilities
)
1339 compute_branch_probabilities (cfg_checksum
, lineno_checksum
);
1340 if (flag_profile_values
)
1341 compute_value_histograms (values
, cfg_checksum
, lineno_checksum
);
1344 remove_fake_edges ();
1346 /* For each edge not on the spanning tree, add counting code. */
1347 if (profile_arc_flag
1348 && coverage_counter_alloc (GCOV_COUNTER_ARCS
, num_instrumented
))
1350 unsigned n_instrumented
;
1352 gimple_init_edge_profiler ();
1354 n_instrumented
= instrument_edges (el
);
1356 gcc_assert (n_instrumented
== num_instrumented
);
1358 if (flag_profile_values
)
1359 instrument_values (values
);
1361 /* Commit changes done by instrumentation. */
1362 gsi_commit_edge_inserts ();
1365 free_aux_for_edges ();
1368 free_edge_list (el
);
1369 coverage_end_function (lineno_checksum
, cfg_checksum
);
1372 /* Union find algorithm implementation for the basic blocks using
1376 find_group (basic_block bb
)
1378 basic_block group
= bb
, bb1
;
1380 while ((basic_block
) group
->aux
!= group
)
1381 group
= (basic_block
) group
->aux
;
1383 /* Compress path. */
1384 while ((basic_block
) bb
->aux
!= group
)
1386 bb1
= (basic_block
) bb
->aux
;
1387 bb
->aux
= (void *) group
;
1394 union_groups (basic_block bb1
, basic_block bb2
)
1396 basic_block bb1g
= find_group (bb1
);
1397 basic_block bb2g
= find_group (bb2
);
1399 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1400 this code is unlikely going to be performance problem anyway. */
1401 gcc_assert (bb1g
!= bb2g
);
1406 /* This function searches all of the edges in the program flow graph, and puts
1407 as many bad edges as possible onto the spanning tree. Bad edges include
1408 abnormals edges, which can't be instrumented at the moment. Since it is
1409 possible for fake edges to form a cycle, we will have to develop some
1410 better way in the future. Also put critical edges to the tree, since they
1411 are more expensive to instrument. */
1414 find_spanning_tree (struct edge_list
*el
)
1417 int num_edges
= NUM_EDGES (el
);
1420 /* We use aux field for standard union-find algorithm. */
1421 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
1424 /* Add fake edge exit to entry we can't instrument. */
1425 union_groups (EXIT_BLOCK_PTR_FOR_FN (cfun
), ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1427 /* First add all abnormal edges to the tree unless they form a cycle. Also
1428 add all edges to the exit block to avoid inserting profiling code behind
1429 setting return value from function. */
1430 for (i
= 0; i
< num_edges
; i
++)
1432 edge e
= INDEX_EDGE (el
, i
);
1433 if (((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
| EDGE_FAKE
))
1434 || e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1435 && !EDGE_INFO (e
)->ignore
1436 && (find_group (e
->src
) != find_group (e
->dest
)))
1439 fprintf (dump_file
, "Abnormal edge %d to %d put to tree\n",
1440 e
->src
->index
, e
->dest
->index
);
1441 EDGE_INFO (e
)->on_tree
= 1;
1442 union_groups (e
->src
, e
->dest
);
1446 /* Now insert all critical edges to the tree unless they form a cycle. */
1447 for (i
= 0; i
< num_edges
; i
++)
1449 edge e
= INDEX_EDGE (el
, i
);
1450 if (EDGE_CRITICAL_P (e
) && !EDGE_INFO (e
)->ignore
1451 && find_group (e
->src
) != find_group (e
->dest
))
1454 fprintf (dump_file
, "Critical edge %d to %d put to tree\n",
1455 e
->src
->index
, e
->dest
->index
);
1456 EDGE_INFO (e
)->on_tree
= 1;
1457 union_groups (e
->src
, e
->dest
);
1461 /* And now the rest. */
1462 for (i
= 0; i
< num_edges
; i
++)
1464 edge e
= INDEX_EDGE (el
, i
);
1465 if (!EDGE_INFO (e
)->ignore
1466 && find_group (e
->src
) != find_group (e
->dest
))
1469 fprintf (dump_file
, "Normal edge %d to %d put to tree\n",
1470 e
->src
->index
, e
->dest
->index
);
1471 EDGE_INFO (e
)->on_tree
= 1;
1472 union_groups (e
->src
, e
->dest
);
1476 clear_aux_for_blocks ();
1479 /* Perform file-level initialization for branch-prob processing. */
1482 init_branch_prob (void)
1486 total_num_blocks
= 0;
1487 total_num_edges
= 0;
1488 total_num_edges_ignored
= 0;
1489 total_num_edges_instrumented
= 0;
1490 total_num_blocks_created
= 0;
1491 total_num_passes
= 0;
1492 total_num_times_called
= 0;
1493 total_num_branches
= 0;
1494 for (i
= 0; i
< 20; i
++)
1495 total_hist_br_prob
[i
] = 0;
1498 /* Performs file-level cleanup after branch-prob processing
1502 end_branch_prob (void)
1506 fprintf (dump_file
, "\n");
1507 fprintf (dump_file
, "Total number of blocks: %d\n",
1509 fprintf (dump_file
, "Total number of edges: %d\n", total_num_edges
);
1510 fprintf (dump_file
, "Total number of ignored edges: %d\n",
1511 total_num_edges_ignored
);
1512 fprintf (dump_file
, "Total number of instrumented edges: %d\n",
1513 total_num_edges_instrumented
);
1514 fprintf (dump_file
, "Total number of blocks created: %d\n",
1515 total_num_blocks_created
);
1516 fprintf (dump_file
, "Total number of graph solution passes: %d\n",
1518 if (total_num_times_called
!= 0)
1519 fprintf (dump_file
, "Average number of graph solution passes: %d\n",
1520 (total_num_passes
+ (total_num_times_called
>> 1))
1521 / total_num_times_called
);
1522 fprintf (dump_file
, "Total number of branches: %d\n",
1523 total_num_branches
);
1524 if (total_num_branches
)
1528 for (i
= 0; i
< 10; i
++)
1529 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
1530 (total_hist_br_prob
[i
] + total_hist_br_prob
[19-i
]) * 100
1531 / total_num_branches
, 5*i
, 5*i
+5);