1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2016 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Generate basic block profile instrumentation and auxiliary files.
24 Profile generation is optimized, so that not all arcs in the basic
25 block graph need instrumenting. First, the BB graph is closed with
26 one entry (function start), and one exit (function exit). Any
27 ABNORMAL_EDGE cannot be instrumented (because there is no control
28 path to place the code). We close the graph by inserting fake
29 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
30 edges that do not go to the exit_block. We ignore such abnormal
31 edges. Naturally these fake edges are never directly traversed,
32 and so *cannot* be directly instrumented. Some other graph
33 massaging is done. To optimize the instrumentation we generate the
34 BB minimal span tree, only edges that are not on the span tree
35 (plus the entry point) need instrumenting. From that information
36 all other edge counts can be deduced. By construction all fake
37 edges must be on the spanning tree. We also attempt to place
38 EDGE_CRITICAL edges on the spanning tree.
40 The auxiliary files generated are <dumpbase>.gcno (at compile time)
41 and <dumpbase>.gcda (at run time). The format is
42 described in full in gcov-io.h. */
44 /* ??? Register allocation should use basic block execution counts to
45 give preference to the most commonly executed blocks. */
47 /* ??? Should calculate branch probabilities before instrumenting code, since
48 then we can use arc counts to help decide which arcs to instrument. */
52 #include "coretypes.h"
60 #include "diagnostic-core.h"
62 #include "value-prof.h"
63 #include "gimple-iterator.h"
70 struct bb_profile_info
{
71 unsigned int count_valid
: 1;
73 /* Number of successor and predecessor edges. */
78 #define BB_INFO(b) ((struct bb_profile_info *) (b)->aux)
81 /* Counter summary from the last set of coverage counts read. */
83 const struct gcov_ctr_summary
*profile_info
;
85 /* Counter working set information computed from the current counter
86 summary. Not initialized unless profile_info summary is non-NULL. */
87 static gcov_working_set_t gcov_working_sets
[NUM_GCOV_WORKING_SETS
];
89 /* Collect statistics on the performance of this pass for the entire source
92 static int total_num_blocks
;
93 static int total_num_edges
;
94 static int total_num_edges_ignored
;
95 static int total_num_edges_instrumented
;
96 static int total_num_blocks_created
;
97 static int total_num_passes
;
98 static int total_num_times_called
;
99 static int total_hist_br_prob
[20];
100 static int total_num_branches
;
102 /* Helper function to update gcov_working_sets.
   Copies SET[i] into the file-level gcov_working_sets[i] for each index I
   the loop visits, stopping once I reaches NUM_GCOV_WORKING_SETS; SET must
   therefore provide an entry for every index visited.
   NOTE(review): the declaration and starting value of I are not visible
   in this listing -- confirm against the full file.  */
104 void add_working_set (gcov_working_set_t
*set
) {
106 for (; i
< NUM_GCOV_WORKING_SETS
; i
++)
107 gcov_working_sets
[i
] = set
[i
];
110 /* Forward declarations. */
111 static void find_spanning_tree (struct edge_list
*);
113 /* Add edge instrumentation code for the current function.
   EL is the function's edge list; this routine reads only its edge
   count (NUM_EDGES), which is added to total_num_blocks_created.
   Starting at the entry block and following next_bb, every successor
   edge that is neither ignored nor on the spanning tree is passed to
   gimple_gen_edge_profiler together with the next counter number.
   Returns the number of edges instrumented.  */
119 instrument_edges (struct edge_list
*el
)
121 unsigned num_instr_edges
= 0;
122 int num_edges
= NUM_EDGES (el
);
125 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
130 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
132 struct edge_profile_info
*inf
= EDGE_INFO (e
);
/* Only edges that are neither ignored nor on the spanning tree get a
   counter.  */
134 if (!inf
->ignore
&& !inf
->on_tree
)
/* Abnormal edges cannot be instrumented, so none may reach this point.  */
136 gcc_assert (!(e
->flags
& EDGE_ABNORMAL
));
138 fprintf (dump_file
, "Edge %d to %d instrumented%s\n",
139 e
->src
->index
, e
->dest
->index
,
140 EDGE_CRITICAL_P (e
) ? " (and split)" : "");
/* The post-incremented NUM_INSTR_EDGES is the counter number used for
   this edge.  */
141 gimple_gen_edge_profiler (num_instr_edges
++, e
);
146 total_num_blocks_created
+= num_edges
;
148 fprintf (dump_file
, "%d edges instrumented\n", num_instr_edges
);
149 return num_instr_edges
;
152 /* Add code to measure histograms for values in list VALUES. */
154 instrument_values (histogram_values values
)
158 /* Emit code to generate the histograms before the insns. */
160 for (i
= 0; i
< values
.length (); i
++)
162 histogram_value hist
= values
[i
];
163 unsigned t
= COUNTER_FOR_HIST_TYPE (hist
->type
);
165 if (!coverage_counter_alloc (t
, hist
->n_counters
))
170 case HIST_TYPE_INTERVAL
:
171 gimple_gen_interval_profiler (hist
, t
, 0);
175 gimple_gen_pow2_profiler (hist
, t
, 0);
178 case HIST_TYPE_SINGLE_VALUE
:
179 gimple_gen_one_value_profiler (hist
, t
, 0);
182 case HIST_TYPE_INDIR_CALL
:
183 case HIST_TYPE_INDIR_CALL_TOPN
:
184 gimple_gen_ic_profiler (hist
, t
, 0);
187 case HIST_TYPE_AVERAGE
:
188 gimple_gen_average_profiler (hist
, t
, 0);
192 gimple_gen_ior_profiler (hist
, t
, 0);
195 case HIST_TYPE_TIME_PROFILE
:
198 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
199 gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
201 gimple_gen_time_profiler (t
, 0, gsi
);
212 /* Fill the working set information into the profile_info structure. */
215 get_working_sets (void)
217 unsigned ws_ix
, pctinc
, pct
;
218 gcov_working_set_t
*ws_info
;
223 compute_working_sets (profile_info
, gcov_working_sets
);
227 fprintf (dump_file
, "Counter working sets:\n");
228 /* Multiply the percentage by 100 to avoid float. */
229 pctinc
= 100 * 100 / NUM_GCOV_WORKING_SETS
;
230 for (ws_ix
= 0, pct
= pctinc
; ws_ix
< NUM_GCOV_WORKING_SETS
;
231 ws_ix
++, pct
+= pctinc
)
233 if (ws_ix
== NUM_GCOV_WORKING_SETS
- 1)
235 ws_info
= &gcov_working_sets
[ws_ix
];
236 /* Print out the percentage using int arithmetic to avoid float. */
237 fprintf (dump_file
, "\t\t%u.%02u%%: num counts=%u, min counter="
239 pct
/ 100, pct
- (pct
/ 100 * 100),
240 ws_info
->num_counters
,
241 (int64_t)ws_info
->min_counter
);
246 /* Given the desired percentage of the full profile (sum_all from the
   summary), multiplied by 10 to avoid float, in PCT_TIMES_10, return
   the corresponding working set information.  If an exact match for
   the percentage isn't found, the closest value is used.
   PCT_TIMES_10 must not exceed 1000 (asserted).  The result points into
   the file-level gcov_working_sets array.  */
252 find_working_set (unsigned pct_times_10
)
257 gcc_assert (pct_times_10
<= 1000);
/* 99.9% and above map to the last working set.  */
258 if (pct_times_10
>= 999)
259 return &gcov_working_sets
[NUM_GCOV_WORKING_SETS
- 1];
/* Scale the percentage to a position within the working-set array.  */
260 i
= pct_times_10
* NUM_GCOV_WORKING_SETS
/ 1000;
/* NOTE(review): the condition guarding this return is not visible in this
   listing; presumably it handles I == 0, for which I - 1 below would be
   out of range -- confirm.  */
262 return &gcov_working_sets
[0];
263 return &gcov_working_sets
[i
- 1];
266 /* Computes hybrid profile for all matching entries in da_file.
268 CFG_CHECKSUM is the precomputed checksum for the CFG. */
271 get_exec_counts (unsigned cfg_checksum
, unsigned lineno_checksum
)
273 unsigned num_edges
= 0;
277 /* Count the edges to be (possibly) instrumented. */
278 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
283 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
284 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
288 counts
= get_coverage_counts (GCOV_COUNTER_ARCS
, num_edges
, cfg_checksum
,
289 lineno_checksum
, &profile_info
);
295 if (dump_file
&& profile_info
)
296 fprintf (dump_file
, "Merged %u profiles with maximal count %u.\n",
297 profile_info
->runs
, (unsigned) profile_info
->sum_max
);
304 is_edge_inconsistent (vec
<edge
, va_gc
> *edges
)
308 FOR_EACH_EDGE (e
, ei
, edges
)
310 if (!EDGE_INFO (e
)->ignore
)
313 && (!(e
->flags
& EDGE_FAKE
)
314 || !block_ends_with_call_p (e
->src
)))
319 "Edge %i->%i is inconsistent, count%" PRId64
,
320 e
->src
->index
, e
->dest
->index
, e
->count
);
321 dump_bb (dump_file
, e
->src
, 0, TDF_DETAILS
);
322 dump_bb (dump_file
, e
->dest
, 0, TDF_DETAILS
);
332 correct_negative_edge_counts (void)
338 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
340 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
348 /* Check consistency.
349 Return true if inconsistency is found. */
351 is_inconsistent (void)
354 bool inconsistent
= false;
355 FOR_EACH_BB_FN (bb
, cfun
)
357 inconsistent
|= is_edge_inconsistent (bb
->preds
);
358 if (!dump_file
&& inconsistent
)
360 inconsistent
|= is_edge_inconsistent (bb
->succs
);
361 if (!dump_file
&& inconsistent
)
367 fprintf (dump_file
, "BB %i count is negative "
371 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
375 if (bb
->count
!= sum_edge_counts (bb
->preds
))
379 fprintf (dump_file
, "BB %i count does not match sum of incoming edges "
380 "%" PRId64
" should be %" PRId64
,
383 sum_edge_counts (bb
->preds
));
384 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
388 if (bb
->count
!= sum_edge_counts (bb
->succs
) &&
389 ! (find_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
)) != NULL
390 && block_ends_with_call_p (bb
)))
394 fprintf (dump_file
, "BB %i count does not match sum of outgoing edges "
395 "%" PRId64
" should be %" PRId64
,
398 sum_edge_counts (bb
->succs
));
399 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
403 if (!dump_file
&& inconsistent
)
410 /* Set each basic block count to the sum of its outgoing edge counts */
415 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
417 bb
->count
= sum_edge_counts (bb
->succs
);
418 gcc_assert (bb
->count
>= 0);
422 /* Reads profile data and returns total number of edge counts read */
424 read_profile_edge_counts (gcov_type
*exec_counts
)
428 int exec_counts_pos
= 0;
429 /* For each edge not on the spanning tree, set its execution count from
431 /* The first count in the .da file is the number of times that the function
432 was entered. This is the exec_count for block zero. */
434 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
439 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
440 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
445 e
->count
= exec_counts
[exec_counts_pos
++];
446 if (e
->count
> profile_info
->sum_max
)
448 if (flag_profile_correction
)
450 static bool informed
= 0;
451 if (dump_enabled_p () && !informed
)
452 dump_printf_loc (MSG_NOTE
, input_location
,
453 "corrupted profile info: edge count"
454 " exceeds maximal count\n");
458 error ("corrupted profile info: edge from %i to %i exceeds maximal count",
459 bb
->index
, e
->dest
->index
);
465 EDGE_INFO (e
)->count_valid
= 1;
466 BB_INFO (bb
)->succ_count
--;
467 BB_INFO (e
->dest
)->pred_count
--;
470 fprintf (dump_file
, "\nRead edge from %i to %i, count:",
471 bb
->index
, e
->dest
->index
);
472 fprintf (dump_file
, "%" PRId64
,
481 #define OVERLAP_BASE 10000
483 /* Compare the static estimated profile to the actual profile, and
484 return the "degree of overlap" measure between them.
486 Degree of overlap is a number between 0 and OVERLAP_BASE. It is
487 the sum of each basic block's minimum relative weights between
488 two profiles. And overlap of OVERLAP_BASE means two profiles are
492 compute_frequency_overlap (void)
494 gcov_type count_total
= 0, freq_total
= 0;
498 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
500 count_total
+= bb
->count
;
501 freq_total
+= bb
->frequency
;
504 if (count_total
== 0 || freq_total
== 0)
507 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
508 overlap
+= MIN (bb
->count
* OVERLAP_BASE
/ count_total
,
509 bb
->frequency
* OVERLAP_BASE
/ freq_total
);
514 /* Compute the branch probabilities for the various branches.
515 Annotate them accordingly.
517 CFG_CHECKSUM is the precomputed checksum for the CFG. */
520 compute_branch_probabilities (unsigned cfg_checksum
, unsigned lineno_checksum
)
527 int hist_br_prob
[20];
529 gcov_type
*exec_counts
= get_exec_counts (cfg_checksum
, lineno_checksum
);
530 int inconsistent
= 0;
532 /* Very simple sanity checks so we catch bugs in our profiling code. */
536 if (profile_info
->sum_all
< profile_info
->sum_max
)
538 error ("corrupted profile info: sum_all is smaller than sum_max");
542 /* Attach extra info block to each bb. */
543 alloc_aux_for_blocks (sizeof (struct bb_profile_info
));
544 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
549 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
550 if (!EDGE_INFO (e
)->ignore
)
551 BB_INFO (bb
)->succ_count
++;
552 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
553 if (!EDGE_INFO (e
)->ignore
)
554 BB_INFO (bb
)->pred_count
++;
557 /* Avoid predicting entry on exit nodes. */
558 BB_INFO (EXIT_BLOCK_PTR_FOR_FN (cfun
))->succ_count
= 2;
559 BB_INFO (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->pred_count
= 2;
561 num_edges
= read_profile_edge_counts (exec_counts
);
564 fprintf (dump_file
, "\n%d edge counts read\n", num_edges
);
566 /* For every block in the file,
567 - if every exit/entrance edge has a known count, then set the block count
568 - if the block count is known, and every exit/entrance edge but one has
569 a known execution count, then set the count of the remaining edge
571 As edge counts are set, decrement the succ/pred count, but don't delete
572 the edge, that way we can easily tell when all edges are known, or only
573 one edge is unknown. */
575 /* The order that the basic blocks are iterated through is important.
576 Since the code that finds spanning trees starts with block 0, low numbered
577 edges are put on the spanning tree in preference to high numbered edges.
578 Hence, most instrumented edges are at the end. Graph solving works much
579 faster if we propagate numbers from the end to the start.
581 This takes an average of slightly more than 3 passes. */
589 FOR_BB_BETWEEN (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), NULL
, prev_bb
)
591 struct bb_profile_info
*bi
= BB_INFO (bb
);
592 if (! bi
->count_valid
)
594 if (bi
->succ_count
== 0)
600 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
606 else if (bi
->pred_count
== 0)
612 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
621 if (bi
->succ_count
== 1)
627 /* One of the counts will be invalid, but it is zero,
628 so adding it in also doesn't hurt. */
629 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
632 /* Search for the invalid edge, and set its count. */
633 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
634 if (! EDGE_INFO (e
)->count_valid
&& ! EDGE_INFO (e
)->ignore
)
637 /* Calculate count for remaining edge by conservation. */
638 total
= bb
->count
- total
;
641 EDGE_INFO (e
)->count_valid
= 1;
645 BB_INFO (e
->dest
)->pred_count
--;
648 if (bi
->pred_count
== 1)
654 /* One of the counts will be invalid, but it is zero,
655 so adding it in also doesn't hurt. */
656 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
659 /* Search for the invalid edge, and set its count. */
660 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
661 if (!EDGE_INFO (e
)->count_valid
&& !EDGE_INFO (e
)->ignore
)
664 /* Calculate count for remaining edge by conservation. */
665 total
= bb
->count
- total
+ e
->count
;
668 EDGE_INFO (e
)->count_valid
= 1;
672 BB_INFO (e
->src
)->succ_count
--;
680 int overlap
= compute_frequency_overlap ();
681 gimple_dump_cfg (dump_file
, dump_flags
);
682 fprintf (dump_file
, "Static profile overlap: %d.%d%%\n",
683 overlap
/ (OVERLAP_BASE
/ 100),
684 overlap
% (OVERLAP_BASE
/ 100));
687 total_num_passes
+= passes
;
689 fprintf (dump_file
, "Graph solving took %d passes.\n\n", passes
);
691 /* If the graph has been correctly solved, every block will have a
692 succ and pred count of zero. */
693 FOR_EACH_BB_FN (bb
, cfun
)
695 gcc_assert (!BB_INFO (bb
)->succ_count
&& !BB_INFO (bb
)->pred_count
);
698 /* Check for inconsistent basic block counts */
699 inconsistent
= is_inconsistent ();
703 if (flag_profile_correction
)
705 /* Inconsistency detected. Make it flow-consistent. */
706 static int informed
= 0;
707 if (dump_enabled_p () && informed
== 0)
710 dump_printf_loc (MSG_NOTE
, input_location
,
711 "correcting inconsistent profile data\n");
713 correct_negative_edge_counts ();
714 /* Set bb counts to the sum of the outgoing edge counts */
717 fprintf (dump_file
, "\nCalling mcf_smooth_cfg\n");
721 error ("corrupted profile info: profile data is not flow-consistent");
724 /* For every edge, calculate its branch probability and add a reg_note
725 to the branch insn to indicate this. */
727 for (i
= 0; i
< 20; i
++)
731 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
738 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
739 bb
->index
, (int)bb
->count
);
742 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
744 /* Function may return twice in the case the called function is
745 setjmp or calls fork, but we can't represent this by extra
746 edge from the entry, since extra edge from the exit is
747 already present. We get negative frequency from the entry
750 && e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
751 || (e
->count
> bb
->count
752 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)))
754 if (block_ends_with_call_p (bb
))
755 e
->count
= e
->count
< 0 ? 0 : bb
->count
;
757 if (e
->count
< 0 || e
->count
> bb
->count
)
759 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
760 e
->src
->index
, e
->dest
->index
,
762 e
->count
= bb
->count
/ 2;
767 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
768 e
->probability
= GCOV_COMPUTE_SCALE (e
->count
, bb
->count
);
769 if (bb
->index
>= NUM_FIXED_BLOCKS
770 && block_ends_with_condjump_p (bb
)
771 && EDGE_COUNT (bb
->succs
) >= 2)
777 /* Find the branch edge. It is possible that we do have fake
779 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
780 if (!(e
->flags
& (EDGE_FAKE
| EDGE_FALLTHRU
)))
783 prob
= e
->probability
;
784 index
= prob
* 20 / REG_BR_PROB_BASE
;
788 hist_br_prob
[index
]++;
793 /* As a last resort, distribute the probabilities evenly.
794 Use simple heuristics that if there are normal edges,
795 give all abnormals frequency of 0, otherwise distribute the
796 frequency over abnormals (this is the case of noreturn
798 else if (profile_status_for_fn (cfun
) == PROFILE_ABSENT
)
802 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
803 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
807 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
808 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
809 e
->probability
= REG_BR_PROB_BASE
/ total
;
815 total
+= EDGE_COUNT (bb
->succs
);
816 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
817 e
->probability
= REG_BR_PROB_BASE
/ total
;
819 if (bb
->index
>= NUM_FIXED_BLOCKS
820 && block_ends_with_condjump_p (bb
)
821 && EDGE_COUNT (bb
->succs
) >= 2)
829 fprintf (dump_file
, "%d branches\n", num_branches
);
831 for (i
= 0; i
< 10; i
++)
832 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
833 (hist_br_prob
[i
] + hist_br_prob
[19-i
]) * 100 / num_branches
,
836 total_num_branches
+= num_branches
;
837 for (i
= 0; i
< 20; i
++)
838 total_hist_br_prob
[i
] += hist_br_prob
[i
];
840 fputc ('\n', dump_file
);
841 fputc ('\n', dump_file
);
844 free_aux_for_blocks ();
847 /* Load value histograms values whose description is stored in VALUES array
850 CFG_CHECKSUM is the precomputed checksum for the CFG. */
853 compute_value_histograms (histogram_values values
, unsigned cfg_checksum
,
854 unsigned lineno_checksum
)
856 unsigned i
, j
, t
, any
;
857 unsigned n_histogram_counters
[GCOV_N_VALUE_COUNTERS
];
858 gcov_type
*histogram_counts
[GCOV_N_VALUE_COUNTERS
];
859 gcov_type
*act_count
[GCOV_N_VALUE_COUNTERS
];
860 gcov_type
*aact_count
;
861 struct cgraph_node
*node
;
863 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
864 n_histogram_counters
[t
] = 0;
866 for (i
= 0; i
< values
.length (); i
++)
868 histogram_value hist
= values
[i
];
869 n_histogram_counters
[(int) hist
->type
] += hist
->n_counters
;
873 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
875 if (!n_histogram_counters
[t
])
877 histogram_counts
[t
] = NULL
;
881 histogram_counts
[t
] =
882 get_coverage_counts (COUNTER_FOR_HIST_TYPE (t
),
883 n_histogram_counters
[t
], cfg_checksum
,
884 lineno_checksum
, NULL
);
885 if (histogram_counts
[t
])
887 act_count
[t
] = histogram_counts
[t
];
892 for (i
= 0; i
< values
.length (); i
++)
894 histogram_value hist
= values
[i
];
895 gimple
*stmt
= hist
->hvalue
.stmt
;
897 t
= (int) hist
->type
;
899 aact_count
= act_count
[t
];
902 act_count
[t
] += hist
->n_counters
;
904 gimple_add_histogram_value (cfun
, stmt
, hist
);
905 hist
->hvalue
.counters
= XNEWVEC (gcov_type
, hist
->n_counters
);
906 for (j
= 0; j
< hist
->n_counters
; j
++)
908 hist
->hvalue
.counters
[j
] = aact_count
[j
];
910 hist
->hvalue
.counters
[j
] = 0;
912 /* Time profiler counter is not related to any statement,
913 so that we have to read the counter and set the value to
914 the corresponding call graph node. */
915 if (hist
->type
== HIST_TYPE_TIME_PROFILE
)
917 node
= cgraph_node::get (hist
->fun
->decl
);
918 node
->tp_first_run
= hist
->hvalue
.counters
[0];
921 fprintf (dump_file
, "Read tp_first_run: %d\n", node
->tp_first_run
);
925 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
926 free (histogram_counts
[t
]);
929 /* When passed NULL as file_name, initialize.
930 When passed something else, output the necessary commands to change
931 line to LINE and offset to FILE_NAME. */
933 output_location (char const *file_name
, int line
,
934 gcov_position_t
*offset
, basic_block bb
)
936 static char const *prev_file_name
;
937 static int prev_line
;
938 bool name_differs
, line_differs
;
942 prev_file_name
= NULL
;
947 name_differs
= !prev_file_name
|| filename_cmp (file_name
, prev_file_name
);
948 line_differs
= prev_line
!= line
;
950 if (name_differs
|| line_differs
)
954 *offset
= gcov_write_tag (GCOV_TAG_LINES
);
955 gcov_write_unsigned (bb
->index
);
956 name_differs
= line_differs
=true;
959 /* If this is a new source file, then output the
960 file's name to the .bb file. */
963 prev_file_name
= file_name
;
964 gcov_write_unsigned (0);
965 gcov_write_string (prev_file_name
);
969 gcov_write_unsigned (line
);
975 /* Instrument and/or analyze program behavior based on the program's CFG.
977 This function creates a representation of the control flow graph (of
978 the function being compiled) that is suitable for the instrumentation
979 of edges and/or converting measured edge counts to counts on the
982 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
983 the flow graph that are needed to reconstruct the dynamic behavior of the
984 flow graph. This data is written to the gcno file for gcov.
986 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
987 information from the gcda file containing edge count information from
988 previous executions of the function being compiled. In this case, the
989 control flow graph is annotated with actual execution counts by
990 compute_branch_probabilities().
992 Main entry point of this file. */
999 unsigned num_edges
, ignored_edges
;
1000 unsigned num_instrumented
;
1001 struct edge_list
*el
;
1002 histogram_values values
= histogram_values ();
1003 unsigned cfg_checksum
, lineno_checksum
;
1005 total_num_times_called
++;
1007 flow_call_edges_add (NULL
);
1008 add_noreturn_fake_exit_edges ();
1010 /* We can't handle cyclic regions constructed using abnormal edges.
1011 To avoid these we replace every source of abnormal edge by a fake
1012 edge from entry node and every destination by fake edge to exit.
1013 This keeps graph acyclic and our calculation exact for all normal
1014 edges except for exit and entrance ones.
1016 We also add fake exit edges for each call and asm statement in the
1017 basic block, since it may not return. */
1019 FOR_EACH_BB_FN (bb
, cfun
)
1021 int need_exit_edge
= 0, need_entry_edge
= 0;
1022 int have_exit_edge
= 0, have_entry_edge
= 0;
1026 /* Functions returning multiple times are not handled by extra edges.
1027 Instead we simply allow negative counts on edges from exit to the
1028 block past call and corresponding probabilities. We can't go
1029 with the extra edges because that would result in flowgraph that
1030 needs to have fake edges outside the spanning tree. */
1032 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1034 gimple_stmt_iterator gsi
;
1035 gimple
*last
= NULL
;
1037 /* It may happen that there are compiler generated statements
1038 without a locus at all. Go through the basic block from the
1039 last to the first statement looking for a locus. */
1040 for (gsi
= gsi_last_nondebug_bb (bb
);
1042 gsi_prev_nondebug (&gsi
))
1044 last
= gsi_stmt (gsi
);
1045 if (!RESERVED_LOCATION_P (gimple_location (last
)))
1049 /* Edge with goto locus might get wrong coverage info unless
1050 it is the only edge out of BB.
1051 Don't do that when the locuses match, so
1052 if (blah) goto something;
1053 is not computed twice. */
1055 && gimple_has_location (last
)
1056 && !RESERVED_LOCATION_P (e
->goto_locus
)
1057 && !single_succ_p (bb
)
1058 && (LOCATION_FILE (e
->goto_locus
)
1059 != LOCATION_FILE (gimple_location (last
))
1060 || (LOCATION_LINE (e
->goto_locus
)
1061 != LOCATION_LINE (gimple_location (last
)))))
1063 basic_block new_bb
= split_edge (e
);
1064 edge ne
= single_succ_edge (new_bb
);
1065 ne
->goto_locus
= e
->goto_locus
;
1067 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1068 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1070 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1073 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1075 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1076 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1077 need_entry_edge
= 1;
1078 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1079 have_entry_edge
= 1;
1082 if (need_exit_edge
&& !have_exit_edge
)
1085 fprintf (dump_file
, "Adding fake exit edge to bb %i\n",
1087 make_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), EDGE_FAKE
);
1089 if (need_entry_edge
&& !have_entry_edge
)
1092 fprintf (dump_file
, "Adding fake entry edge to bb %i\n",
1094 make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
), bb
, EDGE_FAKE
);
1095 /* Avoid bbs that have both fake entry edge and also some
1096 exit edge. One of those edges wouldn't be added to the
1097 spanning tree, but we can't instrument any of them. */
1098 if (have_exit_edge
|| need_exit_edge
)
1100 gimple_stmt_iterator gsi
;
1103 gsi
= gsi_start_nondebug_after_labels_bb (bb
);
1104 gcc_checking_assert (!gsi_end_p (gsi
));
1105 first
= gsi_stmt (gsi
);
1106 /* Don't split the bbs containing __builtin_setjmp_receiver
1107 or ABNORMAL_DISPATCHER calls. These are very
1108 special and don't expect anything to be inserted before
1110 if (is_gimple_call (first
)
1111 && (gimple_call_builtin_p (first
, BUILT_IN_SETJMP_RECEIVER
)
1112 || (gimple_call_flags (first
) & ECF_RETURNS_TWICE
)
1113 || (gimple_call_internal_p (first
)
1114 && (gimple_call_internal_fn (first
)
1115 == IFN_ABNORMAL_DISPATCHER
))))
1119 fprintf (dump_file
, "Splitting bb %i after labels\n",
1121 split_block_after_labels (bb
);
1126 el
= create_edge_list ();
1127 num_edges
= NUM_EDGES (el
);
1128 alloc_aux_for_edges (sizeof (struct edge_profile_info
));
1130 /* The basic blocks are expected to be numbered sequentially. */
1134 for (i
= 0 ; i
< num_edges
; i
++)
1136 edge e
= INDEX_EDGE (el
, i
);
1139 /* Mark edges we've replaced by fake edges above as ignored. */
1140 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1141 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1142 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1144 EDGE_INFO (e
)->ignore
= 1;
1149 /* Create spanning tree from basic block graph, mark each edge that is
1150 on the spanning tree. We insert as many abnormal and critical edges
1151 as possible to minimize number of edge splits necessary. */
1153 find_spanning_tree (el
);
1155 /* Fake edges that are not on the tree will not be instrumented, so
1156 mark them ignored. */
1157 for (num_instrumented
= i
= 0; i
< num_edges
; i
++)
1159 edge e
= INDEX_EDGE (el
, i
);
1160 struct edge_profile_info
*inf
= EDGE_INFO (e
);
1162 if (inf
->ignore
|| inf
->on_tree
)
1164 else if (e
->flags
& EDGE_FAKE
)
1173 total_num_blocks
+= n_basic_blocks_for_fn (cfun
);
1175 fprintf (dump_file
, "%d basic blocks\n", n_basic_blocks_for_fn (cfun
));
1177 total_num_edges
+= num_edges
;
1179 fprintf (dump_file
, "%d edges\n", num_edges
);
1181 total_num_edges_ignored
+= ignored_edges
;
1183 fprintf (dump_file
, "%d ignored edges\n", ignored_edges
);
1185 total_num_edges_instrumented
+= num_instrumented
;
1187 fprintf (dump_file
, "%d instrumentation edges\n", num_instrumented
);
1189 /* Compute two different checksums. Note that we want to compute
1190 the checksum in only one place, since it depends on the shape
1191 of the control flow which can change during
1192 various transformations. */
1193 cfg_checksum
= coverage_compute_cfg_checksum (cfun
);
1194 lineno_checksum
= coverage_compute_lineno_checksum ();
1196 /* Write the data from which gcov can reconstruct the basic block
1197 graph and function line numbers (the gcno file). */
1198 if (coverage_begin_function (lineno_checksum
, cfg_checksum
))
1200 gcov_position_t offset
;
1202 /* Basic block flags */
1203 offset
= gcov_write_tag (GCOV_TAG_BLOCKS
);
1204 for (i
= 0; i
!= (unsigned) (n_basic_blocks_for_fn (cfun
)); i
++)
1205 gcov_write_unsigned (0);
1206 gcov_write_length (offset
);
1209 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
),
1210 EXIT_BLOCK_PTR_FOR_FN (cfun
), next_bb
)
1215 offset
= gcov_write_tag (GCOV_TAG_ARCS
);
1216 gcov_write_unsigned (bb
->index
);
1218 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1220 struct edge_profile_info
*i
= EDGE_INFO (e
);
1223 unsigned flag_bits
= 0;
1226 flag_bits
|= GCOV_ARC_ON_TREE
;
1227 if (e
->flags
& EDGE_FAKE
)
1228 flag_bits
|= GCOV_ARC_FAKE
;
1229 if (e
->flags
& EDGE_FALLTHRU
)
1230 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1231 /* On trees we don't have fallthru flags, but we can
1232 recompute them from CFG shape. */
1233 if (e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)
1234 && e
->src
->next_bb
== e
->dest
)
1235 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1237 gcov_write_unsigned (e
->dest
->index
);
1238 gcov_write_unsigned (flag_bits
);
1242 gcov_write_length (offset
);
1246 /* Initialize the output. */
1247 output_location (NULL
, 0, NULL
, NULL
);
1249 FOR_EACH_BB_FN (bb
, cfun
)
1251 gimple_stmt_iterator gsi
;
1252 gcov_position_t offset
= 0;
1254 if (bb
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
)
1256 expanded_location curr_location
=
1257 expand_location (DECL_SOURCE_LOCATION (current_function_decl
));
1258 output_location (curr_location
.file
, curr_location
.line
,
1262 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1264 gimple
*stmt
= gsi_stmt (gsi
);
1265 if (!RESERVED_LOCATION_P (gimple_location (stmt
)))
1266 output_location (gimple_filename (stmt
), gimple_lineno (stmt
),
1270 /* Notice GOTO expressions eliminated while constructing the CFG. */
1271 if (single_succ_p (bb
)
1272 && !RESERVED_LOCATION_P (single_succ_edge (bb
)->goto_locus
))
1274 expanded_location curr_location
1275 = expand_location (single_succ_edge (bb
)->goto_locus
);
1276 output_location (curr_location
.file
, curr_location
.line
,
1282 /* A file of NULL indicates the end of run. */
1283 gcov_write_unsigned (0);
1284 gcov_write_string (NULL
);
1285 gcov_write_length (offset
);
1290 if (flag_profile_values
)
1291 gimple_find_values_to_profile (&values
);
1293 if (flag_branch_probabilities
)
1295 compute_branch_probabilities (cfg_checksum
, lineno_checksum
);
1296 if (flag_profile_values
)
1297 compute_value_histograms (values
, cfg_checksum
, lineno_checksum
);
1300 remove_fake_edges ();
1302 /* For each edge not on the spanning tree, add counting code. */
1303 if (profile_arc_flag
1304 && coverage_counter_alloc (GCOV_COUNTER_ARCS
, num_instrumented
))
1306 unsigned n_instrumented
;
1308 gimple_init_edge_profiler ();
1310 n_instrumented
= instrument_edges (el
);
1312 gcc_assert (n_instrumented
== num_instrumented
);
1314 if (flag_profile_values
)
1315 instrument_values (values
);
1317 /* Commit changes done by instrumentation. */
1318 gsi_commit_edge_inserts ();
1321 free_aux_for_edges ();
1324 free_edge_list (el
);
1325 coverage_end_function (lineno_checksum
, cfg_checksum
);
1326 if (flag_branch_probabilities
&& profile_info
)
1329 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1330 report_predictor_hitrates ();
1331 profile_status_for_fn (cfun
) = PROFILE_READ
;
1333 /* At this moment we have precise loop iteration count estimates.
1334 Record them to loop structure before the profile gets out of date. */
1335 FOR_EACH_LOOP (loop
, 0)
1336 if (loop
->header
->count
)
1338 gcov_type nit
= expected_loop_iterations_unbounded (loop
);
1339 widest_int bound
= gcov_type_to_wide_int (nit
);
1340 loop
->any_estimate
= false;
1341 record_niter_bound (loop
, bound
, true, false);
1343 compute_function_frequency ();
1347 /* Union find algorithm implementation for the basic blocks using
1351 find_group (basic_block bb
)
1353 basic_block group
= bb
, bb1
;
1355 while ((basic_block
) group
->aux
!= group
)
1356 group
= (basic_block
) group
->aux
;
1358 /* Compress path. */
1359 while ((basic_block
) bb
->aux
!= group
)
1361 bb1
= (basic_block
) bb
->aux
;
1362 bb
->aux
= (void *) group
;
1369 union_groups (basic_block bb1
, basic_block bb2
)
1371 basic_block bb1g
= find_group (bb1
);
1372 basic_block bb2g
= find_group (bb2
);
1374 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1375 this code is unlikely going to be performance problem anyway. */
1376 gcc_assert (bb1g
!= bb2g
);
1381 /* This function searches all of the edges in the program flow graph, and puts
1382 as many bad edges as possible onto the spanning tree. Bad edges include
1383 abnormals edges, which can't be instrumented at the moment. Since it is
1384 possible for fake edges to form a cycle, we will have to develop some
1385 better way in the future. Also put critical edges to the tree, since they
1386 are more expensive to instrument. */
1389 find_spanning_tree (struct edge_list
*el
)
1392 int num_edges
= NUM_EDGES (el
);
1395 /* We use aux field for standard union-find algorithm. */
1396 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
1399 /* Add fake edge exit to entry we can't instrument. */
1400 union_groups (EXIT_BLOCK_PTR_FOR_FN (cfun
), ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1402 /* First add all abnormal edges to the tree unless they form a cycle. Also
1403 add all edges to the exit block to avoid inserting profiling code behind
1404 setting return value from function. */
1405 for (i
= 0; i
< num_edges
; i
++)
1407 edge e
= INDEX_EDGE (el
, i
);
1408 if (((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
| EDGE_FAKE
))
1409 || e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1410 && !EDGE_INFO (e
)->ignore
1411 && (find_group (e
->src
) != find_group (e
->dest
)))
1414 fprintf (dump_file
, "Abnormal edge %d to %d put to tree\n",
1415 e
->src
->index
, e
->dest
->index
);
1416 EDGE_INFO (e
)->on_tree
= 1;
1417 union_groups (e
->src
, e
->dest
);
1421 /* Now insert all critical edges to the tree unless they form a cycle. */
1422 for (i
= 0; i
< num_edges
; i
++)
1424 edge e
= INDEX_EDGE (el
, i
);
1425 if (EDGE_CRITICAL_P (e
) && !EDGE_INFO (e
)->ignore
1426 && find_group (e
->src
) != find_group (e
->dest
))
1429 fprintf (dump_file
, "Critical edge %d to %d put to tree\n",
1430 e
->src
->index
, e
->dest
->index
);
1431 EDGE_INFO (e
)->on_tree
= 1;
1432 union_groups (e
->src
, e
->dest
);
1436 /* And now the rest. */
1437 for (i
= 0; i
< num_edges
; i
++)
1439 edge e
= INDEX_EDGE (el
, i
);
1440 if (!EDGE_INFO (e
)->ignore
1441 && find_group (e
->src
) != find_group (e
->dest
))
1444 fprintf (dump_file
, "Normal edge %d to %d put to tree\n",
1445 e
->src
->index
, e
->dest
->index
);
1446 EDGE_INFO (e
)->on_tree
= 1;
1447 union_groups (e
->src
, e
->dest
);
1451 clear_aux_for_blocks ();
1454 /* Perform file-level initialization for branch-prob processing. */
1457 init_branch_prob (void)
1461 total_num_blocks
= 0;
1462 total_num_edges
= 0;
1463 total_num_edges_ignored
= 0;
1464 total_num_edges_instrumented
= 0;
1465 total_num_blocks_created
= 0;
1466 total_num_passes
= 0;
1467 total_num_times_called
= 0;
1468 total_num_branches
= 0;
1469 for (i
= 0; i
< 20; i
++)
1470 total_hist_br_prob
[i
] = 0;
1473 /* Performs file-level cleanup after branch-prob processing
1477 end_branch_prob (void)
1481 fprintf (dump_file
, "\n");
1482 fprintf (dump_file
, "Total number of blocks: %d\n",
1484 fprintf (dump_file
, "Total number of edges: %d\n", total_num_edges
);
1485 fprintf (dump_file
, "Total number of ignored edges: %d\n",
1486 total_num_edges_ignored
);
1487 fprintf (dump_file
, "Total number of instrumented edges: %d\n",
1488 total_num_edges_instrumented
);
1489 fprintf (dump_file
, "Total number of blocks created: %d\n",
1490 total_num_blocks_created
);
1491 fprintf (dump_file
, "Total number of graph solution passes: %d\n",
1493 if (total_num_times_called
!= 0)
1494 fprintf (dump_file
, "Average number of graph solution passes: %d\n",
1495 (total_num_passes
+ (total_num_times_called
>> 1))
1496 / total_num_times_called
);
1497 fprintf (dump_file
, "Total number of branches: %d\n",
1498 total_num_branches
);
1499 if (total_num_branches
)
1503 for (i
= 0; i
< 10; i
++)
1504 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
1505 (total_hist_br_prob
[i
] + total_hist_br_prob
[19-i
]) * 100
1506 / total_num_branches
, 5*i
, 5*i
+5);