1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2019 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Generate basic block profile instrumentation and auxiliary files.
24 Profile generation is optimized, so that not all arcs in the basic
25 block graph need instrumenting. First, the BB graph is closed with
26 one entry (function start), and one exit (function exit). Any
27 ABNORMAL_EDGE cannot be instrumented (because there is no control
28 path to place the code). We close the graph by inserting fake
29 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
30 edges that do not go to the exit_block. We ignore such abnormal
31 edges. Naturally these fake edges are never directly traversed,
32 and so *cannot* be directly instrumented. Some other graph
33 massaging is done. To optimize the instrumentation we generate the
34 BB minimal span tree, only edges that are not on the span tree
35 (plus the entry point) need instrumenting. From that information
36 all other edge counts can be deduced. By construction all fake
37 edges must be on the spanning tree. We also attempt to place
38 EDGE_CRITICAL edges on the spanning tree.
40 The auxiliary files generated are <dumpbase>.gcno (at compile time)
41 and <dumpbase>.gcda (at run time). The format is
42 described in full in gcov-io.h. */
44 /* ??? Register allocation should use basic block execution counts to
45 give preference to the most commonly executed blocks. */
47 /* ??? Should calculate branch probabilities before instrumenting code, since
48 then we can use arc counts to help decide which arcs to instrument. */
52 #include "coretypes.h"
60 #include "diagnostic-core.h"
62 #include "value-prof.h"
63 #include "gimple-iterator.h"
70 /* Map from BBs/edges to gcov counters. */
71 vec
<gcov_type
> bb_gcov_counts
;
72 hash_map
<edge
,gcov_type
> *edge_gcov_counts
;
74 struct bb_profile_info
{
75 unsigned int count_valid
: 1;
77 /* Number of successor and predecessor edges. */
82 #define BB_INFO(b) ((struct bb_profile_info *) (b)->aux)
85 /* Counter summary from the last set of coverage counts read. */
87 gcov_summary
*profile_info
;
/* Statistics on the performance of this pass, accumulated over every
   function processed in the source file.  */
static int total_num_blocks;
static int total_num_edges;
static int total_num_edges_ignored;
static int total_num_edges_instrumented;
static int total_num_blocks_created;
static int total_num_passes;
static int total_num_times_called;
/* Histogram of branch probabilities, split into 20 equal-width buckets.  */
static int total_hist_br_prob[20];
static int total_num_branches;
/* Forward declarations.  */
static void find_spanning_tree (struct edge_list *);
105 /* Add edge instrumentation code to the entire insn chain.
107 EL is the edge list for the function being instrumented.
108 Returns the number of edges that were instrumented. */
111 instrument_edges (struct edge_list
*el
)
113 unsigned num_instr_edges
= 0;
114 int num_edges
= NUM_EDGES (el
);
117 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
122 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
124 struct edge_profile_info
*inf
= EDGE_INFO (e
);
126 if (!inf
->ignore
&& !inf
->on_tree
)
128 gcc_assert (!(e
->flags
& EDGE_ABNORMAL
));
130 fprintf (dump_file
, "Edge %d to %d instrumented%s\n",
131 e
->src
->index
, e
->dest
->index
,
132 EDGE_CRITICAL_P (e
) ? " (and split)" : "");
133 gimple_gen_edge_profiler (num_instr_edges
++, e
);
138 total_num_blocks_created
+= num_edges
;
140 fprintf (dump_file
, "%d edges instrumented\n", num_instr_edges
);
141 return num_instr_edges
;
144 /* Add code to measure histograms for values in list VALUES. */
146 instrument_values (histogram_values values
)
150 /* Emit code to generate the histograms before the insns. */
152 for (i
= 0; i
< values
.length (); i
++)
154 histogram_value hist
= values
[i
];
155 unsigned t
= COUNTER_FOR_HIST_TYPE (hist
->type
);
157 if (!coverage_counter_alloc (t
, hist
->n_counters
))
162 case HIST_TYPE_INTERVAL
:
163 gimple_gen_interval_profiler (hist
, t
, 0);
167 gimple_gen_pow2_profiler (hist
, t
, 0);
170 case HIST_TYPE_TOPN_VALUES
:
171 gimple_gen_topn_values_profiler (hist
, t
, 0);
174 case HIST_TYPE_INDIR_CALL
:
175 gimple_gen_ic_profiler (hist
, t
, 0);
178 case HIST_TYPE_AVERAGE
:
179 gimple_gen_average_profiler (hist
, t
, 0);
183 gimple_gen_ior_profiler (hist
, t
, 0);
186 case HIST_TYPE_TIME_PROFILE
:
187 gimple_gen_time_profiler (t
, 0);
197 /* Computes hybrid profile for all matching entries in da_file.
199 CFG_CHECKSUM is the precomputed checksum for the CFG. */
202 get_exec_counts (unsigned cfg_checksum
, unsigned lineno_checksum
)
204 unsigned num_edges
= 0;
208 /* Count the edges to be (possibly) instrumented. */
209 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
214 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
215 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
219 counts
= get_coverage_counts (GCOV_COUNTER_ARCS
, cfg_checksum
,
220 lineno_checksum
, num_edges
);
228 is_edge_inconsistent (vec
<edge
, va_gc
> *edges
)
232 FOR_EACH_EDGE (e
, ei
, edges
)
234 if (!EDGE_INFO (e
)->ignore
)
236 if (edge_gcov_count (e
) < 0
237 && (!(e
->flags
& EDGE_FAKE
)
238 || !block_ends_with_call_p (e
->src
)))
243 "Edge %i->%i is inconsistent, count%" PRId64
,
244 e
->src
->index
, e
->dest
->index
, edge_gcov_count (e
));
245 dump_bb (dump_file
, e
->src
, 0, TDF_DETAILS
);
246 dump_bb (dump_file
, e
->dest
, 0, TDF_DETAILS
);
256 correct_negative_edge_counts (void)
262 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
264 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
266 if (edge_gcov_count (e
) < 0)
267 edge_gcov_count (e
) = 0;
272 /* Check consistency.
273 Return true if inconsistency is found. */
275 is_inconsistent (void)
278 bool inconsistent
= false;
279 FOR_EACH_BB_FN (bb
, cfun
)
281 inconsistent
|= is_edge_inconsistent (bb
->preds
);
282 if (!dump_file
&& inconsistent
)
284 inconsistent
|= is_edge_inconsistent (bb
->succs
);
285 if (!dump_file
&& inconsistent
)
287 if (bb_gcov_count (bb
) < 0)
291 fprintf (dump_file
, "BB %i count is negative "
295 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
299 if (bb_gcov_count (bb
) != sum_edge_counts (bb
->preds
))
303 fprintf (dump_file
, "BB %i count does not match sum of incoming edges "
304 "%" PRId64
" should be %" PRId64
,
307 sum_edge_counts (bb
->preds
));
308 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
312 if (bb_gcov_count (bb
) != sum_edge_counts (bb
->succs
) &&
313 ! (find_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
)) != NULL
314 && block_ends_with_call_p (bb
)))
318 fprintf (dump_file
, "BB %i count does not match sum of outgoing edges "
319 "%" PRId64
" should be %" PRId64
,
322 sum_edge_counts (bb
->succs
));
323 dump_bb (dump_file
, bb
, 0, TDF_DETAILS
);
327 if (!dump_file
&& inconsistent
)
334 /* Set each basic block count to the sum of its outgoing edge counts */
339 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
341 bb_gcov_count (bb
) = sum_edge_counts (bb
->succs
);
342 gcc_assert (bb_gcov_count (bb
) >= 0);
346 /* Reads profile data and returns total number of edge counts read */
348 read_profile_edge_counts (gcov_type
*exec_counts
)
352 int exec_counts_pos
= 0;
353 /* For each edge not on the spanning tree, set its execution count from
355 /* The first count in the .da file is the number of times that the function
356 was entered. This is the exec_count for block zero. */
358 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
363 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
364 if (!EDGE_INFO (e
)->ignore
&& !EDGE_INFO (e
)->on_tree
)
368 edge_gcov_count (e
) = exec_counts
[exec_counts_pos
++];
370 edge_gcov_count (e
) = 0;
372 EDGE_INFO (e
)->count_valid
= 1;
373 BB_INFO (bb
)->succ_count
--;
374 BB_INFO (e
->dest
)->pred_count
--;
377 fprintf (dump_file
, "\nRead edge from %i to %i, count:",
378 bb
->index
, e
->dest
->index
);
379 fprintf (dump_file
, "%" PRId64
,
380 (int64_t) edge_gcov_count (e
));
389 /* Compute the branch probabilities for the various branches.
390 Annotate them accordingly.
392 CFG_CHECKSUM is the precomputed checksum for the CFG. */
395 compute_branch_probabilities (unsigned cfg_checksum
, unsigned lineno_checksum
)
402 int hist_br_prob
[20];
404 gcov_type
*exec_counts
= get_exec_counts (cfg_checksum
, lineno_checksum
);
405 int inconsistent
= 0;
407 /* Very simple sanity checks so we catch bugs in our profiling code. */
411 fprintf (dump_file
, "Profile info is missing; giving up\n");
415 bb_gcov_counts
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
416 edge_gcov_counts
= new hash_map
<edge
,gcov_type
>;
418 /* Attach extra info block to each bb. */
419 alloc_aux_for_blocks (sizeof (struct bb_profile_info
));
420 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
425 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
426 if (!EDGE_INFO (e
)->ignore
)
427 BB_INFO (bb
)->succ_count
++;
428 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
429 if (!EDGE_INFO (e
)->ignore
)
430 BB_INFO (bb
)->pred_count
++;
433 /* Avoid predicting entry on exit nodes. */
434 BB_INFO (EXIT_BLOCK_PTR_FOR_FN (cfun
))->succ_count
= 2;
435 BB_INFO (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->pred_count
= 2;
437 num_edges
= read_profile_edge_counts (exec_counts
);
440 fprintf (dump_file
, "\n%d edge counts read\n", num_edges
);
442 /* For every block in the file,
443 - if every exit/entrance edge has a known count, then set the block count
444 - if the block count is known, and every exit/entrance edge but one has
445 a known execution count, then set the count of the remaining edge
447 As edge counts are set, decrement the succ/pred count, but don't delete
448 the edge, that way we can easily tell when all edges are known, or only
449 one edge is unknown. */
451 /* The order that the basic blocks are iterated through is important.
452 Since the code that finds spanning trees starts with block 0, low numbered
453 edges are put on the spanning tree in preference to high numbered edges.
454 Hence, most instrumented edges are at the end. Graph solving works much
455 faster if we propagate numbers from the end to the start.
457 This takes an average of slightly more than 3 passes. */
465 FOR_BB_BETWEEN (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), NULL
, prev_bb
)
467 struct bb_profile_info
*bi
= BB_INFO (bb
);
468 if (! bi
->count_valid
)
470 if (bi
->succ_count
== 0)
476 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
477 total
+= edge_gcov_count (e
);
478 bb_gcov_count (bb
) = total
;
482 else if (bi
->pred_count
== 0)
488 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
489 total
+= edge_gcov_count (e
);
490 bb_gcov_count (bb
) = total
;
497 if (bi
->succ_count
== 1)
503 /* One of the counts will be invalid, but it is zero,
504 so adding it in also doesn't hurt. */
505 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
506 total
+= edge_gcov_count (e
);
508 /* Search for the invalid edge, and set its count. */
509 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
510 if (! EDGE_INFO (e
)->count_valid
&& ! EDGE_INFO (e
)->ignore
)
513 /* Calculate count for remaining edge by conservation. */
514 total
= bb_gcov_count (bb
) - total
;
517 EDGE_INFO (e
)->count_valid
= 1;
518 edge_gcov_count (e
) = total
;
521 BB_INFO (e
->dest
)->pred_count
--;
524 if (bi
->pred_count
== 1)
530 /* One of the counts will be invalid, but it is zero,
531 so adding it in also doesn't hurt. */
532 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
533 total
+= edge_gcov_count (e
);
535 /* Search for the invalid edge, and set its count. */
536 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
537 if (!EDGE_INFO (e
)->count_valid
&& !EDGE_INFO (e
)->ignore
)
540 /* Calculate count for remaining edge by conservation. */
541 total
= bb_gcov_count (bb
) - total
+ edge_gcov_count (e
);
544 EDGE_INFO (e
)->count_valid
= 1;
545 edge_gcov_count (e
) = total
;
548 BB_INFO (e
->src
)->succ_count
--;
555 total_num_passes
+= passes
;
557 fprintf (dump_file
, "Graph solving took %d passes.\n\n", passes
);
559 /* If the graph has been correctly solved, every block will have a
560 succ and pred count of zero. */
561 FOR_EACH_BB_FN (bb
, cfun
)
563 gcc_assert (!BB_INFO (bb
)->succ_count
&& !BB_INFO (bb
)->pred_count
);
566 /* Check for inconsistent basic block counts */
567 inconsistent
= is_inconsistent ();
571 if (flag_profile_correction
)
573 /* Inconsistency detected. Make it flow-consistent. */
574 static int informed
= 0;
575 if (dump_enabled_p () && informed
== 0)
578 dump_printf_loc (MSG_NOTE
,
579 dump_user_location_t::from_location_t (input_location
),
580 "correcting inconsistent profile data\n");
582 correct_negative_edge_counts ();
583 /* Set bb counts to the sum of the outgoing edge counts */
586 fprintf (dump_file
, "\nCalling mcf_smooth_cfg\n");
590 error ("corrupted profile info: profile data is not flow-consistent");
593 /* For every edge, calculate its branch probability and add a reg_note
594 to the branch insn to indicate this. */
596 for (i
= 0; i
< 20; i
++)
600 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
605 if (bb_gcov_count (bb
) < 0)
607 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
608 bb
->index
, (int)bb_gcov_count (bb
));
609 bb_gcov_count (bb
) = 0;
611 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
613 /* Function may return twice in case the called function is
614 setjmp or calls fork, but we can't represent this by extra
615 edge from the entry, since extra edge from the exit is
616 already present. We get negative frequency from the entry
618 if ((edge_gcov_count (e
) < 0
619 && e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
620 || (edge_gcov_count (e
) > bb_gcov_count (bb
)
621 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)))
623 if (block_ends_with_call_p (bb
))
624 edge_gcov_count (e
) = edge_gcov_count (e
) < 0
625 ? 0 : bb_gcov_count (bb
);
627 if (edge_gcov_count (e
) < 0
628 || edge_gcov_count (e
) > bb_gcov_count (bb
))
630 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
631 e
->src
->index
, e
->dest
->index
,
632 (int)edge_gcov_count (e
));
633 edge_gcov_count (e
) = bb_gcov_count (bb
) / 2;
636 if (bb_gcov_count (bb
))
638 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
639 e
->probability
= profile_probability::probability_in_gcov_type
640 (edge_gcov_count (e
), bb_gcov_count (bb
));
641 if (bb
->index
>= NUM_FIXED_BLOCKS
642 && block_ends_with_condjump_p (bb
)
643 && EDGE_COUNT (bb
->succs
) >= 2)
649 /* Find the branch edge. It is possible that we do have fake
651 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
652 if (!(e
->flags
& (EDGE_FAKE
| EDGE_FALLTHRU
)))
655 prob
= e
->probability
.to_reg_br_prob_base ();
656 index
= prob
* 20 / REG_BR_PROB_BASE
;
660 hist_br_prob
[index
]++;
665 /* As a last resort, distribute the probabilities evenly.
666 Use simple heuristics that if there are normal edges,
667 give all abnormals frequency of 0, otherwise distribute the
668 frequency over abnormals (this is the case of noreturn
670 else if (profile_status_for_fn (cfun
) == PROFILE_ABSENT
)
674 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
675 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
679 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
680 if (!(e
->flags
& (EDGE_COMPLEX
| EDGE_FAKE
)))
682 = profile_probability::guessed_always ().apply_scale (1, total
);
684 e
->probability
= profile_probability::never ();
688 total
+= EDGE_COUNT (bb
->succs
);
689 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
691 = profile_probability::guessed_always ().apply_scale (1, total
);
693 if (bb
->index
>= NUM_FIXED_BLOCKS
694 && block_ends_with_condjump_p (bb
)
695 && EDGE_COUNT (bb
->succs
) >= 2)
701 profile_status_for_fn (cfun
) = PROFILE_READ
;
703 /* If we have real data, use them! */
704 if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun
))
705 || !flag_guess_branch_prob
)
706 FOR_ALL_BB_FN (bb
, cfun
)
707 bb
->count
= profile_count::from_gcov_type (bb_gcov_count (bb
));
708 /* If function was not trained, preserve local estimates including statically
709 determined zero counts. */
710 else if (profile_status_for_fn (cfun
) == PROFILE_READ
)
711 FOR_ALL_BB_FN (bb
, cfun
)
712 if (!(bb
->count
== profile_count::zero ()))
713 bb
->count
= bb
->count
.global0 ();
715 bb_gcov_counts
.release ();
716 delete edge_gcov_counts
;
717 edge_gcov_counts
= NULL
;
719 update_max_bb_count ();
723 fprintf (dump_file
, " Profile feedback for function");
724 fprintf (dump_file
, ((profile_status_for_fn (cfun
) == PROFILE_READ
)
726 : " is not available \n"));
728 fprintf (dump_file
, "%d branches\n", num_branches
);
730 for (i
= 0; i
< 10; i
++)
731 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
732 (hist_br_prob
[i
] + hist_br_prob
[19-i
]) * 100 / num_branches
,
735 total_num_branches
+= num_branches
;
736 for (i
= 0; i
< 20; i
++)
737 total_hist_br_prob
[i
] += hist_br_prob
[i
];
739 fputc ('\n', dump_file
);
740 fputc ('\n', dump_file
);
743 free_aux_for_blocks ();
746 /* Sort the histogram value and count for TOPN and INDIR_CALL type. */
749 sort_hist_values (histogram_value hist
)
751 /* counters[2] equal to -1 means that all counters are invalidated. */
752 if (hist
->hvalue
.counters
[2] == -1)
755 gcc_assert (hist
->type
== HIST_TYPE_TOPN_VALUES
756 || hist
->type
== HIST_TYPE_INDIR_CALL
);
758 gcc_assert (hist
->n_counters
== GCOV_TOPN_VALUES_COUNTERS
);
760 /* Hist value is organized as:
761 [total_executions, value1, counter1, ..., value4, counter4]
762 Use decrease bubble sort to rearrange it. The sort starts from <value1,
763 counter1> and compares counter first. If counter is same, compares the
764 value, exchange it if small to keep stable. */
765 for (unsigned i
= 0; i
< GCOV_TOPN_VALUES
- 1; i
++)
767 bool swapped
= false;
768 for (unsigned j
= 0; j
< GCOV_TOPN_VALUES
- 1 - i
; j
++)
770 gcov_type
*p
= &hist
->hvalue
.counters
[2 * j
+ 1];
771 if (p
[1] < p
[3] || (p
[1] == p
[3] && p
[0] < p
[2]))
773 std::swap (p
[0], p
[2]);
774 std::swap (p
[1], p
[3]);
782 /* Load the value histograms whose descriptions are stored in the VALUES array
785 CFG_CHECKSUM is the precomputed checksum for the CFG. */
788 compute_value_histograms (histogram_values values
, unsigned cfg_checksum
,
789 unsigned lineno_checksum
)
791 unsigned i
, j
, t
, any
;
792 unsigned n_histogram_counters
[GCOV_N_VALUE_COUNTERS
];
793 gcov_type
*histogram_counts
[GCOV_N_VALUE_COUNTERS
];
794 gcov_type
*act_count
[GCOV_N_VALUE_COUNTERS
];
795 gcov_type
*aact_count
;
796 struct cgraph_node
*node
;
798 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
799 n_histogram_counters
[t
] = 0;
801 for (i
= 0; i
< values
.length (); i
++)
803 histogram_value hist
= values
[i
];
804 n_histogram_counters
[(int) hist
->type
] += hist
->n_counters
;
808 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
810 if (!n_histogram_counters
[t
])
812 histogram_counts
[t
] = NULL
;
816 histogram_counts
[t
] = get_coverage_counts (COUNTER_FOR_HIST_TYPE (t
),
819 n_histogram_counters
[t
]);
820 if (histogram_counts
[t
])
822 act_count
[t
] = histogram_counts
[t
];
827 for (i
= 0; i
< values
.length (); i
++)
829 histogram_value hist
= values
[i
];
830 gimple
*stmt
= hist
->hvalue
.stmt
;
832 t
= (int) hist
->type
;
834 aact_count
= act_count
[t
];
837 act_count
[t
] += hist
->n_counters
;
839 gimple_add_histogram_value (cfun
, stmt
, hist
);
840 hist
->hvalue
.counters
= XNEWVEC (gcov_type
, hist
->n_counters
);
841 for (j
= 0; j
< hist
->n_counters
; j
++)
843 hist
->hvalue
.counters
[j
] = aact_count
[j
];
845 hist
->hvalue
.counters
[j
] = 0;
847 if (hist
->type
== HIST_TYPE_TOPN_VALUES
848 || hist
->type
== HIST_TYPE_INDIR_CALL
)
849 sort_hist_values (hist
);
851 /* Time profiler counter is not related to any statement,
852 so that we have to read the counter and set the value to
853 the corresponding call graph node. */
854 if (hist
->type
== HIST_TYPE_TIME_PROFILE
)
856 node
= cgraph_node::get (hist
->fun
->decl
);
857 node
->tp_first_run
= hist
->hvalue
.counters
[0];
860 fprintf (dump_file
, "Read tp_first_run: %d\n", node
->tp_first_run
);
864 for (t
= 0; t
< GCOV_N_VALUE_COUNTERS
; t
++)
865 free (histogram_counts
[t
]);
868 /* Location triplet which records a location. */
869 struct location_triplet
871 const char *filename
;
876 /* Traits class for streamed_locations hash set below. */
878 struct location_triplet_hash
: typed_noop_remove
<location_triplet
>
880 typedef location_triplet value_type
;
881 typedef location_triplet compare_type
;
884 hash (const location_triplet
&ref
)
886 inchash::hash
hstate (0);
888 hstate
.add_int (strlen (ref
.filename
));
889 hstate
.add_int (ref
.lineno
);
890 hstate
.add_int (ref
.bb_index
);
891 return hstate
.end ();
895 equal (const location_triplet
&ref1
, const location_triplet
&ref2
)
897 return ref1
.lineno
== ref2
.lineno
898 && ref1
.bb_index
== ref2
.bb_index
899 && ref1
.filename
!= NULL
900 && ref2
.filename
!= NULL
901 && strcmp (ref1
.filename
, ref2
.filename
) == 0;
905 mark_deleted (location_triplet
&ref
)
911 mark_empty (location_triplet
&ref
)
917 is_deleted (const location_triplet
&ref
)
919 return ref
.lineno
== -1;
923 is_empty (const location_triplet
&ref
)
925 return ref
.lineno
== -2;
932 /* When passed NULL as file_name, initialize.
933 When passed something else, output the necessary commands to change
934 line to LINE and offset to FILE_NAME. */
936 output_location (hash_set
<location_triplet_hash
> *streamed_locations
,
937 char const *file_name
, int line
,
938 gcov_position_t
*offset
, basic_block bb
)
940 static char const *prev_file_name
;
941 static int prev_line
;
942 bool name_differs
, line_differs
;
944 location_triplet triplet
;
945 triplet
.filename
= file_name
;
946 triplet
.lineno
= line
;
947 triplet
.bb_index
= bb
? bb
->index
: 0;
949 if (streamed_locations
->add (triplet
))
954 prev_file_name
= NULL
;
959 name_differs
= !prev_file_name
|| filename_cmp (file_name
, prev_file_name
);
960 line_differs
= prev_line
!= line
;
964 *offset
= gcov_write_tag (GCOV_TAG_LINES
);
965 gcov_write_unsigned (bb
->index
);
966 name_differs
= line_differs
= true;
969 /* If this is a new source file, then output the
970 file's name to the .bb file. */
973 prev_file_name
= file_name
;
974 gcov_write_unsigned (0);
975 gcov_write_filename (prev_file_name
);
979 gcov_write_unsigned (line
);
984 /* Helper for qsort so edges get sorted from highest frequency to smallest.
985 This controls the weight for minimal spanning tree algorithm */
987 compare_freqs (const void *p1
, const void *p2
)
989 const_edge e1
= *(const const_edge
*)p1
;
990 const_edge e2
= *(const const_edge
*)p2
;
992 /* Critical edges needs to be split which introduce extra control flow.
993 Make them more heavy. */
994 int m1
= EDGE_CRITICAL_P (e1
) ? 2 : 1;
995 int m2
= EDGE_CRITICAL_P (e2
) ? 2 : 1;
997 if (EDGE_FREQUENCY (e1
) * m1
+ m1
!= EDGE_FREQUENCY (e2
) * m2
+ m2
)
998 return EDGE_FREQUENCY (e2
) * m2
+ m2
- EDGE_FREQUENCY (e1
) * m1
- m1
;
999 /* Stabilize sort. */
1000 if (e1
->src
->index
!= e2
->src
->index
)
1001 return e2
->src
->index
- e1
->src
->index
;
1002 return e2
->dest
->index
- e1
->dest
->index
;
1005 /* Only read execution count for thunks. */
1008 read_thunk_profile (struct cgraph_node
*node
)
1010 tree old
= current_function_decl
;
1011 current_function_decl
= node
->decl
;
1012 gcov_type
*counts
= get_coverage_counts (GCOV_COUNTER_ARCS
, 0, 0, 1);
1015 node
->callees
->count
= node
->count
1016 = profile_count::from_gcov_type (counts
[0]);
1019 current_function_decl
= old
;
1024 /* Instrument and/or analyze program behavior based on the program's CFG.
1026 This function creates a representation of the control flow graph (of
1027 the function being compiled) that is suitable for the instrumentation
1028 of edges and/or converting measured edge counts to counts on the
1031 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
1032 the flow graph that are needed to reconstruct the dynamic behavior of the
1033 flow graph. This data is written to the gcno file for gcov.
1035 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
1036 information from the gcda file containing edge count information from
1037 previous executions of the function being compiled. In this case, the
1038 control flow graph is annotated with actual execution counts by
1039 compute_branch_probabilities().
1041 Main entry point of this file. */
1044 branch_prob (bool thunk
)
1048 unsigned num_edges
, ignored_edges
;
1049 unsigned num_instrumented
;
1050 struct edge_list
*el
;
1051 histogram_values values
= histogram_values ();
1052 unsigned cfg_checksum
, lineno_checksum
;
1054 total_num_times_called
++;
1056 flow_call_edges_add (NULL
);
1057 add_noreturn_fake_exit_edges ();
1059 hash_set
<location_triplet_hash
> streamed_locations
;
1063 /* We can't handle cyclic regions constructed using abnormal edges.
1064 To avoid these we replace every destination of an abnormal edge by a
1065 fake edge from the entry node and every source by a fake edge to exit.
1066 This keeps graph acyclic and our calculation exact for all normal
1067 edges except for exit and entrance ones.
1069 We also add fake exit edges for each call and asm statement in the
1070 basic block, since it may not return. */
1072 FOR_EACH_BB_FN (bb
, cfun
)
1074 int need_exit_edge
= 0, need_entry_edge
= 0;
1075 int have_exit_edge
= 0, have_entry_edge
= 0;
1079 /* Functions returning multiple times are not handled by extra edges.
1080 Instead we simply allow negative counts on edges from exit to the
1081 block past call and corresponding probabilities. We can't go
1082 with the extra edges because that would result in flowgraph that
1083 needs to have fake edges outside the spanning tree. */
1085 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1087 gimple_stmt_iterator gsi
;
1088 gimple
*last
= NULL
;
1090 /* It may happen that there are compiler generated statements
1091 without a locus at all. Go through the basic block from the
1092 last to the first statement looking for a locus. */
1093 for (gsi
= gsi_last_nondebug_bb (bb
);
1095 gsi_prev_nondebug (&gsi
))
1097 last
= gsi_stmt (gsi
);
1098 if (!RESERVED_LOCATION_P (gimple_location (last
)))
1102 /* Edge with goto locus might get wrong coverage info unless
1103 it is the only edge out of BB.
1104 Don't do that when the locuses match, so
1105 if (blah) goto something;
1106 is not computed twice. */
1108 && gimple_has_location (last
)
1109 && !RESERVED_LOCATION_P (e
->goto_locus
)
1110 && !single_succ_p (bb
)
1111 && (LOCATION_FILE (e
->goto_locus
)
1112 != LOCATION_FILE (gimple_location (last
))
1113 || (LOCATION_LINE (e
->goto_locus
)
1114 != LOCATION_LINE (gimple_location (last
)))))
1116 basic_block new_bb
= split_edge (e
);
1117 edge ne
= single_succ_edge (new_bb
);
1118 ne
->goto_locus
= e
->goto_locus
;
1120 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1121 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1123 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1126 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1128 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1129 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1130 need_entry_edge
= 1;
1131 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1132 have_entry_edge
= 1;
1135 if (need_exit_edge
&& !have_exit_edge
)
1138 fprintf (dump_file
, "Adding fake exit edge to bb %i\n",
1140 make_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), EDGE_FAKE
);
1142 if (need_entry_edge
&& !have_entry_edge
)
1145 fprintf (dump_file
, "Adding fake entry edge to bb %i\n",
1147 make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
), bb
, EDGE_FAKE
);
1148 /* Avoid bbs that have both fake entry edge and also some
1149 exit edge. One of those edges wouldn't be added to the
1150 spanning tree, but we can't instrument any of them. */
1151 if (have_exit_edge
|| need_exit_edge
)
1153 gimple_stmt_iterator gsi
;
1156 gsi
= gsi_start_nondebug_after_labels_bb (bb
);
1157 gcc_checking_assert (!gsi_end_p (gsi
));
1158 first
= gsi_stmt (gsi
);
1159 /* Don't split the bbs containing __builtin_setjmp_receiver
1160 or ABNORMAL_DISPATCHER calls. These are very
1161 special and don't expect anything to be inserted before
1163 if (is_gimple_call (first
)
1164 && (gimple_call_builtin_p (first
, BUILT_IN_SETJMP_RECEIVER
)
1165 || (gimple_call_flags (first
) & ECF_RETURNS_TWICE
)
1166 || (gimple_call_internal_p (first
)
1167 && (gimple_call_internal_fn (first
)
1168 == IFN_ABNORMAL_DISPATCHER
))))
1172 fprintf (dump_file
, "Splitting bb %i after labels\n",
1174 split_block_after_labels (bb
);
1180 el
= create_edge_list ();
1181 num_edges
= NUM_EDGES (el
);
1182 qsort (el
->index_to_edge
, num_edges
, sizeof (edge
), compare_freqs
);
1183 alloc_aux_for_edges (sizeof (struct edge_profile_info
));
1185 /* The basic blocks are expected to be numbered sequentially. */
1189 for (i
= 0 ; i
< num_edges
; i
++)
1191 edge e
= INDEX_EDGE (el
, i
);
1193 /* Mark edges we've replaced by fake edges above as ignored. */
1194 if ((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
))
1195 && e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1196 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1198 EDGE_INFO (e
)->ignore
= 1;
1203 /* Create spanning tree from basic block graph, mark each edge that is
1204 on the spanning tree. We insert as many abnormal and critical edges
1205 as possible to minimize number of edge splits necessary. */
1208 find_spanning_tree (el
);
1213 /* Keep only edge from entry block to be instrumented. */
1214 FOR_EACH_BB_FN (bb
, cfun
)
1215 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1216 EDGE_INFO (e
)->ignore
= true;
1220 /* Fake edges that are not on the tree will not be instrumented, so
1221 mark them ignored. */
1222 for (num_instrumented
= i
= 0; i
< num_edges
; i
++)
1224 edge e
= INDEX_EDGE (el
, i
);
1225 struct edge_profile_info
*inf
= EDGE_INFO (e
);
1227 if (inf
->ignore
|| inf
->on_tree
)
1229 else if (e
->flags
& EDGE_FAKE
)
1238 total_num_blocks
+= n_basic_blocks_for_fn (cfun
);
1240 fprintf (dump_file
, "%d basic blocks\n", n_basic_blocks_for_fn (cfun
));
1242 total_num_edges
+= num_edges
;
1244 fprintf (dump_file
, "%d edges\n", num_edges
);
1246 total_num_edges_ignored
+= ignored_edges
;
1248 fprintf (dump_file
, "%d ignored edges\n", ignored_edges
);
1250 total_num_edges_instrumented
+= num_instrumented
;
1252 fprintf (dump_file
, "%d instrumentation edges\n", num_instrumented
);
1254 /* Compute two different checksums. Note that we want to compute
1255 the checksum in only one place, since it depends on the shape
1256 of the control flow which can change during
1257 various transformations. */
1260 /* At stream in time we do not have CFG, so we cannot do checksums. */
1262 lineno_checksum
= 0;
1266 cfg_checksum
= coverage_compute_cfg_checksum (cfun
);
1267 lineno_checksum
= coverage_compute_lineno_checksum ();
1270 /* Write the data from which gcov can reconstruct the basic block
1271 graph and function line numbers (the gcno file). */
1272 if (coverage_begin_function (lineno_checksum
, cfg_checksum
))
1274 gcov_position_t offset
;
1276 /* Basic block flags */
1277 offset
= gcov_write_tag (GCOV_TAG_BLOCKS
);
1278 gcov_write_unsigned (n_basic_blocks_for_fn (cfun
));
1279 gcov_write_length (offset
);
1282 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
),
1283 EXIT_BLOCK_PTR_FOR_FN (cfun
), next_bb
)
1288 offset
= gcov_write_tag (GCOV_TAG_ARCS
);
1289 gcov_write_unsigned (bb
->index
);
1291 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1293 struct edge_profile_info
*i
= EDGE_INFO (e
);
1296 unsigned flag_bits
= 0;
1299 flag_bits
|= GCOV_ARC_ON_TREE
;
1300 if (e
->flags
& EDGE_FAKE
)
1301 flag_bits
|= GCOV_ARC_FAKE
;
1302 if (e
->flags
& EDGE_FALLTHRU
)
1303 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1304 /* On trees we don't have fallthru flags, but we can
1305 recompute them from CFG shape. */
1306 if (e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)
1307 && e
->src
->next_bb
== e
->dest
)
1308 flag_bits
|= GCOV_ARC_FALLTHROUGH
;
1310 gcov_write_unsigned (e
->dest
->index
);
1311 gcov_write_unsigned (flag_bits
);
1315 gcov_write_length (offset
);
1319 /* Initialize the output. */
1320 output_location (&streamed_locations
, NULL
, 0, NULL
, NULL
);
1322 hash_set
<int_hash
<location_t
, 0, 2> > seen_locations
;
1324 FOR_EACH_BB_FN (bb
, cfun
)
1326 gimple_stmt_iterator gsi
;
1327 gcov_position_t offset
= 0;
1329 if (bb
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
)
1331 location_t loc
= DECL_SOURCE_LOCATION (current_function_decl
);
1332 seen_locations
.add (loc
);
1333 expanded_location curr_location
= expand_location (loc
);
1334 output_location (&streamed_locations
, curr_location
.file
,
1335 curr_location
.line
, &offset
, bb
);
1338 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1340 gimple
*stmt
= gsi_stmt (gsi
);
1341 location_t loc
= gimple_location (stmt
);
1342 if (!RESERVED_LOCATION_P (loc
))
1344 seen_locations
.add (loc
);
1345 output_location (&streamed_locations
, gimple_filename (stmt
),
1346 gimple_lineno (stmt
), &offset
, bb
);
1350 /* Notice GOTO expressions eliminated while constructing the CFG.
1351 It's hard to distinguish such expressions, but goto_locus should
1352 not be any of the already seen locations. */
1354 if (single_succ_p (bb
)
1355 && (loc
= single_succ_edge (bb
)->goto_locus
)
1356 && !RESERVED_LOCATION_P (loc
)
1357 && !seen_locations
.contains (loc
))
1359 expanded_location curr_location
= expand_location (loc
);
1360 output_location (&streamed_locations
, curr_location
.file
,
1361 curr_location
.line
, &offset
, bb
);
1366 /* A file of NULL indicates the end of run. */
1367 gcov_write_unsigned (0);
1368 gcov_write_string (NULL
);
1369 gcov_write_length (offset
);
1374 if (flag_profile_values
)
1375 gimple_find_values_to_profile (&values
);
1377 if (flag_branch_probabilities
)
1379 compute_branch_probabilities (cfg_checksum
, lineno_checksum
);
1380 if (flag_profile_values
)
1381 compute_value_histograms (values
, cfg_checksum
, lineno_checksum
);
1384 remove_fake_edges ();
1386 /* For each edge not on the spanning tree, add counting code. */
1387 if (profile_arc_flag
1388 && coverage_counter_alloc (GCOV_COUNTER_ARCS
, num_instrumented
))
1390 unsigned n_instrumented
;
1392 gimple_init_gcov_profiler ();
1394 n_instrumented
= instrument_edges (el
);
1396 gcc_assert (n_instrumented
== num_instrumented
);
1398 if (flag_profile_values
)
1399 instrument_values (values
);
1401 /* Commit changes done by instrumentation. */
1402 gsi_commit_edge_inserts ();
1405 free_aux_for_edges ();
1408 free_edge_list (el
);
1409 coverage_end_function (lineno_checksum
, cfg_checksum
);
1410 if (flag_branch_probabilities
1411 && (profile_status_for_fn (cfun
) == PROFILE_READ
))
1414 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1415 report_predictor_hitrates ();
1417 /* At this moment we have precise loop iteration count estimates.
1418 Record them to loop structure before the profile gets out of date. */
1419 FOR_EACH_LOOP (loop
, 0)
1420 if (loop
->header
->count
> 0)
1422 gcov_type nit
= expected_loop_iterations_unbounded (loop
);
1423 widest_int bound
= gcov_type_to_wide_int (nit
);
1424 loop
->any_estimate
= false;
1425 record_niter_bound (loop
, bound
, true, false);
1427 compute_function_frequency ();
1431 /* Union find algorithm implementation for the basic blocks using
1435 find_group (basic_block bb
)
1437 basic_block group
= bb
, bb1
;
1439 while ((basic_block
) group
->aux
!= group
)
1440 group
= (basic_block
) group
->aux
;
1442 /* Compress path. */
1443 while ((basic_block
) bb
->aux
!= group
)
1445 bb1
= (basic_block
) bb
->aux
;
1446 bb
->aux
= (void *) group
;
1453 union_groups (basic_block bb1
, basic_block bb2
)
1455 basic_block bb1g
= find_group (bb1
);
1456 basic_block bb2g
= find_group (bb2
);
1458 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1459 this code is unlikely going to be performance problem anyway. */
1460 gcc_assert (bb1g
!= bb2g
);
1465 /* This function searches all of the edges in the program flow graph, and puts
1466 as many bad edges as possible onto the spanning tree. Bad edges include
1467 abnormals edges, which can't be instrumented at the moment. Since it is
1468 possible for fake edges to form a cycle, we will have to develop some
1469 better way in the future. Also put critical edges to the tree, since they
1470 are more expensive to instrument. */
1473 find_spanning_tree (struct edge_list
*el
)
1476 int num_edges
= NUM_EDGES (el
);
1479 /* We use aux field for standard union-find algorithm. */
1480 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
1483 /* Add fake edge exit to entry we can't instrument. */
1484 union_groups (EXIT_BLOCK_PTR_FOR_FN (cfun
), ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1486 /* First add all abnormal edges to the tree unless they form a cycle. Also
1487 add all edges to the exit block to avoid inserting profiling code behind
1488 setting return value from function. */
1489 for (i
= 0; i
< num_edges
; i
++)
1491 edge e
= INDEX_EDGE (el
, i
);
1492 if (((e
->flags
& (EDGE_ABNORMAL
| EDGE_ABNORMAL_CALL
| EDGE_FAKE
))
1493 || e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1494 && !EDGE_INFO (e
)->ignore
1495 && (find_group (e
->src
) != find_group (e
->dest
)))
1498 fprintf (dump_file
, "Abnormal edge %d to %d put to tree\n",
1499 e
->src
->index
, e
->dest
->index
);
1500 EDGE_INFO (e
)->on_tree
= 1;
1501 union_groups (e
->src
, e
->dest
);
1505 /* And now the rest. Edge list is sorted according to frequencies and
1506 thus we will produce minimal spanning tree. */
1507 for (i
= 0; i
< num_edges
; i
++)
1509 edge e
= INDEX_EDGE (el
, i
);
1510 if (!EDGE_INFO (e
)->ignore
1511 && find_group (e
->src
) != find_group (e
->dest
))
1514 fprintf (dump_file
, "Normal edge %d to %d put to tree\n",
1515 e
->src
->index
, e
->dest
->index
);
1516 EDGE_INFO (e
)->on_tree
= 1;
1517 union_groups (e
->src
, e
->dest
);
1521 clear_aux_for_blocks ();
1524 /* Perform file-level initialization for branch-prob processing. */
1527 init_branch_prob (void)
1531 total_num_blocks
= 0;
1532 total_num_edges
= 0;
1533 total_num_edges_ignored
= 0;
1534 total_num_edges_instrumented
= 0;
1535 total_num_blocks_created
= 0;
1536 total_num_passes
= 0;
1537 total_num_times_called
= 0;
1538 total_num_branches
= 0;
1539 for (i
= 0; i
< 20; i
++)
1540 total_hist_br_prob
[i
] = 0;
1543 /* Performs file-level cleanup after branch-prob processing
1547 end_branch_prob (void)
1551 fprintf (dump_file
, "\n");
1552 fprintf (dump_file
, "Total number of blocks: %d\n",
1554 fprintf (dump_file
, "Total number of edges: %d\n", total_num_edges
);
1555 fprintf (dump_file
, "Total number of ignored edges: %d\n",
1556 total_num_edges_ignored
);
1557 fprintf (dump_file
, "Total number of instrumented edges: %d\n",
1558 total_num_edges_instrumented
);
1559 fprintf (dump_file
, "Total number of blocks created: %d\n",
1560 total_num_blocks_created
);
1561 fprintf (dump_file
, "Total number of graph solution passes: %d\n",
1563 if (total_num_times_called
!= 0)
1564 fprintf (dump_file
, "Average number of graph solution passes: %d\n",
1565 (total_num_passes
+ (total_num_times_called
>> 1))
1566 / total_num_times_called
);
1567 fprintf (dump_file
, "Total number of branches: %d\n",
1568 total_num_branches
);
1569 if (total_num_branches
)
1573 for (i
= 0; i
< 10; i
++)
1574 fprintf (dump_file
, "%d%% branches in range %d-%d%%\n",
1575 (total_hist_br_prob
[i
] + total_hist_br_prob
[19-i
]) * 100
1576 / total_num_branches
, 5*i
, 5*i
+5);