PR c++/11503
[official-gcc.git] / gcc / profile.c
blobdd23628c70875de93d5f3ca73f851d37fe92d3dc
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
42 The auxiliary file generated is <dumpbase>.bbg. The format is
43 described in full in gcov-io.h. */
45 /* ??? Register allocation should use basic block execution counts to
46 give preference to the most commonly executed blocks. */
48 /* ??? Should calculate branch probabilities before instrumenting code, since
49 then we can use arc counts to help decide which arcs to instrument. */
51 #include "config.h"
52 #include "system.h"
53 #include "coretypes.h"
54 #include "tm.h"
55 #include "rtl.h"
56 #include "flags.h"
57 #include "output.h"
58 #include "regs.h"
59 #include "expr.h"
60 #include "function.h"
61 #include "toplev.h"
62 #include "coverage.h"
63 #include "value-prof.h"
64 #include "tree.h"
66 /* Additional information about the edges we need. */
67 struct edge_info {
/* The edge's execution count has been determined: either read from the
   .da file or solved by flow conservation in
   compute_branch_probabilities.  */
68 unsigned int count_valid : 1;
70 /* Is on the spanning tree. */
71 unsigned int on_tree : 1;
73 /* Pretend this edge does not exist (it is abnormal and we've
74 inserted a fake to compensate). */
75 unsigned int ignore : 1;
78 struct bb_info {
/* The block's execution count has been solved (set once all incoming
   or all outgoing edge counts are known).  */
79 unsigned int count_valid : 1;
81 /* Number of successor and predecessor edges. */
/* These start at the number of non-ignored edges and are decremented
   as individual edge counts become known, so zero means "all counts
   known" and one means "exactly one unknown edge remains".  */
82 gcov_type succ_count;
83 gcov_type pred_count;
/* Accessors for the per-edge / per-block bookkeeping hung off the
   generic aux pointers of edges and basic blocks.  */
86 #define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
87 #define BB_INFO(b) ((struct bb_info *) (b)->aux)
89 /* Counter summary from the last set of coverage counts read. */
91 const struct gcov_ctr_summary *profile_info;
93 /* Collect statistics on the performance of this pass for the entire source
94 file. */
96 static int total_num_blocks;
97 static int total_num_edges;
98 static int total_num_edges_ignored;
99 static int total_num_edges_instrumented;
100 static int total_num_blocks_created;
101 static int total_num_passes;
102 static int total_num_times_called;
/* Histogram of branch probabilities in twenty 5% buckets.  */
103 static int total_hist_br_prob[20];
104 static int total_num_never_executed;
105 static int total_num_branches;
107 /* Forward declarations. */
108 static void find_spanning_tree (struct edge_list *);
109 static rtx gen_edge_profiler (int);
110 static rtx gen_interval_profiler (struct histogram_value *, unsigned,
111 unsigned);
112 static rtx gen_pow2_profiler (struct histogram_value *, unsigned, unsigned);
113 static rtx gen_one_value_profiler (struct histogram_value *, unsigned,
114 unsigned);
115 static rtx gen_const_delta_profiler (struct histogram_value *, unsigned,
116 unsigned);
117 static unsigned instrument_edges (struct edge_list *);
118 static void instrument_values (unsigned, struct histogram_value *);
119 static void compute_branch_probabilities (void);
120 static gcov_type * get_exec_counts (void);
121 static basic_block find_group (basic_block);
122 static void union_groups (basic_block, basic_block);
125 /* Add edge instrumentation code to the entire insn chain.
    For each edge of EL that is neither ignored nor on the spanning
    tree, emit a counter-increment sequence on that edge.  Returns the
    number of edges instrumented.  (The old comment described
    parameters F/NUM_BLOCKS that this function no longer takes.)  */
130 static unsigned
131 instrument_edges (struct edge_list *el)
133 unsigned num_instr_edges = 0;
134 int num_edges = NUM_EDGES (el);
135 basic_block bb;
/* Fake edges were added only to close the graph; they cannot carry
   code, so drop them before inserting counters.  */
137 remove_fake_edges ();
139 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
141 edge e;
143 for (e = bb->succ; e; e = e->succ_next)
145 struct edge_info *inf = EDGE_INFO (e);
147 if (!inf->ignore && !inf->on_tree)
149 rtx edge_profile;
/* Abnormal edges must never be instrumented -- there is no control
   path on which to place the code (see the file header comment).  */
151 if (e->flags & EDGE_ABNORMAL)
152 abort ();
153 if (rtl_dump_file)
154 fprintf (rtl_dump_file, "Edge %d to %d instrumented%s\n",
155 e->src->index, e->dest->index,
156 EDGE_CRITICAL_P (e) ? " (and split)" : "");
157 edge_profile = gen_edge_profiler (num_instr_edges++);
158 insert_insn_on_edge (edge_profile, e);
159 rebuild_jump_labels (e->insns);
164 total_num_blocks_created += num_edges;
165 if (rtl_dump_file)
166 fprintf (rtl_dump_file, "%d edges instrumented\n", num_instr_edges);
167 return num_instr_edges;
170 /* Add code to measure histograms list of VALUES of length N_VALUES. */
171 static void
172 instrument_values (unsigned n_values, struct histogram_value *values)
174 rtx sequence;
175 unsigned i, t;
176 edge e;
178 /* Emit code to generate the histograms before the insns. */
180 for (i = 0; i < n_values; i++)
/* Split the block just before the measured insn; the profiler code is
   inserted on the edge produced by the split.  */
182 e = split_block (BLOCK_FOR_INSN (values[i].insn),
183 PREV_INSN (values[i].insn));
/* First switch: map the histogram type to the matching gcov counter
   section tag.  */
184 switch (values[i].type)
186 case HIST_TYPE_INTERVAL:
187 t = GCOV_COUNTER_V_INTERVAL;
188 break;
190 case HIST_TYPE_POW2:
191 t = GCOV_COUNTER_V_POW2;
192 break;
194 case HIST_TYPE_SINGLE_VALUE:
195 t = GCOV_COUNTER_V_SINGLE;
196 break;
198 case HIST_TYPE_CONST_DELTA:
199 t = GCOV_COUNTER_V_DELTA;
200 break;
202 default:
203 abort ();
/* If counters cannot be allocated for this value, silently skip it.  */
205 if (!coverage_counter_alloc (t, values[i].n_counters))
206 continue;
/* Second switch: generate the profiler RTL sequence for this value.  */
208 switch (values[i].type)
210 case HIST_TYPE_INTERVAL:
211 sequence = gen_interval_profiler (values + i, t, 0);
212 break;
214 case HIST_TYPE_POW2:
215 sequence = gen_pow2_profiler (values + i, t, 0);
216 break;
218 case HIST_TYPE_SINGLE_VALUE:
219 sequence = gen_one_value_profiler (values + i, t, 0);
220 break;
222 case HIST_TYPE_CONST_DELTA:
223 sequence = gen_const_delta_profiler (values + i, t, 0);
224 break;
226 default:
227 abort ();
230 safe_insert_insn_on_edge (sequence, e);
235 /* Computes hybrid profile for all matching entries in da_file. */
/* Returns the array of arc counters read for this function, or NULL if
   no counts are available.  Also sets the file-scope profile_info
   summary as a side effect of get_coverage_counts.  */
237 static gcov_type *
238 get_exec_counts (void)
240 unsigned num_edges = 0;
241 basic_block bb;
242 gcov_type *counts;
244 /* Count the edges to be (possibly) instrumented. */
/* This must match the edge enumeration order used elsewhere (e.g. in
   compute_branch_probabilities), since the counts array is consumed
   positionally.  */
245 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
247 edge e;
248 for (e = bb->succ; e; e = e->succ_next)
249 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
250 num_edges++;
253 counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info);
254 if (!counts)
255 return NULL;
257 if (rtl_dump_file && profile_info)
258 fprintf(rtl_dump_file, "Merged %u profiles with maximal count %u.\n",
259 profile_info->runs, (unsigned) profile_info->sum_max);
261 return counts;
265 /* Compute the branch probabilities for the various branches.
266 Annotate them accordingly. */
268 static void
269 compute_branch_probabilities (void)
271 basic_block bb;
272 int i;
273 int num_edges = 0;
274 int changes;
275 int passes;
276 int hist_br_prob[20];
277 int num_never_executed;
278 int num_branches;
279 gcov_type *exec_counts = get_exec_counts ();
280 int exec_counts_pos = 0;
282 /* Attach extra info block to each bb. */
284 alloc_aux_for_blocks (sizeof (struct bb_info));
285 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
287 edge e;
289 for (e = bb->succ; e; e = e->succ_next)
290 if (!EDGE_INFO (e)->ignore)
291 BB_INFO (bb)->succ_count++;
292 for (e = bb->pred; e; e = e->pred_next)
293 if (!EDGE_INFO (e)->ignore)
294 BB_INFO (bb)->pred_count++;
297 /* Avoid predicting entry on exit nodes. */
298 BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
299 BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
301 /* For each edge not on the spanning tree, set its execution count from
302 the .da file. */
304 /* The first count in the .da file is the number of times that the function
305 was entered. This is the exec_count for block zero. */
307 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
309 edge e;
310 for (e = bb->succ; e; e = e->succ_next)
311 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
313 num_edges++;
314 if (exec_counts)
316 e->count = exec_counts[exec_counts_pos++];
318 else
319 e->count = 0;
321 EDGE_INFO (e)->count_valid = 1;
322 BB_INFO (bb)->succ_count--;
323 BB_INFO (e->dest)->pred_count--;
324 if (rtl_dump_file)
326 fprintf (rtl_dump_file, "\nRead edge from %i to %i, count:",
327 bb->index, e->dest->index);
328 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
329 (HOST_WIDEST_INT) e->count);
334 if (rtl_dump_file)
335 fprintf (rtl_dump_file, "\n%d edge counts read\n", num_edges);
337 /* For every block in the file,
338 - if every exit/entrance edge has a known count, then set the block count
339 - if the block count is known, and every exit/entrance edge but one has
340 a known execution count, then set the count of the remaining edge
342 As edge counts are set, decrement the succ/pred count, but don't delete
343 the edge, that way we can easily tell when all edges are known, or only
344 one edge is unknown. */
346 /* The order that the basic blocks are iterated through is important.
347 Since the code that finds spanning trees starts with block 0, low numbered
348 edges are put on the spanning tree in preference to high numbered edges.
349 Hence, most instrumented edges are at the end. Graph solving works much
350 faster if we propagate numbers from the end to the start.
352 This takes an average of slightly more than 3 passes. */
354 changes = 1;
355 passes = 0;
356 while (changes)
358 passes++;
359 changes = 0;
360 FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
362 struct bb_info *bi = BB_INFO (bb);
363 if (! bi->count_valid)
365 if (bi->succ_count == 0)
367 edge e;
368 gcov_type total = 0;
370 for (e = bb->succ; e; e = e->succ_next)
371 total += e->count;
372 bb->count = total;
373 bi->count_valid = 1;
374 changes = 1;
376 else if (bi->pred_count == 0)
378 edge e;
379 gcov_type total = 0;
381 for (e = bb->pred; e; e = e->pred_next)
382 total += e->count;
383 bb->count = total;
384 bi->count_valid = 1;
385 changes = 1;
388 if (bi->count_valid)
390 if (bi->succ_count == 1)
392 edge e;
393 gcov_type total = 0;
395 /* One of the counts will be invalid, but it is zero,
396 so adding it in also doesn't hurt. */
397 for (e = bb->succ; e; e = e->succ_next)
398 total += e->count;
400 /* Seedgeh for the invalid edge, and set its count. */
401 for (e = bb->succ; e; e = e->succ_next)
402 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
403 break;
405 /* Calculate count for remaining edge by conservation. */
406 total = bb->count - total;
408 if (! e)
409 abort ();
410 EDGE_INFO (e)->count_valid = 1;
411 e->count = total;
412 bi->succ_count--;
414 BB_INFO (e->dest)->pred_count--;
415 changes = 1;
417 if (bi->pred_count == 1)
419 edge e;
420 gcov_type total = 0;
422 /* One of the counts will be invalid, but it is zero,
423 so adding it in also doesn't hurt. */
424 for (e = bb->pred; e; e = e->pred_next)
425 total += e->count;
427 /* Search for the invalid edge, and set its count. */
428 for (e = bb->pred; e; e = e->pred_next)
429 if (!EDGE_INFO (e)->count_valid && !EDGE_INFO (e)->ignore)
430 break;
432 /* Calculate count for remaining edge by conservation. */
433 total = bb->count - total + e->count;
435 if (! e)
436 abort ();
437 EDGE_INFO (e)->count_valid = 1;
438 e->count = total;
439 bi->pred_count--;
441 BB_INFO (e->src)->succ_count--;
442 changes = 1;
447 if (rtl_dump_file)
448 dump_flow_info (rtl_dump_file);
450 total_num_passes += passes;
451 if (rtl_dump_file)
452 fprintf (rtl_dump_file, "Graph solving took %d passes.\n\n", passes);
454 /* If the graph has been correctly solved, every block will have a
455 succ and pred count of zero. */
456 FOR_EACH_BB (bb)
458 if (BB_INFO (bb)->succ_count || BB_INFO (bb)->pred_count)
459 abort ();
462 /* For every edge, calculate its branch probability and add a reg_note
463 to the branch insn to indicate this. */
465 for (i = 0; i < 20; i++)
466 hist_br_prob[i] = 0;
467 num_never_executed = 0;
468 num_branches = 0;
470 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
472 edge e;
473 rtx note;
475 if (bb->count < 0)
477 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
478 bb->index, (int)bb->count);
479 bb->count = 0;
481 for (e = bb->succ; e; e = e->succ_next)
483 /* Function may return twice in the cased the called fucntion is
484 setjmp or calls fork, but we can't represent this by extra
485 edge from the entry, since extra edge from the exit is
486 already present. We get negative frequency from the entry
487 point. */
488 if ((e->count < 0
489 && e->dest == EXIT_BLOCK_PTR)
490 || (e->count > bb->count
491 && e->dest != EXIT_BLOCK_PTR))
493 rtx insn = bb->end;
495 while (GET_CODE (insn) != CALL_INSN
496 && insn != bb->head
497 && keep_with_call_p (insn))
498 insn = PREV_INSN (insn);
499 if (GET_CODE (insn) == CALL_INSN)
500 e->count = e->count < 0 ? 0 : bb->count;
502 if (e->count < 0 || e->count > bb->count)
504 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
505 e->src->index, e->dest->index,
506 (int)e->count);
507 e->count = bb->count / 2;
510 if (bb->count)
512 for (e = bb->succ; e; e = e->succ_next)
513 e->probability = (e->count * REG_BR_PROB_BASE + bb->count / 2) / bb->count;
514 if (bb->index >= 0
515 && any_condjump_p (bb->end)
516 && bb->succ->succ_next)
518 int prob;
519 edge e;
520 int index;
522 /* Find the branch edge. It is possible that we do have fake
523 edges here. */
524 for (e = bb->succ; e->flags & (EDGE_FAKE | EDGE_FALLTHRU);
525 e = e->succ_next)
526 continue; /* Loop body has been intentionally left blank. */
528 prob = e->probability;
529 index = prob * 20 / REG_BR_PROB_BASE;
531 if (index == 20)
532 index = 19;
533 hist_br_prob[index]++;
535 note = find_reg_note (bb->end, REG_BR_PROB, 0);
536 /* There may be already note put by some other pass, such
537 as builtin_expect expander. */
538 if (note)
539 XEXP (note, 0) = GEN_INT (prob);
540 else
541 REG_NOTES (bb->end)
542 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
543 REG_NOTES (bb->end));
544 num_branches++;
547 /* Otherwise distribute the probabilities evenly so we get sane
548 sum. Use simple heuristics that if there are normal edges,
549 give all abnormals frequency of 0, otherwise distribute the
550 frequency over abnormals (this is the case of noreturn
551 calls). */
552 else
554 int total = 0;
556 for (e = bb->succ; e; e = e->succ_next)
557 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
558 total ++;
559 if (total)
561 for (e = bb->succ; e; e = e->succ_next)
562 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
563 e->probability = REG_BR_PROB_BASE / total;
564 else
565 e->probability = 0;
567 else
569 for (e = bb->succ; e; e = e->succ_next)
570 total ++;
571 for (e = bb->succ; e; e = e->succ_next)
572 e->probability = REG_BR_PROB_BASE / total;
574 if (bb->index >= 0
575 && any_condjump_p (bb->end)
576 && bb->succ->succ_next)
577 num_branches++, num_never_executed;
581 if (rtl_dump_file)
583 fprintf (rtl_dump_file, "%d branches\n", num_branches);
584 fprintf (rtl_dump_file, "%d branches never executed\n",
585 num_never_executed);
586 if (num_branches)
587 for (i = 0; i < 10; i++)
588 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
589 (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
590 5 * i, 5 * i + 5);
592 total_num_branches += num_branches;
593 total_num_never_executed += num_never_executed;
594 for (i = 0; i < 20; i++)
595 total_hist_br_prob[i] += hist_br_prob[i];
597 fputc ('\n', rtl_dump_file);
598 fputc ('\n', rtl_dump_file);
601 free_aux_for_blocks ();
604 /* Instrument and/or analyze program behavior based on program flow graph.
605 In either case, this function builds a flow graph for the function being
606 compiled. The flow graph is stored in BB_GRAPH.
608 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
609 the flow graph that are needed to reconstruct the dynamic behavior of the
610 flow graph.
612 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
613 information from a data file containing edge count information from previous
614 executions of the function being compiled. In this case, the flow graph is
615 annotated with actual execution counts, which are later propagated into the
616 rtl for optimization purposes.
618 Main entry point of this file. */
620 void
621 branch_prob (void)
623 basic_block bb;
624 unsigned i;
625 unsigned num_edges, ignored_edges;
626 unsigned num_instrumented;
627 struct edge_list *el;
628 unsigned n_values = 0;
629 struct histogram_value *values = NULL;
631 total_num_times_called++;
633 flow_call_edges_add (NULL);
634 add_noreturn_fake_exit_edges ();
636 /* We can't handle cyclic regions constructed using abnormal edges.
637 To avoid these we replace every source of abnormal edge by a fake
638 edge from entry node and every destination by fake edge to exit.
639 This keeps graph acyclic and our calculation exact for all normal
640 edges except for exit and entrance ones.
642 We also add fake exit edges for each call and asm statement in the
643 basic block, since it may not return. */
645 FOR_EACH_BB (bb)
647 int need_exit_edge = 0, need_entry_edge = 0;
648 int have_exit_edge = 0, have_entry_edge = 0;
649 edge e;
651 /* Functions returning multiple times are not handled by extra edges.
652 Instead we simply allow negative counts on edges from exit to the
653 block past call and corresponding probabilities. We can't go
654 with the extra edges because that would result in flowgraph that
655 needs to have fake edges outside the spanning tree. */
657 for (e = bb->succ; e; e = e->succ_next)
659 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
660 && e->dest != EXIT_BLOCK_PTR)
661 need_exit_edge = 1;
662 if (e->dest == EXIT_BLOCK_PTR)
663 have_exit_edge = 1;
665 for (e = bb->pred; e; e = e->pred_next)
667 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
668 && e->src != ENTRY_BLOCK_PTR)
669 need_entry_edge = 1;
670 if (e->src == ENTRY_BLOCK_PTR)
671 have_entry_edge = 1;
674 if (need_exit_edge && !have_exit_edge)
676 if (rtl_dump_file)
677 fprintf (rtl_dump_file, "Adding fake exit edge to bb %i\n",
678 bb->index);
679 make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
681 if (need_entry_edge && !have_entry_edge)
683 if (rtl_dump_file)
684 fprintf (rtl_dump_file, "Adding fake entry edge to bb %i\n",
685 bb->index);
686 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
690 el = create_edge_list ();
691 num_edges = NUM_EDGES (el);
692 alloc_aux_for_edges (sizeof (struct edge_info));
694 /* The basic blocks are expected to be numbered sequentially. */
695 compact_blocks ();
697 ignored_edges = 0;
698 for (i = 0 ; i < num_edges ; i++)
700 edge e = INDEX_EDGE (el, i);
701 e->count = 0;
703 /* Mark edges we've replaced by fake edges above as ignored. */
704 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
705 && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
707 EDGE_INFO (e)->ignore = 1;
708 ignored_edges++;
712 #ifdef ENABLE_CHECKING
713 verify_flow_info ();
714 #endif
716 /* Create spanning tree from basic block graph, mark each edge that is
717 on the spanning tree. We insert as many abnormal and critical edges
718 as possible to minimize number of edge splits necessary. */
720 find_spanning_tree (el);
722 /* Fake edges that are not on the tree will not be instrumented, so
723 mark them ignored. */
724 for (num_instrumented = i = 0; i < num_edges; i++)
726 edge e = INDEX_EDGE (el, i);
727 struct edge_info *inf = EDGE_INFO (e);
729 if (inf->ignore || inf->on_tree)
730 /*NOP*/;
731 else if (e->flags & EDGE_FAKE)
733 inf->ignore = 1;
734 ignored_edges++;
736 else
737 num_instrumented++;
/* The +2 accounts for the ENTRY and EXIT blocks, which n_basic_blocks
   does not include.  */
740 total_num_blocks += n_basic_blocks + 2;
741 if (rtl_dump_file)
742 fprintf (rtl_dump_file, "%d basic blocks\n", n_basic_blocks);
744 total_num_edges += num_edges;
745 if (rtl_dump_file)
746 fprintf (rtl_dump_file, "%d edges\n", num_edges);
748 total_num_edges_ignored += ignored_edges;
749 if (rtl_dump_file)
750 fprintf (rtl_dump_file, "%d ignored edges\n", ignored_edges);
752 /* Write the data from which gcov can reconstruct the basic block
753 graph. */
755 /* Basic block flags */
756 if (coverage_begin_output ())
758 gcov_position_t offset;
760 offset = gcov_write_tag (GCOV_TAG_BLOCKS);
761 for (i = 0; i != (unsigned) (n_basic_blocks + 2); i++)
762 gcov_write_unsigned (0);
763 gcov_write_length (offset);
766 /* Keep all basic block indexes nonnegative in the gcov output.
767 Index 0 is used for entry block, last index is for exit block. */
769 ENTRY_BLOCK_PTR->index = -1;
770 EXIT_BLOCK_PTR->index = last_basic_block;
771 #define BB_TO_GCOV_INDEX(bb) ((bb)->index + 1)
773 /* Arcs */
774 if (coverage_begin_output ())
776 gcov_position_t offset;
778 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
780 edge e;
782 offset = gcov_write_tag (GCOV_TAG_ARCS);
783 gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
785 for (e = bb->succ; e; e = e->succ_next)
787 struct edge_info *i = EDGE_INFO (e);
788 if (!i->ignore)
790 unsigned flag_bits = 0;
792 if (i->on_tree)
793 flag_bits |= GCOV_ARC_ON_TREE;
794 if (e->flags & EDGE_FAKE)
795 flag_bits |= GCOV_ARC_FAKE;
796 if (e->flags & EDGE_FALLTHRU)
797 flag_bits |= GCOV_ARC_FALLTHROUGH;
799 gcov_write_unsigned (BB_TO_GCOV_INDEX (e->dest));
800 gcov_write_unsigned (flag_bits);
804 gcov_write_length (offset);
808 /* Line numbers. */
809 if (coverage_begin_output ())
811 char const *prev_file_name = NULL;
812 gcov_position_t offset;
814 FOR_EACH_BB (bb)
816 rtx insn = bb->head;
817 int ignore_next_note = 0;
819 offset = 0;
821 /* We are looking for line number notes. Search backward
822 before basic block to find correct ones. */
823 insn = prev_nonnote_insn (insn);
824 if (!insn)
825 insn = get_insns ();
826 else
827 insn = NEXT_INSN (insn);
829 while (insn != bb->end)
831 if (GET_CODE (insn) == NOTE)
833 /* Must ignore the line number notes that
834 immediately follow the end of an inline function
835 to avoid counting it twice. There is a note
836 before the call, and one after the call. */
837 if (NOTE_LINE_NUMBER (insn)
838 == NOTE_INSN_REPEATED_LINE_NUMBER)
839 ignore_next_note = 1;
840 else if (NOTE_LINE_NUMBER (insn) <= 0)
841 /*NOP*/;
842 else if (ignore_next_note)
843 ignore_next_note = 0;
844 else
/* Emit the GCOV_TAG_LINES record lazily, only once this block turns
   out to have at least one real line-number note.  */
846 if (!offset)
848 offset = gcov_write_tag (GCOV_TAG_LINES);
849 gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
852 /* If this is a new source file, then output the
853 file's name to the .bb file. */
854 if (!prev_file_name
855 || strcmp (NOTE_SOURCE_FILE (insn),
856 prev_file_name))
858 prev_file_name = NOTE_SOURCE_FILE (insn);
859 gcov_write_unsigned (0);
860 gcov_write_string (prev_file_name);
862 gcov_write_unsigned (NOTE_LINE_NUMBER (insn));
865 insn = NEXT_INSN (insn);
868 if (offset)
870 /* A file of NULL indicates the end of run. */
871 gcov_write_unsigned (0);
872 gcov_write_string (NULL);
873 gcov_write_length (offset);
/* Restore the special ENTRY/EXIT indexes overwritten above.  */
877 ENTRY_BLOCK_PTR->index = ENTRY_BLOCK;
878 EXIT_BLOCK_PTR->index = EXIT_BLOCK;
879 #undef BB_TO_GCOV_INDEX
881 if (flag_profile_values)
883 life_analysis (get_insns (), NULL, PROP_DEATH_NOTES);
884 find_values_to_profile (&n_values, &values);
885 allocate_reg_info (max_reg_num (), FALSE, FALSE);
888 if (flag_branch_probabilities)
889 compute_branch_probabilities ();
891 /* For each edge not on the spanning tree, add counting code as rtl. */
892 if (profile_arc_flag
893 && coverage_counter_alloc (GCOV_COUNTER_ARCS, num_instrumented))
895 unsigned n_instrumented = instrument_edges (el);
/* instrument_edges must have visited exactly the edges counted when
   the tree was built; any mismatch is a pass bug.  */
897 if (n_instrumented != num_instrumented)
898 abort ();
900 if (flag_profile_values)
901 instrument_values (n_values, values);
903 /* Commit changes done by instrumentation. */
904 commit_edge_insertions_watch_calls ();
905 allocate_reg_info (max_reg_num (), FALSE, FALSE);
908 if (flag_profile_values)
909 count_or_remove_death_notes (NULL, 1);
910 remove_fake_edges ();
911 free_aux_for_edges ();
912 /* Re-merge split basic blocks and the mess introduced by
913 insert_insn_on_edge. */
914 cleanup_cfg (profile_arc_flag ? CLEANUP_EXPENSIVE : 0);
915 if (rtl_dump_file)
916 dump_flow_info (rtl_dump_file);
918 free_edge_list (el);
921 /* Union find algorithm implementation for the basic blocks using
922 aux fields. */
924 static basic_block
925 find_group (basic_block bb)
927 basic_block group = bb, bb1;
929 while ((basic_block) group->aux != group)
930 group = (basic_block) group->aux;
932 /* Compress path. */
933 while ((basic_block) bb->aux != group)
935 bb1 = (basic_block) bb->aux;
936 bb->aux = (void *) group;
937 bb = bb1;
939 return group;
942 static void
943 union_groups (basic_block bb1, basic_block bb2)
945 basic_block bb1g = find_group (bb1);
946 basic_block bb2g = find_group (bb2);
948 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
949 this code is unlikely going to be performance problem anyway. */
950 if (bb1g == bb2g)
951 abort ();
953 bb1g->aux = bb2g;
956 /* This function searches all of the edges in the program flow graph, and puts
957 as many bad edges as possible onto the spanning tree. Bad edges include
958 abnormals edges, which can't be instrumented at the moment. Since it is
959 possible for fake edges to form a cycle, we will have to develop some
960 better way in the future. Also put critical edges to the tree, since they
961 are more expensive to instrument. */
963 static void
964 find_spanning_tree (struct edge_list *el)
966 int i;
967 int num_edges = NUM_EDGES (el);
968 basic_block bb;
970 /* We use aux field for standard union-find algorithm. */
971 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
972 bb->aux = bb;
974 /* Add fake edge exit to entry we can't instrument. */
975 union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
977 /* First add all abnormal edges to the tree unless they form a cycle. Also
978 add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
979 setting return value from function. */
980 for (i = 0; i < num_edges; i++)
982 edge e = INDEX_EDGE (el, i);
983 if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
984 || e->dest == EXIT_BLOCK_PTR)
985 && !EDGE_INFO (e)->ignore
986 && (find_group (e->src) != find_group (e->dest)))
988 if (rtl_dump_file)
989 fprintf (rtl_dump_file, "Abnormal edge %d to %d put to tree\n",
990 e->src->index, e->dest->index);
991 EDGE_INFO (e)->on_tree = 1;
992 union_groups (e->src, e->dest);
996 /* Now insert all critical edges to the tree unless they form a cycle. */
997 for (i = 0; i < num_edges; i++)
999 edge e = INDEX_EDGE (el, i);
1000 if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
1001 && find_group (e->src) != find_group (e->dest))
1003 if (rtl_dump_file)
1004 fprintf (rtl_dump_file, "Critical edge %d to %d put to tree\n",
1005 e->src->index, e->dest->index);
1006 EDGE_INFO (e)->on_tree = 1;
1007 union_groups (e->src, e->dest);
1011 /* And now the rest. */
1012 for (i = 0; i < num_edges; i++)
1014 edge e = INDEX_EDGE (el, i);
1015 if (!EDGE_INFO (e)->ignore
1016 && find_group (e->src) != find_group (e->dest))
1018 if (rtl_dump_file)
1019 fprintf (rtl_dump_file, "Normal edge %d to %d put to tree\n",
1020 e->src->index, e->dest->index);
1021 EDGE_INFO (e)->on_tree = 1;
1022 union_groups (e->src, e->dest);
1026 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1027 bb->aux = NULL;
1030 /* Perform file-level initialization for branch-prob processing. */
1032 void
1033 init_branch_prob (void)
1035 int i;
1037 total_num_blocks = 0;
1038 total_num_edges = 0;
1039 total_num_edges_ignored = 0;
1040 total_num_edges_instrumented = 0;
1041 total_num_blocks_created = 0;
1042 total_num_passes = 0;
1043 total_num_times_called = 0;
1044 total_num_branches = 0;
1045 total_num_never_executed = 0;
1046 for (i = 0; i < 20; i++)
1047 total_hist_br_prob[i] = 0;
1050 /* Performs file-level cleanup after branch-prob processing
1051 is completed. */
/* Only dumps the accumulated statistics; emits nothing when dumping is
   disabled.  */
1053 void
1054 end_branch_prob (void)
1056 if (rtl_dump_file)
1058 fprintf (rtl_dump_file, "\n");
1059 fprintf (rtl_dump_file, "Total number of blocks: %d\n",
1060 total_num_blocks);
1061 fprintf (rtl_dump_file, "Total number of edges: %d\n", total_num_edges);
1062 fprintf (rtl_dump_file, "Total number of ignored edges: %d\n",
1063 total_num_edges_ignored);
1064 fprintf (rtl_dump_file, "Total number of instrumented edges: %d\n",
1065 total_num_edges_instrumented);
1066 fprintf (rtl_dump_file, "Total number of blocks created: %d\n",
1067 total_num_blocks_created);
1068 fprintf (rtl_dump_file, "Total number of graph solution passes: %d\n",
1069 total_num_passes);
1070 if (total_num_times_called != 0)
/* Adding half the divisor rounds the average to nearest.  */
1071 fprintf (rtl_dump_file, "Average number of graph solution passes: %d\n",
1072 (total_num_passes + (total_num_times_called >> 1))
1073 / total_num_times_called);
1074 fprintf (rtl_dump_file, "Total number of branches: %d\n",
1075 total_num_branches);
1076 fprintf (rtl_dump_file, "Total number of branches never executed: %d\n",
1077 total_num_never_executed);
1078 if (total_num_branches)
1080 int i;
/* Buckets i and 19-i are folded together so each line covers a
   probability range and its mirror image.  */
1082 for (i = 0; i < 10; i++)
1083 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
1084 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1085 / total_num_branches, 5*i, 5*i+5);
1091 /* Output instructions as RTL to increment the edge execution count. */
1093 static rtx
1094 gen_edge_profiler (int edgeno)
1096 rtx ref = coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1097 rtx tmp;
1098 enum machine_mode mode = GET_MODE (ref);
1099 rtx sequence;
1101 start_sequence ();
1102 ref = validize_mem (ref);
1104 tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
1105 ref, 0, OPTAB_WIDEN);
1107 if (tmp != ref)
1108 emit_move_insn (copy_rtx (ref), tmp);
1110 sequence = get_insns ();
1111 end_sequence ();
1112 return sequence;
/* Output instructions as RTL to increment the interval histogram counter.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is offset of the counter position.

   Counter layout (established by the address arithmetic below): counters
   BASE .. BASE + steps - 1 hold the in-range buckets for
   int_start .. int_start + steps - 1; if may_be_more, counter
   BASE + steps collects values above the interval; if may_be_less, the
   counter after that collects values below it.  */

static rtx
gen_interval_profiler (struct histogram_value *value, unsigned tag,
		       unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, tmp1, mr, val;
  rtx sequence;
  rtx more_label = gen_label_rtx ();
  rtx less_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();
  /* Byte stride between adjacent gcov counters.  */
  int per_counter = gcov_size / BITS_PER_UNIT;

  start_sequence ();

  /* Insns that compute VALUE->value, if any are needed.  */
  if (value->seq)
    emit_insn (value->seq);

  /* MR will hold the address of the counter to increment.  */
  mr = gen_reg_rtx (Pmode);

  /* TMP = address of counter BASE in section TAG.  */
  tmp = coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));

  /* VAL = measured value - int_start, i.e. the bucket index.  */
  val = expand_simple_binop (value->mode, MINUS,
			     copy_rtx (value->value),
			     GEN_INT (value->hdata.intvl.int_start),
			     NULL_RTX, 0, OPTAB_WIDEN);

  /* Dispatch out-of-range values to their dedicated counters.  NOTE: the
     comparisons are emitted as signed (unsignedp == 0); presumably VAL is
     expected to fit the signed range of value->mode.  */
  if (value->hdata.intvl.may_be_more)
    do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
			     GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
  if (value->hdata.intvl.may_be_less)
    do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
			     NULL_RTX, NULL_RTX, less_label);

  /* We are in range: MR = base address + VAL * per_counter.  */
  tmp1 = expand_simple_binop (value->mode, MULT,
			      copy_rtx (val), GEN_INT (per_counter),
			      NULL_RTX, 0, OPTAB_WIDEN);
  tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
			      0, OPTAB_WIDEN);
  if (tmp1 != mr)
    emit_move_insn (copy_rtx (mr), tmp1);

  /* Skip the out-of-range handlers when they exist.  */
  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    {
      emit_jump_insn (gen_jump (end_of_code_label));
      emit_barrier ();
    }

  /* Above the interval: MR = base + steps * per_counter.  */
  if (value->hdata.intvl.may_be_more)
    {
      emit_label (more_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
				  GEN_INT (per_counter * value->hdata.intvl.steps),
				  mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
	emit_move_insn (copy_rtx (mr), tmp1);
      if (value->hdata.intvl.may_be_less)
	{
	  emit_jump_insn (gen_jump (end_of_code_label));
	  emit_barrier ();
	}
    }

  /* Below the interval: MR = base + (steps + 1 if an overflow counter
     precedes it) * per_counter.  */
  if (value->hdata.intvl.may_be_less)
    {
      emit_label (less_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
				  GEN_INT (per_counter * (value->hdata.intvl.steps
					   + (value->hdata.intvl.may_be_more ? 1 : 0))),
				  mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
	emit_move_insn (copy_rtx (mr), tmp1);
    }

  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    emit_label (end_of_code_label);

  /* Increment the selected counter: *MR += 1.  */
  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
			     mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  /* The sequence contains fresh jumps to the labels created above; fix up
     the JUMP_LABEL fields before handing it to the caller.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
/* Output instructions as RTL to increment the power of two histogram counter.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is offset of the counter position.

   The emitted loop advances the counter pointer once per right-shift of
   the value until it reaches zero, so a value of 2^K bumps counter
   BASE + K + 1; counter BASE collects non-positive and non-power-of-two
   values when may_be_other is set.  NOTE(review): if may_be_other is
   clear, values that are not exact powers of two still enter the loop —
   presumably the caller guarantees that cannot happen; verify at call
   sites.  */

static rtx
gen_pow2_profiler (struct histogram_value *value, unsigned tag, unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, mr, uval;
  rtx sequence;
  rtx end_of_code_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  /* Byte stride between adjacent gcov counters.  */
  int per_counter = gcov_size / BITS_PER_UNIT;

  start_sequence ();

  /* Insns that compute VALUE->value, if any are needed.  */
  if (value->seq)
    emit_insn (value->seq);

  /* MR walks the counter array; start it at counter BASE.  */
  mr = gen_reg_rtx (Pmode);
  tmp = coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));
  emit_move_insn (mr, tmp);

  /* Working copy of the measured value.  */
  uval = gen_reg_rtx (value->mode);
  emit_move_insn (uval, copy_rtx (value->value));

  /* Check for non-power of 2: such values go to counter BASE, i.e. we
     jump to the increment with MR still at the base address.  */
  if (value->hdata.pow2.may_be_other)
    {
      /* value <= 0 cannot be a power of two.  */
      do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
			       NULL_RTX, NULL_RTX, end_of_code_label);
      /* (value & (value - 1)) != 0 iff value is not a power of two.  */
      tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
				 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
      tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
				 NULL_RTX, 0, OPTAB_WIDEN);
      do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
			       NULL_RTX, end_of_code_label);
    }

  /* Count log_2(value): one counter-advance per shift until UVAL hits
     zero.  */
  emit_label (loop_label);

  /* MR += per_counter.  */
  tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
  if (tmp != mr)
    emit_move_insn (copy_rtx (mr), tmp);

  /* UVAL >>= 1 (arithmetic shift).  */
  tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
			     uval, 0, OPTAB_WIDEN);
  if (tmp != uval)
    emit_move_insn (copy_rtx (uval), tmp);

  do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
			   NULL_RTX, NULL_RTX, loop_label);

  /* Increase the counter MR ended up pointing at.  */
  emit_label (end_of_code_label);

  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
			     mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  /* Fix up JUMP_LABELs for the jumps created above.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
/* Output instructions as RTL for code to find the most common value.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is offset of the counter position.

   Uses three consecutive counters: BASE holds the current candidate
   value, BASE + 1 its vote count, BASE + 2 the total number of
   executions.  The update is a majority-vote heuristic: a matching value
   increments the vote count; a mismatch decrements it, and when the
   count has dropped to zero the mismatching value replaces the candidate
   and starts a fresh count.  */

static rtx
gen_one_value_profiler (struct histogram_value *value, unsigned tag,
			unsigned base)
{
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
  rtx tmp, uval;
  rtx sequence;
  rtx same_label = gen_label_rtx ();
  rtx zero_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();

  start_sequence ();

  /* Insns that compute VALUE->value, if any are needed.  */
  if (value->seq)
    emit_insn (value->seq);

  /* The three counters described above.  */
  stored_value_ref = coverage_counter_ref (tag, base);
  counter_ref = coverage_counter_ref (tag, base + 1);
  all_ref = coverage_counter_ref (tag, base + 2);
  stored_value = validize_mem (stored_value_ref);
  counter = validize_mem (counter_ref);
  all = validize_mem (all_ref);

  /* Widen/narrow the measured value into the gcov counter mode.  */
  uval = gen_reg_rtx (mode);
  convert_move (uval, copy_rtx (value->value), 0);

  /* Check if the stored value matches. */
  do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
			   0, mode, NULL_RTX, NULL_RTX, same_label);

  /* Does not match; check whether the counter is zero. */
  do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
			   NULL_RTX, NULL_RTX, zero_label);

  /* The counter is not zero yet: vote against the candidate
     (counter -= 1) and skip the increment paths.  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
			     counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_jump_insn (gen_jump (end_of_code_label));
  emit_barrier ();

  emit_label (zero_label);
  /* Set new value: the vote count is exhausted, so this value becomes
     the new candidate and falls through to the increment below.  */
  emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));

  emit_label (same_label);
  /* Increase the counter. */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
			     counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_label (end_of_code_label);

  /* Increase the counter of all executions; this seems redundant given
     that ve have counts for edges in cfg, but it may happen that some
     optimization will change the counts for the block (either because
     it is unable to update them correctly, or because it will duplicate
     the block or its part). */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
			     all, 0, OPTAB_WIDEN);

  if (tmp != all)
    emit_move_insn (copy_rtx (all), tmp);
  sequence = get_insns ();
  end_sequence ();
  /* Fix up JUMP_LABELs for the jumps created above.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
/* Output instructions as RTL for code to find the most common value of
   a difference between two evaluations of an expression.
   VALUE is the expression whose value is profiled.  TAG is the tag of the
   section for counters, BASE is offset of the counter position.

   Counter BASE stores the previous evaluation of the expression; the
   delta between the current and previous value is fed through the
   one-value profiler, whose three counters occupy BASE + 1 .. BASE + 3.  */

static rtx
gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
			  unsigned base)
{
  struct histogram_value one_value_delta;
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx stored_value_ref, stored_value, tmp, uval;
  rtx sequence;

  start_sequence ();

  /* Insns that compute VALUE->value, if any are needed.  */
  if (value->seq)
    emit_insn (value->seq);

  /* Counter BASE holds the value seen on the previous execution.  */
  stored_value_ref = coverage_counter_ref (tag, base);
  stored_value = validize_mem (stored_value_ref);

  /* Widen/narrow the measured value into the gcov counter mode.  */
  uval = gen_reg_rtx (mode);
  convert_move (uval, copy_rtx (value->value), 0);

  /* TMP = current value - previously stored value.  */
  tmp = expand_simple_binop (mode, MINUS,
			     copy_rtx (uval), copy_rtx (stored_value),
			     NULL_RTX, 0, OPTAB_WIDEN);

  /* Build a synthetic histogram_value describing the delta and profile
     it with the one-value (most common value) profiler.  */
  one_value_delta.value = tmp;
  one_value_delta.mode = mode;
  one_value_delta.seq = NULL_RTX;
  one_value_delta.insn = value->insn;
  one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
  emit_insn (gen_one_value_profiler (&one_value_delta, tag, base + 1));

  /* Remember the current value for the next execution.  */
  emit_move_insn (copy_rtx (stored_value), uval);
  sequence = get_insns ();
  end_sequence ();
  /* The nested one-value profiler emitted jumps; fix up JUMP_LABELs.  */
  rebuild_jump_labels (sequence);
  return sequence;
}