1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
56 #include "tree-flow.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
62 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
63 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
64 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
65 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
67 /* Random guesstimation given names. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
73 static void combine_predictions_for_insn (rtx
, basic_block
);
74 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
75 static void estimate_loops_at_level (struct loop
*loop
);
76 static void propagate_freq (struct loop
*);
77 static void estimate_bb_frequencies (struct loops
*);
78 static int counts_to_freqs (void);
79 static bool last_basic_block_p (basic_block
);
80 static void compute_function_frequency (void);
81 static void choose_function_section (void);
82 static bool can_predict_insn_p (rtx
);
84 /* Information we hold about each branch predictor.
85 Filled using information from predict.def. */
89 const char *const name
; /* Name used in the debugging dumps. */
90 const int hitrate
; /* Expected hitrate used by
91 predict_insn_def call. */
95 /* Use given predictor without Dempster-Shaffer theory if it matches
96 using first_match heuristics. */
97 #define PRED_FLAG_FIRST_MATCH 1
99 /* Recompute hitrate in percent to our representation. */
101 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
103 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
104 static const struct predictor_info predictor_info
[]= {
105 #include "predict.def"
107 /* Upper bound on predictors. */
112 /* Return true in case BB can be CPU intensive and should be optimized
113 for maximal performance. */
/* NOTE(review): this chunk is truncated -- the return-type line, braces and
   return statements of this function are not visible here.  What is visible:
   two threshold checks on BB's profile data.  */
116 maybe_hot_bb_p (basic_block bb
)
/* With profile feedback present, compare the block's measured execution
   count against sum_max / HOT_BB_COUNT_FRACTION; presumably a block below
   that threshold is reported as not hot -- TODO confirm against full source.  */
118 if (profile_info
&& flag_branch_probabilities
120 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
/* Static estimate: compare the block's frequency against
   BB_FREQ_MAX / HOT_BB_FREQUENCY_FRACTION.  */
122 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
127 /* Return true in case BB is cold and should be optimized for size. */
/* NOTE(review): truncated -- return type, braces and return statements are
   missing from this view.  The visible checks mirror maybe_hot_bb_p: the
   same count and frequency thresholds, presumably with the opposite
   verdict (cold rather than hot) -- TODO confirm.  */
130 probably_cold_bb_p (basic_block bb
)
132 if (profile_info
&& flag_branch_probabilities
/* Feedback-based test: count below sum_max / HOT_BB_COUNT_FRACTION.  */
134 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
/* Static test: estimated frequency below the hot-frequency threshold.  */
136 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
141 /* Return true in case BB is probably never executed. */
/* NOTE(review): truncated -- return type, braces and the fall-through return
   (for the no-feedback case) are not visible here.  */
143 probably_never_executed_bb_p (basic_block bb
)
145 if (profile_info
&& flag_branch_probabilities
)
/* With feedback: scale the raw count by the number of profiled runs
   (rounding to nearest via "+ runs / 2") and report true when the
   per-run count rounds to zero.  */
146 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
150 /* Return true if the one of outgoing edges is already predicted by
/* NOTE(review): truncated -- the rest of this comment, the return type, the
   declaration of NOTE, braces and the return statements are missing from
   this view.  Visible logic: scan REG_NOTES of BB's final insn for a
   REG_BR_PRED note recorded by PREDICTOR.  */
154 rtl_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
/* Non-insns (e.g. notes/barriers) carry no REG_NOTES worth scanning.  */
157 if (!INSN_P (BB_END (bb
)))
/* Walk the note list; XEXP (note, 1) is the next-note link.  */
159 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
160 if (REG_NOTE_KIND (note
) == REG_BR_PRED
/* First operand of the note's CONCAT holds the predictor id.  */
161 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
166 /* Return true if the one of outgoing edges is already predicted by
/* NOTE(review): truncated -- the rest of this comment, the return type,
   braces and return statements are missing from this view.  Visible logic:
   walk the edge_prediction list hanging off BB's annotation and look for
   an entry made by PREDICTOR.  */
170 tree_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
172 struct edge_prediction
*i
= bb_ann (bb
)->predictions
;
/* Linear scan of the singly-linked prediction list.  */
173 for (i
= bb_ann (bb
)->predictions
; i
; i
= i
->next
)
174 if (i
->predictor
== predictor
)
/* Attach a REG_BR_PRED note recording (PREDICTOR, PROBABILITY) to INSN.
   NOTE(review): truncated -- the static return-type line, braces, the bodies
   of the two guard ifs, and the "REG_NOTES (insn)" left-hand side of the
   assignment below are missing from this view.  */
180 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
/* Only conditional jumps can carry a branch prediction.  */
182 if (!any_condjump_p (insn
))
/* Guessing is disabled; presumably an early return -- TODO confirm.  */
184 if (!flag_guess_branch_prob
)
/* Build EXPR_LIST (REG_BR_PRED, CONCAT (predictor, probability)) and,
   presumably, prepend it to the insn's note list.  */
188 = gen_rtx_EXPR_LIST (REG_BR_PRED
,
189 gen_rtx_CONCAT (VOIDmode
,
190 GEN_INT ((int) predictor
),
191 GEN_INT ((int) probability
)),
195 /* Predict insn by given predictor. */
/* NOTE(review): truncated -- return type, braces and the "if (taken != TAKEN)"
   style guard around the inversion below are not visible in this chunk.  */
198 predict_insn_def (rtx insn
, enum br_predictor predictor
,
199 enum prediction taken
)
/* Start from the predictor's table-driven hitrate.  */
201 int probability
= predictor_info
[(int) predictor
].hitrate
;
/* Invert the probability; presumably done when TAKEN == NOT_TAKEN --
   the controlling condition is missing from this view.  */
204 probability
= REG_BR_PROB_BASE
- probability
;
206 predict_insn (insn
, predictor
, probability
);
209 /* Predict edge E with given probability if possible. */
/* NOTE(review): truncated -- return type, braces, the declaration of
   LAST_INSN, and the early return inside the condjump guard are missing
   from this view.  */
212 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
215 last_insn
= BB_END (e
->src
);
217 /* We can store the branch prediction information only about
218 conditional jumps. */
219 if (!any_condjump_p (last_insn
))
222 /* We always store probability of branching. */
/* The fallthru edge is the not-taken direction, so flip the probability
   before recording it on the jump.  */
223 if (e
->flags
& EDGE_FALLTHRU
)
224 probability
= REG_BR_PROB_BASE
- probability
;
226 predict_insn (last_insn
, predictor
, probability
);
229 /* Predict edge E with the given PROBABILITY. */
/* NOTE(review): truncated -- return type, braces, and the line storing E
   itself into the new record (i->edge = e in the full source, presumably)
   are missing from this view.  Visible logic: allocate a GC'd
   edge_prediction record and push it onto the source block's list.  */
231 tree_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
233 struct edge_prediction
*i
= ggc_alloc (sizeof (struct edge_prediction
));
/* Prepend the new record to the per-block prediction list.  */
235 i
->next
= bb_ann (e
->src
)->predictions
;
236 bb_ann (e
->src
)->predictions
= i
;
237 i
->probability
= probability
;
238 i
->predictor
= predictor
;
242 /* Return true when we can store prediction on insn INSN.
243 At the moment we represent predictions only on conditional
244 jumps, not at computed jump or other complicated cases. */
/* NOTE(review): truncated -- the static bool line and braces are missing
   from this view.  The "->succ->succ_next" test requires the block to have
   at least two successor edges, i.e. a real two-way branch.  */
246 can_predict_insn_p (rtx insn
)
248 return (JUMP_P (insn
)
249 && any_condjump_p (insn
)
250 && BLOCK_FOR_INSN (insn
)->succ
->succ_next
);
253 /* Predict edge E by given predictor if possible. */
/* NOTE(review): truncated -- return type, braces, and the guard controlling
   the probability inversion below are missing from this view.  This is the
   edge-level counterpart of predict_insn_def.  */
256 predict_edge_def (edge e
, enum br_predictor predictor
,
257 enum prediction taken
)
/* Table-driven hitrate for this predictor.  */
259 int probability
= predictor_info
[(int) predictor
].hitrate
;
/* Invert for the not-taken case; the controlling condition (presumably
   taken != TAKEN) is not visible here -- TODO confirm.  */
262 probability
= REG_BR_PROB_BASE
- probability
;
264 predict_edge (e
, predictor
, probability
);
267 /* Invert all branch predictions or probability notes in the INSN. This needs
268 to be done each time we invert the condition used by the jump. */
/* NOTE(review): truncated -- return type, braces and the declaration of NOTE
   are missing from this view.  Visible logic: walk REG_NOTES and replace
   each probability P by REG_BR_PROB_BASE - P.  */
271 invert_br_probabilities (rtx insn
)
275 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
/* REG_BR_PROB notes store the probability directly as operand 0.  */
276 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
277 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
/* REG_BR_PRED notes store it as the second operand of a CONCAT.  */
278 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
279 XEXP (XEXP (note
, 0), 1)
280 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
283 /* Dump information about the branch prediction to the output file. */
/* NOTE(review): truncated -- return type, braces, the declaration and
   initialization of E, the NULL-file early return, and the conditions
   guarding the count output (presumably bb->count nonzero) are missing
   from this view.  */
286 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
287 basic_block bb
, int used
)
/* Skip past fallthru edges so E ends up as the taken (branch) edge;
   the loop body/step is not visible here -- TODO confirm.  */
294 while (e
&& (e
->flags
& EDGE_FALLTHRU
))
/* Print predictor name, whether it was ignored, and its probability
   as a percentage of REG_BR_PROB_BASE.  */
297 fprintf (file
, " %s heuristics%s: %.1f%%",
298 predictor_info
[predictor
].name
,
299 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
)
;
/* With profile data: show execution count of the block ...  */
303 fprintf (file
, " exec ");
304 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
/* ... and how often the predicted edge was actually taken.  */
307 fprintf (file
, " hit ");
308 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
309 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
313 fprintf (file
, "\n");
316 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
317 note if not already present. Remove now useless REG_BR_PRED notes. */
320 combine_predictions_for_insn (rtx insn
, basic_block bb
)
322 rtx prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
323 rtx
*pnote
= ®_NOTES (insn
);
325 int best_probability
= PROB_EVEN
;
326 int best_predictor
= END_PREDICTORS
;
327 int combined_probability
= REG_BR_PROB_BASE
/ 2;
329 bool first_match
= false;
333 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
336 /* We implement "first match" heuristics and use probability guessed
337 by predictor with smallest index. */
338 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
339 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
341 int predictor
= INTVAL (XEXP (XEXP (note
, 0), 0));
342 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
345 if (best_predictor
> predictor
)
346 best_probability
= probability
, best_predictor
= predictor
;
348 d
= (combined_probability
* probability
349 + (REG_BR_PROB_BASE
- combined_probability
)
350 * (REG_BR_PROB_BASE
- probability
));
352 /* Use FP math to avoid overflows of 32bit integers. */
354 /* If one probability is 0% and one 100%, avoid division by zero. */
355 combined_probability
= REG_BR_PROB_BASE
/ 2;
357 combined_probability
= (((double) combined_probability
) * probability
358 * REG_BR_PROB_BASE
/ d
+ 0.5);
361 /* Decide which heuristic to use. In case we didn't match anything,
362 use no_prediction heuristic, in case we did match, use either
363 first match or Dempster-Shaffer theory depending on the flags. */
365 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
369 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
370 combined_probability
, bb
, true);
373 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
375 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
380 combined_probability
= best_probability
;
381 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
385 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
387 int predictor
= INTVAL (XEXP (XEXP (*pnote
, 0), 0));
388 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
390 dump_prediction (dump_file
, predictor
, probability
, bb
,
391 !first_match
|| best_predictor
== predictor
);
392 *pnote
= XEXP (*pnote
, 1);
395 pnote
= &XEXP (*pnote
, 1);
401 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
402 GEN_INT (combined_probability
), REG_NOTES (insn
));
404 /* Save the prediction into CFG in case we are seeing non-degenerated
406 if (bb
->succ
->succ_next
)
408 BRANCH_EDGE (bb
)->probability
= combined_probability
;
409 FALLTHRU_EDGE (bb
)->probability
410 = REG_BR_PROB_BASE
- combined_probability
;
415 /* Combine predictions into single probability and store them into CFG.
416 Remove now useless prediction entries. */
419 combine_predictions_for_bb (FILE *file
, basic_block bb
)
421 int best_probability
= PROB_EVEN
;
422 int best_predictor
= END_PREDICTORS
;
423 int combined_probability
= REG_BR_PROB_BASE
/ 2;
425 bool first_match
= false;
427 struct edge_prediction
*pred
;
429 edge e
, first
= NULL
, second
= NULL
;
431 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
432 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
435 if (first
&& !second
)
441 /* When there is no successor or only one choice, prediction is easy.
443 We are lazy for now and predict only basic blocks with two outgoing
444 edges. It is possible to predict generic case too, but we have to
445 ignore first match heuristics and do more involved combining. Implement
449 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
450 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
451 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
454 bb_ann (bb
)->predictions
= NULL
;
456 fprintf (file
, "%i edges in bb %i predicted to even probabilities\n",
462 fprintf (file
, "Predictions for bb %i\n", bb
->index
);
464 /* We implement "first match" heuristics and use probability guessed
465 by predictor with smallest index. */
466 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
468 int predictor
= pred
->predictor
;
469 int probability
= pred
->probability
;
471 if (pred
->edge
!= first
)
472 probability
= REG_BR_PROB_BASE
- probability
;
475 if (best_predictor
> predictor
)
476 best_probability
= probability
, best_predictor
= predictor
;
478 d
= (combined_probability
* probability
479 + (REG_BR_PROB_BASE
- combined_probability
)
480 * (REG_BR_PROB_BASE
- probability
));
482 /* Use FP math to avoid overflows of 32bit integers. */
484 /* If one probability is 0% and one 100%, avoid division by zero. */
485 combined_probability
= REG_BR_PROB_BASE
/ 2;
487 combined_probability
= (((double) combined_probability
) * probability
488 * REG_BR_PROB_BASE
/ d
+ 0.5);
491 /* Decide which heuristic to use. In case we didn't match anything,
492 use no_prediction heuristic, in case we did match, use either
493 first match or Dempster-Shaffer theory depending on the flags. */
495 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
499 dump_prediction (file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
502 dump_prediction (file
, PRED_DS_THEORY
, combined_probability
, bb
,
504 dump_prediction (file
, PRED_FIRST_MATCH
, best_probability
, bb
,
509 combined_probability
= best_probability
;
510 dump_prediction (file
, PRED_COMBINED
, combined_probability
, bb
, true);
512 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
514 int predictor
= pred
->predictor
;
515 int probability
= pred
->probability
;
517 if (pred
->edge
!= bb
->succ
)
518 probability
= REG_BR_PROB_BASE
- probability
;
519 dump_prediction (file
, predictor
, probability
, bb
,
520 !first_match
|| best_predictor
== predictor
);
522 bb_ann (bb
)->predictions
= NULL
;
524 first
->probability
= combined_probability
;
525 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
528 /* Predict edge probabilities by exploiting loop structure.
529 When SIMPLELOOPS is set, attempt to count number of iterations by analyzing
532 predict_loops (struct loops
*loops_info
, bool simpleloops
)
536 /* Try to predict out blocks in a loop that are not part of a
538 for (i
= 1; i
< loops_info
->num
; i
++)
540 basic_block bb
, *bbs
;
543 struct loop
*loop
= loops_info
->parray
[i
];
544 struct niter_desc desc
;
545 unsigned HOST_WIDE_INT niter
;
547 flow_loop_scan (loop
, LOOP_EXIT_EDGES
);
548 exits
= loop
->num_exits
;
552 iv_analysis_loop_init (loop
);
553 find_simple_exit (loop
, &desc
);
555 if (desc
.simple_p
&& desc
.const_iter
)
558 niter
= desc
.niter
+ 1;
559 if (niter
== 0) /* We might overflow here. */
562 prob
= (REG_BR_PROB_BASE
563 - (REG_BR_PROB_BASE
+ niter
/2) / niter
);
564 /* Branch prediction algorithm gives 0 frequency for everything
565 after the end of loop for loop having 0 probability to finish. */
566 if (prob
== REG_BR_PROB_BASE
)
567 prob
= REG_BR_PROB_BASE
- 1;
568 predict_edge (desc
.in_edge
, PRED_LOOP_ITERATIONS
,
573 bbs
= get_loop_body (loop
);
575 for (j
= 0; j
< loop
->num_nodes
; j
++)
577 int header_found
= 0;
582 /* Bypass loop heuristics on continue statement. These
583 statements construct loops via "non-loop" constructs
584 in the source language and are better to be handled
586 if ((simpleloops
&& !can_predict_insn_p (BB_END (bb
)))
587 || predicted_by_p (bb
, PRED_CONTINUE
))
590 /* Loop branch heuristics - predict an edge back to a
591 loop's head as taken. */
592 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
593 if (e
->dest
== loop
->header
594 && e
->src
== loop
->latch
)
597 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
600 /* Loop exit heuristics - predict an edge exiting the loop if the
601 conditional has no loop header successors as not taken. */
603 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
604 if (e
->dest
->index
< 0
605 || !flow_bb_inside_loop_p (loop
, e
->dest
))
609 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
613 /* Free basic blocks from get_loop_body. */
618 /* Statically estimate the probability that a branch will be taken and produce
619 estimated profile. When profile feedback is present never executed portions
620 of function gets estimated. */
623 estimate_probability (struct loops
*loops_info
)
627 connect_infinite_loops_to_exit ();
628 calculate_dominance_info (CDI_DOMINATORS
);
629 calculate_dominance_info (CDI_POST_DOMINATORS
);
631 predict_loops (loops_info
, true);
635 /* Attempt to predict conditional jumps using a number of heuristics. */
638 rtx last_insn
= BB_END (bb
);
642 if (! can_predict_insn_p (last_insn
))
645 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
647 /* Predict early returns to be probable, as we've already taken
648 care for error returns and other are often used for fast paths
650 if ((e
->dest
== EXIT_BLOCK_PTR
651 || (e
->dest
->succ
&& !e
->dest
->succ
->succ_next
652 && e
->dest
->succ
->dest
== EXIT_BLOCK_PTR
))
653 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
654 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
655 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
656 && !last_basic_block_p (e
->dest
))
657 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
659 /* Look for block we are guarding (ie we dominate it,
660 but it doesn't postdominate us). */
661 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
662 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
663 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
667 /* The call heuristic claims that a guarded function call
668 is improbable. This is because such calls are often used
669 to signal exceptional situations such as printing error
671 for (insn
= BB_HEAD (e
->dest
); insn
!= NEXT_INSN (BB_END (e
->dest
));
672 insn
= NEXT_INSN (insn
))
674 /* Constant and pure calls are hardly used to signalize
675 something exceptional. */
676 && ! CONST_OR_PURE_CALL_P (insn
))
678 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
684 cond
= get_condition (last_insn
, NULL
, false, false);
688 /* Try "pointer heuristic."
689 A comparison ptr == 0 is predicted as false.
690 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
691 if (COMPARISON_P (cond
)
692 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
693 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
695 if (GET_CODE (cond
) == EQ
)
696 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
697 else if (GET_CODE (cond
) == NE
)
698 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
702 /* Try "opcode heuristic."
703 EQ tests are usually false and NE tests are usually true. Also,
704 most quantities are positive, so we can make the appropriate guesses
705 about signed comparisons against zero. */
706 switch (GET_CODE (cond
))
709 /* Unconditional branch. */
710 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
711 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
716 /* Floating point comparisons appears to behave in a very
717 unpredictable way because of special role of = tests in
719 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
721 /* Comparisons with 0 are often used for booleans and there is
722 nothing useful to predict about them. */
723 else if (XEXP (cond
, 1) == const0_rtx
724 || XEXP (cond
, 0) == const0_rtx
)
727 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
732 /* Floating point comparisons appears to behave in a very
733 unpredictable way because of special role of = tests in
735 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
737 /* Comparisons with 0 are often used for booleans and there is
738 nothing useful to predict about them. */
739 else if (XEXP (cond
, 1) == const0_rtx
740 || XEXP (cond
, 0) == const0_rtx
)
743 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
747 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
751 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
756 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
757 || XEXP (cond
, 1) == constm1_rtx
)
758 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
763 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
764 || XEXP (cond
, 1) == constm1_rtx
)
765 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
773 /* Attach the combined probability to each conditional jump. */
775 if (JUMP_P (BB_END (bb
))
776 && any_condjump_p (BB_END (bb
))
777 && bb
->succ
->succ_next
!= NULL
)
778 combine_predictions_for_insn (BB_END (bb
), bb
);
780 remove_fake_exit_edges ();
781 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
785 rtx last_insn
= BB_END (bb
);
787 if (!can_predict_insn_p (last_insn
))
789 /* We can predict only conditional jumps at the moment.
790 Expect each edge to be equally probable.
791 ?? In the future we want to make abnormal edges improbable. */
795 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
798 if (e
->probability
!= 0)
802 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
803 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
806 estimate_bb_frequencies (loops_info
);
807 free_dominance_info (CDI_POST_DOMINATORS
);
808 if (profile_status
== PROFILE_ABSENT
)
809 profile_status
= PROFILE_GUESSED
;
813 /* Predict using opcode of the last statement in basic block. */
815 tree_predict_by_opcode (basic_block bb
)
817 tree stmt
= last_stmt (bb
);
823 if (!stmt
|| TREE_CODE (stmt
) != COND_EXPR
)
825 for (then_edge
= bb
->succ
; then_edge
; then_edge
= then_edge
->succ_next
)
826 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
828 cond
= TREE_OPERAND (stmt
, 0);
829 if (TREE_CODE_CLASS (TREE_CODE (cond
)) != '<')
831 op0
= TREE_OPERAND (cond
, 0);
832 type
= TREE_TYPE (op0
);
833 /* Try "pointer heuristic."
834 A comparison ptr == 0 is predicted as false.
835 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
836 if (POINTER_TYPE_P (type
))
838 if (TREE_CODE (cond
) == EQ_EXPR
)
839 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
840 else if (TREE_CODE (cond
) == NE_EXPR
)
841 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
845 /* Try "opcode heuristic."
846 EQ tests are usually false and NE tests are usually true. Also,
847 most quantities are positive, so we can make the appropriate guesses
848 about signed comparisons against zero. */
849 switch (TREE_CODE (cond
))
853 /* Floating point comparisons appears to behave in a very
854 unpredictable way because of special role of = tests in
856 if (FLOAT_TYPE_P (type
))
858 /* Comparisons with 0 are often used for booleans and there is
859 nothing useful to predict about them. */
860 else if (integer_zerop (op0
)
861 || integer_zerop (TREE_OPERAND (cond
, 1)))
864 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
869 /* Floating point comparisons appears to behave in a very
870 unpredictable way because of special role of = tests in
872 if (FLOAT_TYPE_P (type
))
874 /* Comparisons with 0 are often used for booleans and there is
875 nothing useful to predict about them. */
876 else if (integer_zerop (op0
)
877 || integer_zerop (TREE_OPERAND (cond
, 1)))
880 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
884 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
888 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
893 if (integer_zerop (TREE_OPERAND (cond
, 1))
894 || integer_onep (TREE_OPERAND (cond
, 1))
895 || integer_all_onesp (TREE_OPERAND (cond
, 1))
896 || real_zerop (TREE_OPERAND (cond
, 1))
897 || real_onep (TREE_OPERAND (cond
, 1))
898 || real_minus_onep (TREE_OPERAND (cond
, 1)))
899 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
904 if (integer_zerop (TREE_OPERAND (cond
, 1))
905 || integer_onep (TREE_OPERAND (cond
, 1))
906 || integer_all_onesp (TREE_OPERAND (cond
, 1))
907 || real_zerop (TREE_OPERAND (cond
, 1))
908 || real_onep (TREE_OPERAND (cond
, 1))
909 || real_minus_onep (TREE_OPERAND (cond
, 1)))
910 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
918 /* Predict branch probabilities and estimate profile of the tree CFG. */
920 tree_estimate_probability (void)
923 struct loops loops_info
;
925 flow_loops_find (&loops_info
, LOOP_TREE
);
926 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
927 flow_loops_dump (&loops_info
, dump_file
, NULL
, 0);
929 connect_infinite_loops_to_exit ();
930 calculate_dominance_info (CDI_DOMINATORS
);
931 calculate_dominance_info (CDI_POST_DOMINATORS
);
933 predict_loops (&loops_info
, false);
939 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
941 /* Predict early returns to be probable, as we've already taken
942 care for error returns and other are often used for fast paths
944 if ((e
->dest
== EXIT_BLOCK_PTR
945 || (e
->dest
->succ
&& !e
->dest
->succ
->succ_next
946 && e
->dest
->succ
->dest
== EXIT_BLOCK_PTR
))
947 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
948 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
949 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
950 && !last_basic_block_p (e
->dest
))
951 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
953 /* Look for block we are guarding (ie we dominate it,
954 but it doesn't postdominate us). */
955 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
956 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
957 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
959 block_stmt_iterator bi
;
961 /* The call heuristic claims that a guarded function call
962 is improbable. This is because such calls are often used
963 to signal exceptional situations such as printing error
965 for (bi
= bsi_start (e
->dest
); !bsi_end_p (bi
);
968 tree stmt
= bsi_stmt (bi
);
969 if ((TREE_CODE (stmt
) == CALL_EXPR
970 || (TREE_CODE (stmt
) == MODIFY_EXPR
971 && TREE_CODE (TREE_OPERAND (stmt
, 1)) == CALL_EXPR
))
972 /* Constant and pure calls are hardly used to signalize
973 something exceptional. */
974 && TREE_SIDE_EFFECTS (stmt
))
976 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
982 tree_predict_by_opcode (bb
);
985 combine_predictions_for_bb (dump_file
, bb
);
987 estimate_bb_frequencies (&loops_info
);
988 free_dominance_info (CDI_POST_DOMINATORS
);
989 remove_fake_exit_edges ();
990 flow_loops_free (&loops_info
);
991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
992 dump_tree_cfg (dump_file
, dump_flags
);
993 if (profile_status
== PROFILE_ABSENT
)
994 profile_status
= PROFILE_GUESSED
;
997 /* __builtin_expect dropped tokens into the insn stream describing expected
998 values of registers. Generate branch probabilities based off these
1002 expected_value_to_br_prob (void)
1004 rtx insn
, cond
, ev
= NULL_RTX
, ev_reg
= NULL_RTX
;
1006 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1008 switch (GET_CODE (insn
))
1011 /* Look for expected value notes. */
1012 if (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EXPECTED_VALUE
)
1014 ev
= NOTE_EXPECTED_VALUE (insn
);
1015 ev_reg
= XEXP (ev
, 0);
1021 /* Never propagate across labels. */
1026 /* Look for simple conditional branches. If we haven't got an
1027 expected value yet, no point going further. */
1028 if (!JUMP_P (insn
) || ev
== NULL_RTX
1029 || ! any_condjump_p (insn
))
1034 /* Look for insns that clobber the EV register. */
1035 if (ev
&& reg_set_p (ev_reg
, insn
))
1040 /* Collect the branch condition, hopefully relative to EV_REG. */
1041 /* ??? At present we'll miss things like
1042 (expected_value (eq r70 0))
1044 (set r80 (lt r70 r71))
1045 (set pc (if_then_else (ne r80 0) ...))
1046 as canonicalize_condition will render this to us as
1048 Could use cselib to try and reduce this further. */
1049 cond
= XEXP (SET_SRC (pc_set (insn
)), 0);
1050 cond
= canonicalize_condition (insn
, cond
, 0, NULL
, ev_reg
,
1052 if (! cond
|| XEXP (cond
, 0) != ev_reg
1053 || GET_CODE (XEXP (cond
, 1)) != CONST_INT
)
1056 /* Substitute and simplify. Given that the expression we're
1057 building involves two constants, we should wind up with either
1059 cond
= gen_rtx_fmt_ee (GET_CODE (cond
), VOIDmode
,
1060 XEXP (ev
, 1), XEXP (cond
, 1));
1061 cond
= simplify_rtx (cond
);
1063 /* Turn the condition into a scaled branch probability. */
1064 if (cond
!= const_true_rtx
&& cond
!= const0_rtx
)
1066 predict_insn_def (insn
, PRED_BUILTIN_EXPECT
,
1067 cond
== const_true_rtx
? TAKEN
: NOT_TAKEN
);
1071 /* Check whether this is the last basic block of function. Commonly
1072 there is one extra common cleanup block. */
/* NOTE(review): truncated -- the static bool line, braces, and the body of
   the EXIT_BLOCK_PTR guard are missing from this view.  */
1074 last_basic_block_p (basic_block bb
)
1076 if (bb
== EXIT_BLOCK_PTR
)
/* True when BB immediately precedes the exit block, or when exactly one
   cleanup block sits between BB and the exit and BB's single successor
   leads into it.  */
1079 return (bb
->next_bb
== EXIT_BLOCK_PTR
1080 || (bb
->next_bb
->next_bb
== EXIT_BLOCK_PTR
1081 && bb
->succ
&& !bb
->succ
->succ_next
1082 && bb
->succ
->dest
->next_bb
== EXIT_BLOCK_PTR
));
1085 /* This is used to carry information about basic blocks. It is
1086 attached to the AUX field of the standard CFG block. */
1088 typedef struct block_info_def
1090 /* Estimated frequency of execution of basic_block. */
1093 /* To keep queue of basic blocks to process. */
1096 /* True if block needs to be visited in propagate_freq. */
1097 unsigned int tovisit
:1;
1099 /* Number of predecessors we need to visit first. */
1103 /* Similar information for edges. */
1104 typedef struct edge_info_def
1106 /* In case edge is an loopback edge, the probability edge will be reached
1107 in case header is. Estimated number of iterations of the loop can be
1108 then computed as 1 / (1 - back_edge_prob). */
1109 sreal back_edge_prob
;
1110 /* True if the edge is an loopback edge in the natural loop. */
1111 unsigned int back_edge
:1;
1114 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1115 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1117 /* Helper function for estimate_bb_frequencies.
1118 Propagate the frequencies for LOOP. */
1121 propagate_freq (struct loop
*loop
)
1123 basic_block head
= loop
->header
;
1129 /* For each basic block we need to visit count number of his predecessors
1130 we need to visit first. */
1131 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1133 if (BLOCK_INFO (bb
)->tovisit
)
1137 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1138 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1140 else if (BLOCK_INFO (e
->src
)->tovisit
1141 && dump_file
&& !EDGE_INFO (e
)->back_edge
)
1143 "Irreducible region hit, ignoring edge to %i->%i\n",
1144 e
->src
->index
, bb
->index
);
1145 BLOCK_INFO (bb
)->npredecessors
= count
;
1149 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
1151 for (bb
= head
; bb
; bb
= nextbb
)
1153 sreal cyclic_probability
, frequency
;
1155 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
1156 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
1158 nextbb
= BLOCK_INFO (bb
)->next
;
1159 BLOCK_INFO (bb
)->next
= NULL
;
1161 /* Compute frequency of basic block. */
1164 #ifdef ENABLE_CHECKING
1165 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1166 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1170 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1171 if (EDGE_INFO (e
)->back_edge
)
1173 sreal_add (&cyclic_probability
, &cyclic_probability
,
1174 &EDGE_INFO (e
)->back_edge_prob
);
1176 else if (!(e
->flags
& EDGE_DFS_BACK
))
1180 /* frequency += (e->probability
1181 * BLOCK_INFO (e->src)->frequency /
1182 REG_BR_PROB_BASE); */
1184 sreal_init (&tmp
, e
->probability
, 0);
1185 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
1186 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
1187 sreal_add (&frequency
, &frequency
, &tmp
);
1190 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
1192 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
1193 sizeof (frequency
));
1197 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
1199 memcpy (&cyclic_probability
, &real_almost_one
,
1200 sizeof (real_almost_one
));
1203 /* BLOCK_INFO (bb)->frequency = frequency
1204 / (1 - cyclic_probability) */
1206 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
1207 sreal_div (&BLOCK_INFO (bb
)->frequency
,
1208 &frequency
, &cyclic_probability
);
1212 BLOCK_INFO (bb
)->tovisit
= 0;
1214 /* Compute back edge frequencies. */
1215 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
1216 if (e
->dest
== head
)
1220 /* EDGE_INFO (e)->back_edge_prob
1221 = ((e->probability * BLOCK_INFO (bb)->frequency)
1222 / REG_BR_PROB_BASE); */
1224 sreal_init (&tmp
, e
->probability
, 0);
1225 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
1226 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1227 &tmp
, &real_inv_br_prob_base
);
1230 /* Propagate to successor blocks. */
1231 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
1232 if (!(e
->flags
& EDGE_DFS_BACK
)
1233 && BLOCK_INFO (e
->dest
)->npredecessors
)
1235 BLOCK_INFO (e
->dest
)->npredecessors
--;
1236 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
1241 BLOCK_INFO (last
)->next
= e
->dest
;
/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  /* Walk the sibling list of loops at this nesting level.  */
  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;

      /* Recurse into nested loops first so their back edge probabilities
	 are already computed when this loop's frequencies are propagated.  */
      estimate_loops_at_level (loop->inner);

      if (loop->latch->succ)  /* Do not do this for dummy function loop.  */
	{
	  /* Find current loop back edge and mark it.  */
	  e = loop_latch_edge (loop);
	  EDGE_INFO (e)->back_edge = 1;
	}

      /* Mark every block of the loop body as pending, then propagate
	 frequencies through the loop.  */
      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	BLOCK_INFO (bbs[i])->tovisit = 1;
      free (bbs);
      propagate_freq (loop);
    }
}
1279 /* Convert counts measured by profile driven feedback to frequencies.
1280 Return nonzero iff there was any nonzero execution count. */
1283 counts_to_freqs (void)
1285 gcov_type count_max
, true_count_max
= 0;
1289 true_count_max
= MAX (bb
->count
, true_count_max
);
1291 count_max
= MAX (true_count_max
, 1);
1292 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1293 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
1294 return true_count_max
;
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;

  /* Sum the frequency of every active insn; the function is expensive
     once the estimated dynamic instruction count exceeds LIMIT.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
/* Estimate basic blocks frequency by given branch probabilities.  */

void
estimate_bb_frequencies (struct loops *loops)
{
  basic_block bb;
  sreal freq_max;

  /* When real profile counts are available, counts_to_freqs already
     produced frequencies; otherwise estimate them by propagating the
     branch probabilities guessed earlier.  */
  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      /* Lazily initialize the sreal constants shared by the whole file.  */
      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);	/* 1 * 2^-1 == 0.5.  */
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      /* The entry block always reaches its single successor.  */
      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      /* back_edge_prob = e->probability / REG_BR_PROB_BASE.  */
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      /* Find the maximum propagated frequency over all blocks.  */
      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      /* Rescale so the hottest block maps to BB_FREQ_MAX, rounding to
	 nearest integer (hence the + 0.5 before truncation).  */
      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
1411 /* Decide whether function is hot, cold or unlikely executed. */
1413 compute_function_frequency (void)
1417 if (!profile_info
|| !flag_branch_probabilities
)
1419 cfun
->function_frequency
= FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
;
1422 if (maybe_hot_bb_p (bb
))
1424 cfun
->function_frequency
= FUNCTION_FREQUENCY_HOT
;
1427 if (!probably_never_executed_bb_p (bb
))
1428 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
1432 /* Choose appropriate section for the function. */
1434 choose_function_section (void)
1436 if (DECL_SECTION_NAME (current_function_decl
)
1437 || !targetm
.have_named_sections
1438 /* Theoretically we can split the gnu.linkonce text section too,
1439 but this requires more work as the frequency needs to match
1440 for all generated objects so we need to merge the frequency
1441 of all instances. For now just never set frequency for these. */
1442 || DECL_ONE_ONLY (current_function_decl
))
1444 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_HOT
)
1445 DECL_SECTION_NAME (current_function_decl
) =
1446 build_string (strlen (HOT_TEXT_SECTION_NAME
), HOT_TEXT_SECTION_NAME
);
1447 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
1448 DECL_SECTION_NAME (current_function_decl
) =
1449 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME
),
1450 UNLIKELY_EXECUTED_TEXT_SECTION_NAME
);
1454 struct tree_opt_pass pass_profile
=
1456 "profile", /* name */
1458 tree_estimate_probability
, /* execute */
1461 0, /* static_pass_number */
1462 TV_BRANCH_PROB
, /* tv_id */
1463 PROP_cfg
, /* properties_required */
1464 0, /* properties_provided */
1465 0, /* properties_destroyed */
1466 0, /* todo_flags_start */
1467 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */