/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
56 #include "tree-flow.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
62 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
63 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
64 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
65 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
67 /* Random guesstimation given names. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
73 static void combine_predictions_for_insn (rtx
, basic_block
);
74 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
75 static void estimate_loops_at_level (struct loop
*loop
);
76 static void propagate_freq (struct loop
*);
77 static void estimate_bb_frequencies (struct loops
*);
78 static int counts_to_freqs (void);
79 static bool last_basic_block_p (basic_block
);
80 static void compute_function_frequency (void);
81 static void choose_function_section (void);
82 static bool can_predict_insn_p (rtx
);
84 /* Information we hold about each branch predictor.
85 Filled using information from predict.def. */
89 const char *const name
; /* Name used in the debugging dumps. */
90 const int hitrate
; /* Expected hitrate used by
91 predict_insn_def call. */
95 /* Use given predictor without Dempster-Shaffer theory if it matches
96 using first_match heuristics. */
97 #define PRED_FLAG_FIRST_MATCH 1
99 /* Recompute hitrate in percent to our representation. */
101 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
103 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
104 static const struct predictor_info predictor_info
[]= {
105 #include "predict.def"
107 /* Upper bound on predictors. */
112 /* Return true in case BB can be CPU intensive and should be optimized
113 for maximal performance. */
116 maybe_hot_bb_p (basic_block bb
)
118 if (profile_info
&& flag_branch_probabilities
120 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
122 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
127 /* Return true in case BB is cold and should be optimized for size. */
130 probably_cold_bb_p (basic_block bb
)
132 if (profile_info
&& flag_branch_probabilities
134 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
136 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
141 /* Return true in case BB is probably never executed. */
143 probably_never_executed_bb_p (basic_block bb
)
145 if (profile_info
&& flag_branch_probabilities
)
146 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
150 /* Return true if the one of outgoing edges is already predicted by
154 rtl_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
157 if (!INSN_P (BB_END (bb
)))
159 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
160 if (REG_NOTE_KIND (note
) == REG_BR_PRED
161 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
166 /* Return true if the one of outgoing edges is already predicted by
170 tree_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
172 struct edge_prediction
*i
= bb_ann (bb
)->predictions
;
173 for (i
= bb_ann (bb
)->predictions
; i
; i
= i
->next
)
174 if (i
->predictor
== predictor
)
180 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
182 if (!any_condjump_p (insn
))
184 if (!flag_guess_branch_prob
)
188 = gen_rtx_EXPR_LIST (REG_BR_PRED
,
189 gen_rtx_CONCAT (VOIDmode
,
190 GEN_INT ((int) predictor
),
191 GEN_INT ((int) probability
)),
195 /* Predict insn by given predictor. */
198 predict_insn_def (rtx insn
, enum br_predictor predictor
,
199 enum prediction taken
)
201 int probability
= predictor_info
[(int) predictor
].hitrate
;
204 probability
= REG_BR_PROB_BASE
- probability
;
206 predict_insn (insn
, predictor
, probability
);
209 /* Predict edge E with given probability if possible. */
212 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
215 last_insn
= BB_END (e
->src
);
217 /* We can store the branch prediction information only about
218 conditional jumps. */
219 if (!any_condjump_p (last_insn
))
222 /* We always store probability of branching. */
223 if (e
->flags
& EDGE_FALLTHRU
)
224 probability
= REG_BR_PROB_BASE
- probability
;
226 predict_insn (last_insn
, predictor
, probability
);
229 /* Predict edge E with the given PROBABILITY. */
231 tree_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
233 struct edge_prediction
*i
= ggc_alloc (sizeof (struct edge_prediction
));
235 i
->next
= bb_ann (e
->src
)->predictions
;
236 bb_ann (e
->src
)->predictions
= i
;
237 i
->probability
= probability
;
238 i
->predictor
= predictor
;
242 /* Return true when we can store prediction on insn INSN.
243 At the moment we represent predictions only on conditional
244 jumps, not at computed jump or other complicated cases. */
246 can_predict_insn_p (rtx insn
)
248 return (JUMP_P (insn
)
249 && any_condjump_p (insn
)
250 && BLOCK_FOR_INSN (insn
)->succ
->succ_next
);
253 /* Predict edge E by given predictor if possible. */
256 predict_edge_def (edge e
, enum br_predictor predictor
,
257 enum prediction taken
)
259 int probability
= predictor_info
[(int) predictor
].hitrate
;
262 probability
= REG_BR_PROB_BASE
- probability
;
264 predict_edge (e
, predictor
, probability
);
267 /* Invert all branch predictions or probability notes in the INSN. This needs
268 to be done each time we invert the condition used by the jump. */
271 invert_br_probabilities (rtx insn
)
275 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
276 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
277 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
278 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
279 XEXP (XEXP (note
, 0), 1)
280 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
283 /* Dump information about the branch prediction to the output file. */
286 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
287 basic_block bb
, int used
)
294 while (e
&& (e
->flags
& EDGE_FALLTHRU
))
297 fprintf (file
, " %s heuristics%s: %.1f%%",
298 predictor_info
[predictor
].name
,
299 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
303 fprintf (file
, " exec ");
304 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
307 fprintf (file
, " hit ");
308 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
309 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
313 fprintf (file
, "\n");
316 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
317 note if not already present. Remove now useless REG_BR_PRED notes. */
320 combine_predictions_for_insn (rtx insn
, basic_block bb
)
322 rtx prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
323 rtx
*pnote
= ®_NOTES (insn
);
325 int best_probability
= PROB_EVEN
;
326 int best_predictor
= END_PREDICTORS
;
327 int combined_probability
= REG_BR_PROB_BASE
/ 2;
329 bool first_match
= false;
333 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
336 /* We implement "first match" heuristics and use probability guessed
337 by predictor with smallest index. */
338 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
339 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
341 int predictor
= INTVAL (XEXP (XEXP (note
, 0), 0));
342 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
345 if (best_predictor
> predictor
)
346 best_probability
= probability
, best_predictor
= predictor
;
348 d
= (combined_probability
* probability
349 + (REG_BR_PROB_BASE
- combined_probability
)
350 * (REG_BR_PROB_BASE
- probability
));
352 /* Use FP math to avoid overflows of 32bit integers. */
354 /* If one probability is 0% and one 100%, avoid division by zero. */
355 combined_probability
= REG_BR_PROB_BASE
/ 2;
357 combined_probability
= (((double) combined_probability
) * probability
358 * REG_BR_PROB_BASE
/ d
+ 0.5);
361 /* Decide which heuristic to use. In case we didn't match anything,
362 use no_prediction heuristic, in case we did match, use either
363 first match or Dempster-Shaffer theory depending on the flags. */
365 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
369 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
370 combined_probability
, bb
, true);
373 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
375 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
380 combined_probability
= best_probability
;
381 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
385 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
387 int predictor
= INTVAL (XEXP (XEXP (*pnote
, 0), 0));
388 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
390 dump_prediction (dump_file
, predictor
, probability
, bb
,
391 !first_match
|| best_predictor
== predictor
);
392 *pnote
= XEXP (*pnote
, 1);
395 pnote
= &XEXP (*pnote
, 1);
401 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
402 GEN_INT (combined_probability
), REG_NOTES (insn
));
404 /* Save the prediction into CFG in case we are seeing non-degenerated
406 if (bb
->succ
->succ_next
)
408 BRANCH_EDGE (bb
)->probability
= combined_probability
;
409 FALLTHRU_EDGE (bb
)->probability
410 = REG_BR_PROB_BASE
- combined_probability
;
415 /* Combine predictions into single probability and store them into CFG.
416 Remove now useless prediction entries. */
419 combine_predictions_for_bb (FILE *file
, basic_block bb
)
421 int best_probability
= PROB_EVEN
;
422 int best_predictor
= END_PREDICTORS
;
423 int combined_probability
= REG_BR_PROB_BASE
/ 2;
425 bool first_match
= false;
427 struct edge_prediction
*pred
;
429 edge e
, first
= NULL
, second
= NULL
;
431 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
432 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
435 if (first
&& !second
)
441 /* When there is no successor or only one choice, prediction is easy.
443 We are lazy for now and predict only basic blocks with two outgoing
444 edges. It is possible to predict generic case too, but we have to
445 ignore first match heuristics and do more involved combining. Implement
449 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
450 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
451 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
454 bb_ann (bb
)->predictions
= NULL
;
456 fprintf (file
, "%i edges in bb %i predicted to even probabilities\n",
462 fprintf (file
, "Predictions for bb %i\n", bb
->index
);
464 /* We implement "first match" heuristics and use probability guessed
465 by predictor with smallest index. */
466 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
468 int predictor
= pred
->predictor
;
469 int probability
= pred
->probability
;
471 if (pred
->edge
!= first
)
472 probability
= REG_BR_PROB_BASE
- probability
;
475 if (best_predictor
> predictor
)
476 best_probability
= probability
, best_predictor
= predictor
;
478 d
= (combined_probability
* probability
479 + (REG_BR_PROB_BASE
- combined_probability
)
480 * (REG_BR_PROB_BASE
- probability
));
482 /* Use FP math to avoid overflows of 32bit integers. */
484 /* If one probability is 0% and one 100%, avoid division by zero. */
485 combined_probability
= REG_BR_PROB_BASE
/ 2;
487 combined_probability
= (((double) combined_probability
) * probability
488 * REG_BR_PROB_BASE
/ d
+ 0.5);
491 /* Decide which heuristic to use. In case we didn't match anything,
492 use no_prediction heuristic, in case we did match, use either
493 first match or Dempster-Shaffer theory depending on the flags. */
495 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
499 dump_prediction (file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
502 dump_prediction (file
, PRED_DS_THEORY
, combined_probability
, bb
,
504 dump_prediction (file
, PRED_FIRST_MATCH
, best_probability
, bb
,
509 combined_probability
= best_probability
;
510 dump_prediction (file
, PRED_COMBINED
, combined_probability
, bb
, true);
512 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
514 int predictor
= pred
->predictor
;
515 int probability
= pred
->probability
;
517 if (pred
->edge
!= bb
->succ
)
518 probability
= REG_BR_PROB_BASE
- probability
;
519 dump_prediction (file
, predictor
, probability
, bb
,
520 !first_match
|| best_predictor
== predictor
);
522 bb_ann (bb
)->predictions
= NULL
;
524 first
->probability
= combined_probability
;
525 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
528 /* Predict edge probabilities by exploiting loop structure.
529 When SIMPLELOOPS is set, attempt to count number of iterations by analyzing
532 predict_loops (struct loops
*loops_info
, bool simpleloops
)
536 /* Try to predict out blocks in a loop that are not part of a
538 for (i
= 1; i
< loops_info
->num
; i
++)
540 basic_block bb
, *bbs
;
543 struct loop
*loop
= loops_info
->parray
[i
];
544 struct niter_desc desc
;
545 unsigned HOST_WIDE_INT niter
;
547 flow_loop_scan (loop
, LOOP_EXIT_EDGES
);
548 exits
= loop
->num_exits
;
552 iv_analysis_loop_init (loop
);
553 find_simple_exit (loop
, &desc
);
555 if (desc
.simple_p
&& desc
.const_iter
)
558 niter
= desc
.niter
+ 1;
559 if (niter
== 0) /* We might overflow here. */
562 prob
= (REG_BR_PROB_BASE
563 - (REG_BR_PROB_BASE
+ niter
/2) / niter
);
564 /* Branch prediction algorithm gives 0 frequency for everything
565 after the end of loop for loop having 0 probability to finish. */
566 if (prob
== REG_BR_PROB_BASE
)
567 prob
= REG_BR_PROB_BASE
- 1;
568 predict_edge (desc
.in_edge
, PRED_LOOP_ITERATIONS
,
573 bbs
= get_loop_body (loop
);
575 for (j
= 0; j
< loop
->num_nodes
; j
++)
577 int header_found
= 0;
582 /* Bypass loop heuristics on continue statement. These
583 statements construct loops via "non-loop" constructs
584 in the source language and are better to be handled
586 if ((simpleloops
&& !can_predict_insn_p (BB_END (bb
)))
587 || predicted_by_p (bb
, PRED_CONTINUE
))
590 /* Loop branch heuristics - predict an edge back to a
591 loop's head as taken. */
592 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
593 if (e
->dest
== loop
->header
594 && e
->src
== loop
->latch
)
597 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
600 /* Loop exit heuristics - predict an edge exiting the loop if the
601 conditional has no loop header successors as not taken. */
603 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
604 if (e
->dest
->index
< 0
605 || !flow_bb_inside_loop_p (loop
, e
->dest
))
609 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
613 /* Free basic blocks from get_loop_body. */
618 /* Statically estimate the probability that a branch will be taken and produce
619 estimated profile. When profile feedback is present never executed portions
620 of function gets estimated. */
623 estimate_probability (struct loops
*loops_info
)
627 connect_infinite_loops_to_exit ();
628 calculate_dominance_info (CDI_DOMINATORS
);
629 calculate_dominance_info (CDI_POST_DOMINATORS
);
631 predict_loops (loops_info
, true);
635 /* Attempt to predict conditional jumps using a number of heuristics. */
638 rtx last_insn
= BB_END (bb
);
642 if (! can_predict_insn_p (last_insn
))
645 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
647 /* Predict early returns to be probable, as we've already taken
648 care for error returns and other are often used for fast paths
650 if ((e
->dest
== EXIT_BLOCK_PTR
651 || (e
->dest
->succ
&& !e
->dest
->succ
->succ_next
652 && e
->dest
->succ
->dest
== EXIT_BLOCK_PTR
))
653 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
654 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
655 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
656 && !last_basic_block_p (e
->dest
))
657 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
659 /* Look for block we are guarding (ie we dominate it,
660 but it doesn't postdominate us). */
661 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
662 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
663 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
667 /* The call heuristic claims that a guarded function call
668 is improbable. This is because such calls are often used
669 to signal exceptional situations such as printing error
671 for (insn
= BB_HEAD (e
->dest
); insn
!= NEXT_INSN (BB_END (e
->dest
));
672 insn
= NEXT_INSN (insn
))
674 /* Constant and pure calls are hardly used to signalize
675 something exceptional. */
676 && ! CONST_OR_PURE_CALL_P (insn
))
678 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
684 cond
= get_condition (last_insn
, NULL
, false, false);
688 /* Try "pointer heuristic."
689 A comparison ptr == 0 is predicted as false.
690 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
691 if (COMPARISON_P (cond
)
692 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
693 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
695 if (GET_CODE (cond
) == EQ
)
696 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
697 else if (GET_CODE (cond
) == NE
)
698 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
702 /* Try "opcode heuristic."
703 EQ tests are usually false and NE tests are usually true. Also,
704 most quantities are positive, so we can make the appropriate guesses
705 about signed comparisons against zero. */
706 switch (GET_CODE (cond
))
709 /* Unconditional branch. */
710 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
711 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
716 /* Floating point comparisons appears to behave in a very
717 unpredictable way because of special role of = tests in
719 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
721 /* Comparisons with 0 are often used for booleans and there is
722 nothing useful to predict about them. */
723 else if (XEXP (cond
, 1) == const0_rtx
724 || XEXP (cond
, 0) == const0_rtx
)
727 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
732 /* Floating point comparisons appears to behave in a very
733 unpredictable way because of special role of = tests in
735 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
737 /* Comparisons with 0 are often used for booleans and there is
738 nothing useful to predict about them. */
739 else if (XEXP (cond
, 1) == const0_rtx
740 || XEXP (cond
, 0) == const0_rtx
)
743 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
747 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
751 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
756 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
757 || XEXP (cond
, 1) == constm1_rtx
)
758 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
763 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
764 || XEXP (cond
, 1) == constm1_rtx
)
765 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
773 /* Attach the combined probability to each conditional jump. */
775 if (JUMP_P (BB_END (bb
))
776 && any_condjump_p (BB_END (bb
))
777 && bb
->succ
->succ_next
!= NULL
)
778 combine_predictions_for_insn (BB_END (bb
), bb
);
780 remove_fake_exit_edges ();
781 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
785 rtx last_insn
= BB_END (bb
);
787 if (!can_predict_insn_p (last_insn
))
789 /* We can predict only conditional jumps at the moment.
790 Expect each edge to be equally probable.
791 ?? In the future we want to make abnormal edges improbable. */
795 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
798 if (e
->probability
!= 0)
802 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
803 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
806 estimate_bb_frequencies (loops_info
);
807 free_dominance_info (CDI_POST_DOMINATORS
);
811 /* Predict using opcode of the last statement in basic block. */
813 tree_predict_by_opcode (basic_block bb
)
815 tree stmt
= last_stmt (bb
);
821 if (!stmt
|| TREE_CODE (stmt
) != COND_EXPR
)
823 for (then_edge
= bb
->succ
; then_edge
; then_edge
= then_edge
->succ_next
)
824 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
826 cond
= TREE_OPERAND (stmt
, 0);
827 if (TREE_CODE_CLASS (TREE_CODE (cond
)) != '<')
829 op0
= TREE_OPERAND (cond
, 0);
830 type
= TREE_TYPE (op0
);
831 /* Try "pointer heuristic."
832 A comparison ptr == 0 is predicted as false.
833 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
834 if (POINTER_TYPE_P (type
))
836 if (TREE_CODE (cond
) == EQ_EXPR
)
837 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
838 else if (TREE_CODE (cond
) == NE_EXPR
)
839 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
843 /* Try "opcode heuristic."
844 EQ tests are usually false and NE tests are usually true. Also,
845 most quantities are positive, so we can make the appropriate guesses
846 about signed comparisons against zero. */
847 switch (TREE_CODE (cond
))
851 /* Floating point comparisons appears to behave in a very
852 unpredictable way because of special role of = tests in
854 if (FLOAT_TYPE_P (type
))
856 /* Comparisons with 0 are often used for booleans and there is
857 nothing useful to predict about them. */
858 else if (integer_zerop (op0
)
859 || integer_zerop (TREE_OPERAND (cond
, 1)))
862 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
867 /* Floating point comparisons appears to behave in a very
868 unpredictable way because of special role of = tests in
870 if (FLOAT_TYPE_P (type
))
872 /* Comparisons with 0 are often used for booleans and there is
873 nothing useful to predict about them. */
874 else if (integer_zerop (op0
)
875 || integer_zerop (TREE_OPERAND (cond
, 1)))
878 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
882 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
886 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
891 if (integer_zerop (TREE_OPERAND (cond
, 1))
892 || integer_onep (TREE_OPERAND (cond
, 1))
893 || integer_all_onesp (TREE_OPERAND (cond
, 1))
894 || real_zerop (TREE_OPERAND (cond
, 1))
895 || real_onep (TREE_OPERAND (cond
, 1))
896 || real_minus_onep (TREE_OPERAND (cond
, 1)))
897 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
902 if (integer_zerop (TREE_OPERAND (cond
, 1))
903 || integer_onep (TREE_OPERAND (cond
, 1))
904 || integer_all_onesp (TREE_OPERAND (cond
, 1))
905 || real_zerop (TREE_OPERAND (cond
, 1))
906 || real_onep (TREE_OPERAND (cond
, 1))
907 || real_minus_onep (TREE_OPERAND (cond
, 1)))
908 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
916 /* Predict branch probabilities and estimate profile of the tree CFG. */
918 tree_estimate_probability (void)
921 struct loops loops_info
;
923 flow_loops_find (&loops_info
, LOOP_TREE
);
924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
925 flow_loops_dump (&loops_info
, dump_file
, NULL
, 0);
927 connect_infinite_loops_to_exit ();
928 calculate_dominance_info (CDI_DOMINATORS
);
929 calculate_dominance_info (CDI_POST_DOMINATORS
);
931 predict_loops (&loops_info
, false);
937 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
939 /* Predict early returns to be probable, as we've already taken
940 care for error returns and other are often used for fast paths
942 if ((e
->dest
== EXIT_BLOCK_PTR
943 || (e
->dest
->succ
&& !e
->dest
->succ
->succ_next
944 && e
->dest
->succ
->dest
== EXIT_BLOCK_PTR
))
945 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
946 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
947 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
948 && !last_basic_block_p (e
->dest
))
949 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
951 /* Look for block we are guarding (ie we dominate it,
952 but it doesn't postdominate us). */
953 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
954 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
955 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
957 block_stmt_iterator bi
;
959 /* The call heuristic claims that a guarded function call
960 is improbable. This is because such calls are often used
961 to signal exceptional situations such as printing error
963 for (bi
= bsi_start (e
->dest
); !bsi_end_p (bi
);
966 tree stmt
= bsi_stmt (bi
);
967 if ((TREE_CODE (stmt
) == CALL_EXPR
968 || (TREE_CODE (stmt
) == MODIFY_EXPR
969 && TREE_CODE (TREE_OPERAND (stmt
, 1)) == CALL_EXPR
))
970 /* Constant and pure calls are hardly used to signalize
971 something exceptional. */
972 && TREE_SIDE_EFFECTS (stmt
))
974 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
980 tree_predict_by_opcode (bb
);
983 combine_predictions_for_bb (dump_file
, bb
);
985 estimate_bb_frequencies (&loops_info
);
986 free_dominance_info (CDI_POST_DOMINATORS
);
987 remove_fake_exit_edges ();
988 flow_loops_free (&loops_info
);
989 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
990 dump_tree_cfg (dump_file
, dump_flags
);
993 /* __builtin_expect dropped tokens into the insn stream describing expected
994 values of registers. Generate branch probabilities based off these
998 expected_value_to_br_prob (void)
1000 rtx insn
, cond
, ev
= NULL_RTX
, ev_reg
= NULL_RTX
;
1002 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1004 switch (GET_CODE (insn
))
1007 /* Look for expected value notes. */
1008 if (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EXPECTED_VALUE
)
1010 ev
= NOTE_EXPECTED_VALUE (insn
);
1011 ev_reg
= XEXP (ev
, 0);
1017 /* Never propagate across labels. */
1022 /* Look for simple conditional branches. If we haven't got an
1023 expected value yet, no point going further. */
1024 if (!JUMP_P (insn
) || ev
== NULL_RTX
1025 || ! any_condjump_p (insn
))
1030 /* Look for insns that clobber the EV register. */
1031 if (ev
&& reg_set_p (ev_reg
, insn
))
1036 /* Collect the branch condition, hopefully relative to EV_REG. */
1037 /* ??? At present we'll miss things like
1038 (expected_value (eq r70 0))
1040 (set r80 (lt r70 r71))
1041 (set pc (if_then_else (ne r80 0) ...))
1042 as canonicalize_condition will render this to us as
1044 Could use cselib to try and reduce this further. */
1045 cond
= XEXP (SET_SRC (pc_set (insn
)), 0);
1046 cond
= canonicalize_condition (insn
, cond
, 0, NULL
, ev_reg
,
1048 if (! cond
|| XEXP (cond
, 0) != ev_reg
1049 || GET_CODE (XEXP (cond
, 1)) != CONST_INT
)
1052 /* Substitute and simplify. Given that the expression we're
1053 building involves two constants, we should wind up with either
1055 cond
= gen_rtx_fmt_ee (GET_CODE (cond
), VOIDmode
,
1056 XEXP (ev
, 1), XEXP (cond
, 1));
1057 cond
= simplify_rtx (cond
);
1059 /* Turn the condition into a scaled branch probability. */
1060 if (cond
!= const_true_rtx
&& cond
!= const0_rtx
)
1062 predict_insn_def (insn
, PRED_BUILTIN_EXPECT
,
1063 cond
== const_true_rtx
? TAKEN
: NOT_TAKEN
);
1067 /* Check whether this is the last basic block of function. Commonly
1068 there is one extra common cleanup block. */
1070 last_basic_block_p (basic_block bb
)
1072 if (bb
== EXIT_BLOCK_PTR
)
1075 return (bb
->next_bb
== EXIT_BLOCK_PTR
1076 || (bb
->next_bb
->next_bb
== EXIT_BLOCK_PTR
1077 && bb
->succ
&& !bb
->succ
->succ_next
1078 && bb
->succ
->dest
->next_bb
== EXIT_BLOCK_PTR
));
1081 /* This is used to carry information about basic blocks. It is
1082 attached to the AUX field of the standard CFG block. */
1084 typedef struct block_info_def
1086 /* Estimated frequency of execution of basic_block. */
1089 /* To keep queue of basic blocks to process. */
1092 /* True if block needs to be visited in propagate_freq. */
1093 unsigned int tovisit
:1;
1095 /* Number of predecessors we need to visit first. */
1099 /* Similar information for edges. */
1100 typedef struct edge_info_def
1102 /* In case edge is an loopback edge, the probability edge will be reached
1103 in case header is. Estimated number of iterations of the loop can be
1104 then computed as 1 / (1 - back_edge_prob). */
1105 sreal back_edge_prob
;
1106 /* True if the edge is an loopback edge in the natural loop. */
1107 unsigned int back_edge
:1;
1110 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1111 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1113 /* Helper function for estimate_bb_frequencies.
1114 Propagate the frequencies for LOOP. */
1117 propagate_freq (struct loop
*loop
)
1119 basic_block head
= loop
->header
;
1125 /* For each basic block we need to visit count number of his predecessors
1126 we need to visit first. */
1127 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1129 if (BLOCK_INFO (bb
)->tovisit
)
1133 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1134 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1136 else if (BLOCK_INFO (e
->src
)->tovisit
1137 && dump_file
&& !EDGE_INFO (e
)->back_edge
)
1139 "Irreducible region hit, ignoring edge to %i->%i\n",
1140 e
->src
->index
, bb
->index
);
1141 BLOCK_INFO (bb
)->npredecessors
= count
;
1145 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
1147 for (bb
= head
; bb
; bb
= nextbb
)
1149 sreal cyclic_probability
, frequency
;
1151 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
1152 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
1154 nextbb
= BLOCK_INFO (bb
)->next
;
1155 BLOCK_INFO (bb
)->next
= NULL
;
1157 /* Compute frequency of basic block. */
1160 #ifdef ENABLE_CHECKING
1161 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1162 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1166 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
1167 if (EDGE_INFO (e
)->back_edge
)
1169 sreal_add (&cyclic_probability
, &cyclic_probability
,
1170 &EDGE_INFO (e
)->back_edge_prob
);
1172 else if (!(e
->flags
& EDGE_DFS_BACK
))
1176 /* frequency += (e->probability
1177 * BLOCK_INFO (e->src)->frequency /
1178 REG_BR_PROB_BASE); */
1180 sreal_init (&tmp
, e
->probability
, 0);
1181 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
1182 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
1183 sreal_add (&frequency
, &frequency
, &tmp
);
1186 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
1188 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
1189 sizeof (frequency
));
1193 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
1195 memcpy (&cyclic_probability
, &real_almost_one
,
1196 sizeof (real_almost_one
));
1199 /* BLOCK_INFO (bb)->frequency = frequency
1200 / (1 - cyclic_probability) */
1202 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
1203 sreal_div (&BLOCK_INFO (bb
)->frequency
,
1204 &frequency
, &cyclic_probability
);
1208 BLOCK_INFO (bb
)->tovisit
= 0;
1210 /* Compute back edge frequencies. */
1211 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
1212 if (e
->dest
== head
)
1216 /* EDGE_INFO (e)->back_edge_prob
1217 = ((e->probability * BLOCK_INFO (bb)->frequency)
1218 / REG_BR_PROB_BASE); */
1220 sreal_init (&tmp
, e
->probability
, 0);
1221 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
1222 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1223 &tmp
, &real_inv_br_prob_base
);
1226 /* Propagate to successor blocks. */
1227 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
1228 if (!(e
->flags
& EDGE_DFS_BACK
)
1229 && BLOCK_INFO (e
->dest
)->npredecessors
)
1231 BLOCK_INFO (e
->dest
)->npredecessors
--;
1232 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
1237 BLOCK_INFO (last
)->next
= e
->dest
;
1245 /* Estimate probabilities of loopback edges in loops at same nest level. */
1248 estimate_loops_at_level (struct loop
*first_loop
)
1252 for (loop
= first_loop
; loop
; loop
= loop
->next
)
1258 estimate_loops_at_level (loop
->inner
);
1260 if (loop
->latch
->succ
) /* Do not do this for dummy function loop. */
1262 /* Find current loop back edge and mark it. */
1263 e
= loop_latch_edge (loop
);
1264 EDGE_INFO (e
)->back_edge
= 1;
1267 bbs
= get_loop_body (loop
);
1268 for (i
= 0; i
< loop
->num_nodes
; i
++)
1269 BLOCK_INFO (bbs
[i
])->tovisit
= 1;
1271 propagate_freq (loop
);
1275 /* Convert counts measured by profile driven feedback to frequencies.
1276 Return nonzero iff there was any nonzero execution count. */
1279 counts_to_freqs (void)
1281 gcov_type count_max
, true_count_max
= 0;
1285 true_count_max
= MAX (bb
->count
, true_count_max
);
1287 count_max
= MAX (true_count_max
, 1);
1288 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1289 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
1290 return true_count_max
;
1293 /* Return true if function is likely to be expensive, so there is no point to
1294 optimize performance of prologue, epilogue or do inlining at the expense
1295 of code size growth. THRESHOLD is the limit of number of instructions
1296 function can execute at average to be still considered not expensive. */
1299 expensive_function_p (int threshold
)
1301 unsigned int sum
= 0;
1305 /* We can not compute accurately for large thresholds due to scaled
1307 if (threshold
> BB_FREQ_MAX
)
1310 /* Frequencies are out of range. This either means that function contains
1311 internal loop executing more than BB_FREQ_MAX times or profile feedback
1312 is available and function has not been executed at all. */
1313 if (ENTRY_BLOCK_PTR
->frequency
== 0)
1316 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1317 limit
= ENTRY_BLOCK_PTR
->frequency
* threshold
;
1322 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
1323 insn
= NEXT_INSN (insn
))
1324 if (active_insn_p (insn
))
1326 sum
+= bb
->frequency
;
1335 /* Estimate basic blocks frequency by given branch probabilities. */
1338 estimate_bb_frequencies (struct loops
*loops
)
1343 if (!flag_branch_probabilities
|| !counts_to_freqs ())
1345 static int real_values_initialized
= 0;
1347 if (!real_values_initialized
)
1349 real_values_initialized
= 1;
1350 sreal_init (&real_zero
, 0, 0);
1351 sreal_init (&real_one
, 1, 0);
1352 sreal_init (&real_br_prob_base
, REG_BR_PROB_BASE
, 0);
1353 sreal_init (&real_bb_freq_max
, BB_FREQ_MAX
, 0);
1354 sreal_init (&real_one_half
, 1, -1);
1355 sreal_div (&real_inv_br_prob_base
, &real_one
, &real_br_prob_base
);
1356 sreal_sub (&real_almost_one
, &real_one
, &real_inv_br_prob_base
);
1359 mark_dfs_back_edges ();
1361 ENTRY_BLOCK_PTR
->succ
->probability
= REG_BR_PROB_BASE
;
1363 /* Set up block info for each basic block. */
1364 alloc_aux_for_blocks (sizeof (struct block_info_def
));
1365 alloc_aux_for_edges (sizeof (struct edge_info_def
));
1366 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1370 BLOCK_INFO (bb
)->tovisit
= 0;
1371 for (e
= bb
->succ
; e
; e
= e
->succ_next
)
1373 sreal_init (&EDGE_INFO (e
)->back_edge_prob
, e
->probability
, 0);
1374 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1375 &EDGE_INFO (e
)->back_edge_prob
,
1376 &real_inv_br_prob_base
);
1380 /* First compute probabilities locally for each loop from innermost
1381 to outermost to examine probabilities for back edges. */
1382 estimate_loops_at_level (loops
->tree_root
);
1384 memcpy (&freq_max
, &real_zero
, sizeof (real_zero
));
1386 if (sreal_compare (&freq_max
, &BLOCK_INFO (bb
)->frequency
) < 0)
1387 memcpy (&freq_max
, &BLOCK_INFO (bb
)->frequency
, sizeof (freq_max
));
1389 sreal_div (&freq_max
, &real_bb_freq_max
, &freq_max
);
1390 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1394 sreal_mul (&tmp
, &BLOCK_INFO (bb
)->frequency
, &freq_max
);
1395 sreal_add (&tmp
, &tmp
, &real_one_half
);
1396 bb
->frequency
= sreal_to_int (&tmp
);
1399 free_aux_for_blocks ();
1400 free_aux_for_edges ();
1402 compute_function_frequency ();
1403 if (flag_reorder_functions
)
1404 choose_function_section ();
1407 /* Decide whether function is hot, cold or unlikely executed. */
1409 compute_function_frequency (void)
1413 if (!profile_info
|| !flag_branch_probabilities
)
1415 cfun
->function_frequency
= FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
;
1418 if (maybe_hot_bb_p (bb
))
1420 cfun
->function_frequency
= FUNCTION_FREQUENCY_HOT
;
1423 if (!probably_never_executed_bb_p (bb
))
1424 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
1428 /* Choose appropriate section for the function. */
1430 choose_function_section (void)
1432 if (DECL_SECTION_NAME (current_function_decl
)
1433 || !targetm
.have_named_sections
1434 /* Theoretically we can split the gnu.linkonce text section too,
1435 but this requires more work as the frequency needs to match
1436 for all generated objects so we need to merge the frequency
1437 of all instances. For now just never set frequency for these. */
1438 || DECL_ONE_ONLY (current_function_decl
))
1440 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_HOT
)
1441 DECL_SECTION_NAME (current_function_decl
) =
1442 build_string (strlen (HOT_TEXT_SECTION_NAME
), HOT_TEXT_SECTION_NAME
);
1443 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
1444 DECL_SECTION_NAME (current_function_decl
) =
1445 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME
),
1446 UNLIKELY_EXECUTED_TEXT_SECTION_NAME
);
1450 struct tree_opt_pass pass_profile
=
1452 "profile", /* name */
1454 tree_estimate_probability
, /* execute */
1457 0, /* static_pass_number */
1458 TV_BRANCH_PROB
, /* tv_id */
1459 PROP_cfg
, /* properties_required */
1460 0, /* properties_provided */
1461 0, /* properties_destroyed */
1462 0, /* todo_flags_start */
1463 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */