/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-flow.h"
#include "ggc.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimation given names.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 10 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void estimate_loops_at_level (struct loop *loop);
static void propagate_freq (struct loop *);
static void estimate_bb_frequencies (struct loops *);
static int counts_to_freqs (void);
static bool last_basic_block_p (basic_block);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hit rate given in percent to our internal representation.  */
#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
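
/* For example, assuming the usual REG_BR_PROB_BASE value of 10000,
   HITRATE (97) evaluates to (97 * 10000 + 50) / 100 == 9700, i.e. a
   97% hit rate scaled into the fixed-point probability base.  */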
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
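
/* As an illustration (the entry shown is hypothetical), a predict.def line
   such as
     DEF_PREDICTOR (PRED_CALL, "call", HITRATE (70), 0)
   expands under the macro above to the initializer {"call", HITRATE (70), 0}
   in this table.  */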
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
/* Return true in case BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}
/* Return true in case BB is probably never executed.  */
bool
probably_never_executed_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  return false;
}
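
/* Note the rounding in the division above: a block counts as never executed
   when it ran less than half a time per profiled run on average; e.g. with
   profile_info->runs == 10, any bb->count below 5 yields 0.  */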
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
{
  rtx note;

  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;

  for (i = bb_ann (bb)->predictions; i; i = i->next)
    if (i->predictor == predictor)
      return true;
  return false;
}
static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  if (!any_condjump_p (insn))
    abort ();
  if (!flag_guess_branch_prob)
    return;

  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */

void
tree_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));

  i->next = bb_ann (e->src)->predictions;
  bb_ann (e->src)->predictions = i;
  i->probability = probability;
  i->predictor = predictor;
  i->edge = e;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
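
/* Inversion simply maps a probability P onto REG_BR_PROB_BASE - P; with
   the usual base of 10000, a 97% taken prediction (9700) becomes a 3%
   one (300).  */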
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;

  if (!file)
    return;

  FOR_EACH_EDGE (e, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
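
  /* The update above is the Dempster-Shafer combination of two branch
     probabilities p and q scaled by REG_BR_PROB_BASE:
     p*q / (p*q + (1-p)*(1-q)).  For instance, combining predictors of 60%
     and 70% (6000 and 7000 with the usual base of 10000) gives about 7778,
     i.e. 77.8%: predictors that agree reinforce each other.  */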
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (EDGE_COUNT (bb->succs) > 1)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (FILE *file, basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;

  FOR_EACH_EDGE (e, bb->succs)
    {
      if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
	{
	  nedges++;
	  if (first && !second)
	    second = e;
	  if (!first)
	    first = e;
	}
    }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      FOR_EACH_EDGE (e, bb->succs)
	{
	  if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
	    e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	  else
	    e->probability = 0;
	}
      bb_ann (bb)->predictions = NULL;
      if (file)
	fprintf (file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (file)
    fprintf (file, "Predictions for bb %i\n", bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
    {
      int predictor = pred->predictor;
      int probability = pred->probability;

      if (pred->edge != first)
	probability = REG_BR_PROB_BASE - probability;

      found = true;
      if (best_predictor > predictor)
	best_probability = probability, best_predictor = predictor;

      d = (combined_probability * probability
	   + (REG_BR_PROB_BASE - combined_probability)
	   * (REG_BR_PROB_BASE - probability));

      /* Use FP math to avoid overflows of 32bit integers.  */
      if (d == 0)
	/* If one probability is 0% and one 100%, avoid division by zero.  */
	combined_probability = REG_BR_PROB_BASE / 2;
      else
	combined_probability = (((double) combined_probability) * probability
				* REG_BR_PROB_BASE / d + 0.5);
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (file, PRED_COMBINED, combined_probability, bb, true);

  for (pred = bb_ann (bb)->predictions; pred; pred = pred->next)
    {
      int predictor = pred->predictor;
      int probability = pred->probability;

      if (pred->edge != EDGE_SUCC (bb, 0))
	probability = REG_BR_PROB_BASE - probability;
      dump_prediction (file, predictor, probability, bb,
		       !first_match || best_predictor == predictor);
    }
  bb_ann (bb)->predictions = NULL;

  first->probability = combined_probability;
  second->probability = REG_BR_PROB_BASE - combined_probability;
}
/* Predict edge probabilities by exploiting loop structure.
   When SIMPLELOOPS is set, attempt to count the number of iterations by
   analyzing the RTL.  */
static void
predict_loops (struct loops *loops_info, bool simpleloops)
{
  unsigned i;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (i = 1; i < loops_info->num; i++)
    {
      basic_block bb, *bbs;
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
      struct niter_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      if (simpleloops)
	{
	  iv_analysis_loop_init (loop);
	  find_simple_exit (loop, &desc);

	  if (desc.simple_p && desc.const_iter)
	    {
	      int prob;
	      niter = desc.niter + 1;
	      if (niter == 0)	/* We might overflow here.  */
		niter = desc.niter;

	      prob = (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + niter / 2) / niter);
	      /* Branch prediction algorithm gives 0 frequency for everything
		 after the end of loop for loop having 0 probability to finish.  */
	      if (prob == REG_BR_PROB_BASE)
		prob = REG_BR_PROB_BASE - 1;
	      predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
			    prob);
	    }
	}
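
      /* For a loop known to iterate N times, the probability of taking the
	 back edge on any given iteration is roughly 1 - 1/N.  E.g. N == 100
	 with the usual REG_BR_PROB_BASE of 10000 gives
	 10000 - (10000 + 50) / 100 == 9900, i.e. a 99% chance to loop.  */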
      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if ((simpleloops && !can_predict_insn_p (BB_END (bb)))
	      || predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  FOR_EACH_EDGE (e, bb->succs)
	    {
	      if (e->dest == loop->header
		  && e->src == loop->latch)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found)
	    FOR_EACH_EDGE (e, bb->succs)
	      {
		if (e->dest->index < 0
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge
		    (e, PRED_LOOP_EXIT,
		     (REG_BR_PROB_BASE
		      - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
		     / exits);
	      }
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
/* Statically estimate the probability that a branch will be taken and produce
   estimated profile.  When profile feedback is present never executed portions
   of function get estimated.  */

void
estimate_probability (struct loops *loops_info)
{
  basic_block bb;

  connect_infinite_loops_to_exit ();
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  predict_loops (loops_info, true);

  iv_analysis_done ();

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = BB_END (bb);
      rtx cond;
      edge e;

      if (! can_predict_insn_p (last_insn))
	continue;

      FOR_EACH_EDGE (e, bb->succs)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and others are often used for fast paths
	     through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (EDGE_COUNT (e->dest->succs) == 1
		   && EDGE_SUCC (e->dest, 0)->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
		   insn = NEXT_INSN (insn))
		if (CALL_P (insn)
		    /* Constant and pure calls are hardly used to signal
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, NULL, false, false);
      if (! cond)
	continue;

      /* Try "pointer heuristic."
	 A comparison ptr == 0 is predicted as false.
	 Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
      if (COMPARISON_P (cond)
	  && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	      || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
	{
	  if (GET_CODE (cond) == EQ)
	    predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
	  else if (GET_CODE (cond) == NE)
	    predict_insn_def (last_insn, PRED_POINTER, TAKEN);
	}
      else

      /* Try "opcode heuristic."
	 EQ tests are usually false and NE tests are usually true.  Also,
	 most quantities are positive, so we can make the appropriate guesses
	 about signed comparisons against zero.  */
	switch (GET_CODE (cond))
	  {
	  case CONST_INT:
	    /* Unconditional branch.  */
	    predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			      cond == const0_rtx ? NOT_TAKEN : TAKEN);
	    break;

	  case EQ:
	  case UNEQ:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	    break;

	  case NE:
	  case LTGT:
	    /* Floating point comparisons appear to behave in a very
	       unpredictable way because of special role of = tests in
	       FP code.  */
	    if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	      ;
	    /* Comparisons with 0 are often used for booleans and there is
	       nothing useful to predict about them.  */
	    else if (XEXP (cond, 1) == const0_rtx
		     || XEXP (cond, 0) == const0_rtx)
	      ;
	    else
	      predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	    break;

	  case ORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	    break;

	  case UNORDERED:
	    predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	    break;

	  case LE:
	  case LT:
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	    break;

	  case GE:
	  case GT:
	    if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
		|| XEXP (cond, 1) == constm1_rtx)
	      predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	    break;

	  default:
	    break;
	  }
    }

  /* Attach the combined probability to each conditional jump.  */
  FOR_EACH_BB (bb)
    if (JUMP_P (BB_END (bb))
	&& any_condjump_p (BB_END (bb))
	&& EDGE_COUNT (bb->succs) >= 2)
      combine_predictions_for_insn (BB_END (bb), bb);

  remove_fake_exit_edges ();
  /* Fill in the probability values in flowgraph based on the REG_BR_PROB
     notes.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = BB_END (bb);

      if (!can_predict_insn_p (last_insn))
	{
	  /* We can predict only conditional jumps at the moment.
	     Expect each edge to be equally probable.
	     ?? In the future we want to make abnormal edges improbable.  */
	  int nedges = 0;
	  edge e;

	  FOR_EACH_EDGE (e, bb->succs)
	    {
	      nedges++;
	      if (e->probability != 0)
		break;
	    }
	  if (!e)
	    FOR_EACH_EDGE (e, bb->succs)
	      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	}
    }

  estimate_bb_frequencies (loops_info);
  free_dominance_info (CDI_POST_DOMINATORS);
}
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  tree stmt = last_stmt (bb);
  edge then_edge;
  tree cond;
  tree op0;
  tree type;

  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
    return;
  FOR_EACH_EDGE (then_edge, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  cond = TREE_OPERAND (stmt, 0);
  if (TREE_CODE_CLASS (TREE_CODE (cond)) != '<')
    return;
  op0 = TREE_OPERAND (cond, 0);
  type = TREE_TYPE (op0);
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (TREE_CODE (cond) == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (TREE_CODE (cond) == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (TREE_CODE (cond))
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (TREE_OPERAND (cond, 1)))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (TREE_OPERAND (cond, 1))
	    || integer_onep (TREE_OPERAND (cond, 1))
	    || integer_all_onesp (TREE_OPERAND (cond, 1))
	    || real_zerop (TREE_OPERAND (cond, 1))
	    || real_onep (TREE_OPERAND (cond, 1))
	    || real_minus_onep (TREE_OPERAND (cond, 1)))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Predict branch probabilities and estimate profile of the tree CFG.  */
static void
tree_estimate_probability (void)
{
  basic_block bb;
  struct loops loops_info;

  flow_loops_find (&loops_info, LOOP_TREE);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (&loops_info, dump_file, NULL, 0);

  connect_infinite_loops_to_exit ();
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  predict_loops (&loops_info, false);

  FOR_EACH_BB (bb)
    {
      edge e;

      FOR_EACH_EDGE (e, bb->succs)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care for error returns and others are often used for fast paths
	     through the function.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (EDGE_COUNT (e->dest->succs) == 1
		   && EDGE_SUCC (e->dest, 0)->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for block we are guarding (ie we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	    {
	      block_stmt_iterator bi;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (bi = bsi_start (e->dest); !bsi_end_p (bi);
		   bsi_next (&bi))
		{
		  tree stmt = bsi_stmt (bi);
		  if ((TREE_CODE (stmt) == CALL_EXPR
		       || (TREE_CODE (stmt) == MODIFY_EXPR
			   && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
		      /* Constant and pure calls are hardly used to signal
			 something exceptional.  */
		      && TREE_SIDE_EFFECTS (stmt))
		    {
		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		      break;
		    }
		}
	    }
	}
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (dump_file, bb);

  estimate_bb_frequencies (&loops_info);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  flow_loops_free (&loops_info);
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_tree_cfg (dump_file, dump_flags);
}
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based off these
   values.  */
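
/* For instance (illustrative user code, not part of this file):

     if (__builtin_expect (ptr != NULL, 1))
       ...

   leaves behind a NOTE_INSN_EXPECTED_VALUE note recording the value the
   register feeding the comparison is expected to have; the walk below
   matches that note against the conditional jump and attaches a
   PRED_BUILTIN_EXPECT prediction to it.  */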
void
expected_value_to_br_prob (void)
{
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (!JUMP_P (insn) || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ???  At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70 r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg,
				     false, false);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
/* Check whether this is the last basic block of function.  Commonly
   there is one extra common cleanup block.  */
static bool
last_basic_block_p (basic_block bb)
{
  if (bb == EXIT_BLOCK_PTR)
    return false;

  return (bb->next_bb == EXIT_BLOCK_PTR
	  || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
	      && EDGE_COUNT (bb->succs) == 1
	      && EDGE_SUCC (bb, 0)->dest->next_bb == EXIT_BLOCK_PTR));
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  unsigned int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;
/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case the edge is a loopback edge, the probability that the edge will
     be reached in case the header is.  The estimated number of iterations of
     the loop can then be computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
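
/* For example, a back edge whose back_edge_prob works out to 0.9 implies
   the loop body is expected to run 1 / (1 - 0.9) == 10 times per entry.  */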
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for LOOP.  */

static void
propagate_freq (struct loop *loop)
{
  basic_block head = loop->header;
  basic_block bb;
  basic_block last;
  edge e;
  basic_block nextbb;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    {
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  FOR_EACH_EDGE (e, bb->preds)
	    {
	      if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
		count++;
	      else if (BLOCK_INFO (e->src)->tovisit
		       && dump_file && !EDGE_INFO (e)->back_edge)
		fprintf (dump_file,
			 "Irreducible region hit, ignoring edge to %i->%i\n",
			 e->src->index, bb->index);
	    }
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, bb->preds)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  FOR_EACH_EDGE (e, bb->preds)
	    {
	      if (EDGE_INFO (e)->back_edge)
		{
		  sreal_add (&cyclic_probability, &cyclic_probability,
			     &EDGE_INFO (e)->back_edge_prob);
		}
	      else if (!(e->flags & EDGE_DFS_BACK))
		{
		  sreal tmp;

		  /* frequency += (e->probability
				   * BLOCK_INFO (e->src)->frequency
				   / REG_BR_PROB_BASE);  */

		  sreal_init (&tmp, e->probability, 0);
		  sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		  sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		  sreal_add (&frequency, &frequency, &tmp);
		}
	    }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      FOR_EACH_EDGE (e, bb->succs)
	{
	  if (e->dest == head)
	    {
	      sreal tmp;

	      /* EDGE_INFO (e)->back_edge_prob
		 = ((e->probability * BLOCK_INFO (bb)->frequency)
		    / REG_BR_PROB_BASE); */

	      sreal_init (&tmp, e->probability, 0);
	      sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &tmp, &real_inv_br_prob_base);
	    }
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, bb->succs)
	{
	  if (!(e->flags & EDGE_DFS_BACK)
	      && BLOCK_INFO (e->dest)->npredecessors)
	    {
	      BLOCK_INFO (e->dest)->npredecessors--;
	      if (!BLOCK_INFO (e->dest)->npredecessors)
		{
		  if (!nextbb)
		    nextbb = e->dest;
		  else
		    BLOCK_INFO (last)->next = e->dest;

		  last = e->dest;
		}
	    }
	}
    }
}
/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;

      estimate_loops_at_level (loop->inner);

      /* Do not do this for dummy function loop.  */
      if (EDGE_COUNT (loop->latch->succs) > 0)
	{
	  /* Find current loop back edge and mark it.  */
	  e = loop_latch_edge (loop);
	  EDGE_INFO (e)->back_edge = 1;
	}

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	BLOCK_INFO (bbs[i])->tovisit = 1;
      free (bbs);
      propagate_freq (loop);
    }
}
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

static int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_EACH_BB (bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
  return true_count_max;
}
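
/* As an example of the scaling above: assuming BB_FREQ_MAX is 10000 and the
   hottest block ran count_max == 1000 times, a block executed 250 times gets
   frequency (250 * 10000 + 500) / 1000 == 2500.  */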
/* Return true if function is likely to be expensive, so there is no point to
   optimize performance of prologue, epilogue or do inlining at the expense
   of code size growth.  THRESHOLD is the limit of number of instructions
   function can execute at average to be still considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}

/* Estimate basic blocks frequency by given branch probabilities.  */

static void
estimate_bb_frequencies (struct loops *loops)
{
  basic_block bb;
  sreal freq_max;

  if (!flag_branch_probabilities || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  FOR_EACH_EDGE (e, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
/* Decide whether function is hot, cold or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    return;
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}
/* Choose appropriate section for the function.  */
static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
	 but this requires more work as the frequency needs to match
	 for all generated objects so we need to merge the frequency
	 of all instances.  For now just never set frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;
  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}
struct tree_opt_pass pass_profile =
{
  "profile",				/* name */
  NULL,					/* gate */
  tree_estimate_probability,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
};