1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
55 #include "tree-flow.h"
57 #include "tree-dump.h"
58 #include "tree-pass.h"
60 #include "tree-scalar-evolution.h"
62 #include "pointer-set.h"
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
66 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
67 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
72 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
73 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
74 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
75 #define PROB_ALWAYS (REG_BR_PROB_BASE)
77 static void combine_predictions_for_insn (rtx
, basic_block
);
78 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
79 static void predict_paths_leading_to (basic_block
, enum br_predictor
, enum prediction
);
80 static void choose_function_section (void);
81 static bool can_predict_insn_p (const_rtx
);
83 /* Information we hold about each branch predictor.
84 Filled using information from predict.def. */
88 const char *const name
; /* Name used in the debugging dumps. */
89 const int hitrate
; /* Expected hitrate used by
90 predict_insn_def call. */
94 /* Use given predictor without Dempster-Shaffer theory if it matches
95 using first_match heuristics. */
96 #define PRED_FLAG_FIRST_MATCH 1
98 /* Recompute hitrate in percent to our representation. */
100 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
102 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103 static const struct predictor_info predictor_info
[]= {
104 #include "predict.def"
106 /* Upper bound on predictors. */
111 /* Return TRUE if frequency FREQ is considered to be hot. */
114 maybe_hot_frequency_p (int freq
)
116 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
117 if (!profile_info
|| !flag_branch_probabilities
)
119 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
121 if (node
->frequency
== NODE_FREQUENCY_HOT
)
124 if (profile_status
== PROFILE_ABSENT
)
126 if (node
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
127 && freq
<= (ENTRY_BLOCK_PTR
->frequency
* 2 / 3))
129 if (freq
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
134 /* Return TRUE if frequency FREQ is considered to be hot. */
137 maybe_hot_count_p (gcov_type count
)
139 if (profile_status
!= PROFILE_READ
)
141 /* Code executed at most once is not hot. */
142 if (profile_info
->runs
>= count
)
145 > profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
));
148 /* Return true in case BB can be CPU intensive and should be optimized
149 for maximal performance. */
152 maybe_hot_bb_p (const_basic_block bb
)
154 if (profile_status
== PROFILE_READ
)
155 return maybe_hot_count_p (bb
->count
);
156 return maybe_hot_frequency_p (bb
->frequency
);
159 /* Return true if the call can be hot. */
162 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
164 if (profile_info
&& flag_branch_probabilities
166 <= profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
168 if (edge
->caller
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
169 || edge
->callee
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
173 if (edge
->caller
->frequency
== NODE_FREQUENCY_HOT
)
175 if (edge
->caller
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
176 && edge
->frequency
< CGRAPH_FREQ_BASE
* 3 / 2)
178 if (flag_guess_branch_prob
179 && edge
->frequency
<= (CGRAPH_FREQ_BASE
180 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
185 /* Return true in case BB can be CPU intensive and should be optimized
186 for maximal performance. */
189 maybe_hot_edge_p (edge e
)
191 if (profile_status
== PROFILE_READ
)
192 return maybe_hot_count_p (e
->count
);
193 return maybe_hot_frequency_p (EDGE_FREQUENCY (e
));
196 /* Return true in case BB is probably never executed. */
198 probably_never_executed_bb_p (const_basic_block bb
)
200 if (profile_info
&& flag_branch_probabilities
)
201 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
202 if ((!profile_info
|| !flag_branch_probabilities
)
203 && cgraph_node (current_function_decl
)->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
208 /* Return true when current function should always be optimized for size. */
211 optimize_function_for_size_p (struct function
*fun
)
213 return (optimize_size
215 && (cgraph_node (fun
->decl
)->frequency
216 == NODE_FREQUENCY_UNLIKELY_EXECUTED
)));
219 /* Return true when current function should always be optimized for speed. */
222 optimize_function_for_speed_p (struct function
*fun
)
224 return !optimize_function_for_size_p (fun
);
227 /* Return TRUE when BB should be optimized for size. */
230 optimize_bb_for_size_p (const_basic_block bb
)
232 return optimize_function_for_size_p (cfun
) || !maybe_hot_bb_p (bb
);
235 /* Return TRUE when BB should be optimized for speed. */
238 optimize_bb_for_speed_p (const_basic_block bb
)
240 return !optimize_bb_for_size_p (bb
);
243 /* Return TRUE when BB should be optimized for size. */
246 optimize_edge_for_size_p (edge e
)
248 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
251 /* Return TRUE when BB should be optimized for speed. */
254 optimize_edge_for_speed_p (edge e
)
256 return !optimize_edge_for_size_p (e
);
259 /* Return TRUE when BB should be optimized for size. */
262 optimize_insn_for_size_p (void)
264 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
267 /* Return TRUE when BB should be optimized for speed. */
270 optimize_insn_for_speed_p (void)
272 return !optimize_insn_for_size_p ();
275 /* Return TRUE when LOOP should be optimized for size. */
278 optimize_loop_for_size_p (struct loop
*loop
)
280 return optimize_bb_for_size_p (loop
->header
);
283 /* Return TRUE when LOOP should be optimized for speed. */
286 optimize_loop_for_speed_p (struct loop
*loop
)
288 return optimize_bb_for_speed_p (loop
->header
);
291 /* Return TRUE when LOOP nest should be optimized for speed. */
294 optimize_loop_nest_for_speed_p (struct loop
*loop
)
296 struct loop
*l
= loop
;
297 if (optimize_loop_for_speed_p (loop
))
300 while (l
&& l
!= loop
)
302 if (optimize_loop_for_speed_p (l
))
310 while (l
!= loop
&& !l
->next
)
319 /* Return TRUE when LOOP nest should be optimized for size. */
322 optimize_loop_nest_for_size_p (struct loop
*loop
)
324 return !optimize_loop_nest_for_speed_p (loop
);
327 /* Return true when edge E is likely to be well predictable by branch
331 predictable_edge_p (edge e
)
333 if (profile_status
== PROFILE_ABSENT
)
336 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
337 || (REG_BR_PROB_BASE
- e
->probability
338 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
344 /* Set RTL expansion for BB profile. */
347 rtl_profile_for_bb (basic_block bb
)
349 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (bb
);
352 /* Set RTL expansion for edge profile. */
355 rtl_profile_for_edge (edge e
)
357 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
360 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
362 default_rtl_profile (void)
364 crtl
->maybe_hot_insn_p
= true;
367 /* Return true if the one of outgoing edges is already predicted by
371 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
374 if (!INSN_P (BB_END (bb
)))
376 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
377 if (REG_NOTE_KIND (note
) == REG_BR_PRED
378 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */
static struct pointer_map_t *bb_predictions;
388 /* Return true if the one of outgoing edges is already predicted by
392 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
394 struct edge_prediction
*i
;
395 void **preds
= pointer_map_contains (bb_predictions
, bb
);
400 for (i
= (struct edge_prediction
*) *preds
; i
; i
= i
->ep_next
)
401 if (i
->ep_predictor
== predictor
)
406 /* Return true when the probability of edge is reliable.
408 The profile guessing code is good at predicting branch outcome (ie.
409 taken/not taken), that is predicted right slightly over 75% of time.
410 It is however notoriously poor on predicting the probability itself.
411 In general the profile appear a lot flatter (with probabilities closer
412 to 50%) than the reality so it is bad idea to use it to drive optimization
413 such as those disabling dynamic branch prediction for well predictable
416 There are two exceptions - edges leading to noreturn edges and edges
417 predicted by number of iterations heuristics are predicted well. This macro
418 should be able to distinguish those, but at the moment it simply check for
419 noreturn heuristic that is only one giving probability over 99% or bellow
420 1%. In future we might want to propagate reliability information across the
421 CFG if we find this information useful on multiple places. */
423 probability_reliable_p (int prob
)
425 return (profile_status
== PROFILE_READ
426 || (profile_status
== PROFILE_GUESSED
427 && (prob
<= HITRATE (1) || prob
>= HITRATE (99))));
430 /* Same predicate as above, working on edges. */
432 edge_probability_reliable_p (const_edge e
)
434 return probability_reliable_p (e
->probability
);
437 /* Same predicate as edge_probability_reliable_p, working on notes. */
439 br_prob_note_reliable_p (const_rtx note
)
441 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
442 return probability_reliable_p (INTVAL (XEXP (note
, 0)));
446 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
448 gcc_assert (any_condjump_p (insn
));
449 if (!flag_guess_branch_prob
)
452 add_reg_note (insn
, REG_BR_PRED
,
453 gen_rtx_CONCAT (VOIDmode
,
454 GEN_INT ((int) predictor
),
455 GEN_INT ((int) probability
)));
458 /* Predict insn by given predictor. */
461 predict_insn_def (rtx insn
, enum br_predictor predictor
,
462 enum prediction taken
)
464 int probability
= predictor_info
[(int) predictor
].hitrate
;
467 probability
= REG_BR_PROB_BASE
- probability
;
469 predict_insn (insn
, predictor
, probability
);
472 /* Predict edge E with given probability if possible. */
475 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
478 last_insn
= BB_END (e
->src
);
480 /* We can store the branch prediction information only about
481 conditional jumps. */
482 if (!any_condjump_p (last_insn
))
485 /* We always store probability of branching. */
486 if (e
->flags
& EDGE_FALLTHRU
)
487 probability
= REG_BR_PROB_BASE
- probability
;
489 predict_insn (last_insn
, predictor
, probability
);
492 /* Predict edge E with the given PROBABILITY. */
494 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
496 gcc_assert (profile_status
!= PROFILE_GUESSED
);
497 if ((e
->src
!= ENTRY_BLOCK_PTR
&& EDGE_COUNT (e
->src
->succs
) > 1)
498 && flag_guess_branch_prob
&& optimize
)
500 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
501 void **preds
= pointer_map_insert (bb_predictions
, e
->src
);
503 i
->ep_next
= (struct edge_prediction
*) *preds
;
505 i
->ep_probability
= probability
;
506 i
->ep_predictor
= predictor
;
511 /* Remove all predictions on given basic block that are attached
514 remove_predictions_associated_with_edge (edge e
)
521 preds
= pointer_map_contains (bb_predictions
, e
->src
);
525 struct edge_prediction
**prediction
= (struct edge_prediction
**) preds
;
526 struct edge_prediction
*next
;
530 if ((*prediction
)->ep_edge
== e
)
532 next
= (*prediction
)->ep_next
;
537 prediction
= &((*prediction
)->ep_next
);
542 /* Clears the list of predictions stored for BB. */
545 clear_bb_predictions (basic_block bb
)
547 void **preds
= pointer_map_contains (bb_predictions
, bb
);
548 struct edge_prediction
*pred
, *next
;
553 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= next
)
555 next
= pred
->ep_next
;
561 /* Return true when we can store prediction on insn INSN.
562 At the moment we represent predictions only on conditional
563 jumps, not at computed jump or other complicated cases. */
565 can_predict_insn_p (const_rtx insn
)
567 return (JUMP_P (insn
)
568 && any_condjump_p (insn
)
569 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
572 /* Predict edge E by given predictor if possible. */
575 predict_edge_def (edge e
, enum br_predictor predictor
,
576 enum prediction taken
)
578 int probability
= predictor_info
[(int) predictor
].hitrate
;
581 probability
= REG_BR_PROB_BASE
- probability
;
583 predict_edge (e
, predictor
, probability
);
586 /* Invert all branch predictions or probability notes in the INSN. This needs
587 to be done each time we invert the condition used by the jump. */
590 invert_br_probabilities (rtx insn
)
594 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
595 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
596 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
597 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
598 XEXP (XEXP (note
, 0), 1)
599 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
602 /* Dump information about the branch prediction to the output file. */
605 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
606 basic_block bb
, int used
)
614 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
615 if (! (e
->flags
& EDGE_FALLTHRU
))
618 fprintf (file
, " %s heuristics%s: %.1f%%",
619 predictor_info
[predictor
].name
,
620 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
624 fprintf (file
, " exec ");
625 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
628 fprintf (file
, " hit ");
629 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
630 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
634 fprintf (file
, "\n");
637 /* We can not predict the probabilities of outgoing edges of bb. Set them
638 evenly and hope for the best. */
640 set_even_probabilities (basic_block bb
)
646 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
647 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
649 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
650 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
651 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
656 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
657 note if not already present. Remove now useless REG_BR_PRED notes. */
660 combine_predictions_for_insn (rtx insn
, basic_block bb
)
665 int best_probability
= PROB_EVEN
;
666 enum br_predictor best_predictor
= END_PREDICTORS
;
667 int combined_probability
= REG_BR_PROB_BASE
/ 2;
669 bool first_match
= false;
672 if (!can_predict_insn_p (insn
))
674 set_even_probabilities (bb
);
678 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
679 pnote
= ®_NOTES (insn
);
681 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
684 /* We implement "first match" heuristics and use probability guessed
685 by predictor with smallest index. */
686 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
687 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
689 enum br_predictor predictor
= ((enum br_predictor
)
690 INTVAL (XEXP (XEXP (note
, 0), 0)));
691 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
694 if (best_predictor
> predictor
)
695 best_probability
= probability
, best_predictor
= predictor
;
697 d
= (combined_probability
* probability
698 + (REG_BR_PROB_BASE
- combined_probability
)
699 * (REG_BR_PROB_BASE
- probability
));
701 /* Use FP math to avoid overflows of 32bit integers. */
703 /* If one probability is 0% and one 100%, avoid division by zero. */
704 combined_probability
= REG_BR_PROB_BASE
/ 2;
706 combined_probability
= (((double) combined_probability
) * probability
707 * REG_BR_PROB_BASE
/ d
+ 0.5);
710 /* Decide which heuristic to use. In case we didn't match anything,
711 use no_prediction heuristic, in case we did match, use either
712 first match or Dempster-Shaffer theory depending on the flags. */
714 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
718 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
719 combined_probability
, bb
, true);
722 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
724 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
729 combined_probability
= best_probability
;
730 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
734 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
736 enum br_predictor predictor
= ((enum br_predictor
)
737 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
738 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
740 dump_prediction (dump_file
, predictor
, probability
, bb
,
741 !first_match
|| best_predictor
== predictor
);
742 *pnote
= XEXP (*pnote
, 1);
745 pnote
= &XEXP (*pnote
, 1);
750 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (combined_probability
));
752 /* Save the prediction into CFG in case we are seeing non-degenerated
754 if (!single_succ_p (bb
))
756 BRANCH_EDGE (bb
)->probability
= combined_probability
;
757 FALLTHRU_EDGE (bb
)->probability
758 = REG_BR_PROB_BASE
- combined_probability
;
761 else if (!single_succ_p (bb
))
763 int prob
= INTVAL (XEXP (prob_note
, 0));
765 BRANCH_EDGE (bb
)->probability
= prob
;
766 FALLTHRU_EDGE (bb
)->probability
= REG_BR_PROB_BASE
- prob
;
769 single_succ_edge (bb
)->probability
= REG_BR_PROB_BASE
;
772 /* Combine predictions into single probability and store them into CFG.
773 Remove now useless prediction entries. */
776 combine_predictions_for_bb (basic_block bb
)
778 int best_probability
= PROB_EVEN
;
779 enum br_predictor best_predictor
= END_PREDICTORS
;
780 int combined_probability
= REG_BR_PROB_BASE
/ 2;
782 bool first_match
= false;
784 struct edge_prediction
*pred
;
786 edge e
, first
= NULL
, second
= NULL
;
790 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
791 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
794 if (first
&& !second
)
800 /* When there is no successor or only one choice, prediction is easy.
802 We are lazy for now and predict only basic blocks with two outgoing
803 edges. It is possible to predict generic case too, but we have to
804 ignore first match heuristics and do more involved combining. Implement
809 set_even_probabilities (bb
);
810 clear_bb_predictions (bb
);
812 fprintf (dump_file
, "%i edges in bb %i predicted to even probabilities\n",
818 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
820 preds
= pointer_map_contains (bb_predictions
, bb
);
823 /* We implement "first match" heuristics and use probability guessed
824 by predictor with smallest index. */
825 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
827 enum br_predictor predictor
= pred
->ep_predictor
;
828 int probability
= pred
->ep_probability
;
830 if (pred
->ep_edge
!= first
)
831 probability
= REG_BR_PROB_BASE
- probability
;
834 /* First match heuristics would be widly confused if we predicted
836 if (best_predictor
> predictor
)
838 struct edge_prediction
*pred2
;
839 int prob
= probability
;
841 for (pred2
= (struct edge_prediction
*) *preds
; pred2
; pred2
= pred2
->ep_next
)
842 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
844 int probability2
= pred
->ep_probability
;
846 if (pred2
->ep_edge
!= first
)
847 probability2
= REG_BR_PROB_BASE
- probability2
;
849 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
850 (probability2
< REG_BR_PROB_BASE
/ 2))
853 /* If the same predictor later gave better result, go for it! */
854 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
855 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
859 best_probability
= prob
, best_predictor
= predictor
;
862 d
= (combined_probability
* probability
863 + (REG_BR_PROB_BASE
- combined_probability
)
864 * (REG_BR_PROB_BASE
- probability
));
866 /* Use FP math to avoid overflows of 32bit integers. */
868 /* If one probability is 0% and one 100%, avoid division by zero. */
869 combined_probability
= REG_BR_PROB_BASE
/ 2;
871 combined_probability
= (((double) combined_probability
)
873 * REG_BR_PROB_BASE
/ d
+ 0.5);
877 /* Decide which heuristic to use. In case we didn't match anything,
878 use no_prediction heuristic, in case we did match, use either
879 first match or Dempster-Shaffer theory depending on the flags. */
881 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
885 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
888 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
890 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
895 combined_probability
= best_probability
;
896 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
900 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
902 enum br_predictor predictor
= pred
->ep_predictor
;
903 int probability
= pred
->ep_probability
;
905 if (pred
->ep_edge
!= EDGE_SUCC (bb
, 0))
906 probability
= REG_BR_PROB_BASE
- probability
;
907 dump_prediction (dump_file
, predictor
, probability
, bb
,
908 !first_match
|| best_predictor
== predictor
);
911 clear_bb_predictions (bb
);
915 first
->probability
= combined_probability
;
916 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
920 /* Predict edge probabilities by exploiting loop structure. */
928 /* Try to predict out blocks in a loop that are not part of a
930 FOR_EACH_LOOP (li
, loop
, 0)
932 basic_block bb
, *bbs
;
934 VEC (edge
, heap
) *exits
;
935 struct tree_niter_desc niter_desc
;
938 exits
= get_loop_exit_edges (loop
);
939 n_exits
= VEC_length (edge
, exits
);
941 for (j
= 0; VEC_iterate (edge
, exits
, j
, ex
); j
++)
944 HOST_WIDE_INT nitercst
;
945 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
947 enum br_predictor predictor
;
949 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false))
950 niter
= niter_desc
.niter
;
951 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
952 niter
= loop_niter_by_eval (loop
, ex
);
954 if (TREE_CODE (niter
) == INTEGER_CST
)
956 if (host_integerp (niter
, 1)
957 && compare_tree_int (niter
, max
-1) == -1)
958 nitercst
= tree_low_cst (niter
, 1) + 1;
961 predictor
= PRED_LOOP_ITERATIONS
;
963 /* If we have just one exit and we can derive some information about
964 the number of iterations of the loop from the statements inside
965 the loop, use it to predict this exit. */
966 else if (n_exits
== 1)
968 nitercst
= estimated_loop_iterations_int (loop
, false);
974 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
979 probability
= ((REG_BR_PROB_BASE
+ nitercst
/ 2) / nitercst
);
980 predict_edge (ex
, predictor
, probability
);
982 VEC_free (edge
, heap
, exits
);
984 bbs
= get_loop_body (loop
);
986 for (j
= 0; j
< loop
->num_nodes
; j
++)
988 int header_found
= 0;
994 /* Bypass loop heuristics on continue statement. These
995 statements construct loops via "non-loop" constructs
996 in the source language and are better to be handled
998 if (predicted_by_p (bb
, PRED_CONTINUE
))
1001 /* Loop branch heuristics - predict an edge back to a
1002 loop's head as taken. */
1003 if (bb
== loop
->latch
)
1005 e
= find_edge (loop
->latch
, loop
->header
);
1009 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
1013 /* Loop exit heuristics - predict an edge exiting the loop if the
1014 conditional has no loop header successors as not taken. */
1016 /* If we already used more reliable loop exit predictors, do not
1017 bother with PRED_LOOP_EXIT. */
1018 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1019 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS
))
1021 /* For loop with many exits we don't want to predict all exits
1022 with the pretty large probability, because if all exits are
1023 considered in row, the loop would be predicted to iterate
1024 almost never. The code to divide probability by number of
1025 exits is very rough. It should compute the number of exits
1026 taken in each patch through function (not the overall number
1027 of exits that might be a lot higher for loops with wide switch
1028 statements in them) and compute n-th square root.
1030 We limit the minimal probability by 2% to avoid
1031 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1032 as this was causing regression in perl benchmark containing such
1035 int probability
= ((REG_BR_PROB_BASE
1036 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
1038 if (probability
< HITRATE (2))
1039 probability
= HITRATE (2);
1040 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1041 if (e
->dest
->index
< NUM_FIXED_BLOCKS
1042 || !flow_bb_inside_loop_p (loop
, e
->dest
))
1043 predict_edge (e
, PRED_LOOP_EXIT
, probability
);
1047 /* Free basic blocks from get_loop_body. */
1052 /* Attempt to predict probabilities of BB outgoing edges using local
1055 bb_estimate_probability_locally (basic_block bb
)
1057 rtx last_insn
= BB_END (bb
);
1060 if (! can_predict_insn_p (last_insn
))
1062 cond
= get_condition (last_insn
, NULL
, false, false);
1066 /* Try "pointer heuristic."
1067 A comparison ptr == 0 is predicted as false.
1068 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1069 if (COMPARISON_P (cond
)
1070 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
1071 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
1073 if (GET_CODE (cond
) == EQ
)
1074 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
1075 else if (GET_CODE (cond
) == NE
)
1076 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
1080 /* Try "opcode heuristic."
1081 EQ tests are usually false and NE tests are usually true. Also,
1082 most quantities are positive, so we can make the appropriate guesses
1083 about signed comparisons against zero. */
1084 switch (GET_CODE (cond
))
1087 /* Unconditional branch. */
1088 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
1089 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
1094 /* Floating point comparisons appears to behave in a very
1095 unpredictable way because of special role of = tests in
1097 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1099 /* Comparisons with 0 are often used for booleans and there is
1100 nothing useful to predict about them. */
1101 else if (XEXP (cond
, 1) == const0_rtx
1102 || XEXP (cond
, 0) == const0_rtx
)
1105 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
1110 /* Floating point comparisons appears to behave in a very
1111 unpredictable way because of special role of = tests in
1113 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1115 /* Comparisons with 0 are often used for booleans and there is
1116 nothing useful to predict about them. */
1117 else if (XEXP (cond
, 1) == const0_rtx
1118 || XEXP (cond
, 0) == const0_rtx
)
1121 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
1125 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
1129 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
1134 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1135 || XEXP (cond
, 1) == constm1_rtx
)
1136 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
1141 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1142 || XEXP (cond
, 1) == constm1_rtx
)
1143 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
1151 /* Set edge->probability for each successor edge of BB. */
1153 guess_outgoing_edge_probabilities (basic_block bb
)
1155 bb_estimate_probability_locally (bb
);
1156 combine_predictions_for_insn (BB_END (bb
), bb
);
1159 static tree
expr_expected_value (tree
, bitmap
);
1161 /* Helper function for expr_expected_value. */
1164 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
, tree op1
, bitmap visited
)
1168 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1170 if (TREE_CONSTANT (op0
))
1173 if (code
!= SSA_NAME
)
1176 def
= SSA_NAME_DEF_STMT (op0
);
1178 /* If we were already here, break the infinite cycle. */
1179 if (bitmap_bit_p (visited
, SSA_NAME_VERSION (op0
)))
1181 bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
));
1183 if (gimple_code (def
) == GIMPLE_PHI
)
1185 /* All the arguments of the PHI node must have the same constant
1187 int i
, n
= gimple_phi_num_args (def
);
1188 tree val
= NULL
, new_val
;
1190 for (i
= 0; i
< n
; i
++)
1192 tree arg
= PHI_ARG_DEF (def
, i
);
1194 /* If this PHI has itself as an argument, we cannot
1195 determine the string length of this argument. However,
1196 if we can find an expected constant value for the other
1197 PHI args then we can still be sure that this is
1198 likely a constant. So be optimistic and just
1199 continue with the next argument. */
1200 if (arg
== PHI_RESULT (def
))
1203 new_val
= expr_expected_value (arg
, visited
);
1208 else if (!operand_equal_p (val
, new_val
, false))
1213 if (is_gimple_assign (def
))
1215 if (gimple_assign_lhs (def
) != op0
)
1218 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
1219 gimple_assign_rhs1 (def
),
1220 gimple_assign_rhs_code (def
),
1221 gimple_assign_rhs2 (def
),
1225 if (is_gimple_call (def
))
1227 tree decl
= gimple_call_fndecl (def
);
1230 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
1231 && DECL_FUNCTION_CODE (decl
) == BUILT_IN_EXPECT
)
1235 if (gimple_call_num_args (def
) != 2)
1237 val
= gimple_call_arg (def
, 0);
1238 if (TREE_CONSTANT (val
))
1240 return gimple_call_arg (def
, 1);
1247 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
1250 op0
= expr_expected_value (op0
, visited
);
1253 op1
= expr_expected_value (op1
, visited
);
1256 res
= fold_build2 (code
, type
, op0
, op1
);
1257 if (TREE_CONSTANT (res
))
1261 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
1264 op0
= expr_expected_value (op0
, visited
);
1267 res
= fold_build1 (code
, type
, op0
);
1268 if (TREE_CONSTANT (res
))
1275 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1276 The function is used by builtin_expect branch predictor so the evidence
1277 must come from this construct and additional possible constant folding.
1279 We may want to implement more involved value guess (such as value range
1280 propagation based prediction), but such tricks shall go to new
1284 expr_expected_value (tree expr
, bitmap visited
)
1286 enum tree_code code
;
1289 if (TREE_CONSTANT (expr
))
1292 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
1293 return expr_expected_value_1 (TREE_TYPE (expr
),
1294 op0
, code
, op1
, visited
);
1298 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1299 we no longer need. */
1301 strip_predict_hints (void)
1309 gimple_stmt_iterator bi
;
1310 for (bi
= gsi_start_bb (bb
); !gsi_end_p (bi
);)
1312 gimple stmt
= gsi_stmt (bi
);
1314 if (gimple_code (stmt
) == GIMPLE_PREDICT
)
1316 gsi_remove (&bi
, true);
1319 else if (gimple_code (stmt
) == GIMPLE_CALL
)
1321 tree fndecl
= gimple_call_fndecl (stmt
);
1324 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
1325 && DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_EXPECT
1326 && gimple_call_num_args (stmt
) == 2)
1328 var
= gimple_call_lhs (stmt
);
1329 ass_stmt
= gimple_build_assign (var
, gimple_call_arg (stmt
, 0));
1331 gsi_replace (&bi
, ass_stmt
, true);
1340 /* Predict using opcode of the last statement in basic block. */
1342 tree_predict_by_opcode (basic_block bb
)
1344 gimple stmt
= last_stmt (bb
);
1353 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1355 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1356 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1358 op0
= gimple_cond_lhs (stmt
);
1359 op1
= gimple_cond_rhs (stmt
);
1360 cmp
= gimple_cond_code (stmt
);
1361 type
= TREE_TYPE (op0
);
1362 visited
= BITMAP_ALLOC (NULL
);
1363 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, visited
);
1364 BITMAP_FREE (visited
);
1367 if (integer_zerop (val
))
1368 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, NOT_TAKEN
);
1370 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, TAKEN
);
1373 /* Try "pointer heuristic."
1374 A comparison ptr == 0 is predicted as false.
1375 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1376 if (POINTER_TYPE_P (type
))
1379 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
1380 else if (cmp
== NE_EXPR
)
1381 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
1385 /* Try "opcode heuristic."
1386 EQ tests are usually false and NE tests are usually true. Also,
1387 most quantities are positive, so we can make the appropriate guesses
1388 about signed comparisons against zero. */
1393 /* Floating point comparisons appears to behave in a very
1394 unpredictable way because of special role of = tests in
1396 if (FLOAT_TYPE_P (type
))
1398 /* Comparisons with 0 are often used for booleans and there is
1399 nothing useful to predict about them. */
1400 else if (integer_zerop (op0
) || integer_zerop (op1
))
1403 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
1408 /* Floating point comparisons appears to behave in a very
1409 unpredictable way because of special role of = tests in
1411 if (FLOAT_TYPE_P (type
))
1413 /* Comparisons with 0 are often used for booleans and there is
1414 nothing useful to predict about them. */
1415 else if (integer_zerop (op0
)
1416 || integer_zerop (op1
))
1419 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
1423 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
1426 case UNORDERED_EXPR
:
1427 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
1432 if (integer_zerop (op1
)
1433 || integer_onep (op1
)
1434 || integer_all_onesp (op1
)
1437 || real_minus_onep (op1
))
1438 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
1443 if (integer_zerop (op1
)
1444 || integer_onep (op1
)
1445 || integer_all_onesp (op1
)
1448 || real_minus_onep (op1
))
1449 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
1457 /* Try to guess whether the value of return means error code. */
1459 static enum br_predictor
1460 return_prediction (tree val
, enum prediction
*prediction
)
1464 return PRED_NO_PREDICTION
;
1465 /* Different heuristics for pointers and scalars. */
1466 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1468 /* NULL is usually not returned. */
1469 if (integer_zerop (val
))
1471 *prediction
= NOT_TAKEN
;
1472 return PRED_NULL_RETURN
;
1475 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1477 /* Negative return values are often used to indicate
1479 if (TREE_CODE (val
) == INTEGER_CST
1480 && tree_int_cst_sgn (val
) < 0)
1482 *prediction
= NOT_TAKEN
;
1483 return PRED_NEGATIVE_RETURN
;
1485 /* Constant return values seems to be commonly taken.
1486 Zero/one often represent booleans so exclude them from the
1488 if (TREE_CONSTANT (val
)
1489 && (!integer_zerop (val
) && !integer_onep (val
)))
1491 *prediction
= TAKEN
;
1492 return PRED_CONST_RETURN
;
1495 return PRED_NO_PREDICTION
;
1498 /* Find the basic block with return expression and look up for possible
1499 return value trying to apply RETURN_PREDICTION heuristics. */
1501 apply_return_prediction (void)
1503 gimple return_stmt
= NULL
;
1507 int phi_num_args
, i
;
1508 enum br_predictor pred
;
1509 enum prediction direction
;
1512 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
1514 return_stmt
= last_stmt (e
->src
);
1516 && gimple_code (return_stmt
) == GIMPLE_RETURN
)
1521 return_val
= gimple_return_retval (return_stmt
);
1524 if (TREE_CODE (return_val
) != SSA_NAME
1525 || !SSA_NAME_DEF_STMT (return_val
)
1526 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
1528 phi
= SSA_NAME_DEF_STMT (return_val
);
1529 phi_num_args
= gimple_phi_num_args (phi
);
1530 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
1532 /* Avoid the degenerate case where all return values form the function
1533 belongs to same category (ie they are all positive constants)
1534 so we can hardly say something about them. */
1535 for (i
= 1; i
< phi_num_args
; i
++)
1536 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
1538 if (i
!= phi_num_args
)
1539 for (i
= 0; i
< phi_num_args
; i
++)
1541 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
1542 if (pred
!= PRED_NO_PREDICTION
)
1543 predict_paths_leading_to (gimple_phi_arg_edge (phi
, i
)->src
, pred
,
1548 /* Look for basic block that contains unlikely to happen events
1549 (such as noreturn calls) and mark all paths leading to execution
1550 of this basic blocks as unlikely. */
1553 tree_bb_level_predictions (void)
1556 bool has_return_edges
= false;
1560 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
1561 if (!(e
->flags
& (EDGE_ABNORMAL
| EDGE_FAKE
| EDGE_EH
)))
1563 has_return_edges
= true;
1567 apply_return_prediction ();
1571 gimple_stmt_iterator gsi
;
1573 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1575 gimple stmt
= gsi_stmt (gsi
);
1578 if (is_gimple_call (stmt
))
1580 if ((gimple_call_flags (stmt
) & ECF_NORETURN
)
1581 && has_return_edges
)
1582 predict_paths_leading_to (bb
, PRED_NORETURN
,
1584 decl
= gimple_call_fndecl (stmt
);
1586 && lookup_attribute ("cold",
1587 DECL_ATTRIBUTES (decl
)))
1588 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
1591 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
1593 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
1594 gimple_predict_outcome (stmt
));
1595 /* Keep GIMPLE_PREDICT around so early inlining will propagate
1596 hints to callers. */
1602 #ifdef ENABLE_CHECKING
1604 /* Callback for pointer_map_traverse, asserts that the pointer map is
1608 assert_is_empty (const void *key ATTRIBUTE_UNUSED
, void **value
,
1609 void *data ATTRIBUTE_UNUSED
)
1611 gcc_assert (!*value
);
1616 /* Predict branch probabilities and estimate profile for basic block BB. */
1619 tree_estimate_probability_bb (basic_block bb
)
1625 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1627 /* Predict early returns to be probable, as we've already taken
1628 care for error returns and other cases are often used for
1629 fast paths through function.
1631 Since we've already removed the return statements, we are
1632 looking for CFG like:
1642 if (e
->dest
!= bb
->next_bb
1643 && e
->dest
!= EXIT_BLOCK_PTR
1644 && single_succ_p (e
->dest
)
1645 && single_succ_edge (e
->dest
)->dest
== EXIT_BLOCK_PTR
1646 && (last
= last_stmt (e
->dest
)) != NULL
1647 && gimple_code (last
) == GIMPLE_RETURN
)
1652 if (single_succ_p (bb
))
1654 FOR_EACH_EDGE (e1
, ei1
, bb
->preds
)
1655 if (!predicted_by_p (e1
->src
, PRED_NULL_RETURN
)
1656 && !predicted_by_p (e1
->src
, PRED_CONST_RETURN
)
1657 && !predicted_by_p (e1
->src
, PRED_NEGATIVE_RETURN
))
1658 predict_edge_def (e1
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
1661 if (!predicted_by_p (e
->src
, PRED_NULL_RETURN
)
1662 && !predicted_by_p (e
->src
, PRED_CONST_RETURN
)
1663 && !predicted_by_p (e
->src
, PRED_NEGATIVE_RETURN
))
1664 predict_edge_def (e
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
1667 /* Look for block we are guarding (ie we dominate it,
1668 but it doesn't postdominate us). */
1669 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
1670 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
1671 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
1673 gimple_stmt_iterator bi
;
1675 /* The call heuristic claims that a guarded function call
1676 is improbable. This is because such calls are often used
1677 to signal exceptional situations such as printing error
1679 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
1682 gimple stmt
= gsi_stmt (bi
);
1683 if (is_gimple_call (stmt
)
1684 /* Constant and pure calls are hardly used to signalize
1685 something exceptional. */
1686 && gimple_has_side_effects (stmt
))
1688 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
1694 tree_predict_by_opcode (bb
);
1697 /* Predict branch probabilities and estimate profile of the tree CFG.
1698 This function can be called from the loop optimizers to recompute
1699 the profile information. */
1702 tree_estimate_probability (void)
1706 add_noreturn_fake_exit_edges ();
1707 connect_infinite_loops_to_exit ();
1708 /* We use loop_niter_by_eval, which requires that the loops have
1710 create_preheaders (CP_SIMPLE_PREHEADERS
);
1711 calculate_dominance_info (CDI_POST_DOMINATORS
);
1713 bb_predictions
= pointer_map_create ();
1714 tree_bb_level_predictions ();
1715 record_loop_exits ();
1717 if (number_of_loops () > 1)
1721 tree_estimate_probability_bb (bb
);
1724 combine_predictions_for_bb (bb
);
1726 #ifdef ENABLE_CHECKING
1727 pointer_map_traverse (bb_predictions
, assert_is_empty
, NULL
);
1729 pointer_map_destroy (bb_predictions
);
1730 bb_predictions
= NULL
;
1732 estimate_bb_frequencies ();
1733 free_dominance_info (CDI_POST_DOMINATORS
);
1734 remove_fake_exit_edges ();
1737 /* Predict branch probabilities and estimate profile of the tree CFG.
1738 This is the driver function for PASS_PROFILE. */
1741 tree_estimate_probability_driver (void)
1745 loop_optimizer_init (0);
1746 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1747 flow_loops_dump (dump_file
, NULL
, 0);
1749 mark_irreducible_loops ();
1751 nb_loops
= number_of_loops ();
1755 tree_estimate_probability ();
1760 loop_optimizer_finalize ();
1761 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1762 gimple_dump_cfg (dump_file
, dump_flags
);
1763 if (profile_status
== PROFILE_ABSENT
)
1764 profile_status
= PROFILE_GUESSED
;
1768 /* Predict edges to successors of CUR whose sources are not postdominated by
1769 BB by PRED and recurse to all postdominators. */
1772 predict_paths_for_bb (basic_block cur
, basic_block bb
,
1773 enum br_predictor pred
,
1774 enum prediction taken
)
1780 /* We are looking for all edges forming edge cut induced by
1781 set of all blocks postdominated by BB. */
1782 FOR_EACH_EDGE (e
, ei
, cur
->preds
)
1783 if (e
->src
->index
>= NUM_FIXED_BLOCKS
1784 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, bb
))
1786 gcc_assert (bb
== cur
|| dominated_by_p (CDI_POST_DOMINATORS
, cur
, bb
));
1787 predict_edge_def (e
, pred
, taken
);
1789 for (son
= first_dom_son (CDI_POST_DOMINATORS
, cur
);
1791 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
1792 predict_paths_for_bb (son
, bb
, pred
, taken
);
1795 /* Sets branch probabilities according to PREDiction and
1799 predict_paths_leading_to (basic_block bb
, enum br_predictor pred
,
1800 enum prediction taken
)
1802 predict_paths_for_bb (bb
, bb
, pred
, taken
);
1805 /* This is used to carry information about basic blocks. It is
1806 attached to the AUX field of the standard CFG block. */
1808 typedef struct block_info_def
1810 /* Estimated frequency of execution of basic_block. */
1813 /* To keep queue of basic blocks to process. */
1816 /* Number of predecessors we need to visit first. */
1820 /* Similar information for edges. */
1821 typedef struct edge_info_def
1823 /* In case edge is a loopback edge, the probability edge will be reached
1824 in case header is. Estimated number of iterations of the loop can be
1825 then computed as 1 / (1 - back_edge_prob). */
1826 sreal back_edge_prob
;
1827 /* True if the edge is a loopback edge in the natural loop. */
1828 unsigned int back_edge
:1;
1831 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1832 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1834 /* Helper function for estimate_bb_frequencies.
1835 Propagate the frequencies in blocks marked in
1836 TOVISIT, starting in HEAD. */
1839 propagate_freq (basic_block head
, bitmap tovisit
)
1848 /* For each basic block we need to visit count number of his predecessors
1849 we need to visit first. */
1850 EXECUTE_IF_SET_IN_BITMAP (tovisit
, 0, i
, bi
)
1855 /* The outermost "loop" includes the exit block, which we can not
1856 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1857 directly. Do the same for the entry block. */
1858 bb
= BASIC_BLOCK (i
);
1860 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1862 bool visit
= bitmap_bit_p (tovisit
, e
->src
->index
);
1864 if (visit
&& !(e
->flags
& EDGE_DFS_BACK
))
1866 else if (visit
&& dump_file
&& !EDGE_INFO (e
)->back_edge
)
1868 "Irreducible region hit, ignoring edge to %i->%i\n",
1869 e
->src
->index
, bb
->index
);
1871 BLOCK_INFO (bb
)->npredecessors
= count
;
1874 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
1876 for (bb
= head
; bb
; bb
= nextbb
)
1879 sreal cyclic_probability
, frequency
;
1881 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
1882 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
1884 nextbb
= BLOCK_INFO (bb
)->next
;
1885 BLOCK_INFO (bb
)->next
= NULL
;
1887 /* Compute frequency of basic block. */
1890 #ifdef ENABLE_CHECKING
1891 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1892 gcc_assert (!bitmap_bit_p (tovisit
, e
->src
->index
)
1893 || (e
->flags
& EDGE_DFS_BACK
));
1896 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1897 if (EDGE_INFO (e
)->back_edge
)
1899 sreal_add (&cyclic_probability
, &cyclic_probability
,
1900 &EDGE_INFO (e
)->back_edge_prob
);
1902 else if (!(e
->flags
& EDGE_DFS_BACK
))
1906 /* frequency += (e->probability
1907 * BLOCK_INFO (e->src)->frequency /
1908 REG_BR_PROB_BASE); */
1910 sreal_init (&tmp
, e
->probability
, 0);
1911 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
1912 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
1913 sreal_add (&frequency
, &frequency
, &tmp
);
1916 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
1918 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
1919 sizeof (frequency
));
1923 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
1925 memcpy (&cyclic_probability
, &real_almost_one
,
1926 sizeof (real_almost_one
));
1929 /* BLOCK_INFO (bb)->frequency = frequency
1930 / (1 - cyclic_probability) */
1932 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
1933 sreal_div (&BLOCK_INFO (bb
)->frequency
,
1934 &frequency
, &cyclic_probability
);
1938 bitmap_clear_bit (tovisit
, bb
->index
);
1940 e
= find_edge (bb
, head
);
1945 /* EDGE_INFO (e)->back_edge_prob
1946 = ((e->probability * BLOCK_INFO (bb)->frequency)
1947 / REG_BR_PROB_BASE); */
1949 sreal_init (&tmp
, e
->probability
, 0);
1950 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
1951 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1952 &tmp
, &real_inv_br_prob_base
);
1955 /* Propagate to successor blocks. */
1956 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1957 if (!(e
->flags
& EDGE_DFS_BACK
)
1958 && BLOCK_INFO (e
->dest
)->npredecessors
)
1960 BLOCK_INFO (e
->dest
)->npredecessors
--;
1961 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
1966 BLOCK_INFO (last
)->next
= e
->dest
;
1974 /* Estimate probabilities of loopback edges in loops at same nest level. */
1977 estimate_loops_at_level (struct loop
*first_loop
)
1981 for (loop
= first_loop
; loop
; loop
= loop
->next
)
1986 bitmap tovisit
= BITMAP_ALLOC (NULL
);
1988 estimate_loops_at_level (loop
->inner
);
1990 /* Find current loop back edge and mark it. */
1991 e
= loop_latch_edge (loop
);
1992 EDGE_INFO (e
)->back_edge
= 1;
1994 bbs
= get_loop_body (loop
);
1995 for (i
= 0; i
< loop
->num_nodes
; i
++)
1996 bitmap_set_bit (tovisit
, bbs
[i
]->index
);
1998 propagate_freq (loop
->header
, tovisit
);
1999 BITMAP_FREE (tovisit
);
2003 /* Propagates frequencies through structure of loops. */
2006 estimate_loops (void)
2008 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2011 /* Start by estimating the frequencies in the loops. */
2012 if (number_of_loops () > 1)
2013 estimate_loops_at_level (current_loops
->tree_root
->inner
);
2015 /* Now propagate the frequencies through all the blocks. */
2018 bitmap_set_bit (tovisit
, bb
->index
);
2020 propagate_freq (ENTRY_BLOCK_PTR
, tovisit
);
2021 BITMAP_FREE (tovisit
);
2024 /* Convert counts measured by profile driven feedback to frequencies.
2025 Return nonzero iff there was any nonzero execution count. */
2028 counts_to_freqs (void)
2030 gcov_type count_max
, true_count_max
= 0;
2033 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2034 true_count_max
= MAX (bb
->count
, true_count_max
);
2036 count_max
= MAX (true_count_max
, 1);
2037 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2038 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
2040 return true_count_max
;
2043 /* Return true if function is likely to be expensive, so there is no point to
2044 optimize performance of prologue, epilogue or do inlining at the expense
2045 of code size growth. THRESHOLD is the limit of number of instructions
2046 function can execute at average to be still considered not expensive. */
2049 expensive_function_p (int threshold
)
2051 unsigned int sum
= 0;
2055 /* We can not compute accurately for large thresholds due to scaled
2057 gcc_assert (threshold
<= BB_FREQ_MAX
);
2059 /* Frequencies are out of range. This either means that function contains
2060 internal loop executing more than BB_FREQ_MAX times or profile feedback
2061 is available and function has not been executed at all. */
2062 if (ENTRY_BLOCK_PTR
->frequency
== 0)
2065 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2066 limit
= ENTRY_BLOCK_PTR
->frequency
* threshold
;
2071 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
2072 insn
= NEXT_INSN (insn
))
2073 if (active_insn_p (insn
))
2075 sum
+= bb
->frequency
;
2084 /* Estimate basic blocks frequency by given branch probabilities. */
2087 estimate_bb_frequencies (void)
2092 if (profile_status
!= PROFILE_READ
|| !counts_to_freqs ())
2094 static int real_values_initialized
= 0;
2096 if (!real_values_initialized
)
2098 real_values_initialized
= 1;
2099 sreal_init (&real_zero
, 0, 0);
2100 sreal_init (&real_one
, 1, 0);
2101 sreal_init (&real_br_prob_base
, REG_BR_PROB_BASE
, 0);
2102 sreal_init (&real_bb_freq_max
, BB_FREQ_MAX
, 0);
2103 sreal_init (&real_one_half
, 1, -1);
2104 sreal_div (&real_inv_br_prob_base
, &real_one
, &real_br_prob_base
);
2105 sreal_sub (&real_almost_one
, &real_one
, &real_inv_br_prob_base
);
2108 mark_dfs_back_edges ();
2110 single_succ_edge (ENTRY_BLOCK_PTR
)->probability
= REG_BR_PROB_BASE
;
2112 /* Set up block info for each basic block. */
2113 alloc_aux_for_blocks (sizeof (struct block_info_def
));
2114 alloc_aux_for_edges (sizeof (struct edge_info_def
));
2115 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2120 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2122 sreal_init (&EDGE_INFO (e
)->back_edge_prob
, e
->probability
, 0);
2123 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
2124 &EDGE_INFO (e
)->back_edge_prob
,
2125 &real_inv_br_prob_base
);
2129 /* First compute probabilities locally for each loop from innermost
2130 to outermost to examine probabilities for back edges. */
2133 memcpy (&freq_max
, &real_zero
, sizeof (real_zero
));
2135 if (sreal_compare (&freq_max
, &BLOCK_INFO (bb
)->frequency
) < 0)
2136 memcpy (&freq_max
, &BLOCK_INFO (bb
)->frequency
, sizeof (freq_max
));
2138 sreal_div (&freq_max
, &real_bb_freq_max
, &freq_max
);
2139 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2143 sreal_mul (&tmp
, &BLOCK_INFO (bb
)->frequency
, &freq_max
);
2144 sreal_add (&tmp
, &tmp
, &real_one_half
);
2145 bb
->frequency
= sreal_to_int (&tmp
);
2148 free_aux_for_blocks ();
2149 free_aux_for_edges ();
2151 compute_function_frequency ();
2152 if (flag_reorder_functions
)
2153 choose_function_section ();
2156 /* Decide whether function is hot, cold or unlikely executed. */
2158 compute_function_frequency (void)
2161 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
2163 if (!profile_info
|| !flag_branch_probabilities
)
2165 int flags
= flags_from_decl_or_type (current_function_decl
);
2166 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
))
2168 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2169 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl
))
2171 node
->frequency
= NODE_FREQUENCY_HOT
;
2172 else if (flags
& ECF_NORETURN
)
2173 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2174 else if (MAIN_NAME_P (DECL_NAME (current_function_decl
)))
2175 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2176 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
2177 || DECL_STATIC_DESTRUCTOR (current_function_decl
))
2178 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2181 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2184 if (maybe_hot_bb_p (bb
))
2186 node
->frequency
= NODE_FREQUENCY_HOT
;
2189 if (!probably_never_executed_bb_p (bb
))
2190 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2194 /* Choose appropriate section for the function. */
2196 choose_function_section (void)
2198 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
2199 if (DECL_SECTION_NAME (current_function_decl
)
2200 || !targetm
.have_named_sections
2201 /* Theoretically we can split the gnu.linkonce text section too,
2202 but this requires more work as the frequency needs to match
2203 for all generated objects so we need to merge the frequency
2204 of all instances. For now just never set frequency for these. */
2205 || DECL_ONE_ONLY (current_function_decl
))
2208 /* If we are doing the partitioning optimization, let the optimization
2209 choose the correct section into which to put things. */
2211 if (flag_reorder_blocks_and_partition
)
2214 if (node
->frequency
== NODE_FREQUENCY_HOT
)
2215 DECL_SECTION_NAME (current_function_decl
) =
2216 build_string (strlen (HOT_TEXT_SECTION_NAME
), HOT_TEXT_SECTION_NAME
);
2217 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
2218 DECL_SECTION_NAME (current_function_decl
) =
2219 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME
),
2220 UNLIKELY_EXECUTED_TEXT_SECTION_NAME
);
2224 gate_estimate_probability (void)
2226 return flag_guess_branch_prob
;
2229 /* Build PREDICT_EXPR. */
2231 build_predict_expr (enum br_predictor predictor
, enum prediction taken
)
2233 tree t
= build1 (PREDICT_EXPR
, void_type_node
,
2234 build_int_cst (NULL
, predictor
));
2235 SET_PREDICT_EXPR_OUTCOME (t
, taken
);
2240 predictor_name (enum br_predictor predictor
)
2242 return predictor_info
[predictor
].name
;
2245 struct gimple_opt_pass pass_profile
=
2249 "profile", /* name */
2250 gate_estimate_probability
, /* gate */
2251 tree_estimate_probability_driver
, /* execute */
2254 0, /* static_pass_number */
2255 TV_BRANCH_PROB
, /* tv_id */
2256 PROP_cfg
, /* properties_required */
2257 0, /* properties_provided */
2258 0, /* properties_destroyed */
2259 0, /* todo_flags_start */
2260 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */
2264 struct gimple_opt_pass pass_strip_predict_hints
=
2268 "*strip_predict_hints", /* name */
2270 strip_predict_hints
, /* execute */
2273 0, /* static_pass_number */
2274 TV_BRANCH_PROB
, /* tv_id */
2275 PROP_cfg
, /* properties_required */
2276 0, /* properties_provided */
2277 0, /* properties_destroyed */
2278 0, /* todo_flags_start */
2279 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */