/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
32 #include "coretypes.h"
37 #include "hard-reg-set.h"
38 #include "basic-block.h"
39 #include "insn-config.h"
44 #include "diagnostic-core.h"
53 #include "tree-flow.h"
55 #include "tree-pass.h"
56 #include "tree-scalar-evolution.h"
58 #include "pointer-set.h"
60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
61 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
62 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
63 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
73 static void combine_predictions_for_insn (rtx
, basic_block
);
74 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
75 static void predict_paths_leading_to (basic_block
, enum br_predictor
, enum prediction
);
76 static void predict_paths_leading_to_edge (edge
, enum br_predictor
, enum prediction
);
77 static bool can_predict_insn_p (const_rtx
);
79 /* Information we hold about each branch predictor.
80 Filled using information from predict.def. */
84 const char *const name
; /* Name used in the debugging dumps. */
85 const int hitrate
; /* Expected hitrate used by
86 predict_insn_def call. */
90 /* Use given predictor without Dempster-Shaffer theory if it matches
91 using first_match heuristics. */
92 #define PRED_FLAG_FIRST_MATCH 1
94 /* Recompute hitrate in percent to our representation. */
96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
99 static const struct predictor_info predictor_info
[]= {
100 #include "predict.def"
102 /* Upper bound on predictors. */
107 /* Return TRUE if frequency FREQ is considered to be hot. */
110 maybe_hot_frequency_p (struct function
*fun
, int freq
)
112 struct cgraph_node
*node
= cgraph_get_node (fun
->decl
);
113 if (!profile_info
|| !flag_branch_probabilities
)
115 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
117 if (node
->frequency
== NODE_FREQUENCY_HOT
)
120 if (profile_status_for_function (fun
) == PROFILE_ABSENT
)
122 if (node
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
123 && freq
< (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun
)->frequency
* 2 / 3))
125 if (freq
< (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun
)->frequency
126 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
131 static gcov_type min_count
= -1;
133 /* Determine the threshold for hot BB counts. */
136 get_hot_bb_threshold ()
138 gcov_working_set_t
*ws
;
141 ws
= find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
));
143 min_count
= ws
->min_counter
;
148 /* Set the threshold for hot BB counts. */
151 set_hot_bb_threshold (gcov_type min
)
156 /* Return TRUE if frequency FREQ is considered to be hot. */
159 maybe_hot_count_p (struct function
*fun
, gcov_type count
)
161 if (fun
&& profile_status_for_function (fun
) != PROFILE_READ
)
163 /* Code executed at most once is not hot. */
164 if (profile_info
->runs
>= count
)
166 return (count
>= get_hot_bb_threshold ());
169 /* Return true in case BB can be CPU intensive and should be optimized
170 for maximal performance. */
173 maybe_hot_bb_p (struct function
*fun
, const_basic_block bb
)
175 gcc_checking_assert (fun
);
176 if (profile_status_for_function (fun
) == PROFILE_READ
)
177 return maybe_hot_count_p (fun
, bb
->count
);
178 return maybe_hot_frequency_p (fun
, bb
->frequency
);
181 /* Return true if the call can be hot. */
184 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
186 if (profile_info
&& flag_branch_probabilities
187 && !maybe_hot_count_p (NULL
,
190 if (edge
->caller
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
192 && edge
->callee
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
194 if (edge
->caller
->frequency
> NODE_FREQUENCY_UNLIKELY_EXECUTED
196 && edge
->callee
->frequency
<= NODE_FREQUENCY_EXECUTED_ONCE
))
200 if (edge
->caller
->frequency
== NODE_FREQUENCY_HOT
)
202 if (edge
->caller
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
203 && edge
->frequency
< CGRAPH_FREQ_BASE
* 3 / 2)
205 if (flag_guess_branch_prob
206 && edge
->frequency
<= (CGRAPH_FREQ_BASE
207 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
212 /* Return true in case BB can be CPU intensive and should be optimized
213 for maximal performance. */
216 maybe_hot_edge_p (edge e
)
218 if (profile_status
== PROFILE_READ
)
219 return maybe_hot_count_p (cfun
, e
->count
);
220 return maybe_hot_frequency_p (cfun
, EDGE_FREQUENCY (e
));
224 /* Return true in case BB is probably never executed. */
227 probably_never_executed_bb_p (struct function
*fun
, const_basic_block bb
)
229 gcc_checking_assert (fun
);
230 if (profile_info
&& flag_branch_probabilities
)
231 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
232 if ((!profile_info
|| !flag_branch_probabilities
)
233 && (cgraph_get_node (fun
->decl
)->frequency
234 == NODE_FREQUENCY_UNLIKELY_EXECUTED
))
239 /* Return true if NODE should be optimized for size. */
242 cgraph_optimize_for_size_p (struct cgraph_node
*node
)
246 if (node
&& (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
252 /* Return true when current function should always be optimized for size. */
255 optimize_function_for_size_p (struct function
*fun
)
259 if (!fun
|| !fun
->decl
)
261 return cgraph_optimize_for_size_p (cgraph_get_node (fun
->decl
));
264 /* Return true when current function should always be optimized for speed. */
267 optimize_function_for_speed_p (struct function
*fun
)
269 return !optimize_function_for_size_p (fun
);
272 /* Return TRUE when BB should be optimized for size. */
275 optimize_bb_for_size_p (const_basic_block bb
)
277 return optimize_function_for_size_p (cfun
) || !maybe_hot_bb_p (cfun
, bb
);
280 /* Return TRUE when BB should be optimized for speed. */
283 optimize_bb_for_speed_p (const_basic_block bb
)
285 return !optimize_bb_for_size_p (bb
);
288 /* Return TRUE when BB should be optimized for size. */
291 optimize_edge_for_size_p (edge e
)
293 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
296 /* Return TRUE when BB should be optimized for speed. */
299 optimize_edge_for_speed_p (edge e
)
301 return !optimize_edge_for_size_p (e
);
304 /* Return TRUE when BB should be optimized for size. */
307 optimize_insn_for_size_p (void)
309 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
312 /* Return TRUE when BB should be optimized for speed. */
315 optimize_insn_for_speed_p (void)
317 return !optimize_insn_for_size_p ();
320 /* Return TRUE when LOOP should be optimized for size. */
323 optimize_loop_for_size_p (struct loop
*loop
)
325 return optimize_bb_for_size_p (loop
->header
);
328 /* Return TRUE when LOOP should be optimized for speed. */
331 optimize_loop_for_speed_p (struct loop
*loop
)
333 return optimize_bb_for_speed_p (loop
->header
);
336 /* Return TRUE when LOOP nest should be optimized for speed. */
339 optimize_loop_nest_for_speed_p (struct loop
*loop
)
341 struct loop
*l
= loop
;
342 if (optimize_loop_for_speed_p (loop
))
345 while (l
&& l
!= loop
)
347 if (optimize_loop_for_speed_p (l
))
355 while (l
!= loop
&& !l
->next
)
364 /* Return TRUE when LOOP nest should be optimized for size. */
367 optimize_loop_nest_for_size_p (struct loop
*loop
)
369 return !optimize_loop_nest_for_speed_p (loop
);
372 /* Return true when edge E is likely to be well predictable by branch
376 predictable_edge_p (edge e
)
378 if (profile_status
== PROFILE_ABSENT
)
381 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
382 || (REG_BR_PROB_BASE
- e
->probability
383 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
389 /* Set RTL expansion for BB profile. */
392 rtl_profile_for_bb (basic_block bb
)
394 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (cfun
, bb
);
397 /* Set RTL expansion for edge profile. */
400 rtl_profile_for_edge (edge e
)
402 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
405 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
407 default_rtl_profile (void)
409 crtl
->maybe_hot_insn_p
= true;
412 /* Return true if the one of outgoing edges is already predicted by
416 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
419 if (!INSN_P (BB_END (bb
)))
421 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
422 if (REG_NOTE_KIND (note
) == REG_BR_PRED
423 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
428 /* This map contains for a basic block the list of predictions for the
431 static struct pointer_map_t
*bb_predictions
;
433 /* Structure representing predictions in tree level. */
435 struct edge_prediction
{
436 struct edge_prediction
*ep_next
;
438 enum br_predictor ep_predictor
;
442 /* Return true if the one of outgoing edges is already predicted by
446 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
448 struct edge_prediction
*i
;
449 void **preds
= pointer_map_contains (bb_predictions
, bb
);
454 for (i
= (struct edge_prediction
*) *preds
; i
; i
= i
->ep_next
)
455 if (i
->ep_predictor
== predictor
)
460 /* Return true when the probability of edge is reliable.
462 The profile guessing code is good at predicting branch outcome (ie.
463 taken/not taken), that is predicted right slightly over 75% of time.
464 It is however notoriously poor on predicting the probability itself.
465 In general the profile appear a lot flatter (with probabilities closer
466 to 50%) than the reality so it is bad idea to use it to drive optimization
467 such as those disabling dynamic branch prediction for well predictable
470 There are two exceptions - edges leading to noreturn edges and edges
471 predicted by number of iterations heuristics are predicted well. This macro
472 should be able to distinguish those, but at the moment it simply check for
473 noreturn heuristic that is only one giving probability over 99% or bellow
474 1%. In future we might want to propagate reliability information across the
475 CFG if we find this information useful on multiple places. */
477 probability_reliable_p (int prob
)
479 return (profile_status
== PROFILE_READ
480 || (profile_status
== PROFILE_GUESSED
481 && (prob
<= HITRATE (1) || prob
>= HITRATE (99))));
484 /* Same predicate as above, working on edges. */
486 edge_probability_reliable_p (const_edge e
)
488 return probability_reliable_p (e
->probability
);
491 /* Same predicate as edge_probability_reliable_p, working on notes. */
493 br_prob_note_reliable_p (const_rtx note
)
495 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
496 return probability_reliable_p (INTVAL (XEXP (note
, 0)));
500 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
502 gcc_assert (any_condjump_p (insn
));
503 if (!flag_guess_branch_prob
)
506 add_reg_note (insn
, REG_BR_PRED
,
507 gen_rtx_CONCAT (VOIDmode
,
508 GEN_INT ((int) predictor
),
509 GEN_INT ((int) probability
)));
512 /* Predict insn by given predictor. */
515 predict_insn_def (rtx insn
, enum br_predictor predictor
,
516 enum prediction taken
)
518 int probability
= predictor_info
[(int) predictor
].hitrate
;
521 probability
= REG_BR_PROB_BASE
- probability
;
523 predict_insn (insn
, predictor
, probability
);
526 /* Predict edge E with given probability if possible. */
529 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
532 last_insn
= BB_END (e
->src
);
534 /* We can store the branch prediction information only about
535 conditional jumps. */
536 if (!any_condjump_p (last_insn
))
539 /* We always store probability of branching. */
540 if (e
->flags
& EDGE_FALLTHRU
)
541 probability
= REG_BR_PROB_BASE
- probability
;
543 predict_insn (last_insn
, predictor
, probability
);
546 /* Predict edge E with the given PROBABILITY. */
548 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
550 gcc_assert (profile_status
!= PROFILE_GUESSED
);
551 if ((e
->src
!= ENTRY_BLOCK_PTR
&& EDGE_COUNT (e
->src
->succs
) > 1)
552 && flag_guess_branch_prob
&& optimize
)
554 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
555 void **preds
= pointer_map_insert (bb_predictions
, e
->src
);
557 i
->ep_next
= (struct edge_prediction
*) *preds
;
559 i
->ep_probability
= probability
;
560 i
->ep_predictor
= predictor
;
565 /* Remove all predictions on given basic block that are attached
568 remove_predictions_associated_with_edge (edge e
)
575 preds
= pointer_map_contains (bb_predictions
, e
->src
);
579 struct edge_prediction
**prediction
= (struct edge_prediction
**) preds
;
580 struct edge_prediction
*next
;
584 if ((*prediction
)->ep_edge
== e
)
586 next
= (*prediction
)->ep_next
;
591 prediction
= &((*prediction
)->ep_next
);
596 /* Clears the list of predictions stored for BB. */
599 clear_bb_predictions (basic_block bb
)
601 void **preds
= pointer_map_contains (bb_predictions
, bb
);
602 struct edge_prediction
*pred
, *next
;
607 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= next
)
609 next
= pred
->ep_next
;
615 /* Return true when we can store prediction on insn INSN.
616 At the moment we represent predictions only on conditional
617 jumps, not at computed jump or other complicated cases. */
619 can_predict_insn_p (const_rtx insn
)
621 return (JUMP_P (insn
)
622 && any_condjump_p (insn
)
623 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
626 /* Predict edge E by given predictor if possible. */
629 predict_edge_def (edge e
, enum br_predictor predictor
,
630 enum prediction taken
)
632 int probability
= predictor_info
[(int) predictor
].hitrate
;
635 probability
= REG_BR_PROB_BASE
- probability
;
637 predict_edge (e
, predictor
, probability
);
640 /* Invert all branch predictions or probability notes in the INSN. This needs
641 to be done each time we invert the condition used by the jump. */
644 invert_br_probabilities (rtx insn
)
648 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
649 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
650 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
651 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
652 XEXP (XEXP (note
, 0), 1)
653 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
656 /* Dump information about the branch prediction to the output file. */
659 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
660 basic_block bb
, int used
)
668 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
669 if (! (e
->flags
& EDGE_FALLTHRU
))
672 fprintf (file
, " %s heuristics%s: %.1f%%",
673 predictor_info
[predictor
].name
,
674 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
678 fprintf (file
, " exec ");
679 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
682 fprintf (file
, " hit ");
683 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
684 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
688 fprintf (file
, "\n");
691 /* We can not predict the probabilities of outgoing edges of bb. Set them
692 evenly and hope for the best. */
694 set_even_probabilities (basic_block bb
)
700 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
701 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
703 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
704 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
705 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
710 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
711 note if not already present. Remove now useless REG_BR_PRED notes. */
714 combine_predictions_for_insn (rtx insn
, basic_block bb
)
719 int best_probability
= PROB_EVEN
;
720 enum br_predictor best_predictor
= END_PREDICTORS
;
721 int combined_probability
= REG_BR_PROB_BASE
/ 2;
723 bool first_match
= false;
726 if (!can_predict_insn_p (insn
))
728 set_even_probabilities (bb
);
732 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
733 pnote
= ®_NOTES (insn
);
735 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
738 /* We implement "first match" heuristics and use probability guessed
739 by predictor with smallest index. */
740 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
741 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
743 enum br_predictor predictor
= ((enum br_predictor
)
744 INTVAL (XEXP (XEXP (note
, 0), 0)));
745 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
748 if (best_predictor
> predictor
)
749 best_probability
= probability
, best_predictor
= predictor
;
751 d
= (combined_probability
* probability
752 + (REG_BR_PROB_BASE
- combined_probability
)
753 * (REG_BR_PROB_BASE
- probability
));
755 /* Use FP math to avoid overflows of 32bit integers. */
757 /* If one probability is 0% and one 100%, avoid division by zero. */
758 combined_probability
= REG_BR_PROB_BASE
/ 2;
760 combined_probability
= (((double) combined_probability
) * probability
761 * REG_BR_PROB_BASE
/ d
+ 0.5);
764 /* Decide which heuristic to use. In case we didn't match anything,
765 use no_prediction heuristic, in case we did match, use either
766 first match or Dempster-Shaffer theory depending on the flags. */
768 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
772 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
773 combined_probability
, bb
, true);
776 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
778 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
783 combined_probability
= best_probability
;
784 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
788 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
790 enum br_predictor predictor
= ((enum br_predictor
)
791 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
792 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
794 dump_prediction (dump_file
, predictor
, probability
, bb
,
795 !first_match
|| best_predictor
== predictor
);
796 *pnote
= XEXP (*pnote
, 1);
799 pnote
= &XEXP (*pnote
, 1);
804 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (combined_probability
));
806 /* Save the prediction into CFG in case we are seeing non-degenerated
808 if (!single_succ_p (bb
))
810 BRANCH_EDGE (bb
)->probability
= combined_probability
;
811 FALLTHRU_EDGE (bb
)->probability
812 = REG_BR_PROB_BASE
- combined_probability
;
815 else if (!single_succ_p (bb
))
817 int prob
= INTVAL (XEXP (prob_note
, 0));
819 BRANCH_EDGE (bb
)->probability
= prob
;
820 FALLTHRU_EDGE (bb
)->probability
= REG_BR_PROB_BASE
- prob
;
823 single_succ_edge (bb
)->probability
= REG_BR_PROB_BASE
;
826 /* Combine predictions into single probability and store them into CFG.
827 Remove now useless prediction entries. */
830 combine_predictions_for_bb (basic_block bb
)
832 int best_probability
= PROB_EVEN
;
833 enum br_predictor best_predictor
= END_PREDICTORS
;
834 int combined_probability
= REG_BR_PROB_BASE
/ 2;
836 bool first_match
= false;
838 struct edge_prediction
*pred
;
840 edge e
, first
= NULL
, second
= NULL
;
844 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
845 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
848 if (first
&& !second
)
854 /* When there is no successor or only one choice, prediction is easy.
856 We are lazy for now and predict only basic blocks with two outgoing
857 edges. It is possible to predict generic case too, but we have to
858 ignore first match heuristics and do more involved combining. Implement
863 set_even_probabilities (bb
);
864 clear_bb_predictions (bb
);
866 fprintf (dump_file
, "%i edges in bb %i predicted to even probabilities\n",
872 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
874 preds
= pointer_map_contains (bb_predictions
, bb
);
877 /* We implement "first match" heuristics and use probability guessed
878 by predictor with smallest index. */
879 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
881 enum br_predictor predictor
= pred
->ep_predictor
;
882 int probability
= pred
->ep_probability
;
884 if (pred
->ep_edge
!= first
)
885 probability
= REG_BR_PROB_BASE
- probability
;
888 /* First match heuristics would be widly confused if we predicted
890 if (best_predictor
> predictor
)
892 struct edge_prediction
*pred2
;
893 int prob
= probability
;
895 for (pred2
= (struct edge_prediction
*) *preds
; pred2
; pred2
= pred2
->ep_next
)
896 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
898 int probability2
= pred
->ep_probability
;
900 if (pred2
->ep_edge
!= first
)
901 probability2
= REG_BR_PROB_BASE
- probability2
;
903 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
904 (probability2
< REG_BR_PROB_BASE
/ 2))
907 /* If the same predictor later gave better result, go for it! */
908 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
909 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
913 best_probability
= prob
, best_predictor
= predictor
;
916 d
= (combined_probability
* probability
917 + (REG_BR_PROB_BASE
- combined_probability
)
918 * (REG_BR_PROB_BASE
- probability
));
920 /* Use FP math to avoid overflows of 32bit integers. */
922 /* If one probability is 0% and one 100%, avoid division by zero. */
923 combined_probability
= REG_BR_PROB_BASE
/ 2;
925 combined_probability
= (((double) combined_probability
)
927 * REG_BR_PROB_BASE
/ d
+ 0.5);
931 /* Decide which heuristic to use. In case we didn't match anything,
932 use no_prediction heuristic, in case we did match, use either
933 first match or Dempster-Shaffer theory depending on the flags. */
935 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
939 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
942 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
944 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
949 combined_probability
= best_probability
;
950 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
954 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
956 enum br_predictor predictor
= pred
->ep_predictor
;
957 int probability
= pred
->ep_probability
;
959 if (pred
->ep_edge
!= EDGE_SUCC (bb
, 0))
960 probability
= REG_BR_PROB_BASE
- probability
;
961 dump_prediction (dump_file
, predictor
, probability
, bb
,
962 !first_match
|| best_predictor
== predictor
);
965 clear_bb_predictions (bb
);
969 first
->probability
= combined_probability
;
970 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
974 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
975 Return the SSA_NAME if the condition satisfies, NULL otherwise.
977 T1 and T2 should be one of the following cases:
978 1. T1 is SSA_NAME, T2 is NULL
979 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
980 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
983 strips_small_constant (tree t1
, tree t2
)
990 else if (TREE_CODE (t1
) == SSA_NAME
)
992 else if (host_integerp (t1
, 0))
993 value
= tree_low_cst (t1
, 0);
999 else if (host_integerp (t2
, 0))
1000 value
= tree_low_cst (t2
, 0);
1001 else if (TREE_CODE (t2
) == SSA_NAME
)
1009 if (value
<= 4 && value
>= -4)
1015 /* Return the SSA_NAME in T or T's operands.
1016 Return NULL if SSA_NAME cannot be found. */
1019 get_base_value (tree t
)
1021 if (TREE_CODE (t
) == SSA_NAME
)
1024 if (!BINARY_CLASS_P (t
))
1027 switch (TREE_OPERAND_LENGTH (t
))
1030 return strips_small_constant (TREE_OPERAND (t
, 0), NULL
);
1032 return strips_small_constant (TREE_OPERAND (t
, 0),
1033 TREE_OPERAND (t
, 1));
1039 /* Check the compare STMT in LOOP. If it compares an induction
1040 variable to a loop invariant, return true, and save
1041 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1042 Otherwise return false and set LOOP_INVAIANT to NULL. */
1045 is_comparison_with_loop_invariant_p (gimple stmt
, struct loop
*loop
,
1046 tree
*loop_invariant
,
1047 enum tree_code
*compare_code
,
1051 tree op0
, op1
, bound
, base
;
1053 enum tree_code code
;
1056 code
= gimple_cond_code (stmt
);
1057 *loop_invariant
= NULL
;
1073 op0
= gimple_cond_lhs (stmt
);
1074 op1
= gimple_cond_rhs (stmt
);
1076 if ((TREE_CODE (op0
) != SSA_NAME
&& TREE_CODE (op0
) != INTEGER_CST
)
1077 || (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op1
) != INTEGER_CST
))
1079 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op0
, &iv0
, true))
1081 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op1
, &iv1
, true))
1083 if (TREE_CODE (iv0
.step
) != INTEGER_CST
1084 || TREE_CODE (iv1
.step
) != INTEGER_CST
)
1086 if ((integer_zerop (iv0
.step
) && integer_zerop (iv1
.step
))
1087 || (!integer_zerop (iv0
.step
) && !integer_zerop (iv1
.step
)))
1090 if (integer_zerop (iv0
.step
))
1092 if (code
!= NE_EXPR
&& code
!= EQ_EXPR
)
1093 code
= invert_tree_comparison (code
, false);
1096 if (host_integerp (iv1
.step
, 0))
1105 if (host_integerp (iv0
.step
, 0))
1111 if (TREE_CODE (bound
) != INTEGER_CST
)
1112 bound
= get_base_value (bound
);
1115 if (TREE_CODE (base
) != INTEGER_CST
)
1116 base
= get_base_value (base
);
1120 *loop_invariant
= bound
;
1121 *compare_code
= code
;
1123 *loop_iv_base
= base
;
1127 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1130 expr_coherent_p (tree t1
, tree t2
)
1133 tree ssa_name_1
= NULL
;
1134 tree ssa_name_2
= NULL
;
1136 gcc_assert (TREE_CODE (t1
) == SSA_NAME
|| TREE_CODE (t1
) == INTEGER_CST
);
1137 gcc_assert (TREE_CODE (t2
) == SSA_NAME
|| TREE_CODE (t2
) == INTEGER_CST
);
1142 if (TREE_CODE (t1
) == INTEGER_CST
&& TREE_CODE (t2
) == INTEGER_CST
)
1144 if (TREE_CODE (t1
) == INTEGER_CST
|| TREE_CODE (t2
) == INTEGER_CST
)
1147 /* Check to see if t1 is expressed/defined with t2. */
1148 stmt
= SSA_NAME_DEF_STMT (t1
);
1149 gcc_assert (stmt
!= NULL
);
1150 if (is_gimple_assign (stmt
))
1152 ssa_name_1
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1153 if (ssa_name_1
&& ssa_name_1
== t2
)
1157 /* Check to see if t2 is expressed/defined with t1. */
1158 stmt
= SSA_NAME_DEF_STMT (t2
);
1159 gcc_assert (stmt
!= NULL
);
1160 if (is_gimple_assign (stmt
))
1162 ssa_name_2
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1163 if (ssa_name_2
&& ssa_name_2
== t1
)
1167 /* Compare if t1 and t2's def_stmts are identical. */
1168 if (ssa_name_2
!= NULL
&& ssa_name_1
== ssa_name_2
)
1174 /* Predict branch probability of BB when BB contains a branch that compares
1175 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1176 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1179 for (int i = 0; i < bound; i++) {
1186 In this loop, we will predict the branch inside the loop to be taken. */
1189 predict_iv_comparison (struct loop
*loop
, basic_block bb
,
1190 tree loop_bound_var
,
1191 tree loop_iv_base_var
,
1192 enum tree_code loop_bound_code
,
1193 int loop_bound_step
)
1196 tree compare_var
, compare_base
;
1197 enum tree_code compare_code
;
1198 tree compare_step_var
;
1202 if (predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1203 || predicted_by_p (bb
, PRED_LOOP_ITERATIONS
)
1204 || predicted_by_p (bb
, PRED_LOOP_EXIT
))
1207 stmt
= last_stmt (bb
);
1208 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1210 if (!is_comparison_with_loop_invariant_p (stmt
, loop
, &compare_var
,
1216 /* Find the taken edge. */
1217 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1218 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1221 /* When comparing an IV to a loop invariant, NE is more likely to be
1222 taken while EQ is more likely to be not-taken. */
1223 if (compare_code
== NE_EXPR
)
1225 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1228 else if (compare_code
== EQ_EXPR
)
1230 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1234 if (!expr_coherent_p (loop_iv_base_var
, compare_base
))
1237 /* If loop bound, base and compare bound are all constants, we can
1238 calculate the probability directly. */
1239 if (host_integerp (loop_bound_var
, 0)
1240 && host_integerp (compare_var
, 0)
1241 && host_integerp (compare_base
, 0))
1244 bool of
, overflow
= false;
1245 double_int mod
, compare_count
, tem
, loop_count
;
1247 double_int loop_bound
= tree_to_double_int (loop_bound_var
);
1248 double_int compare_bound
= tree_to_double_int (compare_var
);
1249 double_int base
= tree_to_double_int (compare_base
);
1250 double_int compare_step
= tree_to_double_int (compare_step_var
);
1252 /* (loop_bound - base) / compare_step */
1253 tem
= loop_bound
.sub_with_overflow (base
, &of
);
1255 loop_count
= tem
.divmod_with_overflow (compare_step
,
1260 if ((!compare_step
.is_negative ())
1261 ^ (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1263 /* (loop_bound - compare_bound) / compare_step */
1264 tem
= loop_bound
.sub_with_overflow (compare_bound
, &of
);
1266 compare_count
= tem
.divmod_with_overflow (compare_step
,
1273 /* (compare_bound - base) / compare_step */
1274 tem
= compare_bound
.sub_with_overflow (base
, &of
);
1276 compare_count
= tem
.divmod_with_overflow (compare_step
,
1281 if (compare_code
== LE_EXPR
|| compare_code
== GE_EXPR
)
1283 if (loop_bound_code
== LE_EXPR
|| loop_bound_code
== GE_EXPR
)
1285 if (compare_count
.is_negative ())
1286 compare_count
= double_int_zero
;
1287 if (loop_count
.is_negative ())
1288 loop_count
= double_int_zero
;
1289 if (loop_count
.is_zero ())
1291 else if (compare_count
.scmp (loop_count
) == 1)
1292 probability
= REG_BR_PROB_BASE
;
1295 /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1296 could overflow, shift both loop_count and compare_count right
1297 a bit so that it doesn't overflow. Note both counts are known not
1298 to be negative at this point. */
1299 int clz_bits
= clz_hwi (loop_count
.high
);
1300 gcc_assert (REG_BR_PROB_BASE
< 32768);
1303 loop_count
.arshift (16 - clz_bits
, HOST_BITS_PER_DOUBLE_INT
);
1304 compare_count
.arshift (16 - clz_bits
, HOST_BITS_PER_DOUBLE_INT
);
1306 tem
= compare_count
.mul_with_sign (double_int::from_shwi
1307 (REG_BR_PROB_BASE
), true, &of
);
1309 tem
= tem
.divmod (loop_count
, true, TRUNC_DIV_EXPR
, &mod
);
1310 probability
= tem
.to_uhwi ();
1314 predict_edge (then_edge
, PRED_LOOP_IV_COMPARE
, probability
);
1319 if (expr_coherent_p (loop_bound_var
, compare_var
))
1321 if ((loop_bound_code
== LT_EXPR
|| loop_bound_code
== LE_EXPR
)
1322 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1323 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1324 else if ((loop_bound_code
== GT_EXPR
|| loop_bound_code
== GE_EXPR
)
1325 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1326 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1327 else if (loop_bound_code
== NE_EXPR
)
1329 /* If the loop backedge condition is "(i != bound)", we do
1330 the comparison based on the step of IV:
1331 * step < 0 : backedge condition is like (i > bound)
1332 * step > 0 : backedge condition is like (i < bound) */
1333 gcc_assert (loop_bound_step
!= 0);
1334 if (loop_bound_step
> 0
1335 && (compare_code
== LT_EXPR
1336 || compare_code
== LE_EXPR
))
1337 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1338 else if (loop_bound_step
< 0
1339 && (compare_code
== GT_EXPR
1340 || compare_code
== GE_EXPR
))
1341 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1343 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1346 /* The branch is predicted not-taken if loop_bound_code is
1347 opposite with compare_code. */
1348 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1350 else if (expr_coherent_p (loop_iv_base_var
, compare_var
))
1353 for (i = s; i < h; i++)
1355 The branch should be predicted taken. */
1356 if (loop_bound_step
> 0
1357 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1358 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1359 else if (loop_bound_step
< 0
1360 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1361 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1363 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1367 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1368 exits are resulted from short-circuit conditions that will generate an
1371 if (foo() || global > 10)
1374 This will be translated into:
1379 if foo() goto BB6 else goto BB5
1381 if global > 10 goto BB6 else goto BB7
1385 iftmp = (PHI 0(BB5), 1(BB6))
1386 if iftmp == 1 goto BB8 else goto BB3
1388 outside of the loop...
1390 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1391 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1392 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1393 exits to predict them using PRED_LOOP_EXIT. */
1396 predict_extra_loop_exits (edge exit_edge
)
1399 bool check_value_one
;
1401 tree cmp_rhs
, cmp_lhs
;
1402 gimple cmp_stmt
= last_stmt (exit_edge
->src
);
1404 if (!cmp_stmt
|| gimple_code (cmp_stmt
) != GIMPLE_COND
)
1406 cmp_rhs
= gimple_cond_rhs (cmp_stmt
);
1407 cmp_lhs
= gimple_cond_lhs (cmp_stmt
);
1408 if (!TREE_CONSTANT (cmp_rhs
)
1409 || !(integer_zerop (cmp_rhs
) || integer_onep (cmp_rhs
)))
1411 if (TREE_CODE (cmp_lhs
) != SSA_NAME
)
1414 /* If check_value_one is true, only the phi_args with value '1' will lead
1415 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1417 check_value_one
= (((integer_onep (cmp_rhs
))
1418 ^ (gimple_cond_code (cmp_stmt
) == EQ_EXPR
))
1419 ^ ((exit_edge
->flags
& EDGE_TRUE_VALUE
) != 0));
1421 phi_stmt
= SSA_NAME_DEF_STMT (cmp_lhs
);
1422 if (!phi_stmt
|| gimple_code (phi_stmt
) != GIMPLE_PHI
)
1425 for (i
= 0; i
< gimple_phi_num_args (phi_stmt
); i
++)
1429 tree val
= gimple_phi_arg_def (phi_stmt
, i
);
1430 edge e
= gimple_phi_arg_edge (phi_stmt
, i
);
1432 if (!TREE_CONSTANT (val
) || !(integer_zerop (val
) || integer_onep (val
)))
1434 if ((check_value_one
^ integer_onep (val
)) == 1)
1436 if (EDGE_COUNT (e
->src
->succs
) != 1)
1438 predict_paths_leading_to_edge (e
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1442 FOR_EACH_EDGE (e1
, ei
, e
->src
->preds
)
1443 predict_paths_leading_to_edge (e1
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1447 /* Predict edge probabilities by exploiting loop structure. */
1450 predict_loops (void)
1455 /* Try to predict out blocks in a loop that are not part of a
1457 FOR_EACH_LOOP (li
, loop
, 0)
1459 basic_block bb
, *bbs
;
1460 unsigned j
, n_exits
;
1462 struct tree_niter_desc niter_desc
;
1464 struct nb_iter_bound
*nb_iter
;
1465 enum tree_code loop_bound_code
= ERROR_MARK
;
1466 tree loop_bound_step
= NULL
;
1467 tree loop_bound_var
= NULL
;
1468 tree loop_iv_base
= NULL
;
1471 exits
= get_loop_exit_edges (loop
);
1472 n_exits
= exits
.length ();
1479 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1482 HOST_WIDE_INT nitercst
;
1483 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
1485 enum br_predictor predictor
;
1487 predict_extra_loop_exits (ex
);
1489 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
1490 niter
= niter_desc
.niter
;
1491 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
1492 niter
= loop_niter_by_eval (loop
, ex
);
1494 if (TREE_CODE (niter
) == INTEGER_CST
)
1496 if (host_integerp (niter
, 1)
1498 && compare_tree_int (niter
, max
- 1) == -1)
1499 nitercst
= tree_low_cst (niter
, 1) + 1;
1502 predictor
= PRED_LOOP_ITERATIONS
;
1504 /* If we have just one exit and we can derive some information about
1505 the number of iterations of the loop from the statements inside
1506 the loop, use it to predict this exit. */
1507 else if (n_exits
== 1)
1509 nitercst
= estimated_stmt_executions_int (loop
);
1515 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
1520 /* If the prediction for number of iterations is zero, do not
1521 predict the exit edges. */
1525 probability
= ((REG_BR_PROB_BASE
+ nitercst
/ 2) / nitercst
);
1526 predict_edge (ex
, predictor
, probability
);
1530 /* Find information about loop bound variables. */
1531 for (nb_iter
= loop
->bounds
; nb_iter
;
1532 nb_iter
= nb_iter
->next
)
1534 && gimple_code (nb_iter
->stmt
) == GIMPLE_COND
)
1536 stmt
= nb_iter
->stmt
;
1539 if (!stmt
&& last_stmt (loop
->header
)
1540 && gimple_code (last_stmt (loop
->header
)) == GIMPLE_COND
)
1541 stmt
= last_stmt (loop
->header
);
1543 is_comparison_with_loop_invariant_p (stmt
, loop
,
1549 bbs
= get_loop_body (loop
);
1551 for (j
= 0; j
< loop
->num_nodes
; j
++)
1553 int header_found
= 0;
1559 /* Bypass loop heuristics on continue statement. These
1560 statements construct loops via "non-loop" constructs
1561 in the source language and are better to be handled
1563 if (predicted_by_p (bb
, PRED_CONTINUE
))
1566 /* Loop branch heuristics - predict an edge back to a
1567 loop's head as taken. */
1568 if (bb
== loop
->latch
)
1570 e
= find_edge (loop
->latch
, loop
->header
);
1574 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
1578 /* Loop exit heuristics - predict an edge exiting the loop if the
1579 conditional has no loop header successors as not taken. */
1581 /* If we already used more reliable loop exit predictors, do not
1582 bother with PRED_LOOP_EXIT. */
1583 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1584 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS
))
1586 /* For loop with many exits we don't want to predict all exits
1587 with the pretty large probability, because if all exits are
1588 considered in row, the loop would be predicted to iterate
1589 almost never. The code to divide probability by number of
1590 exits is very rough. It should compute the number of exits
1591 taken in each patch through function (not the overall number
1592 of exits that might be a lot higher for loops with wide switch
1593 statements in them) and compute n-th square root.
1595 We limit the minimal probability by 2% to avoid
1596 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1597 as this was causing regression in perl benchmark containing such
1600 int probability
= ((REG_BR_PROB_BASE
1601 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
1603 if (probability
< HITRATE (2))
1604 probability
= HITRATE (2);
1605 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1606 if (e
->dest
->index
< NUM_FIXED_BLOCKS
1607 || !flow_bb_inside_loop_p (loop
, e
->dest
))
1608 predict_edge (e
, PRED_LOOP_EXIT
, probability
);
1611 predict_iv_comparison (loop
, bb
, loop_bound_var
, loop_iv_base
,
1613 tree_low_cst (loop_bound_step
, 0));
1616 /* Free basic blocks from get_loop_body. */
1621 /* Attempt to predict probabilities of BB outgoing edges using local
1624 bb_estimate_probability_locally (basic_block bb
)
1626 rtx last_insn
= BB_END (bb
);
1629 if (! can_predict_insn_p (last_insn
))
1631 cond
= get_condition (last_insn
, NULL
, false, false);
1635 /* Try "pointer heuristic."
1636 A comparison ptr == 0 is predicted as false.
1637 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1638 if (COMPARISON_P (cond
)
1639 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
1640 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
1642 if (GET_CODE (cond
) == EQ
)
1643 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
1644 else if (GET_CODE (cond
) == NE
)
1645 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
1649 /* Try "opcode heuristic."
1650 EQ tests are usually false and NE tests are usually true. Also,
1651 most quantities are positive, so we can make the appropriate guesses
1652 about signed comparisons against zero. */
1653 switch (GET_CODE (cond
))
1656 /* Unconditional branch. */
1657 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
1658 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
1663 /* Floating point comparisons appears to behave in a very
1664 unpredictable way because of special role of = tests in
1666 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1668 /* Comparisons with 0 are often used for booleans and there is
1669 nothing useful to predict about them. */
1670 else if (XEXP (cond
, 1) == const0_rtx
1671 || XEXP (cond
, 0) == const0_rtx
)
1674 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
1679 /* Floating point comparisons appears to behave in a very
1680 unpredictable way because of special role of = tests in
1682 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1684 /* Comparisons with 0 are often used for booleans and there is
1685 nothing useful to predict about them. */
1686 else if (XEXP (cond
, 1) == const0_rtx
1687 || XEXP (cond
, 0) == const0_rtx
)
1690 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
1694 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
1698 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
1703 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1704 || XEXP (cond
, 1) == constm1_rtx
)
1705 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
1710 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1711 || XEXP (cond
, 1) == constm1_rtx
)
1712 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
1720 /* Set edge->probability for each successor edge of BB. */
1722 guess_outgoing_edge_probabilities (basic_block bb
)
1724 bb_estimate_probability_locally (bb
);
1725 combine_predictions_for_insn (BB_END (bb
), bb
);
1728 static tree
expr_expected_value (tree
, bitmap
);
1730 /* Helper function for expr_expected_value. */
1733 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
,
1734 tree op1
, bitmap visited
)
1738 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1740 if (TREE_CONSTANT (op0
))
1743 if (code
!= SSA_NAME
)
1746 def
= SSA_NAME_DEF_STMT (op0
);
1748 /* If we were already here, break the infinite cycle. */
1749 if (!bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
)))
1752 if (gimple_code (def
) == GIMPLE_PHI
)
1754 /* All the arguments of the PHI node must have the same constant
1756 int i
, n
= gimple_phi_num_args (def
);
1757 tree val
= NULL
, new_val
;
1759 for (i
= 0; i
< n
; i
++)
1761 tree arg
= PHI_ARG_DEF (def
, i
);
1763 /* If this PHI has itself as an argument, we cannot
1764 determine the string length of this argument. However,
1765 if we can find an expected constant value for the other
1766 PHI args then we can still be sure that this is
1767 likely a constant. So be optimistic and just
1768 continue with the next argument. */
1769 if (arg
== PHI_RESULT (def
))
1772 new_val
= expr_expected_value (arg
, visited
);
1777 else if (!operand_equal_p (val
, new_val
, false))
1782 if (is_gimple_assign (def
))
1784 if (gimple_assign_lhs (def
) != op0
)
1787 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
1788 gimple_assign_rhs1 (def
),
1789 gimple_assign_rhs_code (def
),
1790 gimple_assign_rhs2 (def
),
1794 if (is_gimple_call (def
))
1796 tree decl
= gimple_call_fndecl (def
);
1799 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
1800 switch (DECL_FUNCTION_CODE (decl
))
1802 case BUILT_IN_EXPECT
:
1805 if (gimple_call_num_args (def
) != 2)
1807 val
= gimple_call_arg (def
, 0);
1808 if (TREE_CONSTANT (val
))
1810 return gimple_call_arg (def
, 1);
1813 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
:
1814 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1
:
1815 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2
:
1816 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4
:
1817 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8
:
1818 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16
:
1819 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE
:
1820 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
:
1821 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
:
1822 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2
:
1823 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4
:
1824 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8
:
1825 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16
:
1826 /* Assume that any given atomic operation has low contention,
1827 and thus the compare-and-swap operation succeeds. */
1828 return boolean_true_node
;
1835 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
1838 op0
= expr_expected_value (op0
, visited
);
1841 op1
= expr_expected_value (op1
, visited
);
1844 res
= fold_build2 (code
, type
, op0
, op1
);
1845 if (TREE_CONSTANT (res
))
1849 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
1852 op0
= expr_expected_value (op0
, visited
);
1855 res
= fold_build1 (code
, type
, op0
);
1856 if (TREE_CONSTANT (res
))
1863 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1864 The function is used by builtin_expect branch predictor so the evidence
1865 must come from this construct and additional possible constant folding.
1867 We may want to implement more involved value guess (such as value range
1868 propagation based prediction), but such tricks shall go to new
1872 expr_expected_value (tree expr
, bitmap visited
)
1874 enum tree_code code
;
1877 if (TREE_CONSTANT (expr
))
1880 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
1881 return expr_expected_value_1 (TREE_TYPE (expr
),
1882 op0
, code
, op1
, visited
);
1886 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1887 we no longer need. */
1889 strip_predict_hints (void)
1897 gimple_stmt_iterator bi
;
1898 for (bi
= gsi_start_bb (bb
); !gsi_end_p (bi
);)
1900 gimple stmt
= gsi_stmt (bi
);
1902 if (gimple_code (stmt
) == GIMPLE_PREDICT
)
1904 gsi_remove (&bi
, true);
1907 else if (gimple_code (stmt
) == GIMPLE_CALL
)
1909 tree fndecl
= gimple_call_fndecl (stmt
);
1912 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
1913 && DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_EXPECT
1914 && gimple_call_num_args (stmt
) == 2)
1916 var
= gimple_call_lhs (stmt
);
1920 = gimple_build_assign (var
, gimple_call_arg (stmt
, 0));
1921 gsi_replace (&bi
, ass_stmt
, true);
1925 gsi_remove (&bi
, true);
1936 /* Predict using opcode of the last statement in basic block. */
1938 tree_predict_by_opcode (basic_block bb
)
1940 gimple stmt
= last_stmt (bb
);
1949 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1951 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1952 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1954 op0
= gimple_cond_lhs (stmt
);
1955 op1
= gimple_cond_rhs (stmt
);
1956 cmp
= gimple_cond_code (stmt
);
1957 type
= TREE_TYPE (op0
);
1958 visited
= BITMAP_ALLOC (NULL
);
1959 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, visited
);
1960 BITMAP_FREE (visited
);
1963 if (integer_zerop (val
))
1964 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, NOT_TAKEN
);
1966 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, TAKEN
);
1969 /* Try "pointer heuristic."
1970 A comparison ptr == 0 is predicted as false.
1971 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1972 if (POINTER_TYPE_P (type
))
1975 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
1976 else if (cmp
== NE_EXPR
)
1977 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
1981 /* Try "opcode heuristic."
1982 EQ tests are usually false and NE tests are usually true. Also,
1983 most quantities are positive, so we can make the appropriate guesses
1984 about signed comparisons against zero. */
1989 /* Floating point comparisons appears to behave in a very
1990 unpredictable way because of special role of = tests in
1992 if (FLOAT_TYPE_P (type
))
1994 /* Comparisons with 0 are often used for booleans and there is
1995 nothing useful to predict about them. */
1996 else if (integer_zerop (op0
) || integer_zerop (op1
))
1999 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
2004 /* Floating point comparisons appears to behave in a very
2005 unpredictable way because of special role of = tests in
2007 if (FLOAT_TYPE_P (type
))
2009 /* Comparisons with 0 are often used for booleans and there is
2010 nothing useful to predict about them. */
2011 else if (integer_zerop (op0
)
2012 || integer_zerop (op1
))
2015 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
2019 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
2022 case UNORDERED_EXPR
:
2023 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
2028 if (integer_zerop (op1
)
2029 || integer_onep (op1
)
2030 || integer_all_onesp (op1
)
2033 || real_minus_onep (op1
))
2034 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
2039 if (integer_zerop (op1
)
2040 || integer_onep (op1
)
2041 || integer_all_onesp (op1
)
2044 || real_minus_onep (op1
))
2045 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
2053 /* Try to guess whether the value of return means error code. */
2055 static enum br_predictor
2056 return_prediction (tree val
, enum prediction
*prediction
)
2060 return PRED_NO_PREDICTION
;
2061 /* Different heuristics for pointers and scalars. */
2062 if (POINTER_TYPE_P (TREE_TYPE (val
)))
2064 /* NULL is usually not returned. */
2065 if (integer_zerop (val
))
2067 *prediction
= NOT_TAKEN
;
2068 return PRED_NULL_RETURN
;
2071 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
2073 /* Negative return values are often used to indicate
2075 if (TREE_CODE (val
) == INTEGER_CST
2076 && tree_int_cst_sgn (val
) < 0)
2078 *prediction
= NOT_TAKEN
;
2079 return PRED_NEGATIVE_RETURN
;
2081 /* Constant return values seems to be commonly taken.
2082 Zero/one often represent booleans so exclude them from the
2084 if (TREE_CONSTANT (val
)
2085 && (!integer_zerop (val
) && !integer_onep (val
)))
2087 *prediction
= TAKEN
;
2088 return PRED_CONST_RETURN
;
2091 return PRED_NO_PREDICTION
;
2094 /* Find the basic block with return expression and look up for possible
2095 return value trying to apply RETURN_PREDICTION heuristics. */
2097 apply_return_prediction (void)
2099 gimple return_stmt
= NULL
;
2103 int phi_num_args
, i
;
2104 enum br_predictor pred
;
2105 enum prediction direction
;
2108 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2110 return_stmt
= last_stmt (e
->src
);
2112 && gimple_code (return_stmt
) == GIMPLE_RETURN
)
2117 return_val
= gimple_return_retval (return_stmt
);
2120 if (TREE_CODE (return_val
) != SSA_NAME
2121 || !SSA_NAME_DEF_STMT (return_val
)
2122 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
2124 phi
= SSA_NAME_DEF_STMT (return_val
);
2125 phi_num_args
= gimple_phi_num_args (phi
);
2126 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
2128 /* Avoid the degenerate case where all return values form the function
2129 belongs to same category (ie they are all positive constants)
2130 so we can hardly say something about them. */
2131 for (i
= 1; i
< phi_num_args
; i
++)
2132 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
2134 if (i
!= phi_num_args
)
2135 for (i
= 0; i
< phi_num_args
; i
++)
2137 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
2138 if (pred
!= PRED_NO_PREDICTION
)
2139 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi
, i
), pred
,
2144 /* Look for basic block that contains unlikely to happen events
2145 (such as noreturn calls) and mark all paths leading to execution
2146 of this basic blocks as unlikely. */
2149 tree_bb_level_predictions (void)
2152 bool has_return_edges
= false;
2156 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2157 if (!(e
->flags
& (EDGE_ABNORMAL
| EDGE_FAKE
| EDGE_EH
)))
2159 has_return_edges
= true;
2163 apply_return_prediction ();
2167 gimple_stmt_iterator gsi
;
2169 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2171 gimple stmt
= gsi_stmt (gsi
);
2174 if (is_gimple_call (stmt
))
2176 if ((gimple_call_flags (stmt
) & ECF_NORETURN
)
2177 && has_return_edges
)
2178 predict_paths_leading_to (bb
, PRED_NORETURN
,
2180 decl
= gimple_call_fndecl (stmt
);
2182 && lookup_attribute ("cold",
2183 DECL_ATTRIBUTES (decl
)))
2184 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
2187 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
2189 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
2190 gimple_predict_outcome (stmt
));
2191 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2192 hints to callers. */
2198 #ifdef ENABLE_CHECKING
2200 /* Callback for pointer_map_traverse, asserts that the pointer map is
2204 assert_is_empty (const void *key ATTRIBUTE_UNUSED
, void **value
,
2205 void *data ATTRIBUTE_UNUSED
)
2207 gcc_assert (!*value
);
2212 /* Predict branch probabilities and estimate profile for basic block BB. */
2215 tree_estimate_probability_bb (basic_block bb
)
2221 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2223 /* Predict edges to user labels with attributes. */
2224 if (e
->dest
!= EXIT_BLOCK_PTR
)
2226 gimple_stmt_iterator gi
;
2227 for (gi
= gsi_start_bb (e
->dest
); !gsi_end_p (gi
); gsi_next (&gi
))
2229 gimple stmt
= gsi_stmt (gi
);
2232 if (gimple_code (stmt
) != GIMPLE_LABEL
)
2234 decl
= gimple_label_label (stmt
);
2235 if (DECL_ARTIFICIAL (decl
))
2238 /* Finally, we have a user-defined label. */
2239 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl
)))
2240 predict_edge_def (e
, PRED_COLD_LABEL
, NOT_TAKEN
);
2241 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl
)))
2242 predict_edge_def (e
, PRED_HOT_LABEL
, TAKEN
);
2246 /* Predict early returns to be probable, as we've already taken
2247 care for error returns and other cases are often used for
2248 fast paths through function.
2250 Since we've already removed the return statements, we are
2251 looking for CFG like:
2261 if (e
->dest
!= bb
->next_bb
2262 && e
->dest
!= EXIT_BLOCK_PTR
2263 && single_succ_p (e
->dest
)
2264 && single_succ_edge (e
->dest
)->dest
== EXIT_BLOCK_PTR
2265 && (last
= last_stmt (e
->dest
)) != NULL
2266 && gimple_code (last
) == GIMPLE_RETURN
)
2271 if (single_succ_p (bb
))
2273 FOR_EACH_EDGE (e1
, ei1
, bb
->preds
)
2274 if (!predicted_by_p (e1
->src
, PRED_NULL_RETURN
)
2275 && !predicted_by_p (e1
->src
, PRED_CONST_RETURN
)
2276 && !predicted_by_p (e1
->src
, PRED_NEGATIVE_RETURN
))
2277 predict_edge_def (e1
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2280 if (!predicted_by_p (e
->src
, PRED_NULL_RETURN
)
2281 && !predicted_by_p (e
->src
, PRED_CONST_RETURN
)
2282 && !predicted_by_p (e
->src
, PRED_NEGATIVE_RETURN
))
2283 predict_edge_def (e
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2286 /* Look for block we are guarding (ie we dominate it,
2287 but it doesn't postdominate us). */
2288 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
2289 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
2290 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
2292 gimple_stmt_iterator bi
;
2294 /* The call heuristic claims that a guarded function call
2295 is improbable. This is because such calls are often used
2296 to signal exceptional situations such as printing error
2298 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
2301 gimple stmt
= gsi_stmt (bi
);
2302 if (is_gimple_call (stmt
)
2303 /* Constant and pure calls are hardly used to signalize
2304 something exceptional. */
2305 && gimple_has_side_effects (stmt
))
2307 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
2313 tree_predict_by_opcode (bb
);
2316 /* Predict branch probabilities and estimate profile of the tree CFG.
2317 This function can be called from the loop optimizers to recompute
2318 the profile information. */
2321 tree_estimate_probability (void)
2325 add_noreturn_fake_exit_edges ();
2326 connect_infinite_loops_to_exit ();
2327 /* We use loop_niter_by_eval, which requires that the loops have
2329 create_preheaders (CP_SIMPLE_PREHEADERS
);
2330 calculate_dominance_info (CDI_POST_DOMINATORS
);
2332 bb_predictions
= pointer_map_create ();
2333 tree_bb_level_predictions ();
2334 record_loop_exits ();
2336 if (number_of_loops () > 1)
2340 tree_estimate_probability_bb (bb
);
2343 combine_predictions_for_bb (bb
);
2345 #ifdef ENABLE_CHECKING
2346 pointer_map_traverse (bb_predictions
, assert_is_empty
, NULL
);
2348 pointer_map_destroy (bb_predictions
);
2349 bb_predictions
= NULL
;
2351 estimate_bb_frequencies ();
2352 free_dominance_info (CDI_POST_DOMINATORS
);
2353 remove_fake_exit_edges ();
2356 /* Predict branch probabilities and estimate profile of the tree CFG.
2357 This is the driver function for PASS_PROFILE. */
2360 tree_estimate_probability_driver (void)
2364 loop_optimizer_init (LOOPS_NORMAL
);
2365 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2366 flow_loops_dump (dump_file
, NULL
, 0);
2368 mark_irreducible_loops ();
2370 nb_loops
= number_of_loops ();
2374 tree_estimate_probability ();
2379 loop_optimizer_finalize ();
2380 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2381 gimple_dump_cfg (dump_file
, dump_flags
);
2382 if (profile_status
== PROFILE_ABSENT
)
2383 profile_status
= PROFILE_GUESSED
;
2387 /* Predict edges to successors of CUR whose sources are not postdominated by
2388 BB by PRED and recurse to all postdominators. */
2391 predict_paths_for_bb (basic_block cur
, basic_block bb
,
2392 enum br_predictor pred
,
2393 enum prediction taken
,
2400 /* We are looking for all edges forming edge cut induced by
2401 set of all blocks postdominated by BB. */
2402 FOR_EACH_EDGE (e
, ei
, cur
->preds
)
2403 if (e
->src
->index
>= NUM_FIXED_BLOCKS
2404 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, bb
))
2410 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2411 if (e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2413 gcc_assert (bb
== cur
|| dominated_by_p (CDI_POST_DOMINATORS
, cur
, bb
));
2415 /* See if there is an edge from e->src that is not abnormal
2416 and does not lead to BB. */
2417 FOR_EACH_EDGE (e2
, ei2
, e
->src
->succs
)
2419 && !(e2
->flags
& (EDGE_EH
| EDGE_FAKE
))
2420 && !dominated_by_p (CDI_POST_DOMINATORS
, e2
->dest
, bb
))
2426 /* If there is non-abnormal path leaving e->src, predict edge
2427 using predictor. Otherwise we need to look for paths
2430 The second may lead to infinite loop in the case we are predicitng
2431 regions that are only reachable by abnormal edges. We simply
2432 prevent visiting given BB twice. */
2434 predict_edge_def (e
, pred
, taken
);
2435 else if (bitmap_set_bit (visited
, e
->src
->index
))
2436 predict_paths_for_bb (e
->src
, e
->src
, pred
, taken
, visited
);
2438 for (son
= first_dom_son (CDI_POST_DOMINATORS
, cur
);
2440 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
2441 predict_paths_for_bb (son
, bb
, pred
, taken
, visited
);
2444 /* Sets branch probabilities according to PREDiction and
2448 predict_paths_leading_to (basic_block bb
, enum br_predictor pred
,
2449 enum prediction taken
)
2451 bitmap visited
= BITMAP_ALLOC (NULL
);
2452 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2453 BITMAP_FREE (visited
);
2456 /* Like predict_paths_leading_to but take edge instead of basic block. */
2459 predict_paths_leading_to_edge (edge e
, enum br_predictor pred
,
2460 enum prediction taken
)
2462 bool has_nonloop_edge
= false;
2466 basic_block bb
= e
->src
;
2467 FOR_EACH_EDGE (e2
, ei
, bb
->succs
)
2468 if (e2
->dest
!= e
->src
&& e2
->dest
!= e
->dest
2469 && !(e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2470 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e2
->dest
))
2472 has_nonloop_edge
= true;
2475 if (!has_nonloop_edge
)
2477 bitmap visited
= BITMAP_ALLOC (NULL
);
2478 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2479 BITMAP_FREE (visited
);
2482 predict_edge_def (e
, pred
, taken
);
2485 /* This is used to carry information about basic blocks. It is
2486 attached to the AUX field of the standard CFG block. */
2488 typedef struct block_info_def
2490 /* Estimated frequency of execution of basic_block. */
2493 /* To keep queue of basic blocks to process. */
2496 /* Number of predecessors we need to visit first. */
2500 /* Similar information for edges. */
2501 typedef struct edge_info_def
2503 /* In case edge is a loopback edge, the probability edge will be reached
2504 in case header is. Estimated number of iterations of the loop can be
2505 then computed as 1 / (1 - back_edge_prob). */
2506 sreal back_edge_prob
;
2507 /* True if the edge is a loopback edge in the natural loop. */
2508 unsigned int back_edge
:1;
2511 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2512 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2514 /* Helper function for estimate_bb_frequencies.
2515 Propagate the frequencies in blocks marked in
2516 TOVISIT, starting in HEAD. */
2519 propagate_freq (basic_block head
, bitmap tovisit
)
2528 /* For each basic block we need to visit count number of his predecessors
2529 we need to visit first. */
2530 EXECUTE_IF_SET_IN_BITMAP (tovisit
, 0, i
, bi
)
2535 bb
= BASIC_BLOCK (i
);
2537 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2539 bool visit
= bitmap_bit_p (tovisit
, e
->src
->index
);
2541 if (visit
&& !(e
->flags
& EDGE_DFS_BACK
))
2543 else if (visit
&& dump_file
&& !EDGE_INFO (e
)->back_edge
)
2545 "Irreducible region hit, ignoring edge to %i->%i\n",
2546 e
->src
->index
, bb
->index
);
2548 BLOCK_INFO (bb
)->npredecessors
= count
;
2549 /* When function never returns, we will never process exit block. */
2550 if (!count
&& bb
== EXIT_BLOCK_PTR
)
2551 bb
->count
= bb
->frequency
= 0;
2554 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
2556 for (bb
= head
; bb
; bb
= nextbb
)
2559 sreal cyclic_probability
, frequency
;
2561 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
2562 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
2564 nextbb
= BLOCK_INFO (bb
)->next
;
2565 BLOCK_INFO (bb
)->next
= NULL
;
2567 /* Compute frequency of basic block. */
2570 #ifdef ENABLE_CHECKING
2571 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2572 gcc_assert (!bitmap_bit_p (tovisit
, e
->src
->index
)
2573 || (e
->flags
& EDGE_DFS_BACK
));
2576 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2577 if (EDGE_INFO (e
)->back_edge
)
2579 sreal_add (&cyclic_probability
, &cyclic_probability
,
2580 &EDGE_INFO (e
)->back_edge_prob
);
2582 else if (!(e
->flags
& EDGE_DFS_BACK
))
2586 /* frequency += (e->probability
2587 * BLOCK_INFO (e->src)->frequency /
2588 REG_BR_PROB_BASE); */
2590 sreal_init (&tmp
, e
->probability
, 0);
2591 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
2592 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
2593 sreal_add (&frequency
, &frequency
, &tmp
);
2596 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
2598 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
2599 sizeof (frequency
));
2603 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
2605 memcpy (&cyclic_probability
, &real_almost_one
,
2606 sizeof (real_almost_one
));
2609 /* BLOCK_INFO (bb)->frequency = frequency
2610 / (1 - cyclic_probability) */
2612 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
2613 sreal_div (&BLOCK_INFO (bb
)->frequency
,
2614 &frequency
, &cyclic_probability
);
2618 bitmap_clear_bit (tovisit
, bb
->index
);
2620 e
= find_edge (bb
, head
);
2625 /* EDGE_INFO (e)->back_edge_prob
2626 = ((e->probability * BLOCK_INFO (bb)->frequency)
2627 / REG_BR_PROB_BASE); */
2629 sreal_init (&tmp
, e
->probability
, 0);
2630 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
2631 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
2632 &tmp
, &real_inv_br_prob_base
);
2635 /* Propagate to successor blocks. */
2636 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2637 if (!(e
->flags
& EDGE_DFS_BACK
)
2638 && BLOCK_INFO (e
->dest
)->npredecessors
)
2640 BLOCK_INFO (e
->dest
)->npredecessors
--;
2641 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
2646 BLOCK_INFO (last
)->next
= e
->dest
;
2654 /* Estimate probabilities of loopback edges in loops at same nest level. */
2657 estimate_loops_at_level (struct loop
*first_loop
)
2661 for (loop
= first_loop
; loop
; loop
= loop
->next
)
2666 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2668 estimate_loops_at_level (loop
->inner
);
2670 /* Find current loop back edge and mark it. */
2671 e
= loop_latch_edge (loop
);
2672 EDGE_INFO (e
)->back_edge
= 1;
2674 bbs
= get_loop_body (loop
);
2675 for (i
= 0; i
< loop
->num_nodes
; i
++)
2676 bitmap_set_bit (tovisit
, bbs
[i
]->index
);
2678 propagate_freq (loop
->header
, tovisit
);
2679 BITMAP_FREE (tovisit
);
2683 /* Propagates frequencies through structure of loops. */
2686 estimate_loops (void)
2688 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2691 /* Start by estimating the frequencies in the loops. */
2692 if (number_of_loops () > 1)
2693 estimate_loops_at_level (current_loops
->tree_root
->inner
);
2695 /* Now propagate the frequencies through all the blocks. */
2698 bitmap_set_bit (tovisit
, bb
->index
);
2700 propagate_freq (ENTRY_BLOCK_PTR
, tovisit
);
2701 BITMAP_FREE (tovisit
);
2704 /* Convert counts measured by profile driven feedback to frequencies.
2705 Return nonzero iff there was any nonzero execution count. */
2708 counts_to_freqs (void)
2710 gcov_type count_max
, true_count_max
= 0;
2713 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2714 true_count_max
= MAX (bb
->count
, true_count_max
);
2716 count_max
= MAX (true_count_max
, 1);
2717 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2718 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
2720 return true_count_max
;
2723 /* Return true if function is likely to be expensive, so there is no point to
2724 optimize performance of prologue, epilogue or do inlining at the expense
2725 of code size growth. THRESHOLD is the limit of number of instructions
2726 function can execute at average to be still considered not expensive. */
2729 expensive_function_p (int threshold
)
2731 unsigned int sum
= 0;
2735 /* We can not compute accurately for large thresholds due to scaled
2737 gcc_assert (threshold
<= BB_FREQ_MAX
);
2739 /* Frequencies are out of range. This either means that function contains
2740 internal loop executing more than BB_FREQ_MAX times or profile feedback
2741 is available and function has not been executed at all. */
2742 if (ENTRY_BLOCK_PTR
->frequency
== 0)
2745 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2746 limit
= ENTRY_BLOCK_PTR
->frequency
* threshold
;
2751 FOR_BB_INSNS (bb
, insn
)
2752 if (active_insn_p (insn
))
2754 sum
+= bb
->frequency
;
2763 /* Estimate basic blocks frequency by given branch probabilities. */
2766 estimate_bb_frequencies (void)
2771 if (profile_status
!= PROFILE_READ
|| !counts_to_freqs ())
2773 static int real_values_initialized
= 0;
2775 if (!real_values_initialized
)
2777 real_values_initialized
= 1;
2778 sreal_init (&real_zero
, 0, 0);
2779 sreal_init (&real_one
, 1, 0);
2780 sreal_init (&real_br_prob_base
, REG_BR_PROB_BASE
, 0);
2781 sreal_init (&real_bb_freq_max
, BB_FREQ_MAX
, 0);
2782 sreal_init (&real_one_half
, 1, -1);
2783 sreal_div (&real_inv_br_prob_base
, &real_one
, &real_br_prob_base
);
2784 sreal_sub (&real_almost_one
, &real_one
, &real_inv_br_prob_base
);
2787 mark_dfs_back_edges ();
2789 single_succ_edge (ENTRY_BLOCK_PTR
)->probability
= REG_BR_PROB_BASE
;
2791 /* Set up block info for each basic block. */
2792 alloc_aux_for_blocks (sizeof (struct block_info_def
));
2793 alloc_aux_for_edges (sizeof (struct edge_info_def
));
2794 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2799 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2801 sreal_init (&EDGE_INFO (e
)->back_edge_prob
, e
->probability
, 0);
2802 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
2803 &EDGE_INFO (e
)->back_edge_prob
,
2804 &real_inv_br_prob_base
);
2808 /* First compute probabilities locally for each loop from innermost
2809 to outermost to examine probabilities for back edges. */
2812 memcpy (&freq_max
, &real_zero
, sizeof (real_zero
));
2814 if (sreal_compare (&freq_max
, &BLOCK_INFO (bb
)->frequency
) < 0)
2815 memcpy (&freq_max
, &BLOCK_INFO (bb
)->frequency
, sizeof (freq_max
));
2817 sreal_div (&freq_max
, &real_bb_freq_max
, &freq_max
);
2818 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2822 sreal_mul (&tmp
, &BLOCK_INFO (bb
)->frequency
, &freq_max
);
2823 sreal_add (&tmp
, &tmp
, &real_one_half
);
2824 bb
->frequency
= sreal_to_int (&tmp
);
2827 free_aux_for_blocks ();
2828 free_aux_for_edges ();
2830 compute_function_frequency ();
2833 /* Decide whether function is hot, cold or unlikely executed. */
2835 compute_function_frequency (void)
2838 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2839 if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
2840 || MAIN_NAME_P (DECL_NAME (current_function_decl
)))
2841 node
->only_called_at_startup
= true;
2842 if (DECL_STATIC_DESTRUCTOR (current_function_decl
))
2843 node
->only_called_at_exit
= true;
2845 if (!profile_info
|| !flag_branch_probabilities
)
2847 int flags
= flags_from_decl_or_type (current_function_decl
);
2848 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
))
2850 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2851 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl
))
2853 node
->frequency
= NODE_FREQUENCY_HOT
;
2854 else if (flags
& ECF_NORETURN
)
2855 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2856 else if (MAIN_NAME_P (DECL_NAME (current_function_decl
)))
2857 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2858 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
2859 || DECL_STATIC_DESTRUCTOR (current_function_decl
))
2860 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2863 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2866 if (maybe_hot_bb_p (cfun
, bb
))
2868 node
->frequency
= NODE_FREQUENCY_HOT
;
2871 if (!probably_never_executed_bb_p (cfun
, bb
))
2872 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2877 gate_estimate_probability (void)
2879 return flag_guess_branch_prob
;
2882 /* Build PREDICT_EXPR. */
2884 build_predict_expr (enum br_predictor predictor
, enum prediction taken
)
2886 tree t
= build1 (PREDICT_EXPR
, void_type_node
,
2887 build_int_cst (integer_type_node
, predictor
));
2888 SET_PREDICT_EXPR_OUTCOME (t
, taken
);
2893 predictor_name (enum br_predictor predictor
)
2895 return predictor_info
[predictor
].name
;
2898 struct gimple_opt_pass pass_profile
=
2902 "profile_estimate", /* name */
2903 OPTGROUP_NONE
, /* optinfo_flags */
2904 gate_estimate_probability
, /* gate */
2905 tree_estimate_probability_driver
, /* execute */
2908 0, /* static_pass_number */
2909 TV_BRANCH_PROB
, /* tv_id */
2910 PROP_cfg
, /* properties_required */
2911 0, /* properties_provided */
2912 0, /* properties_destroyed */
2913 0, /* todo_flags_start */
2914 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */
2918 struct gimple_opt_pass pass_strip_predict_hints
=
2922 "*strip_predict_hints", /* name */
2923 OPTGROUP_NONE
, /* optinfo_flags */
2925 strip_predict_hints
, /* execute */
2928 0, /* static_pass_number */
2929 TV_BRANCH_PROB
, /* tv_id */
2930 PROP_cfg
, /* properties_required */
2931 0, /* properties_provided */
2932 0, /* properties_destroyed */
2933 0, /* todo_flags_start */
2934 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */
2938 /* Rebuild function frequencies. Passes are in general expected to
2939 maintain profile by hand, however in some cases this is not possible:
2940 for example when inlining several functions with loops freuqencies might run
2941 out of scale and thus needs to be recomputed. */
2944 rebuild_frequencies (void)
2946 timevar_push (TV_REBUILD_FREQUENCIES
);
2947 if (profile_status
== PROFILE_GUESSED
)
2949 loop_optimizer_init (0);
2950 add_noreturn_fake_exit_edges ();
2951 mark_irreducible_loops ();
2952 connect_infinite_loops_to_exit ();
2953 estimate_bb_frequencies ();
2954 remove_fake_exit_edges ();
2955 loop_optimizer_finalize ();
2957 else if (profile_status
== PROFILE_READ
)
2961 timevar_pop (TV_REBUILD_FREQUENCIES
);