1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
32 #include "coretypes.h"
37 #include "hard-reg-set.h"
38 #include "basic-block.h"
39 #include "insn-config.h"
44 #include "diagnostic-core.h"
53 #include "tree-flow.h"
55 #include "tree-pass.h"
56 #include "tree-scalar-evolution.h"
58 #include "pointer-set.h"
60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
61 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
62 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
63 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
65 /* Random guesstimation given names.
66 PROV_VERY_UNLIKELY should be small enough so basic block predicted
67 by it gets bellow HOT_BB_FREQUENCY_FRANCTION. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
73 static void combine_predictions_for_insn (rtx
, basic_block
);
74 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
75 static void predict_paths_leading_to (basic_block
, enum br_predictor
, enum prediction
);
76 static void predict_paths_leading_to_edge (edge
, enum br_predictor
, enum prediction
);
77 static bool can_predict_insn_p (const_rtx
);
79 /* Information we hold about each branch predictor.
80 Filled using information from predict.def. */
84 const char *const name
; /* Name used in the debugging dumps. */
85 const int hitrate
; /* Expected hitrate used by
86 predict_insn_def call. */
90 /* Use given predictor without Dempster-Shaffer theory if it matches
91 using first_match heuristics. */
92 #define PRED_FLAG_FIRST_MATCH 1
94 /* Recompute hitrate in percent to our representation. */
96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
99 static const struct predictor_info predictor_info
[]= {
100 #include "predict.def"
102 /* Upper bound on predictors. */
107 /* Return TRUE if frequency FREQ is considered to be hot. */
110 maybe_hot_frequency_p (struct function
*fun
, int freq
)
112 struct cgraph_node
*node
= cgraph_get_node (fun
->decl
);
113 if (!profile_info
|| !flag_branch_probabilities
)
115 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
117 if (node
->frequency
== NODE_FREQUENCY_HOT
)
120 if (profile_status_for_function (fun
) == PROFILE_ABSENT
)
122 if (node
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
123 && freq
< (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun
)->frequency
* 2 / 3))
125 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
) == 0)
127 if (freq
< (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun
)->frequency
128 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
133 /* Return TRUE if frequency FREQ is considered to be hot. */
136 maybe_hot_count_p (struct function
*fun
, gcov_type count
)
138 gcov_working_set_t
*ws
;
139 static gcov_type min_count
= -1;
140 if (fun
&& profile_status_for_function (fun
) != PROFILE_READ
)
142 /* Code executed at most once is not hot. */
143 if (profile_info
->runs
>= count
)
147 ws
= find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
));
149 min_count
= ws
->min_counter
;
151 return (count
>= min_count
);
154 /* Return true in case BB can be CPU intensive and should be optimized
155 for maximal performance. */
158 maybe_hot_bb_p (struct function
*fun
, const_basic_block bb
)
160 gcc_checking_assert (fun
);
161 if (profile_status_for_function (fun
) == PROFILE_READ
)
162 return maybe_hot_count_p (fun
, bb
->count
);
163 return maybe_hot_frequency_p (fun
, bb
->frequency
);
166 /* Return true if the call can be hot. */
169 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
171 if (profile_info
&& flag_branch_probabilities
172 && !maybe_hot_count_p (NULL
,
175 if (edge
->caller
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
177 && edge
->callee
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
179 if (edge
->caller
->frequency
> NODE_FREQUENCY_UNLIKELY_EXECUTED
181 && edge
->callee
->frequency
<= NODE_FREQUENCY_EXECUTED_ONCE
))
185 if (edge
->caller
->frequency
== NODE_FREQUENCY_HOT
)
187 if (edge
->caller
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
188 && edge
->frequency
< CGRAPH_FREQ_BASE
* 3 / 2)
190 if (flag_guess_branch_prob
)
192 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
) == 0
193 || edge
->frequency
<= (CGRAPH_FREQ_BASE
194 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
200 /* Return true in case BB can be CPU intensive and should be optimized
201 for maximal performance. */
204 maybe_hot_edge_p (edge e
)
206 if (profile_status
== PROFILE_READ
)
207 return maybe_hot_count_p (cfun
, e
->count
);
208 return maybe_hot_frequency_p (cfun
, EDGE_FREQUENCY (e
));
212 /* Return true in case BB is probably never executed. */
215 probably_never_executed_bb_p (struct function
*fun
, const_basic_block bb
)
217 gcc_checking_assert (fun
);
218 if (profile_info
&& flag_branch_probabilities
)
219 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
220 if ((!profile_info
|| !flag_branch_probabilities
)
221 && (cgraph_get_node (fun
->decl
)->frequency
222 == NODE_FREQUENCY_UNLIKELY_EXECUTED
))
227 /* Return true if NODE should be optimized for size. */
230 cgraph_optimize_for_size_p (struct cgraph_node
*node
)
234 if (node
&& (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
240 /* Return true when current function should always be optimized for size. */
243 optimize_function_for_size_p (struct function
*fun
)
247 if (!fun
|| !fun
->decl
)
249 return cgraph_optimize_for_size_p (cgraph_get_node (fun
->decl
));
252 /* Return true when current function should always be optimized for speed. */
255 optimize_function_for_speed_p (struct function
*fun
)
257 return !optimize_function_for_size_p (fun
);
260 /* Return TRUE when BB should be optimized for size. */
263 optimize_bb_for_size_p (const_basic_block bb
)
265 return optimize_function_for_size_p (cfun
) || !maybe_hot_bb_p (cfun
, bb
);
268 /* Return TRUE when BB should be optimized for speed. */
271 optimize_bb_for_speed_p (const_basic_block bb
)
273 return !optimize_bb_for_size_p (bb
);
276 /* Return TRUE when BB should be optimized for size. */
279 optimize_edge_for_size_p (edge e
)
281 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
284 /* Return TRUE when BB should be optimized for speed. */
287 optimize_edge_for_speed_p (edge e
)
289 return !optimize_edge_for_size_p (e
);
292 /* Return TRUE when BB should be optimized for size. */
295 optimize_insn_for_size_p (void)
297 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
300 /* Return TRUE when BB should be optimized for speed. */
303 optimize_insn_for_speed_p (void)
305 return !optimize_insn_for_size_p ();
308 /* Return TRUE when LOOP should be optimized for size. */
311 optimize_loop_for_size_p (struct loop
*loop
)
313 return optimize_bb_for_size_p (loop
->header
);
316 /* Return TRUE when LOOP should be optimized for speed. */
319 optimize_loop_for_speed_p (struct loop
*loop
)
321 return optimize_bb_for_speed_p (loop
->header
);
324 /* Return TRUE when LOOP nest should be optimized for speed. */
327 optimize_loop_nest_for_speed_p (struct loop
*loop
)
329 struct loop
*l
= loop
;
330 if (optimize_loop_for_speed_p (loop
))
333 while (l
&& l
!= loop
)
335 if (optimize_loop_for_speed_p (l
))
343 while (l
!= loop
&& !l
->next
)
352 /* Return TRUE when LOOP nest should be optimized for size. */
355 optimize_loop_nest_for_size_p (struct loop
*loop
)
357 return !optimize_loop_nest_for_speed_p (loop
);
360 /* Return true when edge E is likely to be well predictable by branch
364 predictable_edge_p (edge e
)
366 if (profile_status
== PROFILE_ABSENT
)
369 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
370 || (REG_BR_PROB_BASE
- e
->probability
371 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
377 /* Set RTL expansion for BB profile. */
380 rtl_profile_for_bb (basic_block bb
)
382 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (cfun
, bb
);
385 /* Set RTL expansion for edge profile. */
388 rtl_profile_for_edge (edge e
)
390 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
393 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
395 default_rtl_profile (void)
397 crtl
->maybe_hot_insn_p
= true;
400 /* Return true if the one of outgoing edges is already predicted by
404 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
407 if (!INSN_P (BB_END (bb
)))
409 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
410 if (REG_NOTE_KIND (note
) == REG_BR_PRED
411 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
416 /* This map contains for a basic block the list of predictions for the
419 static struct pointer_map_t
*bb_predictions
;
421 /* Structure representing predictions in tree level. */
423 struct edge_prediction
{
424 struct edge_prediction
*ep_next
;
426 enum br_predictor ep_predictor
;
430 /* Return true if the one of outgoing edges is already predicted by
434 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
436 struct edge_prediction
*i
;
437 void **preds
= pointer_map_contains (bb_predictions
, bb
);
442 for (i
= (struct edge_prediction
*) *preds
; i
; i
= i
->ep_next
)
443 if (i
->ep_predictor
== predictor
)
448 /* Return true when the probability of edge is reliable.
450 The profile guessing code is good at predicting branch outcome (ie.
451 taken/not taken), that is predicted right slightly over 75% of time.
452 It is however notoriously poor on predicting the probability itself.
453 In general the profile appear a lot flatter (with probabilities closer
454 to 50%) than the reality so it is bad idea to use it to drive optimization
455 such as those disabling dynamic branch prediction for well predictable
458 There are two exceptions - edges leading to noreturn edges and edges
459 predicted by number of iterations heuristics are predicted well. This macro
460 should be able to distinguish those, but at the moment it simply check for
461 noreturn heuristic that is only one giving probability over 99% or bellow
462 1%. In future we might want to propagate reliability information across the
463 CFG if we find this information useful on multiple places. */
465 probability_reliable_p (int prob
)
467 return (profile_status
== PROFILE_READ
468 || (profile_status
== PROFILE_GUESSED
469 && (prob
<= HITRATE (1) || prob
>= HITRATE (99))));
472 /* Same predicate as above, working on edges. */
474 edge_probability_reliable_p (const_edge e
)
476 return probability_reliable_p (e
->probability
);
479 /* Same predicate as edge_probability_reliable_p, working on notes. */
481 br_prob_note_reliable_p (const_rtx note
)
483 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
484 return probability_reliable_p (INTVAL (XEXP (note
, 0)));
488 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
490 gcc_assert (any_condjump_p (insn
));
491 if (!flag_guess_branch_prob
)
494 add_reg_note (insn
, REG_BR_PRED
,
495 gen_rtx_CONCAT (VOIDmode
,
496 GEN_INT ((int) predictor
),
497 GEN_INT ((int) probability
)));
500 /* Predict insn by given predictor. */
503 predict_insn_def (rtx insn
, enum br_predictor predictor
,
504 enum prediction taken
)
506 int probability
= predictor_info
[(int) predictor
].hitrate
;
509 probability
= REG_BR_PROB_BASE
- probability
;
511 predict_insn (insn
, predictor
, probability
);
514 /* Predict edge E with given probability if possible. */
517 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
520 last_insn
= BB_END (e
->src
);
522 /* We can store the branch prediction information only about
523 conditional jumps. */
524 if (!any_condjump_p (last_insn
))
527 /* We always store probability of branching. */
528 if (e
->flags
& EDGE_FALLTHRU
)
529 probability
= REG_BR_PROB_BASE
- probability
;
531 predict_insn (last_insn
, predictor
, probability
);
534 /* Predict edge E with the given PROBABILITY. */
536 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
538 gcc_assert (profile_status
!= PROFILE_GUESSED
);
539 if ((e
->src
!= ENTRY_BLOCK_PTR
&& EDGE_COUNT (e
->src
->succs
) > 1)
540 && flag_guess_branch_prob
&& optimize
)
542 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
543 void **preds
= pointer_map_insert (bb_predictions
, e
->src
);
545 i
->ep_next
= (struct edge_prediction
*) *preds
;
547 i
->ep_probability
= probability
;
548 i
->ep_predictor
= predictor
;
553 /* Remove all predictions on given basic block that are attached
556 remove_predictions_associated_with_edge (edge e
)
563 preds
= pointer_map_contains (bb_predictions
, e
->src
);
567 struct edge_prediction
**prediction
= (struct edge_prediction
**) preds
;
568 struct edge_prediction
*next
;
572 if ((*prediction
)->ep_edge
== e
)
574 next
= (*prediction
)->ep_next
;
579 prediction
= &((*prediction
)->ep_next
);
584 /* Clears the list of predictions stored for BB. */
587 clear_bb_predictions (basic_block bb
)
589 void **preds
= pointer_map_contains (bb_predictions
, bb
);
590 struct edge_prediction
*pred
, *next
;
595 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= next
)
597 next
= pred
->ep_next
;
603 /* Return true when we can store prediction on insn INSN.
604 At the moment we represent predictions only on conditional
605 jumps, not at computed jump or other complicated cases. */
607 can_predict_insn_p (const_rtx insn
)
609 return (JUMP_P (insn
)
610 && any_condjump_p (insn
)
611 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
614 /* Predict edge E by given predictor if possible. */
617 predict_edge_def (edge e
, enum br_predictor predictor
,
618 enum prediction taken
)
620 int probability
= predictor_info
[(int) predictor
].hitrate
;
623 probability
= REG_BR_PROB_BASE
- probability
;
625 predict_edge (e
, predictor
, probability
);
628 /* Invert all branch predictions or probability notes in the INSN. This needs
629 to be done each time we invert the condition used by the jump. */
632 invert_br_probabilities (rtx insn
)
636 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
637 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
638 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
639 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
640 XEXP (XEXP (note
, 0), 1)
641 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
644 /* Dump information about the branch prediction to the output file. */
647 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
648 basic_block bb
, int used
)
656 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
657 if (! (e
->flags
& EDGE_FALLTHRU
))
660 fprintf (file
, " %s heuristics%s: %.1f%%",
661 predictor_info
[predictor
].name
,
662 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
666 fprintf (file
, " exec ");
667 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
670 fprintf (file
, " hit ");
671 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
672 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
676 fprintf (file
, "\n");
679 /* We can not predict the probabilities of outgoing edges of bb. Set them
680 evenly and hope for the best. */
682 set_even_probabilities (basic_block bb
)
688 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
689 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
691 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
692 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
693 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
698 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
699 note if not already present. Remove now useless REG_BR_PRED notes. */
702 combine_predictions_for_insn (rtx insn
, basic_block bb
)
707 int best_probability
= PROB_EVEN
;
708 enum br_predictor best_predictor
= END_PREDICTORS
;
709 int combined_probability
= REG_BR_PROB_BASE
/ 2;
711 bool first_match
= false;
714 if (!can_predict_insn_p (insn
))
716 set_even_probabilities (bb
);
720 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
721 pnote
= ®_NOTES (insn
);
723 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
726 /* We implement "first match" heuristics and use probability guessed
727 by predictor with smallest index. */
728 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
729 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
731 enum br_predictor predictor
= ((enum br_predictor
)
732 INTVAL (XEXP (XEXP (note
, 0), 0)));
733 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
736 if (best_predictor
> predictor
)
737 best_probability
= probability
, best_predictor
= predictor
;
739 d
= (combined_probability
* probability
740 + (REG_BR_PROB_BASE
- combined_probability
)
741 * (REG_BR_PROB_BASE
- probability
));
743 /* Use FP math to avoid overflows of 32bit integers. */
745 /* If one probability is 0% and one 100%, avoid division by zero. */
746 combined_probability
= REG_BR_PROB_BASE
/ 2;
748 combined_probability
= (((double) combined_probability
) * probability
749 * REG_BR_PROB_BASE
/ d
+ 0.5);
752 /* Decide which heuristic to use. In case we didn't match anything,
753 use no_prediction heuristic, in case we did match, use either
754 first match or Dempster-Shaffer theory depending on the flags. */
756 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
760 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
761 combined_probability
, bb
, true);
764 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
766 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
771 combined_probability
= best_probability
;
772 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
776 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
778 enum br_predictor predictor
= ((enum br_predictor
)
779 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
780 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
782 dump_prediction (dump_file
, predictor
, probability
, bb
,
783 !first_match
|| best_predictor
== predictor
);
784 *pnote
= XEXP (*pnote
, 1);
787 pnote
= &XEXP (*pnote
, 1);
792 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (combined_probability
));
794 /* Save the prediction into CFG in case we are seeing non-degenerated
796 if (!single_succ_p (bb
))
798 BRANCH_EDGE (bb
)->probability
= combined_probability
;
799 FALLTHRU_EDGE (bb
)->probability
800 = REG_BR_PROB_BASE
- combined_probability
;
803 else if (!single_succ_p (bb
))
805 int prob
= INTVAL (XEXP (prob_note
, 0));
807 BRANCH_EDGE (bb
)->probability
= prob
;
808 FALLTHRU_EDGE (bb
)->probability
= REG_BR_PROB_BASE
- prob
;
811 single_succ_edge (bb
)->probability
= REG_BR_PROB_BASE
;
814 /* Combine predictions into single probability and store them into CFG.
815 Remove now useless prediction entries. */
818 combine_predictions_for_bb (basic_block bb
)
820 int best_probability
= PROB_EVEN
;
821 enum br_predictor best_predictor
= END_PREDICTORS
;
822 int combined_probability
= REG_BR_PROB_BASE
/ 2;
824 bool first_match
= false;
826 struct edge_prediction
*pred
;
828 edge e
, first
= NULL
, second
= NULL
;
832 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
833 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
836 if (first
&& !second
)
842 /* When there is no successor or only one choice, prediction is easy.
844 We are lazy for now and predict only basic blocks with two outgoing
845 edges. It is possible to predict generic case too, but we have to
846 ignore first match heuristics and do more involved combining. Implement
851 set_even_probabilities (bb
);
852 clear_bb_predictions (bb
);
854 fprintf (dump_file
, "%i edges in bb %i predicted to even probabilities\n",
860 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
862 preds
= pointer_map_contains (bb_predictions
, bb
);
865 /* We implement "first match" heuristics and use probability guessed
866 by predictor with smallest index. */
867 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
869 enum br_predictor predictor
= pred
->ep_predictor
;
870 int probability
= pred
->ep_probability
;
872 if (pred
->ep_edge
!= first
)
873 probability
= REG_BR_PROB_BASE
- probability
;
876 /* First match heuristics would be widly confused if we predicted
878 if (best_predictor
> predictor
)
880 struct edge_prediction
*pred2
;
881 int prob
= probability
;
883 for (pred2
= (struct edge_prediction
*) *preds
; pred2
; pred2
= pred2
->ep_next
)
884 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
886 int probability2
= pred
->ep_probability
;
888 if (pred2
->ep_edge
!= first
)
889 probability2
= REG_BR_PROB_BASE
- probability2
;
891 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
892 (probability2
< REG_BR_PROB_BASE
/ 2))
895 /* If the same predictor later gave better result, go for it! */
896 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
897 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
901 best_probability
= prob
, best_predictor
= predictor
;
904 d
= (combined_probability
* probability
905 + (REG_BR_PROB_BASE
- combined_probability
)
906 * (REG_BR_PROB_BASE
- probability
));
908 /* Use FP math to avoid overflows of 32bit integers. */
910 /* If one probability is 0% and one 100%, avoid division by zero. */
911 combined_probability
= REG_BR_PROB_BASE
/ 2;
913 combined_probability
= (((double) combined_probability
)
915 * REG_BR_PROB_BASE
/ d
+ 0.5);
919 /* Decide which heuristic to use. In case we didn't match anything,
920 use no_prediction heuristic, in case we did match, use either
921 first match or Dempster-Shaffer theory depending on the flags. */
923 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
927 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
930 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
932 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
937 combined_probability
= best_probability
;
938 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
942 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
944 enum br_predictor predictor
= pred
->ep_predictor
;
945 int probability
= pred
->ep_probability
;
947 if (pred
->ep_edge
!= EDGE_SUCC (bb
, 0))
948 probability
= REG_BR_PROB_BASE
- probability
;
949 dump_prediction (dump_file
, predictor
, probability
, bb
,
950 !first_match
|| best_predictor
== predictor
);
953 clear_bb_predictions (bb
);
957 first
->probability
= combined_probability
;
958 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
962 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
963 Return the SSA_NAME if the condition satisfies, NULL otherwise.
965 T1 and T2 should be one of the following cases:
966 1. T1 is SSA_NAME, T2 is NULL
967 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
968 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
971 strips_small_constant (tree t1
, tree t2
)
978 else if (TREE_CODE (t1
) == SSA_NAME
)
980 else if (host_integerp (t1
, 0))
981 value
= tree_low_cst (t1
, 0);
987 else if (host_integerp (t2
, 0))
988 value
= tree_low_cst (t2
, 0);
989 else if (TREE_CODE (t2
) == SSA_NAME
)
997 if (value
<= 4 && value
>= -4)
1003 /* Return the SSA_NAME in T or T's operands.
1004 Return NULL if SSA_NAME cannot be found. */
1007 get_base_value (tree t
)
1009 if (TREE_CODE (t
) == SSA_NAME
)
1012 if (!BINARY_CLASS_P (t
))
1015 switch (TREE_OPERAND_LENGTH (t
))
1018 return strips_small_constant (TREE_OPERAND (t
, 0), NULL
);
1020 return strips_small_constant (TREE_OPERAND (t
, 0),
1021 TREE_OPERAND (t
, 1));
1027 /* Check the compare STMT in LOOP. If it compares an induction
1028 variable to a loop invariant, return true, and save
1029 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1030 Otherwise return false and set LOOP_INVAIANT to NULL. */
1033 is_comparison_with_loop_invariant_p (gimple stmt
, struct loop
*loop
,
1034 tree
*loop_invariant
,
1035 enum tree_code
*compare_code
,
1039 tree op0
, op1
, bound
, base
;
1041 enum tree_code code
;
1044 code
= gimple_cond_code (stmt
);
1045 *loop_invariant
= NULL
;
1061 op0
= gimple_cond_lhs (stmt
);
1062 op1
= gimple_cond_rhs (stmt
);
1064 if ((TREE_CODE (op0
) != SSA_NAME
&& TREE_CODE (op0
) != INTEGER_CST
)
1065 || (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op1
) != INTEGER_CST
))
1067 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op0
, &iv0
, true))
1069 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op1
, &iv1
, true))
1071 if (TREE_CODE (iv0
.step
) != INTEGER_CST
1072 || TREE_CODE (iv1
.step
) != INTEGER_CST
)
1074 if ((integer_zerop (iv0
.step
) && integer_zerop (iv1
.step
))
1075 || (!integer_zerop (iv0
.step
) && !integer_zerop (iv1
.step
)))
1078 if (integer_zerop (iv0
.step
))
1080 if (code
!= NE_EXPR
&& code
!= EQ_EXPR
)
1081 code
= invert_tree_comparison (code
, false);
1084 if (host_integerp (iv1
.step
, 0))
1093 if (host_integerp (iv0
.step
, 0))
1099 if (TREE_CODE (bound
) != INTEGER_CST
)
1100 bound
= get_base_value (bound
);
1103 if (TREE_CODE (base
) != INTEGER_CST
)
1104 base
= get_base_value (base
);
1108 *loop_invariant
= bound
;
1109 *compare_code
= code
;
1111 *loop_iv_base
= base
;
1115 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1118 expr_coherent_p (tree t1
, tree t2
)
1121 tree ssa_name_1
= NULL
;
1122 tree ssa_name_2
= NULL
;
1124 gcc_assert (TREE_CODE (t1
) == SSA_NAME
|| TREE_CODE (t1
) == INTEGER_CST
);
1125 gcc_assert (TREE_CODE (t2
) == SSA_NAME
|| TREE_CODE (t2
) == INTEGER_CST
);
1130 if (TREE_CODE (t1
) == INTEGER_CST
&& TREE_CODE (t2
) == INTEGER_CST
)
1132 if (TREE_CODE (t1
) == INTEGER_CST
|| TREE_CODE (t2
) == INTEGER_CST
)
1135 /* Check to see if t1 is expressed/defined with t2. */
1136 stmt
= SSA_NAME_DEF_STMT (t1
);
1137 gcc_assert (stmt
!= NULL
);
1138 if (is_gimple_assign (stmt
))
1140 ssa_name_1
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1141 if (ssa_name_1
&& ssa_name_1
== t2
)
1145 /* Check to see if t2 is expressed/defined with t1. */
1146 stmt
= SSA_NAME_DEF_STMT (t2
);
1147 gcc_assert (stmt
!= NULL
);
1148 if (is_gimple_assign (stmt
))
1150 ssa_name_2
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1151 if (ssa_name_2
&& ssa_name_2
== t1
)
1155 /* Compare if t1 and t2's def_stmts are identical. */
1156 if (ssa_name_2
!= NULL
&& ssa_name_1
== ssa_name_2
)
1162 /* Predict branch probability of BB when BB contains a branch that compares
1163 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1164 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1167 for (int i = 0; i < bound; i++) {
1174 In this loop, we will predict the branch inside the loop to be taken. */
1177 predict_iv_comparison (struct loop
*loop
, basic_block bb
,
1178 tree loop_bound_var
,
1179 tree loop_iv_base_var
,
1180 enum tree_code loop_bound_code
,
1181 int loop_bound_step
)
1184 tree compare_var
, compare_base
;
1185 enum tree_code compare_code
;
1186 tree compare_step_var
;
1190 if (predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1191 || predicted_by_p (bb
, PRED_LOOP_ITERATIONS
)
1192 || predicted_by_p (bb
, PRED_LOOP_EXIT
))
1195 stmt
= last_stmt (bb
);
1196 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1198 if (!is_comparison_with_loop_invariant_p (stmt
, loop
, &compare_var
,
1204 /* Find the taken edge. */
1205 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1206 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1209 /* When comparing an IV to a loop invariant, NE is more likely to be
1210 taken while EQ is more likely to be not-taken. */
1211 if (compare_code
== NE_EXPR
)
1213 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1216 else if (compare_code
== EQ_EXPR
)
1218 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1222 if (!expr_coherent_p (loop_iv_base_var
, compare_base
))
1225 /* If loop bound, base and compare bound are all constants, we can
1226 calculate the probability directly. */
1227 if (host_integerp (loop_bound_var
, 0)
1228 && host_integerp (compare_var
, 0)
1229 && host_integerp (compare_base
, 0))
1232 bool of
, overflow
= false;
1233 double_int mod
, compare_count
, tem
, loop_count
;
1235 double_int loop_bound
= tree_to_double_int (loop_bound_var
);
1236 double_int compare_bound
= tree_to_double_int (compare_var
);
1237 double_int base
= tree_to_double_int (compare_base
);
1238 double_int compare_step
= tree_to_double_int (compare_step_var
);
1240 /* (loop_bound - base) / compare_step */
1241 tem
= loop_bound
.sub_with_overflow (base
, &of
);
1243 loop_count
= tem
.divmod_with_overflow (compare_step
,
1248 if ((!compare_step
.is_negative ())
1249 ^ (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1251 /* (loop_bound - compare_bound) / compare_step */
1252 tem
= loop_bound
.sub_with_overflow (compare_bound
, &of
);
1254 compare_count
= tem
.divmod_with_overflow (compare_step
,
1261 /* (compare_bound - base) / compare_step */
1262 tem
= compare_bound
.sub_with_overflow (base
, &of
);
1264 compare_count
= tem
.divmod_with_overflow (compare_step
,
1269 if (compare_code
== LE_EXPR
|| compare_code
== GE_EXPR
)
1271 if (loop_bound_code
== LE_EXPR
|| loop_bound_code
== GE_EXPR
)
1273 if (compare_count
.is_negative ())
1274 compare_count
= double_int_zero
;
1275 if (loop_count
.is_negative ())
1276 loop_count
= double_int_zero
;
1277 if (loop_count
.is_zero ())
1279 else if (compare_count
.scmp (loop_count
) == 1)
1280 probability
= REG_BR_PROB_BASE
;
1283 /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1284 could overflow, shift both loop_count and compare_count right
1285 a bit so that it doesn't overflow. Note both counts are known not
1286 to be negative at this point. */
1287 int clz_bits
= clz_hwi (loop_count
.high
);
1288 gcc_assert (REG_BR_PROB_BASE
< 32768);
1291 loop_count
.arshift (16 - clz_bits
, HOST_BITS_PER_DOUBLE_INT
);
1292 compare_count
.arshift (16 - clz_bits
, HOST_BITS_PER_DOUBLE_INT
);
1294 tem
= compare_count
.mul_with_sign (double_int::from_shwi
1295 (REG_BR_PROB_BASE
), true, &of
);
1297 tem
= tem
.divmod (loop_count
, true, TRUNC_DIV_EXPR
, &mod
);
1298 probability
= tem
.to_uhwi ();
1302 predict_edge (then_edge
, PRED_LOOP_IV_COMPARE
, probability
);
1307 if (expr_coherent_p (loop_bound_var
, compare_var
))
1309 if ((loop_bound_code
== LT_EXPR
|| loop_bound_code
== LE_EXPR
)
1310 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1311 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1312 else if ((loop_bound_code
== GT_EXPR
|| loop_bound_code
== GE_EXPR
)
1313 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1314 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1315 else if (loop_bound_code
== NE_EXPR
)
1317 /* If the loop backedge condition is "(i != bound)", we do
1318 the comparison based on the step of IV:
1319 * step < 0 : backedge condition is like (i > bound)
1320 * step > 0 : backedge condition is like (i < bound) */
1321 gcc_assert (loop_bound_step
!= 0);
1322 if (loop_bound_step
> 0
1323 && (compare_code
== LT_EXPR
1324 || compare_code
== LE_EXPR
))
1325 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1326 else if (loop_bound_step
< 0
1327 && (compare_code
== GT_EXPR
1328 || compare_code
== GE_EXPR
))
1329 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1331 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1334 /* The branch is predicted not-taken if loop_bound_code is
1335 opposite with compare_code. */
1336 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1338 else if (expr_coherent_p (loop_iv_base_var
, compare_var
))
1341 for (i = s; i < h; i++)
1343 The branch should be predicted taken. */
1344 if (loop_bound_step
> 0
1345 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1346 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1347 else if (loop_bound_step
< 0
1348 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1349 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1351 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1355 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1356 exits are resulted from short-circuit conditions that will generate an
1359 if (foo() || global > 10)
1362 This will be translated into:
1367 if foo() goto BB6 else goto BB5
1369 if global > 10 goto BB6 else goto BB7
1373 iftmp = (PHI 0(BB5), 1(BB6))
1374 if iftmp == 1 goto BB8 else goto BB3
1376 outside of the loop...
1378 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1379 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1380 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1381 exits to predict them using PRED_LOOP_EXIT. */
1384 predict_extra_loop_exits (edge exit_edge
)
1387 bool check_value_one
;
1389 tree cmp_rhs
, cmp_lhs
;
1390 gimple cmp_stmt
= last_stmt (exit_edge
->src
);
1392 if (!cmp_stmt
|| gimple_code (cmp_stmt
) != GIMPLE_COND
)
1394 cmp_rhs
= gimple_cond_rhs (cmp_stmt
);
1395 cmp_lhs
= gimple_cond_lhs (cmp_stmt
);
1396 if (!TREE_CONSTANT (cmp_rhs
)
1397 || !(integer_zerop (cmp_rhs
) || integer_onep (cmp_rhs
)))
1399 if (TREE_CODE (cmp_lhs
) != SSA_NAME
)
1402 /* If check_value_one is true, only the phi_args with value '1' will lead
1403 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1405 check_value_one
= (((integer_onep (cmp_rhs
))
1406 ^ (gimple_cond_code (cmp_stmt
) == EQ_EXPR
))
1407 ^ ((exit_edge
->flags
& EDGE_TRUE_VALUE
) != 0));
1409 phi_stmt
= SSA_NAME_DEF_STMT (cmp_lhs
);
1410 if (!phi_stmt
|| gimple_code (phi_stmt
) != GIMPLE_PHI
)
1413 for (i
= 0; i
< gimple_phi_num_args (phi_stmt
); i
++)
1417 tree val
= gimple_phi_arg_def (phi_stmt
, i
);
1418 edge e
= gimple_phi_arg_edge (phi_stmt
, i
);
1420 if (!TREE_CONSTANT (val
) || !(integer_zerop (val
) || integer_onep (val
)))
1422 if ((check_value_one
^ integer_onep (val
)) == 1)
1424 if (EDGE_COUNT (e
->src
->succs
) != 1)
1426 predict_paths_leading_to_edge (e
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1430 FOR_EACH_EDGE (e1
, ei
, e
->src
->preds
)
1431 predict_paths_leading_to_edge (e1
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1435 /* Predict edge probabilities by exploiting loop structure. */
1438 predict_loops (void)
1443 /* Try to predict out blocks in a loop that are not part of a
1445 FOR_EACH_LOOP (li
, loop
, 0)
1447 basic_block bb
, *bbs
;
1448 unsigned j
, n_exits
;
1450 struct tree_niter_desc niter_desc
;
1452 struct nb_iter_bound
*nb_iter
;
1453 enum tree_code loop_bound_code
= ERROR_MARK
;
1454 tree loop_bound_step
= NULL
;
1455 tree loop_bound_var
= NULL
;
1456 tree loop_iv_base
= NULL
;
1459 exits
= get_loop_exit_edges (loop
);
1460 n_exits
= exits
.length ();
1467 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1470 HOST_WIDE_INT nitercst
;
1471 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
1473 enum br_predictor predictor
;
1475 predict_extra_loop_exits (ex
);
1477 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
1478 niter
= niter_desc
.niter
;
1479 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
1480 niter
= loop_niter_by_eval (loop
, ex
);
1482 if (TREE_CODE (niter
) == INTEGER_CST
)
1484 if (host_integerp (niter
, 1)
1486 && compare_tree_int (niter
, max
- 1) == -1)
1487 nitercst
= tree_low_cst (niter
, 1) + 1;
1490 predictor
= PRED_LOOP_ITERATIONS
;
1492 /* If we have just one exit and we can derive some information about
1493 the number of iterations of the loop from the statements inside
1494 the loop, use it to predict this exit. */
1495 else if (n_exits
== 1)
1497 nitercst
= estimated_stmt_executions_int (loop
);
1503 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
1508 /* If the prediction for number of iterations is zero, do not
1509 predict the exit edges. */
1513 probability
= ((REG_BR_PROB_BASE
+ nitercst
/ 2) / nitercst
);
1514 predict_edge (ex
, predictor
, probability
);
1518 /* Find information about loop bound variables. */
1519 for (nb_iter
= loop
->bounds
; nb_iter
;
1520 nb_iter
= nb_iter
->next
)
1522 && gimple_code (nb_iter
->stmt
) == GIMPLE_COND
)
1524 stmt
= nb_iter
->stmt
;
1527 if (!stmt
&& last_stmt (loop
->header
)
1528 && gimple_code (last_stmt (loop
->header
)) == GIMPLE_COND
)
1529 stmt
= last_stmt (loop
->header
);
1531 is_comparison_with_loop_invariant_p (stmt
, loop
,
1537 bbs
= get_loop_body (loop
);
1539 for (j
= 0; j
< loop
->num_nodes
; j
++)
1541 int header_found
= 0;
1547 /* Bypass loop heuristics on continue statement. These
1548 statements construct loops via "non-loop" constructs
1549 in the source language and are better to be handled
1551 if (predicted_by_p (bb
, PRED_CONTINUE
))
1554 /* Loop branch heuristics - predict an edge back to a
1555 loop's head as taken. */
1556 if (bb
== loop
->latch
)
1558 e
= find_edge (loop
->latch
, loop
->header
);
1562 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
1566 /* Loop exit heuristics - predict an edge exiting the loop if the
1567 conditional has no loop header successors as not taken. */
1569 /* If we already used more reliable loop exit predictors, do not
1570 bother with PRED_LOOP_EXIT. */
1571 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1572 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS
))
1574 /* For loop with many exits we don't want to predict all exits
1575 with the pretty large probability, because if all exits are
1576 considered in row, the loop would be predicted to iterate
1577 almost never. The code to divide probability by number of
1578 exits is very rough. It should compute the number of exits
1579 taken in each patch through function (not the overall number
1580 of exits that might be a lot higher for loops with wide switch
1581 statements in them) and compute n-th square root.
1583 We limit the minimal probability by 2% to avoid
1584 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1585 as this was causing regression in perl benchmark containing such
1588 int probability
= ((REG_BR_PROB_BASE
1589 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
1591 if (probability
< HITRATE (2))
1592 probability
= HITRATE (2);
1593 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1594 if (e
->dest
->index
< NUM_FIXED_BLOCKS
1595 || !flow_bb_inside_loop_p (loop
, e
->dest
))
1596 predict_edge (e
, PRED_LOOP_EXIT
, probability
);
1599 predict_iv_comparison (loop
, bb
, loop_bound_var
, loop_iv_base
,
1601 tree_low_cst (loop_bound_step
, 0));
1604 /* Free basic blocks from get_loop_body. */
1609 /* Attempt to predict probabilities of BB outgoing edges using local
1612 bb_estimate_probability_locally (basic_block bb
)
1614 rtx last_insn
= BB_END (bb
);
1617 if (! can_predict_insn_p (last_insn
))
1619 cond
= get_condition (last_insn
, NULL
, false, false);
1623 /* Try "pointer heuristic."
1624 A comparison ptr == 0 is predicted as false.
1625 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1626 if (COMPARISON_P (cond
)
1627 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
1628 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
1630 if (GET_CODE (cond
) == EQ
)
1631 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
1632 else if (GET_CODE (cond
) == NE
)
1633 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
1637 /* Try "opcode heuristic."
1638 EQ tests are usually false and NE tests are usually true. Also,
1639 most quantities are positive, so we can make the appropriate guesses
1640 about signed comparisons against zero. */
1641 switch (GET_CODE (cond
))
1644 /* Unconditional branch. */
1645 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
1646 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
1651 /* Floating point comparisons appears to behave in a very
1652 unpredictable way because of special role of = tests in
1654 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1656 /* Comparisons with 0 are often used for booleans and there is
1657 nothing useful to predict about them. */
1658 else if (XEXP (cond
, 1) == const0_rtx
1659 || XEXP (cond
, 0) == const0_rtx
)
1662 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
1667 /* Floating point comparisons appears to behave in a very
1668 unpredictable way because of special role of = tests in
1670 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1672 /* Comparisons with 0 are often used for booleans and there is
1673 nothing useful to predict about them. */
1674 else if (XEXP (cond
, 1) == const0_rtx
1675 || XEXP (cond
, 0) == const0_rtx
)
1678 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
1682 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
1686 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
1691 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1692 || XEXP (cond
, 1) == constm1_rtx
)
1693 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
1698 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1699 || XEXP (cond
, 1) == constm1_rtx
)
1700 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
1708 /* Set edge->probability for each successor edge of BB. */
1710 guess_outgoing_edge_probabilities (basic_block bb
)
1712 bb_estimate_probability_locally (bb
);
1713 combine_predictions_for_insn (BB_END (bb
), bb
);
1716 static tree
expr_expected_value (tree
, bitmap
);
1718 /* Helper function for expr_expected_value. */
1721 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
,
1722 tree op1
, bitmap visited
)
1726 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1728 if (TREE_CONSTANT (op0
))
1731 if (code
!= SSA_NAME
)
1734 def
= SSA_NAME_DEF_STMT (op0
);
1736 /* If we were already here, break the infinite cycle. */
1737 if (!bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
)))
1740 if (gimple_code (def
) == GIMPLE_PHI
)
1742 /* All the arguments of the PHI node must have the same constant
1744 int i
, n
= gimple_phi_num_args (def
);
1745 tree val
= NULL
, new_val
;
1747 for (i
= 0; i
< n
; i
++)
1749 tree arg
= PHI_ARG_DEF (def
, i
);
1751 /* If this PHI has itself as an argument, we cannot
1752 determine the string length of this argument. However,
1753 if we can find an expected constant value for the other
1754 PHI args then we can still be sure that this is
1755 likely a constant. So be optimistic and just
1756 continue with the next argument. */
1757 if (arg
== PHI_RESULT (def
))
1760 new_val
= expr_expected_value (arg
, visited
);
1765 else if (!operand_equal_p (val
, new_val
, false))
1770 if (is_gimple_assign (def
))
1772 if (gimple_assign_lhs (def
) != op0
)
1775 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
1776 gimple_assign_rhs1 (def
),
1777 gimple_assign_rhs_code (def
),
1778 gimple_assign_rhs2 (def
),
1782 if (is_gimple_call (def
))
1784 tree decl
= gimple_call_fndecl (def
);
1787 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
1788 switch (DECL_FUNCTION_CODE (decl
))
1790 case BUILT_IN_EXPECT
:
1793 if (gimple_call_num_args (def
) != 2)
1795 val
= gimple_call_arg (def
, 0);
1796 if (TREE_CONSTANT (val
))
1798 return gimple_call_arg (def
, 1);
1801 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
:
1802 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1
:
1803 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2
:
1804 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4
:
1805 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8
:
1806 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16
:
1807 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE
:
1808 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
:
1809 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
:
1810 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2
:
1811 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4
:
1812 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8
:
1813 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16
:
1814 /* Assume that any given atomic operation has low contention,
1815 and thus the compare-and-swap operation succeeds. */
1816 return boolean_true_node
;
1823 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
1826 op0
= expr_expected_value (op0
, visited
);
1829 op1
= expr_expected_value (op1
, visited
);
1832 res
= fold_build2 (code
, type
, op0
, op1
);
1833 if (TREE_CONSTANT (res
))
1837 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
1840 op0
= expr_expected_value (op0
, visited
);
1843 res
= fold_build1 (code
, type
, op0
);
1844 if (TREE_CONSTANT (res
))
1851 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1852 The function is used by builtin_expect branch predictor so the evidence
1853 must come from this construct and additional possible constant folding.
1855 We may want to implement more involved value guess (such as value range
1856 propagation based prediction), but such tricks shall go to new
1860 expr_expected_value (tree expr
, bitmap visited
)
1862 enum tree_code code
;
1865 if (TREE_CONSTANT (expr
))
1868 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
1869 return expr_expected_value_1 (TREE_TYPE (expr
),
1870 op0
, code
, op1
, visited
);
1874 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1875 we no longer need. */
1877 strip_predict_hints (void)
1885 gimple_stmt_iterator bi
;
1886 for (bi
= gsi_start_bb (bb
); !gsi_end_p (bi
);)
1888 gimple stmt
= gsi_stmt (bi
);
1890 if (gimple_code (stmt
) == GIMPLE_PREDICT
)
1892 gsi_remove (&bi
, true);
1895 else if (gimple_code (stmt
) == GIMPLE_CALL
)
1897 tree fndecl
= gimple_call_fndecl (stmt
);
1900 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
1901 && DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_EXPECT
1902 && gimple_call_num_args (stmt
) == 2)
1904 var
= gimple_call_lhs (stmt
);
1908 = gimple_build_assign (var
, gimple_call_arg (stmt
, 0));
1909 gsi_replace (&bi
, ass_stmt
, true);
1913 gsi_remove (&bi
, true);
1924 /* Predict using opcode of the last statement in basic block. */
1926 tree_predict_by_opcode (basic_block bb
)
1928 gimple stmt
= last_stmt (bb
);
1937 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1939 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1940 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1942 op0
= gimple_cond_lhs (stmt
);
1943 op1
= gimple_cond_rhs (stmt
);
1944 cmp
= gimple_cond_code (stmt
);
1945 type
= TREE_TYPE (op0
);
1946 visited
= BITMAP_ALLOC (NULL
);
1947 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, visited
);
1948 BITMAP_FREE (visited
);
1951 if (integer_zerop (val
))
1952 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, NOT_TAKEN
);
1954 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, TAKEN
);
1957 /* Try "pointer heuristic."
1958 A comparison ptr == 0 is predicted as false.
1959 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1960 if (POINTER_TYPE_P (type
))
1963 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
1964 else if (cmp
== NE_EXPR
)
1965 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
1969 /* Try "opcode heuristic."
1970 EQ tests are usually false and NE tests are usually true. Also,
1971 most quantities are positive, so we can make the appropriate guesses
1972 about signed comparisons against zero. */
1977 /* Floating point comparisons appears to behave in a very
1978 unpredictable way because of special role of = tests in
1980 if (FLOAT_TYPE_P (type
))
1982 /* Comparisons with 0 are often used for booleans and there is
1983 nothing useful to predict about them. */
1984 else if (integer_zerop (op0
) || integer_zerop (op1
))
1987 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
1992 /* Floating point comparisons appears to behave in a very
1993 unpredictable way because of special role of = tests in
1995 if (FLOAT_TYPE_P (type
))
1997 /* Comparisons with 0 are often used for booleans and there is
1998 nothing useful to predict about them. */
1999 else if (integer_zerop (op0
)
2000 || integer_zerop (op1
))
2003 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
2007 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
2010 case UNORDERED_EXPR
:
2011 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
2016 if (integer_zerop (op1
)
2017 || integer_onep (op1
)
2018 || integer_all_onesp (op1
)
2021 || real_minus_onep (op1
))
2022 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
2027 if (integer_zerop (op1
)
2028 || integer_onep (op1
)
2029 || integer_all_onesp (op1
)
2032 || real_minus_onep (op1
))
2033 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
2041 /* Try to guess whether the value of return means error code. */
2043 static enum br_predictor
2044 return_prediction (tree val
, enum prediction
*prediction
)
2048 return PRED_NO_PREDICTION
;
2049 /* Different heuristics for pointers and scalars. */
2050 if (POINTER_TYPE_P (TREE_TYPE (val
)))
2052 /* NULL is usually not returned. */
2053 if (integer_zerop (val
))
2055 *prediction
= NOT_TAKEN
;
2056 return PRED_NULL_RETURN
;
2059 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
2061 /* Negative return values are often used to indicate
2063 if (TREE_CODE (val
) == INTEGER_CST
2064 && tree_int_cst_sgn (val
) < 0)
2066 *prediction
= NOT_TAKEN
;
2067 return PRED_NEGATIVE_RETURN
;
2069 /* Constant return values seems to be commonly taken.
2070 Zero/one often represent booleans so exclude them from the
2072 if (TREE_CONSTANT (val
)
2073 && (!integer_zerop (val
) && !integer_onep (val
)))
2075 *prediction
= TAKEN
;
2076 return PRED_CONST_RETURN
;
2079 return PRED_NO_PREDICTION
;
2082 /* Find the basic block with return expression and look up for possible
2083 return value trying to apply RETURN_PREDICTION heuristics. */
2085 apply_return_prediction (void)
2087 gimple return_stmt
= NULL
;
2091 int phi_num_args
, i
;
2092 enum br_predictor pred
;
2093 enum prediction direction
;
2096 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2098 return_stmt
= last_stmt (e
->src
);
2100 && gimple_code (return_stmt
) == GIMPLE_RETURN
)
2105 return_val
= gimple_return_retval (return_stmt
);
2108 if (TREE_CODE (return_val
) != SSA_NAME
2109 || !SSA_NAME_DEF_STMT (return_val
)
2110 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
2112 phi
= SSA_NAME_DEF_STMT (return_val
);
2113 phi_num_args
= gimple_phi_num_args (phi
);
2114 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
2116 /* Avoid the degenerate case where all return values form the function
2117 belongs to same category (ie they are all positive constants)
2118 so we can hardly say something about them. */
2119 for (i
= 1; i
< phi_num_args
; i
++)
2120 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
2122 if (i
!= phi_num_args
)
2123 for (i
= 0; i
< phi_num_args
; i
++)
2125 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
2126 if (pred
!= PRED_NO_PREDICTION
)
2127 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi
, i
), pred
,
2132 /* Look for basic block that contains unlikely to happen events
2133 (such as noreturn calls) and mark all paths leading to execution
2134 of this basic blocks as unlikely. */
2137 tree_bb_level_predictions (void)
2140 bool has_return_edges
= false;
2144 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2145 if (!(e
->flags
& (EDGE_ABNORMAL
| EDGE_FAKE
| EDGE_EH
)))
2147 has_return_edges
= true;
2151 apply_return_prediction ();
2155 gimple_stmt_iterator gsi
;
2157 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2159 gimple stmt
= gsi_stmt (gsi
);
2162 if (is_gimple_call (stmt
))
2164 if ((gimple_call_flags (stmt
) & ECF_NORETURN
)
2165 && has_return_edges
)
2166 predict_paths_leading_to (bb
, PRED_NORETURN
,
2168 decl
= gimple_call_fndecl (stmt
);
2170 && lookup_attribute ("cold",
2171 DECL_ATTRIBUTES (decl
)))
2172 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
2175 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
2177 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
2178 gimple_predict_outcome (stmt
));
2179 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2180 hints to callers. */
2186 #ifdef ENABLE_CHECKING
2188 /* Callback for pointer_map_traverse, asserts that the pointer map is
2192 assert_is_empty (const void *key ATTRIBUTE_UNUSED
, void **value
,
2193 void *data ATTRIBUTE_UNUSED
)
2195 gcc_assert (!*value
);
2200 /* Predict branch probabilities and estimate profile for basic block BB. */
2203 tree_estimate_probability_bb (basic_block bb
)
2209 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2211 /* Predict edges to user labels with attributes. */
2212 if (e
->dest
!= EXIT_BLOCK_PTR
)
2214 gimple_stmt_iterator gi
;
2215 for (gi
= gsi_start_bb (e
->dest
); !gsi_end_p (gi
); gsi_next (&gi
))
2217 gimple stmt
= gsi_stmt (gi
);
2220 if (gimple_code (stmt
) != GIMPLE_LABEL
)
2222 decl
= gimple_label_label (stmt
);
2223 if (DECL_ARTIFICIAL (decl
))
2226 /* Finally, we have a user-defined label. */
2227 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl
)))
2228 predict_edge_def (e
, PRED_COLD_LABEL
, NOT_TAKEN
);
2229 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl
)))
2230 predict_edge_def (e
, PRED_HOT_LABEL
, TAKEN
);
2234 /* Predict early returns to be probable, as we've already taken
2235 care for error returns and other cases are often used for
2236 fast paths through function.
2238 Since we've already removed the return statements, we are
2239 looking for CFG like:
2249 if (e
->dest
!= bb
->next_bb
2250 && e
->dest
!= EXIT_BLOCK_PTR
2251 && single_succ_p (e
->dest
)
2252 && single_succ_edge (e
->dest
)->dest
== EXIT_BLOCK_PTR
2253 && (last
= last_stmt (e
->dest
)) != NULL
2254 && gimple_code (last
) == GIMPLE_RETURN
)
2259 if (single_succ_p (bb
))
2261 FOR_EACH_EDGE (e1
, ei1
, bb
->preds
)
2262 if (!predicted_by_p (e1
->src
, PRED_NULL_RETURN
)
2263 && !predicted_by_p (e1
->src
, PRED_CONST_RETURN
)
2264 && !predicted_by_p (e1
->src
, PRED_NEGATIVE_RETURN
))
2265 predict_edge_def (e1
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2268 if (!predicted_by_p (e
->src
, PRED_NULL_RETURN
)
2269 && !predicted_by_p (e
->src
, PRED_CONST_RETURN
)
2270 && !predicted_by_p (e
->src
, PRED_NEGATIVE_RETURN
))
2271 predict_edge_def (e
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2274 /* Look for block we are guarding (ie we dominate it,
2275 but it doesn't postdominate us). */
2276 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
2277 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
2278 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
2280 gimple_stmt_iterator bi
;
2282 /* The call heuristic claims that a guarded function call
2283 is improbable. This is because such calls are often used
2284 to signal exceptional situations such as printing error
2286 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
2289 gimple stmt
= gsi_stmt (bi
);
2290 if (is_gimple_call (stmt
)
2291 /* Constant and pure calls are hardly used to signalize
2292 something exceptional. */
2293 && gimple_has_side_effects (stmt
))
2295 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
2301 tree_predict_by_opcode (bb
);
2304 /* Predict branch probabilities and estimate profile of the tree CFG.
2305 This function can be called from the loop optimizers to recompute
2306 the profile information. */
2309 tree_estimate_probability (void)
2313 add_noreturn_fake_exit_edges ();
2314 connect_infinite_loops_to_exit ();
2315 /* We use loop_niter_by_eval, which requires that the loops have
2317 create_preheaders (CP_SIMPLE_PREHEADERS
);
2318 calculate_dominance_info (CDI_POST_DOMINATORS
);
2320 bb_predictions
= pointer_map_create ();
2321 tree_bb_level_predictions ();
2322 record_loop_exits ();
2324 if (number_of_loops () > 1)
2328 tree_estimate_probability_bb (bb
);
2331 combine_predictions_for_bb (bb
);
2333 #ifdef ENABLE_CHECKING
2334 pointer_map_traverse (bb_predictions
, assert_is_empty
, NULL
);
2336 pointer_map_destroy (bb_predictions
);
2337 bb_predictions
= NULL
;
2339 estimate_bb_frequencies ();
2340 free_dominance_info (CDI_POST_DOMINATORS
);
2341 remove_fake_exit_edges ();
2344 /* Predict branch probabilities and estimate profile of the tree CFG.
2345 This is the driver function for PASS_PROFILE. */
2348 tree_estimate_probability_driver (void)
2352 loop_optimizer_init (LOOPS_NORMAL
);
2353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2354 flow_loops_dump (dump_file
, NULL
, 0);
2356 mark_irreducible_loops ();
2358 nb_loops
= number_of_loops ();
2362 tree_estimate_probability ();
2367 loop_optimizer_finalize ();
2368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2369 gimple_dump_cfg (dump_file
, dump_flags
);
2370 if (profile_status
== PROFILE_ABSENT
)
2371 profile_status
= PROFILE_GUESSED
;
2375 /* Predict edges to successors of CUR whose sources are not postdominated by
2376 BB by PRED and recurse to all postdominators. */
2379 predict_paths_for_bb (basic_block cur
, basic_block bb
,
2380 enum br_predictor pred
,
2381 enum prediction taken
,
2388 /* We are looking for all edges forming edge cut induced by
2389 set of all blocks postdominated by BB. */
2390 FOR_EACH_EDGE (e
, ei
, cur
->preds
)
2391 if (e
->src
->index
>= NUM_FIXED_BLOCKS
2392 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, bb
))
2398 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2399 if (e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2401 gcc_assert (bb
== cur
|| dominated_by_p (CDI_POST_DOMINATORS
, cur
, bb
));
2403 /* See if there is an edge from e->src that is not abnormal
2404 and does not lead to BB. */
2405 FOR_EACH_EDGE (e2
, ei2
, e
->src
->succs
)
2407 && !(e2
->flags
& (EDGE_EH
| EDGE_FAKE
))
2408 && !dominated_by_p (CDI_POST_DOMINATORS
, e2
->dest
, bb
))
2414 /* If there is non-abnormal path leaving e->src, predict edge
2415 using predictor. Otherwise we need to look for paths
2418 The second may lead to infinite loop in the case we are predicitng
2419 regions that are only reachable by abnormal edges. We simply
2420 prevent visiting given BB twice. */
2422 predict_edge_def (e
, pred
, taken
);
2423 else if (bitmap_set_bit (visited
, e
->src
->index
))
2424 predict_paths_for_bb (e
->src
, e
->src
, pred
, taken
, visited
);
2426 for (son
= first_dom_son (CDI_POST_DOMINATORS
, cur
);
2428 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
2429 predict_paths_for_bb (son
, bb
, pred
, taken
, visited
);
2432 /* Sets branch probabilities according to PREDiction and
2436 predict_paths_leading_to (basic_block bb
, enum br_predictor pred
,
2437 enum prediction taken
)
2439 bitmap visited
= BITMAP_ALLOC (NULL
);
2440 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2441 BITMAP_FREE (visited
);
2444 /* Like predict_paths_leading_to but take edge instead of basic block. */
2447 predict_paths_leading_to_edge (edge e
, enum br_predictor pred
,
2448 enum prediction taken
)
2450 bool has_nonloop_edge
= false;
2454 basic_block bb
= e
->src
;
2455 FOR_EACH_EDGE (e2
, ei
, bb
->succs
)
2456 if (e2
->dest
!= e
->src
&& e2
->dest
!= e
->dest
2457 && !(e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2458 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e2
->dest
))
2460 has_nonloop_edge
= true;
2463 if (!has_nonloop_edge
)
2465 bitmap visited
= BITMAP_ALLOC (NULL
);
2466 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2467 BITMAP_FREE (visited
);
2470 predict_edge_def (e
, pred
, taken
);
2473 /* This is used to carry information about basic blocks. It is
2474 attached to the AUX field of the standard CFG block. */
2476 typedef struct block_info_def
2478 /* Estimated frequency of execution of basic_block. */
2481 /* To keep queue of basic blocks to process. */
2484 /* Number of predecessors we need to visit first. */
2488 /* Similar information for edges. */
2489 typedef struct edge_info_def
2491 /* In case edge is a loopback edge, the probability edge will be reached
2492 in case header is. Estimated number of iterations of the loop can be
2493 then computed as 1 / (1 - back_edge_prob). */
2494 sreal back_edge_prob
;
2495 /* True if the edge is a loopback edge in the natural loop. */
2496 unsigned int back_edge
:1;
2499 #define BLOCK_INFO(B) ((block_info) (B)->aux)
2500 #define EDGE_INFO(E) ((edge_info) (E)->aux)
2502 /* Helper function for estimate_bb_frequencies.
2503 Propagate the frequencies in blocks marked in
2504 TOVISIT, starting in HEAD. */
2507 propagate_freq (basic_block head
, bitmap tovisit
)
2516 /* For each basic block we need to visit count number of his predecessors
2517 we need to visit first. */
2518 EXECUTE_IF_SET_IN_BITMAP (tovisit
, 0, i
, bi
)
2523 bb
= BASIC_BLOCK (i
);
2525 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2527 bool visit
= bitmap_bit_p (tovisit
, e
->src
->index
);
2529 if (visit
&& !(e
->flags
& EDGE_DFS_BACK
))
2531 else if (visit
&& dump_file
&& !EDGE_INFO (e
)->back_edge
)
2533 "Irreducible region hit, ignoring edge to %i->%i\n",
2534 e
->src
->index
, bb
->index
);
2536 BLOCK_INFO (bb
)->npredecessors
= count
;
2537 /* When function never returns, we will never process exit block. */
2538 if (!count
&& bb
== EXIT_BLOCK_PTR
)
2539 bb
->count
= bb
->frequency
= 0;
2542 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
2544 for (bb
= head
; bb
; bb
= nextbb
)
2547 sreal cyclic_probability
, frequency
;
2549 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
2550 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
2552 nextbb
= BLOCK_INFO (bb
)->next
;
2553 BLOCK_INFO (bb
)->next
= NULL
;
2555 /* Compute frequency of basic block. */
2558 #ifdef ENABLE_CHECKING
2559 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2560 gcc_assert (!bitmap_bit_p (tovisit
, e
->src
->index
)
2561 || (e
->flags
& EDGE_DFS_BACK
));
2564 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2565 if (EDGE_INFO (e
)->back_edge
)
2567 sreal_add (&cyclic_probability
, &cyclic_probability
,
2568 &EDGE_INFO (e
)->back_edge_prob
);
2570 else if (!(e
->flags
& EDGE_DFS_BACK
))
2574 /* frequency += (e->probability
2575 * BLOCK_INFO (e->src)->frequency /
2576 REG_BR_PROB_BASE); */
2578 sreal_init (&tmp
, e
->probability
, 0);
2579 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
2580 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
2581 sreal_add (&frequency
, &frequency
, &tmp
);
2584 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
2586 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
2587 sizeof (frequency
));
2591 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
2593 memcpy (&cyclic_probability
, &real_almost_one
,
2594 sizeof (real_almost_one
));
2597 /* BLOCK_INFO (bb)->frequency = frequency
2598 / (1 - cyclic_probability) */
2600 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
2601 sreal_div (&BLOCK_INFO (bb
)->frequency
,
2602 &frequency
, &cyclic_probability
);
2606 bitmap_clear_bit (tovisit
, bb
->index
);
2608 e
= find_edge (bb
, head
);
2613 /* EDGE_INFO (e)->back_edge_prob
2614 = ((e->probability * BLOCK_INFO (bb)->frequency)
2615 / REG_BR_PROB_BASE); */
2617 sreal_init (&tmp
, e
->probability
, 0);
2618 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
2619 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
2620 &tmp
, &real_inv_br_prob_base
);
2623 /* Propagate to successor blocks. */
2624 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2625 if (!(e
->flags
& EDGE_DFS_BACK
)
2626 && BLOCK_INFO (e
->dest
)->npredecessors
)
2628 BLOCK_INFO (e
->dest
)->npredecessors
--;
2629 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
2634 BLOCK_INFO (last
)->next
= e
->dest
;
2642 /* Estimate probabilities of loopback edges in loops at same nest level. */
2645 estimate_loops_at_level (struct loop
*first_loop
)
2649 for (loop
= first_loop
; loop
; loop
= loop
->next
)
2654 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2656 estimate_loops_at_level (loop
->inner
);
2658 /* Find current loop back edge and mark it. */
2659 e
= loop_latch_edge (loop
);
2660 EDGE_INFO (e
)->back_edge
= 1;
2662 bbs
= get_loop_body (loop
);
2663 for (i
= 0; i
< loop
->num_nodes
; i
++)
2664 bitmap_set_bit (tovisit
, bbs
[i
]->index
);
2666 propagate_freq (loop
->header
, tovisit
);
2667 BITMAP_FREE (tovisit
);
2671 /* Propagates frequencies through structure of loops. */
2674 estimate_loops (void)
2676 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2679 /* Start by estimating the frequencies in the loops. */
2680 if (number_of_loops () > 1)
2681 estimate_loops_at_level (current_loops
->tree_root
->inner
);
2683 /* Now propagate the frequencies through all the blocks. */
2686 bitmap_set_bit (tovisit
, bb
->index
);
2688 propagate_freq (ENTRY_BLOCK_PTR
, tovisit
);
2689 BITMAP_FREE (tovisit
);
2692 /* Convert counts measured by profile driven feedback to frequencies.
2693 Return nonzero iff there was any nonzero execution count. */
2696 counts_to_freqs (void)
2698 gcov_type count_max
, true_count_max
= 0;
2701 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2702 true_count_max
= MAX (bb
->count
, true_count_max
);
2704 count_max
= MAX (true_count_max
, 1);
2705 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2706 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
2708 return true_count_max
;
2711 /* Return true if function is likely to be expensive, so there is no point to
2712 optimize performance of prologue, epilogue or do inlining at the expense
2713 of code size growth. THRESHOLD is the limit of number of instructions
2714 function can execute at average to be still considered not expensive. */
2717 expensive_function_p (int threshold
)
2719 unsigned int sum
= 0;
2723 /* We can not compute accurately for large thresholds due to scaled
2725 gcc_assert (threshold
<= BB_FREQ_MAX
);
2727 /* Frequencies are out of range. This either means that function contains
2728 internal loop executing more than BB_FREQ_MAX times or profile feedback
2729 is available and function has not been executed at all. */
2730 if (ENTRY_BLOCK_PTR
->frequency
== 0)
2733 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2734 limit
= ENTRY_BLOCK_PTR
->frequency
* threshold
;
2739 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
2740 insn
= NEXT_INSN (insn
))
2741 if (active_insn_p (insn
))
2743 sum
+= bb
->frequency
;
2752 /* Estimate basic blocks frequency by given branch probabilities. */
2755 estimate_bb_frequencies (void)
2760 if (profile_status
!= PROFILE_READ
|| !counts_to_freqs ())
2762 static int real_values_initialized
= 0;
2764 if (!real_values_initialized
)
2766 real_values_initialized
= 1;
2767 sreal_init (&real_zero
, 0, 0);
2768 sreal_init (&real_one
, 1, 0);
2769 sreal_init (&real_br_prob_base
, REG_BR_PROB_BASE
, 0);
2770 sreal_init (&real_bb_freq_max
, BB_FREQ_MAX
, 0);
2771 sreal_init (&real_one_half
, 1, -1);
2772 sreal_div (&real_inv_br_prob_base
, &real_one
, &real_br_prob_base
);
2773 sreal_sub (&real_almost_one
, &real_one
, &real_inv_br_prob_base
);
2776 mark_dfs_back_edges ();
2778 single_succ_edge (ENTRY_BLOCK_PTR
)->probability
= REG_BR_PROB_BASE
;
2780 /* Set up block info for each basic block. */
2781 alloc_aux_for_blocks (sizeof (struct block_info_def
));
2782 alloc_aux_for_edges (sizeof (struct edge_info_def
));
2783 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2788 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2790 sreal_init (&EDGE_INFO (e
)->back_edge_prob
, e
->probability
, 0);
2791 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
2792 &EDGE_INFO (e
)->back_edge_prob
,
2793 &real_inv_br_prob_base
);
2797 /* First compute probabilities locally for each loop from innermost
2798 to outermost to examine probabilities for back edges. */
2801 memcpy (&freq_max
, &real_zero
, sizeof (real_zero
));
2803 if (sreal_compare (&freq_max
, &BLOCK_INFO (bb
)->frequency
) < 0)
2804 memcpy (&freq_max
, &BLOCK_INFO (bb
)->frequency
, sizeof (freq_max
));
2806 sreal_div (&freq_max
, &real_bb_freq_max
, &freq_max
);
2807 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
2811 sreal_mul (&tmp
, &BLOCK_INFO (bb
)->frequency
, &freq_max
);
2812 sreal_add (&tmp
, &tmp
, &real_one_half
);
2813 bb
->frequency
= sreal_to_int (&tmp
);
2816 free_aux_for_blocks ();
2817 free_aux_for_edges ();
2819 compute_function_frequency ();
2822 /* Decide whether function is hot, cold or unlikely executed. */
2824 compute_function_frequency (void)
2827 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2828 if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
2829 || MAIN_NAME_P (DECL_NAME (current_function_decl
)))
2830 node
->only_called_at_startup
= true;
2831 if (DECL_STATIC_DESTRUCTOR (current_function_decl
))
2832 node
->only_called_at_exit
= true;
2834 if (!profile_info
|| !flag_branch_probabilities
)
2836 int flags
= flags_from_decl_or_type (current_function_decl
);
2837 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
))
2839 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2840 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl
))
2842 node
->frequency
= NODE_FREQUENCY_HOT
;
2843 else if (flags
& ECF_NORETURN
)
2844 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2845 else if (MAIN_NAME_P (DECL_NAME (current_function_decl
)))
2846 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2847 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
2848 || DECL_STATIC_DESTRUCTOR (current_function_decl
))
2849 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
2852 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
2855 if (maybe_hot_bb_p (cfun
, bb
))
2857 node
->frequency
= NODE_FREQUENCY_HOT
;
2860 if (!probably_never_executed_bb_p (cfun
, bb
))
2861 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2866 gate_estimate_probability (void)
2868 return flag_guess_branch_prob
;
2871 /* Build PREDICT_EXPR. */
2873 build_predict_expr (enum br_predictor predictor
, enum prediction taken
)
2875 tree t
= build1 (PREDICT_EXPR
, void_type_node
,
2876 build_int_cst (integer_type_node
, predictor
));
2877 SET_PREDICT_EXPR_OUTCOME (t
, taken
);
2882 predictor_name (enum br_predictor predictor
)
2884 return predictor_info
[predictor
].name
;
2887 struct gimple_opt_pass pass_profile
=
2891 "profile_estimate", /* name */
2892 OPTGROUP_NONE
, /* optinfo_flags */
2893 gate_estimate_probability
, /* gate */
2894 tree_estimate_probability_driver
, /* execute */
2897 0, /* static_pass_number */
2898 TV_BRANCH_PROB
, /* tv_id */
2899 PROP_cfg
, /* properties_required */
2900 0, /* properties_provided */
2901 0, /* properties_destroyed */
2902 0, /* todo_flags_start */
2903 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */
2907 struct gimple_opt_pass pass_strip_predict_hints
=
2911 "*strip_predict_hints", /* name */
2912 OPTGROUP_NONE
, /* optinfo_flags */
2914 strip_predict_hints
, /* execute */
2917 0, /* static_pass_number */
2918 TV_BRANCH_PROB
, /* tv_id */
2919 PROP_cfg
, /* properties_required */
2920 0, /* properties_provided */
2921 0, /* properties_destroyed */
2922 0, /* todo_flags_start */
2923 TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */
2927 /* Rebuild function frequencies. Passes are in general expected to
2928 maintain profile by hand, however in some cases this is not possible:
2929 for example when inlining several functions with loops freuqencies might run
2930 out of scale and thus needs to be recomputed. */
2933 rebuild_frequencies (void)
2935 timevar_push (TV_REBUILD_FREQUENCIES
);
2936 if (profile_status
== PROFILE_GUESSED
)
2938 loop_optimizer_init (0);
2939 add_noreturn_fake_exit_edges ();
2940 mark_irreducible_loops ();
2941 connect_infinite_loops_to_exit ();
2942 estimate_bb_frequencies ();
2943 remove_fake_exit_edges ();
2944 loop_optimizer_finalize ();
2946 else if (profile_status
== PROFILE_READ
)
2950 timevar_pop (TV_REBUILD_FREQUENCIES
);