/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "diagnostic-core.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
#include "pointer-set.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so that a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
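
/* As a concrete illustration (assuming the usual REG_BR_PROB_BASE of 10000,
   i.e. probabilities stored as fixed-point fractions of 10000):

     PROB_VERY_UNLIKELY = 10000 / 2000 - 1 = 4    (0.04%)
     PROB_EVEN          = 5000                    (50%)
     PROB_VERY_LIKELY   = 10000 - 4       = 9996  (99.96%)
     PROB_ALWAYS        = 10000                   (100%)  */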
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction);
static bool can_predict_insn_p (const_rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
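
/* For example, with REG_BR_PROB_BASE of 10000, HITRATE (99) expands to
   ((int) (99 * 10000 + 50) / 100) == 9900, i.e. a 99% hitrate expressed
   on the REG_BR_PROB_BASE scale; the "+ 50" rounds to nearest.  */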
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_get_node (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  if (profile_status_for_function (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_function (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
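
/* To make the "executed at most once" test above concrete: if the profile
   data records profile_info->runs == 100 training runs, any block whose
   accumulated count is 100 or less averaged at most one execution per run,
   so it is rejected as cold before the working-set threshold is even
   consulted.  (Illustrative numbers, not from this file.)  */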
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_function (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}
/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL,
			     edge->count))
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (edge->callee
	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (edge->callee
	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
    return true;
  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
	  || edge->frequency <= (CGRAPH_FREQ_BASE
				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
	return false;
    }
  return true;
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_get_node (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}

/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  gcc_checking_assert (fun);
  if (profile_info && flag_branch_probabilities)
    return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_get_node (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
/* Return true if NODE should be optimized for size.  */

bool
cgraph_optimize_for_size_p (struct cgraph_node *node)
{
  if (optimize_size)
    return true;
  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}

/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;
  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      /* Walk the loop tree: descend into inner loops first, then visit
	 siblings, and climb back out when a subtree is exhausted.  */
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
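
/* A worked instance (illustrative; assumes REG_BR_PROB_BASE == 10000 and
   the predictable-branch-outcome parameter left at, say, 2):

     threshold = 2 * 10000 / 100 = 200

   so an edge counts as predictable when its probability is within 2% of
   either extreme, i.e. e->probability <= 200 or e->probability >= 9800.  */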
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */

void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Structure representing predictions in tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it gets right slightly over 75% of the time.
   It is, however, notoriously poor at predicting the probability itself:
   in general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn edges and edges
   predicted by number of iterations heuristics are predicted well.  This
   predicate should be able to distinguish those, but at the moment it
   simply checks for the noreturn heuristic, which is the only one giving
   a probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find it useful
   in multiple places.  */

static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
	  || (profile_status == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
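
/* Inversion is plain complement arithmetic on the fixed-point scale: a
   branch recorded as taken with probability 9000 (90% of a 10000 base)
   becomes 10000 - 9000 = 1000 (10%) once the jump condition is inverted.
   (Illustrative numbers, assuming the usual REG_BR_PROB_BASE of 10000.)  */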
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}
/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
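
/* The "+ nedges / 2" term rounds to nearest rather than truncating.  For
   example, a block with three non-EH successors and the usual 10000 base
   gives each edge (10000 + 1) / 3 = 3333, i.e. 33.3% apiece; with two
   successors each edge gets (10000 + 1) / 2 = 5000 exactly.  (Worked
   numbers added for illustration.)  */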
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
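
  /* The loop above folds each prediction into COMBINED_PROBABILITY with the
     two-outcome Dempster-Shafer rule

       c' = c * p * BASE / (c * p + (BASE - c) * (BASE - p))

     where BASE is REG_BR_PROB_BASE.  A worked instance with illustrative
     numbers (BASE = 10000): starting from the neutral c = 5000, one
     predictor at p = 7000 gives d = 5000*7000 + 5000*3000 = 50000000 and
     c' = 7000; folding in a second independent 70% predictor then gives
     d = 7000*7000 + 3000*3000 = 58000000 and c' = 8448, i.e. agreeing
     heuristics reinforce each other.  */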

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));

      /* Save the prediction into CFG in case we are seeing non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it! */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (host_integerp (t1, 0))
    value = tree_low_cst (t1, 0);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (host_integerp (t2, 0))
    value = tree_low_cst (t2, 0);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (host_integerp (iv1.step, 0))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (host_integerp (iv0.step, 0))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 suppress_further_computation();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */
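
/* When the loop bound, IV base and compare bound are all compile-time
   constants, the code below computes the probability exactly.  A
   hypothetical instance: in

     for (i = 0; i < 100; i++)
       if (i < 30)
	 ...;

   the inner condition holds on 30 of the 100 iterations, so the taken
   probability works out to 30 * REG_BR_PROB_BASE / 100 = 3000 (30%).
   (Numbers are illustrative, not taken from this file.)  */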
static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (host_integerp (loop_bound_var, 0)
      && host_integerp (compare_var, 0)
      && host_integerp (compare_base, 0))
    {
      int probability;
      bool of, overflow = false;
      double_int mod, compare_count, tem, loop_count;

      double_int loop_bound = tree_to_double_int (loop_bound_var);
      double_int compare_bound = tree_to_double_int (compare_var);
      double_int base = tree_to_double_int (compare_base);
      double_int compare_step = tree_to_double_int (compare_step_var);

      /* (loop_bound - base) / compare_step */
      tem = loop_bound.sub_with_overflow (base, &of);
      overflow |= of;
      loop_count = tem.divmod_with_overflow (compare_step,
					     0, TRUNC_DIV_EXPR,
					     &mod, &of);
      overflow |= of;

      if ((!compare_step.is_negative ())
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = loop_bound.sub_with_overflow (compare_bound, &of);
	  overflow |= of;
	  compare_count = tem.divmod_with_overflow (compare_step,
						    0, TRUNC_DIV_EXPR,
						    &mod, &of);
	  overflow |= of;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = compare_bound.sub_with_overflow (base, &of);
	  overflow |= of;
	  compare_count = tem.divmod_with_overflow (compare_step,
						    0, TRUNC_DIV_EXPR,
						    &mod, &of);
	  overflow |= of;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (compare_count.is_negative ())
	compare_count = double_int_zero;
      if (loop_count.is_negative ())
	loop_count = double_int_zero;
      if (loop_count.is_zero ())
	probability = 0;
      else if (compare_count.scmp (loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
	     could overflow, shift both loop_count and compare_count right
	     a bit so that it doesn't overflow.  Note both counts are known not
	     to be negative at this point.  */
	  int clz_bits = clz_hwi (loop_count.high);
	  gcc_assert (REG_BR_PROB_BASE < 32768);
	  if (clz_bits < 16)
	    {
	      loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
	      compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
	    }
	  tem = compare_count.mul_with_sign (double_int::from_shwi
					     (REG_BR_PROB_BASE), true, &of);
	  gcc_assert (!of);
	  tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
	  probability = tem.to_uhwi ();
	}

      if (!overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite to compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (host_integerp (niter, 1)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_low_cst (niter, 1) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();
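
      /* The exit probability just computed is the reciprocal of the
	 expected iteration count, rounded to nearest: e.g. with
	 nitercst = 10 and the usual REG_BR_PROB_BASE of 10000 the exit
	 edge gets (10000 + 5) / 10 = 1000, i.e. a 10% chance of leaving
	 the loop per iteration and a 90% chance of looping.  (Worked
	 numbers added for illustration.)  */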

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = nb_iter->stmt;
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = last_stmt (loop->header);
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each path through the function (not the overall
		 number of exits that might be a lot higher for loops with
		 wide switch statements in them) and compute the n-th square
		 root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in the perl benchmark
		 containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_low_cst (loop_bound_step, 0));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap);
/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited)
{
  gimple def;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);

	      /* If this PHI has itself as an argument, we cannot
		 determine the string length of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited);
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    return NULL;
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		return boolean_true_node;

	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited);
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
/* Return the constant EXPR will likely have at execution time; NULL if
   unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from this construct plus additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    return expr;

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited);
}
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */

static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if (fndecl
		  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		  && gimple_call_num_args (stmt) == 2)
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
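
/* To make the opcode heuristic concrete (illustrative source, not from
   this file):

     if (p != NULL)      // pointer NE: predicted taken
     if (n < 0)          // signed compare against 0: predicted not taken

   The first condition is guessed true because pointers compared against
   NULL are usually non-NULL, and the second is guessed false because most
   quantities are positive.  */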
/* Try to guess whether the value of return means error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
	 errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
	 Zero/one often represent booleans so exclude them from the
	 heuristics.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}
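
/* A sketch of the kind of code these return heuristics target
   (illustrative, not from this file):

     if (fd < 0)
       return NULL;       // pointer NULL return: path predicted unlikely
     if (len > max)
       return -1;         // negative return: path predicted unlikely
     return buf;

   Paths leading to the NULL and negative returns are marked improbable,
   which makes the fall-through success path the predicted-hot one.  */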
/* Find the basic block with return expression and look up for possible
   return value trying to apply RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (e.g. they are all positive constants),
     so we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
					 direction);
      }
}
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
	has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
		  && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
		 hints to callers.  */
	    }
	}
    }
}
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif
/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR)
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      gimple stmt = gsi_stmt (gi);
	      tree decl;

	      if (gimple_code (stmt) != GIMPLE_LABEL)
		break;
	      decl = gimple_label_label (stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
	 care of error returns and other cases are often used for
	 fast paths through the function.

	 Since we've already removed the return statements, we are
	 looking for CFG like:

	   if (conditional)
	     {
	       ..
	       goto return_block
	     }
	   some other blocks
	 return_block:
	   return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly used to signalize
		     something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}
/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB (bb)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}
/* Predict branch probabilities and estimate profile of the tree CFG.
   This is the driver function for PASS_PROFILE.  */

static unsigned int
tree_estimate_probability_driver (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (cfun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}
/* Predict edges to successors of CUR whose sources are not postdominated by
   BB, using predictor PRED, and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming an edge cut induced by
     the set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
	edge e2;
	edge_iterator ei2;
	bool found = false;

	/* Ignore fake edges and eh, we predict them as not taken anyway.  */
	if (e->flags & (EDGE_EH | EDGE_FAKE))
	  continue;
	gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

	/* See if there is an edge from e->src that is not abnormal
	   and does not lead to BB.  */
	FOR_EACH_EDGE (e2, ei2, e->src->succs)
	  if (e2 != e
	      && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	      && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	    {
	      found = true;
	      break;
	    }

	/* If there is a non-abnormal path leaving e->src, predict the edge
	   using the predictor.  Otherwise we need to look for paths
	   leading to e->src.

	   The second case may lead to an infinite loop when we are
	   predicting regions that are only reachable by abnormal edges.
	   We simply prevent visiting a given BB twice.  */
	if (found)
	  predict_edge_def (e, pred, taken);
	else if (bitmap_set_bit (visited, e->src->index))
	  predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}
/* Set branch probabilities on all paths leading to BB according to
   PRED and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}
/* Like predict_paths_leading_to but take edge instead of basic block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
			       enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
	has_nonloop_edge = true;
	break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case edge is a loopback edge, the probability edge will be reached
     in case header is.  Estimated number of iterations of the loop can
     then be computed as 1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
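
/* Worked example (illustrative, not from the original sources): a
   loopback edge with back_edge_prob of 0.9 means the loop header is
   re-entered nine times out of ten, giving an estimated iteration
   count of 1 / (1 - 0.9) = 10.  */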
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the
	 exit block.  */
      if (!count && bb == EXIT_BLOCK_PTR)
	bb->count = bb->frequency = 0;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency
				 / REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
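
	      /* Worked example (illustrative, not from the original
		 sources): a loop header entered with frequency 1 whose
		 incoming back edges contribute cyclic_probability 0.75
		 receives frequency 1 / (1 - 0.75) = 4, i.e. the body is
		 expected to execute four times per entry.  */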
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
		/ REG_BR_PROB_BASE);  */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
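
/* Worked example (illustrative, not from the original sources): with
   BB_FREQ_MAX of 10000, a block executed 3 times while the hottest
   block executed 6 times gets frequency (3 * 10000 + 3) / 6 = 5000,
   i.e. half the maximum; the "+ count_max / 2" term rounds to
   nearest.  */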
/* Return true if the function is likely to be expensive, so there is
   no point in optimizing performance of the prologue and epilogue or
   doing inlining at the expense of code size growth.  THRESHOLD is the
   limit of number of instructions the function can execute on average
   to still be considered inexpensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
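
/* Worked example (illustrative, not from the original sources): with an
   entry-block frequency of 1000 and THRESHOLD of 50, LIMIT is 50000;
   the function is reported expensive once the summed frequencies of its
   active insns exceed that, i.e. once it averages more than 50 executed
   instructions per invocation.  */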
/* Estimate basic block frequencies from the given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (profile_status != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
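
	  /* Illustrative note (not from the original sources): sreal_init
	     takes a significand and a binary exponent, so (1, -1) is
	     1 * 2^-1 = 0.5, and real_almost_one computes
	     1 - 1/REG_BR_PROB_BASE, which propagate_freq uses to cap
	     cyclic probabilities.  */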
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}
/* Decide whether the function is hot, cold or unlikely executed.  */

static void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (!profile_info || !flag_branch_probabilities)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
	node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot",
				 DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
	node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }
  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
/* Gate for the profile-estimate pass: run it whenever branch probability
   guessing is enabled.  */

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}
/* Build a PREDICT_EXPR with predictor PREDICTOR and outcome TAKEN.  */

tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

/* Return the name of PREDICTOR as used in the debugging dumps.  */

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
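
/* Usage sketch (illustrative, not from the original sources): front ends
   attach a hint by building a PREDICT_EXPR; fold_builtin_expect does the
   equivalent for __builtin_expect.  The append_to_statement_list call
   below is just one possible way to emit the expression.  */
#if 0
  tree hint = build_predict_expr (PRED_BUILTIN_EXPECT, TAKEN);
  append_to_statement_list (hint, &stmt_list);
#endif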
namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_estimate_probability (); }
  unsigned int execute () { return tree_estimate_probability_driver (); }

}; // class pass_profile

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}
namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  false, /* has_gate */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (ctxt_); }
  unsigned int execute () { return strip_predict_hints (); }

}; // class pass_strip_predict_hints

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies might run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);
  if (profile_status == PROFILE_GUESSED)
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies ();
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}
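
/* Usage sketch (illustrative, not from the original sources): a pass
   whose transformations have left block frequencies inconsistent can
   simply request a recomputation.  */
#if 0
  /* ... after heavy inlining or CFG surgery ...  */
  rebuild_frequencies ();
#endif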