/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "diagnostic-core.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
#include "pointer-set.h"
/* Real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */

#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
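
/* Illustrative example (assuming the usual REG_BR_PROB_BASE of 10000;
   treat that value as an assumption here):

     PROB_VERY_UNLIKELY == 10000 / 2000 - 1 == 4      (0.04%)
     PROB_EVEN          == 10000 / 2        == 5000   (50%)
     PROB_VERY_LIKELY   == 10000 - 4        == 9996   (99.96%)
     PROB_ALWAYS        == 10000                      (100%)

   i.e. probabilities are stored as fixed-point fractions of REG_BR_PROB_BASE.  */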
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction);
static bool can_predict_insn_p (const_rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
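
/* A quick sanity check of HITRATE, assuming REG_BR_PROB_BASE == 10000
   (an assumption for illustration only):

     HITRATE (50) == (50 * 10000 + 50) / 100 == 5000   -- an even branch
     HITRATE (99) == (99 * 10000 + 50) / 100 == 9900   -- a 99% hitrate

   The "+ 50" term rounds to the nearest representable probability.  */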
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_get_node (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  if (profile_status_for_function (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
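
/* Worked example (illustrative, assuming the default
   --param hot-bb-frequency-fraction of 1000): a block whose estimated
   frequency is less than 1/1000 of the entry block's frequency is not
   considered hot, and for functions known to be executed only once the bar
   is raised to 2/3 of the entry block frequency.  */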
/* Return TRUE if profile count COUNT is considered to be hot in function FUN.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  gcov_working_set_t *ws;
  static gcov_type min_count = -1;
  if (fun && profile_status_for_function (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return (count >= min_count);
}
153 /* Return true in case BB can be CPU intensive and should be optimized
154 for maximal performance. */
157 maybe_hot_bb_p (struct function
*fun
, const_basic_block bb
)
159 gcc_checking_assert (fun
);
160 if (profile_status_for_function (fun
) == PROFILE_READ
)
161 return maybe_hot_count_p (fun
, bb
->count
);
162 return maybe_hot_frequency_p (fun
, bb
->frequency
);
165 /* Return true if the call can be hot. */
168 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
170 if (profile_info
&& flag_branch_probabilities
171 && !maybe_hot_count_p (NULL
,
174 if (edge
->caller
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
176 && edge
->callee
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
178 if (edge
->caller
->frequency
> NODE_FREQUENCY_UNLIKELY_EXECUTED
180 && edge
->callee
->frequency
<= NODE_FREQUENCY_EXECUTED_ONCE
))
184 if (edge
->caller
->frequency
== NODE_FREQUENCY_HOT
)
186 if (edge
->caller
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
187 && edge
->frequency
< CGRAPH_FREQ_BASE
* 3 / 2)
189 if (flag_guess_branch_prob
190 && edge
->frequency
<= (CGRAPH_FREQ_BASE
191 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)))
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */
200 maybe_hot_edge_p (edge e
)
202 if (profile_status
== PROFILE_READ
)
203 return maybe_hot_count_p (cfun
, e
->count
);
204 return maybe_hot_frequency_p (cfun
, EDGE_FREQUENCY (e
));
208 /* Return true in case BB is probably never executed. */
211 probably_never_executed_bb_p (struct function
*fun
, const_basic_block bb
)
213 gcc_checking_assert (fun
);
214 if (profile_info
&& flag_branch_probabilities
)
215 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
216 if ((!profile_info
|| !flag_branch_probabilities
)
217 && (cgraph_get_node (fun
->decl
)->frequency
218 == NODE_FREQUENCY_UNLIKELY_EXECUTED
))
223 /* Return true if NODE should be optimized for size. */
226 cgraph_optimize_for_size_p (struct cgraph_node
*node
)
230 if (node
&& (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
))
236 /* Return true when current function should always be optimized for size. */
239 optimize_function_for_size_p (struct function
*fun
)
243 if (!fun
|| !fun
->decl
)
245 return cgraph_optimize_for_size_p (cgraph_get_node (fun
->decl
));
248 /* Return true when current function should always be optimized for speed. */
251 optimize_function_for_speed_p (struct function
*fun
)
253 return !optimize_function_for_size_p (fun
);
256 /* Return TRUE when BB should be optimized for size. */
259 optimize_bb_for_size_p (const_basic_block bb
)
261 return optimize_function_for_size_p (cfun
) || !maybe_hot_bb_p (cfun
, bb
);
264 /* Return TRUE when BB should be optimized for speed. */
267 optimize_bb_for_speed_p (const_basic_block bb
)
269 return !optimize_bb_for_size_p (bb
);
/* Return TRUE when edge E should be optimized for size.  */
275 optimize_edge_for_size_p (edge e
)
277 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
/* Return TRUE when edge E should be optimized for speed.  */
283 optimize_edge_for_speed_p (edge e
)
285 return !optimize_edge_for_size_p (e
);
/* Return TRUE when the current instruction should be optimized for size.  */
291 optimize_insn_for_size_p (void)
293 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
/* Return TRUE when the current instruction should be optimized for speed.  */
299 optimize_insn_for_speed_p (void)
301 return !optimize_insn_for_size_p ();
304 /* Return TRUE when LOOP should be optimized for size. */
307 optimize_loop_for_size_p (struct loop
*loop
)
309 return optimize_bb_for_size_p (loop
->header
);
312 /* Return TRUE when LOOP should be optimized for speed. */
315 optimize_loop_for_speed_p (struct loop
*loop
)
317 return optimize_bb_for_speed_p (loop
->header
);
320 /* Return TRUE when LOOP nest should be optimized for speed. */
323 optimize_loop_nest_for_speed_p (struct loop
*loop
)
325 struct loop
*l
= loop
;
326 if (optimize_loop_for_speed_p (loop
))
329 while (l
&& l
!= loop
)
331 if (optimize_loop_for_speed_p (l
))
339 while (l
!= loop
&& !l
->next
)
348 /* Return TRUE when LOOP nest should be optimized for size. */
351 optimize_loop_nest_for_size_p (struct loop
*loop
)
353 return !optimize_loop_nest_for_speed_p (loop
);
356 /* Return true when edge E is likely to be well predictable by branch
360 predictable_edge_p (edge e
)
362 if (profile_status
== PROFILE_ABSENT
)
365 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
366 || (REG_BR_PROB_BASE
- e
->probability
367 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
373 /* Set RTL expansion for BB profile. */
376 rtl_profile_for_bb (basic_block bb
)
378 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (cfun
, bb
);
381 /* Set RTL expansion for edge profile. */
384 rtl_profile_for_edge (edge e
)
386 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
389 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
391 default_rtl_profile (void)
393 crtl
->maybe_hot_insn_p
= true;
396 /* Return true if the one of outgoing edges is already predicted by
400 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
403 if (!INSN_P (BB_END (bb
)))
405 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
406 if (REG_NOTE_KIND (note
) == REG_BR_PRED
407 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
412 /* This map contains for a basic block the list of predictions for the
415 static struct pointer_map_t
*bb_predictions
;
417 /* Structure representing predictions in tree level. */
419 struct edge_prediction
{
420 struct edge_prediction
*ep_next
;
422 enum br_predictor ep_predictor
;
426 /* Return true if the one of outgoing edges is already predicted by
430 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
432 struct edge_prediction
*i
;
433 void **preds
= pointer_map_contains (bb_predictions
, bb
);
438 for (i
= (struct edge_prediction
*) *preds
; i
; i
= i
->ep_next
)
439 if (i
->ep_predictor
== predictor
)
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the profile appears a lot flatter (with probabilities closer
   to 50%) than reality, so it is a bad idea to use it to drive optimizations
   such as those disabling dynamic branch prediction for well predictable
   branches.

   There are two exceptions - edges leading to noreturn edges and edges
   predicted by number of iterations heuristics are predicted well.  This macro
   should be able to distinguish those, but at the moment it simply checks for
   the noreturn heuristic, which is the only one giving probability over 99% or
   below 1%.  In the future we might want to propagate reliability information
   across the CFG if we find this information useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
	  || (profile_status == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
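
/* Illustration (assuming REG_BR_PROB_BASE == 10000): for a guessed profile
   only probabilities <= HITRATE (1) == 100 or >= HITRATE (99) == 9900 are
   treated as reliable; everything in between came from heuristics whose
   absolute value should not be trusted.  */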
468 /* Same predicate as above, working on edges. */
470 edge_probability_reliable_p (const_edge e
)
472 return probability_reliable_p (e
->probability
);
475 /* Same predicate as edge_probability_reliable_p, working on notes. */
477 br_prob_note_reliable_p (const_rtx note
)
479 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
480 return probability_reliable_p (INTVAL (XEXP (note
, 0)));
484 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
486 gcc_assert (any_condjump_p (insn
));
487 if (!flag_guess_branch_prob
)
490 add_reg_note (insn
, REG_BR_PRED
,
491 gen_rtx_CONCAT (VOIDmode
,
492 GEN_INT ((int) predictor
),
493 GEN_INT ((int) probability
)));
496 /* Predict insn by given predictor. */
499 predict_insn_def (rtx insn
, enum br_predictor predictor
,
500 enum prediction taken
)
502 int probability
= predictor_info
[(int) predictor
].hitrate
;
505 probability
= REG_BR_PROB_BASE
- probability
;
507 predict_insn (insn
, predictor
, probability
);
510 /* Predict edge E with given probability if possible. */
513 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
516 last_insn
= BB_END (e
->src
);
518 /* We can store the branch prediction information only about
519 conditional jumps. */
520 if (!any_condjump_p (last_insn
))
523 /* We always store probability of branching. */
524 if (e
->flags
& EDGE_FALLTHRU
)
525 probability
= REG_BR_PROB_BASE
- probability
;
527 predict_insn (last_insn
, predictor
, probability
);
530 /* Predict edge E with the given PROBABILITY. */
532 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
534 gcc_assert (profile_status
!= PROFILE_GUESSED
);
535 if ((e
->src
!= ENTRY_BLOCK_PTR
&& EDGE_COUNT (e
->src
->succs
) > 1)
536 && flag_guess_branch_prob
&& optimize
)
538 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
539 void **preds
= pointer_map_insert (bb_predictions
, e
->src
);
541 i
->ep_next
= (struct edge_prediction
*) *preds
;
543 i
->ep_probability
= probability
;
544 i
->ep_predictor
= predictor
;
549 /* Remove all predictions on given basic block that are attached
552 remove_predictions_associated_with_edge (edge e
)
559 preds
= pointer_map_contains (bb_predictions
, e
->src
);
563 struct edge_prediction
**prediction
= (struct edge_prediction
**) preds
;
564 struct edge_prediction
*next
;
568 if ((*prediction
)->ep_edge
== e
)
570 next
= (*prediction
)->ep_next
;
575 prediction
= &((*prediction
)->ep_next
);
580 /* Clears the list of predictions stored for BB. */
583 clear_bb_predictions (basic_block bb
)
585 void **preds
= pointer_map_contains (bb_predictions
, bb
);
586 struct edge_prediction
*pred
, *next
;
591 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= next
)
593 next
= pred
->ep_next
;
599 /* Return true when we can store prediction on insn INSN.
600 At the moment we represent predictions only on conditional
601 jumps, not at computed jump or other complicated cases. */
603 can_predict_insn_p (const_rtx insn
)
605 return (JUMP_P (insn
)
606 && any_condjump_p (insn
)
607 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
610 /* Predict edge E by given predictor if possible. */
613 predict_edge_def (edge e
, enum br_predictor predictor
,
614 enum prediction taken
)
616 int probability
= predictor_info
[(int) predictor
].hitrate
;
619 probability
= REG_BR_PROB_BASE
- probability
;
621 predict_edge (e
, predictor
, probability
);
624 /* Invert all branch predictions or probability notes in the INSN. This needs
625 to be done each time we invert the condition used by the jump. */
628 invert_br_probabilities (rtx insn
)
632 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
633 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
634 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
635 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
636 XEXP (XEXP (note
, 0), 1)
637 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
640 /* Dump information about the branch prediction to the output file. */
643 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
644 basic_block bb
, int used
)
652 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
653 if (! (e
->flags
& EDGE_FALLTHRU
))
656 fprintf (file
, " %s heuristics%s: %.1f%%",
657 predictor_info
[predictor
].name
,
658 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
662 fprintf (file
, " exec ");
663 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
666 fprintf (file
, " hit ");
667 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
668 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
672 fprintf (file
, "\n");
/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
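
/* Example (illustrative): with three non-EH/non-fake successors and
   REG_BR_PROB_BASE == 10000, each edge gets (10000 + 1) / 3 == 3333,
   i.e. roughly 33.3% per edge; the "+ nedges / 2" term just rounds.  */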
694 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
695 note if not already present. Remove now useless REG_BR_PRED notes. */
698 combine_predictions_for_insn (rtx insn
, basic_block bb
)
703 int best_probability
= PROB_EVEN
;
704 enum br_predictor best_predictor
= END_PREDICTORS
;
705 int combined_probability
= REG_BR_PROB_BASE
/ 2;
707 bool first_match
= false;
710 if (!can_predict_insn_p (insn
))
712 set_even_probabilities (bb
);
716 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
717 pnote
= ®_NOTES (insn
);
719 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
722 /* We implement "first match" heuristics and use probability guessed
723 by predictor with smallest index. */
724 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
725 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
727 enum br_predictor predictor
= ((enum br_predictor
)
728 INTVAL (XEXP (XEXP (note
, 0), 0)));
729 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
732 if (best_predictor
> predictor
)
733 best_probability
= probability
, best_predictor
= predictor
;
	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
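
	  /* Illustrative note: the update above is the Dempster-Shafer
	     combination of two independent estimates p1 and p2,

	       p_new = (p1 * p2) / (p1 * p2 + (1 - p1) * (1 - p2)),

	     scaled by REG_BR_PROB_BASE.  E.g. combining 80% with 70%
	     (assuming a base of 10000, i.e. 8000 and 7000) gives
	     5600 / (5600 + 600) ~= 9032, about 90%.  */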
      /* Decide which heuristic to use.  In case we didn't match anything,
	 use the no_prediction heuristic; in case we did match, use either
	 first match or Dempster-Shafer theory depending on the flags.  */
752 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
756 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
757 combined_probability
, bb
, true);
760 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
762 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
767 combined_probability
= best_probability
;
768 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
772 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
774 enum br_predictor predictor
= ((enum br_predictor
)
775 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
776 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
778 dump_prediction (dump_file
, predictor
, probability
, bb
,
779 !first_match
|| best_predictor
== predictor
);
780 *pnote
= XEXP (*pnote
, 1);
783 pnote
= &XEXP (*pnote
, 1);
788 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (combined_probability
));
790 /* Save the prediction into CFG in case we are seeing non-degenerated
792 if (!single_succ_p (bb
))
794 BRANCH_EDGE (bb
)->probability
= combined_probability
;
795 FALLTHRU_EDGE (bb
)->probability
796 = REG_BR_PROB_BASE
- combined_probability
;
799 else if (!single_succ_p (bb
))
801 int prob
= INTVAL (XEXP (prob_note
, 0));
803 BRANCH_EDGE (bb
)->probability
= prob
;
804 FALLTHRU_EDGE (bb
)->probability
= REG_BR_PROB_BASE
- prob
;
807 single_succ_edge (bb
)->probability
= REG_BR_PROB_BASE
;
810 /* Combine predictions into single probability and store them into CFG.
811 Remove now useless prediction entries. */
814 combine_predictions_for_bb (basic_block bb
)
816 int best_probability
= PROB_EVEN
;
817 enum br_predictor best_predictor
= END_PREDICTORS
;
818 int combined_probability
= REG_BR_PROB_BASE
/ 2;
820 bool first_match
= false;
822 struct edge_prediction
*pred
;
824 edge e
, first
= NULL
, second
= NULL
;
828 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
829 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
832 if (first
&& !second
)
838 /* When there is no successor or only one choice, prediction is easy.
840 We are lazy for now and predict only basic blocks with two outgoing
841 edges. It is possible to predict generic case too, but we have to
842 ignore first match heuristics and do more involved combining. Implement
847 set_even_probabilities (bb
);
848 clear_bb_predictions (bb
);
850 fprintf (dump_file
, "%i edges in bb %i predicted to even probabilities\n",
856 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
858 preds
= pointer_map_contains (bb_predictions
, bb
);
861 /* We implement "first match" heuristics and use probability guessed
862 by predictor with smallest index. */
863 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
865 enum br_predictor predictor
= pred
->ep_predictor
;
866 int probability
= pred
->ep_probability
;
868 if (pred
->ep_edge
!= first
)
869 probability
= REG_BR_PROB_BASE
- probability
;
      /* First match heuristics would be wildly confused if we predicted
874 if (best_predictor
> predictor
)
876 struct edge_prediction
*pred2
;
877 int prob
= probability
;
879 for (pred2
= (struct edge_prediction
*) *preds
; pred2
; pred2
= pred2
->ep_next
)
880 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
882 int probability2
= pred
->ep_probability
;
884 if (pred2
->ep_edge
!= first
)
885 probability2
= REG_BR_PROB_BASE
- probability2
;
887 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
888 (probability2
< REG_BR_PROB_BASE
/ 2))
891 /* If the same predictor later gave better result, go for it! */
892 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
893 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
897 best_probability
= prob
, best_predictor
= predictor
;
900 d
= (combined_probability
* probability
901 + (REG_BR_PROB_BASE
- combined_probability
)
902 * (REG_BR_PROB_BASE
- probability
));
904 /* Use FP math to avoid overflows of 32bit integers. */
906 /* If one probability is 0% and one 100%, avoid division by zero. */
907 combined_probability
= REG_BR_PROB_BASE
/ 2;
909 combined_probability
= (((double) combined_probability
)
911 * REG_BR_PROB_BASE
/ d
+ 0.5);
  /* Decide which heuristic to use.  In case we didn't match anything,
     use the no_prediction heuristic; in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */
919 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
923 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
926 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
928 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
933 combined_probability
= best_probability
;
934 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
938 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
940 enum br_predictor predictor
= pred
->ep_predictor
;
941 int probability
= pred
->ep_probability
;
943 if (pred
->ep_edge
!= EDGE_SUCC (bb
, 0))
944 probability
= REG_BR_PROB_BASE
- probability
;
945 dump_prediction (dump_file
, predictor
, probability
, bb
,
946 !first_match
|| best_predictor
== predictor
);
949 clear_bb_predictions (bb
);
953 first
->probability
= combined_probability
;
954 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
958 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
959 Return the SSA_NAME if the condition satisfies, NULL otherwise.
961 T1 and T2 should be one of the following cases:
962 1. T1 is SSA_NAME, T2 is NULL
963 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
964 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
967 strips_small_constant (tree t1
, tree t2
)
974 else if (TREE_CODE (t1
) == SSA_NAME
)
976 else if (host_integerp (t1
, 0))
977 value
= tree_low_cst (t1
, 0);
983 else if (host_integerp (t2
, 0))
984 value
= tree_low_cst (t2
, 0);
985 else if (TREE_CODE (t2
) == SSA_NAME
)
993 if (value
<= 4 && value
>= -4)
999 /* Return the SSA_NAME in T or T's operands.
1000 Return NULL if SSA_NAME cannot be found. */
1003 get_base_value (tree t
)
1005 if (TREE_CODE (t
) == SSA_NAME
)
1008 if (!BINARY_CLASS_P (t
))
1011 switch (TREE_OPERAND_LENGTH (t
))
1014 return strips_small_constant (TREE_OPERAND (t
, 0), NULL
);
1016 return strips_small_constant (TREE_OPERAND (t
, 0),
1017 TREE_OPERAND (t
, 1));
1023 /* Check the compare STMT in LOOP. If it compares an induction
1024 variable to a loop invariant, return true, and save
1025 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
1029 is_comparison_with_loop_invariant_p (gimple stmt
, struct loop
*loop
,
1030 tree
*loop_invariant
,
1031 enum tree_code
*compare_code
,
1035 tree op0
, op1
, bound
, base
;
1037 enum tree_code code
;
1040 code
= gimple_cond_code (stmt
);
1041 *loop_invariant
= NULL
;
1057 op0
= gimple_cond_lhs (stmt
);
1058 op1
= gimple_cond_rhs (stmt
);
1060 if ((TREE_CODE (op0
) != SSA_NAME
&& TREE_CODE (op0
) != INTEGER_CST
)
1061 || (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op1
) != INTEGER_CST
))
1063 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op0
, &iv0
, true))
1065 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op1
, &iv1
, true))
1067 if (TREE_CODE (iv0
.step
) != INTEGER_CST
1068 || TREE_CODE (iv1
.step
) != INTEGER_CST
)
1070 if ((integer_zerop (iv0
.step
) && integer_zerop (iv1
.step
))
1071 || (!integer_zerop (iv0
.step
) && !integer_zerop (iv1
.step
)))
1074 if (integer_zerop (iv0
.step
))
1076 if (code
!= NE_EXPR
&& code
!= EQ_EXPR
)
1077 code
= invert_tree_comparison (code
, false);
1080 if (host_integerp (iv1
.step
, 0))
1081 step
= tree_low_cst (iv1
.step
, 0);
1089 if (host_integerp (iv0
.step
, 0))
1090 step
= tree_low_cst (iv0
.step
, 0);
1095 if (TREE_CODE (bound
) != INTEGER_CST
)
1096 bound
= get_base_value (bound
);
1099 if (TREE_CODE (base
) != INTEGER_CST
)
1100 base
= get_base_value (base
);
1104 *loop_invariant
= bound
;
1105 *compare_code
= code
;
1107 *loop_iv_base
= base
;
1111 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1114 expr_coherent_p (tree t1
, tree t2
)
1117 tree ssa_name_1
= NULL
;
1118 tree ssa_name_2
= NULL
;
1120 gcc_assert (TREE_CODE (t1
) == SSA_NAME
|| TREE_CODE (t1
) == INTEGER_CST
);
1121 gcc_assert (TREE_CODE (t2
) == SSA_NAME
|| TREE_CODE (t2
) == INTEGER_CST
);
1126 if (TREE_CODE (t1
) == INTEGER_CST
&& TREE_CODE (t2
) == INTEGER_CST
)
1128 if (TREE_CODE (t1
) == INTEGER_CST
|| TREE_CODE (t2
) == INTEGER_CST
)
1131 /* Check to see if t1 is expressed/defined with t2. */
1132 stmt
= SSA_NAME_DEF_STMT (t1
);
1133 gcc_assert (stmt
!= NULL
);
1134 if (is_gimple_assign (stmt
))
1136 ssa_name_1
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1137 if (ssa_name_1
&& ssa_name_1
== t2
)
1141 /* Check to see if t2 is expressed/defined with t1. */
1142 stmt
= SSA_NAME_DEF_STMT (t2
);
1143 gcc_assert (stmt
!= NULL
);
1144 if (is_gimple_assign (stmt
))
1146 ssa_name_2
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1147 if (ssa_name_2
&& ssa_name_2
== t1
)
1151 /* Compare if t1 and t2's def_stmts are identical. */
1152 if (ssa_name_2
!= NULL
&& ssa_name_1
== ssa_name_2
)
1158 /* Predict branch probability of BB when BB contains a branch that compares
1159 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1160 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1163 for (int i = 0; i < bound; i++) {
1170 In this loop, we will predict the branch inside the loop to be taken. */
1173 predict_iv_comparison (struct loop
*loop
, basic_block bb
,
1174 tree loop_bound_var
,
1175 tree loop_iv_base_var
,
1176 enum tree_code loop_bound_code
,
1177 int loop_bound_step
)
1180 tree compare_var
, compare_base
;
1181 enum tree_code compare_code
;
1186 if (predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1187 || predicted_by_p (bb
, PRED_LOOP_ITERATIONS
)
1188 || predicted_by_p (bb
, PRED_LOOP_EXIT
))
1191 stmt
= last_stmt (bb
);
1192 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1194 if (!is_comparison_with_loop_invariant_p (stmt
, loop
, &compare_var
,
1200 /* Find the taken edge. */
1201 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1202 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1205 /* When comparing an IV to a loop invariant, NE is more likely to be
1206 taken while EQ is more likely to be not-taken. */
1207 if (compare_code
== NE_EXPR
)
1209 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1212 else if (compare_code
== EQ_EXPR
)
1214 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1218 if (!expr_coherent_p (loop_iv_base_var
, compare_base
))
1221 /* If loop bound, base and compare bound are all constants, we can
1222 calculate the probability directly. */
1223 if (host_integerp (loop_bound_var
, 0)
1224 && host_integerp (compare_var
, 0)
1225 && host_integerp (compare_base
, 0))
1228 HOST_WIDE_INT compare_count
;
1229 HOST_WIDE_INT loop_bound
= tree_low_cst (loop_bound_var
, 0);
1230 HOST_WIDE_INT compare_bound
= tree_low_cst (compare_var
, 0);
1231 HOST_WIDE_INT base
= tree_low_cst (compare_base
, 0);
1232 HOST_WIDE_INT loop_count
= (loop_bound
- base
) / compare_step
;
1234 if ((compare_step
> 0)
1235 ^ (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1236 compare_count
= (loop_bound
- compare_bound
) / compare_step
;
1238 compare_count
= (compare_bound
- base
) / compare_step
;
1240 if (compare_code
== LE_EXPR
|| compare_code
== GE_EXPR
)
1242 if (loop_bound_code
== LE_EXPR
|| loop_bound_code
== GE_EXPR
)
1244 if (compare_count
< 0)
1249 if (loop_count
== 0)
1251 else if (compare_count
> loop_count
)
1252 probability
= REG_BR_PROB_BASE
;
1254 probability
= (double) REG_BR_PROB_BASE
* compare_count
/ loop_count
;
1255 predict_edge (then_edge
, PRED_LOOP_IV_COMPARE
, probability
);
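
      /* Worked example (illustrative):

	   for (i = 0; i < 100; i++)   -- loop_bound == 100, base == 0, step == 1
	     if (i < 30) ...           -- compare_bound == 30

	 gives loop_count == 100 and compare_count == 30, so the inner
	 comparison is predicted taken with probability
	 30 * REG_BR_PROB_BASE / 100, i.e. roughly 30%.  */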
1259 if (expr_coherent_p (loop_bound_var
, compare_var
))
1261 if ((loop_bound_code
== LT_EXPR
|| loop_bound_code
== LE_EXPR
)
1262 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1263 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1264 else if ((loop_bound_code
== GT_EXPR
|| loop_bound_code
== GE_EXPR
)
1265 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1266 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1267 else if (loop_bound_code
== NE_EXPR
)
1269 /* If the loop backedge condition is "(i != bound)", we do
1270 the comparison based on the step of IV:
1271 * step < 0 : backedge condition is like (i > bound)
1272 * step > 0 : backedge condition is like (i < bound) */
1273 gcc_assert (loop_bound_step
!= 0);
1274 if (loop_bound_step
> 0
1275 && (compare_code
== LT_EXPR
1276 || compare_code
== LE_EXPR
))
1277 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1278 else if (loop_bound_step
< 0
1279 && (compare_code
== GT_EXPR
1280 || compare_code
== GE_EXPR
))
1281 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1283 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1286 /* The branch is predicted not-taken if loop_bound_code is
1287 opposite with compare_code. */
1288 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1290 else if (expr_coherent_p (loop_iv_base_var
, compare_var
))
1293 for (i = s; i < h; i++)
1295 The branch should be predicted taken. */
1296 if (loop_bound_step
> 0
1297 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1298 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1299 else if (loop_bound_step
< 0
1300 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1301 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1303 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
1311 if (foo() || global > 10)
1314 This will be translated into:
1319 if foo() goto BB6 else goto BB5
1321 if global > 10 goto BB6 else goto BB7
1325 iftmp = (PHI 0(BB5), 1(BB6))
1326 if iftmp == 1 goto BB8 else goto BB3
1328 outside of the loop...
1330 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1331 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1332 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1333 exits to predict them using PRED_LOOP_EXIT. */
1336 predict_extra_loop_exits (edge exit_edge
)
1339 bool check_value_one
;
1341 tree cmp_rhs
, cmp_lhs
;
1342 gimple cmp_stmt
= last_stmt (exit_edge
->src
);
1344 if (!cmp_stmt
|| gimple_code (cmp_stmt
) != GIMPLE_COND
)
1346 cmp_rhs
= gimple_cond_rhs (cmp_stmt
);
1347 cmp_lhs
= gimple_cond_lhs (cmp_stmt
);
1348 if (!TREE_CONSTANT (cmp_rhs
)
1349 || !(integer_zerop (cmp_rhs
) || integer_onep (cmp_rhs
)))
1351 if (TREE_CODE (cmp_lhs
) != SSA_NAME
)
1354 /* If check_value_one is true, only the phi_args with value '1' will lead
1355 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1357 check_value_one
= (((integer_onep (cmp_rhs
))
1358 ^ (gimple_cond_code (cmp_stmt
) == EQ_EXPR
))
1359 ^ ((exit_edge
->flags
& EDGE_TRUE_VALUE
) != 0));
1361 phi_stmt
= SSA_NAME_DEF_STMT (cmp_lhs
);
1362 if (!phi_stmt
|| gimple_code (phi_stmt
) != GIMPLE_PHI
)
1365 for (i
= 0; i
< gimple_phi_num_args (phi_stmt
); i
++)
1369 tree val
= gimple_phi_arg_def (phi_stmt
, i
);
1370 edge e
= gimple_phi_arg_edge (phi_stmt
, i
);
1372 if (!TREE_CONSTANT (val
) || !(integer_zerop (val
) || integer_onep (val
)))
1374 if ((check_value_one
^ integer_onep (val
)) == 1)
1376 if (EDGE_COUNT (e
->src
->succs
) != 1)
1378 predict_paths_leading_to_edge (e
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1382 FOR_EACH_EDGE (e1
, ei
, e
->src
->preds
)
1383 predict_paths_leading_to_edge (e1
, PRED_LOOP_EXIT
, NOT_TAKEN
);
1387 /* Predict edge probabilities by exploiting loop structure. */
1390 predict_loops (void)
1395 /* Try to predict out blocks in a loop that are not part of a
1397 FOR_EACH_LOOP (li
, loop
, 0)
1399 basic_block bb
, *bbs
;
1400 unsigned j
, n_exits
;
1402 struct tree_niter_desc niter_desc
;
1404 struct nb_iter_bound
*nb_iter
;
1405 enum tree_code loop_bound_code
= ERROR_MARK
;
1406 int loop_bound_step
= 0;
1407 tree loop_bound_var
= NULL
;
1408 tree loop_iv_base
= NULL
;
1411 exits
= get_loop_exit_edges (loop
);
1412 n_exits
= exits
.length ();
1419 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1422 HOST_WIDE_INT nitercst
;
1423 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
1425 enum br_predictor predictor
;
1427 predict_extra_loop_exits (ex
);
1429 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
1430 niter
= niter_desc
.niter
;
1431 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
1432 niter
= loop_niter_by_eval (loop
, ex
);
1434 if (TREE_CODE (niter
) == INTEGER_CST
)
1436 if (host_integerp (niter
, 1)
1437 && compare_tree_int (niter
, max
-1) == -1)
1438 nitercst
= tree_low_cst (niter
, 1) + 1;
1441 predictor
= PRED_LOOP_ITERATIONS
;
1443 /* If we have just one exit and we can derive some information about
1444 the number of iterations of the loop from the statements inside
1445 the loop, use it to predict this exit. */
1446 else if (n_exits
== 1)
1448 nitercst
= estimated_stmt_executions_int (loop
);
1454 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
1459 probability
= ((REG_BR_PROB_BASE
+ nitercst
/ 2) / nitercst
);
1460 predict_edge (ex
, predictor
, probability
);
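
	  /* Illustrative note: with nitercst == 10 expected iterations the
	     exit edge gets probability (REG_BR_PROB_BASE + 5) / 10, i.e.
	     roughly 10% per trip through the loop body.  */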
1464 /* Find information about loop bound variables. */
1465 for (nb_iter
= loop
->bounds
; nb_iter
;
1466 nb_iter
= nb_iter
->next
)
1468 && gimple_code (nb_iter
->stmt
) == GIMPLE_COND
)
1470 stmt
= nb_iter
->stmt
;
1473 if (!stmt
&& last_stmt (loop
->header
)
1474 && gimple_code (last_stmt (loop
->header
)) == GIMPLE_COND
)
1475 stmt
= last_stmt (loop
->header
);
1477 is_comparison_with_loop_invariant_p (stmt
, loop
,
1483 bbs
= get_loop_body (loop
);
1485 for (j
= 0; j
< loop
->num_nodes
; j
++)
1487 int header_found
= 0;
1493 /* Bypass loop heuristics on continue statement. These
1494 statements construct loops via "non-loop" constructs
1495 in the source language and are better to be handled
1497 if (predicted_by_p (bb
, PRED_CONTINUE
))
1500 /* Loop branch heuristics - predict an edge back to a
1501 loop's head as taken. */
1502 if (bb
== loop
->latch
)
1504 e
= find_edge (loop
->latch
, loop
->header
);
1508 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
1512 /* Loop exit heuristics - predict an edge exiting the loop if the
1513 conditional has no loop header successors as not taken. */
1515 /* If we already used more reliable loop exit predictors, do not
1516 bother with PRED_LOOP_EXIT. */
1517 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS_GUESSED
)
1518 && !predicted_by_p (bb
, PRED_LOOP_ITERATIONS
))
	      /* For loops with many exits we don't want to predict all exits
		 with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken in each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 square root.

		 We limit the minimal probability to 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in the perl benchmark
		 containing such a wide loop.  */
1534 int probability
= ((REG_BR_PROB_BASE
1535 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
1537 if (probability
< HITRATE (2))
1538 probability
= HITRATE (2);
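
	      /* Worked example (illustrative numbers only): assuming a
		 loop-exit hitrate of 92% and eight exits, each exit would get
		 (10000 - 9200) / 8 == 100, which the clamp above raises to
		 HITRATE (2) == 200, i.e. 2%.  */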
1539 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1540 if (e
->dest
->index
< NUM_FIXED_BLOCKS
1541 || !flow_bb_inside_loop_p (loop
, e
->dest
))
1542 predict_edge (e
, PRED_LOOP_EXIT
, probability
);
1545 predict_iv_comparison (loop
, bb
, loop_bound_var
, loop_iv_base
,
1550 /* Free basic blocks from get_loop_body. */
1555 /* Attempt to predict probabilities of BB outgoing edges using local
1558 bb_estimate_probability_locally (basic_block bb
)
1560 rtx last_insn
= BB_END (bb
);
1563 if (! can_predict_insn_p (last_insn
))
1565 cond
= get_condition (last_insn
, NULL
, false, false);
1569 /* Try "pointer heuristic."
1570 A comparison ptr == 0 is predicted as false.
1571 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1572 if (COMPARISON_P (cond
)
1573 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
1574 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
1576 if (GET_CODE (cond
) == EQ
)
1577 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
1578 else if (GET_CODE (cond
) == NE
)
1579 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
1583 /* Try "opcode heuristic."
1584 EQ tests are usually false and NE tests are usually true. Also,
1585 most quantities are positive, so we can make the appropriate guesses
1586 about signed comparisons against zero. */
1587 switch (GET_CODE (cond
))
1590 /* Unconditional branch. */
1591 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
1592 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
1600 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1602 /* Comparisons with 0 are often used for booleans and there is
1603 nothing useful to predict about them. */
1604 else if (XEXP (cond
, 1) == const0_rtx
1605 || XEXP (cond
, 0) == const0_rtx
)
1608 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
1616 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1618 /* Comparisons with 0 are often used for booleans and there is
1619 nothing useful to predict about them. */
1620 else if (XEXP (cond
, 1) == const0_rtx
1621 || XEXP (cond
, 0) == const0_rtx
)
1624 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
1628 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
1632 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
1637 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1638 || XEXP (cond
, 1) == constm1_rtx
)
1639 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
1644 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1645 || XEXP (cond
, 1) == constm1_rtx
)
1646 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
1654 /* Set edge->probability for each successor edge of BB. */
1656 guess_outgoing_edge_probabilities (basic_block bb
)
1658 bb_estimate_probability_locally (bb
);
1659 combine_predictions_for_insn (BB_END (bb
), bb
);
1662 static tree
expr_expected_value (tree
, bitmap
);
1664 /* Helper function for expr_expected_value. */
1667 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
,
1668 tree op1
, bitmap visited
)
1672 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1674 if (TREE_CONSTANT (op0
))
1677 if (code
!= SSA_NAME
)
1680 def
= SSA_NAME_DEF_STMT (op0
);
1682 /* If we were already here, break the infinite cycle. */
1683 if (!bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
)))
1686 if (gimple_code (def
) == GIMPLE_PHI
)
1688 /* All the arguments of the PHI node must have the same constant
1690 int i
, n
= gimple_phi_num_args (def
);
1691 tree val
= NULL
, new_val
;
1693 for (i
= 0; i
< n
; i
++)
1695 tree arg
= PHI_ARG_DEF (def
, i
);
1697 /* If this PHI has itself as an argument, we cannot
1698 determine the string length of this argument. However,
1699 if we can find an expected constant value for the other
1700 PHI args then we can still be sure that this is
1701 likely a constant. So be optimistic and just
1702 continue with the next argument. */
1703 if (arg
== PHI_RESULT (def
))
1706 new_val
= expr_expected_value (arg
, visited
);
1711 else if (!operand_equal_p (val
, new_val
, false))
1716 if (is_gimple_assign (def
))
1718 if (gimple_assign_lhs (def
) != op0
)
1721 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
1722 gimple_assign_rhs1 (def
),
1723 gimple_assign_rhs_code (def
),
1724 gimple_assign_rhs2 (def
),
1728 if (is_gimple_call (def
))
1730 tree decl
= gimple_call_fndecl (def
);
1733 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
1734 switch (DECL_FUNCTION_CODE (decl
))
1736 case BUILT_IN_EXPECT
:
1739 if (gimple_call_num_args (def
) != 2)
1741 val
= gimple_call_arg (def
, 0);
1742 if (TREE_CONSTANT (val
))
1744 return gimple_call_arg (def
, 1);
1747 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
:
1748 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1
:
1749 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2
:
1750 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4
:
1751 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8
:
1752 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16
:
1753 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE
:
1754 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
:
1755 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
:
1756 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2
:
1757 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4
:
1758 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8
:
1759 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16
:
1760 /* Assume that any given atomic operation has low contention,
1761 and thus the compare-and-swap operation succeeds. */
1762 return boolean_true_node
;
1769 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
1772 op0
= expr_expected_value (op0
, visited
);
1775 op1
= expr_expected_value (op1
, visited
);
1778 res
= fold_build2 (code
, type
, op0
, op1
);
1779 if (TREE_CONSTANT (res
))
1783 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
1786 op0
= expr_expected_value (op0
, visited
);
1789 res
= fold_build1 (code
, type
, op0
);
1790 if (TREE_CONSTANT (res
))
/* Return constant EXPR will likely have at execution time, NULL if unknown.
   The function is used by the builtin_expect branch predictor, so the evidence
   must come from this construct and additional possible constant folding.

   We may want to implement more involved value guessing (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */
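
/* For instance (user-level illustration, not taken from this file), in

     if (__builtin_expect (ptr == NULL, 0))
       handle_error ();

   the expected value of the condition reduces to the constant 0, so the
   error path ends up predicted not taken by PRED_BUILTIN_EXPECT.  */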
1806 expr_expected_value (tree expr
, bitmap visited
)
1808 enum tree_code code
;
1811 if (TREE_CONSTANT (expr
))
1814 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
1815 return expr_expected_value_1 (TREE_TYPE (expr
),
1816 op0
, code
, op1
, visited
);
1820 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1821 we no longer need. */
1823 strip_predict_hints (void)
1831 gimple_stmt_iterator bi
;
1832 for (bi
= gsi_start_bb (bb
); !gsi_end_p (bi
);)
1834 gimple stmt
= gsi_stmt (bi
);
1836 if (gimple_code (stmt
) == GIMPLE_PREDICT
)
1838 gsi_remove (&bi
, true);
1841 else if (gimple_code (stmt
) == GIMPLE_CALL
)
1843 tree fndecl
= gimple_call_fndecl (stmt
);
1846 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
1847 && DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_EXPECT
1848 && gimple_call_num_args (stmt
) == 2)
1850 var
= gimple_call_lhs (stmt
);
1854 = gimple_build_assign (var
, gimple_call_arg (stmt
, 0));
1855 gsi_replace (&bi
, ass_stmt
, true);
1859 gsi_remove (&bi
, true);
1870 /* Predict using opcode of the last statement in basic block. */
1872 tree_predict_by_opcode (basic_block bb
)
1874 gimple stmt
= last_stmt (bb
);
1883 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1885 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1886 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1888 op0
= gimple_cond_lhs (stmt
);
1889 op1
= gimple_cond_rhs (stmt
);
1890 cmp
= gimple_cond_code (stmt
);
1891 type
= TREE_TYPE (op0
);
1892 visited
= BITMAP_ALLOC (NULL
);
1893 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, visited
);
1894 BITMAP_FREE (visited
);
1897 if (integer_zerop (val
))
1898 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, NOT_TAKEN
);
1900 predict_edge_def (then_edge
, PRED_BUILTIN_EXPECT
, TAKEN
);
1903 /* Try "pointer heuristic."
1904 A comparison ptr == 0 is predicted as false.
1905 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1906 if (POINTER_TYPE_P (type
))
1909 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
1910 else if (cmp
== NE_EXPR
)
1911 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
1915 /* Try "opcode heuristic."
1916 EQ tests are usually false and NE tests are usually true. Also,
1917 most quantities are positive, so we can make the appropriate guesses
1918 about signed comparisons against zero. */
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
1926 if (FLOAT_TYPE_P (type
))
1928 /* Comparisons with 0 are often used for booleans and there is
1929 nothing useful to predict about them. */
1930 else if (integer_zerop (op0
) || integer_zerop (op1
))
1933 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
      /* Floating point comparisons appear to behave in a very
	 unpredictable way because of the special role of = tests in
	 FP code.  */
1941 if (FLOAT_TYPE_P (type
))
1943 /* Comparisons with 0 are often used for booleans and there is
1944 nothing useful to predict about them. */
1945 else if (integer_zerop (op0
)
1946 || integer_zerop (op1
))
1949 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
1953 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
1956 case UNORDERED_EXPR
:
1957 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
1962 if (integer_zerop (op1
)
1963 || integer_onep (op1
)
1964 || integer_all_onesp (op1
)
1967 || real_minus_onep (op1
))
1968 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
1973 if (integer_zerop (op1
)
1974 || integer_onep (op1
)
1975 || integer_all_onesp (op1
)
1978 || real_minus_onep (op1
))
1979 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
/* Try to guess whether the return value means an error code.  */
1989 static enum br_predictor
1990 return_prediction (tree val
, enum prediction
*prediction
)
1994 return PRED_NO_PREDICTION
;
1995 /* Different heuristics for pointers and scalars. */
1996 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1998 /* NULL is usually not returned. */
1999 if (integer_zerop (val
))
2001 *prediction
= NOT_TAKEN
;
2002 return PRED_NULL_RETURN
;
2005 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
2007 /* Negative return values are often used to indicate
2009 if (TREE_CODE (val
) == INTEGER_CST
2010 && tree_int_cst_sgn (val
) < 0)
2012 *prediction
= NOT_TAKEN
;
2013 return PRED_NEGATIVE_RETURN
;
      /* Constant return values seem to be commonly taken.
2016 Zero/one often represent booleans so exclude them from the
2018 if (TREE_CONSTANT (val
)
2019 && (!integer_zerop (val
) && !integer_onep (val
)))
2021 *prediction
= TAKEN
;
2022 return PRED_CONST_RETURN
;
2025 return PRED_NO_PREDICTION
;
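
/* Typical source pattern this heuristic targets (illustrative; the names
   below are hypothetical):

     int fd = open_file (name);
     if (fd < 0)
       return -1;    -- a negative constant return marks an error path

   i.e. NULL and negative constant returns mark unlikely error paths, while
   other constant returns mark likely-taken results.  */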
/* Find the basic block with the return expression and look for a possible
   return value, trying to apply RETURN_PREDICTION heuristics.  */
2031 apply_return_prediction (void)
2033 gimple return_stmt
= NULL
;
2037 int phi_num_args
, i
;
2038 enum br_predictor pred
;
2039 enum prediction direction
;
2042 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2044 return_stmt
= last_stmt (e
->src
);
2046 && gimple_code (return_stmt
) == GIMPLE_RETURN
)
2051 return_val
= gimple_return_retval (return_stmt
);
2054 if (TREE_CODE (return_val
) != SSA_NAME
2055 || !SSA_NAME_DEF_STMT (return_val
)
2056 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
2058 phi
= SSA_NAME_DEF_STMT (return_val
);
2059 phi_num_args
= gimple_phi_num_args (phi
);
2060 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so we can hardly say anything about them.  */
2065 for (i
= 1; i
< phi_num_args
; i
++)
2066 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
2068 if (i
!= phi_num_args
)
2069 for (i
= 0; i
< phi_num_args
; i
++)
2071 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
2072 if (pred
!= PRED_NO_PREDICTION
)
2073 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi
, i
), pred
,
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such basic blocks as unlikely.  */
2083 tree_bb_level_predictions (void)
2086 bool has_return_edges
= false;
2090 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2091 if (!(e
->flags
& (EDGE_ABNORMAL
| EDGE_FAKE
| EDGE_EH
)))
2093 has_return_edges
= true;
2097 apply_return_prediction ();
2101 gimple_stmt_iterator gsi
;
2103 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2105 gimple stmt
= gsi_stmt (gsi
);
2108 if (is_gimple_call (stmt
))
2110 if ((gimple_call_flags (stmt
) & ECF_NORETURN
)
2111 && has_return_edges
)
2112 predict_paths_leading_to (bb
, PRED_NORETURN
,
2114 decl
= gimple_call_fndecl (stmt
);
2116 && lookup_attribute ("cold",
2117 DECL_ATTRIBUTES (decl
)))
2118 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
2121 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
2123 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
2124 gimple_predict_outcome (stmt
));
2125 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2126 hints to callers. */
2132 #ifdef ENABLE_CHECKING
2134 /* Callback for pointer_map_traverse, asserts that the pointer map is
2138 assert_is_empty (const void *key ATTRIBUTE_UNUSED
, void **value
,
2139 void *data ATTRIBUTE_UNUSED
)
2141 gcc_assert (!*value
);
2146 /* Predict branch probabilities and estimate profile for basic block BB. */
2149 tree_estimate_probability_bb (basic_block bb
)
2155 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2157 /* Predict edges to user labels with attributes. */
2158 if (e
->dest
!= EXIT_BLOCK_PTR
)
2160 gimple_stmt_iterator gi
;
2161 for (gi
= gsi_start_bb (e
->dest
); !gsi_end_p (gi
); gsi_next (&gi
))
2163 gimple stmt
= gsi_stmt (gi
);
2166 if (gimple_code (stmt
) != GIMPLE_LABEL
)
2168 decl
= gimple_label_label (stmt
);
2169 if (DECL_ARTIFICIAL (decl
))
2172 /* Finally, we have a user-defined label. */
2173 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl
)))
2174 predict_edge_def (e
, PRED_COLD_LABEL
, NOT_TAKEN
);
2175 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl
)))
2176 predict_edge_def (e
, PRED_HOT_LABEL
, TAKEN
);
      /* Predict early returns to be probable, as we've already taken
	 care of error returns and other cases are often used for
	 fast paths through the function.
2184 Since we've already removed the return statements, we are
2185 looking for CFG like:
2195 if (e
->dest
!= bb
->next_bb
2196 && e
->dest
!= EXIT_BLOCK_PTR
2197 && single_succ_p (e
->dest
)
2198 && single_succ_edge (e
->dest
)->dest
== EXIT_BLOCK_PTR
2199 && (last
= last_stmt (e
->dest
)) != NULL
2200 && gimple_code (last
) == GIMPLE_RETURN
)
2205 if (single_succ_p (bb
))
2207 FOR_EACH_EDGE (e1
, ei1
, bb
->preds
)
2208 if (!predicted_by_p (e1
->src
, PRED_NULL_RETURN
)
2209 && !predicted_by_p (e1
->src
, PRED_CONST_RETURN
)
2210 && !predicted_by_p (e1
->src
, PRED_NEGATIVE_RETURN
))
2211 predict_edge_def (e1
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2214 if (!predicted_by_p (e
->src
, PRED_NULL_RETURN
)
2215 && !predicted_by_p (e
->src
, PRED_CONST_RETURN
)
2216 && !predicted_by_p (e
->src
, PRED_NEGATIVE_RETURN
))
2217 predict_edge_def (e
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2220 /* Look for block we are guarding (ie we dominate it,
2221 but it doesn't postdominate us). */
2222 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
2223 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
2224 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
2226 gimple_stmt_iterator bi
;
2228 /* The call heuristic claims that a guarded function call
2229 is improbable. This is because such calls are often used
2230 to signal exceptional situations such as printing error
2232 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
2235 gimple stmt
= gsi_stmt (bi
);
2236 if (is_gimple_call (stmt
)
	      /* Constant and pure calls are hardly used to signal
2238 something exceptional. */
2239 && gimple_has_side_effects (stmt
))
2241 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
2247 tree_predict_by_opcode (bb
);
2250 /* Predict branch probabilities and estimate profile of the tree CFG.
2251 This function can be called from the loop optimizers to recompute
2252 the profile information. */
2255 tree_estimate_probability (void)
2259 add_noreturn_fake_exit_edges ();
2260 connect_infinite_loops_to_exit ();
2261 /* We use loop_niter_by_eval, which requires that the loops have
2263 create_preheaders (CP_SIMPLE_PREHEADERS
);
2264 calculate_dominance_info (CDI_POST_DOMINATORS
);
2266 bb_predictions
= pointer_map_create ();
2267 tree_bb_level_predictions ();
2268 record_loop_exits ();
2270 if (number_of_loops () > 1)
2274 tree_estimate_probability_bb (bb
);
2277 combine_predictions_for_bb (bb
);
2279 #ifdef ENABLE_CHECKING
2280 pointer_map_traverse (bb_predictions
, assert_is_empty
, NULL
);
2282 pointer_map_destroy (bb_predictions
);
2283 bb_predictions
= NULL
;
2285 estimate_bb_frequencies ();
2286 free_dominance_info (CDI_POST_DOMINATORS
);
2287 remove_fake_exit_edges ();
2290 /* Predict branch probabilities and estimate profile of the tree CFG.
2291 This is the driver function for PASS_PROFILE. */
2294 tree_estimate_probability_driver (void)
2298 loop_optimizer_init (LOOPS_NORMAL
);
2299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2300 flow_loops_dump (dump_file
, NULL
, 0);
2302 mark_irreducible_loops ();
2304 nb_loops
= number_of_loops ();
2308 tree_estimate_probability ();
2313 loop_optimizer_finalize ();
2314 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2315 gimple_dump_cfg (dump_file
, dump_flags
);
2316 if (profile_status
== PROFILE_ABSENT
)
2317 profile_status
= PROFILE_GUESSED
;
/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming an edge cut induced by
     the set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
	edge e2;
	edge_iterator ei2;
	bool found = false;

	/* Ignore fake edges and EH edges; we predict them as not taken
	   anyway.  */
	if (e->flags & (EDGE_EH | EDGE_FAKE))
	  continue;
	gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

	/* See if there is an edge from e->src that is not abnormal
	   and does not lead to BB.  */
	FOR_EACH_EDGE (e2, ei2, e->src->succs)
	  if (e2 != e
	      && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	      && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	    {
	      found = true;
	      break;
	    }

	/* If there is a non-abnormal path leaving e->src, predict the edge
	   using the predictor.  Otherwise we need to look for paths
	   leading to e->src.

	   The second case may lead to an infinite loop when we are predicting
	   regions that are only reachable by abnormal edges.  We simply
	   prevent visiting a given BB twice.  */
	if (found)
	  predict_edge_def (e, pred, taken);
	else if (bitmap_set_bit (visited, e->src->index))
	  predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}
/* Sets branch probabilities on all paths leading to BB according to
   the predictor PRED and direction TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}
/* Like predict_paths_leading_to but take edge instead of basic block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
			       enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
	has_nonloop_edge = true;
	break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case the edge is a loopback edge, the probability that the edge will
     be reached provided that the loop header is.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
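/* For example, a loopback edge estimated to be taken with a back_edge_prob
   of 0.9 corresponds to an expected iteration count of 1 / (1 - 0.9) = 10
   per entry into the loop.  */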
#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When function never returns, we will never process exit block.  */
      if (!count && bb == EXIT_BLOCK_PTR)
	bb->count = bb->frequency = 0;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency
				 / REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
		/ REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
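/* For example, if the non-back edges entering a loop header contribute a
   frequency of 1.0 and the loop's back edge contributes a cyclic_probability
   of 0.75, the header's frequency becomes 1.0 / (1 - 0.75) = 4, i.e. the
   loop body is expected to execute about four times per entry.  */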
/* Estimate probabilities of loopback edges in loops at same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}
/* Propagates frequencies through structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops () > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    bitmap_set_bit (tovisit, bb->index);

  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}
/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
/* Return true if function is likely to be expensive, so there is no point
   in optimizing performance of the prologue and epilogue or doing inlining
   at the expense of code size growth.  THRESHOLD is the limit of number of
   instructions the function can execute on average to still be considered
   not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that function contains
     internal loop executing more than BB_FREQ_MAX times or profile feedback
     is available and function has not been executed at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
/* Estimate basic blocks frequency by given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (profile_status != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
	 to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}
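/* Illustration of the final scaling step above: if the largest propagated
   sreal frequency in the function is F, every block frequency is multiplied
   by BB_FREQ_MAX / F, so the hottest block ends up with
   bb->frequency == BB_FREQ_MAX, and adding real_one_half before
   sreal_to_int rounds to the nearest integer rather than truncating.  */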
/* Decide whether function is hot, cold or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (!profile_info || !flag_branch_probabilities)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
	node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
	node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }
  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile_estimate",			/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_estimate_probability,		/* gate */
  tree_estimate_probability_driver,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};
struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  "*strip_predict_hints",		/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  NULL,					/* gate */
  strip_predict_hints,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_BRANCH_PROB,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa	/* todo_flags_finish */
 }
};
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops, frequencies might
   run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);
  if (profile_status == PROFILE_GUESSED)
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies ();
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}