1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
32 #include "coretypes.h"
38 #include "tree-pass.h"
43 #include "diagnostic-core.h"
44 #include "gimple-predict.h"
45 #include "fold-const.h"
52 #include "gimple-iterator.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-scalar-evolution.h"
58 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
59 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
60 static sreal real_almost_one
, real_br_prob_base
,
61 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
63 static void combine_predictions_for_insn (rtx_insn
*, basic_block
);
64 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
65 static void predict_paths_leading_to (basic_block
, enum br_predictor
, enum prediction
);
66 static void predict_paths_leading_to_edge (edge
, enum br_predictor
, enum prediction
);
67 static bool can_predict_insn_p (const rtx_insn
*);
69 /* Information we hold about each branch predictor.
70 Filled using information from predict.def. */
74 const char *const name
; /* Name used in the debugging dumps. */
75 const int hitrate
; /* Expected hitrate used by
76 predict_insn_def call. */
80 /* Use given predictor without Dempster-Shaffer theory if it matches
81 using first_match heuristics. */
82 #define PRED_FLAG_FIRST_MATCH 1
84 /* Recompute hitrate in percent to our representation. */
86 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
88 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
89 static const struct predictor_info predictor_info
[]= {
90 #include "predict.def"
92 /* Upper bound on predictors. */
97 /* Return TRUE if frequency FREQ is considered to be hot. */
100 maybe_hot_frequency_p (struct function
*fun
, int freq
)
102 struct cgraph_node
*node
= cgraph_node::get (fun
->decl
);
104 || !opt_for_fn (fun
->decl
, flag_branch_probabilities
))
106 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
108 if (node
->frequency
== NODE_FREQUENCY_HOT
)
111 if (profile_status_for_fn (fun
) == PROFILE_ABSENT
)
113 if (node
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
114 && freq
< (ENTRY_BLOCK_PTR_FOR_FN (fun
)->frequency
* 2 / 3))
116 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
) == 0)
118 if (freq
* PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
)
119 < ENTRY_BLOCK_PTR_FOR_FN (fun
)->frequency
)
124 static gcov_type min_count
= -1;
126 /* Determine the threshold for hot BB counts. */
129 get_hot_bb_threshold ()
131 gcov_working_set_t
*ws
;
134 ws
= find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
));
136 min_count
= ws
->min_counter
;
141 /* Set the threshold for hot BB counts. */
144 set_hot_bb_threshold (gcov_type min
)
149 /* Return TRUE if frequency FREQ is considered to be hot. */
152 maybe_hot_count_p (struct function
*fun
, gcov_type count
)
154 if (fun
&& profile_status_for_fn (fun
) != PROFILE_READ
)
156 /* Code executed at most once is not hot. */
157 if (profile_info
->runs
>= count
)
159 return (count
>= get_hot_bb_threshold ());
162 /* Return true in case BB can be CPU intensive and should be optimized
163 for maximal performance. */
166 maybe_hot_bb_p (struct function
*fun
, const_basic_block bb
)
168 gcc_checking_assert (fun
);
169 if (profile_status_for_fn (fun
) == PROFILE_READ
)
170 return maybe_hot_count_p (fun
, bb
->count
);
171 return maybe_hot_frequency_p (fun
, bb
->frequency
);
174 /* Return true in case BB can be CPU intensive and should be optimized
175 for maximal performance. */
178 maybe_hot_edge_p (edge e
)
180 if (profile_status_for_fn (cfun
) == PROFILE_READ
)
181 return maybe_hot_count_p (cfun
, e
->count
);
182 return maybe_hot_frequency_p (cfun
, EDGE_FREQUENCY (e
));
185 /* Return true if profile COUNT and FREQUENCY, or function FUN static
186 node frequency reflects never being executed. */
189 probably_never_executed (struct function
*fun
,
190 gcov_type count
, int frequency
)
192 gcc_checking_assert (fun
);
193 if (profile_status_for_fn (fun
) == PROFILE_READ
)
195 int unlikely_count_fraction
= PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION
);
196 if (count
* unlikely_count_fraction
>= profile_info
->runs
)
200 if (!ENTRY_BLOCK_PTR_FOR_FN (fun
)->frequency
)
202 if (ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
)
204 gcov_type computed_count
;
205 /* Check for possibility of overflow, in which case entry bb count
206 is large enough to do the division first without losing much
208 if (ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
< REG_BR_PROB_BASE
*
211 gcov_type scaled_count
212 = frequency
* ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
*
213 unlikely_count_fraction
;
214 computed_count
= RDIV (scaled_count
,
215 ENTRY_BLOCK_PTR_FOR_FN (fun
)->frequency
);
219 computed_count
= RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
,
220 ENTRY_BLOCK_PTR_FOR_FN (fun
)->frequency
);
221 computed_count
*= frequency
* unlikely_count_fraction
;
223 if (computed_count
>= profile_info
->runs
)
228 if ((!profile_info
|| !(opt_for_fn (fun
->decl
, flag_branch_probabilities
)))
229 && (cgraph_node::get (fun
->decl
)->frequency
230 == NODE_FREQUENCY_UNLIKELY_EXECUTED
))
236 /* Return true in case BB is probably never executed. */
239 probably_never_executed_bb_p (struct function
*fun
, const_basic_block bb
)
241 return probably_never_executed (fun
, bb
->count
, bb
->frequency
);
245 /* Return true in case edge E is probably never executed. */
248 probably_never_executed_edge_p (struct function
*fun
, edge e
)
250 return probably_never_executed (fun
, e
->count
, EDGE_FREQUENCY (e
));
253 /* Return true when current function should always be optimized for size. */
256 optimize_function_for_size_p (struct function
*fun
)
258 if (!fun
|| !fun
->decl
)
259 return optimize_size
;
260 cgraph_node
*n
= cgraph_node::get (fun
->decl
);
261 return n
&& n
->optimize_for_size_p ();
264 /* Return true when current function should always be optimized for speed. */
267 optimize_function_for_speed_p (struct function
*fun
)
269 return !optimize_function_for_size_p (fun
);
272 /* Return the optimization type that should be used for the function FUN. */
275 function_optimization_type (struct function
*fun
)
277 return (optimize_function_for_speed_p (fun
)
279 : OPTIMIZE_FOR_SIZE
);
282 /* Return TRUE when BB should be optimized for size. */
285 optimize_bb_for_size_p (const_basic_block bb
)
287 return (optimize_function_for_size_p (cfun
)
288 || (bb
&& !maybe_hot_bb_p (cfun
, bb
)));
291 /* Return TRUE when BB should be optimized for speed. */
294 optimize_bb_for_speed_p (const_basic_block bb
)
296 return !optimize_bb_for_size_p (bb
);
299 /* Return the optimization type that should be used for block BB. */
302 bb_optimization_type (const_basic_block bb
)
304 return (optimize_bb_for_speed_p (bb
)
306 : OPTIMIZE_FOR_SIZE
);
309 /* Return TRUE when BB should be optimized for size. */
312 optimize_edge_for_size_p (edge e
)
314 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
317 /* Return TRUE when BB should be optimized for speed. */
320 optimize_edge_for_speed_p (edge e
)
322 return !optimize_edge_for_size_p (e
);
325 /* Return TRUE when BB should be optimized for size. */
328 optimize_insn_for_size_p (void)
330 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
333 /* Return TRUE when BB should be optimized for speed. */
336 optimize_insn_for_speed_p (void)
338 return !optimize_insn_for_size_p ();
341 /* Return TRUE when LOOP should be optimized for size. */
344 optimize_loop_for_size_p (struct loop
*loop
)
346 return optimize_bb_for_size_p (loop
->header
);
349 /* Return TRUE when LOOP should be optimized for speed. */
352 optimize_loop_for_speed_p (struct loop
*loop
)
354 return optimize_bb_for_speed_p (loop
->header
);
357 /* Return TRUE when LOOP nest should be optimized for speed. */
360 optimize_loop_nest_for_speed_p (struct loop
*loop
)
362 struct loop
*l
= loop
;
363 if (optimize_loop_for_speed_p (loop
))
366 while (l
&& l
!= loop
)
368 if (optimize_loop_for_speed_p (l
))
376 while (l
!= loop
&& !l
->next
)
385 /* Return TRUE when LOOP nest should be optimized for size. */
388 optimize_loop_nest_for_size_p (struct loop
*loop
)
390 return !optimize_loop_nest_for_speed_p (loop
);
393 /* Return true when edge E is likely to be well predictable by branch
397 predictable_edge_p (edge e
)
399 if (profile_status_for_fn (cfun
) == PROFILE_ABSENT
)
402 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
403 || (REG_BR_PROB_BASE
- e
->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
410 /* Set RTL expansion for BB profile. */
413 rtl_profile_for_bb (basic_block bb
)
415 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (cfun
, bb
);
418 /* Set RTL expansion for edge profile. */
421 rtl_profile_for_edge (edge e
)
423 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
426 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
428 default_rtl_profile (void)
430 crtl
->maybe_hot_insn_p
= true;
433 /* Return true if the one of outgoing edges is already predicted by
437 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
440 if (!INSN_P (BB_END (bb
)))
442 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
443 if (REG_NOTE_KIND (note
) == REG_BR_PRED
444 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
449 /* Structure representing predictions in tree level. */
451 struct edge_prediction
{
452 struct edge_prediction
*ep_next
;
454 enum br_predictor ep_predictor
;
458 /* This map contains for a basic block the list of predictions for the
461 static hash_map
<const_basic_block
, edge_prediction
*> *bb_predictions
;
463 /* Return true if the one of outgoing edges is already predicted by
467 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
469 struct edge_prediction
*i
;
470 edge_prediction
**preds
= bb_predictions
->get (bb
);
475 for (i
= *preds
; i
; i
= i
->ep_next
)
476 if (i
->ep_predictor
== predictor
)
481 /* Return true if the one of outgoing edges is already predicted by
482 PREDICTOR for edge E predicted as TAKEN. */
485 edge_predicted_by_p (edge e
, enum br_predictor predictor
, bool taken
)
487 struct edge_prediction
*i
;
488 basic_block bb
= e
->src
;
489 edge_prediction
**preds
= bb_predictions
->get (bb
);
493 int probability
= predictor_info
[(int) predictor
].hitrate
;
496 probability
= REG_BR_PROB_BASE
- probability
;
498 for (i
= *preds
; i
; i
= i
->ep_next
)
499 if (i
->ep_predictor
== predictor
501 && i
->ep_probability
== probability
)
506 /* Return true when the probability of edge is reliable.
508 The profile guessing code is good at predicting branch outcome (ie.
509 taken/not taken), that is predicted right slightly over 75% of time.
510 It is however notoriously poor on predicting the probability itself.
511 In general the profile appear a lot flatter (with probabilities closer
512 to 50%) than the reality so it is bad idea to use it to drive optimization
513 such as those disabling dynamic branch prediction for well predictable
516 There are two exceptions - edges leading to noreturn edges and edges
517 predicted by number of iterations heuristics are predicted well. This macro
518 should be able to distinguish those, but at the moment it simply check for
519 noreturn heuristic that is only one giving probability over 99% or bellow
520 1%. In future we might want to propagate reliability information across the
521 CFG if we find this information useful on multiple places. */
523 probability_reliable_p (int prob
)
525 return (profile_status_for_fn (cfun
) == PROFILE_READ
526 || (profile_status_for_fn (cfun
) == PROFILE_GUESSED
527 && (prob
<= HITRATE (1) || prob
>= HITRATE (99))));
530 /* Same predicate as above, working on edges. */
532 edge_probability_reliable_p (const_edge e
)
534 return probability_reliable_p (e
->probability
);
537 /* Same predicate as edge_probability_reliable_p, working on notes. */
539 br_prob_note_reliable_p (const_rtx note
)
541 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
542 return probability_reliable_p (XINT (note
, 0));
546 predict_insn (rtx_insn
*insn
, enum br_predictor predictor
, int probability
)
548 gcc_assert (any_condjump_p (insn
));
549 if (!flag_guess_branch_prob
)
552 add_reg_note (insn
, REG_BR_PRED
,
553 gen_rtx_CONCAT (VOIDmode
,
554 GEN_INT ((int) predictor
),
555 GEN_INT ((int) probability
)));
558 /* Predict insn by given predictor. */
561 predict_insn_def (rtx_insn
*insn
, enum br_predictor predictor
,
562 enum prediction taken
)
564 int probability
= predictor_info
[(int) predictor
].hitrate
;
567 probability
= REG_BR_PROB_BASE
- probability
;
569 predict_insn (insn
, predictor
, probability
);
572 /* Predict edge E with given probability if possible. */
575 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
578 last_insn
= BB_END (e
->src
);
580 /* We can store the branch prediction information only about
581 conditional jumps. */
582 if (!any_condjump_p (last_insn
))
585 /* We always store probability of branching. */
586 if (e
->flags
& EDGE_FALLTHRU
)
587 probability
= REG_BR_PROB_BASE
- probability
;
589 predict_insn (last_insn
, predictor
, probability
);
592 /* Predict edge E with the given PROBABILITY. */
594 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
596 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
597 && EDGE_COUNT (e
->src
->succs
) > 1
598 && flag_guess_branch_prob
601 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
602 edge_prediction
*&preds
= bb_predictions
->get_or_insert (e
->src
);
606 i
->ep_probability
= probability
;
607 i
->ep_predictor
= predictor
;
612 /* Remove all predictions on given basic block that are attached
615 remove_predictions_associated_with_edge (edge e
)
620 edge_prediction
**preds
= bb_predictions
->get (e
->src
);
624 struct edge_prediction
**prediction
= preds
;
625 struct edge_prediction
*next
;
629 if ((*prediction
)->ep_edge
== e
)
631 next
= (*prediction
)->ep_next
;
636 prediction
= &((*prediction
)->ep_next
);
641 /* Clears the list of predictions stored for BB. */
644 clear_bb_predictions (basic_block bb
)
646 edge_prediction
**preds
= bb_predictions
->get (bb
);
647 struct edge_prediction
*pred
, *next
;
652 for (pred
= *preds
; pred
; pred
= next
)
654 next
= pred
->ep_next
;
660 /* Return true when we can store prediction on insn INSN.
661 At the moment we represent predictions only on conditional
662 jumps, not at computed jump or other complicated cases. */
664 can_predict_insn_p (const rtx_insn
*insn
)
666 return (JUMP_P (insn
)
667 && any_condjump_p (insn
)
668 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
671 /* Predict edge E by given predictor if possible. */
674 predict_edge_def (edge e
, enum br_predictor predictor
,
675 enum prediction taken
)
677 int probability
= predictor_info
[(int) predictor
].hitrate
;
680 probability
= REG_BR_PROB_BASE
- probability
;
682 predict_edge (e
, predictor
, probability
);
685 /* Invert all branch predictions or probability notes in the INSN. This needs
686 to be done each time we invert the condition used by the jump. */
689 invert_br_probabilities (rtx insn
)
693 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
694 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
695 XINT (note
, 0) = REG_BR_PROB_BASE
- XINT (note
, 0);
696 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
697 XEXP (XEXP (note
, 0), 1)
698 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
701 /* Dump information about the branch prediction to the output file. */
704 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
705 basic_block bb
, int used
)
713 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
714 if (! (e
->flags
& EDGE_FALLTHRU
))
717 fprintf (file
, " %s heuristics%s: %.1f%%",
718 predictor_info
[predictor
].name
,
719 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
723 fprintf (file
, " exec %" PRId64
, bb
->count
);
726 fprintf (file
, " hit %" PRId64
, e
->count
);
727 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
731 fprintf (file
, "\n");
734 /* We can not predict the probabilities of outgoing edges of bb. Set them
735 evenly and hope for the best. */
737 set_even_probabilities (basic_block bb
)
743 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
744 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
746 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
747 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
748 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
753 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
754 note if not already present. Remove now useless REG_BR_PRED notes. */
757 combine_predictions_for_insn (rtx_insn
*insn
, basic_block bb
)
762 int best_probability
= PROB_EVEN
;
763 enum br_predictor best_predictor
= END_PREDICTORS
;
764 int combined_probability
= REG_BR_PROB_BASE
/ 2;
766 bool first_match
= false;
769 if (!can_predict_insn_p (insn
))
771 set_even_probabilities (bb
);
775 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
776 pnote
= ®_NOTES (insn
);
778 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
781 /* We implement "first match" heuristics and use probability guessed
782 by predictor with smallest index. */
783 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
784 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
786 enum br_predictor predictor
= ((enum br_predictor
)
787 INTVAL (XEXP (XEXP (note
, 0), 0)));
788 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
791 if (best_predictor
> predictor
)
792 best_probability
= probability
, best_predictor
= predictor
;
794 d
= (combined_probability
* probability
795 + (REG_BR_PROB_BASE
- combined_probability
)
796 * (REG_BR_PROB_BASE
- probability
));
798 /* Use FP math to avoid overflows of 32bit integers. */
800 /* If one probability is 0% and one 100%, avoid division by zero. */
801 combined_probability
= REG_BR_PROB_BASE
/ 2;
803 combined_probability
= (((double) combined_probability
) * probability
804 * REG_BR_PROB_BASE
/ d
+ 0.5);
807 /* Decide which heuristic to use. In case we didn't match anything,
808 use no_prediction heuristic, in case we did match, use either
809 first match or Dempster-Shaffer theory depending on the flags. */
811 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
815 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
816 combined_probability
, bb
, true);
819 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
821 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
826 combined_probability
= best_probability
;
827 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
831 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
833 enum br_predictor predictor
= ((enum br_predictor
)
834 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
835 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
837 dump_prediction (dump_file
, predictor
, probability
, bb
,
838 !first_match
|| best_predictor
== predictor
);
839 *pnote
= XEXP (*pnote
, 1);
842 pnote
= &XEXP (*pnote
, 1);
847 add_int_reg_note (insn
, REG_BR_PROB
, combined_probability
);
849 /* Save the prediction into CFG in case we are seeing non-degenerated
851 if (!single_succ_p (bb
))
853 BRANCH_EDGE (bb
)->probability
= combined_probability
;
854 FALLTHRU_EDGE (bb
)->probability
855 = REG_BR_PROB_BASE
- combined_probability
;
858 else if (!single_succ_p (bb
))
860 int prob
= XINT (prob_note
, 0);
862 BRANCH_EDGE (bb
)->probability
= prob
;
863 FALLTHRU_EDGE (bb
)->probability
= REG_BR_PROB_BASE
- prob
;
866 single_succ_edge (bb
)->probability
= REG_BR_PROB_BASE
;
869 /* Combine predictions into single probability and store them into CFG.
870 Remove now useless prediction entries.
871 If DRY_RUN is set, only produce dumps and do not modify profile. */
874 combine_predictions_for_bb (basic_block bb
, bool dry_run
)
876 int best_probability
= PROB_EVEN
;
877 enum br_predictor best_predictor
= END_PREDICTORS
;
878 int combined_probability
= REG_BR_PROB_BASE
/ 2;
880 bool first_match
= false;
882 struct edge_prediction
*pred
;
884 edge e
, first
= NULL
, second
= NULL
;
887 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
888 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
891 if (first
&& !second
)
897 /* When there is no successor or only one choice, prediction is easy.
899 We are lazy for now and predict only basic blocks with two outgoing
900 edges. It is possible to predict generic case too, but we have to
901 ignore first match heuristics and do more involved combining. Implement
905 if (!bb
->count
&& !dry_run
)
906 set_even_probabilities (bb
);
907 clear_bb_predictions (bb
);
909 fprintf (dump_file
, "%i edges in bb %i predicted to even probabilities\n",
915 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
917 edge_prediction
**preds
= bb_predictions
->get (bb
);
920 /* We implement "first match" heuristics and use probability guessed
921 by predictor with smallest index. */
922 for (pred
= *preds
; pred
; pred
= pred
->ep_next
)
924 enum br_predictor predictor
= pred
->ep_predictor
;
925 int probability
= pred
->ep_probability
;
927 if (pred
->ep_edge
!= first
)
928 probability
= REG_BR_PROB_BASE
- probability
;
931 /* First match heuristics would be widly confused if we predicted
933 if (best_predictor
> predictor
)
935 struct edge_prediction
*pred2
;
936 int prob
= probability
;
938 for (pred2
= (struct edge_prediction
*) *preds
;
939 pred2
; pred2
= pred2
->ep_next
)
940 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
942 int probability2
= pred2
->ep_probability
;
944 if (pred2
->ep_edge
!= first
)
945 probability2
= REG_BR_PROB_BASE
- probability2
;
947 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
948 (probability2
< REG_BR_PROB_BASE
/ 2))
951 /* If the same predictor later gave better result, go for it! */
952 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
953 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
957 best_probability
= prob
, best_predictor
= predictor
;
960 d
= (combined_probability
* probability
961 + (REG_BR_PROB_BASE
- combined_probability
)
962 * (REG_BR_PROB_BASE
- probability
));
964 /* Use FP math to avoid overflows of 32bit integers. */
966 /* If one probability is 0% and one 100%, avoid division by zero. */
967 combined_probability
= REG_BR_PROB_BASE
/ 2;
969 combined_probability
= (((double) combined_probability
)
971 * REG_BR_PROB_BASE
/ d
+ 0.5);
975 /* Decide which heuristic to use. In case we didn't match anything,
976 use no_prediction heuristic, in case we did match, use either
977 first match or Dempster-Shaffer theory depending on the flags. */
979 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
983 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
986 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
988 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
993 combined_probability
= best_probability
;
994 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
998 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
1000 enum br_predictor predictor
= pred
->ep_predictor
;
1001 int probability
= pred
->ep_probability
;
1003 if (pred
->ep_edge
!= EDGE_SUCC (bb
, 0))
1004 probability
= REG_BR_PROB_BASE
- probability
;
1005 dump_prediction (dump_file
, predictor
, probability
, bb
,
1006 !first_match
|| best_predictor
== predictor
);
1009 clear_bb_predictions (bb
);
1011 if (!bb
->count
&& !dry_run
)
1013 first
->probability
= combined_probability
;
1014 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
1018 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1019 Return the SSA_NAME if the condition satisfies, NULL otherwise.
1021 T1 and T2 should be one of the following cases:
1022 1. T1 is SSA_NAME, T2 is NULL
1023 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1024 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1027 strips_small_constant (tree t1
, tree t2
)
1034 else if (TREE_CODE (t1
) == SSA_NAME
)
1036 else if (tree_fits_shwi_p (t1
))
1037 value
= tree_to_shwi (t1
);
1043 else if (tree_fits_shwi_p (t2
))
1044 value
= tree_to_shwi (t2
);
1045 else if (TREE_CODE (t2
) == SSA_NAME
)
1053 if (value
<= 4 && value
>= -4)
1059 /* Return the SSA_NAME in T or T's operands.
1060 Return NULL if SSA_NAME cannot be found. */
1063 get_base_value (tree t
)
1065 if (TREE_CODE (t
) == SSA_NAME
)
1068 if (!BINARY_CLASS_P (t
))
1071 switch (TREE_OPERAND_LENGTH (t
))
1074 return strips_small_constant (TREE_OPERAND (t
, 0), NULL
);
1076 return strips_small_constant (TREE_OPERAND (t
, 0),
1077 TREE_OPERAND (t
, 1));
1083 /* Check the compare STMT in LOOP. If it compares an induction
1084 variable to a loop invariant, return true, and save
1085 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1086 Otherwise return false and set LOOP_INVAIANT to NULL. */
1089 is_comparison_with_loop_invariant_p (gcond
*stmt
, struct loop
*loop
,
1090 tree
*loop_invariant
,
1091 enum tree_code
*compare_code
,
1095 tree op0
, op1
, bound
, base
;
1097 enum tree_code code
;
1100 code
= gimple_cond_code (stmt
);
1101 *loop_invariant
= NULL
;
1117 op0
= gimple_cond_lhs (stmt
);
1118 op1
= gimple_cond_rhs (stmt
);
1120 if ((TREE_CODE (op0
) != SSA_NAME
&& TREE_CODE (op0
) != INTEGER_CST
)
1121 || (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op1
) != INTEGER_CST
))
1123 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op0
, &iv0
, true))
1125 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op1
, &iv1
, true))
1127 if (TREE_CODE (iv0
.step
) != INTEGER_CST
1128 || TREE_CODE (iv1
.step
) != INTEGER_CST
)
1130 if ((integer_zerop (iv0
.step
) && integer_zerop (iv1
.step
))
1131 || (!integer_zerop (iv0
.step
) && !integer_zerop (iv1
.step
)))
1134 if (integer_zerop (iv0
.step
))
1136 if (code
!= NE_EXPR
&& code
!= EQ_EXPR
)
1137 code
= invert_tree_comparison (code
, false);
1140 if (tree_fits_shwi_p (iv1
.step
))
1149 if (tree_fits_shwi_p (iv0
.step
))
1155 if (TREE_CODE (bound
) != INTEGER_CST
)
1156 bound
= get_base_value (bound
);
1159 if (TREE_CODE (base
) != INTEGER_CST
)
1160 base
= get_base_value (base
);
1164 *loop_invariant
= bound
;
1165 *compare_code
= code
;
1167 *loop_iv_base
= base
;
1171 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1174 expr_coherent_p (tree t1
, tree t2
)
1177 tree ssa_name_1
= NULL
;
1178 tree ssa_name_2
= NULL
;
1180 gcc_assert (TREE_CODE (t1
) == SSA_NAME
|| TREE_CODE (t1
) == INTEGER_CST
);
1181 gcc_assert (TREE_CODE (t2
) == SSA_NAME
|| TREE_CODE (t2
) == INTEGER_CST
);
1186 if (TREE_CODE (t1
) == INTEGER_CST
&& TREE_CODE (t2
) == INTEGER_CST
)
1188 if (TREE_CODE (t1
) == INTEGER_CST
|| TREE_CODE (t2
) == INTEGER_CST
)
1191 /* Check to see if t1 is expressed/defined with t2. */
1192 stmt
= SSA_NAME_DEF_STMT (t1
);
1193 gcc_assert (stmt
!= NULL
);
1194 if (is_gimple_assign (stmt
))
1196 ssa_name_1
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1197 if (ssa_name_1
&& ssa_name_1
== t2
)
1201 /* Check to see if t2 is expressed/defined with t1. */
1202 stmt
= SSA_NAME_DEF_STMT (t2
);
1203 gcc_assert (stmt
!= NULL
);
1204 if (is_gimple_assign (stmt
))
1206 ssa_name_2
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1207 if (ssa_name_2
&& ssa_name_2
== t1
)
1211 /* Compare if t1 and t2's def_stmts are identical. */
1212 if (ssa_name_2
!= NULL
&& ssa_name_1
== ssa_name_2
)
1218 /* Return true if E is predicted by one of loop heuristics. */
1221 predicted_by_loop_heuristics_p (basic_block bb
)
1223 struct edge_prediction
*i
;
1224 edge_prediction
**preds
= bb_predictions
->get (bb
);
1229 for (i
= *preds
; i
; i
= i
->ep_next
)
1230 if (i
->ep_predictor
== PRED_LOOP_ITERATIONS_GUESSED
1231 || i
->ep_predictor
== PRED_LOOP_ITERATIONS_MAX
1232 || i
->ep_predictor
== PRED_LOOP_ITERATIONS
1233 || i
->ep_predictor
== PRED_LOOP_EXIT
1234 || i
->ep_predictor
== PRED_LOOP_EXTRA_EXIT
)
1239 /* Predict branch probability of BB when BB contains a branch that compares
1240 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1241 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1244 for (int i = 0; i < bound; i++) {
1251 In this loop, we will predict the branch inside the loop to be taken. */
1254 predict_iv_comparison (struct loop
*loop
, basic_block bb
,
1255 tree loop_bound_var
,
1256 tree loop_iv_base_var
,
1257 enum tree_code loop_bound_code
,
1258 int loop_bound_step
)
1261 tree compare_var
, compare_base
;
1262 enum tree_code compare_code
;
1263 tree compare_step_var
;
1267 if (predicted_by_loop_heuristics_p (bb
))
1270 stmt
= last_stmt (bb
);
1271 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1273 if (!is_comparison_with_loop_invariant_p (as_a
<gcond
*> (stmt
),
1280 /* Find the taken edge. */
1281 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1282 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1285 /* When comparing an IV to a loop invariant, NE is more likely to be
1286 taken while EQ is more likely to be not-taken. */
1287 if (compare_code
== NE_EXPR
)
1289 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1292 else if (compare_code
== EQ_EXPR
)
1294 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1298 if (!expr_coherent_p (loop_iv_base_var
, compare_base
))
1301 /* If loop bound, base and compare bound are all constants, we can
1302 calculate the probability directly. */
1303 if (tree_fits_shwi_p (loop_bound_var
)
1304 && tree_fits_shwi_p (compare_var
)
1305 && tree_fits_shwi_p (compare_base
))
1308 bool overflow
, overall_overflow
= false;
1309 widest_int compare_count
, tem
;
1311 /* (loop_bound - base) / compare_step */
1312 tem
= wi::sub (wi::to_widest (loop_bound_var
),
1313 wi::to_widest (compare_base
), SIGNED
, &overflow
);
1314 overall_overflow
|= overflow
;
1315 widest_int loop_count
= wi::div_trunc (tem
,
1316 wi::to_widest (compare_step_var
),
1318 overall_overflow
|= overflow
;
1320 if (!wi::neg_p (wi::to_widest (compare_step_var
))
1321 ^ (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1323 /* (loop_bound - compare_bound) / compare_step */
1324 tem
= wi::sub (wi::to_widest (loop_bound_var
),
1325 wi::to_widest (compare_var
), SIGNED
, &overflow
);
1326 overall_overflow
|= overflow
;
1327 compare_count
= wi::div_trunc (tem
, wi::to_widest (compare_step_var
),
1329 overall_overflow
|= overflow
;
1333 /* (compare_bound - base) / compare_step */
1334 tem
= wi::sub (wi::to_widest (compare_var
),
1335 wi::to_widest (compare_base
), SIGNED
, &overflow
);
1336 overall_overflow
|= overflow
;
1337 compare_count
= wi::div_trunc (tem
, wi::to_widest (compare_step_var
),
1339 overall_overflow
|= overflow
;
1341 if (compare_code
== LE_EXPR
|| compare_code
== GE_EXPR
)
1343 if (loop_bound_code
== LE_EXPR
|| loop_bound_code
== GE_EXPR
)
1345 if (wi::neg_p (compare_count
))
1347 if (wi::neg_p (loop_count
))
1349 if (loop_count
== 0)
1351 else if (wi::cmps (compare_count
, loop_count
) == 1)
1352 probability
= REG_BR_PROB_BASE
;
1355 tem
= compare_count
* REG_BR_PROB_BASE
;
1356 tem
= wi::udiv_trunc (tem
, loop_count
);
1357 probability
= tem
.to_uhwi ();
1360 if (!overall_overflow
)
1361 predict_edge (then_edge
, PRED_LOOP_IV_COMPARE
, probability
);
1366 if (expr_coherent_p (loop_bound_var
, compare_var
))
1368 if ((loop_bound_code
== LT_EXPR
|| loop_bound_code
== LE_EXPR
)
1369 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1370 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1371 else if ((loop_bound_code
== GT_EXPR
|| loop_bound_code
== GE_EXPR
)
1372 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1373 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1374 else if (loop_bound_code
== NE_EXPR
)
1376 /* If the loop backedge condition is "(i != bound)", we do
1377 the comparison based on the step of IV:
1378 * step < 0 : backedge condition is like (i > bound)
1379 * step > 0 : backedge condition is like (i < bound) */
1380 gcc_assert (loop_bound_step
!= 0);
1381 if (loop_bound_step
> 0
1382 && (compare_code
== LT_EXPR
1383 || compare_code
== LE_EXPR
))
1384 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1385 else if (loop_bound_step
< 0
1386 && (compare_code
== GT_EXPR
1387 || compare_code
== GE_EXPR
))
1388 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1390 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1393 /* The branch is predicted not-taken if loop_bound_code is
1394 opposite with compare_code. */
1395 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1397 else if (expr_coherent_p (loop_iv_base_var
, compare_var
))
1400 for (i = s; i < h; i++)
1402 The branch should be predicted taken. */
1403 if (loop_bound_step
> 0
1404 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1405 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1406 else if (loop_bound_step
< 0
1407 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1408 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1410 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1414 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1415 exits are resulted from short-circuit conditions that will generate an
1418 if (foo() || global > 10)
1421 This will be translated into:
1426 if foo() goto BB6 else goto BB5
1428 if global > 10 goto BB6 else goto BB7
1432 iftmp = (PHI 0(BB5), 1(BB6))
1433 if iftmp == 1 goto BB8 else goto BB3
1435 outside of the loop...
1437 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1438 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1439 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1440 exits to predict them using PRED_LOOP_EXTRA_EXIT. */
1443 predict_extra_loop_exits (edge exit_edge
)
1446 bool check_value_one
;
1447 gimple
*lhs_def_stmt
;
1449 tree cmp_rhs
, cmp_lhs
;
1453 last
= last_stmt (exit_edge
->src
);
1456 cmp_stmt
= dyn_cast
<gcond
*> (last
);
1460 cmp_rhs
= gimple_cond_rhs (cmp_stmt
);
1461 cmp_lhs
= gimple_cond_lhs (cmp_stmt
);
1462 if (!TREE_CONSTANT (cmp_rhs
)
1463 || !(integer_zerop (cmp_rhs
) || integer_onep (cmp_rhs
)))
1465 if (TREE_CODE (cmp_lhs
) != SSA_NAME
)
1468 /* If check_value_one is true, only the phi_args with value '1' will lead
1469 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1471 check_value_one
= (((integer_onep (cmp_rhs
))
1472 ^ (gimple_cond_code (cmp_stmt
) == EQ_EXPR
))
1473 ^ ((exit_edge
->flags
& EDGE_TRUE_VALUE
) != 0));
1475 lhs_def_stmt
= SSA_NAME_DEF_STMT (cmp_lhs
);
1479 phi_stmt
= dyn_cast
<gphi
*> (lhs_def_stmt
);
1483 for (i
= 0; i
< gimple_phi_num_args (phi_stmt
); i
++)
1487 tree val
= gimple_phi_arg_def (phi_stmt
, i
);
1488 edge e
= gimple_phi_arg_edge (phi_stmt
, i
);
1490 if (!TREE_CONSTANT (val
) || !(integer_zerop (val
) || integer_onep (val
)))
1492 if ((check_value_one
^ integer_onep (val
)) == 1)
1494 if (EDGE_COUNT (e
->src
->succs
) != 1)
1496 predict_paths_leading_to_edge (e
, PRED_LOOP_EXTRA_EXIT
, NOT_TAKEN
);
1500 FOR_EACH_EDGE (e1
, ei
, e
->src
->preds
)
1501 predict_paths_leading_to_edge (e1
, PRED_LOOP_EXTRA_EXIT
, NOT_TAKEN
);
1506 /* Predict edge probabilities by exploiting loop structure. */
1509 predict_loops (void)
1513 /* Try to predict out blocks in a loop that are not part of a
1515 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
1517 basic_block bb
, *bbs
;
1518 unsigned j
, n_exits
= 0;
1520 struct tree_niter_desc niter_desc
;
1522 struct nb_iter_bound
*nb_iter
;
1523 enum tree_code loop_bound_code
= ERROR_MARK
;
1524 tree loop_bound_step
= NULL
;
1525 tree loop_bound_var
= NULL
;
1526 tree loop_iv_base
= NULL
;
1529 exits
= get_loop_exit_edges (loop
);
1530 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1531 if (!(ex
->flags
& (EDGE_EH
| EDGE_ABNORMAL_CALL
| EDGE_FAKE
)))
1539 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1542 HOST_WIDE_INT nitercst
;
1543 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
1545 enum br_predictor predictor
;
1548 if (ex
->flags
& (EDGE_EH
| EDGE_ABNORMAL_CALL
| EDGE_FAKE
))
1550 /* Loop heuristics do not expect exit conditional to be inside
1551 inner loop. We predict from innermost to outermost loop. */
1552 if (predicted_by_loop_heuristics_p (ex
->src
))
1554 predict_extra_loop_exits (ex
);
1556 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
1557 niter
= niter_desc
.niter
;
1558 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
1559 niter
= loop_niter_by_eval (loop
, ex
);
1561 if (TREE_CODE (niter
) == INTEGER_CST
)
1563 if (tree_fits_uhwi_p (niter
)
1565 && compare_tree_int (niter
, max
- 1) == -1)
1566 nitercst
= tree_to_uhwi (niter
) + 1;
1569 predictor
= PRED_LOOP_ITERATIONS
;
1571 /* If we have just one exit and we can derive some information about
1572 the number of iterations of the loop from the statements inside
1573 the loop, use it to predict this exit. */
1574 else if (n_exits
== 1
1575 && estimated_stmt_executions (loop
, &nit
))
1577 if (wi::gtu_p (nit
, max
))
1580 nitercst
= nit
.to_shwi ();
1581 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
1583 /* If we have likely upper bound, trust it for very small iteration
1584 counts. Such loops would otherwise get mispredicted by standard
1585 LOOP_EXIT heuristics. */
1586 else if (n_exits
== 1
1587 && likely_max_stmt_executions (loop
, &nit
)
1589 RDIV (REG_BR_PROB_BASE
,
1592 [PRED_LOOP_EXIT
].hitrate
)))
1594 nitercst
= nit
.to_shwi ();
1595 predictor
= PRED_LOOP_ITERATIONS_MAX
;
1600 gcc_checking_assert (nitercst
);
1601 probability
= RDIV (REG_BR_PROB_BASE
, nitercst
);
1602 predict_edge (ex
, predictor
, probability
);
1606 /* Find information about loop bound variables. */
1607 for (nb_iter
= loop
->bounds
; nb_iter
;
1608 nb_iter
= nb_iter
->next
)
1610 && gimple_code (nb_iter
->stmt
) == GIMPLE_COND
)
1612 stmt
= as_a
<gcond
*> (nb_iter
->stmt
);
1615 if (!stmt
&& last_stmt (loop
->header
)
1616 && gimple_code (last_stmt (loop
->header
)) == GIMPLE_COND
)
1617 stmt
= as_a
<gcond
*> (last_stmt (loop
->header
));
1619 is_comparison_with_loop_invariant_p (stmt
, loop
,
1625 bbs
= get_loop_body (loop
);
1627 for (j
= 0; j
< loop
->num_nodes
; j
++)
1629 int header_found
= 0;
1635 /* Bypass loop heuristics on continue statement. These
1636 statements construct loops via "non-loop" constructs
1637 in the source language and are better to be handled
1639 if (predicted_by_p (bb
, PRED_CONTINUE
))
1642 /* Loop branch heuristics - predict an edge back to a
1643 loop's head as taken. */
1644 if (bb
== loop
->latch
)
1646 e
= find_edge (loop
->latch
, loop
->header
);
1650 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
1654 /* Loop exit heuristics - predict an edge exiting the loop if the
1655 conditional has no loop header successors as not taken. */
1657 /* If we already used more reliable loop exit predictors, do not
1658 bother with PRED_LOOP_EXIT. */
1659 && !predicted_by_loop_heuristics_p (bb
))
1661 /* For loop with many exits we don't want to predict all exits
1662 with the pretty large probability, because if all exits are
1663 considered in row, the loop would be predicted to iterate
1664 almost never. The code to divide probability by number of
1665 exits is very rough. It should compute the number of exits
1666 taken in each patch through function (not the overall number
1667 of exits that might be a lot higher for loops with wide switch
1668 statements in them) and compute n-th square root.
1670 We limit the minimal probability by 2% to avoid
1671 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1672 as this was causing regression in perl benchmark containing such
1675 int probability
= ((REG_BR_PROB_BASE
1676 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
1678 if (probability
< HITRATE (2))
1679 probability
= HITRATE (2);
1680 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1681 if (e
->dest
->index
< NUM_FIXED_BLOCKS
1682 || !flow_bb_inside_loop_p (loop
, e
->dest
))
1683 predict_edge (e
, PRED_LOOP_EXIT
, probability
);
1686 predict_iv_comparison (loop
, bb
, loop_bound_var
, loop_iv_base
,
1688 tree_to_shwi (loop_bound_step
));
1691 /* Free basic blocks from get_loop_body. */
1696 /* Attempt to predict probabilities of BB outgoing edges using local
1699 bb_estimate_probability_locally (basic_block bb
)
1701 rtx_insn
*last_insn
= BB_END (bb
);
1704 if (! can_predict_insn_p (last_insn
))
1706 cond
= get_condition (last_insn
, NULL
, false, false);
1710 /* Try "pointer heuristic."
1711 A comparison ptr == 0 is predicted as false.
1712 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1713 if (COMPARISON_P (cond
)
1714 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
1715 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
1717 if (GET_CODE (cond
) == EQ
)
1718 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
1719 else if (GET_CODE (cond
) == NE
)
1720 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
1724 /* Try "opcode heuristic."
1725 EQ tests are usually false and NE tests are usually true. Also,
1726 most quantities are positive, so we can make the appropriate guesses
1727 about signed comparisons against zero. */
1728 switch (GET_CODE (cond
))
1731 /* Unconditional branch. */
1732 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
1733 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
1738 /* Floating point comparisons appears to behave in a very
1739 unpredictable way because of special role of = tests in
1741 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1743 /* Comparisons with 0 are often used for booleans and there is
1744 nothing useful to predict about them. */
1745 else if (XEXP (cond
, 1) == const0_rtx
1746 || XEXP (cond
, 0) == const0_rtx
)
1749 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
1754 /* Floating point comparisons appears to behave in a very
1755 unpredictable way because of special role of = tests in
1757 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
1759 /* Comparisons with 0 are often used for booleans and there is
1760 nothing useful to predict about them. */
1761 else if (XEXP (cond
, 1) == const0_rtx
1762 || XEXP (cond
, 0) == const0_rtx
)
1765 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
1769 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
1773 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
1778 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1779 || XEXP (cond
, 1) == constm1_rtx
)
1780 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
1785 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
1786 || XEXP (cond
, 1) == constm1_rtx
)
1787 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
1795 /* Set edge->probability for each successor edge of BB. */
1797 guess_outgoing_edge_probabilities (basic_block bb
)
1799 bb_estimate_probability_locally (bb
);
1800 combine_predictions_for_insn (BB_END (bb
), bb
);
1803 static tree
expr_expected_value (tree
, bitmap
, enum br_predictor
*predictor
);
1805 /* Helper function for expr_expected_value. */
1808 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
,
1809 tree op1
, bitmap visited
, enum br_predictor
*predictor
)
1814 *predictor
= PRED_UNCONDITIONAL
;
1816 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1818 if (TREE_CONSTANT (op0
))
1821 if (code
!= SSA_NAME
)
1824 def
= SSA_NAME_DEF_STMT (op0
);
1826 /* If we were already here, break the infinite cycle. */
1827 if (!bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
)))
1830 if (gimple_code (def
) == GIMPLE_PHI
)
1832 /* All the arguments of the PHI node must have the same constant
1834 int i
, n
= gimple_phi_num_args (def
);
1835 tree val
= NULL
, new_val
;
1837 for (i
= 0; i
< n
; i
++)
1839 tree arg
= PHI_ARG_DEF (def
, i
);
1840 enum br_predictor predictor2
;
1842 /* If this PHI has itself as an argument, we cannot
1843 determine the string length of this argument. However,
1844 if we can find an expected constant value for the other
1845 PHI args then we can still be sure that this is
1846 likely a constant. So be optimistic and just
1847 continue with the next argument. */
1848 if (arg
== PHI_RESULT (def
))
1851 new_val
= expr_expected_value (arg
, visited
, &predictor2
);
1853 /* It is difficult to combine value predictors. Simply assume
1854 that later predictor is weaker and take its prediction. */
1855 if (predictor
&& *predictor
< predictor2
)
1856 *predictor
= predictor2
;
1861 else if (!operand_equal_p (val
, new_val
, false))
1866 if (is_gimple_assign (def
))
1868 if (gimple_assign_lhs (def
) != op0
)
1871 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
1872 gimple_assign_rhs1 (def
),
1873 gimple_assign_rhs_code (def
),
1874 gimple_assign_rhs2 (def
),
1875 visited
, predictor
);
1878 if (is_gimple_call (def
))
1880 tree decl
= gimple_call_fndecl (def
);
1883 if (gimple_call_internal_p (def
)
1884 && gimple_call_internal_fn (def
) == IFN_BUILTIN_EXPECT
)
1886 gcc_assert (gimple_call_num_args (def
) == 3);
1887 tree val
= gimple_call_arg (def
, 0);
1888 if (TREE_CONSTANT (val
))
1892 tree val2
= gimple_call_arg (def
, 2);
1893 gcc_assert (TREE_CODE (val2
) == INTEGER_CST
1894 && tree_fits_uhwi_p (val2
)
1895 && tree_to_uhwi (val2
) < END_PREDICTORS
);
1896 *predictor
= (enum br_predictor
) tree_to_uhwi (val2
);
1898 return gimple_call_arg (def
, 1);
1902 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
1903 switch (DECL_FUNCTION_CODE (decl
))
1905 case BUILT_IN_EXPECT
:
1908 if (gimple_call_num_args (def
) != 2)
1910 val
= gimple_call_arg (def
, 0);
1911 if (TREE_CONSTANT (val
))
1914 *predictor
= PRED_BUILTIN_EXPECT
;
1915 return gimple_call_arg (def
, 1);
1918 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
:
1919 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1
:
1920 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2
:
1921 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4
:
1922 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8
:
1923 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16
:
1924 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE
:
1925 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
:
1926 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
:
1927 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2
:
1928 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4
:
1929 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8
:
1930 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16
:
1931 /* Assume that any given atomic operation has low contention,
1932 and thus the compare-and-swap operation succeeds. */
1934 *predictor
= PRED_COMPARE_AND_SWAP
;
1935 return boolean_true_node
;
1944 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
1947 enum br_predictor predictor2
;
1948 op0
= expr_expected_value (op0
, visited
, predictor
);
1951 op1
= expr_expected_value (op1
, visited
, &predictor2
);
1952 if (predictor
&& *predictor
< predictor2
)
1953 *predictor
= predictor2
;
1956 res
= fold_build2 (code
, type
, op0
, op1
);
1957 if (TREE_CONSTANT (res
))
1961 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
1964 op0
= expr_expected_value (op0
, visited
, predictor
);
1967 res
= fold_build1 (code
, type
, op0
);
1968 if (TREE_CONSTANT (res
))
1975 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1976 The function is used by builtin_expect branch predictor so the evidence
1977 must come from this construct and additional possible constant folding.
1979 We may want to implement more involved value guess (such as value range
1980 propagation based prediction), but such tricks shall go to new
1984 expr_expected_value (tree expr
, bitmap visited
,
1985 enum br_predictor
*predictor
)
1987 enum tree_code code
;
1990 if (TREE_CONSTANT (expr
))
1993 *predictor
= PRED_UNCONDITIONAL
;
1997 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
1998 return expr_expected_value_1 (TREE_TYPE (expr
),
1999 op0
, code
, op1
, visited
, predictor
);
2002 /* Predict using opcode of the last statement in basic block. */
2004 tree_predict_by_opcode (basic_block bb
)
2006 gimple
*stmt
= last_stmt (bb
);
2014 enum br_predictor predictor
;
2016 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
2018 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
2019 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
2021 op0
= gimple_cond_lhs (stmt
);
2022 op1
= gimple_cond_rhs (stmt
);
2023 cmp
= gimple_cond_code (stmt
);
2024 type
= TREE_TYPE (op0
);
2025 visited
= BITMAP_ALLOC (NULL
);
2026 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, visited
,
2028 BITMAP_FREE (visited
);
2029 if (val
&& TREE_CODE (val
) == INTEGER_CST
)
2031 if (predictor
== PRED_BUILTIN_EXPECT
)
2033 int percent
= PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY
);
2035 gcc_assert (percent
>= 0 && percent
<= 100);
2036 if (integer_zerop (val
))
2037 percent
= 100 - percent
;
2038 predict_edge (then_edge
, PRED_BUILTIN_EXPECT
, HITRATE (percent
));
2041 predict_edge (then_edge
, predictor
,
2042 integer_zerop (val
) ? NOT_TAKEN
: TAKEN
);
2044 /* Try "pointer heuristic."
2045 A comparison ptr == 0 is predicted as false.
2046 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2047 if (POINTER_TYPE_P (type
))
2050 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
2051 else if (cmp
== NE_EXPR
)
2052 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
2056 /* Try "opcode heuristic."
2057 EQ tests are usually false and NE tests are usually true. Also,
2058 most quantities are positive, so we can make the appropriate guesses
2059 about signed comparisons against zero. */
2064 /* Floating point comparisons appears to behave in a very
2065 unpredictable way because of special role of = tests in
2067 if (FLOAT_TYPE_P (type
))
2069 /* Comparisons with 0 are often used for booleans and there is
2070 nothing useful to predict about them. */
2071 else if (integer_zerop (op0
) || integer_zerop (op1
))
2074 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
2079 /* Floating point comparisons appears to behave in a very
2080 unpredictable way because of special role of = tests in
2082 if (FLOAT_TYPE_P (type
))
2084 /* Comparisons with 0 are often used for booleans and there is
2085 nothing useful to predict about them. */
2086 else if (integer_zerop (op0
)
2087 || integer_zerop (op1
))
2090 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
2094 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
2097 case UNORDERED_EXPR
:
2098 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
2103 if (integer_zerop (op1
)
2104 || integer_onep (op1
)
2105 || integer_all_onesp (op1
)
2108 || real_minus_onep (op1
))
2109 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
2114 if (integer_zerop (op1
)
2115 || integer_onep (op1
)
2116 || integer_all_onesp (op1
)
2119 || real_minus_onep (op1
))
2120 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
2128 /* Try to guess whether the value of return means error code. */
2130 static enum br_predictor
2131 return_prediction (tree val
, enum prediction
*prediction
)
2135 return PRED_NO_PREDICTION
;
2136 /* Different heuristics for pointers and scalars. */
2137 if (POINTER_TYPE_P (TREE_TYPE (val
)))
2139 /* NULL is usually not returned. */
2140 if (integer_zerop (val
))
2142 *prediction
= NOT_TAKEN
;
2143 return PRED_NULL_RETURN
;
2146 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
2148 /* Negative return values are often used to indicate
2150 if (TREE_CODE (val
) == INTEGER_CST
2151 && tree_int_cst_sgn (val
) < 0)
2153 *prediction
= NOT_TAKEN
;
2154 return PRED_NEGATIVE_RETURN
;
2156 /* Constant return values seems to be commonly taken.
2157 Zero/one often represent booleans so exclude them from the
2159 if (TREE_CONSTANT (val
)
2160 && (!integer_zerop (val
) && !integer_onep (val
)))
2162 *prediction
= TAKEN
;
2163 return PRED_CONST_RETURN
;
2166 return PRED_NO_PREDICTION
;
2169 /* Find the basic block with return expression and look up for possible
2170 return value trying to apply RETURN_PREDICTION heuristics. */
2172 apply_return_prediction (void)
2174 greturn
*return_stmt
= NULL
;
2178 int phi_num_args
, i
;
2179 enum br_predictor pred
;
2180 enum prediction direction
;
2183 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
2185 gimple
*last
= last_stmt (e
->src
);
2187 && gimple_code (last
) == GIMPLE_RETURN
)
2189 return_stmt
= as_a
<greturn
*> (last
);
2195 return_val
= gimple_return_retval (return_stmt
);
2198 if (TREE_CODE (return_val
) != SSA_NAME
2199 || !SSA_NAME_DEF_STMT (return_val
)
2200 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
2202 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (return_val
));
2203 phi_num_args
= gimple_phi_num_args (phi
);
2204 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
2206 /* Avoid the degenerate case where all return values form the function
2207 belongs to same category (ie they are all positive constants)
2208 so we can hardly say something about them. */
2209 for (i
= 1; i
< phi_num_args
; i
++)
2210 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
2212 if (i
!= phi_num_args
)
2213 for (i
= 0; i
< phi_num_args
; i
++)
2215 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
2216 if (pred
!= PRED_NO_PREDICTION
)
2217 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi
, i
), pred
,
2222 /* Look for basic block that contains unlikely to happen events
2223 (such as noreturn calls) and mark all paths leading to execution
2224 of this basic blocks as unlikely. */
2227 tree_bb_level_predictions (void)
2230 bool has_return_edges
= false;
2234 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
2235 if (!(e
->flags
& (EDGE_ABNORMAL
| EDGE_FAKE
| EDGE_EH
)))
2237 has_return_edges
= true;
2241 apply_return_prediction ();
2243 FOR_EACH_BB_FN (bb
, cfun
)
2245 gimple_stmt_iterator gsi
;
2247 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2249 gimple
*stmt
= gsi_stmt (gsi
);
2252 if (is_gimple_call (stmt
))
2254 if ((gimple_call_flags (stmt
) & ECF_NORETURN
)
2255 && has_return_edges
)
2256 predict_paths_leading_to (bb
, PRED_NORETURN
,
2258 decl
= gimple_call_fndecl (stmt
);
2260 && lookup_attribute ("cold",
2261 DECL_ATTRIBUTES (decl
)))
2262 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
2265 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
2267 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
2268 gimple_predict_outcome (stmt
));
2269 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2270 hints to callers. */
2276 /* Callback for hash_map::traverse, asserts that the pointer map is
2280 assert_is_empty (const_basic_block
const &, edge_prediction
*const &value
,
2283 gcc_assert (!value
);
2287 /* Predict branch probabilities and estimate profile for basic block BB. */
2290 tree_estimate_probability_bb (basic_block bb
)
2296 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2298 /* Predict edges to user labels with attributes. */
2299 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
2301 gimple_stmt_iterator gi
;
2302 for (gi
= gsi_start_bb (e
->dest
); !gsi_end_p (gi
); gsi_next (&gi
))
2304 glabel
*label_stmt
= dyn_cast
<glabel
*> (gsi_stmt (gi
));
2309 decl
= gimple_label_label (label_stmt
);
2310 if (DECL_ARTIFICIAL (decl
))
2313 /* Finally, we have a user-defined label. */
2314 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl
)))
2315 predict_edge_def (e
, PRED_COLD_LABEL
, NOT_TAKEN
);
2316 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl
)))
2317 predict_edge_def (e
, PRED_HOT_LABEL
, TAKEN
);
2321 /* Predict early returns to be probable, as we've already taken
2322 care for error returns and other cases are often used for
2323 fast paths through function.
2325 Since we've already removed the return statements, we are
2326 looking for CFG like:
2336 if (e
->dest
!= bb
->next_bb
2337 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2338 && single_succ_p (e
->dest
)
2339 && single_succ_edge (e
->dest
)->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
2340 && (last
= last_stmt (e
->dest
)) != NULL
2341 && gimple_code (last
) == GIMPLE_RETURN
)
2346 if (single_succ_p (bb
))
2348 FOR_EACH_EDGE (e1
, ei1
, bb
->preds
)
2349 if (!predicted_by_p (e1
->src
, PRED_NULL_RETURN
)
2350 && !predicted_by_p (e1
->src
, PRED_CONST_RETURN
)
2351 && !predicted_by_p (e1
->src
, PRED_NEGATIVE_RETURN
))
2352 predict_edge_def (e1
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2355 if (!predicted_by_p (e
->src
, PRED_NULL_RETURN
)
2356 && !predicted_by_p (e
->src
, PRED_CONST_RETURN
)
2357 && !predicted_by_p (e
->src
, PRED_NEGATIVE_RETURN
))
2358 predict_edge_def (e
, PRED_TREE_EARLY_RETURN
, NOT_TAKEN
);
2361 /* Look for block we are guarding (ie we dominate it,
2362 but it doesn't postdominate us). */
2363 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
) && e
->dest
!= bb
2364 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
2365 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
2367 gimple_stmt_iterator bi
;
2369 /* The call heuristic claims that a guarded function call
2370 is improbable. This is because such calls are often used
2371 to signal exceptional situations such as printing error
2373 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
2376 gimple
*stmt
= gsi_stmt (bi
);
2377 if (is_gimple_call (stmt
)
2378 /* Constant and pure calls are hardly used to signalize
2379 something exceptional. */
2380 && gimple_has_side_effects (stmt
))
2382 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
2388 tree_predict_by_opcode (bb
);
2391 /* Predict branch probabilities and estimate profile of the tree CFG.
2392 This function can be called from the loop optimizers to recompute
2393 the profile information.
2394 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2397 tree_estimate_probability (bool dry_run
)
2401 add_noreturn_fake_exit_edges ();
2402 connect_infinite_loops_to_exit ();
2403 /* We use loop_niter_by_eval, which requires that the loops have
2405 create_preheaders (CP_SIMPLE_PREHEADERS
);
2406 calculate_dominance_info (CDI_POST_DOMINATORS
);
2408 bb_predictions
= new hash_map
<const_basic_block
, edge_prediction
*>;
2409 tree_bb_level_predictions ();
2410 record_loop_exits ();
2412 if (number_of_loops (cfun
) > 1)
2415 FOR_EACH_BB_FN (bb
, cfun
)
2416 tree_estimate_probability_bb (bb
);
2418 FOR_EACH_BB_FN (bb
, cfun
)
2419 combine_predictions_for_bb (bb
, dry_run
);
2422 bb_predictions
->traverse
<void *, assert_is_empty
> (NULL
);
2424 delete bb_predictions
;
2425 bb_predictions
= NULL
;
2428 estimate_bb_frequencies (false);
2429 free_dominance_info (CDI_POST_DOMINATORS
);
2430 remove_fake_exit_edges ();
2433 /* Predict edges to successors of CUR whose sources are not postdominated by
2434 BB by PRED and recurse to all postdominators. */
2437 predict_paths_for_bb (basic_block cur
, basic_block bb
,
2438 enum br_predictor pred
,
2439 enum prediction taken
,
2446 /* We are looking for all edges forming edge cut induced by
2447 set of all blocks postdominated by BB. */
2448 FOR_EACH_EDGE (e
, ei
, cur
->preds
)
2449 if (e
->src
->index
>= NUM_FIXED_BLOCKS
2450 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, bb
))
2456 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2457 if (e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2459 gcc_assert (bb
== cur
|| dominated_by_p (CDI_POST_DOMINATORS
, cur
, bb
));
2461 /* See if there is an edge from e->src that is not abnormal
2462 and does not lead to BB. */
2463 FOR_EACH_EDGE (e2
, ei2
, e
->src
->succs
)
2465 && !(e2
->flags
& (EDGE_EH
| EDGE_FAKE
))
2466 && !dominated_by_p (CDI_POST_DOMINATORS
, e2
->dest
, bb
))
2472 /* If there is non-abnormal path leaving e->src, predict edge
2473 using predictor. Otherwise we need to look for paths
2476 The second may lead to infinite loop in the case we are predicitng
2477 regions that are only reachable by abnormal edges. We simply
2478 prevent visiting given BB twice. */
2481 if (!edge_predicted_by_p (e
, pred
, taken
))
2482 predict_edge_def (e
, pred
, taken
);
2484 else if (bitmap_set_bit (visited
, e
->src
->index
))
2485 predict_paths_for_bb (e
->src
, e
->src
, pred
, taken
, visited
);
2487 for (son
= first_dom_son (CDI_POST_DOMINATORS
, cur
);
2489 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
2490 predict_paths_for_bb (son
, bb
, pred
, taken
, visited
);
2493 /* Sets branch probabilities according to PREDiction and
2497 predict_paths_leading_to (basic_block bb
, enum br_predictor pred
,
2498 enum prediction taken
)
2500 bitmap visited
= BITMAP_ALLOC (NULL
);
2501 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2502 BITMAP_FREE (visited
);
2505 /* Like predict_paths_leading_to but take edge instead of basic block. */
2508 predict_paths_leading_to_edge (edge e
, enum br_predictor pred
,
2509 enum prediction taken
)
2511 bool has_nonloop_edge
= false;
2515 basic_block bb
= e
->src
;
2516 FOR_EACH_EDGE (e2
, ei
, bb
->succs
)
2517 if (e2
->dest
!= e
->src
&& e2
->dest
!= e
->dest
2518 && !(e
->flags
& (EDGE_EH
| EDGE_FAKE
))
2519 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e2
->dest
))
2521 has_nonloop_edge
= true;
2524 if (!has_nonloop_edge
)
2526 bitmap visited
= BITMAP_ALLOC (NULL
);
2527 predict_paths_for_bb (bb
, bb
, pred
, taken
, visited
);
2528 BITMAP_FREE (visited
);
2531 predict_edge_def (e
, pred
, taken
);
2534 /* This is used to carry information about basic blocks. It is
2535 attached to the AUX field of the standard CFG block. */
2539 /* Estimated frequency of execution of basic_block. */
2542 /* To keep queue of basic blocks to process. */
2545 /* Number of predecessors we need to visit first. */
2549 /* Similar information for edges. */
2550 struct edge_prob_info
2552 /* In case edge is a loopback edge, the probability edge will be reached
2553 in case header is. Estimated number of iterations of the loop can be
2554 then computed as 1 / (1 - back_edge_prob). */
2555 sreal back_edge_prob
;
2556 /* True if the edge is a loopback edge in the natural loop. */
2557 unsigned int back_edge
:1;
2560 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2562 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2564 /* Helper function for estimate_bb_frequencies.
2565 Propagate the frequencies in blocks marked in
2566 TOVISIT, starting in HEAD. */
2569 propagate_freq (basic_block head
, bitmap tovisit
)
2578 /* For each basic block we need to visit count number of his predecessors
2579 we need to visit first. */
2580 EXECUTE_IF_SET_IN_BITMAP (tovisit
, 0, i
, bi
)
2585 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2587 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2589 bool visit
= bitmap_bit_p (tovisit
, e
->src
->index
);
2591 if (visit
&& !(e
->flags
& EDGE_DFS_BACK
))
2593 else if (visit
&& dump_file
&& !EDGE_INFO (e
)->back_edge
)
2595 "Irreducible region hit, ignoring edge to %i->%i\n",
2596 e
->src
->index
, bb
->index
);
2598 BLOCK_INFO (bb
)->npredecessors
= count
;
2599 /* When function never returns, we will never process exit block. */
2600 if (!count
&& bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
2601 bb
->count
= bb
->frequency
= 0;
2604 BLOCK_INFO (head
)->frequency
= 1;
2606 for (bb
= head
; bb
; bb
= nextbb
)
2609 sreal cyclic_probability
= 0;
2610 sreal frequency
= 0;
2612 nextbb
= BLOCK_INFO (bb
)->next
;
2613 BLOCK_INFO (bb
)->next
= NULL
;
2615 /* Compute frequency of basic block. */
2619 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2620 gcc_assert (!bitmap_bit_p (tovisit
, e
->src
->index
)
2621 || (e
->flags
& EDGE_DFS_BACK
));
2623 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2624 if (EDGE_INFO (e
)->back_edge
)
2626 cyclic_probability
+= EDGE_INFO (e
)->back_edge_prob
;
2628 else if (!(e
->flags
& EDGE_DFS_BACK
))
2630 /* frequency += (e->probability
2631 * BLOCK_INFO (e->src)->frequency /
2632 REG_BR_PROB_BASE); */
2634 sreal tmp
= e
->probability
;
2635 tmp
*= BLOCK_INFO (e
->src
)->frequency
;
2636 tmp
*= real_inv_br_prob_base
;
2640 if (cyclic_probability
== 0)
2642 BLOCK_INFO (bb
)->frequency
= frequency
;
2646 if (cyclic_probability
> real_almost_one
)
2647 cyclic_probability
= real_almost_one
;
2649 /* BLOCK_INFO (bb)->frequency = frequency
2650 / (1 - cyclic_probability) */
2652 cyclic_probability
= sreal (1) - cyclic_probability
;
2653 BLOCK_INFO (bb
)->frequency
= frequency
/ cyclic_probability
;
2657 bitmap_clear_bit (tovisit
, bb
->index
);
2659 e
= find_edge (bb
, head
);
2662 /* EDGE_INFO (e)->back_edge_prob
2663 = ((e->probability * BLOCK_INFO (bb)->frequency)
2664 / REG_BR_PROB_BASE); */
2666 sreal tmp
= e
->probability
;
2667 tmp
*= BLOCK_INFO (bb
)->frequency
;
2668 EDGE_INFO (e
)->back_edge_prob
= tmp
* real_inv_br_prob_base
;
2671 /* Propagate to successor blocks. */
2672 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2673 if (!(e
->flags
& EDGE_DFS_BACK
)
2674 && BLOCK_INFO (e
->dest
)->npredecessors
)
2676 BLOCK_INFO (e
->dest
)->npredecessors
--;
2677 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
2682 BLOCK_INFO (last
)->next
= e
->dest
;
2690 /* Estimate frequencies in loops at same nest level. */
2693 estimate_loops_at_level (struct loop
*first_loop
)
2697 for (loop
= first_loop
; loop
; loop
= loop
->next
)
2702 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2704 estimate_loops_at_level (loop
->inner
);
2706 /* Find current loop back edge and mark it. */
2707 e
= loop_latch_edge (loop
);
2708 EDGE_INFO (e
)->back_edge
= 1;
2710 bbs
= get_loop_body (loop
);
2711 for (i
= 0; i
< loop
->num_nodes
; i
++)
2712 bitmap_set_bit (tovisit
, bbs
[i
]->index
);
2714 propagate_freq (loop
->header
, tovisit
);
2715 BITMAP_FREE (tovisit
);
2719 /* Propagates frequencies through structure of loops. */
2722 estimate_loops (void)
2724 bitmap tovisit
= BITMAP_ALLOC (NULL
);
2727 /* Start by estimating the frequencies in the loops. */
2728 if (number_of_loops (cfun
) > 1)
2729 estimate_loops_at_level (current_loops
->tree_root
->inner
);
2731 /* Now propagate the frequencies through all the blocks. */
2732 FOR_ALL_BB_FN (bb
, cfun
)
2734 bitmap_set_bit (tovisit
, bb
->index
);
2736 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun
), tovisit
);
2737 BITMAP_FREE (tovisit
);
2740 /* Drop the profile for NODE to guessed, and update its frequency based on
2741 whether it is expected to be hot given the CALL_COUNT. */
2744 drop_profile (struct cgraph_node
*node
, gcov_type call_count
)
2746 struct function
*fn
= DECL_STRUCT_FUNCTION (node
->decl
);
2747 /* In the case where this was called by another function with a
2748 dropped profile, call_count will be 0. Since there are no
2749 non-zero call counts to this function, we don't know for sure
2750 whether it is hot, and therefore it will be marked normal below. */
2751 bool hot
= maybe_hot_count_p (NULL
, call_count
);
2755 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2756 node
->name (), node
->order
,
2757 hot
? "Function is hot" : "Function is normal");
2758 /* We only expect to miss profiles for functions that are reached
2759 via non-zero call edges in cases where the function may have
2760 been linked from another module or library (COMDATs and extern
2761 templates). See the comments below for handle_missing_profiles.
2762 Also, only warn in cases where the missing counts exceed the
2763 number of training runs. In certain cases with an execv followed
2764 by a no-return call the profile for the no-return call is not
2765 dumped and there can be a mismatch. */
2766 if (!DECL_COMDAT (node
->decl
) && !DECL_EXTERNAL (node
->decl
)
2767 && call_count
> profile_info
->runs
)
2769 if (flag_profile_correction
)
2773 "Missing counts for called function %s/%i\n",
2774 node
->name (), node
->order
);
2777 warning (0, "Missing counts for called function %s/%i",
2778 node
->name (), node
->order
);
2781 profile_status_for_fn (fn
)
2782 = (flag_guess_branch_prob
? PROFILE_GUESSED
: PROFILE_ABSENT
);
2784 = hot
? NODE_FREQUENCY_HOT
: NODE_FREQUENCY_NORMAL
;
2787 /* In the case of COMDAT routines, multiple object files will contain the same
2788 function and the linker will select one for the binary. In that case
2789 all the other copies from the profile instrument binary will be missing
2790 profile counts. Look for cases where this happened, due to non-zero
2791 call counts going to 0-count functions, and drop the profile to guessed
2792 so that we can use the estimated probabilities and avoid optimizing only
2795 The other case where the profile may be missing is when the routine
2796 is not going to be emitted to the object file, e.g. for "extern template"
2797 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2798 all other cases of non-zero calls to 0-count functions. */
2801 handle_missing_profiles (void)
2803 struct cgraph_node
*node
;
2804 int unlikely_count_fraction
= PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION
);
2805 vec
<struct cgraph_node
*> worklist
;
2806 worklist
.create (64);
2808 /* See if 0 count function has non-0 count callers. In this case we
2809 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2810 FOR_EACH_DEFINED_FUNCTION (node
)
2812 struct cgraph_edge
*e
;
2813 gcov_type call_count
= 0;
2814 gcov_type max_tp_first_run
= 0;
2815 struct function
*fn
= DECL_STRUCT_FUNCTION (node
->decl
);
2819 for (e
= node
->callers
; e
; e
= e
->next_caller
)
2821 call_count
+= e
->count
;
2823 if (e
->caller
->tp_first_run
> max_tp_first_run
)
2824 max_tp_first_run
= e
->caller
->tp_first_run
;
2827 /* If time profile is missing, let assign the maximum that comes from
2828 caller functions. */
2829 if (!node
->tp_first_run
&& max_tp_first_run
)
2830 node
->tp_first_run
= max_tp_first_run
+ 1;
2834 && (call_count
* unlikely_count_fraction
>= profile_info
->runs
))
2836 drop_profile (node
, call_count
);
2837 worklist
.safe_push (node
);
2841 /* Propagate the profile dropping to other 0-count COMDATs that are
2842 potentially called by COMDATs we already dropped the profile on. */
2843 while (worklist
.length () > 0)
2845 struct cgraph_edge
*e
;
2847 node
= worklist
.pop ();
2848 for (e
= node
->callees
; e
; e
= e
->next_caller
)
2850 struct cgraph_node
*callee
= e
->callee
;
2851 struct function
*fn
= DECL_STRUCT_FUNCTION (callee
->decl
);
2853 if (callee
->count
> 0)
2855 if (DECL_COMDAT (callee
->decl
) && fn
&& fn
->cfg
2856 && profile_status_for_fn (fn
) == PROFILE_READ
)
2858 drop_profile (node
, 0);
2859 worklist
.safe_push (callee
);
2863 worklist
.release ();
2866 /* Convert counts measured by profile driven feedback to frequencies.
2867 Return nonzero iff there was any nonzero execution count. */
2870 counts_to_freqs (void)
2872 gcov_type count_max
, true_count_max
= 0;
2875 /* Don't overwrite the estimated frequencies when the profile for
2876 the function is missing. We may drop this function PROFILE_GUESSED
2877 later in drop_profile (). */
2878 if (!flag_auto_profile
&& !ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
)
2881 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
2882 true_count_max
= MAX (bb
->count
, true_count_max
);
2884 count_max
= MAX (true_count_max
, 1);
2885 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
2886 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
2888 return true_count_max
;
2891 /* Return true if function is likely to be expensive, so there is no point to
2892 optimize performance of prologue, epilogue or do inlining at the expense
2893 of code size growth. THRESHOLD is the limit of number of instructions
2894 function can execute at average to be still considered not expensive. */
2897 expensive_function_p (int threshold
)
2899 unsigned int sum
= 0;
2903 /* We can not compute accurately for large thresholds due to scaled
2905 gcc_assert (threshold
<= BB_FREQ_MAX
);
2907 /* Frequencies are out of range. This either means that function contains
2908 internal loop executing more than BB_FREQ_MAX times or profile feedback
2909 is available and function has not been executed at all. */
2910 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->frequency
== 0)
2913 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2914 limit
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->frequency
* threshold
;
2915 FOR_EACH_BB_FN (bb
, cfun
)
2919 FOR_BB_INSNS (bb
, insn
)
2920 if (active_insn_p (insn
))
2922 sum
+= bb
->frequency
;
2931 /* Estimate and propagate basic block frequencies using the given branch
2932 probabilities. If FORCE is true, the frequencies are used to estimate
2933 the counts even when there are already non-zero profile counts. */
2936 estimate_bb_frequencies (bool force
)
2941 if (force
|| profile_status_for_fn (cfun
) != PROFILE_READ
|| !counts_to_freqs ())
2943 static int real_values_initialized
= 0;
2945 if (!real_values_initialized
)
2947 real_values_initialized
= 1;
2948 real_br_prob_base
= REG_BR_PROB_BASE
;
2949 real_bb_freq_max
= BB_FREQ_MAX
;
2950 real_one_half
= sreal (1, -1);
2951 real_inv_br_prob_base
= sreal (1) / real_br_prob_base
;
2952 real_almost_one
= sreal (1) - real_inv_br_prob_base
;
2955 mark_dfs_back_edges ();
2957 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->probability
=
2960 /* Set up block info for each basic block. */
2961 alloc_aux_for_blocks (sizeof (block_info
));
2962 alloc_aux_for_edges (sizeof (edge_prob_info
));
2963 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
2968 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2970 EDGE_INFO (e
)->back_edge_prob
= e
->probability
;
2971 EDGE_INFO (e
)->back_edge_prob
*= real_inv_br_prob_base
;
2975 /* First compute frequencies locally for each loop from innermost
2976 to outermost to examine frequencies for back edges. */
2980 FOR_EACH_BB_FN (bb
, cfun
)
2981 if (freq_max
< BLOCK_INFO (bb
)->frequency
)
2982 freq_max
= BLOCK_INFO (bb
)->frequency
;
2984 freq_max
= real_bb_freq_max
/ freq_max
;
2985 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
2987 sreal tmp
= BLOCK_INFO (bb
)->frequency
* freq_max
+ real_one_half
;
2988 bb
->frequency
= tmp
.to_int ();
2991 free_aux_for_blocks ();
2992 free_aux_for_edges ();
2994 compute_function_frequency ();
2997 /* Decide whether function is hot, cold or unlikely executed. */
2999 compute_function_frequency (void)
3002 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3004 if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
3005 || MAIN_NAME_P (DECL_NAME (current_function_decl
)))
3006 node
->only_called_at_startup
= true;
3007 if (DECL_STATIC_DESTRUCTOR (current_function_decl
))
3008 node
->only_called_at_exit
= true;
3010 if (profile_status_for_fn (cfun
) != PROFILE_READ
)
3012 int flags
= flags_from_decl_or_type (current_function_decl
);
3013 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
))
3015 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
3016 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl
))
3018 node
->frequency
= NODE_FREQUENCY_HOT
;
3019 else if (flags
& ECF_NORETURN
)
3020 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
3021 else if (MAIN_NAME_P (DECL_NAME (current_function_decl
)))
3022 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
3023 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl
)
3024 || DECL_STATIC_DESTRUCTOR (current_function_decl
))
3025 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
3029 /* Only first time try to drop function into unlikely executed.
3030 After inlining the roundoff errors may confuse us.
3031 Ipa-profile pass will drop functions only called from unlikely
3032 functions to unlikely and that is most of what we care about. */
3033 if (!cfun
->after_inlining
)
3034 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
3035 FOR_EACH_BB_FN (bb
, cfun
)
3037 if (maybe_hot_bb_p (cfun
, bb
))
3039 node
->frequency
= NODE_FREQUENCY_HOT
;
3042 if (!probably_never_executed_bb_p (cfun
, bb
))
3043 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3047 /* Build PREDICT_EXPR. */
3049 build_predict_expr (enum br_predictor predictor
, enum prediction taken
)
3051 tree t
= build1 (PREDICT_EXPR
, void_type_node
,
3052 build_int_cst (integer_type_node
, predictor
));
3053 SET_PREDICT_EXPR_OUTCOME (t
, taken
);
3058 predictor_name (enum br_predictor predictor
)
3060 return predictor_info
[predictor
].name
;
3063 /* Predict branch probabilities and estimate profile of the tree CFG. */
3067 const pass_data pass_data_profile
=
3069 GIMPLE_PASS
, /* type */
3070 "profile_estimate", /* name */
3071 OPTGROUP_NONE
, /* optinfo_flags */
3072 TV_BRANCH_PROB
, /* tv_id */
3073 PROP_cfg
, /* properties_required */
3074 0, /* properties_provided */
3075 0, /* properties_destroyed */
3076 0, /* todo_flags_start */
3077 0, /* todo_flags_finish */
3080 class pass_profile
: public gimple_opt_pass
3083 pass_profile (gcc::context
*ctxt
)
3084 : gimple_opt_pass (pass_data_profile
, ctxt
)
3087 /* opt_pass methods: */
3088 virtual bool gate (function
*) { return flag_guess_branch_prob
; }
3089 virtual unsigned int execute (function
*);
3091 }; // class pass_profile
3094 pass_profile::execute (function
*fun
)
3098 if (profile_status_for_fn (cfun
) == PROFILE_GUESSED
)
3101 loop_optimizer_init (LOOPS_NORMAL
);
3102 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3103 flow_loops_dump (dump_file
, NULL
, 0);
3105 mark_irreducible_loops ();
3107 nb_loops
= number_of_loops (fun
);
3111 tree_estimate_probability (false);
3116 loop_optimizer_finalize ();
3117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3118 gimple_dump_cfg (dump_file
, dump_flags
);
3119 if (profile_status_for_fn (fun
) == PROFILE_ABSENT
)
3120 profile_status_for_fn (fun
) = PROFILE_GUESSED
;
3127 make_pass_profile (gcc::context
*ctxt
)
3129 return new pass_profile (ctxt
);
3134 const pass_data pass_data_strip_predict_hints
=
3136 GIMPLE_PASS
, /* type */
3137 "*strip_predict_hints", /* name */
3138 OPTGROUP_NONE
, /* optinfo_flags */
3139 TV_BRANCH_PROB
, /* tv_id */
3140 PROP_cfg
, /* properties_required */
3141 0, /* properties_provided */
3142 0, /* properties_destroyed */
3143 0, /* todo_flags_start */
3144 0, /* todo_flags_finish */
3147 class pass_strip_predict_hints
: public gimple_opt_pass
3150 pass_strip_predict_hints (gcc::context
*ctxt
)
3151 : gimple_opt_pass (pass_data_strip_predict_hints
, ctxt
)
3154 /* opt_pass methods: */
3155 opt_pass
* clone () { return new pass_strip_predict_hints (m_ctxt
); }
3156 virtual unsigned int execute (function
*);
3158 }; // class pass_strip_predict_hints
3160 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3161 we no longer need. */
3163 pass_strip_predict_hints::execute (function
*fun
)
3169 FOR_EACH_BB_FN (bb
, fun
)
3171 gimple_stmt_iterator bi
;
3172 for (bi
= gsi_start_bb (bb
); !gsi_end_p (bi
);)
3174 gimple
*stmt
= gsi_stmt (bi
);
3176 if (gimple_code (stmt
) == GIMPLE_PREDICT
)
3178 gsi_remove (&bi
, true);
3181 else if (is_gimple_call (stmt
))
3183 tree fndecl
= gimple_call_fndecl (stmt
);
3186 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
3187 && DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_EXPECT
3188 && gimple_call_num_args (stmt
) == 2)
3189 || (gimple_call_internal_p (stmt
)
3190 && gimple_call_internal_fn (stmt
) == IFN_BUILTIN_EXPECT
))
3192 var
= gimple_call_lhs (stmt
);
3196 = gimple_build_assign (var
, gimple_call_arg (stmt
, 0));
3197 gsi_replace (&bi
, ass_stmt
, true);
3201 gsi_remove (&bi
, true);
3215 make_pass_strip_predict_hints (gcc::context
*ctxt
)
3217 return new pass_strip_predict_hints (ctxt
);
3220 /* Rebuild function frequencies. Passes are in general expected to
3221 maintain profile by hand, however in some cases this is not possible:
3222 for example when inlining several functions with loops freuqencies might run
3223 out of scale and thus needs to be recomputed. */
3226 rebuild_frequencies (void)
3228 timevar_push (TV_REBUILD_FREQUENCIES
);
3230 /* When the max bb count in the function is small, there is a higher
3231 chance that there were truncation errors in the integer scaling
3232 of counts by inlining and other optimizations. This could lead
3233 to incorrect classification of code as being cold when it isn't.
3234 In that case, force the estimation of bb counts/frequencies from the
3235 branch probabilities, rather than computing frequencies from counts,
3236 which may also lead to frequencies incorrectly reduced to 0. There
3237 is less precision in the probabilities, so we only do this for small
3239 gcov_type count_max
= 0;
3241 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
3242 count_max
= MAX (bb
->count
, count_max
);
3244 if (profile_status_for_fn (cfun
) == PROFILE_GUESSED
3245 || (!flag_auto_profile
&& profile_status_for_fn (cfun
) == PROFILE_READ
3246 && count_max
< REG_BR_PROB_BASE
/10))
3248 loop_optimizer_init (0);
3249 add_noreturn_fake_exit_edges ();
3250 mark_irreducible_loops ();
3251 connect_infinite_loops_to_exit ();
3252 estimate_bb_frequencies (true);
3253 remove_fake_exit_edges ();
3254 loop_optimizer_finalize ();
3256 else if (profile_status_for_fn (cfun
) == PROFILE_READ
)
3260 timevar_pop (TV_REBUILD_FREQUENCIES
);
3263 /* Perform a dry run of the branch prediction pass and report comparsion of
3264 the predicted and real profile into the dump file. */
3267 report_predictor_hitrates (void)
3271 loop_optimizer_init (LOOPS_NORMAL
);
3272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3273 flow_loops_dump (dump_file
, NULL
, 0);
3275 mark_irreducible_loops ();
3277 nb_loops
= number_of_loops (cfun
);
3281 tree_estimate_probability (true);
3286 loop_optimizer_finalize ();
3289 /* Force edge E to be cold.
3290 If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
3291 keep low probability to represent possible error in a guess. This is used
3292 i.e. in case we predict loop to likely iterate given number of times but
3293 we are not 100% sure.
3295 This function locally updates profile without attempt to keep global
3296 consistency which can not be reached in full generality without full profile
3297 rebuild from probabilities alone. Doing so is not necessarily a good idea
3298 because frequencies and counts may be more realistic then probabilities.
3300 In some cases (such as for elimination of early exits during full loop
3301 unrolling) the caller can ensure that profile will get consistent
3305 force_edge_cold (edge e
, bool impossible
)
3307 gcov_type count_sum
= 0;
3311 gcov_type old_count
= e
->count
;
3312 int old_probability
= e
->probability
;
3313 gcov_type gcov_scale
= REG_BR_PROB_BASE
;
3314 int prob_scale
= REG_BR_PROB_BASE
;
3316 /* If edge is already improbably or cold, just return. */
3317 if (e
->probability
<= impossible
? PROB_VERY_UNLIKELY
: 0
3318 && (!impossible
|| !e
->count
))
3320 FOR_EACH_EDGE (e2
, ei
, e
->src
->succs
)
3323 count_sum
+= e2
->count
;
3324 prob_sum
+= e2
->probability
;
3327 /* If there are other edges out of e->src, redistribute probabilitity
3332 = MIN (e
->probability
, impossible
? 0 : PROB_VERY_UNLIKELY
);
3333 if (old_probability
)
3334 e
->count
= RDIV (e
->count
* e
->probability
, old_probability
);
3336 e
->count
= MIN (e
->count
, impossible
? 0 : 1);
3339 gcov_scale
= RDIV ((count_sum
+ old_count
- e
->count
) * REG_BR_PROB_BASE
,
3341 prob_scale
= RDIV ((REG_BR_PROB_BASE
- e
->probability
) * REG_BR_PROB_BASE
,
3343 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3344 fprintf (dump_file
, "Making edge %i->%i %s by redistributing "
3345 "probability to other edges.\n",
3346 e
->src
->index
, e
->dest
->index
,
3347 impossible
? "imposisble" : "cold");
3348 FOR_EACH_EDGE (e2
, ei
, e
->src
->succs
)
3351 e2
->count
= RDIV (e2
->count
* gcov_scale
, REG_BR_PROB_BASE
);
3352 e2
->probability
= RDIV (e2
->probability
* prob_scale
,
3356 /* If all edges out of e->src are unlikely, the basic block itself
3360 e
->probability
= REG_BR_PROB_BASE
;
3362 /* If we did not adjusting, the source basic block has no likely edeges
3363 leaving other direction. In that case force that bb cold, too.
3364 This in general is difficult task to do, but handle special case when
3365 BB has only one predecestor. This is common case when we are updating
3366 after loop transforms. */
3367 if (!prob_sum
&& !count_sum
&& single_pred_p (e
->src
)
3368 && e
->src
->frequency
> (impossible
? 0 : 1))
3370 int old_frequency
= e
->src
->frequency
;
3371 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3372 fprintf (dump_file
, "Making bb %i %s.\n", e
->src
->index
,
3373 impossible
? "imposisble" : "cold");
3374 e
->src
->frequency
= MIN (e
->src
->frequency
, impossible
? 0 : 1);
3375 e
->src
->count
= e
->count
= RDIV (e
->src
->count
* e
->src
->frequency
,
3377 force_edge_cold (single_pred_edge (e
->src
), impossible
);
3379 else if (dump_file
&& (dump_flags
& TDF_DETAILS
)
3380 && maybe_hot_bb_p (cfun
, e
->src
))
3381 fprintf (dump_file
, "Giving up on making bb %i %s.\n", e
->src
->index
,
3382 impossible
? "imposisble" : "cold");