1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
32 #include "coretypes.h"
38 #include "tree-pass.h"
44 #include "diagnostic-core.h"
45 #include "gimple-predict.h"
46 #include "fold-const.h"
53 #include "gimple-iterator.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "tree-ssa-loop.h"
57 #include "tree-scalar-evolution.h"
58 #include "ipa-utils.h"
59 #include "gimple-pretty-print.h"
62 #include "stringpool.h"
65 /* Enum with reasons why a predictor is ignored. */
71 REASON_SINGLE_EDGE_DUPLICATE
,
72 REASON_EDGE_PAIR_DUPLICATE
75 /* String messages for the aforementioned enum. */
77 static const char *reason_messages
[] = {"", " (ignored)",
78 " (single edge duplicate)", " (edge pair duplicate)"};
80 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
81 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
82 static sreal real_almost_one
, real_br_prob_base
,
83 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
85 static void combine_predictions_for_insn (rtx_insn
*, basic_block
);
86 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
,
87 enum predictor_reason
, edge
);
88 static void predict_paths_leading_to (basic_block
, enum br_predictor
,
90 struct loop
*in_loop
= NULL
);
91 static void predict_paths_leading_to_edge (edge
, enum br_predictor
,
93 struct loop
*in_loop
= NULL
);
94 static bool can_predict_insn_p (const rtx_insn
*);
96 /* Information we hold about each branch predictor.
97 Filled using information from predict.def. */
101 const char *const name
; /* Name used in the debugging dumps. */
102 const int hitrate
; /* Expected hitrate used by
103 predict_insn_def call. */
107 /* Use given predictor without Dempster-Shaffer theory if it matches
108 using first_match heuristics. */
109 #define PRED_FLAG_FIRST_MATCH 1
111 /* Recompute hitrate in percent to our representation. */
113 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
115 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
116 static const struct predictor_info predictor_info
[]= {
117 #include "predict.def"
119 /* Upper bound on predictors. */
124 static gcov_type min_count
= -1;
126 /* Determine the threshold for hot BB counts. */
129 get_hot_bb_threshold ()
131 gcov_working_set_t
*ws
;
134 ws
= find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
));
136 min_count
= ws
->min_counter
;
141 /* Set the threshold for hot BB counts. */
144 set_hot_bb_threshold (gcov_type min
)
149 /* Return TRUE if frequency FREQ is considered to be hot. */
152 maybe_hot_count_p (struct function
*fun
, profile_count count
)
154 if (!count
.initialized_p ())
156 if (count
.ipa () == profile_count::zero ())
160 struct cgraph_node
*node
= cgraph_node::get (fun
->decl
);
161 if (!profile_info
|| profile_status_for_fn (fun
) != PROFILE_READ
)
163 if (node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
165 if (node
->frequency
== NODE_FREQUENCY_HOT
)
168 if (profile_status_for_fn (fun
) == PROFILE_ABSENT
)
170 if (node
->frequency
== NODE_FREQUENCY_EXECUTED_ONCE
171 && count
< (ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
.apply_scale (2, 3)))
173 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
) == 0)
175 if (count
.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
), 1)
176 < ENTRY_BLOCK_PTR_FOR_FN (fun
)->count
)
180 /* Code executed at most once is not hot. */
181 if (count
<= MAX (profile_info
? profile_info
->runs
: 1, 1))
183 return (count
.to_gcov_type () >= get_hot_bb_threshold ());
186 /* Return true in case BB can be CPU intensive and should be optimized
187 for maximal performance. */
190 maybe_hot_bb_p (struct function
*fun
, const_basic_block bb
)
192 gcc_checking_assert (fun
);
193 return maybe_hot_count_p (fun
, bb
->count
);
196 /* Return true in case BB can be CPU intensive and should be optimized
197 for maximal performance. */
200 maybe_hot_edge_p (edge e
)
202 return maybe_hot_count_p (cfun
, e
->count ());
205 /* Return true if profile COUNT and FREQUENCY, or function FUN static
206 node frequency reflects never being executed. */
209 probably_never_executed (struct function
*fun
,
212 gcc_checking_assert (fun
);
213 if (count
== profile_count::zero ())
215 if (count
.initialized_p () && profile_status_for_fn (fun
) == PROFILE_READ
)
217 int unlikely_count_fraction
= PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION
);
218 if (count
.apply_scale (unlikely_count_fraction
, 1) >= profile_info
->runs
)
222 if ((!profile_info
|| profile_status_for_fn (fun
) != PROFILE_READ
)
223 && (cgraph_node::get (fun
->decl
)->frequency
224 == NODE_FREQUENCY_UNLIKELY_EXECUTED
))
230 /* Return true in case BB is probably never executed. */
233 probably_never_executed_bb_p (struct function
*fun
, const_basic_block bb
)
235 return probably_never_executed (fun
, bb
->count
);
239 /* Return true if E is unlikely executed for obvious reasons. */
242 unlikely_executed_edge_p (edge e
)
244 return (e
->count () == profile_count::zero ()
245 || e
->probability
== profile_probability::never ())
246 || (e
->flags
& (EDGE_EH
| EDGE_FAKE
));
249 /* Return true in case edge E is probably never executed. */
252 probably_never_executed_edge_p (struct function
*fun
, edge e
)
254 if (unlikely_executed_edge_p (e
))
256 return probably_never_executed (fun
, e
->count ());
259 /* Return true when current function should always be optimized for size. */
262 optimize_function_for_size_p (struct function
*fun
)
264 if (!fun
|| !fun
->decl
)
265 return optimize_size
;
266 cgraph_node
*n
= cgraph_node::get (fun
->decl
);
267 return n
&& n
->optimize_for_size_p ();
270 /* Return true when current function should always be optimized for speed. */
273 optimize_function_for_speed_p (struct function
*fun
)
275 return !optimize_function_for_size_p (fun
);
278 /* Return the optimization type that should be used for the function FUN. */
281 function_optimization_type (struct function
*fun
)
283 return (optimize_function_for_speed_p (fun
)
285 : OPTIMIZE_FOR_SIZE
);
288 /* Return TRUE when BB should be optimized for size. */
291 optimize_bb_for_size_p (const_basic_block bb
)
293 return (optimize_function_for_size_p (cfun
)
294 || (bb
&& !maybe_hot_bb_p (cfun
, bb
)));
297 /* Return TRUE when BB should be optimized for speed. */
300 optimize_bb_for_speed_p (const_basic_block bb
)
302 return !optimize_bb_for_size_p (bb
);
305 /* Return the optimization type that should be used for block BB. */
308 bb_optimization_type (const_basic_block bb
)
310 return (optimize_bb_for_speed_p (bb
)
312 : OPTIMIZE_FOR_SIZE
);
315 /* Return TRUE when BB should be optimized for size. */
318 optimize_edge_for_size_p (edge e
)
320 return optimize_function_for_size_p (cfun
) || !maybe_hot_edge_p (e
);
323 /* Return TRUE when BB should be optimized for speed. */
326 optimize_edge_for_speed_p (edge e
)
328 return !optimize_edge_for_size_p (e
);
331 /* Return TRUE when BB should be optimized for size. */
334 optimize_insn_for_size_p (void)
336 return optimize_function_for_size_p (cfun
) || !crtl
->maybe_hot_insn_p
;
339 /* Return TRUE when BB should be optimized for speed. */
342 optimize_insn_for_speed_p (void)
344 return !optimize_insn_for_size_p ();
347 /* Return TRUE when LOOP should be optimized for size. */
350 optimize_loop_for_size_p (struct loop
*loop
)
352 return optimize_bb_for_size_p (loop
->header
);
355 /* Return TRUE when LOOP should be optimized for speed. */
358 optimize_loop_for_speed_p (struct loop
*loop
)
360 return optimize_bb_for_speed_p (loop
->header
);
363 /* Return TRUE when LOOP nest should be optimized for speed. */
366 optimize_loop_nest_for_speed_p (struct loop
*loop
)
368 struct loop
*l
= loop
;
369 if (optimize_loop_for_speed_p (loop
))
372 while (l
&& l
!= loop
)
374 if (optimize_loop_for_speed_p (l
))
382 while (l
!= loop
&& !l
->next
)
391 /* Return TRUE when LOOP nest should be optimized for size. */
394 optimize_loop_nest_for_size_p (struct loop
*loop
)
396 return !optimize_loop_nest_for_speed_p (loop
);
399 /* Return true when edge E is likely to be well predictable by branch
403 predictable_edge_p (edge e
)
405 if (!e
->probability
.initialized_p ())
407 if ((e
->probability
.to_reg_br_prob_base ()
408 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100)
409 || (REG_BR_PROB_BASE
- e
->probability
.to_reg_br_prob_base ()
410 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME
) * REG_BR_PROB_BASE
/ 100))
416 /* Set RTL expansion for BB profile. */
419 rtl_profile_for_bb (basic_block bb
)
421 crtl
->maybe_hot_insn_p
= maybe_hot_bb_p (cfun
, bb
);
424 /* Set RTL expansion for edge profile. */
427 rtl_profile_for_edge (edge e
)
429 crtl
->maybe_hot_insn_p
= maybe_hot_edge_p (e
);
432 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
434 default_rtl_profile (void)
436 crtl
->maybe_hot_insn_p
= true;
439 /* Return true if the one of outgoing edges is already predicted by
443 rtl_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
446 if (!INSN_P (BB_END (bb
)))
448 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
449 if (REG_NOTE_KIND (note
) == REG_BR_PRED
450 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
455 /* Structure representing predictions in tree level. */
457 struct edge_prediction
{
458 struct edge_prediction
*ep_next
;
460 enum br_predictor ep_predictor
;
464 /* This map contains for a basic block the list of predictions for the
467 static hash_map
<const_basic_block
, edge_prediction
*> *bb_predictions
;
469 /* Return true if the one of outgoing edges is already predicted by
473 gimple_predicted_by_p (const_basic_block bb
, enum br_predictor predictor
)
475 struct edge_prediction
*i
;
476 edge_prediction
**preds
= bb_predictions
->get (bb
);
481 for (i
= *preds
; i
; i
= i
->ep_next
)
482 if (i
->ep_predictor
== predictor
)
487 /* Return true if the one of outgoing edges is already predicted by
488 PREDICTOR for edge E predicted as TAKEN. */
491 edge_predicted_by_p (edge e
, enum br_predictor predictor
, bool taken
)
493 struct edge_prediction
*i
;
494 basic_block bb
= e
->src
;
495 edge_prediction
**preds
= bb_predictions
->get (bb
);
499 int probability
= predictor_info
[(int) predictor
].hitrate
;
502 probability
= REG_BR_PROB_BASE
- probability
;
504 for (i
= *preds
; i
; i
= i
->ep_next
)
505 if (i
->ep_predictor
== predictor
507 && i
->ep_probability
== probability
)
512 /* Same predicate as above, working on edges. */
514 edge_probability_reliable_p (const_edge e
)
516 return e
->probability
.probably_reliable_p ();
519 /* Same predicate as edge_probability_reliable_p, working on notes. */
521 br_prob_note_reliable_p (const_rtx note
)
523 gcc_assert (REG_NOTE_KIND (note
) == REG_BR_PROB
);
524 return profile_probability::from_reg_br_prob_note
525 (XINT (note
, 0)).probably_reliable_p ();
529 predict_insn (rtx_insn
*insn
, enum br_predictor predictor
, int probability
)
531 gcc_assert (any_condjump_p (insn
));
532 if (!flag_guess_branch_prob
)
535 add_reg_note (insn
, REG_BR_PRED
,
536 gen_rtx_CONCAT (VOIDmode
,
537 GEN_INT ((int) predictor
),
538 GEN_INT ((int) probability
)));
541 /* Predict insn by given predictor. */
544 predict_insn_def (rtx_insn
*insn
, enum br_predictor predictor
,
545 enum prediction taken
)
547 int probability
= predictor_info
[(int) predictor
].hitrate
;
548 gcc_assert (probability
!= PROB_UNINITIALIZED
);
551 probability
= REG_BR_PROB_BASE
- probability
;
553 predict_insn (insn
, predictor
, probability
);
556 /* Predict edge E with given probability if possible. */
559 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
562 last_insn
= BB_END (e
->src
);
564 /* We can store the branch prediction information only about
565 conditional jumps. */
566 if (!any_condjump_p (last_insn
))
569 /* We always store probability of branching. */
570 if (e
->flags
& EDGE_FALLTHRU
)
571 probability
= REG_BR_PROB_BASE
- probability
;
573 predict_insn (last_insn
, predictor
, probability
);
576 /* Predict edge E with the given PROBABILITY. */
578 gimple_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
580 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
581 && EDGE_COUNT (e
->src
->succs
) > 1
582 && flag_guess_branch_prob
585 struct edge_prediction
*i
= XNEW (struct edge_prediction
);
586 edge_prediction
*&preds
= bb_predictions
->get_or_insert (e
->src
);
590 i
->ep_probability
= probability
;
591 i
->ep_predictor
= predictor
;
596 /* Filter edge predictions PREDS by a function FILTER. DATA are passed
597 to the filter function. */
600 filter_predictions (edge_prediction
**preds
,
601 bool (*filter
) (edge_prediction
*, void *), void *data
)
608 struct edge_prediction
**prediction
= preds
;
609 struct edge_prediction
*next
;
613 if ((*filter
) (*prediction
, data
))
614 prediction
= &((*prediction
)->ep_next
);
617 next
= (*prediction
)->ep_next
;
625 /* Filter function predicate that returns true for a edge predicate P
626 if its edge is equal to DATA. */
629 equal_edge_p (edge_prediction
*p
, void *data
)
631 return p
->ep_edge
== (edge
)data
;
634 /* Remove all predictions on given basic block that are attached
637 remove_predictions_associated_with_edge (edge e
)
642 edge_prediction
**preds
= bb_predictions
->get (e
->src
);
643 filter_predictions (preds
, equal_edge_p
, e
);
646 /* Clears the list of predictions stored for BB. */
649 clear_bb_predictions (basic_block bb
)
651 edge_prediction
**preds
= bb_predictions
->get (bb
);
652 struct edge_prediction
*pred
, *next
;
657 for (pred
= *preds
; pred
; pred
= next
)
659 next
= pred
->ep_next
;
665 /* Return true when we can store prediction on insn INSN.
666 At the moment we represent predictions only on conditional
667 jumps, not at computed jump or other complicated cases. */
669 can_predict_insn_p (const rtx_insn
*insn
)
671 return (JUMP_P (insn
)
672 && any_condjump_p (insn
)
673 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
676 /* Predict edge E by given predictor if possible. */
679 predict_edge_def (edge e
, enum br_predictor predictor
,
680 enum prediction taken
)
682 int probability
= predictor_info
[(int) predictor
].hitrate
;
685 probability
= REG_BR_PROB_BASE
- probability
;
687 predict_edge (e
, predictor
, probability
);
690 /* Invert all branch predictions or probability notes in the INSN. This needs
691 to be done each time we invert the condition used by the jump. */
694 invert_br_probabilities (rtx insn
)
698 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
699 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
700 XINT (note
, 0) = profile_probability::from_reg_br_prob_note
701 (XINT (note
, 0)).invert ().to_reg_br_prob_note ();
702 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
703 XEXP (XEXP (note
, 0), 1)
704 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
707 /* Dump information about the branch prediction to the output file. */
710 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
711 basic_block bb
, enum predictor_reason reason
= REASON_NONE
,
721 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
722 if (! (e
->flags
& EDGE_FALLTHRU
))
725 char edge_info_str
[128];
727 sprintf (edge_info_str
, " of edge %d->%d", ep_edge
->src
->index
,
728 ep_edge
->dest
->index
);
730 edge_info_str
[0] = '\0';
732 fprintf (file
, " %s heuristics%s%s: %.1f%%",
733 predictor_info
[predictor
].name
,
734 edge_info_str
, reason_messages
[reason
],
735 probability
* 100.0 / REG_BR_PROB_BASE
);
737 if (bb
->count
.initialized_p ())
739 fprintf (file
, " exec ");
740 bb
->count
.dump (file
);
743 fprintf (file
, " hit ");
744 e
->count ().dump (file
);
745 fprintf (file
, " (%.1f%%)", e
->count ().to_gcov_type() * 100.0
746 / bb
->count
.to_gcov_type ());
750 fprintf (file
, "\n");
752 /* Print output that be easily read by analyze_brprob.py script. We are
753 interested only in counts that are read from GCDA files. */
754 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
755 && bb
->count
.precise_p ()
756 && reason
== REASON_NONE
)
758 gcc_assert (e
->count ().precise_p ());
759 fprintf (file
, ";;heuristics;%s;%" PRId64
";%" PRId64
";%.1f;\n",
760 predictor_info
[predictor
].name
,
761 bb
->count
.to_gcov_type (), e
->count ().to_gcov_type (),
762 probability
* 100.0 / REG_BR_PROB_BASE
);
766 /* Return true if STMT is known to be unlikely executed. */
769 unlikely_executed_stmt_p (gimple
*stmt
)
771 if (!is_gimple_call (stmt
))
773 /* NORETURN attribute alone is not strong enough: exit() may be quite
774 likely executed once during program run. */
775 if (gimple_call_fntype (stmt
)
776 && lookup_attribute ("cold",
777 TYPE_ATTRIBUTES (gimple_call_fntype (stmt
)))
778 && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
)))
780 tree decl
= gimple_call_fndecl (stmt
);
783 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl
))
784 && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl
)))
787 cgraph_node
*n
= cgraph_node::get (decl
);
792 n
= n
->ultimate_alias_target (&avail
);
793 if (avail
< AVAIL_AVAILABLE
)
796 || n
->decl
== current_function_decl
)
798 return n
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
;
801 /* Return true if BB is unlikely executed. */
804 unlikely_executed_bb_p (basic_block bb
)
806 if (bb
->count
== profile_count::zero ())
808 if (bb
== ENTRY_BLOCK_PTR_FOR_FN (cfun
) || bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
810 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
811 !gsi_end_p (gsi
); gsi_next (&gsi
))
813 if (unlikely_executed_stmt_p (gsi_stmt (gsi
)))
815 if (stmt_can_terminate_bb_p (gsi_stmt (gsi
)))
821 /* We can not predict the probabilities of outgoing edges of bb. Set them
822 evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute
823 even probability for all edges not mentioned in the set. These edges
824 are given PROB_VERY_UNLIKELY probability. */
827 set_even_probabilities (basic_block bb
,
828 hash_set
<edge
> *unlikely_edges
= NULL
)
830 unsigned nedges
= 0, unlikely_count
= 0;
833 profile_probability all
= profile_probability::always ();
835 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
836 if (e
->probability
.initialized_p ())
837 all
-= e
->probability
;
838 else if (!unlikely_executed_edge_p (e
))
841 if (unlikely_edges
!= NULL
&& unlikely_edges
->contains (e
))
843 all
-= profile_probability::very_unlikely ();
848 /* Make the distribution even if all edges are unlikely. */
849 if (unlikely_count
== nedges
)
851 unlikely_edges
= NULL
;
855 unsigned c
= nedges
- unlikely_count
;
857 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
858 if (e
->probability
.initialized_p ())
860 else if (!unlikely_executed_edge_p (e
))
862 if (unlikely_edges
!= NULL
&& unlikely_edges
->contains (e
))
863 e
->probability
= profile_probability::very_unlikely ();
865 e
->probability
= all
.apply_scale (1, c
).guessed ();
868 e
->probability
= profile_probability::never ();
871 /* Add REG_BR_PROB note to JUMP with PROB. */
874 add_reg_br_prob_note (rtx_insn
*jump
, profile_probability prob
)
876 gcc_checking_assert (JUMP_P (jump
) && !find_reg_note (jump
, REG_BR_PROB
, 0));
877 add_int_reg_note (jump
, REG_BR_PROB
, prob
.to_reg_br_prob_note ());
880 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
881 note if not already present. Remove now useless REG_BR_PRED notes. */
884 combine_predictions_for_insn (rtx_insn
*insn
, basic_block bb
)
889 int best_probability
= PROB_EVEN
;
890 enum br_predictor best_predictor
= END_PREDICTORS
;
891 int combined_probability
= REG_BR_PROB_BASE
/ 2;
893 bool first_match
= false;
896 if (!can_predict_insn_p (insn
))
898 set_even_probabilities (bb
);
902 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
903 pnote
= ®_NOTES (insn
);
905 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
908 /* We implement "first match" heuristics and use probability guessed
909 by predictor with smallest index. */
910 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
911 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
913 enum br_predictor predictor
= ((enum br_predictor
)
914 INTVAL (XEXP (XEXP (note
, 0), 0)));
915 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
918 if (best_predictor
> predictor
919 && predictor_info
[predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
920 best_probability
= probability
, best_predictor
= predictor
;
922 d
= (combined_probability
* probability
923 + (REG_BR_PROB_BASE
- combined_probability
)
924 * (REG_BR_PROB_BASE
- probability
));
926 /* Use FP math to avoid overflows of 32bit integers. */
928 /* If one probability is 0% and one 100%, avoid division by zero. */
929 combined_probability
= REG_BR_PROB_BASE
/ 2;
931 combined_probability
= (((double) combined_probability
) * probability
932 * REG_BR_PROB_BASE
/ d
+ 0.5);
935 /* Decide which heuristic to use. In case we didn't match anything,
936 use no_prediction heuristic, in case we did match, use either
937 first match or Dempster-Shaffer theory depending on the flags. */
939 if (best_predictor
!= END_PREDICTORS
)
943 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
944 combined_probability
, bb
);
948 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
949 bb
, !first_match
? REASON_NONE
: REASON_IGNORED
);
951 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
952 bb
, first_match
? REASON_NONE
: REASON_IGNORED
);
956 combined_probability
= best_probability
;
957 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
);
961 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
963 enum br_predictor predictor
= ((enum br_predictor
)
964 INTVAL (XEXP (XEXP (*pnote
, 0), 0)));
965 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
967 dump_prediction (dump_file
, predictor
, probability
, bb
,
968 (!first_match
|| best_predictor
== predictor
)
969 ? REASON_NONE
: REASON_IGNORED
);
970 *pnote
= XEXP (*pnote
, 1);
973 pnote
= &XEXP (*pnote
, 1);
978 profile_probability p
979 = profile_probability::from_reg_br_prob_base (combined_probability
);
980 add_reg_br_prob_note (insn
, p
);
982 /* Save the prediction into CFG in case we are seeing non-degenerated
984 if (!single_succ_p (bb
))
986 BRANCH_EDGE (bb
)->probability
= p
;
987 FALLTHRU_EDGE (bb
)->probability
988 = BRANCH_EDGE (bb
)->probability
.invert ();
991 else if (!single_succ_p (bb
))
993 profile_probability prob
= profile_probability::from_reg_br_prob_note
994 (XINT (prob_note
, 0));
996 BRANCH_EDGE (bb
)->probability
= prob
;
997 FALLTHRU_EDGE (bb
)->probability
= prob
.invert ();
1000 single_succ_edge (bb
)->probability
= profile_probability::always ();
1003 /* Edge prediction hash traits. */
1005 struct predictor_hash
: pointer_hash
<edge_prediction
>
1008 static inline hashval_t
hash (const edge_prediction
*);
1009 static inline bool equal (const edge_prediction
*, const edge_prediction
*);
1012 /* Calculate hash value of an edge prediction P based on predictor and
1013 normalized probability. */
1016 predictor_hash::hash (const edge_prediction
*p
)
1018 inchash::hash hstate
;
1019 hstate
.add_int (p
->ep_predictor
);
1021 int prob
= p
->ep_probability
;
1022 if (prob
> REG_BR_PROB_BASE
/ 2)
1023 prob
= REG_BR_PROB_BASE
- prob
;
1025 hstate
.add_int (prob
);
1027 return hstate
.end ();
1030 /* Return true whether edge predictions P1 and P2 use the same predictor and
1031 have equal (or opposed probability). */
1034 predictor_hash::equal (const edge_prediction
*p1
, const edge_prediction
*p2
)
1036 return (p1
->ep_predictor
== p2
->ep_predictor
1037 && (p1
->ep_probability
== p2
->ep_probability
1038 || p1
->ep_probability
== REG_BR_PROB_BASE
- p2
->ep_probability
));
1041 struct predictor_hash_traits
: predictor_hash
,
1042 typed_noop_remove
<edge_prediction
*> {};
1044 /* Return true if edge prediction P is not in DATA hash set. */
1047 not_removed_prediction_p (edge_prediction
*p
, void *data
)
1049 hash_set
<edge_prediction
*> *remove
= (hash_set
<edge_prediction
*> *) data
;
1050 return !remove
->contains (p
);
1053 /* Prune predictions for a basic block BB. Currently we do following
1056 1) remove duplicate prediction that is guessed with the same probability
1057 (different than 1/2) to both edge
1058 2) remove duplicates for a prediction that belongs with the same probability
1064 prune_predictions_for_bb (basic_block bb
)
1066 edge_prediction
**preds
= bb_predictions
->get (bb
);
1070 hash_table
<predictor_hash_traits
> s (13);
1071 hash_set
<edge_prediction
*> remove
;
1073 /* Step 1: identify predictors that should be removed. */
1074 for (edge_prediction
*pred
= *preds
; pred
; pred
= pred
->ep_next
)
1076 edge_prediction
*existing
= s
.find (pred
);
1079 if (pred
->ep_edge
== existing
->ep_edge
1080 && pred
->ep_probability
== existing
->ep_probability
)
1082 /* Remove a duplicate predictor. */
1083 dump_prediction (dump_file
, pred
->ep_predictor
,
1084 pred
->ep_probability
, bb
,
1085 REASON_SINGLE_EDGE_DUPLICATE
, pred
->ep_edge
);
1089 else if (pred
->ep_edge
!= existing
->ep_edge
1090 && pred
->ep_probability
== existing
->ep_probability
1091 && pred
->ep_probability
!= REG_BR_PROB_BASE
/ 2)
1093 /* Remove both predictors as they predict the same
1095 dump_prediction (dump_file
, existing
->ep_predictor
,
1096 pred
->ep_probability
, bb
,
1097 REASON_EDGE_PAIR_DUPLICATE
,
1099 dump_prediction (dump_file
, pred
->ep_predictor
,
1100 pred
->ep_probability
, bb
,
1101 REASON_EDGE_PAIR_DUPLICATE
,
1104 remove
.add (existing
);
1109 edge_prediction
**slot2
= s
.find_slot (pred
, INSERT
);
1113 /* Step 2: Remove predictors. */
1114 filter_predictions (preds
, not_removed_prediction_p
, &remove
);
1118 /* Combine predictions into single probability and store them into CFG.
1119 Remove now useless prediction entries.
1120 If DRY_RUN is set, only produce dumps and do not modify profile. */
1123 combine_predictions_for_bb (basic_block bb
, bool dry_run
)
1125 int best_probability
= PROB_EVEN
;
1126 enum br_predictor best_predictor
= END_PREDICTORS
;
1127 int combined_probability
= REG_BR_PROB_BASE
/ 2;
1129 bool first_match
= false;
1131 struct edge_prediction
*pred
;
1133 edge e
, first
= NULL
, second
= NULL
;
1138 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1140 if (!unlikely_executed_edge_p (e
))
1143 if (first
&& !second
)
1148 else if (!e
->probability
.initialized_p ())
1149 e
->probability
= profile_probability::never ();
1150 if (!e
->probability
.initialized_p ())
1152 else if (e
->probability
== profile_probability::never ())
1156 /* When there is no successor or only one choice, prediction is easy.
1158 When we have a basic block with more than 2 successors, the situation
1159 is more complicated as DS theory cannot be used literally.
1160 More precisely, let's assume we predicted edge e1 with probability p1,
1161 thus: m1({b1}) = p1. As we're going to combine more than 2 edges, we
1162 need to find probability of e.g. m1({b2}), which we don't know.
1163 The only approximation is to equally distribute 1-p1 to all edges
1166 According to numbers we've got from SPEC2006 benchark, there's only
1167 one interesting reliable predictor (noreturn call), which can be
1168 handled with a bit easier approach. */
1171 hash_set
<edge
> unlikely_edges (4);
1173 /* Identify all edges that have a probability close to very unlikely.
1174 Doing the approach for very unlikely doesn't worth for doing as
1175 there's no such probability in SPEC2006 benchmark. */
1176 edge_prediction
**preds
= bb_predictions
->get (bb
);
1178 for (pred
= *preds
; pred
; pred
= pred
->ep_next
)
1179 if (pred
->ep_probability
<= PROB_VERY_UNLIKELY
)
1180 unlikely_edges
.add (pred
->ep_edge
);
1183 set_even_probabilities (bb
, &unlikely_edges
);
1184 clear_bb_predictions (bb
);
1187 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
1188 if (unlikely_edges
.elements () == 0)
1190 "%i edges in bb %i predicted to even probabilities\n",
1195 "%i edges in bb %i predicted with some unlikely edges\n",
1197 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1198 if (!unlikely_executed_edge_p (e
))
1199 dump_prediction (dump_file
, PRED_COMBINED
,
1200 e
->probability
.to_reg_br_prob_base (), bb
, REASON_NONE
, e
);
1207 fprintf (dump_file
, "Predictions for bb %i\n", bb
->index
);
1209 prune_predictions_for_bb (bb
);
1211 edge_prediction
**preds
= bb_predictions
->get (bb
);
1215 /* We implement "first match" heuristics and use probability guessed
1216 by predictor with smallest index. */
1217 for (pred
= *preds
; pred
; pred
= pred
->ep_next
)
1219 enum br_predictor predictor
= pred
->ep_predictor
;
1220 int probability
= pred
->ep_probability
;
1222 if (pred
->ep_edge
!= first
)
1223 probability
= REG_BR_PROB_BASE
- probability
;
1226 /* First match heuristics would be widly confused if we predicted
1228 if (best_predictor
> predictor
1229 && predictor_info
[predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
1231 struct edge_prediction
*pred2
;
1232 int prob
= probability
;
1234 for (pred2
= (struct edge_prediction
*) *preds
;
1235 pred2
; pred2
= pred2
->ep_next
)
1236 if (pred2
!= pred
&& pred2
->ep_predictor
== pred
->ep_predictor
)
1238 int probability2
= pred2
->ep_probability
;
1240 if (pred2
->ep_edge
!= first
)
1241 probability2
= REG_BR_PROB_BASE
- probability2
;
1243 if ((probability
< REG_BR_PROB_BASE
/ 2) !=
1244 (probability2
< REG_BR_PROB_BASE
/ 2))
1247 /* If the same predictor later gave better result, go for it! */
1248 if ((probability
>= REG_BR_PROB_BASE
/ 2 && (probability2
> probability
))
1249 || (probability
<= REG_BR_PROB_BASE
/ 2 && (probability2
< probability
)))
1250 prob
= probability2
;
1253 best_probability
= prob
, best_predictor
= predictor
;
1256 d
= (combined_probability
* probability
1257 + (REG_BR_PROB_BASE
- combined_probability
)
1258 * (REG_BR_PROB_BASE
- probability
));
1260 /* Use FP math to avoid overflows of 32bit integers. */
1262 /* If one probability is 0% and one 100%, avoid division by zero. */
1263 combined_probability
= REG_BR_PROB_BASE
/ 2;
1265 combined_probability
= (((double) combined_probability
)
1267 * REG_BR_PROB_BASE
/ d
+ 0.5);
1271 /* Decide which heuristic to use. In case we didn't match anything,
1272 use no_prediction heuristic, in case we did match, use either
1273 first match or Dempster-Shaffer theory depending on the flags. */
1275 if (best_predictor
!= END_PREDICTORS
)
1279 dump_prediction (dump_file
, PRED_NO_PREDICTION
, combined_probability
, bb
);
1283 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
, bb
,
1284 !first_match
? REASON_NONE
: REASON_IGNORED
);
1286 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
, bb
,
1287 first_match
? REASON_NONE
: REASON_IGNORED
);
1291 combined_probability
= best_probability
;
1292 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
);
1296 for (pred
= (struct edge_prediction
*) *preds
; pred
; pred
= pred
->ep_next
)
1298 enum br_predictor predictor
= pred
->ep_predictor
;
1299 int probability
= pred
->ep_probability
;
1301 dump_prediction (dump_file
, predictor
, probability
, bb
,
1302 (!first_match
|| best_predictor
== predictor
)
1303 ? REASON_NONE
: REASON_IGNORED
, pred
->ep_edge
);
1306 clear_bb_predictions (bb
);
1309 /* If we have only one successor which is unknown, we can compute missing
1313 profile_probability prob
= profile_probability::always ();
1314 edge missing
= NULL
;
1316 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1317 if (e
->probability
.initialized_p ())
1318 prob
-= e
->probability
;
1319 else if (missing
== NULL
)
1323 missing
->probability
= prob
;
1325 /* If nothing is unknown, we have nothing to update. */
1326 else if (!nunknown
&& nzero
!= (int)EDGE_COUNT (bb
->succs
))
1331 = profile_probability::from_reg_br_prob_base (combined_probability
);
1332 second
->probability
= first
->probability
.invert ();
1336 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1337 Return the SSA_NAME if the condition satisfies, NULL otherwise.
1339 T1 and T2 should be one of the following cases:
1340 1. T1 is SSA_NAME, T2 is NULL
1341 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1342 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1345 strips_small_constant (tree t1
, tree t2
)
1352 else if (TREE_CODE (t1
) == SSA_NAME
)
1354 else if (tree_fits_shwi_p (t1
))
1355 value
= tree_to_shwi (t1
);
1361 else if (tree_fits_shwi_p (t2
))
1362 value
= tree_to_shwi (t2
);
1363 else if (TREE_CODE (t2
) == SSA_NAME
)
1371 if (value
<= 4 && value
>= -4)
1377 /* Return the SSA_NAME in T or T's operands.
1378 Return NULL if SSA_NAME cannot be found. */
1381 get_base_value (tree t
)
1383 if (TREE_CODE (t
) == SSA_NAME
)
1386 if (!BINARY_CLASS_P (t
))
1389 switch (TREE_OPERAND_LENGTH (t
))
1392 return strips_small_constant (TREE_OPERAND (t
, 0), NULL
);
1394 return strips_small_constant (TREE_OPERAND (t
, 0),
1395 TREE_OPERAND (t
, 1));
1401 /* Check the compare STMT in LOOP. If it compares an induction
1402 variable to a loop invariant, return true, and save
1403 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1404 Otherwise return false and set LOOP_INVAIANT to NULL. */
1407 is_comparison_with_loop_invariant_p (gcond
*stmt
, struct loop
*loop
,
1408 tree
*loop_invariant
,
1409 enum tree_code
*compare_code
,
1413 tree op0
, op1
, bound
, base
;
1415 enum tree_code code
;
1418 code
= gimple_cond_code (stmt
);
1419 *loop_invariant
= NULL
;
1435 op0
= gimple_cond_lhs (stmt
);
1436 op1
= gimple_cond_rhs (stmt
);
1438 if ((TREE_CODE (op0
) != SSA_NAME
&& TREE_CODE (op0
) != INTEGER_CST
)
1439 || (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op1
) != INTEGER_CST
))
1441 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op0
, &iv0
, true))
1443 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op1
, &iv1
, true))
1445 if (TREE_CODE (iv0
.step
) != INTEGER_CST
1446 || TREE_CODE (iv1
.step
) != INTEGER_CST
)
1448 if ((integer_zerop (iv0
.step
) && integer_zerop (iv1
.step
))
1449 || (!integer_zerop (iv0
.step
) && !integer_zerop (iv1
.step
)))
1452 if (integer_zerop (iv0
.step
))
1454 if (code
!= NE_EXPR
&& code
!= EQ_EXPR
)
1455 code
= invert_tree_comparison (code
, false);
1458 if (tree_fits_shwi_p (iv1
.step
))
1467 if (tree_fits_shwi_p (iv0
.step
))
1473 if (TREE_CODE (bound
) != INTEGER_CST
)
1474 bound
= get_base_value (bound
);
1477 if (TREE_CODE (base
) != INTEGER_CST
)
1478 base
= get_base_value (base
);
1482 *loop_invariant
= bound
;
1483 *compare_code
= code
;
1485 *loop_iv_base
= base
;
1489 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1492 expr_coherent_p (tree t1
, tree t2
)
1495 tree ssa_name_1
= NULL
;
1496 tree ssa_name_2
= NULL
;
1498 gcc_assert (TREE_CODE (t1
) == SSA_NAME
|| TREE_CODE (t1
) == INTEGER_CST
);
1499 gcc_assert (TREE_CODE (t2
) == SSA_NAME
|| TREE_CODE (t2
) == INTEGER_CST
);
1504 if (TREE_CODE (t1
) == INTEGER_CST
&& TREE_CODE (t2
) == INTEGER_CST
)
1506 if (TREE_CODE (t1
) == INTEGER_CST
|| TREE_CODE (t2
) == INTEGER_CST
)
1509 /* Check to see if t1 is expressed/defined with t2. */
1510 stmt
= SSA_NAME_DEF_STMT (t1
);
1511 gcc_assert (stmt
!= NULL
);
1512 if (is_gimple_assign (stmt
))
1514 ssa_name_1
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1515 if (ssa_name_1
&& ssa_name_1
== t2
)
1519 /* Check to see if t2 is expressed/defined with t1. */
1520 stmt
= SSA_NAME_DEF_STMT (t2
);
1521 gcc_assert (stmt
!= NULL
);
1522 if (is_gimple_assign (stmt
))
1524 ssa_name_2
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
1525 if (ssa_name_2
&& ssa_name_2
== t1
)
1529 /* Compare if t1 and t2's def_stmts are identical. */
1530 if (ssa_name_2
!= NULL
&& ssa_name_1
== ssa_name_2
)
1536 /* Return true if E is predicted by one of loop heuristics. */
1539 predicted_by_loop_heuristics_p (basic_block bb
)
1541 struct edge_prediction
*i
;
1542 edge_prediction
**preds
= bb_predictions
->get (bb
);
1547 for (i
= *preds
; i
; i
= i
->ep_next
)
1548 if (i
->ep_predictor
== PRED_LOOP_ITERATIONS_GUESSED
1549 || i
->ep_predictor
== PRED_LOOP_ITERATIONS_MAX
1550 || i
->ep_predictor
== PRED_LOOP_ITERATIONS
1551 || i
->ep_predictor
== PRED_LOOP_EXIT
1552 || i
->ep_predictor
== PRED_LOOP_EXIT_WITH_RECURSION
1553 || i
->ep_predictor
== PRED_LOOP_EXTRA_EXIT
)
1558 /* Predict branch probability of BB when BB contains a branch that compares
1559 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1560 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1563 for (int i = 0; i < bound; i++) {
1570 In this loop, we will predict the branch inside the loop to be taken. */
1573 predict_iv_comparison (struct loop
*loop
, basic_block bb
,
1574 tree loop_bound_var
,
1575 tree loop_iv_base_var
,
1576 enum tree_code loop_bound_code
,
1577 int loop_bound_step
)
1580 tree compare_var
, compare_base
;
1581 enum tree_code compare_code
;
1582 tree compare_step_var
;
1586 if (predicted_by_loop_heuristics_p (bb
))
1589 stmt
= last_stmt (bb
);
1590 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
1592 if (!is_comparison_with_loop_invariant_p (as_a
<gcond
*> (stmt
),
1599 /* Find the taken edge. */
1600 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
1601 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
1604 /* When comparing an IV to a loop invariant, NE is more likely to be
1605 taken while EQ is more likely to be not-taken. */
1606 if (compare_code
== NE_EXPR
)
1608 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1611 else if (compare_code
== EQ_EXPR
)
1613 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1617 if (!expr_coherent_p (loop_iv_base_var
, compare_base
))
1620 /* If loop bound, base and compare bound are all constants, we can
1621 calculate the probability directly. */
1622 if (tree_fits_shwi_p (loop_bound_var
)
1623 && tree_fits_shwi_p (compare_var
)
1624 && tree_fits_shwi_p (compare_base
))
1627 bool overflow
, overall_overflow
= false;
1628 widest_int compare_count
, tem
;
1630 /* (loop_bound - base) / compare_step */
1631 tem
= wi::sub (wi::to_widest (loop_bound_var
),
1632 wi::to_widest (compare_base
), SIGNED
, &overflow
);
1633 overall_overflow
|= overflow
;
1634 widest_int loop_count
= wi::div_trunc (tem
,
1635 wi::to_widest (compare_step_var
),
1637 overall_overflow
|= overflow
;
1639 if (!wi::neg_p (wi::to_widest (compare_step_var
))
1640 ^ (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1642 /* (loop_bound - compare_bound) / compare_step */
1643 tem
= wi::sub (wi::to_widest (loop_bound_var
),
1644 wi::to_widest (compare_var
), SIGNED
, &overflow
);
1645 overall_overflow
|= overflow
;
1646 compare_count
= wi::div_trunc (tem
, wi::to_widest (compare_step_var
),
1648 overall_overflow
|= overflow
;
1652 /* (compare_bound - base) / compare_step */
1653 tem
= wi::sub (wi::to_widest (compare_var
),
1654 wi::to_widest (compare_base
), SIGNED
, &overflow
);
1655 overall_overflow
|= overflow
;
1656 compare_count
= wi::div_trunc (tem
, wi::to_widest (compare_step_var
),
1658 overall_overflow
|= overflow
;
1660 if (compare_code
== LE_EXPR
|| compare_code
== GE_EXPR
)
1662 if (loop_bound_code
== LE_EXPR
|| loop_bound_code
== GE_EXPR
)
1664 if (wi::neg_p (compare_count
))
1666 if (wi::neg_p (loop_count
))
1668 if (loop_count
== 0)
1670 else if (wi::cmps (compare_count
, loop_count
) == 1)
1671 probability
= REG_BR_PROB_BASE
;
1674 tem
= compare_count
* REG_BR_PROB_BASE
;
1675 tem
= wi::udiv_trunc (tem
, loop_count
);
1676 probability
= tem
.to_uhwi ();
1679 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */
1680 if (!overall_overflow
)
1681 predict_edge (then_edge
, PRED_LOOP_IV_COMPARE
, probability
);
1686 if (expr_coherent_p (loop_bound_var
, compare_var
))
1688 if ((loop_bound_code
== LT_EXPR
|| loop_bound_code
== LE_EXPR
)
1689 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1690 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1691 else if ((loop_bound_code
== GT_EXPR
|| loop_bound_code
== GE_EXPR
)
1692 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1693 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1694 else if (loop_bound_code
== NE_EXPR
)
1696 /* If the loop backedge condition is "(i != bound)", we do
1697 the comparison based on the step of IV:
1698 * step < 0 : backedge condition is like (i > bound)
1699 * step > 0 : backedge condition is like (i < bound) */
1700 gcc_assert (loop_bound_step
!= 0);
1701 if (loop_bound_step
> 0
1702 && (compare_code
== LT_EXPR
1703 || compare_code
== LE_EXPR
))
1704 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1705 else if (loop_bound_step
< 0
1706 && (compare_code
== GT_EXPR
1707 || compare_code
== GE_EXPR
))
1708 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1710 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1713 /* The branch is predicted not-taken if loop_bound_code is
1714 opposite with compare_code. */
1715 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1717 else if (expr_coherent_p (loop_iv_base_var
, compare_var
))
1720 for (i = s; i < h; i++)
1722 The branch should be predicted taken. */
1723 if (loop_bound_step
> 0
1724 && (compare_code
== GT_EXPR
|| compare_code
== GE_EXPR
))
1725 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1726 else if (loop_bound_step
< 0
1727 && (compare_code
== LT_EXPR
|| compare_code
== LE_EXPR
))
1728 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, TAKEN
);
1730 predict_edge_def (then_edge
, PRED_LOOP_IV_COMPARE_GUESS
, NOT_TAKEN
);
1734 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1735 exits are resulted from short-circuit conditions that will generate an
1738 if (foo() || global > 10)
1741 This will be translated into:
1746 if foo() goto BB6 else goto BB5
1748 if global > 10 goto BB6 else goto BB7
1752 iftmp = (PHI 0(BB5), 1(BB6))
1753 if iftmp == 1 goto BB8 else goto BB3
1755 outside of the loop...
1757 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1758 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1759 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1760 exits to predict them using PRED_LOOP_EXTRA_EXIT. */
1763 predict_extra_loop_exits (edge exit_edge
)
1766 bool check_value_one
;
1767 gimple
*lhs_def_stmt
;
1769 tree cmp_rhs
, cmp_lhs
;
1773 last
= last_stmt (exit_edge
->src
);
1776 cmp_stmt
= dyn_cast
<gcond
*> (last
);
1780 cmp_rhs
= gimple_cond_rhs (cmp_stmt
);
1781 cmp_lhs
= gimple_cond_lhs (cmp_stmt
);
1782 if (!TREE_CONSTANT (cmp_rhs
)
1783 || !(integer_zerop (cmp_rhs
) || integer_onep (cmp_rhs
)))
1785 if (TREE_CODE (cmp_lhs
) != SSA_NAME
)
1788 /* If check_value_one is true, only the phi_args with value '1' will lead
1789 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1791 check_value_one
= (((integer_onep (cmp_rhs
))
1792 ^ (gimple_cond_code (cmp_stmt
) == EQ_EXPR
))
1793 ^ ((exit_edge
->flags
& EDGE_TRUE_VALUE
) != 0));
1795 lhs_def_stmt
= SSA_NAME_DEF_STMT (cmp_lhs
);
1799 phi_stmt
= dyn_cast
<gphi
*> (lhs_def_stmt
);
1803 for (i
= 0; i
< gimple_phi_num_args (phi_stmt
); i
++)
1807 tree val
= gimple_phi_arg_def (phi_stmt
, i
);
1808 edge e
= gimple_phi_arg_edge (phi_stmt
, i
);
1810 if (!TREE_CONSTANT (val
) || !(integer_zerop (val
) || integer_onep (val
)))
1812 if ((check_value_one
^ integer_onep (val
)) == 1)
1814 if (EDGE_COUNT (e
->src
->succs
) != 1)
1816 predict_paths_leading_to_edge (e
, PRED_LOOP_EXTRA_EXIT
, NOT_TAKEN
);
1820 FOR_EACH_EDGE (e1
, ei
, e
->src
->preds
)
1821 predict_paths_leading_to_edge (e1
, PRED_LOOP_EXTRA_EXIT
, NOT_TAKEN
);
1826 /* Predict edge probabilities by exploiting loop structure. */
1829 predict_loops (void)
1833 hash_set
<struct loop
*> with_recursion(10);
1835 FOR_EACH_BB_FN (bb
, cfun
)
1837 gimple_stmt_iterator gsi
;
1840 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1841 if (is_gimple_call (gsi_stmt (gsi
))
1842 && (decl
= gimple_call_fndecl (gsi_stmt (gsi
))) != NULL
1843 && recursive_call_p (current_function_decl
, decl
))
1845 loop
= bb
->loop_father
;
1846 while (loop
&& !with_recursion
.add (loop
))
1847 loop
= loop_outer (loop
);
1851 /* Try to predict out blocks in a loop that are not part of a
1853 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
1855 basic_block bb
, *bbs
;
1856 unsigned j
, n_exits
= 0;
1858 struct tree_niter_desc niter_desc
;
1860 struct nb_iter_bound
*nb_iter
;
1861 enum tree_code loop_bound_code
= ERROR_MARK
;
1862 tree loop_bound_step
= NULL
;
1863 tree loop_bound_var
= NULL
;
1864 tree loop_iv_base
= NULL
;
1866 bool recursion
= with_recursion
.contains (loop
);
1868 exits
= get_loop_exit_edges (loop
);
1869 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1870 if (!unlikely_executed_edge_p (ex
) && !(ex
->flags
& EDGE_ABNORMAL_CALL
))
1878 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1879 fprintf (dump_file
, "Predicting loop %i%s with %i exits.\n",
1880 loop
->num
, recursion
? " (with recursion)":"", n_exits
);
1881 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1882 && max_loop_iterations_int (loop
) >= 0)
1885 "Loop %d iterates at most %i times.\n", loop
->num
,
1886 (int)max_loop_iterations_int (loop
));
1888 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1889 && likely_max_loop_iterations_int (loop
) >= 0)
1891 fprintf (dump_file
, "Loop %d likely iterates at most %i times.\n",
1892 loop
->num
, (int)likely_max_loop_iterations_int (loop
));
1895 FOR_EACH_VEC_ELT (exits
, j
, ex
)
1898 HOST_WIDE_INT nitercst
;
1899 int max
= PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS
);
1901 enum br_predictor predictor
;
1904 if (unlikely_executed_edge_p (ex
)
1905 || (ex
->flags
& EDGE_ABNORMAL_CALL
))
1907 /* Loop heuristics do not expect exit conditional to be inside
1908 inner loop. We predict from innermost to outermost loop. */
1909 if (predicted_by_loop_heuristics_p (ex
->src
))
1911 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1912 fprintf (dump_file
, "Skipping exit %i->%i because "
1913 "it is already predicted.\n",
1914 ex
->src
->index
, ex
->dest
->index
);
1917 predict_extra_loop_exits (ex
);
1919 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
1920 niter
= niter_desc
.niter
;
1921 if (!niter
|| TREE_CODE (niter_desc
.niter
) != INTEGER_CST
)
1922 niter
= loop_niter_by_eval (loop
, ex
);
1923 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1924 && TREE_CODE (niter
) == INTEGER_CST
)
1926 fprintf (dump_file
, "Exit %i->%i %d iterates ",
1927 ex
->src
->index
, ex
->dest
->index
,
1929 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1930 fprintf (dump_file
, " times.\n");
1933 if (TREE_CODE (niter
) == INTEGER_CST
)
1935 if (tree_fits_uhwi_p (niter
)
1937 && compare_tree_int (niter
, max
- 1) == -1)
1938 nitercst
= tree_to_uhwi (niter
) + 1;
1941 predictor
= PRED_LOOP_ITERATIONS
;
1943 /* If we have just one exit and we can derive some information about
1944 the number of iterations of the loop from the statements inside
1945 the loop, use it to predict this exit. */
1946 else if (n_exits
== 1
1947 && estimated_stmt_executions (loop
, &nit
))
1949 if (wi::gtu_p (nit
, max
))
1952 nitercst
= nit
.to_shwi ();
1953 predictor
= PRED_LOOP_ITERATIONS_GUESSED
;
1955 /* If we have likely upper bound, trust it for very small iteration
1956 counts. Such loops would otherwise get mispredicted by standard
1957 LOOP_EXIT heuristics. */
1958 else if (n_exits
== 1
1959 && likely_max_stmt_executions (loop
, &nit
)
1961 RDIV (REG_BR_PROB_BASE
,
1965 ? PRED_LOOP_EXIT_WITH_RECURSION
1966 : PRED_LOOP_EXIT
].hitrate
)))
1968 nitercst
= nit
.to_shwi ();
1969 predictor
= PRED_LOOP_ITERATIONS_MAX
;
1973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1974 fprintf (dump_file
, "Nothing known about exit %i->%i.\n",
1975 ex
->src
->index
, ex
->dest
->index
);
1979 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1980 fprintf (dump_file
, "Recording prediction to %i iterations by %s.\n",
1981 (int)nitercst
, predictor_info
[predictor
].name
);
1982 /* If the prediction for number of iterations is zero, do not
1983 predict the exit edges. */
1987 probability
= RDIV (REG_BR_PROB_BASE
, nitercst
);
1988 predict_edge (ex
, predictor
, probability
);
1992 /* Find information about loop bound variables. */
1993 for (nb_iter
= loop
->bounds
; nb_iter
;
1994 nb_iter
= nb_iter
->next
)
1996 && gimple_code (nb_iter
->stmt
) == GIMPLE_COND
)
1998 stmt
= as_a
<gcond
*> (nb_iter
->stmt
);
2001 if (!stmt
&& last_stmt (loop
->header
)
2002 && gimple_code (last_stmt (loop
->header
)) == GIMPLE_COND
)
2003 stmt
= as_a
<gcond
*> (last_stmt (loop
->header
));
2005 is_comparison_with_loop_invariant_p (stmt
, loop
,
2011 bbs
= get_loop_body (loop
);
2013 for (j
= 0; j
< loop
->num_nodes
; j
++)
2020 /* Bypass loop heuristics on continue statement. These
2021 statements construct loops via "non-loop" constructs
2022 in the source language and are better to be handled
2024 if (predicted_by_p (bb
, PRED_CONTINUE
))
2026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2027 fprintf (dump_file
, "BB %i predicted by continue.\n",
2032 /* If we already used more reliable loop exit predictors, do not
2033 bother with PRED_LOOP_EXIT. */
2034 if (!predicted_by_loop_heuristics_p (bb
))
2036 /* For loop with many exits we don't want to predict all exits
2037 with the pretty large probability, because if all exits are
2038 considered in row, the loop would be predicted to iterate
2039 almost never. The code to divide probability by number of
2040 exits is very rough. It should compute the number of exits
2041 taken in each patch through function (not the overall number
2042 of exits that might be a lot higher for loops with wide switch
2043 statements in them) and compute n-th square root.
2045 We limit the minimal probability by 2% to avoid
2046 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
2047 as this was causing regression in perl benchmark containing such
2050 int probability
= ((REG_BR_PROB_BASE
2053 ? PRED_LOOP_EXIT_WITH_RECURSION
2054 : PRED_LOOP_EXIT
].hitrate
)
2056 if (probability
< HITRATE (2))
2057 probability
= HITRATE (2);
2058 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2059 if (e
->dest
->index
< NUM_FIXED_BLOCKS
2060 || !flow_bb_inside_loop_p (loop
, e
->dest
))
2062 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2064 "Predicting exit %i->%i with prob %i.\n",
2065 e
->src
->index
, e
->dest
->index
, probability
);
2067 recursion
? PRED_LOOP_EXIT_WITH_RECURSION
2068 : PRED_LOOP_EXIT
, probability
);
2072 predict_iv_comparison (loop
, bb
, loop_bound_var
, loop_iv_base
,
2074 tree_to_shwi (loop_bound_step
));
2077 /* In the following code
2082 guess that cond is unlikely. */
2083 if (loop_outer (loop
)->num
)
2085 basic_block bb
= NULL
;
2086 edge preheader_edge
= loop_preheader_edge (loop
);
2088 if (single_pred_p (preheader_edge
->src
)
2089 && single_succ_p (preheader_edge
->src
))
2090 preheader_edge
= single_pred_edge (preheader_edge
->src
);
2092 gimple
*stmt
= last_stmt (preheader_edge
->src
);
2093 /* Pattern match fortran loop preheader:
2094 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2095 _17 = (logical(kind=4)) _16;
2101 Loop guard branch prediction says nothing about duplicated loop
2102 headers produced by fortran frontend and in this case we want
2103 to predict paths leading to this preheader. */
2106 && gimple_code (stmt
) == GIMPLE_COND
2107 && gimple_cond_code (stmt
) == NE_EXPR
2108 && TREE_CODE (gimple_cond_lhs (stmt
)) == SSA_NAME
2109 && integer_zerop (gimple_cond_rhs (stmt
)))
2111 gimple
*call_stmt
= SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt
));
2112 if (gimple_code (call_stmt
) == GIMPLE_ASSIGN
2113 && gimple_expr_code (call_stmt
) == NOP_EXPR
2114 && TREE_CODE (gimple_assign_rhs1 (call_stmt
)) == SSA_NAME
)
2115 call_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt
));
2116 if (gimple_call_internal_p (call_stmt
, IFN_BUILTIN_EXPECT
)
2117 && TREE_CODE (gimple_call_arg (call_stmt
, 2)) == INTEGER_CST
2118 && tree_fits_uhwi_p (gimple_call_arg (call_stmt
, 2))
2119 && tree_to_uhwi (gimple_call_arg (call_stmt
, 2))
2120 == PRED_FORTRAN_LOOP_PREHEADER
)
2121 bb
= preheader_edge
->src
;
2125 if (!dominated_by_p (CDI_DOMINATORS
,
2126 loop_outer (loop
)->latch
, loop
->header
))
2127 predict_paths_leading_to_edge (loop_preheader_edge (loop
),
2129 ? PRED_LOOP_GUARD_WITH_RECURSION
2136 if (!dominated_by_p (CDI_DOMINATORS
,
2137 loop_outer (loop
)->latch
, bb
))
2138 predict_paths_leading_to (bb
,
2140 ? PRED_LOOP_GUARD_WITH_RECURSION
2147 /* Free basic blocks from get_loop_body. */
2152 /* Attempt to predict probabilities of BB outgoing edges using local
2155 bb_estimate_probability_locally (basic_block bb
)
2157 rtx_insn
*last_insn
= BB_END (bb
);
2160 if (! can_predict_insn_p (last_insn
))
2162 cond
= get_condition (last_insn
, NULL
, false, false);
2166 /* Try "pointer heuristic."
2167 A comparison ptr == 0 is predicted as false.
2168 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2169 if (COMPARISON_P (cond
)
2170 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
2171 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
2173 if (GET_CODE (cond
) == EQ
)
2174 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
2175 else if (GET_CODE (cond
) == NE
)
2176 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
2180 /* Try "opcode heuristic."
2181 EQ tests are usually false and NE tests are usually true. Also,
2182 most quantities are positive, so we can make the appropriate guesses
2183 about signed comparisons against zero. */
2184 switch (GET_CODE (cond
))
2187 /* Unconditional branch. */
2188 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
2189 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
2194 /* Floating point comparisons appears to behave in a very
2195 unpredictable way because of special role of = tests in
2197 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
2199 /* Comparisons with 0 are often used for booleans and there is
2200 nothing useful to predict about them. */
2201 else if (XEXP (cond
, 1) == const0_rtx
2202 || XEXP (cond
, 0) == const0_rtx
)
2205 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
2210 /* Floating point comparisons appears to behave in a very
2211 unpredictable way because of special role of = tests in
2213 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
2215 /* Comparisons with 0 are often used for booleans and there is
2216 nothing useful to predict about them. */
2217 else if (XEXP (cond
, 1) == const0_rtx
2218 || XEXP (cond
, 0) == const0_rtx
)
2221 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
2225 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
2229 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
2234 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
2235 || XEXP (cond
, 1) == constm1_rtx
)
2236 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
2241 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
2242 || XEXP (cond
, 1) == constm1_rtx
)
2243 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
2251 /* Set edge->probability for each successor edge of BB. */
2253 guess_outgoing_edge_probabilities (basic_block bb
)
2255 bb_estimate_probability_locally (bb
);
2256 combine_predictions_for_insn (BB_END (bb
), bb
);
2259 static tree
expr_expected_value (tree
, bitmap
, enum br_predictor
*predictor
);
2261 /* Helper function for expr_expected_value. */
2264 expr_expected_value_1 (tree type
, tree op0
, enum tree_code code
,
2265 tree op1
, bitmap visited
, enum br_predictor
*predictor
)
2270 *predictor
= PRED_UNCONDITIONAL
;
2272 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
2274 if (TREE_CONSTANT (op0
))
2277 if (code
== IMAGPART_EXPR
)
2279 if (TREE_CODE (TREE_OPERAND (op0
, 0)) == SSA_NAME
)
2281 def
= SSA_NAME_DEF_STMT (TREE_OPERAND (op0
, 0));
2282 if (is_gimple_call (def
)
2283 && gimple_call_internal_p (def
)
2284 && (gimple_call_internal_fn (def
)
2285 == IFN_ATOMIC_COMPARE_EXCHANGE
))
2287 /* Assume that any given atomic operation has low contention,
2288 and thus the compare-and-swap operation succeeds. */
2290 *predictor
= PRED_COMPARE_AND_SWAP
;
2291 return build_one_cst (TREE_TYPE (op0
));
2296 if (code
!= SSA_NAME
)
2299 def
= SSA_NAME_DEF_STMT (op0
);
2301 /* If we were already here, break the infinite cycle. */
2302 if (!bitmap_set_bit (visited
, SSA_NAME_VERSION (op0
)))
2305 if (gimple_code (def
) == GIMPLE_PHI
)
2307 /* All the arguments of the PHI node must have the same constant
2309 int i
, n
= gimple_phi_num_args (def
);
2310 tree val
= NULL
, new_val
;
2312 for (i
= 0; i
< n
; i
++)
2314 tree arg
= PHI_ARG_DEF (def
, i
);
2315 enum br_predictor predictor2
;
2317 /* If this PHI has itself as an argument, we cannot
2318 determine the string length of this argument. However,
2319 if we can find an expected constant value for the other
2320 PHI args then we can still be sure that this is
2321 likely a constant. So be optimistic and just
2322 continue with the next argument. */
2323 if (arg
== PHI_RESULT (def
))
2326 new_val
= expr_expected_value (arg
, visited
, &predictor2
);
2328 /* It is difficult to combine value predictors. Simply assume
2329 that later predictor is weaker and take its prediction. */
2330 if (predictor
&& *predictor
< predictor2
)
2331 *predictor
= predictor2
;
2336 else if (!operand_equal_p (val
, new_val
, false))
2341 if (is_gimple_assign (def
))
2343 if (gimple_assign_lhs (def
) != op0
)
2346 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def
)),
2347 gimple_assign_rhs1 (def
),
2348 gimple_assign_rhs_code (def
),
2349 gimple_assign_rhs2 (def
),
2350 visited
, predictor
);
2353 if (is_gimple_call (def
))
2355 tree decl
= gimple_call_fndecl (def
);
2358 if (gimple_call_internal_p (def
)
2359 && gimple_call_internal_fn (def
) == IFN_BUILTIN_EXPECT
)
2361 gcc_assert (gimple_call_num_args (def
) == 3);
2362 tree val
= gimple_call_arg (def
, 0);
2363 if (TREE_CONSTANT (val
))
2367 tree val2
= gimple_call_arg (def
, 2);
2368 gcc_assert (TREE_CODE (val2
) == INTEGER_CST
2369 && tree_fits_uhwi_p (val2
)
2370 && tree_to_uhwi (val2
) < END_PREDICTORS
);
2371 *predictor
= (enum br_predictor
) tree_to_uhwi (val2
);
2373 return gimple_call_arg (def
, 1);
2377 if (DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
2378 switch (DECL_FUNCTION_CODE (decl
))
2380 case BUILT_IN_EXPECT
:
2383 if (gimple_call_num_args (def
) != 2)
2385 val
= gimple_call_arg (def
, 0);
2386 if (TREE_CONSTANT (val
))
2389 *predictor
= PRED_BUILTIN_EXPECT
;
2390 return gimple_call_arg (def
, 1);
2393 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N
:
2394 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1
:
2395 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2
:
2396 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4
:
2397 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8
:
2398 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16
:
2399 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE
:
2400 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N
:
2401 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
:
2402 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2
:
2403 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4
:
2404 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8
:
2405 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16
:
2406 /* Assume that any given atomic operation has low contention,
2407 and thus the compare-and-swap operation succeeds. */
2409 *predictor
= PRED_COMPARE_AND_SWAP
;
2410 return boolean_true_node
;
2419 if (get_gimple_rhs_class (code
) == GIMPLE_BINARY_RHS
)
2422 enum br_predictor predictor2
;
2423 op0
= expr_expected_value (op0
, visited
, predictor
);
2426 op1
= expr_expected_value (op1
, visited
, &predictor2
);
2427 if (predictor
&& *predictor
< predictor2
)
2428 *predictor
= predictor2
;
2431 res
= fold_build2 (code
, type
, op0
, op1
);
2432 if (TREE_CONSTANT (res
))
2436 if (get_gimple_rhs_class (code
) == GIMPLE_UNARY_RHS
)
2439 op0
= expr_expected_value (op0
, visited
, predictor
);
2442 res
= fold_build1 (code
, type
, op0
);
2443 if (TREE_CONSTANT (res
))
2450 /* Return constant EXPR will likely have at execution time, NULL if unknown.
2451 The function is used by builtin_expect branch predictor so the evidence
2452 must come from this construct and additional possible constant folding.
2454 We may want to implement more involved value guess (such as value range
2455 propagation based prediction), but such tricks shall go to new
2459 expr_expected_value (tree expr
, bitmap visited
,
2460 enum br_predictor
*predictor
)
2462 enum tree_code code
;
2465 if (TREE_CONSTANT (expr
))
2468 *predictor
= PRED_UNCONDITIONAL
;
2472 extract_ops_from_tree (expr
, &code
, &op0
, &op1
);
2473 return expr_expected_value_1 (TREE_TYPE (expr
),
2474 op0
, code
, op1
, visited
, predictor
);
2477 /* Predict using opcode of the last statement in basic block. */
2479 tree_predict_by_opcode (basic_block bb
)
2481 gimple
*stmt
= last_stmt (bb
);
2488 enum br_predictor predictor
;
2490 if (!stmt
|| gimple_code (stmt
) != GIMPLE_COND
)
2492 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
2493 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
2495 op0
= gimple_cond_lhs (stmt
);
2496 op1
= gimple_cond_rhs (stmt
);
2497 cmp
= gimple_cond_code (stmt
);
2498 type
= TREE_TYPE (op0
);
2499 val
= expr_expected_value_1 (boolean_type_node
, op0
, cmp
, op1
, auto_bitmap (),
2501 if (val
&& TREE_CODE (val
) == INTEGER_CST
)
2503 if (predictor
== PRED_BUILTIN_EXPECT
)
2505 int percent
= PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY
);
2507 gcc_assert (percent
>= 0 && percent
<= 100);
2508 if (integer_zerop (val
))
2509 percent
= 100 - percent
;
2510 predict_edge (then_edge
, PRED_BUILTIN_EXPECT
, HITRATE (percent
));
2513 predict_edge_def (then_edge
, predictor
,
2514 integer_zerop (val
) ? NOT_TAKEN
: TAKEN
);
2516 /* Try "pointer heuristic."
2517 A comparison ptr == 0 is predicted as false.
2518 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2519 if (POINTER_TYPE_P (type
))
2522 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
2523 else if (cmp
== NE_EXPR
)
2524 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
2528 /* Try "opcode heuristic."
2529 EQ tests are usually false and NE tests are usually true. Also,
2530 most quantities are positive, so we can make the appropriate guesses
2531 about signed comparisons against zero. */
2536 /* Floating point comparisons appears to behave in a very
2537 unpredictable way because of special role of = tests in
2539 if (FLOAT_TYPE_P (type
))
2541 /* Comparisons with 0 are often used for booleans and there is
2542 nothing useful to predict about them. */
2543 else if (integer_zerop (op0
) || integer_zerop (op1
))
2546 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
2551 /* Floating point comparisons appears to behave in a very
2552 unpredictable way because of special role of = tests in
2554 if (FLOAT_TYPE_P (type
))
2556 /* Comparisons with 0 are often used for booleans and there is
2557 nothing useful to predict about them. */
2558 else if (integer_zerop (op0
)
2559 || integer_zerop (op1
))
2562 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
2566 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
2569 case UNORDERED_EXPR
:
2570 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
2575 if (integer_zerop (op1
)
2576 || integer_onep (op1
)
2577 || integer_all_onesp (op1
)
2580 || real_minus_onep (op1
))
2581 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
2586 if (integer_zerop (op1
)
2587 || integer_onep (op1
)
2588 || integer_all_onesp (op1
)
2591 || real_minus_onep (op1
))
2592 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
2600 /* Returns TRUE if the STMT is exit(0) like statement. */
2603 is_exit_with_zero_arg (const gimple
*stmt
)
2605 /* This is not exit, _exit or _Exit. */
2606 if (!gimple_call_builtin_p (stmt
, BUILT_IN_EXIT
)
2607 && !gimple_call_builtin_p (stmt
, BUILT_IN__EXIT
)
2608 && !gimple_call_builtin_p (stmt
, BUILT_IN__EXIT2
))
2611 /* Argument is an interger zero. */
2612 return integer_zerop (gimple_call_arg (stmt
, 0));
2615 /* Try to guess whether the value of return means error code. */
2617 static enum br_predictor
2618 return_prediction (tree val
, enum prediction
*prediction
)
2622 return PRED_NO_PREDICTION
;
2623 /* Different heuristics for pointers and scalars. */
2624 if (POINTER_TYPE_P (TREE_TYPE (val
)))
2626 /* NULL is usually not returned. */
2627 if (integer_zerop (val
))
2629 *prediction
= NOT_TAKEN
;
2630 return PRED_NULL_RETURN
;
2633 else if (INTEGRAL_TYPE_P (TREE_TYPE (val
)))
2635 /* Negative return values are often used to indicate
2637 if (TREE_CODE (val
) == INTEGER_CST
2638 && tree_int_cst_sgn (val
) < 0)
2640 *prediction
= NOT_TAKEN
;
2641 return PRED_NEGATIVE_RETURN
;
2643 /* Constant return values seems to be commonly taken.
2644 Zero/one often represent booleans so exclude them from the
2646 if (TREE_CONSTANT (val
)
2647 && (!integer_zerop (val
) && !integer_onep (val
)))
2649 *prediction
= NOT_TAKEN
;
2650 return PRED_CONST_RETURN
;
2653 return PRED_NO_PREDICTION
;
2656 /* Return zero if phi result could have values other than -1, 0 or 1,
2657 otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
2658 values are used or likely. */
2661 zero_one_minusone (gphi
*phi
, int limit
)
2663 int phi_num_args
= gimple_phi_num_args (phi
);
2665 for (int i
= 0; i
< phi_num_args
; i
++)
2667 tree t
= PHI_ARG_DEF (phi
, i
);
2668 if (TREE_CODE (t
) != INTEGER_CST
)
2670 wide_int w
= wi::to_wide (t
);
2680 for (int i
= 0; i
< phi_num_args
; i
++)
2682 tree t
= PHI_ARG_DEF (phi
, i
);
2683 if (TREE_CODE (t
) == INTEGER_CST
)
2685 if (TREE_CODE (t
) != SSA_NAME
)
2687 gimple
*g
= SSA_NAME_DEF_STMT (t
);
2688 if (gimple_code (g
) == GIMPLE_PHI
&& limit
> 0)
2689 if (int r
= zero_one_minusone (as_a
<gphi
*> (g
), limit
- 1))
2694 if (!is_gimple_assign (g
))
2696 if (gimple_assign_cast_p (g
))
2698 tree rhs1
= gimple_assign_rhs1 (g
);
2699 if (TREE_CODE (rhs1
) != SSA_NAME
2700 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
2701 || TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2702 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2707 if (TREE_CODE_CLASS (gimple_assign_rhs_code (g
)) != tcc_comparison
)
2714 /* Find the basic block with return expression and look up for possible
2715 return value trying to apply RETURN_PREDICTION heuristics. */
2717 apply_return_prediction (void)
2719 greturn
*return_stmt
= NULL
;
2723 int phi_num_args
, i
;
2724 enum br_predictor pred
;
2725 enum prediction direction
;
2728 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
2730 gimple
*last
= last_stmt (e
->src
);
2732 && gimple_code (last
) == GIMPLE_RETURN
)
2734 return_stmt
= as_a
<greturn
*> (last
);
2740 return_val
= gimple_return_retval (return_stmt
);
2743 if (TREE_CODE (return_val
) != SSA_NAME
2744 || !SSA_NAME_DEF_STMT (return_val
)
2745 || gimple_code (SSA_NAME_DEF_STMT (return_val
)) != GIMPLE_PHI
)
2747 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (return_val
));
2748 phi_num_args
= gimple_phi_num_args (phi
);
2749 pred
= return_prediction (PHI_ARG_DEF (phi
, 0), &direction
);
2751 /* Avoid the case where the function returns -1, 0 and 1 values and
2752 nothing else. Those could be qsort etc. comparison functions
2753 where the negative return isn't less probable than positive.
2754 For this require that the function returns at least -1 or 1
2755 or -1 and a boolean value or comparison result, so that functions
2756 returning just -1 and 0 are treated as if -1 represents error value. */
2757 if (INTEGRAL_TYPE_P (TREE_TYPE (return_val
))
2758 && !TYPE_UNSIGNED (TREE_TYPE (return_val
))
2759 && TYPE_PRECISION (TREE_TYPE (return_val
)) > 1)
2760 if (int r
= zero_one_minusone (phi
, 3))
2761 if ((r
& (1 | 4)) == (1 | 4))
2764 /* Avoid the degenerate case where all return values form the function
2765 belongs to same category (ie they are all positive constants)
2766 so we can hardly say something about them. */
2767 for (i
= 1; i
< phi_num_args
; i
++)
2768 if (pred
!= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
))
2770 if (i
!= phi_num_args
)
2771 for (i
= 0; i
< phi_num_args
; i
++)
2773 pred
= return_prediction (PHI_ARG_DEF (phi
, i
), &direction
);
2774 if (pred
!= PRED_NO_PREDICTION
)
2775 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi
, i
), pred
,
2780 /* Look for basic block that contains unlikely to happen events
2781 (such as noreturn calls) and mark all paths leading to execution
2782 of this basic blocks as unlikely. */
2785 tree_bb_level_predictions (void)
2788 bool has_return_edges
= false;
2792 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
2793 if (!unlikely_executed_edge_p (e
) && !(e
->flags
& EDGE_ABNORMAL_CALL
))
2795 has_return_edges
= true;
2799 apply_return_prediction ();
2801 FOR_EACH_BB_FN (bb
, cfun
)
2803 gimple_stmt_iterator gsi
;
2805 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2807 gimple
*stmt
= gsi_stmt (gsi
);
2810 if (is_gimple_call (stmt
))
2812 if (gimple_call_noreturn_p (stmt
)
2814 && !is_exit_with_zero_arg (stmt
))
2815 predict_paths_leading_to (bb
, PRED_NORETURN
,
2817 decl
= gimple_call_fndecl (stmt
);
2819 && lookup_attribute ("cold",
2820 DECL_ATTRIBUTES (decl
)))
2821 predict_paths_leading_to (bb
, PRED_COLD_FUNCTION
,
2823 if (decl
&& recursive_call_p (current_function_decl
, decl
))
2824 predict_paths_leading_to (bb
, PRED_RECURSIVE_CALL
,
2827 else if (gimple_code (stmt
) == GIMPLE_PREDICT
)
2829 predict_paths_leading_to (bb
, gimple_predict_predictor (stmt
),
2830 gimple_predict_outcome (stmt
));
2831 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2832 hints to callers. */
2838 /* Callback for hash_map::traverse, asserts that the pointer map is
2842 assert_is_empty (const_basic_block
const &, edge_prediction
*const &value
,
2845 gcc_assert (!value
);
2849 /* Predict branch probabilities and estimate profile for basic block BB.
2850 When LOCAL_ONLY is set do not use any global properties of CFG. */
2853 tree_estimate_probability_bb (basic_block bb
, bool local_only
)
2858 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2860 /* Look for block we are guarding (ie we dominate it,
2861 but it doesn't postdominate us). */
2862 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
) && e
->dest
!= bb
2864 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
2865 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
2867 gimple_stmt_iterator bi
;
2869 /* The call heuristic claims that a guarded function call
2870 is improbable. This is because such calls are often used
2871 to signal exceptional situations such as printing error
2873 for (bi
= gsi_start_bb (e
->dest
); !gsi_end_p (bi
);
2876 gimple
*stmt
= gsi_stmt (bi
);
2877 if (is_gimple_call (stmt
)
2878 && !gimple_inexpensive_call_p (as_a
<gcall
*> (stmt
))
2879 /* Constant and pure calls are hardly used to signalize
2880 something exceptional. */
2881 && gimple_has_side_effects (stmt
))
2883 if (gimple_call_fndecl (stmt
))
2884 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
2885 else if (virtual_method_call_p (gimple_call_fn (stmt
)))
2886 predict_edge_def (e
, PRED_POLYMORPHIC_CALL
, NOT_TAKEN
);
2888 predict_edge_def (e
, PRED_INDIR_CALL
, TAKEN
);
2894 tree_predict_by_opcode (bb
);
2897 /* Predict branch probabilities and estimate profile of the tree CFG.
2898 This function can be called from the loop optimizers to recompute
2899 the profile information.
2900 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2903 tree_estimate_probability (bool dry_run
)
2907 add_noreturn_fake_exit_edges ();
2908 connect_infinite_loops_to_exit ();
2909 /* We use loop_niter_by_eval, which requires that the loops have
2911 create_preheaders (CP_SIMPLE_PREHEADERS
);
2912 calculate_dominance_info (CDI_POST_DOMINATORS
);
2914 bb_predictions
= new hash_map
<const_basic_block
, edge_prediction
*>;
2915 tree_bb_level_predictions ();
2916 record_loop_exits ();
2918 if (number_of_loops (cfun
) > 1)
2921 FOR_EACH_BB_FN (bb
, cfun
)
2922 tree_estimate_probability_bb (bb
, false);
2924 FOR_EACH_BB_FN (bb
, cfun
)
2925 combine_predictions_for_bb (bb
, dry_run
);
2928 bb_predictions
->traverse
<void *, assert_is_empty
> (NULL
);
2930 delete bb_predictions
;
2931 bb_predictions
= NULL
;
2934 estimate_bb_frequencies (false);
2935 free_dominance_info (CDI_POST_DOMINATORS
);
2936 remove_fake_exit_edges ();
2939 /* Set edge->probability for each successor edge of BB. */
2941 tree_guess_outgoing_edge_probabilities (basic_block bb
)
2943 bb_predictions
= new hash_map
<const_basic_block
, edge_prediction
*>;
2944 tree_estimate_probability_bb (bb
, true);
2945 combine_predictions_for_bb (bb
, false);
2947 bb_predictions
->traverse
<void *, assert_is_empty
> (NULL
);
2948 delete bb_predictions
;
2949 bb_predictions
= NULL
;
2952 /* Predict edges to successors of CUR whose sources are not postdominated by
2953 BB by PRED and recurse to all postdominators. */
2956 predict_paths_for_bb (basic_block cur
, basic_block bb
,
2957 enum br_predictor pred
,
2958 enum prediction taken
,
2959 bitmap visited
, struct loop
*in_loop
= NULL
)
2965 /* If we exited the loop or CUR is unconditional in the loop, there is
2968 && (!flow_bb_inside_loop_p (in_loop
, cur
)
2969 || dominated_by_p (CDI_DOMINATORS
, in_loop
->latch
, cur
)))
2972 /* We are looking for all edges forming edge cut induced by
2973 set of all blocks postdominated by BB. */
2974 FOR_EACH_EDGE (e
, ei
, cur
->preds
)
2975 if (e
->src
->index
>= NUM_FIXED_BLOCKS
2976 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, bb
))
2982 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2983 if (unlikely_executed_edge_p (e
))
2985 gcc_assert (bb
== cur
|| dominated_by_p (CDI_POST_DOMINATORS
, cur
, bb
));
2987 /* See if there is an edge from e->src that is not abnormal
2988 and does not lead to BB and does not exit the loop. */
2989 FOR_EACH_EDGE (e2
, ei2
, e
->src
->succs
)
2991 && !unlikely_executed_edge_p (e2
)
2992 && !dominated_by_p (CDI_POST_DOMINATORS
, e2
->dest
, bb
)
2993 && (!in_loop
|| !loop_exit_edge_p (in_loop
, e2
)))
2999 /* If there is non-abnormal path leaving e->src, predict edge
3000 using predictor. Otherwise we need to look for paths
3003 The second may lead to infinite loop in the case we are predicitng
3004 regions that are only reachable by abnormal edges. We simply
3005 prevent visiting given BB twice. */
3008 if (!edge_predicted_by_p (e
, pred
, taken
))
3009 predict_edge_def (e
, pred
, taken
);
3011 else if (bitmap_set_bit (visited
, e
->src
->index
))
3012 predict_paths_for_bb (e
->src
, e
->src
, pred
, taken
, visited
, in_loop
);
3014 for (son
= first_dom_son (CDI_POST_DOMINATORS
, cur
);
3016 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
3017 predict_paths_for_bb (son
, bb
, pred
, taken
, visited
, in_loop
);
3020 /* Sets branch probabilities according to PREDiction and
3024 predict_paths_leading_to (basic_block bb
, enum br_predictor pred
,
3025 enum prediction taken
, struct loop
*in_loop
)
3027 predict_paths_for_bb (bb
, bb
, pred
, taken
, auto_bitmap (), in_loop
);
3030 /* Like predict_paths_leading_to but take edge instead of basic block. */
3033 predict_paths_leading_to_edge (edge e
, enum br_predictor pred
,
3034 enum prediction taken
, struct loop
*in_loop
)
3036 bool has_nonloop_edge
= false;
3040 basic_block bb
= e
->src
;
3041 FOR_EACH_EDGE (e2
, ei
, bb
->succs
)
3042 if (e2
->dest
!= e
->src
&& e2
->dest
!= e
->dest
3043 && !unlikely_executed_edge_p (e
)
3044 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e2
->dest
))
3046 has_nonloop_edge
= true;
3049 if (!has_nonloop_edge
)
3051 predict_paths_for_bb (bb
, bb
, pred
, taken
, auto_bitmap (), in_loop
);
3054 predict_edge_def (e
, pred
, taken
);
3057 /* This is used to carry information about basic blocks. It is
3058 attached to the AUX field of the standard CFG block. */
3062 /* Estimated frequency of execution of basic_block. */
3065 /* To keep queue of basic blocks to process. */
3068 /* Number of predecessors we need to visit first. */
3072 /* Similar information for edges. */
3073 struct edge_prob_info
3075 /* In case edge is a loopback edge, the probability edge will be reached
3076 in case header is. Estimated number of iterations of the loop can be
3077 then computed as 1 / (1 - back_edge_prob). */
3078 sreal back_edge_prob
;
3079 /* True if the edge is a loopback edge in the natural loop. */
3080 unsigned int back_edge
:1;
3083 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
3085 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
3087 /* Helper function for estimate_bb_frequencies.
3088 Propagate the frequencies in blocks marked in
3089 TOVISIT, starting in HEAD. */
3092 propagate_freq (basic_block head
, bitmap tovisit
)
3101 /* For each basic block we need to visit count number of his predecessors
3102 we need to visit first. */
3103 EXECUTE_IF_SET_IN_BITMAP (tovisit
, 0, i
, bi
)
3108 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
3110 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3112 bool visit
= bitmap_bit_p (tovisit
, e
->src
->index
);
3114 if (visit
&& !(e
->flags
& EDGE_DFS_BACK
))
3116 else if (visit
&& dump_file
&& !EDGE_INFO (e
)->back_edge
)
3118 "Irreducible region hit, ignoring edge to %i->%i\n",
3119 e
->src
->index
, bb
->index
);
3121 BLOCK_INFO (bb
)->npredecessors
= count
;
3122 /* When function never returns, we will never process exit block. */
3123 if (!count
&& bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
3124 bb
->count
= profile_count::zero ();
3127 BLOCK_INFO (head
)->frequency
= 1;
3129 for (bb
= head
; bb
; bb
= nextbb
)
3132 sreal cyclic_probability
= 0;
3133 sreal frequency
= 0;
3135 nextbb
= BLOCK_INFO (bb
)->next
;
3136 BLOCK_INFO (bb
)->next
= NULL
;
3138 /* Compute frequency of basic block. */
3142 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3143 gcc_assert (!bitmap_bit_p (tovisit
, e
->src
->index
)
3144 || (e
->flags
& EDGE_DFS_BACK
));
3146 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3147 if (EDGE_INFO (e
)->back_edge
)
3149 cyclic_probability
+= EDGE_INFO (e
)->back_edge_prob
;
3151 else if (!(e
->flags
& EDGE_DFS_BACK
))
3153 /* frequency += (e->probability
3154 * BLOCK_INFO (e->src)->frequency /
3155 REG_BR_PROB_BASE); */
3157 /* FIXME: Graphite is producing edges with no profile. Once
3158 this is fixed, drop this. */
3159 sreal tmp
= e
->probability
.initialized_p () ?
3160 e
->probability
.to_reg_br_prob_base () : 0;
3161 tmp
*= BLOCK_INFO (e
->src
)->frequency
;
3162 tmp
*= real_inv_br_prob_base
;
3166 if (cyclic_probability
== 0)
3168 BLOCK_INFO (bb
)->frequency
= frequency
;
3172 if (cyclic_probability
> real_almost_one
)
3173 cyclic_probability
= real_almost_one
;
3175 /* BLOCK_INFO (bb)->frequency = frequency
3176 / (1 - cyclic_probability) */
3178 cyclic_probability
= sreal (1) - cyclic_probability
;
3179 BLOCK_INFO (bb
)->frequency
= frequency
/ cyclic_probability
;
3183 bitmap_clear_bit (tovisit
, bb
->index
);
3185 e
= find_edge (bb
, head
);
3188 /* EDGE_INFO (e)->back_edge_prob
3189 = ((e->probability * BLOCK_INFO (bb)->frequency)
3190 / REG_BR_PROB_BASE); */
3192 /* FIXME: Graphite is producing edges with no profile. Once
3193 this is fixed, drop this. */
3194 sreal tmp
= e
->probability
.initialized_p () ?
3195 e
->probability
.to_reg_br_prob_base () : 0;
3196 tmp
*= BLOCK_INFO (bb
)->frequency
;
3197 EDGE_INFO (e
)->back_edge_prob
= tmp
* real_inv_br_prob_base
;
3200 /* Propagate to successor blocks. */
3201 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3202 if (!(e
->flags
& EDGE_DFS_BACK
)
3203 && BLOCK_INFO (e
->dest
)->npredecessors
)
3205 BLOCK_INFO (e
->dest
)->npredecessors
--;
3206 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
3211 BLOCK_INFO (last
)->next
= e
->dest
;
3219 /* Estimate frequencies in loops at same nest level. */
3222 estimate_loops_at_level (struct loop
*first_loop
)
3226 for (loop
= first_loop
; loop
; loop
= loop
->next
)
3231 auto_bitmap tovisit
;
3233 estimate_loops_at_level (loop
->inner
);
3235 /* Find current loop back edge and mark it. */
3236 e
= loop_latch_edge (loop
);
3237 EDGE_INFO (e
)->back_edge
= 1;
3239 bbs
= get_loop_body (loop
);
3240 for (i
= 0; i
< loop
->num_nodes
; i
++)
3241 bitmap_set_bit (tovisit
, bbs
[i
]->index
);
3243 propagate_freq (loop
->header
, tovisit
);
3247 /* Propagates frequencies through structure of loops. */
3250 estimate_loops (void)
3252 auto_bitmap tovisit
;
3255 /* Start by estimating the frequencies in the loops. */
3256 if (number_of_loops (cfun
) > 1)
3257 estimate_loops_at_level (current_loops
->tree_root
->inner
);
3259 /* Now propagate the frequencies through all the blocks. */
3260 FOR_ALL_BB_FN (bb
, cfun
)
3262 bitmap_set_bit (tovisit
, bb
->index
);
3264 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun
), tovisit
);
3267 /* Drop the profile for NODE to guessed, and update its frequency based on
3268 whether it is expected to be hot given the CALL_COUNT. */
3271 drop_profile (struct cgraph_node
*node
, profile_count call_count
)
3273 struct function
*fn
= DECL_STRUCT_FUNCTION (node
->decl
);
3274 /* In the case where this was called by another function with a
3275 dropped profile, call_count will be 0. Since there are no
3276 non-zero call counts to this function, we don't know for sure
3277 whether it is hot, and therefore it will be marked normal below. */
3278 bool hot
= maybe_hot_count_p (NULL
, call_count
);
3282 "Dropping 0 profile for %s. %s based on calls.\n",
3284 hot
? "Function is hot" : "Function is normal");
3285 /* We only expect to miss profiles for functions that are reached
3286 via non-zero call edges in cases where the function may have
3287 been linked from another module or library (COMDATs and extern
3288 templates). See the comments below for handle_missing_profiles.
3289 Also, only warn in cases where the missing counts exceed the
3290 number of training runs. In certain cases with an execv followed
3291 by a no-return call the profile for the no-return call is not
3292 dumped and there can be a mismatch. */
3293 if (!DECL_COMDAT (node
->decl
) && !DECL_EXTERNAL (node
->decl
)
3294 && call_count
> profile_info
->runs
)
3296 if (flag_profile_correction
)
3300 "Missing counts for called function %s\n",
3301 node
->dump_name ());
3304 warning (0, "Missing counts for called function %s",
3305 node
->dump_name ());
3309 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3310 if (flag_guess_branch_prob
)
3313 = ENTRY_BLOCK_PTR_FOR_FN
3314 (DECL_STRUCT_FUNCTION (node
->decl
))->count
.nonzero_p ();
3315 FOR_ALL_BB_FN (bb
, fn
)
3316 if (clear_zeros
|| !(bb
->count
== profile_count::zero ()))
3317 bb
->count
= bb
->count
.guessed_local ();
3318 DECL_STRUCT_FUNCTION (node
->decl
)->cfg
->count_max
=
3319 DECL_STRUCT_FUNCTION (node
->decl
)->cfg
->count_max
.guessed_local ();
3323 FOR_ALL_BB_FN (bb
, fn
)
3324 bb
->count
= profile_count::uninitialized ();
3325 DECL_STRUCT_FUNCTION (node
->decl
)->cfg
->count_max
3326 = profile_count::uninitialized ();
3330 struct cgraph_edge
*e
;
3331 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3332 e
->count
= gimple_bb (e
->call_stmt
)->count
;
3333 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3334 e
->count
= gimple_bb (e
->call_stmt
)->count
;
3336 profile_status_for_fn (fn
)
3337 = (flag_guess_branch_prob
? PROFILE_GUESSED
: PROFILE_ABSENT
);
3339 = hot
? NODE_FREQUENCY_HOT
: NODE_FREQUENCY_NORMAL
;
3342 /* In the case of COMDAT routines, multiple object files will contain the same
3343 function and the linker will select one for the binary. In that case
3344 all the other copies from the profile instrument binary will be missing
3345 profile counts. Look for cases where this happened, due to non-zero
3346 call counts going to 0-count functions, and drop the profile to guessed
3347 so that we can use the estimated probabilities and avoid optimizing only
3350 The other case where the profile may be missing is when the routine
3351 is not going to be emitted to the object file, e.g. for "extern template"
3352 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3353 all other cases of non-zero calls to 0-count functions. */
3356 handle_missing_profiles (void)
3358 struct cgraph_node
*node
;
3359 int unlikely_count_fraction
= PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION
);
3360 auto_vec
<struct cgraph_node
*, 64> worklist
;
3362 /* See if 0 count function has non-0 count callers. In this case we
3363 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3364 FOR_EACH_DEFINED_FUNCTION (node
)
3366 struct cgraph_edge
*e
;
3367 profile_count call_count
= profile_count::zero ();
3368 gcov_type max_tp_first_run
= 0;
3369 struct function
*fn
= DECL_STRUCT_FUNCTION (node
->decl
);
3371 if (!(node
->count
== profile_count::zero ()))
3373 for (e
= node
->callers
; e
; e
= e
->next_caller
)
3374 if (e
->count
.initialized_p () && e
->count
> 0)
3376 call_count
= call_count
+ e
->count
;
3378 if (e
->caller
->tp_first_run
> max_tp_first_run
)
3379 max_tp_first_run
= e
->caller
->tp_first_run
;
3382 /* If time profile is missing, let assign the maximum that comes from
3383 caller functions. */
3384 if (!node
->tp_first_run
&& max_tp_first_run
)
3385 node
->tp_first_run
= max_tp_first_run
+ 1;
3389 && (call_count
.apply_scale (unlikely_count_fraction
, 1)
3390 >= profile_info
->runs
))
3392 drop_profile (node
, call_count
);
3393 worklist
.safe_push (node
);
3397 /* Propagate the profile dropping to other 0-count COMDATs that are
3398 potentially called by COMDATs we already dropped the profile on. */
3399 while (worklist
.length () > 0)
3401 struct cgraph_edge
*e
;
3403 node
= worklist
.pop ();
3404 for (e
= node
->callees
; e
; e
= e
->next_caller
)
3406 struct cgraph_node
*callee
= e
->callee
;
3407 struct function
*fn
= DECL_STRUCT_FUNCTION (callee
->decl
);
3409 if (callee
->count
> 0)
3411 if ((DECL_COMDAT (callee
->decl
) || DECL_EXTERNAL (callee
->decl
))
3413 && profile_status_for_fn (fn
) == PROFILE_READ
)
3415 drop_profile (node
, profile_count::zero ());
3416 worklist
.safe_push (callee
);
3422 /* Convert counts measured by profile driven feedback to frequencies.
3423 Return nonzero iff there was any nonzero execution count. */
3426 update_max_bb_count (void)
3428 profile_count true_count_max
= profile_count::uninitialized ();
3431 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
3432 true_count_max
= true_count_max
.max (bb
->count
);
3434 cfun
->cfg
->count_max
= true_count_max
;
3436 return true_count_max
.ipa ().nonzero_p ();
3439 /* Return true if function is likely to be expensive, so there is no point to
3440 optimize performance of prologue, epilogue or do inlining at the expense
3441 of code size growth. THRESHOLD is the limit of number of instructions
3442 function can execute at average to be still considered not expensive. */
3445 expensive_function_p (int threshold
)
3449 /* If profile was scaled in a way entry block has count 0, then the function
3450 is deifnitly taking a lot of time. */
3451 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
.nonzero_p ())
3454 profile_count limit
= ENTRY_BLOCK_PTR_FOR_FN
3455 (cfun
)->count
.apply_scale (threshold
, 1);
3456 profile_count sum
= profile_count::zero ();
3457 FOR_EACH_BB_FN (bb
, cfun
)
3461 if (!bb
->count
.initialized_p ())
3464 fprintf (dump_file
, "Function is considered expensive because"
3465 " count of bb %i is not initialized\n", bb
->index
);
3469 FOR_BB_INSNS (bb
, insn
)
3470 if (active_insn_p (insn
))
3481 /* All basic blocks that are reachable only from unlikely basic blocks are
3485 propagate_unlikely_bbs_forward (void)
3487 auto_vec
<basic_block
, 64> worklist
;
3492 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
== profile_count::zero ()))
3494 ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
= (void *)(size_t) 1;
3495 worklist
.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
3497 while (worklist
.length () > 0)
3499 bb
= worklist
.pop ();
3500 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3501 if (!(e
->count () == profile_count::zero ())
3502 && !(e
->dest
->count
== profile_count::zero ())
3505 e
->dest
->aux
= (void *)(size_t) 1;
3506 worklist
.safe_push (e
->dest
);
3511 FOR_ALL_BB_FN (bb
, cfun
)
3515 if (!(bb
->count
== profile_count::zero ())
3516 && (dump_file
&& (dump_flags
& TDF_DETAILS
)))
3518 "Basic block %i is marked unlikely by forward prop\n",
3520 bb
->count
= profile_count::zero ();
3527 /* Determine basic blocks/edges that are known to be unlikely executed and set
3528 their counters to zero.
3529 This is done with first identifying obviously unlikely BBs/edges and then
3530 propagating in both directions. */
3533 determine_unlikely_bbs ()
3536 auto_vec
<basic_block
, 64> worklist
;
3540 FOR_EACH_BB_FN (bb
, cfun
)
3542 if (!(bb
->count
== profile_count::zero ())
3543 && unlikely_executed_bb_p (bb
))
3545 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3546 fprintf (dump_file
, "Basic block %i is locally unlikely\n",
3548 bb
->count
= profile_count::zero ();
3551 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3552 if (!(e
->probability
== profile_probability::never ())
3553 && unlikely_executed_edge_p (e
))
3555 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3556 fprintf (dump_file
, "Edge %i->%i is locally unlikely\n",
3557 bb
->index
, e
->dest
->index
);
3558 e
->probability
= profile_probability::never ();
3561 gcc_checking_assert (!bb
->aux
);
3563 propagate_unlikely_bbs_forward ();
3565 auto_vec
<int, 64> nsuccs
;
3566 nsuccs
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
3567 FOR_ALL_BB_FN (bb
, cfun
)
3568 if (!(bb
->count
== profile_count::zero ())
3569 && bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
3571 nsuccs
[bb
->index
] = 0;
3572 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3573 if (!(e
->probability
== profile_probability::never ())
3574 && !(e
->dest
->count
== profile_count::zero ()))
3575 nsuccs
[bb
->index
]++;
3576 if (!nsuccs
[bb
->index
])
3577 worklist
.safe_push (bb
);
3579 while (worklist
.length () > 0)
3581 bb
= worklist
.pop ();
3582 if (bb
->count
== profile_count::zero ())
3584 if (bb
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
))
3587 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
3588 !gsi_end_p (gsi
); gsi_next (&gsi
))
3589 if (stmt_can_terminate_bb_p (gsi_stmt (gsi
))
3590 /* stmt_can_terminate_bb_p special cases noreturns because it
3591 assumes that fake edges are created. We want to know that
3592 noreturn alone does not imply BB to be unlikely. */
3593 || (is_gimple_call (gsi_stmt (gsi
))
3594 && (gimple_call_flags (gsi_stmt (gsi
)) & ECF_NORETURN
)))
3602 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3604 "Basic block %i is marked unlikely by backward prop\n",
3606 bb
->count
= profile_count::zero ();
3607 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
3608 if (!(e
->probability
== profile_probability::never ()))
3610 if (!(e
->src
->count
== profile_count::zero ()))
3612 gcc_checking_assert (nsuccs
[e
->src
->index
] > 0);
3613 nsuccs
[e
->src
->index
]--;
3614 if (!nsuccs
[e
->src
->index
])
3615 worklist
.safe_push (e
->src
);
3619 /* Finally all edges from non-0 regions to 0 are unlikely. */
3620 FOR_ALL_BB_FN (bb
, cfun
)
3621 if (!(bb
->count
== profile_count::zero ()))
3622 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3623 if (!(e
->probability
== profile_probability::never ())
3624 && e
->dest
->count
== profile_count::zero ())
3626 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3627 fprintf (dump_file
, "Edge %i->%i is unlikely because "
3628 "it enters unlikely block\n",
3629 bb
->index
, e
->dest
->index
);
3630 e
->probability
= profile_probability::never ();
3632 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
== profile_count::zero ())
3633 cgraph_node::get (current_function_decl
)->count
= profile_count::zero ();
3636 /* Estimate and propagate basic block frequencies using the given branch
3637 probabilities. If FORCE is true, the frequencies are used to estimate
3638 the counts even when there are already non-zero profile counts. */
3641 estimate_bb_frequencies (bool force
)
3646 determine_unlikely_bbs ();
3648 if (force
|| profile_status_for_fn (cfun
) != PROFILE_READ
3649 || !update_max_bb_count ())
3651 static int real_values_initialized
= 0;
3653 if (!real_values_initialized
)
3655 real_values_initialized
= 1;
3656 real_br_prob_base
= REG_BR_PROB_BASE
;
3657 /* Scaling frequencies up to maximal profile count may result in
3658 frequent overflows especially when inlining loops.
3659 Small scalling results in unnecesary precision loss. Stay in
3660 the half of the (exponential) range. */
3661 real_bb_freq_max
= (uint64_t)1 << (profile_count::n_bits
/ 2);
3662 real_one_half
= sreal (1, -1);
3663 real_inv_br_prob_base
= sreal (1) / real_br_prob_base
;
3664 real_almost_one
= sreal (1) - real_inv_br_prob_base
;
3667 mark_dfs_back_edges ();
3669 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
))->probability
=
3670 profile_probability::always ();
3672 /* Set up block info for each basic block. */
3673 alloc_aux_for_blocks (sizeof (block_info
));
3674 alloc_aux_for_edges (sizeof (edge_prob_info
));
3675 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
3680 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3682 /* FIXME: Graphite is producing edges with no profile. Once
3683 this is fixed, drop this. */
3684 if (e
->probability
.initialized_p ())
3685 EDGE_INFO (e
)->back_edge_prob
3686 = e
->probability
.to_reg_br_prob_base ();
3688 EDGE_INFO (e
)->back_edge_prob
= REG_BR_PROB_BASE
/ 2;
3689 EDGE_INFO (e
)->back_edge_prob
*= real_inv_br_prob_base
;
3693 /* First compute frequencies locally for each loop from innermost
3694 to outermost to examine frequencies for back edges. */
3698 FOR_EACH_BB_FN (bb
, cfun
)
3699 if (freq_max
< BLOCK_INFO (bb
)->frequency
)
3700 freq_max
= BLOCK_INFO (bb
)->frequency
;
3702 freq_max
= real_bb_freq_max
/ freq_max
;
3705 profile_count ipa_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
.ipa ();
3706 cfun
->cfg
->count_max
= profile_count::uninitialized ();
3707 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, next_bb
)
3709 sreal tmp
= BLOCK_INFO (bb
)->frequency
* freq_max
+ real_one_half
;
3710 profile_count count
= profile_count::from_gcov_type (tmp
.to_int ());
3712 /* If we have profile feedback in which this function was never
3713 executed, then preserve this info. */
3714 if (!(bb
->count
== profile_count::zero ()))
3715 bb
->count
= count
.guessed_local ().combine_with_ipa_count (ipa_count
);
3716 cfun
->cfg
->count_max
= cfun
->cfg
->count_max
.max (bb
->count
);
3719 free_aux_for_blocks ();
3720 free_aux_for_edges ();
3722 compute_function_frequency ();
/* Decide whether function is hot, cold or unlikely executed.  Classifies
   the current function on its cgraph node (node->frequency) using either
   static hints (attributes, ECF flags, special roles) or, when real
   profile feedback was read, the per-basic-block counts.  */

static void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  /* Static constructors and "main" run once at program startup and static
     destructors once at exit; record that on the cgraph node so IPA passes
     can exploit it.  */
  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  /* Without profile feedback, classify purely from static information.  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      /* An IPA profile saying the function was never entered, or an
	 explicit "cold" attribute, marks it unlikely executed.  */
      if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ()
	   && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ()
	      == profile_count::zero ())
	  || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	     != NULL)
	{
	  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
	  warn_function_cold (current_function_decl);
	}
      else if (lookup_attribute ("hot",
				 DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
	node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
	node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only first time try to drop function into unlikely executed.
     After inlining the roundoff errors may confuse us.
     Ipa-profile pass will drop functions only called from unlikely
     functions to unlikely and that is most of what we care about.  */
  if (!cfun->after_inlining)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      warn_function_cold (current_function_decl);
    }
  /* With real profile data, promote to HOT as soon as one hot basic block
     is found; otherwise NORMAL if any block is plausibly executed.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
3783 /* Build PREDICT_EXPR. */
3785 build_predict_expr (enum br_predictor predictor
, enum prediction taken
)
3787 tree t
= build1 (PREDICT_EXPR
, void_type_node
,
3788 build_int_cst (integer_type_node
, predictor
));
3789 SET_PREDICT_EXPR_OUTCOME (t
, taken
);
3794 predictor_name (enum br_predictor predictor
)
3796 return predictor_info
[predictor
].name
;
3799 /* Predict branch probabilities and estimate profile of the tree CFG. */
/* Pass descriptor for the GIMPLE branch-prediction / profile-estimation
   pass; requires a CFG and runs under the branch-probability timevar.  */
const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* GIMPLE pass estimating branch probabilities and the function profile.  */
class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when guessing branch probabilities is enabled
     (-fguess-branch-probability).  */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile
/* Main entry point of the profile-estimation pass: set up loop structures,
   run tree_estimate_probability, and mark the profile as guessed.  */

unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  /* Nothing to do when a guessed profile is already in place.  */
  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    return 0;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* Scalar-evolution analysis (used for loop iteration estimates) is only
     worth initializing when real loops exist beyond the function body.  */
  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability (false);

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  /* Report predicted iteration counts for loops whose header count is
     known; innermost first.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct loop *loop;
      FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
	if (loop->header->count.initialized_p ())
	  fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
		   loop->num,
		   (int) expected_loop_iterations_unbounded (loop));
    }
  return 0;
}
3872 make_pass_profile (gcc::context
*ctxt
)
3874 return new pass_profile (ctxt
);
/* Pass descriptor for stripping prediction hints; the leading '*' in the
   name suppresses dump output for this housekeeping pass.  */
const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* GIMPLE pass removing prediction hints once they have been consumed.  */
class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  /* Cloneable: this pass is instantiated at several points in the
     pass pipeline.  */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  Returns TODO_cleanup_cfg when anything was
   removed, since deleting statements may expose dead control flow.  */

unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple *ass_stmt;
  tree var;
  bool changed = false;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      /* Note: the iterator is advanced manually; removal paths `continue'
	 so gsi_next is skipped after gsi_remove (which already moved on).  */
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple *stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      /* Prediction hints have no runtime effect; just delete.  */
	      gsi_remove (&bi, true);
	      changed = true;
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      /* Match both the user-visible __builtin_expect and the
		 internal IFN_BUILTIN_EXPECT form.  */
	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  /* __builtin_expect (VAL, EXPECTED) evaluates to VAL:
		     replace the call by an assignment of its first argument,
		     or remove it entirely if the result is unused.  */
		  var = gimple_call_lhs (stmt);
		  changed = true;
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return changed ? TODO_cleanup_cfg : 0;
}
3963 make_pass_strip_predict_hints (gcc::context
*ctxt
)
3965 return new pass_strip_predict_hints (ctxt
);
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops frequencies might run
   out of scale and thus needs to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations. This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0. There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  cfun->cfg->count_max = profile_count::uninitialized ();
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    {
      /* Re-estimate from probabilities: fake exit edges keep noreturn
	 paths and infinite loops connected so the propagation converges.  */
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    update_max_bb_count ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}
/* Perform a dry run of the branch prediction pass and report comparison of
   the predicted and real profile into the dump file.  */

void
report_predictor_hitrates (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* Scalar evolutions are only needed when real loops exist.  */
  nb_loops = number_of_loops (cfun);
  if (nb_loops > 1)
    scev_initialize ();

  /* Passing true requests dry-run mode: compare predictions against the
     read profile instead of installing them.  */
  tree_estimate_probability (true);

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
}
/* Force edge E to be cold.
   If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
   keep low probability to represent possible error in a guess.  This is used
   i.e. in case we predict loop to likely iterate given number of times but
   we are not 100% sure.

   This function locally updates profile without attempt to keep global
   consistency which can not be reached in full generality without full profile
   rebuild from probabilities alone.  Doing so is not necessarily a good idea
   because frequencies and counts may be more realistic than probabilities.

   In some cases (such as for elimination of early exits during full loop
   unrolling) the caller can ensure that profile will get consistent
   afterwards.  */

void
force_edge_cold (edge e, bool impossible)
{
  profile_count count_sum = profile_count::zero ();
  profile_probability prob_sum = profile_probability::never ();
  edge_iterator ei;
  edge e2;
  bool uninitialized_exit = false;

  /* When branch probability guesses are not known, then do nothing.  */
  if (!impossible && !e->count ().initialized_p ())
    return;

  profile_probability goal = (impossible ? profile_probability::never ()
			      : profile_probability::very_unlikely ());

  /* If edge is already improbable or cold, just return.  */
  if (e->probability <= goal
      && (!impossible || e->count () == profile_count::zero ()))
    return;
  /* Sum counts and probabilities of the other outgoing edges of E->src.  */
  FOR_EACH_EDGE (e2, ei, e->src->succs)
    if (e2 != e)
      {
	/* NOTE(review): this tests E->flags rather than E2->flags, which is
	   loop-invariant here — looks like it was meant to skip fake
	   sibling edges (e2); confirm against upstream.  */
	if (e->flags & EDGE_FAKE)
	  continue;
	if (e2->count ().initialized_p ())
	  count_sum += e2->count ();
	if (e2->probability.initialized_p ())
	  prob_sum += e2->probability;
	else
	  uninitialized_exit = true;
      }

  /* If we are not guessing profiles but have some other edges out,
     just assume the control flow goes elsewhere.  */
  if (uninitialized_exit)
    e->probability = goal;
  /* If there are other edges out of e->src, redistribute probability
     there.  */
  else if (prob_sum > profile_probability::never ())
    {
      if (!(e->probability < goal))
	e->probability = goal;

      /* Scale factor restoring the invariant that probabilities out of
	 E->src sum to 1 after E was clamped to GOAL.  */
      profile_probability prob_comp = prob_sum / e->probability.invert ();

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Making edge %i->%i %s by redistributing "
		 "probability to other edges.\n",
		 e->src->index, e->dest->index,
		 impossible ? "impossible" : "cold");
      FOR_EACH_EDGE (e2, ei, e->src->succs)
	if (e2 != e)
	  {
	    e2->probability /= prob_comp;
	  }
      if (current_ir_type () != IR_GIMPLE
	  && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	update_br_prob_note (e->src);
    }
  /* If all edges out of e->src are unlikely, the basic block itself
     is unlikely.  */
  else
    {
      if (prob_sum == profile_probability::never ())
	/* E is the sole outgoing edge; it must keep probability 1.  */
	e->probability = profile_probability::always ();
      else
	{
	  if (impossible)
	    e->probability = profile_probability::never ();
	  /* If BB has some edges out that are not impossible, we can not
	     assume that BB itself is.  */
	  impossible = false;
	}
      if (current_ir_type () != IR_GIMPLE
	  && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	update_br_prob_note (e->src);
      if (e->src->count == profile_count::zero ())
	return;
      /* A block with no incoming executed count and only impossible exits
	 is itself impossible — unless some statement in it can terminate
	 execution (trap, noreturn call, EH).  */
      if (count_sum == profile_count::zero () && impossible)
	{
	  bool found = false;
	  if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    ;
	  else if (current_ir_type () == IR_GIMPLE)
	    for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
		 !gsi_end_p (gsi); gsi_next (&gsi))
	      {
		if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
		  {
		    found = true;
		    break;
		  }
	      }
	  /* FIXME: Implement RTL path.  */
	  else
	    found = true;

	  if (!found)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Making bb %i impossible and dropping count to 0.\n",
			 e->src->index);
	      e->src->count = profile_count::zero ();
	      /* Recursively propagate coldness to all predecessors.  */
	      FOR_EACH_EDGE (e2, ei, e->src->preds)
		force_edge_cold (e2, impossible);
	      return;
	    }
	}

      /* If we did not adjust anything, the source basic block has no likely
	 edges leaving in the other direction.  In that case force that bb
	 cold, too.  This in general is difficult task to do, but handle
	 special case when BB has only one predecessor.  This is common case
	 when we are updating after loop transforms.  */
      if (!(prob_sum > profile_probability::never ())
	  && count_sum == profile_count::zero ()
	  && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
	     > (impossible ? 0 : 1))
	{
	  int old_frequency = e->src->count.to_frequency (cfun);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
		     impossible ? "impossible" : "cold");
	  int new_frequency = MIN (e->src->count.to_frequency (cfun),
				   impossible ? 0 : 1);
	  if (impossible)
	    e->src->count = profile_count::zero ();
	  else
	    e->src->count = e->count ().apply_scale (new_frequency,
						     old_frequency);
	  force_edge_cold (single_pred_edge (e->src), impossible);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS)
	       && maybe_hot_bb_p (cfun, e->src))
	fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
		 impossible ? "impossible" : "cold");
    }
}
namespace selftest {

/* Test that value range of predictor values defined in predict.def is
   within range (50, 100].  */

/* One row per predictor: its dump name and expected hitrate.  */
struct branch_predictor
{
  const char *name;
  int probability;
};

/* Expand each predict.def entry into a { name, hitrate } initializer.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },

static void
test_prediction_value_range ()
{
  branch_predictor predictors[] = {
#include "predict.def"
    { NULL, PROB_UNINITIALIZED }
  };

  for (unsigned i = 0; predictors[i].name != NULL; i++)
    {
      /* Some predictors deliberately carry no fixed hitrate; skip them.  */
      if (predictors[i].probability == PROB_UNINITIALIZED)
	continue;

      /* A predictor must beat a coin flip, hence (50, 100].  */
      unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
      ASSERT_TRUE (p > 50 && p <= 100);
    }
}

#undef DEF_PREDICTOR

/* Run all of the self-tests within this file.  */

void
predict_c_tests ()
{
  test_prediction_value_range ();
}

} // namespace selftest

#endif /* CHECKING_P.  */