/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "coverage.h"
#include "diagnostic-core.h"
#include "gimple-predict.h"
#include "fold-const.h"
#include "calls.h"
#include "cfganal.h"
#include "profile.h"
#include "sreal.h"
#include "params.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "gimple-pretty-print.h"
#include "selftest.h"
#include "cfgrtl.h"
#include "stringpool.h"
#include "attribs.h"
/* Enum with reasons why a predictor is ignored.  */

enum predictor_reason
{
  REASON_NONE,
  REASON_IGNORED,
  REASON_SINGLE_EDGE_DUPLICATE,
  REASON_EDGE_PAIR_DUPLICATE
};

/* String messages for the aforementioned enum.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
			     enum predictor_reason, edge);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction,
				      struct loop *in_loop = NULL);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction,
					   struct loop *in_loop = NULL);
static bool can_predict_insn_p (const rtx_insn *);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
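/* For example, HITRATE (50) evaluates to REG_BR_PROB_BASE / 2, i.e. a 50%
   probability in the fixed-point representation used throughout this file.  */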
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if profile COUNT is considered to be hot in function FUN.  */

bool
maybe_hot_count_p (struct function *fun, profile_count count)
{
  if (!count.initialized_p ())
    return true;
  if (count.ipa () == profile_count::zero ())
    return false;
  if (!count.ipa_p ())
    {
      struct cgraph_node *node = cgraph_node::get (fun->decl);
      if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
	{
	  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	    return false;
	  if (node->frequency == NODE_FREQUENCY_HOT)
	    return true;
	}
      if (profile_status_for_fn (fun) == PROFILE_ABSENT)
	return true;
      if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
	  && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
	return false;
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
	return false;
      if (count.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION), 1)
	  < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	return false;
      return true;
    }
  /* Code executed at most once is not hot.  */
  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
    return false;
  return (count.to_gcov_type () >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  return maybe_hot_count_p (fun, bb->count);
}

/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  return maybe_hot_count_p (cfun, e->count ());
}
/* Return true if profile COUNT, or the static node frequency of function
   FUN, indicates that the code is probably never executed.  */

static bool
probably_never_executed (struct function *fun,
			 profile_count count)
{
  gcc_checking_assert (fun);
  if (count == profile_count::zero ())
    return true;
  if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
	return false;
      return true;
    }
  if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}

/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count);
}
/* Return true if E is unlikely executed for obvious reasons.  */

static bool
unlikely_executed_edge_p (edge e)
{
  return (e->count () == profile_count::zero ()
	  || e->probability == profile_probability::never ())
	 || (e->flags & (EDGE_EH | EDGE_FAKE));
}

/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  if (unlikely_executed_edge_p (e))
    return true;
  return probably_never_executed (fun, e->count ());
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return the optimization type that should be used for function FUN.  */

optimization_type
function_optimization_type (struct function *fun)
{
  return (optimize_function_for_speed_p (fun)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
	  || (bb && !maybe_hot_bb_p (cfun, bb)));
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return the optimization type that should be used for block BB.  */

optimization_type
bb_optimization_type (const_basic_block bb)
{
  return (optimize_bb_for_speed_p (bb)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by a branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (!e->probability.initialized_p ())
    return false;
  if ((e->probability.to_reg_br_prob_base ()
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* Structure representing predictions at the tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true if edge E is already predicted by PREDICTOR with the
   probability that corresponds to TAKEN.  */

bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
/* Return true when the probability of edge E is reliable.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return e->probability.probably_reliable_p ();
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return profile_probability::from_reg_br_prob_note
		 (XINT (note, 0)).probably_reliable_p ();
}
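/* Attach a REG_BR_PRED note recording PREDICTOR and PROBABILITY to the
   conditional jump INSN, provided guessing branch probabilities is
   enabled.  */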
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;
  gcc_assert (probability != PROB_UNINITIALIZED);

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Filter edge predictions PREDS by a function FILTER.  DATA is passed
   to the filter function.  */

void
filter_predictions (edge_prediction **preds,
		    bool (*filter) (edge_prediction *, void *), void *data)
{
  if (!bb_predictions)
    return;

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*filter) (*prediction, data))
	    prediction = &((*prediction)->ep_next);
	  else
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	}
    }
}
/* Filter function predicate that returns true for an edge prediction P
   if its edge is equal to DATA.  */

bool
equal_edge_p (edge_prediction *p, void *data)
{
  return p->ep_edge == (edge)data;
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);
  filter_predictions (preds, equal_edge_p, e);
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = profile_probability::from_reg_br_prob_note
			 (XINT (note, 0)).invert ().to_reg_br_prob_note ();
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, enum predictor_reason reason = REASON_NONE,
		 edge ep_edge = NULL)
{
  edge e = ep_edge;
  edge_iterator ei;

  if (!file)
    return;

  if (e == NULL)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if (! (e->flags & EDGE_FALLTHRU))
	break;

  char edge_info_str[128];
  if (ep_edge)
    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
	     ep_edge->dest->index);
  else
    edge_info_str[0] = '\0';

  fprintf (file, "  %s heuristics%s%s: %.1f%%",
	   predictor_info[predictor].name,
	   edge_info_str, reason_messages[reason],
	   probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count.initialized_p ())
    {
      fprintf (file, "  exec ");
      bb->count.dump (file);
      if (e)
	{
	  fprintf (file, " hit ");
	  e->count ().dump (file);
	  fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0
		   / bb->count.to_gcov_type ());
	}
    }

  fprintf (file, "\n");

  /* Print output that can be easily read by the analyze_brprob.py script.
     We are interested only in counts that are read from GCDA files.  */
  if (dump_file && (dump_flags & TDF_DETAILS)
      && bb->count.precise_p ()
      && reason == REASON_NONE)
    {
      gcc_assert (e->count ().precise_p ());
      fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
	       predictor_info[predictor].name,
	       bb->count.to_gcov_type (), e->count ().to_gcov_type (),
	       probability * 100.0 / REG_BR_PROB_BASE);
    }
}
/* Return true if STMT is known to be unlikely executed.  */

static bool
unlikely_executed_stmt_p (gimple *stmt)
{
  if (!is_gimple_call (stmt))
    return false;
  /* NORETURN attribute alone is not strong enough: exit() may be quite
     likely executed once during program run.  */
  if (gimple_call_fntype (stmt)
      && lookup_attribute ("cold",
			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;
  tree decl = gimple_call_fndecl (stmt);
  if (!decl)
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
    return true;

  cgraph_node *n = cgraph_node::get (decl);
  if (!n)
    return false;

  availability avail;
  n = n->ultimate_alias_target (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  if (!n->analyzed
      || n->decl == current_function_decl)
    return false;
  return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
}
/* Return true if BB is unlikely executed.  */

static bool
unlikely_executed_bb_p (basic_block bb)
{
  if (bb->count == profile_count::zero ())
    return true;
  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return false;
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
	return true;
      if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
	return false;
    }
  return false;
}
/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
   the remaining probability evenly over all edges not mentioned in the set;
   edges in the set are given PROB_VERY_UNLIKELY probability.  */
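/* For example, with four successor edges of which one is in UNLIKELY_EDGES,
   that edge receives PROB_VERY_UNLIKELY and each of the other three takes
   roughly a third of the remaining probability mass.  */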
static void
set_even_probabilities (basic_block bb,
			hash_set<edge> *unlikely_edges = NULL)
{
  unsigned nedges = 0, unlikely_count = 0;
  edge e = NULL;
  edge_iterator ei;
  profile_probability all = profile_probability::always ();

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->probability.initialized_p ())
      all -= e->probability;
    else if (!unlikely_executed_edge_p (e))
      {
	nedges ++;
	if (unlikely_edges != NULL && unlikely_edges->contains (e))
	  {
	    all -= profile_probability::very_unlikely ();
	    unlikely_count++;
	  }
      }

  /* Make the distribution even if all edges are unlikely.  */
  if (unlikely_count == nedges)
    {
      unlikely_edges = NULL;
      unlikely_count = 0;
    }

  unsigned c = nedges - unlikely_count;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->probability.initialized_p ())
      ;
    else if (!unlikely_executed_edge_p (e))
      {
	if (unlikely_edges != NULL && unlikely_edges->contains (e))
	  e->probability = profile_probability::very_unlikely ();
	else
	  e->probability = all.apply_scale (1, c).guessed ();
      }
    else
      e->probability = profile_probability::never ();
}
/* Add REG_BR_PROB note to JUMP with PROB.  */

void
add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
{
  gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
  add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor
	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));
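	/* Dempster-Shafer combination of two probability estimates: with
	   c = combined_probability and p = probability, both scaled by
	   REG_BR_PROB_BASE, the update below computes
	     c' = c*p / (c*p + (1-c)*(1-p)),
	   so agreeing predictors reinforce each other and a 50% predictor
	   leaves the combination unchanged.  */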
	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
			 bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      profile_probability p
	= profile_probability::from_reg_br_prob_base (combined_probability);
      add_reg_br_prob_note (insn, p);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = p;
	  FALLTHRU_EDGE (bb)->probability
	    = BRANCH_EDGE (bb)->probability.invert ();
	}
    }
  else if (!single_succ_p (bb))
    {
      profile_probability prob = profile_probability::from_reg_br_prob_note
					(XINT (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = prob.invert ();
    }
  else
    single_succ_edge (bb)->probability = profile_probability::always ();
}
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{
  static inline hashval_t hash (const edge_prediction *);
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};

/* Calculate hash value of an edge prediction P based on predictor and
   normalized probability.  */

inline hashval_t
predictor_hash::hash (const edge_prediction *p)
{
  inchash::hash hstate;
  hstate.add_int (p->ep_predictor);

  int prob = p->ep_probability;
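  /* A prediction and its inverted counterpart must hash to the same value:
     fold probabilities above 1/2 onto their complement, matching
     predictor_hash::equal below.  */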
  if (prob > REG_BR_PROB_BASE / 2)
    prob = REG_BR_PROB_BASE - prob;

  hstate.add_int (prob);

  return hstate.end ();
}

/* Return true when edge predictions P1 and P2 use the same predictor and
   have equal (or opposite) probability.  */

inline bool
predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
{
  return (p1->ep_predictor == p2->ep_predictor
	  && (p1->ep_probability == p2->ep_probability
	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
}

struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};
/* Return true if edge prediction P is not in DATA hash set.  */

static bool
not_removed_prediction_p (edge_prediction *p, void *data)
{
  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
  return !remove->contains (p);
}

/* Prune predictions for a basic block BB.  Currently we do the following
   clean-up steps:

   1) remove duplicate predictions that are guessed with the same probability
      (other than 1/2) for both edges
   2) remove duplicates of a prediction that belongs with the same probability
      to a single edge
  */
static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.
   If DRY_RUN is set, only produce dumps and do not modify profile.  */

static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  int nzero = 0;
  int nunknown = 0;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!unlikely_executed_edge_p (e))
	{
	  nedges ++;
	  if (first && !second)
	    second = e;
	  if (!first)
	    first = e;
	}
      else if (!e->probability.initialized_p ())
	e->probability = profile_probability::never ();
      if (!e->probability.initialized_p ())
	nunknown++;
      else if (e->probability == profile_probability::never ())
	nzero++;
    }

  /* When there is no successor or only one choice, prediction is easy.

     When we have a basic block with more than 2 successors, the situation
     is more complicated as DS theory cannot be used literally.
     More precisely, let's assume we predicted edge e1 with probability p1,
     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
     need to find probability of e.g. m1({b2}), which we don't know.
     The only approximation is to equally distribute 1-p1 to all edges
     different from b1.

     According to numbers we've got from the SPEC2006 benchmark, there's only
     one interesting reliable predictor (noreturn call), which can be
     handled with a somewhat simpler approach.  */
  if (nedges != 2)
    {
      hash_set<edge> unlikely_edges (4);

      /* Identify all edges that have a probability close to very unlikely.
	 Doing the same for the very likely case is not worthwhile, as no
	 such probability occurs in the SPEC2006 benchmark.  */
      edge_prediction **preds = bb_predictions->get (bb);
      if (preds)
	for (pred = *preds; pred; pred = pred->ep_next)
	  if (pred->ep_probability <= PROB_VERY_UNLIKELY)
	    unlikely_edges.add (pred->ep_edge);

      if (!dry_run)
	set_even_probabilities (bb, &unlikely_edges);
      clear_bb_predictions (bb);
      if (dump_file)
	{
	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
	  if (unlikely_edges.elements () == 0)
	    fprintf (dump_file,
		     "%i edges in bb %i predicted to even probabilities\n",
		     nedges, bb->index);
	  else
	    {
	      fprintf (dump_file,
		       "%i edges in bb %i predicted with some unlikely edges\n",
		       nedges, bb->index);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!unlikely_executed_edge_p (e))
		  dump_prediction (dump_file, PRED_COMBINED,
		   e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
	    }
	}
      return;
    }
  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor
	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));
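	  /* The same Dempster-Shafer update as in
	     combine_predictions_for_insn above.  */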
	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
			 !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
			 first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);

  /* If we have only one successor which is unknown, we can compute missing
     probability.  */
  if (nunknown == 1)
    {
      profile_probability prob = profile_probability::always ();
      edge missing = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  prob -= e->probability;
	else if (missing == NULL)
	  missing = e;
	else
	  gcc_unreachable ();
      missing->probability = prob;
    }
  /* If nothing is unknown, we have nothing to update.  */
  else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs))
    ;
  else if (!dry_run)
    {
      first->probability
	= profile_probability::from_reg_br_prob_base (combined_probability);
      second->probability = first->probability.invert ();
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple *stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Return true if BB is predicted by one of the loop heuristics.  */

static bool
predicted_by_loop_heuristics_p (basic_block bb)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
	|| i->ep_predictor == PRED_LOOP_ITERATIONS
	|| i->ep_predictor == PRED_LOOP_EXIT
	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
      return true;
  return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */
static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple *stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_loop_heuristics_p (bb))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
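      /* The then-branch is taken in roughly COMPARE_COUNT out of LOOP_COUNT
	 iterations, so below its probability is their ratio scaled by
	 REG_BR_PROB_BASE and clamped to [0, REG_BR_PROB_BASE].  */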
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken. It has only 20% hitrate.  */
      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	       * step < 0 : backedge condition is like (i > bound)
	       * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite to compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXTRA_EXIT.  */
static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple *lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple *last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;
  basic_block bb;
  hash_set <struct loop *> with_recursion(10);

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      tree decl;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_call (gsi_stmt (gsi))
	    && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
	    && recursive_call_p (current_function_decl, decl))
	  {
	    loop = bb->loop_father;
	    while (loop && !with_recursion.add (loop))
	      loop = loop_outer (loop);
	  }
    }

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits = 0;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;
      bool recursion = with_recursion.contains (loop);

      exits = get_loop_exit_edges (loop);
      FOR_EACH_VEC_ELT (exits, j, ex)
	if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
	  n_exits ++;
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
		 loop->num, recursion ? " (with recursion)":"", n_exits);
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file,
		   "Loop %d iterates at most %i times.\n", loop->num,
		   (int)max_loop_iterations_int (loop));
	}
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && likely_max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
		   loop->num, (int)likely_max_loop_iterations_int (loop));
	}
      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;
	  widest_int nit;

	  if (unlikely_executed_edge_p (ex)
	      || (ex->flags & EDGE_ABNORMAL_CALL))
	    continue;
	  /* Loop heuristics do not expect exit conditional to be inside
	     inner loop.  We predict from innermost to outermost loop.  */
	  if (predicted_by_loop_heuristics_p (ex->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Skipping exit %i->%i because "
			 "it is already predicted.\n",
			 ex->src->index, ex->dest->index);
	      continue;
	    }
	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);
	  if (dump_file && (dump_flags & TDF_DETAILS)
	      && TREE_CODE (niter) == INTEGER_CST)
	    {
	      fprintf (dump_file, "Exit %i->%i %d iterates ",
		       ex->src->index, ex->dest->index,
		       loop->num);
	      print_generic_expr (dump_file, niter, TDF_SLIM);
	      fprintf (dump_file, " times.\n");
	    }

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1
		   && estimated_stmt_executions (loop, &nit))
	    {
	      if (wi::gtu_p (nit, max))
		nitercst = max;
	      else
		nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  /* If we have likely upper bound, trust it for very small iteration
	     counts.  Such loops would otherwise get mispredicted by standard
	     LOOP_EXIT heuristics.  */
	  else if (n_exits == 1
		   && likely_max_stmt_executions (loop, &nit)
		   && wi::ltu_p (nit,
				 RDIV (REG_BR_PROB_BASE,
				       REG_BR_PROB_BASE
					 - predictor_info
						[recursion
						 ? PRED_LOOP_EXIT_WITH_RECURSION
						 : PRED_LOOP_EXIT].hitrate)))
	    {
	      nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_MAX;
	    }
	  else
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Nothing known about exit %i->%i.\n",
			 ex->src->index, ex->dest->index);
	      continue;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
		     (int)nitercst, predictor_info[predictor].name);
	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;
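	  /* An exit expected to be reached after NITERCST iterations is
	     taken with probability roughly 1/nitercst on each iteration.  */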
	  probability = RDIV (REG_BR_PROB_BASE, nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();
      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);
2011 bbs = get_loop_body (loop);
2013 for (j = 0; j < loop->num_nodes; j++)
2015 edge e;
2016 edge_iterator ei;
2018 bb = bbs[j];
2020 /* Bypass loop heuristics on continue statements. These
2021 statements construct loops via "non-loop" constructs
2022 in the source language and are better handled
2023 separately. */
2024 if (predicted_by_p (bb, PRED_CONTINUE))
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, "BB %i predicted by continue.\n",
2028 bb->index);
2029 continue;
2032 /* If we already used more reliable loop exit predictors, do not
2033 bother with PRED_LOOP_EXIT. */
2034 if (!predicted_by_loop_heuristics_p (bb))
2036 /* For a loop with many exits we don't want to predict all exits
2037 with a fairly large probability, because if all exits are
2038 considered in a row, the loop would be predicted to iterate
2039 almost never. The code dividing the probability by the number
2040 of exits is very rough. It should compute the number of exits
2041 taken on each path through the function (not the overall number
2042 of exits, which might be a lot higher for loops with wide switch
2043 statements in them) and compute the n-th root.
2045 We limit the minimal probability to 2% to stop
2046 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
2047 as this was causing a regression in the perl benchmark containing
2048 such a wide loop. */
2050 int probability = ((REG_BR_PROB_BASE
2051 - predictor_info
2052 [recursion
2053 ? PRED_LOOP_EXIT_WITH_RECURSION
2054 : PRED_LOOP_EXIT].hitrate)
2055 / n_exits);
2056 if (probability < HITRATE (2))
2057 probability = HITRATE (2);
2058 FOR_EACH_EDGE (e, ei, bb->succs)
2059 if (e->dest->index < NUM_FIXED_BLOCKS
2060 || !flow_bb_inside_loop_p (loop, e->dest))
2062 if (dump_file && (dump_flags & TDF_DETAILS))
2063 fprintf (dump_file,
2064 "Predicting exit %i->%i with prob %i.\n",
2065 e->src->index, e->dest->index, probability);
2066 predict_edge (e,
2067 recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2068 : PRED_LOOP_EXIT, probability);
2071 if (loop_bound_var)
2072 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2073 loop_bound_code,
2074 tree_to_shwi (loop_bound_step));
2077 /* In the following code
2078 for (loop1)
2079 if (cond)
2080 for (loop2)
2081 body;
2082 guess that cond is unlikely. */
2083 if (loop_outer (loop)->num)
2085 basic_block bb = NULL;
2086 edge preheader_edge = loop_preheader_edge (loop);
2088 if (single_pred_p (preheader_edge->src)
2089 && single_succ_p (preheader_edge->src))
2090 preheader_edge = single_pred_edge (preheader_edge->src);
2092 gimple *stmt = last_stmt (preheader_edge->src);
2093 /* Pattern match the Fortran loop preheader:
2094 _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2095 _17 = (logical(kind=4)) _16;
2096 if (_17 != 0)
2097 goto <bb 11>;
2098 else
2099 goto <bb 13>;
2101 Loop guard branch prediction says nothing about duplicated loop
2102 headers produced by the Fortran frontend, and in this case we want
2103 to predict the paths leading to this preheader. */
2105 if (stmt
2106 && gimple_code (stmt) == GIMPLE_COND
2107 && gimple_cond_code (stmt) == NE_EXPR
2108 && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2109 && integer_zerop (gimple_cond_rhs (stmt)))
2111 gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2112 if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2113 && gimple_expr_code (call_stmt) == NOP_EXPR
2114 && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2115 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2116 if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2117 && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2118 && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2119 && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2120 == PRED_FORTRAN_LOOP_PREHEADER)
2121 bb = preheader_edge->src;
2123 if (!bb)
2125 if (!dominated_by_p (CDI_DOMINATORS,
2126 loop_outer (loop)->latch, loop->header))
2127 predict_paths_leading_to_edge (loop_preheader_edge (loop),
2128 recursion
2129 ? PRED_LOOP_GUARD_WITH_RECURSION
2130 : PRED_LOOP_GUARD,
2131 NOT_TAKEN,
2132 loop_outer (loop));
2134 else
2136 if (!dominated_by_p (CDI_DOMINATORS,
2137 loop_outer (loop)->latch, bb))
2138 predict_paths_leading_to (bb,
2139 recursion
2140 ? PRED_LOOP_GUARD_WITH_RECURSION
2141 : PRED_LOOP_GUARD,
2142 NOT_TAKEN,
2143 loop_outer (loop));
2147 /* Free basic blocks from get_loop_body. */
2148 free (bbs);
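/* Illustrative sketch of the iteration heuristics above (example only;
   numbers assume the usual REG_BR_PROB_BASE of 10000): for a loop such as

     for (i = 0; i < 100; i++)
       body ();

   number_of_iterations_exit typically computes niter == 99 (the number
   of latch executions), so nitercst becomes 100 and the exit edge is
   recorded by PRED_LOOP_ITERATIONS with probability 10000 / 100, i.e.
   the exit test is expected to be taken on about 1% of its executions.  */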
2152 /* Attempt to predict probabilities of BB outgoing edges using local
2153 properties. */
2154 static void
2155 bb_estimate_probability_locally (basic_block bb)
2157 rtx_insn *last_insn = BB_END (bb);
2158 rtx cond;
2160 if (! can_predict_insn_p (last_insn))
2161 return;
2162 cond = get_condition (last_insn, NULL, false, false);
2163 if (! cond)
2164 return;
2166 /* Try "pointer heuristic."
2167 A comparison ptr == 0 is predicted as false.
2168 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2169 if (COMPARISON_P (cond)
2170 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2171 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2173 if (GET_CODE (cond) == EQ)
2174 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2175 else if (GET_CODE (cond) == NE)
2176 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2178 else
2180 /* Try "opcode heuristic."
2181 EQ tests are usually false and NE tests are usually true. Also,
2182 most quantities are positive, so we can make the appropriate guesses
2183 about signed comparisons against zero. */
2184 switch (GET_CODE (cond))
2186 case CONST_INT:
2187 /* Unconditional branch. */
2188 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2189 cond == const0_rtx ? NOT_TAKEN : TAKEN);
2190 break;
2192 case EQ:
2193 case UNEQ:
2194 /* Floating point comparisons appear to behave in a very
2195 unpredictable way because of the special role of equality
2196 tests in FP code. */
2197 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2199 /* Comparisons with 0 are often used for booleans and there is
2200 nothing useful to predict about them. */
2201 else if (XEXP (cond, 1) == const0_rtx
2202 || XEXP (cond, 0) == const0_rtx)
2204 else
2205 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2206 break;
2208 case NE:
2209 case LTGT:
2210 /* Floating point comparisons appear to behave in a very
2211 unpredictable way because of the special role of equality
2212 tests in FP code. */
2213 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2215 /* Comparisons with 0 are often used for booleans and there is
2216 nothing useful to predict about them. */
2217 else if (XEXP (cond, 1) == const0_rtx
2218 || XEXP (cond, 0) == const0_rtx)
2220 else
2221 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2222 break;
2224 case ORDERED:
2225 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2226 break;
2228 case UNORDERED:
2229 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2230 break;
2232 case LE:
2233 case LT:
2234 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2235 || XEXP (cond, 1) == constm1_rtx)
2236 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2237 break;
2239 case GE:
2240 case GT:
2241 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2242 || XEXP (cond, 1) == constm1_rtx)
2243 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2244 break;
2246 default:
2247 break;
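/* A hedged example of the two heuristics above (illustration only):
   for RTL like

     if (ptr == 0)      where ptr is a REG_POINTER register
       goto error_path;

   the EQ test is predicted not taken by PRED_POINTER, while a signed
   test such as "if (x > 0)" against zero is predicted taken by
   PRED_OPCODE_POSITIVE; the concrete hitrates come from predict.def.  */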
2251 /* Set edge->probability for each successor edge of BB. */
2252 void
2253 guess_outgoing_edge_probabilities (basic_block bb)
2255 bb_estimate_probability_locally (bb);
2256 combine_predictions_for_insn (BB_END (bb), bb);
2259 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
2261 /* Helper function for expr_expected_value. */
2263 static tree
2264 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2265 tree op1, bitmap visited, enum br_predictor *predictor)
2267 gimple *def;
2269 if (predictor)
2270 *predictor = PRED_UNCONDITIONAL;
2272 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2274 if (TREE_CONSTANT (op0))
2275 return op0;
2277 if (code == IMAGPART_EXPR)
2279 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2281 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2282 if (is_gimple_call (def)
2283 && gimple_call_internal_p (def)
2284 && (gimple_call_internal_fn (def)
2285 == IFN_ATOMIC_COMPARE_EXCHANGE))
2287 /* Assume that any given atomic operation has low contention,
2288 and thus the compare-and-swap operation succeeds. */
2289 if (predictor)
2290 *predictor = PRED_COMPARE_AND_SWAP;
2291 return build_one_cst (TREE_TYPE (op0));
2296 if (code != SSA_NAME)
2297 return NULL_TREE;
2299 def = SSA_NAME_DEF_STMT (op0);
2301 /* If we were already here, break the infinite cycle. */
2302 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
2303 return NULL;
2305 if (gimple_code (def) == GIMPLE_PHI)
2307 /* All the arguments of the PHI node must have the same expected
2308 constant value. */
2309 int i, n = gimple_phi_num_args (def);
2310 tree val = NULL, new_val;
2312 for (i = 0; i < n; i++)
2314 tree arg = PHI_ARG_DEF (def, i);
2315 enum br_predictor predictor2;
2317 /* If this PHI has itself as an argument, we cannot
2318 determine the expected value from this argument. However,
2319 if we can find an expected constant value for the other
2320 PHI args then we can still be sure that this is
2321 likely a constant. So be optimistic and just
2322 continue with the next argument. */
2323 if (arg == PHI_RESULT (def))
2324 continue;
2326 new_val = expr_expected_value (arg, visited, &predictor2);
2328 /* It is difficult to combine value predictors. Simply assume
2329 that the later predictor is weaker and take its prediction. */
2330 if (predictor && *predictor < predictor2)
2331 *predictor = predictor2;
2332 if (!new_val)
2333 return NULL;
2334 if (!val)
2335 val = new_val;
2336 else if (!operand_equal_p (val, new_val, false))
2337 return NULL;
2339 return val;
2341 if (is_gimple_assign (def))
2343 if (gimple_assign_lhs (def) != op0)
2344 return NULL;
2346 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2347 gimple_assign_rhs1 (def),
2348 gimple_assign_rhs_code (def),
2349 gimple_assign_rhs2 (def),
2350 visited, predictor);
2353 if (is_gimple_call (def))
2355 tree decl = gimple_call_fndecl (def);
2356 if (!decl)
2358 if (gimple_call_internal_p (def)
2359 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2361 gcc_assert (gimple_call_num_args (def) == 3);
2362 tree val = gimple_call_arg (def, 0);
2363 if (TREE_CONSTANT (val))
2364 return val;
2365 if (predictor)
2367 tree val2 = gimple_call_arg (def, 2);
2368 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2369 && tree_fits_uhwi_p (val2)
2370 && tree_to_uhwi (val2) < END_PREDICTORS);
2371 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2373 return gimple_call_arg (def, 1);
2375 return NULL;
2377 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2378 switch (DECL_FUNCTION_CODE (decl))
2380 case BUILT_IN_EXPECT:
2382 tree val;
2383 if (gimple_call_num_args (def) != 2)
2384 return NULL;
2385 val = gimple_call_arg (def, 0);
2386 if (TREE_CONSTANT (val))
2387 return val;
2388 if (predictor)
2389 *predictor = PRED_BUILTIN_EXPECT;
2390 return gimple_call_arg (def, 1);
2393 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2394 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2395 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2396 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2397 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2398 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2399 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2400 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2401 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2402 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2403 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2404 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2405 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2406 /* Assume that any given atomic operation has low contention,
2407 and thus the compare-and-swap operation succeeds. */
2408 if (predictor)
2409 *predictor = PRED_COMPARE_AND_SWAP;
2410 return boolean_true_node;
2411 default:
2412 break;
2416 return NULL;
2419 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2421 tree res;
2422 enum br_predictor predictor2;
2423 op0 = expr_expected_value (op0, visited, predictor);
2424 if (!op0)
2425 return NULL;
2426 op1 = expr_expected_value (op1, visited, &predictor2);
2427 if (predictor && *predictor < predictor2)
2428 *predictor = predictor2;
2429 if (!op1)
2430 return NULL;
2431 res = fold_build2 (code, type, op0, op1);
2432 if (TREE_CONSTANT (res))
2433 return res;
2434 return NULL;
2436 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2438 tree res;
2439 op0 = expr_expected_value (op0, visited, predictor);
2440 if (!op0)
2441 return NULL;
2442 res = fold_build1 (code, type, op0);
2443 if (TREE_CONSTANT (res))
2444 return res;
2445 return NULL;
2447 return NULL;
2450 /* Return the constant that EXPR will likely have at execution time, or
2451 NULL if unknown. The function is used by the builtin_expect branch
2452 predictor, so the evidence must come from this construct plus any
2454 additional constant folding. We may want to implement a more involved
2455 value guess (such as value range propagation based prediction), but
2456 such tricks should go into a new implementation. */
2458 static tree
2459 expr_expected_value (tree expr, bitmap visited,
2460 enum br_predictor *predictor)
2462 enum tree_code code;
2463 tree op0, op1;
2465 if (TREE_CONSTANT (expr))
2467 if (predictor)
2468 *predictor = PRED_UNCONDITIONAL;
2469 return expr;
2472 extract_ops_from_tree (expr, &code, &op0, &op1);
2473 return expr_expected_value_1 (TREE_TYPE (expr),
2474 op0, code, op1, visited, predictor);
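/* Sketch of how the walk applies to __builtin_expect (the SSA names
   here are purely illustrative):

     t_1 = __builtin_expect (cond_2, 1);
     if (t_1 != 0) goto likely; else goto unlikely;

   expr_expected_value follows t_1 to the call, returns the constant 1
   and reports PRED_BUILTIN_EXPECT, which tree_predict_by_opcode below
   turns into a taken prediction for the conditional.  */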
2477 /* Predict using opcode of the last statement in basic block. */
2478 static void
2479 tree_predict_by_opcode (basic_block bb)
2481 gimple *stmt = last_stmt (bb);
2482 edge then_edge;
2483 tree op0, op1;
2484 tree type;
2485 tree val;
2486 enum tree_code cmp;
2487 edge_iterator ei;
2488 enum br_predictor predictor;
2490 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2491 return;
2492 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2493 if (then_edge->flags & EDGE_TRUE_VALUE)
2494 break;
2495 op0 = gimple_cond_lhs (stmt);
2496 op1 = gimple_cond_rhs (stmt);
2497 cmp = gimple_cond_code (stmt);
2498 type = TREE_TYPE (op0);
2499 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (),
2500 &predictor);
2501 if (val && TREE_CODE (val) == INTEGER_CST)
2503 if (predictor == PRED_BUILTIN_EXPECT)
2505 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2507 gcc_assert (percent >= 0 && percent <= 100);
2508 if (integer_zerop (val))
2509 percent = 100 - percent;
2510 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2512 else
2513 predict_edge_def (then_edge, predictor,
2514 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2516 /* Try "pointer heuristic."
2517 A comparison ptr == 0 is predicted as false.
2518 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2519 if (POINTER_TYPE_P (type))
2521 if (cmp == EQ_EXPR)
2522 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2523 else if (cmp == NE_EXPR)
2524 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2526 else
2528 /* Try "opcode heuristic."
2529 EQ tests are usually false and NE tests are usually true. Also,
2530 most quantities are positive, so we can make the appropriate guesses
2531 about signed comparisons against zero. */
2532 switch (cmp)
2534 case EQ_EXPR:
2535 case UNEQ_EXPR:
2536 /* Floating point comparisons appear to behave in a very
2537 unpredictable way because of the special role of equality
2538 tests in FP code. */
2539 if (FLOAT_TYPE_P (type))
2541 /* Comparisons with 0 are often used for booleans and there is
2542 nothing useful to predict about them. */
2543 else if (integer_zerop (op0) || integer_zerop (op1))
2545 else
2546 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2547 break;
2549 case NE_EXPR:
2550 case LTGT_EXPR:
2551 /* Floating point comparisons appear to behave in a very
2552 unpredictable way because of the special role of equality
2553 tests in FP code. */
2554 if (FLOAT_TYPE_P (type))
2556 /* Comparisons with 0 are often used for booleans and there is
2557 nothing useful to predict about them. */
2558 else if (integer_zerop (op0)
2559 || integer_zerop (op1))
2561 else
2562 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2563 break;
2565 case ORDERED_EXPR:
2566 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2567 break;
2569 case UNORDERED_EXPR:
2570 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2571 break;
2573 case LE_EXPR:
2574 case LT_EXPR:
2575 if (integer_zerop (op1)
2576 || integer_onep (op1)
2577 || integer_all_onesp (op1)
2578 || real_zerop (op1)
2579 || real_onep (op1)
2580 || real_minus_onep (op1))
2581 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2582 break;
2584 case GE_EXPR:
2585 case GT_EXPR:
2586 if (integer_zerop (op1)
2587 || integer_onep (op1)
2588 || integer_all_onesp (op1)
2589 || real_zerop (op1)
2590 || real_onep (op1)
2591 || real_minus_onep (op1))
2592 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2593 break;
2595 default:
2596 break;
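/* Worked example for the PRED_BUILTIN_EXPECT case above, assuming the
   default --param builtin-expect-probability=90: when the expected
   value of the condition is a nonzero constant the THEN edge gets
   HITRATE (90); when it is the constant zero the percentage is flipped
   and the THEN edge gets HITRATE (100 - 90) = HITRATE (10).  */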
2600 /* Returns TRUE if STMT is an exit(0)-like statement. */
2602 static bool
2603 is_exit_with_zero_arg (const gimple *stmt)
2605 /* Bail out if this is not a call to exit, _exit or _Exit. */
2606 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2607 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2608 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2609 return false;
2611 /* The argument is an integer zero. */
2612 return integer_zerop (gimple_call_arg (stmt, 0));
2615 /* Try to guess whether the returned value means an error code. */
2617 static enum br_predictor
2618 return_prediction (tree val, enum prediction *prediction)
2620 /* VOID. */
2621 if (!val)
2622 return PRED_NO_PREDICTION;
2623 /* Different heuristics for pointers and scalars. */
2624 if (POINTER_TYPE_P (TREE_TYPE (val)))
2626 /* NULL is usually not returned. */
2627 if (integer_zerop (val))
2629 *prediction = NOT_TAKEN;
2630 return PRED_NULL_RETURN;
2633 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2635 /* Negative return values are often used to indicate
2636 errors. */
2637 if (TREE_CODE (val) == INTEGER_CST
2638 && tree_int_cst_sgn (val) < 0)
2640 *prediction = NOT_TAKEN;
2641 return PRED_NEGATIVE_RETURN;
2643 /* Constant return values seem to be commonly taken.
2644 Zero/one often represent booleans, so exclude them from the
2645 heuristics. */
2646 if (TREE_CONSTANT (val)
2647 && (!integer_zerop (val) && !integer_onep (val)))
2649 *prediction = NOT_TAKEN;
2650 return PRED_CONST_RETURN;
2653 return PRED_NO_PREDICTION;
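/* For instance (hypothetical source), in

     struct entry *lookup (int key)
     {
       if (!valid_p (key))
         return NULL;
       return table[key];
     }

   the paths leading to the NULL return are predicted not taken by
   PRED_NULL_RETURN, and a scalar "return -1;" would similarly be
   predicted not taken by PRED_NEGATIVE_RETURN.  */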
2656 /* Return zero if phi result could have values other than -1, 0 or 1,
2657 otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
2658 values are used or likely. */
2660 static int
2661 zero_one_minusone (gphi *phi, int limit)
2663 int phi_num_args = gimple_phi_num_args (phi);
2664 int ret = 0;
2665 for (int i = 0; i < phi_num_args; i++)
2667 tree t = PHI_ARG_DEF (phi, i);
2668 if (TREE_CODE (t) != INTEGER_CST)
2669 continue;
2670 wide_int w = wi::to_wide (t);
2671 if (w == -1)
2672 ret |= 1;
2673 else if (w == 0)
2674 ret |= 2;
2675 else if (w == 1)
2676 ret |= 4;
2677 else
2678 return 0;
2680 for (int i = 0; i < phi_num_args; i++)
2682 tree t = PHI_ARG_DEF (phi, i);
2683 if (TREE_CODE (t) == INTEGER_CST)
2684 continue;
2685 if (TREE_CODE (t) != SSA_NAME)
2686 return 0;
2687 gimple *g = SSA_NAME_DEF_STMT (t);
2688 if (gimple_code (g) == GIMPLE_PHI && limit > 0)
2689 if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
2691 ret |= r;
2692 continue;
2694 if (!is_gimple_assign (g))
2695 return 0;
2696 if (gimple_assign_cast_p (g))
2698 tree rhs1 = gimple_assign_rhs1 (g);
2699 if (TREE_CODE (rhs1) != SSA_NAME
2700 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
2701 || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2702 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2703 return 0;
2704 ret |= (2 | 4);
2705 continue;
2707 if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
2708 return 0;
2709 ret |= (2 | 4);
2711 return ret;
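/* Example of the bitmask: a qsort-style comparator whose PHI arguments
   are the constants -1, 0 and 1 yields 1 | 2 | 4 == 7.  The caller
   below tests (r & (1 | 4)) == (1 | 4), i.e. both -1 and 1 being
   possible, and in that case backs off from treating negative return
   values as error indications.  */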
2714 /* Find the basic block with the return expression and look up the
2715 possible return value, trying to apply the RETURN_PREDICTION heuristics. */
2716 static void
2717 apply_return_prediction (void)
2719 greturn *return_stmt = NULL;
2720 tree return_val;
2721 edge e;
2722 gphi *phi;
2723 int phi_num_args, i;
2724 enum br_predictor pred;
2725 enum prediction direction;
2726 edge_iterator ei;
2728 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2730 gimple *last = last_stmt (e->src);
2731 if (last
2732 && gimple_code (last) == GIMPLE_RETURN)
2734 return_stmt = as_a <greturn *> (last);
2735 break;
2738 if (!e)
2739 return;
2740 return_val = gimple_return_retval (return_stmt);
2741 if (!return_val)
2742 return;
2743 if (TREE_CODE (return_val) != SSA_NAME
2744 || !SSA_NAME_DEF_STMT (return_val)
2745 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2746 return;
2747 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2748 phi_num_args = gimple_phi_num_args (phi);
2749 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2751 /* Avoid the case where the function returns -1, 0 and 1 values and
2752 nothing else. Those could be qsort etc. comparison functions
2753 where the negative return isn't less probable than the positive.
2754 For this we require that both -1 and 1 (where 1 may also stand for a
2755 boolean value or comparison result) can be returned, so that functions
2756 returning just -1 and 0 are still treated as if -1 represented an error value. */
2757 if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
2758 && !TYPE_UNSIGNED (TREE_TYPE (return_val))
2759 && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
2760 if (int r = zero_one_minusone (phi, 3))
2761 if ((r & (1 | 4)) == (1 | 4))
2762 return;
2764 /* Avoid the degenerate case where all return values from the function
2765 belong to the same category (i.e. they are all positive constants),
2766 so we can hardly say anything about them. */
2767 for (i = 1; i < phi_num_args; i++)
2768 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2769 break;
2770 if (i != phi_num_args)
2771 for (i = 0; i < phi_num_args; i++)
2773 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2774 if (pred != PRED_NO_PREDICTION)
2775 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2776 direction);
2780 /* Look for basic blocks that contain unlikely-to-happen events
2781 (such as noreturn calls) and mark all paths leading to the execution
2782 of these basic blocks as unlikely. */
2784 static void
2785 tree_bb_level_predictions (void)
2787 basic_block bb;
2788 bool has_return_edges = false;
2789 edge e;
2790 edge_iterator ei;
2792 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2793 if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
2795 has_return_edges = true;
2796 break;
2799 apply_return_prediction ();
2801 FOR_EACH_BB_FN (bb, cfun)
2803 gimple_stmt_iterator gsi;
2805 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2807 gimple *stmt = gsi_stmt (gsi);
2808 tree decl;
2810 if (is_gimple_call (stmt))
2812 if (gimple_call_noreturn_p (stmt)
2813 && has_return_edges
2814 && !is_exit_with_zero_arg (stmt))
2815 predict_paths_leading_to (bb, PRED_NORETURN,
2816 NOT_TAKEN);
2817 decl = gimple_call_fndecl (stmt);
2818 if (decl
2819 && lookup_attribute ("cold",
2820 DECL_ATTRIBUTES (decl)))
2821 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2822 NOT_TAKEN);
2823 if (decl && recursive_call_p (current_function_decl, decl))
2824 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
2825 NOT_TAKEN);
2827 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2829 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2830 gimple_predict_outcome (stmt));
2831 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2832 hints to callers. */
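/* A small illustration of the noreturn heuristic above: in

     if (fatal_p)
       abort ();

   the block calling abort is noreturn, so all paths leading to it are
   predicted not taken by PRED_NORETURN, provided the function has
   regular return edges and the call is not an exit (0).  */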
2838 /* Callback for hash_map::traverse, asserts that the pointer map is
2839 empty. */
2841 bool
2842 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2843 void *)
2845 gcc_assert (!value);
2846 return false;
2849 /* Predict branch probabilities and estimate profile for basic block BB.
2850 When LOCAL_ONLY is set do not use any global properties of CFG. */
2852 static void
2853 tree_estimate_probability_bb (basic_block bb, bool local_only)
2855 edge e;
2856 edge_iterator ei;
2858 FOR_EACH_EDGE (e, ei, bb->succs)
2860 /* Look for block we are guarding (ie we dominate it,
2861 but it doesn't postdominate us). */
2862 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2863 && !local_only
2864 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2865 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2867 gimple_stmt_iterator bi;
2869 /* The call heuristic claims that a guarded function call
2870 is improbable. This is because such calls are often used
2871 to signal exceptional situations such as printing error
2872 messages. */
2873 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2874 gsi_next (&bi))
2876 gimple *stmt = gsi_stmt (bi);
2877 if (is_gimple_call (stmt)
2878 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
2879 /* Constant and pure calls are hardly used to signal
2880 something exceptional. */
2881 && gimple_has_side_effects (stmt))
2883 if (gimple_call_fndecl (stmt))
2884 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2885 else if (virtual_method_call_p (gimple_call_fn (stmt)))
2886 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
2887 else
2888 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
2889 break;
2894 tree_predict_by_opcode (bb);
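/* Sketch of the call heuristic above (hypothetical source): in

     if (rc != 0)
       report_failure (rc);

   the guarded block is dominated by the test but does not postdominate
   it, and it contains a call with side effects, so the edge into it is
   predicted not taken by PRED_CALL.  */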
2897 /* Predict branch probabilities and estimate profile of the tree CFG.
2898 This function can be called from the loop optimizers to recompute
2899 the profile information.
2900 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2902 void
2903 tree_estimate_probability (bool dry_run)
2905 basic_block bb;
2907 add_noreturn_fake_exit_edges ();
2908 connect_infinite_loops_to_exit ();
2909 /* We use loop_niter_by_eval, which requires that the loops have
2910 preheaders. */
2911 create_preheaders (CP_SIMPLE_PREHEADERS);
2912 calculate_dominance_info (CDI_POST_DOMINATORS);
2914 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2915 tree_bb_level_predictions ();
2916 record_loop_exits ();
2918 if (number_of_loops (cfun) > 1)
2919 predict_loops ();
2921 FOR_EACH_BB_FN (bb, cfun)
2922 tree_estimate_probability_bb (bb, false);
2924 FOR_EACH_BB_FN (bb, cfun)
2925 combine_predictions_for_bb (bb, dry_run);
2927 if (flag_checking)
2928 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2930 delete bb_predictions;
2931 bb_predictions = NULL;
2933 if (!dry_run)
2934 estimate_bb_frequencies (false);
2935 free_dominance_info (CDI_POST_DOMINATORS);
2936 remove_fake_exit_edges ();
2939 /* Set edge->probability for each successor edge of BB. */
2940 void
2941 tree_guess_outgoing_edge_probabilities (basic_block bb)
2943 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2944 tree_estimate_probability_bb (bb, true);
2945 combine_predictions_for_bb (bb, false);
2946 if (flag_checking)
2947 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2948 delete bb_predictions;
2949 bb_predictions = NULL;
2952 /* Predict edges to successors of CUR whose sources are not postdominated by
2953 BB by PRED and recurse to all postdominators. */
2955 static void
2956 predict_paths_for_bb (basic_block cur, basic_block bb,
2957 enum br_predictor pred,
2958 enum prediction taken,
2959 bitmap visited, struct loop *in_loop = NULL)
2961 edge e;
2962 edge_iterator ei;
2963 basic_block son;
2965 /* If we exited the loop or CUR is unconditional in the loop, there is
2966 nothing to do. */
2967 if (in_loop
2968 && (!flow_bb_inside_loop_p (in_loop, cur)
2969 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
2970 return;
2972 /* We are looking for all edges forming the edge cut induced by the
2973 set of all blocks postdominated by BB. */
2974 FOR_EACH_EDGE (e, ei, cur->preds)
2975 if (e->src->index >= NUM_FIXED_BLOCKS
2976 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2978 edge e2;
2979 edge_iterator ei2;
2980 bool found = false;
2982 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2983 if (unlikely_executed_edge_p (e))
2984 continue;
2985 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2987 /* See if there is an edge from e->src that is not abnormal
2988 and does not lead to BB and does not exit the loop. */
2989 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2990 if (e2 != e
2991 && !unlikely_executed_edge_p (e2)
2992 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
2993 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
2995 found = true;
2996 break;
2999 /* If there is a non-abnormal path leaving e->src, predict the edge
3000 using the predictor. Otherwise we need to look for paths
3001 leading to e->src.
3003 The second case may lead to an infinite loop if we are predicting
3004 regions that are only reachable by abnormal edges. We simply
3005 prevent visiting a given BB twice. */
3006 if (found)
3008 if (!edge_predicted_by_p (e, pred, taken))
3009 predict_edge_def (e, pred, taken);
3011 else if (bitmap_set_bit (visited, e->src->index))
3012 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
3014 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
3015 son;
3016 son = next_dom_son (CDI_POST_DOMINATORS, son))
3017 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
3020 /* Set branch probabilities on all paths leading to BB according to
3021 the predictor PRED and prediction TAKEN. */
3023 static void
3024 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
3025 enum prediction taken, struct loop *in_loop)
3027 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3030 /* Like predict_paths_leading_to but takes an edge instead of a basic block. */
3032 static void
3033 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
3034 enum prediction taken, struct loop *in_loop)
3036 bool has_nonloop_edge = false;
3037 edge_iterator ei;
3038 edge e2;
3040 basic_block bb = e->src;
3041 FOR_EACH_EDGE (e2, ei, bb->succs)
3042 if (e2->dest != e->src && e2->dest != e->dest
3043 && !unlikely_executed_edge_p (e)
3044 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
3046 has_nonloop_edge = true;
3047 break;
3049 if (!has_nonloop_edge)
3051 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3053 else
3054 predict_edge_def (e, pred, taken);
3057 /* This is used to carry information about basic blocks. It is
3058 attached to the AUX field of the standard CFG block. */
3060 struct block_info
3062 /* Estimated frequency of execution of basic_block. */
3063 sreal frequency;
3065 /* To keep queue of basic blocks to process. */
3066 basic_block next;
3068 /* Number of predecessors we need to visit first. */
3069 int npredecessors;
3072 /* Similar information for edges. */
3073 struct edge_prob_info
3075 /* In case the edge is a loopback edge, the probability that the edge
3076 will be reached provided the header is. The estimated number of
3077 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
3078 sreal back_edge_prob;
3079 /* True if the edge is a loopback edge in the natural loop. */
3080 unsigned int back_edge:1;
3083 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
3084 #undef EDGE_INFO
3085 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
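/* Numeric illustration of back_edge_prob: if the loop back edge is
   taken with probability 0.9, propagate_freq below computes the header
   frequency as 1 / (1 - 0.9) == 10 times the frequency of the loop
   entry, matching the expected ten iterations per entry.  */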
3087 /* Helper function for estimate_bb_frequencies.
3088 Propagate the frequencies in blocks marked in
3089 TOVISIT, starting in HEAD. */
3091 static void
3092 propagate_freq (basic_block head, bitmap tovisit)
3094 basic_block bb;
3095 basic_block last;
3096 unsigned i;
3097 edge e;
3098 basic_block nextbb;
3099 bitmap_iterator bi;
3101 /* For each basic block we need to visit, count the number of its
3102 predecessors that we need to visit first. */
3103 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
3105 edge_iterator ei;
3106 int count = 0;
3108 bb = BASIC_BLOCK_FOR_FN (cfun, i);
3110 FOR_EACH_EDGE (e, ei, bb->preds)
3112 bool visit = bitmap_bit_p (tovisit, e->src->index);
3114 if (visit && !(e->flags & EDGE_DFS_BACK))
3115 count++;
3116 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3117 fprintf (dump_file,
3118 "Irreducible region hit, ignoring edge to %i->%i\n",
3119 e->src->index, bb->index);
3121 BLOCK_INFO (bb)->npredecessors = count;
3122 /* When function never returns, we will never process exit block. */
3123 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3124 bb->count = profile_count::zero ();
3127 BLOCK_INFO (head)->frequency = 1;
3128 last = head;
3129 for (bb = head; bb; bb = nextbb)
3131 edge_iterator ei;
3132 sreal cyclic_probability = 0;
3133 sreal frequency = 0;
3135 nextbb = BLOCK_INFO (bb)->next;
3136 BLOCK_INFO (bb)->next = NULL;
3138 /* Compute frequency of basic block. */
3139 if (bb != head)
3141 if (flag_checking)
3142 FOR_EACH_EDGE (e, ei, bb->preds)
3143 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3144 || (e->flags & EDGE_DFS_BACK));
3146 FOR_EACH_EDGE (e, ei, bb->preds)
3147 if (EDGE_INFO (e)->back_edge)
3149 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3151 else if (!(e->flags & EDGE_DFS_BACK))
3153 /* frequency += (e->probability
3154 * BLOCK_INFO (e->src)->frequency /
3155 REG_BR_PROB_BASE); */
3157 /* FIXME: Graphite is producing edges with no profile. Once
3158 this is fixed, drop this. */
3159 sreal tmp = e->probability.initialized_p () ?
3160 e->probability.to_reg_br_prob_base () : 0;
3161 tmp *= BLOCK_INFO (e->src)->frequency;
3162 tmp *= real_inv_br_prob_base;
3163 frequency += tmp;
3166 if (cyclic_probability == 0)
3168 BLOCK_INFO (bb)->frequency = frequency;
3170 else
3172 if (cyclic_probability > real_almost_one)
3173 cyclic_probability = real_almost_one;
3175 /* BLOCK_INFO (bb)->frequency = frequency
3176 / (1 - cyclic_probability) */
3178 cyclic_probability = sreal (1) - cyclic_probability;
3179 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
3183 bitmap_clear_bit (tovisit, bb->index);
3185 e = find_edge (bb, head);
3186 if (e)
3188 /* EDGE_INFO (e)->back_edge_prob
3189 = ((e->probability * BLOCK_INFO (bb)->frequency)
3190 / REG_BR_PROB_BASE); */
3192 /* FIXME: Graphite is producing edges with no profile. Once
3193 this is fixed, drop this. */
3194 sreal tmp = e->probability.initialized_p () ?
3195 e->probability.to_reg_br_prob_base () : 0;
3196 tmp *= BLOCK_INFO (bb)->frequency;
3197 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
3200 /* Propagate to successor blocks. */
3201 FOR_EACH_EDGE (e, ei, bb->succs)
3202 if (!(e->flags & EDGE_DFS_BACK)
3203 && BLOCK_INFO (e->dest)->npredecessors)
3205 BLOCK_INFO (e->dest)->npredecessors--;
3206 if (!BLOCK_INFO (e->dest)->npredecessors)
3208 if (!nextbb)
3209 nextbb = e->dest;
3210 else
3211 BLOCK_INFO (last)->next = e->dest;
3213 last = e->dest;
3219 /* Estimate frequencies in loops at same nest level. */
3221 static void
3222 estimate_loops_at_level (struct loop *first_loop)
3224 struct loop *loop;
3226 for (loop = first_loop; loop; loop = loop->next)
3228 edge e;
3229 basic_block *bbs;
3230 unsigned i;
3231 auto_bitmap tovisit;
3233 estimate_loops_at_level (loop->inner);
3235 /* Find current loop back edge and mark it. */
3236 e = loop_latch_edge (loop);
3237 EDGE_INFO (e)->back_edge = 1;
3239 bbs = get_loop_body (loop);
3240 for (i = 0; i < loop->num_nodes; i++)
3241 bitmap_set_bit (tovisit, bbs[i]->index);
3242 free (bbs);
3243 propagate_freq (loop->header, tovisit);
3247 /* Propagates frequencies through structure of loops. */
3249 static void
3250 estimate_loops (void)
3252 auto_bitmap tovisit;
3253 basic_block bb;
3255 /* Start by estimating the frequencies in the loops. */
3256 if (number_of_loops (cfun) > 1)
3257 estimate_loops_at_level (current_loops->tree_root->inner);
3259 /* Now propagate the frequencies through all the blocks. */
3260 FOR_ALL_BB_FN (bb, cfun)
3262 bitmap_set_bit (tovisit, bb->index);
3264 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
3267 /* Drop the profile for NODE to guessed, and update its frequency based on
3268 whether it is expected to be hot given the CALL_COUNT. */
3270 static void
3271 drop_profile (struct cgraph_node *node, profile_count call_count)
3273 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3274 /* In the case where this was called by another function with a
3275 dropped profile, call_count will be 0. Since there are no
3276 non-zero call counts to this function, we don't know for sure
3277 whether it is hot, and therefore it will be marked normal below. */
3278 bool hot = maybe_hot_count_p (NULL, call_count);
3280 if (dump_file)
3281 fprintf (dump_file,
3282 "Dropping 0 profile for %s. %s based on calls.\n",
3283 node->dump_name (),
3284 hot ? "Function is hot" : "Function is normal");
3285 /* We only expect to miss profiles for functions that are reached
3286 via non-zero call edges in cases where the function may have
3287 been linked from another module or library (COMDATs and extern
3288 templates). See the comments below for handle_missing_profiles.
3289 Also, only warn in cases where the missing counts exceed the
3290 number of training runs. In certain cases with an execv followed
3291 by a no-return call the profile for the no-return call is not
3292 dumped and there can be a mismatch. */
3293 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3294 && call_count > profile_info->runs)
3296 if (flag_profile_correction)
3298 if (dump_file)
3299 fprintf (dump_file,
3300 "Missing counts for called function %s\n",
3301 node->dump_name ());
3303 else
3304 warning (0, "Missing counts for called function %s",
3305 node->dump_name ());
3308 basic_block bb;
3309 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3310 if (flag_guess_branch_prob)
3312 bool clear_zeros
3313 = ENTRY_BLOCK_PTR_FOR_FN
3314 (DECL_STRUCT_FUNCTION (node->decl))->count.nonzero_p ();
3315 FOR_ALL_BB_FN (bb, fn)
3316 if (clear_zeros || !(bb->count == profile_count::zero ()))
3317 bb->count = bb->count.guessed_local ();
3318 DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max =
3319 DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max.guessed_local ();
3321 else
3323 FOR_ALL_BB_FN (bb, fn)
3324 bb->count = profile_count::uninitialized ();
3325 DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max
3326 = profile_count::uninitialized ();
3328 pop_cfun ();
3330 struct cgraph_edge *e;
3331 for (e = node->callees; e; e = e->next_callee)
3332 e->count = gimple_bb (e->call_stmt)->count;
3333 for (e = node->indirect_calls; e; e = e->next_callee)
3334 e->count = gimple_bb (e->call_stmt)->count;
3336 profile_status_for_fn (fn)
3337 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3338 node->frequency
3339 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3342 /* In the case of COMDAT routines, multiple object files will contain the same
3343 function and the linker will select one for the binary. In that case
3344 all the other copies in the profile-instrumented binary will be missing
3345 profile counts. Look for cases where this happened, due to non-zero
3346 call counts going to 0-count functions, and drop the profile to guessed
3347 so that we can use the estimated probabilities and avoid optimizing only
3348 for size.
3350 The other case where the profile may be missing is when the routine
3351 is not going to be emitted to the object file, e.g. for "extern template"
3352 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3353 all other cases of non-zero calls to 0-count functions. */
3355 void
3356 handle_missing_profiles (void)
3358 struct cgraph_node *node;
3359 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
3360 auto_vec<struct cgraph_node *, 64> worklist;
3362 /* See if a 0-count function has non-0 count callers. In that case we
3363 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3364 FOR_EACH_DEFINED_FUNCTION (node)
3366 struct cgraph_edge *e;
3367 profile_count call_count = profile_count::zero ();
3368 gcov_type max_tp_first_run = 0;
3369 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3371 if (!(node->count == profile_count::zero ()))
3372 continue;
3373 for (e = node->callers; e; e = e->next_caller)
3374 if (e->count.initialized_p () && e->count > 0)
3376 call_count = call_count + e->count;
3378 if (e->caller->tp_first_run > max_tp_first_run)
3379 max_tp_first_run = e->caller->tp_first_run;
3382 /* If the time profile is missing, assign the maximum that comes from
3383 the caller functions. */
3384 if (!node->tp_first_run && max_tp_first_run)
3385 node->tp_first_run = max_tp_first_run + 1;
3387 if (call_count > 0
3388 && fn && fn->cfg
3389 && (call_count.apply_scale (unlikely_count_fraction, 1)
3390 >= profile_info->runs))
3392 drop_profile (node, call_count);
3393 worklist.safe_push (node);
3397 /* Propagate the profile dropping to other 0-count COMDATs that are
3398 potentially called by COMDATs we already dropped the profile on. */
3399 while (worklist.length () > 0)
3401 struct cgraph_edge *e;
3403 node = worklist.pop ();
3404 for (e = node->callees; e; e = e->next_callee)
3406 struct cgraph_node *callee = e->callee;
3407 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3409 if (callee->count > 0)
3410 continue;
3411 if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3412 && fn && fn->cfg
3413 && profile_status_for_fn (fn) == PROFILE_READ)
3415 drop_profile (node, profile_count::zero ());
3416 worklist.safe_push (callee);
3422 /* Record the maximal basic block count of the function in
3423 cfun->cfg->count_max. Return nonzero iff there was any nonzero IPA count. */
3425 bool
3426 update_max_bb_count (void)
3428 profile_count true_count_max = profile_count::uninitialized ();
3429 basic_block bb;
3431 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3432 true_count_max = true_count_max.max (bb->count);
3434 cfun->cfg->count_max = true_count_max;
3436 return true_count_max.ipa ().nonzero_p ();
3439 /* Return true if the function is likely to be expensive, so there is no point
3440 in optimizing the performance of the prologue, epilogue or doing inlining at
3441 the expense of code size growth. THRESHOLD is the limit on the number of
3442 instructions the function can execute on average to still be considered inexpensive. */
3444 bool
3445 expensive_function_p (int threshold)
3447 basic_block bb;
3449 /* If the profile was scaled in a way the entry block has count 0, then
3450 the function is definitely taking a lot of time. */
3451 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
3452 return true;
3454 profile_count limit = ENTRY_BLOCK_PTR_FOR_FN
3455 (cfun)->count.apply_scale (threshold, 1);
3456 profile_count sum = profile_count::zero ();
3457 FOR_EACH_BB_FN (bb, cfun)
3459 rtx_insn *insn;
3461 if (!bb->count.initialized_p ())
3463 if (dump_file)
3464 fprintf (dump_file, "Function is considered expensive because"
3465 " count of bb %i is not initialized\n", bb->index);
3466 return true;
3469 FOR_BB_INSNS (bb, insn)
3470 if (active_insn_p (insn))
3472 sum += bb->count;
3473 if (sum > limit)
3474 return true;
3478 return false;
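/* Worked example with hypothetical numbers: for THRESHOLD == 100 and
   an entry count of 1000, LIMIT is 100000.  Since bb->count is added
   once per active insn, a function executing more than 100 insns per
   invocation on average crosses the limit and is reported expensive.  */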
3481 /* All basic blocks that are reachable only from unlikely basic blocks are
3482 unlikely. */
3484 void
3485 propagate_unlikely_bbs_forward (void)
3487 auto_vec<basic_block, 64> worklist;
3488 basic_block bb;
3489 edge_iterator ei;
3490 edge e;
3492 if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
3494 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
3495 worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3497 while (worklist.length () > 0)
3499 bb = worklist.pop ();
3500 FOR_EACH_EDGE (e, ei, bb->succs)
3501 if (!(e->count () == profile_count::zero ())
3502 && !(e->dest->count == profile_count::zero ())
3503 && !e->dest->aux)
3505 e->dest->aux = (void *)(size_t) 1;
3506 worklist.safe_push (e->dest);
3511 FOR_ALL_BB_FN (bb, cfun)
3513 if (!bb->aux)
3515 if (!(bb->count == profile_count::zero ())
3516 && (dump_file && (dump_flags & TDF_DETAILS)))
3517 fprintf (dump_file,
3518 "Basic block %i is marked unlikely by forward prop\n",
3519 bb->index);
3520 bb->count = profile_count::zero ();
3522 else
3523 bb->aux = NULL;
3527 /* Determine basic blocks/edges that are known to be unlikely executed and set
3528 their counters to zero.
3529 This is done by first identifying obviously unlikely BBs/edges and then
3530 propagating in both directions. */
3532 static void
3533 determine_unlikely_bbs ()
3535 basic_block bb;
3536 auto_vec<basic_block, 64> worklist;
3537 edge_iterator ei;
3538 edge e;
3540 FOR_EACH_BB_FN (bb, cfun)
3542 if (!(bb->count == profile_count::zero ())
3543 && unlikely_executed_bb_p (bb))
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file, "Basic block %i is locally unlikely\n",
3547 bb->index);
3548 bb->count = profile_count::zero ();
3551 FOR_EACH_EDGE (e, ei, bb->succs)
3552 if (!(e->probability == profile_probability::never ())
3553 && unlikely_executed_edge_p (e))
3555 if (dump_file && (dump_flags & TDF_DETAILS))
3556 fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
3557 bb->index, e->dest->index);
3558 e->probability = profile_probability::never ();
3561 gcc_checking_assert (!bb->aux);
3563 propagate_unlikely_bbs_forward ();
3565 auto_vec<int, 64> nsuccs;
3566 nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun));
3567 FOR_ALL_BB_FN (bb, cfun)
3568 if (!(bb->count == profile_count::zero ())
3569 && bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
3571 nsuccs[bb->index] = 0;
3572 FOR_EACH_EDGE (e, ei, bb->succs)
3573 if (!(e->probability == profile_probability::never ())
3574 && !(e->dest->count == profile_count::zero ()))
3575 nsuccs[bb->index]++;
3576 if (!nsuccs[bb->index])
3577 worklist.safe_push (bb);
3579 while (worklist.length () > 0)
3581 bb = worklist.pop ();
3582 if (bb->count == profile_count::zero ())
3583 continue;
3584 if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3586 bool found = false;
3587 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
3588 !gsi_end_p (gsi); gsi_next (&gsi))
3589 if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
3590 /* stmt_can_terminate_bb_p special cases noreturns because it
3591 assumes that fake edges are created. We want to know that
3592 noreturn alone does not imply BB to be unlikely. */
3593 || (is_gimple_call (gsi_stmt (gsi))
3594 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
3596 found = true;
3597 break;
3599 if (found)
3600 continue;
3602 if (dump_file && (dump_flags & TDF_DETAILS))
3603 fprintf (dump_file,
3604 "Basic block %i is marked unlikely by backward prop\n",
3605 bb->index);
3606 bb->count = profile_count::zero ();
3607 FOR_EACH_EDGE (e, ei, bb->preds)
3608 if (!(e->probability == profile_probability::never ()))
3610 if (!(e->src->count == profile_count::zero ()))
3612 gcc_checking_assert (nsuccs[e->src->index] > 0);
3613 nsuccs[e->src->index]--;
3614 if (!nsuccs[e->src->index])
3615 worklist.safe_push (e->src);
3619 /* Finally all edges from non-0 regions to 0 are unlikely. */
3620 FOR_ALL_BB_FN (bb, cfun)
3621 if (!(bb->count == profile_count::zero ()))
3622 FOR_EACH_EDGE (e, ei, bb->succs)
3623 if (!(e->probability == profile_probability::never ())
3624 && e->dest->count == profile_count::zero ())
3626 if (dump_file && (dump_flags & TDF_DETAILS))
3627 fprintf (dump_file, "Edge %i->%i is unlikely because "
3628 "it enters unlikely block\n",
3629 bb->index, e->dest->index);
3630 e->probability = profile_probability::never ();
3632 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
3633 cgraph_node::get (current_function_decl)->count = profile_count::zero ();
3636 /* Estimate and propagate basic block frequencies using the given branch
3637 probabilities. If FORCE is true, the frequencies are used to estimate
3638 the counts even when there are already non-zero profile counts. */
3640 void
3641 estimate_bb_frequencies (bool force)
3643 basic_block bb;
3644 sreal freq_max;
3646 determine_unlikely_bbs ();
3648 if (force || profile_status_for_fn (cfun) != PROFILE_READ
3649 || !update_max_bb_count ())
3651 static int real_values_initialized = 0;
3653 if (!real_values_initialized)
3655 real_values_initialized = 1;
3656 real_br_prob_base = REG_BR_PROB_BASE;
3657 /* Scaling frequencies up to the maximal profile count may result in
3658 frequent overflows, especially when inlining loops.
3659 A small scaling results in unnecessary precision loss. Stay in
3660 the half of the (exponential) range. */
3661 real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
3662 real_one_half = sreal (1, -1);
3663 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3664 real_almost_one = sreal (1) - real_inv_br_prob_base;
3667 mark_dfs_back_edges ();
3669 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3670 profile_probability::always ();
3672 /* Set up block info for each basic block. */
3673 alloc_aux_for_blocks (sizeof (block_info));
3674 alloc_aux_for_edges (sizeof (edge_prob_info));
3675 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3677 edge e;
3678 edge_iterator ei;
3680 FOR_EACH_EDGE (e, ei, bb->succs)
3682 /* FIXME: Graphite is producing edges with no profile. Once
3683 this is fixed, drop this. */
3684 if (e->probability.initialized_p ())
3685 EDGE_INFO (e)->back_edge_prob
3686 = e->probability.to_reg_br_prob_base ();
3687 else
3688 EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
3689 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3693 /* First compute frequencies locally for each loop from innermost
3694 to outermost to examine frequencies for back edges. */
3695 estimate_loops ();
3697 freq_max = 0;
3698 FOR_EACH_BB_FN (bb, cfun)
3699 if (freq_max < BLOCK_INFO (bb)->frequency)
3700 freq_max = BLOCK_INFO (bb)->frequency;
3702 freq_max = real_bb_freq_max / freq_max;
3703 if (freq_max < 16)
3704 freq_max = 16;
3705 profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
3706 cfun->cfg->count_max = profile_count::uninitialized ();
3707 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3709 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3710 profile_count count = profile_count::from_gcov_type (tmp.to_int ());
3712 /* If we have profile feedback in which this function was never
3713 executed, then preserve this info. */
3714 if (!(bb->count == profile_count::zero ()))
3715 bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
3716 cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
3719 free_aux_for_blocks ();
3720 free_aux_for_edges ();
3722 compute_function_frequency ();
3725 /* Decide whether function is hot, cold or unlikely executed. */
3726 void
3727 compute_function_frequency (void)
3729 basic_block bb;
3730 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3732 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3733 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3734 node->only_called_at_startup = true;
3735 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3736 node->only_called_at_exit = true;
3738 if (profile_status_for_fn (cfun) != PROFILE_READ)
3740 int flags = flags_from_decl_or_type (current_function_decl);
3741 if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ()
3742 && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
3743 || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3744 != NULL)
3746 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3747 warn_function_cold (current_function_decl);
3749 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3750 != NULL)
3751 node->frequency = NODE_FREQUENCY_HOT;
3752 else if (flags & ECF_NORETURN)
3753 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3754 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3755 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3756 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3757 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3758 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3759 return;
3762 /* Only the first time try to drop the function into unlikely executed.
3763 After inlining, roundoff errors may confuse us.
3764 The ipa-profile pass will drop functions only called from unlikely
3765 functions to unlikely, and that is most of what we care about. */
3766 if (!cfun->after_inlining)
3768 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3769 warn_function_cold (current_function_decl);
3771 FOR_EACH_BB_FN (bb, cfun)
3773 if (maybe_hot_bb_p (cfun, bb))
3775 node->frequency = NODE_FREQUENCY_HOT;
3776 return;
3778 if (!probably_never_executed_bb_p (cfun, bb))
3779 node->frequency = NODE_FREQUENCY_NORMAL;
3783 /* Build PREDICT_EXPR. */
3784 tree
3785 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3787 tree t = build1 (PREDICT_EXPR, void_type_node,
3788 build_int_cst (integer_type_node, predictor));
3789 SET_PREDICT_EXPR_OUTCOME (t, taken);
3790 return t;
3793 const char *
3794 predictor_name (enum br_predictor predictor)
3796 return predictor_info[predictor].name;
3799 /* Predict branch probabilities and estimate profile of the tree CFG. */
3801 namespace {
3803 const pass_data pass_data_profile =
3805 GIMPLE_PASS, /* type */
3806 "profile_estimate", /* name */
3807 OPTGROUP_NONE, /* optinfo_flags */
3808 TV_BRANCH_PROB, /* tv_id */
3809 PROP_cfg, /* properties_required */
3810 0, /* properties_provided */
3811 0, /* properties_destroyed */
3812 0, /* todo_flags_start */
3813 0, /* todo_flags_finish */
3816 class pass_profile : public gimple_opt_pass
3818 public:
3819 pass_profile (gcc::context *ctxt)
3820 : gimple_opt_pass (pass_data_profile, ctxt)
3823 /* opt_pass methods: */
3824 virtual bool gate (function *) { return flag_guess_branch_prob; }
3825 virtual unsigned int execute (function *);
3827 }; // class pass_profile
3829 unsigned int
3830 pass_profile::execute (function *fun)
3832 unsigned nb_loops;
3834 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3835 return 0;
3837 loop_optimizer_init (LOOPS_NORMAL);
3838 if (dump_file && (dump_flags & TDF_DETAILS))
3839 flow_loops_dump (dump_file, NULL, 0);
3841 mark_irreducible_loops ();
3843 nb_loops = number_of_loops (fun);
3844 if (nb_loops > 1)
3845 scev_initialize ();
3847 tree_estimate_probability (false);
3849 if (nb_loops > 1)
3850 scev_finalize ();
3852 loop_optimizer_finalize ();
3853 if (dump_file && (dump_flags & TDF_DETAILS))
3854 gimple_dump_cfg (dump_file, dump_flags);
3855 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3856 profile_status_for_fn (fun) = PROFILE_GUESSED;
3857 if (dump_file && (dump_flags & TDF_DETAILS))
3859 struct loop *loop;
3860 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3861 if (loop->header->count.initialized_p ())
3862 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
3863 loop->num,
3864 (int)expected_loop_iterations_unbounded (loop));
3866 return 0;
3869 } // anon namespace
3871 gimple_opt_pass *
3872 make_pass_profile (gcc::context *ctxt)
3874 return new pass_profile (ctxt);
3877 namespace {
3879 const pass_data pass_data_strip_predict_hints =
3881 GIMPLE_PASS, /* type */
3882 "*strip_predict_hints", /* name */
3883 OPTGROUP_NONE, /* optinfo_flags */
3884 TV_BRANCH_PROB, /* tv_id */
3885 PROP_cfg, /* properties_required */
3886 0, /* properties_provided */
3887 0, /* properties_destroyed */
3888 0, /* todo_flags_start */
3889 0, /* todo_flags_finish */
3892 class pass_strip_predict_hints : public gimple_opt_pass
3894 public:
3895 pass_strip_predict_hints (gcc::context *ctxt)
3896 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3899 /* opt_pass methods: */
3900 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3901 virtual unsigned int execute (function *);
3903 }; // class pass_strip_predict_hints
3905 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3906 we no longer need. */
3907 unsigned int
3908 pass_strip_predict_hints::execute (function *fun)
3910 basic_block bb;
3911 gimple *ass_stmt;
3912 tree var;
3913 bool changed = false;
3915 FOR_EACH_BB_FN (bb, fun)
3917 gimple_stmt_iterator bi;
3918 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3920 gimple *stmt = gsi_stmt (bi);
3922 if (gimple_code (stmt) == GIMPLE_PREDICT)
3924 gsi_remove (&bi, true);
3925 changed = true;
3926 continue;
3928 else if (is_gimple_call (stmt))
3930 tree fndecl = gimple_call_fndecl (stmt);
3932 if ((fndecl
3933 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3934 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3935 && gimple_call_num_args (stmt) == 2)
3936 || (gimple_call_internal_p (stmt)
3937 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3939 var = gimple_call_lhs (stmt);
3940 changed = true;
3941 if (var)
3943 ass_stmt
3944 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3945 gsi_replace (&bi, ass_stmt, true);
3947 else
3949 gsi_remove (&bi, true);
3950 continue;
3954 gsi_next (&bi);
3957 return changed ? TODO_cleanup_cfg : 0;
} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops, frequencies
   might run out of scale and thus need to be recomputed.  */
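/* A minimal usage sketch (the caller here is hypothetical): a transform
   that has scaled counts beyond repair can re-derive them from the
   surviving branch probabilities with

     push_cfun (fun);
     rebuild_frequencies ();
     pop_cfun ();

   where push_cfun/pop_cfun are needed only when FUN is not already the
   current function, since rebuild_frequencies operates on cfun.  */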
void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations.  This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0.  There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
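  /* As a worked example (the numbers are illustrative): a block with
     count 3 whose count is halved twice under integer arithmetic goes
     3 -> 1 -> 0 and would then look like it is never executed;
     re-deriving counts from the branch probabilities avoids compounding
     such truncation.  */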
  cfun->cfg->count_max = profile_count::uninitialized ();
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    update_max_bb_count ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}
/* Perform a dry run of the branch prediction pass and report comparison of
   the predicted and real profile into the dump file.  */
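/* A hypothetical invocation sketch (the PROFILE_READ guard is an
   assumption of the sketch, not something this function enforces): the
   comparison is only meaningful when a measured profile is in place, so
   a caller might do

     if (profile_status_for_fn (cfun) == PROFILE_READ)
       report_predictor_hitrates ();

   and then inspect the per-predictor lines in the detailed dump.  */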
void
report_predictor_hitrates (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (cfun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability (true);

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
}
/* Force edge E to be cold.
   If IMPOSSIBLE is true, force the edge to have count and probability 0;
   otherwise keep a low probability to represent a possible error in the
   guess.  This is used e.g. when we predict a loop to likely iterate a
   given number of times but are not 100% sure.

   This function locally updates the profile without attempting to keep
   global consistency, which cannot be reached in full generality without
   a full profile rebuild from probabilities alone.  Doing so is not
   necessarily a good idea because frequencies and counts may be more
   realistic than probabilities.

   In some cases (such as the elimination of early exits during full loop
   unrolling) the caller can ensure that the profile will become
   consistent afterwards.  */
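/* An illustrative call (the variable name is hypothetical): after fully
   unrolling a loop whose exit test can no longer succeed, the unroller
   could mark the stale exit as unreachable with

     force_edge_cold (exit_edge, /*impossible=*/true);

   while passing false instead would merely keep a tiny nonzero
   probability, leaving room for a wrong guess.  */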
void
force_edge_cold (edge e, bool impossible)
{
  profile_count count_sum = profile_count::zero ();
  profile_probability prob_sum = profile_probability::never ();
  edge_iterator ei;
  edge e2;
  bool uninitialized_exit = false;

  /* When branch probability guesses are not known, do nothing.  */
  if (!impossible && !e->count ().initialized_p ())
    return;
  profile_probability goal = (impossible ? profile_probability::never ()
                              : profile_probability::very_unlikely ());

  /* If the edge is already improbable or cold, just return.  */
  if (e->probability <= goal
      && (!impossible || e->count () == profile_count::zero ()))
    return;
  FOR_EACH_EDGE (e2, ei, e->src->succs)
    if (e2 != e)
      {
        /* Skip fake sibling edges; testing E here instead of E2 would be
           a no-op since E is loop-invariant.  */
        if (e2->flags & EDGE_FAKE)
          continue;
        if (e2->count ().initialized_p ())
          count_sum += e2->count ();
        if (e2->probability.initialized_p ())
          prob_sum += e2->probability;
        else
          uninitialized_exit = true;
      }
  /* If we are not guessing profiles but have some other edges out,
     just assume the control flow goes elsewhere.  */
  if (uninitialized_exit)
    e->probability = goal;
  /* If there are other edges out of e->src, redistribute the probability
     there.  */
  else if (prob_sum > profile_probability::never ())
    {
      if (!(e->probability < goal))
        e->probability = goal;

      profile_probability prob_comp = prob_sum / e->probability.invert ();

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Making edge %i->%i %s by redistributing "
                 "probability to other edges.\n",
                 e->src->index, e->dest->index,
                 impossible ? "impossible" : "cold");
      FOR_EACH_EDGE (e2, ei, e->src->succs)
        if (e2 != e)
          e2->probability /= prob_comp;
      if (current_ir_type () != IR_GIMPLE
          && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
        update_br_prob_note (e->src);
    }
  /* If all edges out of e->src are unlikely, the basic block itself is
     unlikely.  */
  else
    {
      if (prob_sum == profile_probability::never ())
        e->probability = profile_probability::always ();
      else
        {
          if (impossible)
            e->probability = profile_probability::never ();
          /* If BB has some edges out that are not impossible, we cannot
             assume that BB itself is.  */
          impossible = false;
        }
      if (current_ir_type () != IR_GIMPLE
          && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
        update_br_prob_note (e->src);
      if (e->src->count == profile_count::zero ())
        return;
      if (count_sum == profile_count::zero () && impossible)
        {
          bool found = false;
          if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
            ;
          else if (current_ir_type () == IR_GIMPLE)
            for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
                 !gsi_end_p (gsi); gsi_next (&gsi))
              {
                if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
                  {
                    found = true;
                    break;
                  }
              }
          /* FIXME: Implement RTL path.  */
          else
            found = true;
          if (!found)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "Making bb %i impossible and dropping count to 0.\n",
                         e->src->index);
              e->src->count = profile_count::zero ();
              FOR_EACH_EDGE (e2, ei, e->src->preds)
                force_edge_cold (e2, impossible);
              return;
            }
        }
      /* If we did not adjust anything, the source basic block has no
         likely edges leaving in the other direction.  In that case force
         that bb cold, too.  This is in general a difficult task to do,
         but handle the special case when BB has only one predecessor.
         This is a common case when we are updating the profile after
         loop transforms.  */
      if (!(prob_sum > profile_probability::never ())
          && count_sum == profile_count::zero ()
          && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
             > (impossible ? 0 : 1))
        {
          int old_frequency = e->src->count.to_frequency (cfun);
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
                     impossible ? "impossible" : "cold");
          int new_frequency = MIN (e->src->count.to_frequency (cfun),
                                   impossible ? 0 : 1);
          if (impossible)
            e->src->count = profile_count::zero ();
          else
            e->src->count = e->count ().apply_scale (new_frequency,
                                                     old_frequency);
          force_edge_cold (single_pred_edge (e->src), impossible);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS)
               && maybe_hot_bb_p (cfun, e->src))
        fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
                 impossible ? "impossible" : "cold");
    }
}
#if CHECKING_P

namespace selftest {

/* Test that the hitrate values of the predictors defined in predict.def
   are within the range (50, 100].  */
struct branch_predictor
{
  const char *name;
  int probability;
};

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
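/* For example, a hypothetical predict.def entry

     DEF_PREDICTOR (PRED_FOO, "foo", HITRATE (75), 0)

   expands under the definition above to the initializer

     { "foo", 7500 },

   given that HITRATE (75) evaluates to 75% of REG_BR_PROB_BASE (10000),
   so the check below computes p == 75 and passes.  */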
static void
test_prediction_value_range ()
{
  branch_predictor predictors[] = {
#include "predict.def"
    {NULL, -1U}
  };

  for (unsigned i = 0; predictors[i].name != NULL; i++)
    {
      if (predictors[i].probability == PROB_UNINITIALIZED)
        continue;

      unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
      ASSERT_TRUE (p > 50 && p <= 100);
    }
}
#undef DEF_PREDICTOR

/* Run all of the selftests within this file.  */

void
predict_c_tests ()
{
  test_prediction_value_range ();
}

} // namespace selftest
#endif /* CHECKING_P.  */