[official-gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "backend.h"
34 #include "rtl.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "cfghooks.h"
38 #include "tree-pass.h"
39 #include "ssa.h"
40 #include "emit-rtl.h"
41 #include "cgraph.h"
42 #include "coverage.h"
43 #include "diagnostic-core.h"
44 #include "gimple-predict.h"
45 #include "fold-const.h"
46 #include "calls.h"
47 #include "cfganal.h"
48 #include "profile.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "cfgloop.h"
52 #include "gimple-iterator.h"
53 #include "tree-cfg.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-scalar-evolution.h"
58 /* Enum with reasons why a predictor is ignored. */
60 enum predictor_reason
62 REASON_NONE,
63 REASON_IGNORED,
64 REASON_SINGLE_EDGE_DUPLICATE,
65 REASON_EDGE_PAIR_DUPLICATE
68 /* String messages for the aforementioned enum. */
70 static const char *reason_messages[] = {"", " (ignored)",
71 " (single edge duplicate)", " (edge pair duplicate)"};
73 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
74 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
75 static sreal real_almost_one, real_br_prob_base,
76 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
78 static void combine_predictions_for_insn (rtx_insn *, basic_block);
79 static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
80 enum predictor_reason, edge);
81 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
82 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
83 static bool can_predict_insn_p (const rtx_insn *);
85 /* Information we hold about each branch predictor.
86 Filled using information from predict.def. */
88 struct predictor_info
90 const char *const name; /* Name used in the debugging dumps. */
91 const int hitrate; /* Expected hitrate used by
92 predict_insn_def call. */
93 const int flags;
96 /* Use given predictor without Dempster-Shafer theory if it matches
97 using first_match heuristics. */
98 #define PRED_FLAG_FIRST_MATCH 1
100 /* Convert a hitrate given in percent to our fixed-point representation. */
102 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
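/* For illustration (assuming the usual REG_BR_PROB_BASE of 10000):
   HITRATE (50) yields 5000 and HITRATE (99) yields 9900, i.e. the percentage
   scaled into the REG_BR_PROB_BASE fixed-point range.  */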
104 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
105 static const struct predictor_info predictor_info[]= {
106 #include "predict.def"
108 /* Upper bound on predictors. */
109 {NULL, 0, 0}
111 #undef DEF_PREDICTOR
113 /* Return TRUE if frequency FREQ is considered to be hot. */
115 static inline bool
116 maybe_hot_frequency_p (struct function *fun, int freq)
118 struct cgraph_node *node = cgraph_node::get (fun->decl);
119 if (!profile_info
120 || !opt_for_fn (fun->decl, flag_branch_probabilities))
122 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
123 return false;
124 if (node->frequency == NODE_FREQUENCY_HOT)
125 return true;
127 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
128 return true;
129 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
130 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
131 return false;
132 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
133 return false;
134 if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
135 < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
136 return false;
137 return true;
140 static gcov_type min_count = -1;
142 /* Determine the threshold for hot BB counts. */
144 gcov_type
145 get_hot_bb_threshold ()
147 gcov_working_set_t *ws;
148 if (min_count == -1)
150 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
151 gcc_assert (ws);
152 min_count = ws->min_counter;
154 return min_count;
157 /* Set the threshold for hot BB counts. */
159 void
160 set_hot_bb_threshold (gcov_type min)
162 min_count = min;
165 /* Return TRUE if profile count COUNT is considered to be hot. */
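/* For illustration: if the profile was trained over 1000 runs
   (profile_info->runs == 1000), a count of 1000 or less is never hot; the
   block must execute more than once per run on average before the hot-count
   threshold below is even consulted.  */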
167 bool
168 maybe_hot_count_p (struct function *fun, gcov_type count)
170 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
171 return true;
172 /* Code executed at most once is not hot. */
173 if (profile_info->runs >= count)
174 return false;
175 return (count >= get_hot_bb_threshold ());
178 /* Return true in case BB can be CPU intensive and should be optimized
179 for maximal performance. */
181 bool
182 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
184 gcc_checking_assert (fun);
185 if (profile_status_for_fn (fun) == PROFILE_READ)
186 return maybe_hot_count_p (fun, bb->count);
187 return maybe_hot_frequency_p (fun, bb->frequency);
190 /* Return true in case edge E can be CPU intensive and should be optimized
191 for maximal performance. */
193 bool
194 maybe_hot_edge_p (edge e)
196 if (profile_status_for_fn (cfun) == PROFILE_READ)
197 return maybe_hot_count_p (cfun, e->count);
198 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
201 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
202 node frequency, reflect never being executed. */
204 static bool
205 probably_never_executed (struct function *fun,
206 gcov_type count, int frequency)
208 gcc_checking_assert (fun);
209 if (profile_status_for_fn (fun) == PROFILE_READ)
211 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
212 if (count * unlikely_count_fraction >= profile_info->runs)
213 return false;
214 if (!frequency)
215 return true;
216 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
217 return false;
218 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
220 gcov_type computed_count;
221 /* Check for possibility of overflow, in which case entry bb count
222 is large enough to do the division first without losing much
223 precision. */
224 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
225 REG_BR_PROB_BASE)
227 gcov_type scaled_count
228 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
229 unlikely_count_fraction;
230 computed_count = RDIV (scaled_count,
231 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
233 else
235 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
236 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
237 computed_count *= frequency * unlikely_count_fraction;
239 if (computed_count >= profile_info->runs)
240 return false;
242 return true;
244 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
245 && (cgraph_node::get (fun->decl)->frequency
246 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
247 return true;
248 return false;
252 /* Return true in case BB is probably never executed. */
254 bool
255 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
257 return probably_never_executed (fun, bb->count, bb->frequency);
261 /* Return true in case edge E is probably never executed. */
263 bool
264 probably_never_executed_edge_p (struct function *fun, edge e)
266 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
269 /* Return true when current function should always be optimized for size. */
271 bool
272 optimize_function_for_size_p (struct function *fun)
274 if (!fun || !fun->decl)
275 return optimize_size;
276 cgraph_node *n = cgraph_node::get (fun->decl);
277 return n && n->optimize_for_size_p ();
280 /* Return true when current function should always be optimized for speed. */
282 bool
283 optimize_function_for_speed_p (struct function *fun)
285 return !optimize_function_for_size_p (fun);
288 /* Return the optimization type that should be used for the function FUN. */
290 optimization_type
291 function_optimization_type (struct function *fun)
293 return (optimize_function_for_speed_p (fun)
294 ? OPTIMIZE_FOR_SPEED
295 : OPTIMIZE_FOR_SIZE);
298 /* Return TRUE when BB should be optimized for size. */
300 bool
301 optimize_bb_for_size_p (const_basic_block bb)
303 return (optimize_function_for_size_p (cfun)
304 || (bb && !maybe_hot_bb_p (cfun, bb)));
307 /* Return TRUE when BB should be optimized for speed. */
309 bool
310 optimize_bb_for_speed_p (const_basic_block bb)
312 return !optimize_bb_for_size_p (bb);
315 /* Return the optimization type that should be used for block BB. */
317 optimization_type
318 bb_optimization_type (const_basic_block bb)
320 return (optimize_bb_for_speed_p (bb)
321 ? OPTIMIZE_FOR_SPEED
322 : OPTIMIZE_FOR_SIZE);
325 /* Return TRUE when edge E should be optimized for size. */
327 bool
328 optimize_edge_for_size_p (edge e)
330 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
333 /* Return TRUE when edge E should be optimized for speed. */
335 bool
336 optimize_edge_for_speed_p (edge e)
338 return !optimize_edge_for_size_p (e);
341 /* Return TRUE when the current instruction should be optimized for size. */
343 bool
344 optimize_insn_for_size_p (void)
346 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
349 /* Return TRUE when the current instruction should be optimized for speed. */
351 bool
352 optimize_insn_for_speed_p (void)
354 return !optimize_insn_for_size_p ();
357 /* Return TRUE when LOOP should be optimized for size. */
359 bool
360 optimize_loop_for_size_p (struct loop *loop)
362 return optimize_bb_for_size_p (loop->header);
365 /* Return TRUE when LOOP should be optimized for speed. */
367 bool
368 optimize_loop_for_speed_p (struct loop *loop)
370 return optimize_bb_for_speed_p (loop->header);
373 /* Return TRUE when LOOP nest should be optimized for speed. */
375 bool
376 optimize_loop_nest_for_speed_p (struct loop *loop)
378 struct loop *l = loop;
379 if (optimize_loop_for_speed_p (loop))
380 return true;
381 l = loop->inner;
382 while (l && l != loop)
384 if (optimize_loop_for_speed_p (l))
385 return true;
386 if (l->inner)
387 l = l->inner;
388 else if (l->next)
389 l = l->next;
390 else
392 while (l != loop && !l->next)
393 l = loop_outer (l);
394 if (l != loop)
395 l = l->next;
398 return false;
401 /* Return TRUE when LOOP nest should be optimized for size. */
403 bool
404 optimize_loop_nest_for_size_p (struct loop *loop)
406 return !optimize_loop_nest_for_speed_p (loop);
409 /* Return true when edge E is likely to be well predictable by a branch
410 predictor. */
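/* For illustration: with PARAM_PREDICTABLE_BRANCH_OUTCOME at its usual
   default of 2, an edge is reported predictable only if its probability is
   at most 2% or at least 98% of REG_BR_PROB_BASE.  */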
412 bool
413 predictable_edge_p (edge e)
415 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
416 return false;
417 if ((e->probability
418 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
419 || (REG_BR_PROB_BASE - e->probability
420 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
421 return true;
422 return false;
426 /* Set RTL expansion for BB profile. */
428 void
429 rtl_profile_for_bb (basic_block bb)
431 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
434 /* Set RTL expansion for edge profile. */
436 void
437 rtl_profile_for_edge (edge e)
439 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
442 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
443 void
444 default_rtl_profile (void)
446 crtl->maybe_hot_insn_p = true;
449 /* Return true if one of the outgoing edges of BB is already predicted by
450 PREDICTOR. */
452 bool
453 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
455 rtx note;
456 if (!INSN_P (BB_END (bb)))
457 return false;
458 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
459 if (REG_NOTE_KIND (note) == REG_BR_PRED
460 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
461 return true;
462 return false;
465 /* Structure representing predictions at the tree level. */
467 struct edge_prediction {
468 struct edge_prediction *ep_next;
469 edge ep_edge;
470 enum br_predictor ep_predictor;
471 int ep_probability;
474 /* This map contains for a basic block the list of predictions for the
475 outgoing edges. */
477 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
479 /* Return true if one of the outgoing edges of BB is already predicted by
480 PREDICTOR. */
482 bool
483 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
485 struct edge_prediction *i;
486 edge_prediction **preds = bb_predictions->get (bb);
488 if (!preds)
489 return false;
491 for (i = *preds; i; i = i->ep_next)
492 if (i->ep_predictor == predictor)
493 return true;
494 return false;
497 /* Return true if edge E is already predicted by PREDICTOR with the
498 probability that corresponds to TAKEN. */
500 bool
501 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
503 struct edge_prediction *i;
504 basic_block bb = e->src;
505 edge_prediction **preds = bb_predictions->get (bb);
506 if (!preds)
507 return false;
509 int probability = predictor_info[(int) predictor].hitrate;
511 if (taken != TAKEN)
512 probability = REG_BR_PROB_BASE - probability;
514 for (i = *preds; i; i = i->ep_next)
515 if (i->ep_predictor == predictor
516 && i->ep_edge == e
517 && i->ep_probability == probability)
518 return true;
519 return false;
522 /* Return true when the probability of an edge is reliable.
524 The profile guessing code is good at predicting the branch outcome (i.e.
525 taken/not taken); it is right slightly over 75% of the time.
526 It is, however, notoriously poor at predicting the probability itself.
527 In general the guessed profile appears a lot flatter (with probabilities
528 closer to 50%) than reality, so it is a bad idea to use it to drive
529 optimizations such as those disabling dynamic branch prediction for well
530 predictable branches.
532 There are two exceptions - edges leading to noreturn calls and edges
533 predicted by the number of iterations heuristics are predicted well. This
534 predicate should be able to distinguish those, but at the moment it simply
535 checks for the noreturn heuristic, which is the only one giving a probability
536 over 99% or below 1%. In the future we might want to propagate reliability
537 information across the CFG if we find it useful in multiple places. */
538 static bool
539 probability_reliable_p (int prob)
541 return (profile_status_for_fn (cfun) == PROFILE_READ
542 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
543 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
546 /* Same predicate as above, working on edges. */
547 bool
548 edge_probability_reliable_p (const_edge e)
550 return probability_reliable_p (e->probability);
553 /* Same predicate as edge_probability_reliable_p, working on notes. */
554 bool
555 br_prob_note_reliable_p (const_rtx note)
557 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
558 return probability_reliable_p (XINT (note, 0));
561 static void
562 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
564 gcc_assert (any_condjump_p (insn));
565 if (!flag_guess_branch_prob)
566 return;
568 add_reg_note (insn, REG_BR_PRED,
569 gen_rtx_CONCAT (VOIDmode,
570 GEN_INT ((int) predictor),
571 GEN_INT ((int) probability)));
574 /* Predict insn by given predictor. */
576 void
577 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
578 enum prediction taken)
580 int probability = predictor_info[(int) predictor].hitrate;
582 if (taken != TAKEN)
583 probability = REG_BR_PROB_BASE - probability;
585 predict_insn (insn, predictor, probability);
588 /* Predict edge E with given probability if possible. */
590 void
591 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
593 rtx_insn *last_insn;
594 last_insn = BB_END (e->src);
596 /* We can store the branch prediction information only about
597 conditional jumps. */
598 if (!any_condjump_p (last_insn))
599 return;
601 /* We always store probability of branching. */
602 if (e->flags & EDGE_FALLTHRU)
603 probability = REG_BR_PROB_BASE - probability;
605 predict_insn (last_insn, predictor, probability);
608 /* Predict edge E with the given PROBABILITY. */
609 void
610 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
612 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
613 && EDGE_COUNT (e->src->succs) > 1
614 && flag_guess_branch_prob
615 && optimize)
617 struct edge_prediction *i = XNEW (struct edge_prediction);
618 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
620 i->ep_next = preds;
621 preds = i;
622 i->ep_probability = probability;
623 i->ep_predictor = predictor;
624 i->ep_edge = e;
628 /* Filter edge predictions PREDS by a function FILTER. DATA are passed
629 to the filter function. */
631 void
632 filter_predictions (edge_prediction **preds,
633 bool (*filter) (edge_prediction *, void *), void *data)
635 if (!bb_predictions)
636 return;
638 if (preds)
640 struct edge_prediction **prediction = preds;
641 struct edge_prediction *next;
643 while (*prediction)
645 if ((*filter) (*prediction, data))
646 prediction = &((*prediction)->ep_next);
647 else
649 next = (*prediction)->ep_next;
650 free (*prediction);
651 *prediction = next;
657 /* Filter function predicate that returns true for an edge prediction P
658 if its edge is equal to DATA. */
660 bool
661 equal_edge_p (edge_prediction *p, void *data)
663 return p->ep_edge == (edge)data;
666 /* Remove all predictions on given basic block that are attached
667 to edge E. */
668 void
669 remove_predictions_associated_with_edge (edge e)
671 if (!bb_predictions)
672 return;
674 edge_prediction **preds = bb_predictions->get (e->src);
675 filter_predictions (preds, equal_edge_p, e);
678 /* Clears the list of predictions stored for BB. */
680 static void
681 clear_bb_predictions (basic_block bb)
683 edge_prediction **preds = bb_predictions->get (bb);
684 struct edge_prediction *pred, *next;
686 if (!preds)
687 return;
689 for (pred = *preds; pred; pred = next)
691 next = pred->ep_next;
692 free (pred);
694 *preds = NULL;
697 /* Return true when we can store prediction on insn INSN.
698 At the moment we represent predictions only on conditional
699 jumps, not on computed jumps or other complicated cases. */
700 static bool
701 can_predict_insn_p (const rtx_insn *insn)
703 return (JUMP_P (insn)
704 && any_condjump_p (insn)
705 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
708 /* Predict edge E by given predictor if possible. */
710 void
711 predict_edge_def (edge e, enum br_predictor predictor,
712 enum prediction taken)
714 int probability = predictor_info[(int) predictor].hitrate;
716 if (taken != TAKEN)
717 probability = REG_BR_PROB_BASE - probability;
719 predict_edge (e, predictor, probability);
722 /* Invert all branch predictions or probability notes in the INSN. This needs
723 to be done each time we invert the condition used by the jump. */
725 void
726 invert_br_probabilities (rtx insn)
728 rtx note;
730 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
731 if (REG_NOTE_KIND (note) == REG_BR_PROB)
732 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
733 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
734 XEXP (XEXP (note, 0), 1)
735 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
738 /* Dump information about the branch prediction to the output file. */
740 static void
741 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
742 basic_block bb, enum predictor_reason reason = REASON_NONE,
743 edge ep_edge = NULL)
745 edge e = ep_edge;
746 edge_iterator ei;
748 if (!file)
749 return;
751 if (e == NULL)
752 FOR_EACH_EDGE (e, ei, bb->succs)
753 if (! (e->flags & EDGE_FALLTHRU))
754 break;
756 char edge_info_str[128];
757 if (ep_edge)
758 sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
759 ep_edge->dest->index);
760 else
761 edge_info_str[0] = '\0';
763 fprintf (file, " %s heuristics%s%s: %.1f%%",
764 predictor_info[predictor].name,
765 edge_info_str, reason_messages[reason],
766 probability * 100.0 / REG_BR_PROB_BASE);
768 if (bb->count)
770 fprintf (file, " exec %" PRId64, bb->count);
771 if (e)
773 fprintf (file, " hit %" PRId64, e->count);
774 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
778 fprintf (file, "\n");
781 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
782 evenly and hope for the best. */
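/* For example, a block with three non-EH, non-fake successors gets each of
   those edges a probability of roughly REG_BR_PROB_BASE / 3, while EH and
   fake edges get probability 0.  */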
783 static void
784 set_even_probabilities (basic_block bb)
786 int nedges = 0;
787 edge e;
788 edge_iterator ei;
790 FOR_EACH_EDGE (e, ei, bb->succs)
791 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
792 nedges ++;
793 FOR_EACH_EDGE (e, ei, bb->succs)
794 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
795 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
796 else
797 e->probability = 0;
800 /* Combine all REG_BR_PRED notes into a single probability and attach a REG_BR_PROB
801 note if not already present. Remove now useless REG_BR_PRED notes. */
803 static void
804 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
806 rtx prob_note;
807 rtx *pnote;
808 rtx note;
809 int best_probability = PROB_EVEN;
810 enum br_predictor best_predictor = END_PREDICTORS;
811 int combined_probability = REG_BR_PROB_BASE / 2;
812 int d;
813 bool first_match = false;
814 bool found = false;
816 if (!can_predict_insn_p (insn))
818 set_even_probabilities (bb);
819 return;
822 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
823 pnote = &REG_NOTES (insn);
824 if (dump_file)
825 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
826 bb->index);
828 /* We implement the "first match" heuristic and use the probability guessed
829 by the predictor with the smallest index. */
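/* Sketch of the Dempster-Shafer style combination performed below: two
   independent predictions p1 and p2 (both scaled by REG_BR_PROB_BASE) merge
   into
     combined = p1 * p2 / (p1 * p2
                           + (REG_BR_PROB_BASE - p1) * (REG_BR_PROB_BASE - p2))
   so, for instance, two 70% predictions combine to roughly 84%.  */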
830 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
831 if (REG_NOTE_KIND (note) == REG_BR_PRED)
833 enum br_predictor predictor = ((enum br_predictor)
834 INTVAL (XEXP (XEXP (note, 0), 0)));
835 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
837 found = true;
838 if (best_predictor > predictor
839 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
840 best_probability = probability, best_predictor = predictor;
842 d = (combined_probability * probability
843 + (REG_BR_PROB_BASE - combined_probability)
844 * (REG_BR_PROB_BASE - probability));
846 /* Use FP math to avoid overflows of 32bit integers. */
847 if (d == 0)
848 /* If one probability is 0% and one 100%, avoid division by zero. */
849 combined_probability = REG_BR_PROB_BASE / 2;
850 else
851 combined_probability = (((double) combined_probability) * probability
852 * REG_BR_PROB_BASE / d + 0.5);
855 /* Decide which heuristic to use. In case we didn't match anything,
856 use the no_prediction heuristic; in case we did match, use either
857 first match or Dempster-Shafer theory depending on the flags. */
859 if (best_predictor != END_PREDICTORS)
860 first_match = true;
862 if (!found)
863 dump_prediction (dump_file, PRED_NO_PREDICTION,
864 combined_probability, bb);
865 else
867 if (!first_match)
868 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
869 bb, !first_match ? REASON_NONE : REASON_IGNORED);
870 else
871 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
872 bb, first_match ? REASON_NONE : REASON_IGNORED);
875 if (first_match)
876 combined_probability = best_probability;
877 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
879 while (*pnote)
881 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
883 enum br_predictor predictor = ((enum br_predictor)
884 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
885 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
887 dump_prediction (dump_file, predictor, probability, bb,
888 (!first_match || best_predictor == predictor)
889 ? REASON_NONE : REASON_IGNORED);
890 *pnote = XEXP (*pnote, 1);
892 else
893 pnote = &XEXP (*pnote, 1);
896 if (!prob_note)
898 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
900 /* Save the prediction into the CFG in case we are seeing a non-degenerate
901 conditional jump. */
902 if (!single_succ_p (bb))
904 BRANCH_EDGE (bb)->probability = combined_probability;
905 FALLTHRU_EDGE (bb)->probability
906 = REG_BR_PROB_BASE - combined_probability;
909 else if (!single_succ_p (bb))
911 int prob = XINT (prob_note, 0);
913 BRANCH_EDGE (bb)->probability = prob;
914 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
916 else
917 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
920 /* Edge prediction hash traits. */
922 struct predictor_hash: pointer_hash <edge_prediction>
925 static inline hashval_t hash (const edge_prediction *);
926 static inline bool equal (const edge_prediction *, const edge_prediction *);
929 /* Calculate hash value of an edge prediction P based on predictor and
930 normalized probability. */
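/* "Normalized" means a probability p and REG_BR_PROB_BASE - p hash
   identically, so the same predictor guessing opposite directions for the
   two outgoing edges lands in the same bucket and can be recognized as a
   duplicate by prune_predictions_for_bb.  */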
932 inline hashval_t
933 predictor_hash::hash (const edge_prediction *p)
935 inchash::hash hstate;
936 hstate.add_int (p->ep_predictor);
938 int prob = p->ep_probability;
939 if (prob > REG_BR_PROB_BASE / 2)
940 prob = REG_BR_PROB_BASE - prob;
942 hstate.add_int (prob);
944 return hstate.end ();
947 /* Return true if edge predictions P1 and P2 use the same predictor and
948 have equal (or opposite) probabilities. */
950 inline bool
951 predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
953 return (p1->ep_predictor == p2->ep_predictor
954 && (p1->ep_probability == p2->ep_probability
955 || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
958 struct predictor_hash_traits: predictor_hash,
959 typed_noop_remove <edge_prediction *> {};
961 /* Return true if edge prediction P is not in DATA hash set. */
963 static bool
964 not_removed_prediction_p (edge_prediction *p, void *data)
966 hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
967 return !remove->contains (p);
970 /* Prune predictions for a basic block BB. Currently we do the following
971 clean-up steps:
973 1) remove a pair of predictions made by the same predictor that guesses the
974 same probability (other than 1/2) for both outgoing edges
975 2) remove duplicates of a prediction that attaches the same probability
976 to a single edge
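   For illustration: if the same predictor guesses 85% for both outgoing
   edges of a conditional, the two guesses contradict each other and both are
   dropped (step 1); if it guesses 85% twice for the same edge, only one copy
   is kept (step 2).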
980 static void
981 prune_predictions_for_bb (basic_block bb)
983 edge_prediction **preds = bb_predictions->get (bb);
985 if (preds)
987 hash_table <predictor_hash_traits> s (13);
988 hash_set <edge_prediction *> remove;
990 /* Step 1: identify predictors that should be removed. */
991 for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
993 edge_prediction *existing = s.find (pred);
994 if (existing)
996 if (pred->ep_edge == existing->ep_edge
997 && pred->ep_probability == existing->ep_probability)
999 /* Remove a duplicate predictor. */
1000 dump_prediction (dump_file, pred->ep_predictor,
1001 pred->ep_probability, bb,
1002 REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);
1004 remove.add (pred);
1006 else if (pred->ep_edge != existing->ep_edge
1007 && pred->ep_probability == existing->ep_probability
1008 && pred->ep_probability != REG_BR_PROB_BASE / 2)
1010 /* Remove both predictors as they predict the same
1011 for both edges. */
1012 dump_prediction (dump_file, existing->ep_predictor,
1013 pred->ep_probability, bb,
1014 REASON_EDGE_PAIR_DUPLICATE,
1015 existing->ep_edge);
1016 dump_prediction (dump_file, pred->ep_predictor,
1017 pred->ep_probability, bb,
1018 REASON_EDGE_PAIR_DUPLICATE,
1019 pred->ep_edge);
1021 remove.add (existing);
1022 remove.add (pred);
1026 edge_prediction **slot2 = s.find_slot (pred, INSERT);
1027 *slot2 = pred;
1030 /* Step 2: Remove predictors. */
1031 filter_predictions (preds, not_removed_prediction_p, &remove);
1035 /* Combine predictions into a single probability and store them into the CFG.
1036 Remove now useless prediction entries.
1037 If DRY_RUN is set, only produce dumps and do not modify profile. */
1039 static void
1040 combine_predictions_for_bb (basic_block bb, bool dry_run)
1042 int best_probability = PROB_EVEN;
1043 enum br_predictor best_predictor = END_PREDICTORS;
1044 int combined_probability = REG_BR_PROB_BASE / 2;
1045 int d;
1046 bool first_match = false;
1047 bool found = false;
1048 struct edge_prediction *pred;
1049 int nedges = 0;
1050 edge e, first = NULL, second = NULL;
1051 edge_iterator ei;
1053 FOR_EACH_EDGE (e, ei, bb->succs)
1054 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
1056 nedges ++;
1057 if (first && !second)
1058 second = e;
1059 if (!first)
1060 first = e;
1063 /* When there is no successor or only one choice, prediction is easy.
1065 We are lazy for now and predict only basic blocks with two outgoing
1066 edges. It is possible to predict the generic case too, but we have to
1067 ignore the first match heuristic and do more involved combining. Implement
1068 this later. */
1069 if (nedges != 2)
1071 if (!bb->count && !dry_run)
1072 set_even_probabilities (bb);
1073 clear_bb_predictions (bb);
1074 if (dump_file)
1075 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
1076 nedges, bb->index);
1077 return;
1080 if (dump_file)
1081 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1083 prune_predictions_for_bb (bb);
1085 edge_prediction **preds = bb_predictions->get (bb);
1087 if (preds)
1089 /* We implement the "first match" heuristic and use the probability guessed
1090 by the predictor with the smallest index. */
1091 for (pred = *preds; pred; pred = pred->ep_next)
1093 enum br_predictor predictor = pred->ep_predictor;
1094 int probability = pred->ep_probability;
1096 if (pred->ep_edge != first)
1097 probability = REG_BR_PROB_BASE - probability;
1099 found = true;
1100 /* The first match heuristic would be wildly confused if we predicted
1101 both directions. */
1102 if (best_predictor > predictor
1103 && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
1105 struct edge_prediction *pred2;
1106 int prob = probability;
1108 for (pred2 = (struct edge_prediction *) *preds;
1109 pred2; pred2 = pred2->ep_next)
1110 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
1112 int probability2 = pred2->ep_probability;
1114 if (pred2->ep_edge != first)
1115 probability2 = REG_BR_PROB_BASE - probability2;
1117 if ((probability < REG_BR_PROB_BASE / 2) !=
1118 (probability2 < REG_BR_PROB_BASE / 2))
1119 break;
1121 /* If the same predictor later gave a better result, go for it! */
1122 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
1123 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
1124 prob = probability2;
1126 if (!pred2)
1127 best_probability = prob, best_predictor = predictor;
1130 d = (combined_probability * probability
1131 + (REG_BR_PROB_BASE - combined_probability)
1132 * (REG_BR_PROB_BASE - probability));
1134 /* Use FP math to avoid overflows of 32bit integers. */
1135 if (d == 0)
1136 /* If one probability is 0% and one 100%, avoid division by zero. */
1137 combined_probability = REG_BR_PROB_BASE / 2;
1138 else
1139 combined_probability = (((double) combined_probability)
1140 * probability
1141 * REG_BR_PROB_BASE / d + 0.5);
1145 /* Decide which heuristic to use. In case we didn't match anything,
1146 use the no_prediction heuristic; in case we did match, use either
1147 first match or Dempster-Shafer theory depending on the flags. */
1149 if (best_predictor != END_PREDICTORS)
1150 first_match = true;
1152 if (!found)
1153 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
1154 else
1156 if (!first_match)
1157 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1158 !first_match ? REASON_NONE : REASON_IGNORED);
1159 else
1160 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1161 first_match ? REASON_NONE : REASON_IGNORED);
1164 if (first_match)
1165 combined_probability = best_probability;
1166 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
1168 if (preds)
1170 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1172 enum br_predictor predictor = pred->ep_predictor;
1173 int probability = pred->ep_probability;
1175 dump_prediction (dump_file, predictor, probability, bb,
1176 (!first_match || best_predictor == predictor)
1177 ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
1180 clear_bb_predictions (bb);
1182 if (!bb->count && !dry_run)
1184 first->probability = combined_probability;
1185 second->probability = REG_BR_PROB_BASE - combined_probability;
1189 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1190 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1192 T1 and T2 should be one of the following cases:
1193 1. T1 is SSA_NAME, T2 is NULL
1194 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1195 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1197 static tree
1198 strips_small_constant (tree t1, tree t2)
1200 tree ret = NULL;
1201 int value = 0;
1203 if (!t1)
1204 return NULL;
1205 else if (TREE_CODE (t1) == SSA_NAME)
1206 ret = t1;
1207 else if (tree_fits_shwi_p (t1))
1208 value = tree_to_shwi (t1);
1209 else
1210 return NULL;
1212 if (!t2)
1213 return ret;
1214 else if (tree_fits_shwi_p (t2))
1215 value = tree_to_shwi (t2);
1216 else if (TREE_CODE (t2) == SSA_NAME)
1218 if (ret)
1219 return NULL;
1220 else
1221 ret = t2;
1224 if (value <= 4 && value >= -4)
1225 return ret;
1226 else
1227 return NULL;
1230 /* Return the SSA_NAME in T or T's operands.
1231 Return NULL if SSA_NAME cannot be found. */
1233 static tree
1234 get_base_value (tree t)
1236 if (TREE_CODE (t) == SSA_NAME)
1237 return t;
1239 if (!BINARY_CLASS_P (t))
1240 return NULL;
1242 switch (TREE_OPERAND_LENGTH (t))
1244 case 1:
1245 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1246 case 2:
1247 return strips_small_constant (TREE_OPERAND (t, 0),
1248 TREE_OPERAND (t, 1));
1249 default:
1250 return NULL;
1254 /* Check the compare STMT in LOOP. If it compares an induction
1255 variable to a loop invariant, return true, and save
1256 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1257 Otherwise return false and set LOOP_INVARIANT to NULL. */
1259 static bool
1260 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1261 tree *loop_invariant,
1262 enum tree_code *compare_code,
1263 tree *loop_step,
1264 tree *loop_iv_base)
1266 tree op0, op1, bound, base;
1267 affine_iv iv0, iv1;
1268 enum tree_code code;
1269 tree step;
1271 code = gimple_cond_code (stmt);
1272 *loop_invariant = NULL;
1274 switch (code)
1276 case GT_EXPR:
1277 case GE_EXPR:
1278 case NE_EXPR:
1279 case LT_EXPR:
1280 case LE_EXPR:
1281 case EQ_EXPR:
1282 break;
1284 default:
1285 return false;
1288 op0 = gimple_cond_lhs (stmt);
1289 op1 = gimple_cond_rhs (stmt);
1291 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1292 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1293 return false;
1294 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1295 return false;
1296 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1297 return false;
1298 if (TREE_CODE (iv0.step) != INTEGER_CST
1299 || TREE_CODE (iv1.step) != INTEGER_CST)
1300 return false;
1301 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1302 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1303 return false;
1305 if (integer_zerop (iv0.step))
1307 if (code != NE_EXPR && code != EQ_EXPR)
1308 code = invert_tree_comparison (code, false);
1309 bound = iv0.base;
1310 base = iv1.base;
1311 if (tree_fits_shwi_p (iv1.step))
1312 step = iv1.step;
1313 else
1314 return false;
1316 else
1318 bound = iv1.base;
1319 base = iv0.base;
1320 if (tree_fits_shwi_p (iv0.step))
1321 step = iv0.step;
1322 else
1323 return false;
1326 if (TREE_CODE (bound) != INTEGER_CST)
1327 bound = get_base_value (bound);
1328 if (!bound)
1329 return false;
1330 if (TREE_CODE (base) != INTEGER_CST)
1331 base = get_base_value (base);
1332 if (!base)
1333 return false;
1335 *loop_invariant = bound;
1336 *compare_code = code;
1337 *loop_step = step;
1338 *loop_iv_base = base;
1339 return true;
1342 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1344 static bool
1345 expr_coherent_p (tree t1, tree t2)
1347 gimple *stmt;
1348 tree ssa_name_1 = NULL;
1349 tree ssa_name_2 = NULL;
1351 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1352 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1354 if (t1 == t2)
1355 return true;
1357 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1358 return true;
1359 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1360 return false;
1362 /* Check to see if t1 is expressed/defined with t2. */
1363 stmt = SSA_NAME_DEF_STMT (t1);
1364 gcc_assert (stmt != NULL);
1365 if (is_gimple_assign (stmt))
1367 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1368 if (ssa_name_1 && ssa_name_1 == t2)
1369 return true;
1372 /* Check to see if t2 is expressed/defined with t1. */
1373 stmt = SSA_NAME_DEF_STMT (t2);
1374 gcc_assert (stmt != NULL);
1375 if (is_gimple_assign (stmt))
1377 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1378 if (ssa_name_2 && ssa_name_2 == t1)
1379 return true;
1382 /* Compare if t1 and t2's def_stmts are identical. */
1383 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1384 return true;
1385 else
1386 return false;
1389 /* Return true if BB is predicted by one of the loop heuristics. */
1391 static bool
1392 predicted_by_loop_heuristics_p (basic_block bb)
1394 struct edge_prediction *i;
1395 edge_prediction **preds = bb_predictions->get (bb);
1397 if (!preds)
1398 return false;
1400 for (i = *preds; i; i = i->ep_next)
1401 if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1402 || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1403 || i->ep_predictor == PRED_LOOP_ITERATIONS
1404 || i->ep_predictor == PRED_LOOP_EXIT
1405 || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1406 return true;
1407 return false;
1410 /* Predict branch probability of BB when BB contains a branch that compares
1411 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1412 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1414 E.g.
1415 for (int i = 0; i < bound; i++) {
1416 if (i < bound - 2)
1417 computation_1();
1418 else
1419 computation_2();
1422 In this loop, we will predict the branch inside the loop to be taken. */
1424 static void
1425 predict_iv_comparison (struct loop *loop, basic_block bb,
1426 tree loop_bound_var,
1427 tree loop_iv_base_var,
1428 enum tree_code loop_bound_code,
1429 int loop_bound_step)
1431 gimple *stmt;
1432 tree compare_var, compare_base;
1433 enum tree_code compare_code;
1434 tree compare_step_var;
1435 edge then_edge;
1436 edge_iterator ei;
1438 if (predicted_by_loop_heuristics_p (bb))
1439 return;
1441 stmt = last_stmt (bb);
1442 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1443 return;
1444 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1445 loop, &compare_var,
1446 &compare_code,
1447 &compare_step_var,
1448 &compare_base))
1449 return;
1451 /* Find the taken edge. */
1452 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1453 if (then_edge->flags & EDGE_TRUE_VALUE)
1454 break;
1456 /* When comparing an IV to a loop invariant, NE is more likely to be
1457 taken while EQ is more likely to be not-taken. */
1458 if (compare_code == NE_EXPR)
1460 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1461 return;
1463 else if (compare_code == EQ_EXPR)
1465 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1466 return;
1469 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1470 return;
1472 /* If loop bound, base and compare bound are all constants, we can
1473 calculate the probability directly. */
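/* As an illustration of the computation below: for
     for (i = 0; i < 10; i++) if (i < 7) ...
   loop_count is 10, compare_count is 7, and the then_edge gets predicted
   taken with a probability of about 7/10 of REG_BR_PROB_BASE.  */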
1474 if (tree_fits_shwi_p (loop_bound_var)
1475 && tree_fits_shwi_p (compare_var)
1476 && tree_fits_shwi_p (compare_base))
1478 int probability;
1479 bool overflow, overall_overflow = false;
1480 widest_int compare_count, tem;
1482 /* (loop_bound - base) / compare_step */
1483 tem = wi::sub (wi::to_widest (loop_bound_var),
1484 wi::to_widest (compare_base), SIGNED, &overflow);
1485 overall_overflow |= overflow;
1486 widest_int loop_count = wi::div_trunc (tem,
1487 wi::to_widest (compare_step_var),
1488 SIGNED, &overflow);
1489 overall_overflow |= overflow;
1491 if (!wi::neg_p (wi::to_widest (compare_step_var))
1492 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1494 /* (loop_bound - compare_bound) / compare_step */
1495 tem = wi::sub (wi::to_widest (loop_bound_var),
1496 wi::to_widest (compare_var), SIGNED, &overflow);
1497 overall_overflow |= overflow;
1498 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1499 SIGNED, &overflow);
1500 overall_overflow |= overflow;
1502 else
1504 /* (compare_bound - base) / compare_step */
1505 tem = wi::sub (wi::to_widest (compare_var),
1506 wi::to_widest (compare_base), SIGNED, &overflow);
1507 overall_overflow |= overflow;
1508 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1509 SIGNED, &overflow);
1510 overall_overflow |= overflow;
1512 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1513 ++compare_count;
1514 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1515 ++loop_count;
1516 if (wi::neg_p (compare_count))
1517 compare_count = 0;
1518 if (wi::neg_p (loop_count))
1519 loop_count = 0;
1520 if (loop_count == 0)
1521 probability = 0;
1522 else if (wi::cmps (compare_count, loop_count) == 1)
1523 probability = REG_BR_PROB_BASE;
1524 else
1526 tem = compare_count * REG_BR_PROB_BASE;
1527 tem = wi::udiv_trunc (tem, loop_count);
1528 probability = tem.to_uhwi ();
1531 /* FIXME: The branch prediction seems broken. It has only 20% hitrate. */
1532 if (!overall_overflow)
1533 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1535 return;
1538 if (expr_coherent_p (loop_bound_var, compare_var))
1540 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1541 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1542 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1543 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1544 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1545 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1546 else if (loop_bound_code == NE_EXPR)
1548 /* If the loop backedge condition is "(i != bound)", we do
1549 the comparison based on the step of IV:
1550 * step < 0 : backedge condition is like (i > bound)
1551 * step > 0 : backedge condition is like (i < bound) */
1552 gcc_assert (loop_bound_step != 0);
1553 if (loop_bound_step > 0
1554 && (compare_code == LT_EXPR
1555 || compare_code == LE_EXPR))
1556 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1557 else if (loop_bound_step < 0
1558 && (compare_code == GT_EXPR
1559 || compare_code == GE_EXPR))
1560 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1561 else
1562 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1564 else
1565 /* The branch is predicted not-taken if loop_bound_code is
1566 the opposite of compare_code. */
1567 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1569 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1571 /* For cases like:
1572 for (i = s; i < h; i++)
1573 if (i > s + 2) ....
1574 The branch should be predicted taken. */
1575 if (loop_bound_step > 0
1576 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1577 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1578 else if (loop_bound_step < 0
1579 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1580 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1581 else
1582 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1586 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1587 exits result from short-circuit conditions that will generate an
1588 if_tmp. E.g.:
1590 if (foo() || global > 10)
1591 break;
1593 This will be translated into:
1595 BB3:
1596 loop header...
1597 BB4:
1598 if foo() goto BB6 else goto BB5
1599 BB5:
1600 if global > 10 goto BB6 else goto BB7
1601 BB6:
1602 goto BB7
1603 BB7:
1604 iftmp = (PHI 0(BB5), 1(BB6))
1605 if iftmp == 1 goto BB8 else goto BB3
1606 BB8:
1607 outside of the loop...
1609 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1610 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1611 exits. This function takes BB7->BB8 as input, finds the extra loop
1612 exits and predicts them using PRED_LOOP_EXTRA_EXIT. */
1614 static void
1615 predict_extra_loop_exits (edge exit_edge)
1617 unsigned i;
1618 bool check_value_one;
1619 gimple *lhs_def_stmt;
1620 gphi *phi_stmt;
1621 tree cmp_rhs, cmp_lhs;
1622 gimple *last;
1623 gcond *cmp_stmt;
1625 last = last_stmt (exit_edge->src);
1626 if (!last)
1627 return;
1628 cmp_stmt = dyn_cast <gcond *> (last);
1629 if (!cmp_stmt)
1630 return;
1632 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1633 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1634 if (!TREE_CONSTANT (cmp_rhs)
1635 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1636 return;
1637 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1638 return;
1640 /* If check_value_one is true, only the phi_args with value '1' will lead
1641 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1642 loop exit. */
1643 check_value_one = (((integer_onep (cmp_rhs))
1644 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1645 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1647 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1648 if (!lhs_def_stmt)
1649 return;
1651 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1652 if (!phi_stmt)
1653 return;
1655 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1657 edge e1;
1658 edge_iterator ei;
1659 tree val = gimple_phi_arg_def (phi_stmt, i);
1660 edge e = gimple_phi_arg_edge (phi_stmt, i);
1662 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1663 continue;
1664 if ((check_value_one ^ integer_onep (val)) == 1)
1665 continue;
1666 if (EDGE_COUNT (e->src->succs) != 1)
1668 predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1669 continue;
1672 FOR_EACH_EDGE (e1, ei, e->src->preds)
1673 predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1678 /* Predict edge probabilities by exploiting loop structure. */
1680 static void
1681 predict_loops (void)
1683 struct loop *loop;
1685 /* Try to predict out blocks in a loop that are not part of a
1686 natural loop. */
1687 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1689 basic_block bb, *bbs;
1690 unsigned j, n_exits = 0;
1691 vec<edge> exits;
1692 struct tree_niter_desc niter_desc;
1693 edge ex;
1694 struct nb_iter_bound *nb_iter;
1695 enum tree_code loop_bound_code = ERROR_MARK;
1696 tree loop_bound_step = NULL;
1697 tree loop_bound_var = NULL;
1698 tree loop_iv_base = NULL;
1699 gcond *stmt = NULL;
1701 exits = get_loop_exit_edges (loop);
1702 FOR_EACH_VEC_ELT (exits, j, ex)
1703 if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
1704 n_exits ++;
1705 if (!n_exits)
1707 exits.release ();
1708 continue;
1711 FOR_EACH_VEC_ELT (exits, j, ex)
1713 tree niter = NULL;
1714 HOST_WIDE_INT nitercst;
1715 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1716 int probability;
1717 enum br_predictor predictor;
1718 widest_int nit;
1720 if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1721 continue;
1722 /* Loop heuristics do not expect the exit conditional to be inside an
1723 inner loop. We predict from the innermost to the outermost loop. */
1724 if (predicted_by_loop_heuristics_p (ex->src))
1725 continue;
1726 predict_extra_loop_exits (ex);
1728 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1729 niter = niter_desc.niter;
1730 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1731 niter = loop_niter_by_eval (loop, ex);
1733 if (TREE_CODE (niter) == INTEGER_CST)
1735 if (tree_fits_uhwi_p (niter)
1736 && max
1737 && compare_tree_int (niter, max - 1) == -1)
1738 nitercst = tree_to_uhwi (niter) + 1;
1739 else
1740 nitercst = max;
1741 predictor = PRED_LOOP_ITERATIONS;
1743 /* If we have just one exit and we can derive some information about
1744 the number of iterations of the loop from the statements inside
1745 the loop, use it to predict this exit. */
1746 else if (n_exits == 1
1747 && estimated_stmt_executions (loop, &nit))
1749 if (wi::gtu_p (nit, max))
1750 nitercst = max;
1751 else
1752 nitercst = nit.to_shwi ();
1753 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1755 /* If we have a likely upper bound, trust it for very small iteration
1756 counts. Such loops would otherwise get mispredicted by standard
1757 LOOP_EXIT heuristics. */
1758 else if (n_exits == 1
1759 && likely_max_stmt_executions (loop, &nit)
1760 && wi::ltu_p (nit,
1761 RDIV (REG_BR_PROB_BASE,
1762 REG_BR_PROB_BASE
1763 - predictor_info
1764 [PRED_LOOP_EXIT].hitrate)))
1766 nitercst = nit.to_shwi ();
1767 predictor = PRED_LOOP_ITERATIONS_MAX;
1769 else
1770 continue;
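/* The chosen exit edge is then predicted with a probability of roughly
   1/nitercst, e.g. a loop expected to iterate 100 times gets its exit
   predicted with about 1% of REG_BR_PROB_BASE.  */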
1772 gcc_checking_assert (nitercst);
1773 probability = RDIV (REG_BR_PROB_BASE, nitercst);
1774 predict_edge (ex, predictor, probability);
1776 exits.release ();
1778 /* Find information about loop bound variables. */
1779 for (nb_iter = loop->bounds; nb_iter;
1780 nb_iter = nb_iter->next)
1781 if (nb_iter->stmt
1782 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1784 stmt = as_a <gcond *> (nb_iter->stmt);
1785 break;
1787 if (!stmt && last_stmt (loop->header)
1788 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1789 stmt = as_a <gcond *> (last_stmt (loop->header));
1790 if (stmt)
1791 is_comparison_with_loop_invariant_p (stmt, loop,
1792 &loop_bound_var,
1793 &loop_bound_code,
1794 &loop_bound_step,
1795 &loop_iv_base);
1797 bbs = get_loop_body (loop);
1799 for (j = 0; j < loop->num_nodes; j++)
1801 int header_found = 0;
1802 edge e;
1803 edge_iterator ei;
1805 bb = bbs[j];
1807 /* Bypass loop heuristics on continue statements. These
1808 statements construct loops via "non-loop" constructs
1809 in the source language and are better handled
1810 separately. */
1811 if (predicted_by_p (bb, PRED_CONTINUE))
1812 continue;
1814 /* Loop exit heuristics - predict an edge exiting the loop as not taken
1815 if the conditional has no successor that is the loop header. */
1816 if (!header_found
1817 /* If we already used more reliable loop exit predictors, do not
1818 bother with PRED_LOOP_EXIT. */
1819 && !predicted_by_loop_heuristics_p (bb))
1821 /* For a loop with many exits we don't want to predict all the exits
1822 with a pretty large probability, because if all exits are
1823 considered in a row, the loop would be predicted to iterate
1824 almost never. The code dividing the probability by the number of
1825 exits is very rough. It should compute the number of exits
1826 taken in each path through the function (not the overall number
1827 of exits, which might be a lot higher for loops with wide switch
1828 statements in them) and take the n-th root.
1830 We limit the minimal probability to 2% to keep
1831 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1832 as this was causing a regression in the perl benchmark containing such
1833 a wide loop. */
1835 int probability = ((REG_BR_PROB_BASE
1836 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1837 / n_exits);
1838 if (probability < HITRATE (2))
1839 probability = HITRATE (2);
1840 FOR_EACH_EDGE (e, ei, bb->succs)
1841 if (e->dest->index < NUM_FIXED_BLOCKS
1842 || !flow_bb_inside_loop_p (loop, e->dest))
1843 predict_edge (e, PRED_LOOP_EXIT, probability);
1845 if (loop_bound_var)
1846 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1847 loop_bound_code,
1848 tree_to_shwi (loop_bound_step));
1851 /* Free basic blocks from get_loop_body. */
1852 free (bbs);
1856 /* Attempt to predict probabilities of BB outgoing edges using local
1857 properties. */
1858 static void
1859 bb_estimate_probability_locally (basic_block bb)
1861 rtx_insn *last_insn = BB_END (bb);
1862 rtx cond;
1864 if (! can_predict_insn_p (last_insn))
1865 return;
1866 cond = get_condition (last_insn, NULL, false, false);
1867 if (! cond)
1868 return;
1870 /* Try "pointer heuristic."
1871 A comparison ptr == 0 is predicted as false.
1872 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1873 if (COMPARISON_P (cond)
1874 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1875 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1877 if (GET_CODE (cond) == EQ)
1878 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1879 else if (GET_CODE (cond) == NE)
1880 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1882 else
1884 /* Try "opcode heuristic."
1885 EQ tests are usually false and NE tests are usually true. Also,
1886 most quantities are positive, so we can make the appropriate guesses
1887 about signed comparisons against zero. */
1888 switch (GET_CODE (cond))
1890 case CONST_INT:
1891 /* Unconditional branch. */
1892 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1893 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1894 break;
1896 case EQ:
1897 case UNEQ:
1898 /* Floating point comparisons appear to behave in a very
1899 unpredictable way because of the special role of = tests in
1900 FP code. */
1901 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1903 /* Comparisons with 0 are often used for booleans and there is
1904 nothing useful to predict about them. */
1905 else if (XEXP (cond, 1) == const0_rtx
1906 || XEXP (cond, 0) == const0_rtx)
1908 else
1909 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1910 break;
1912 case NE:
1913 case LTGT:
1914 /* Floating point comparisons appear to behave in a very
1915 unpredictable way because of the special role of = tests in
1916 FP code. */
1917 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1919 /* Comparisons with 0 are often used for booleans and there is
1920 nothing useful to predict about them. */
1921 else if (XEXP (cond, 1) == const0_rtx
1922 || XEXP (cond, 0) == const0_rtx)
1924 else
1925 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1926 break;
1928 case ORDERED:
1929 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1930 break;
1932 case UNORDERED:
1933 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1934 break;
1936 case LE:
1937 case LT:
1938 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1939 || XEXP (cond, 1) == constm1_rtx)
1940 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1941 break;
1943 case GE:
1944 case GT:
1945 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1946 || XEXP (cond, 1) == constm1_rtx)
1947 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1948 break;
1950 default:
1951 break;
1955 /* Set edge->probability for each successor edge of BB. */
1956 void
1957 guess_outgoing_edge_probabilities (basic_block bb)
1959 bb_estimate_probability_locally (bb);
1960 combine_predictions_for_insn (BB_END (bb), bb);
1963 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1965 /* Helper function for expr_expected_value. */
1967 static tree
1968 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1969 tree op1, bitmap visited, enum br_predictor *predictor)
1971 gimple *def;
1973 if (predictor)
1974 *predictor = PRED_UNCONDITIONAL;
1976 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1978 if (TREE_CONSTANT (op0))
1979 return op0;
1981 if (code != SSA_NAME)
1982 return NULL_TREE;
1984 def = SSA_NAME_DEF_STMT (op0);
1986 /* If we were already here, break the infinite cycle. */
1987 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1988 return NULL;
1990 if (gimple_code (def) == GIMPLE_PHI)
1992 /* All the arguments of the PHI node must have the same expected
1993 constant value. */
1994 int i, n = gimple_phi_num_args (def);
1995 tree val = NULL, new_val;
1997 for (i = 0; i < n; i++)
1999 tree arg = PHI_ARG_DEF (def, i);
2000 enum br_predictor predictor2;
2002 /* If this PHI has itself as an argument, we cannot
2003 determine the expected value of this argument. However,
2004 if we can find an expected constant value for the other
2005 PHI args then we can still be sure that this is
2006 likely a constant. So be optimistic and just
2007 continue with the next argument. */
2008 if (arg == PHI_RESULT (def))
2009 continue;
2011 new_val = expr_expected_value (arg, visited, &predictor2);
2013 /* It is difficult to combine value predictors. Simply assume
2014 that the later predictor is weaker and take its prediction. */
2015 if (predictor && *predictor < predictor2)
2016 *predictor = predictor2;
2017 if (!new_val)
2018 return NULL;
2019 if (!val)
2020 val = new_val;
2021 else if (!operand_equal_p (val, new_val, false))
2022 return NULL;
2024 return val;
2026 if (is_gimple_assign (def))
2028 if (gimple_assign_lhs (def) != op0)
2029 return NULL;
2031 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2032 gimple_assign_rhs1 (def),
2033 gimple_assign_rhs_code (def),
2034 gimple_assign_rhs2 (def),
2035 visited, predictor);
2038 if (is_gimple_call (def))
2040 tree decl = gimple_call_fndecl (def);
2041 if (!decl)
2043 if (gimple_call_internal_p (def)
2044 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2046 gcc_assert (gimple_call_num_args (def) == 3);
2047 tree val = gimple_call_arg (def, 0);
2048 if (TREE_CONSTANT (val))
2049 return val;
2050 if (predictor)
2052 tree val2 = gimple_call_arg (def, 2);
2053 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2054 && tree_fits_uhwi_p (val2)
2055 && tree_to_uhwi (val2) < END_PREDICTORS);
2056 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2058 return gimple_call_arg (def, 1);
2060 return NULL;
2062 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2063 switch (DECL_FUNCTION_CODE (decl))
2065 case BUILT_IN_EXPECT:
2067 tree val;
2068 if (gimple_call_num_args (def) != 2)
2069 return NULL;
2070 val = gimple_call_arg (def, 0);
2071 if (TREE_CONSTANT (val))
2072 return val;
2073 if (predictor)
2074 *predictor = PRED_BUILTIN_EXPECT;
2075 return gimple_call_arg (def, 1);
2078 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2079 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2080 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2081 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2082 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2083 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2084 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2085 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2086 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2087 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2088 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2089 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2090 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2091 /* Assume that any given atomic operation has low contention,
2092 and thus the compare-and-swap operation succeeds. */
2093 if (predictor)
2094 *predictor = PRED_COMPARE_AND_SWAP;
2095 return boolean_true_node;
2096 default:
2097 break;
2101 return NULL;
2104 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2106 tree res;
2107 enum br_predictor predictor2;
2108 op0 = expr_expected_value (op0, visited, predictor);
2109 if (!op0)
2110 return NULL;
2111 op1 = expr_expected_value (op1, visited, &predictor2);
2112 if (predictor && *predictor < predictor2)
2113 *predictor = predictor2;
2114 if (!op1)
2115 return NULL;
2116 res = fold_build2 (code, type, op0, op1);
2117 if (TREE_CONSTANT (res))
2118 return res;
2119 return NULL;
2121 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2123 tree res;
2124 op0 = expr_expected_value (op0, visited, predictor);
2125 if (!op0)
2126 return NULL;
2127 res = fold_build1 (code, type, op0);
2128 if (TREE_CONSTANT (res))
2129 return res;
2130 return NULL;
2132 return NULL;
2135 /* Return the constant EXPR will likely have at execution time, or NULL
2136 if unknown. The function is used by the builtin_expect branch predictor,
2137 so the evidence must come from that construct plus possible constant folding.
2139 We may want to implement a more involved value guess (such as prediction
2140 based on value range propagation), but such tricks should go into a new
2141 implementation. */
2143 static tree
2144 expr_expected_value (tree expr, bitmap visited,
2145 enum br_predictor *predictor)
2147 enum tree_code code;
2148 tree op0, op1;
2150 if (TREE_CONSTANT (expr))
2152 if (predictor)
2153 *predictor = PRED_UNCONDITIONAL;
2154 return expr;
2157 extract_ops_from_tree (expr, &code, &op0, &op1);
2158 return expr_expected_value_1 (TREE_TYPE (expr),
2159 op0, code, op1, visited, predictor);
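/* Illustrative sketch only, not part of this file: the kind of user code
   whose expected value the two functions above recover.  The second
   argument of __builtin_expect supplies the value the first argument is
   expected to have, and expr_expected_value_1 folds it through the
   enclosing comparison.  The function and parameter names below are
   hypothetical.  */
#if 0
static int
example_expected_value (int *p)
{
  /* "p == 0" is expected to evaluate to 0, so the test is expected to be
     false and the error path is predicted not taken
     (PRED_BUILTIN_EXPECT).  */
  if (__builtin_expect (p == 0, 0))
    return -1;
  return *p;
}
#endif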
2162 /* Predict using opcode of the last statement in basic block. */
2163 static void
2164 tree_predict_by_opcode (basic_block bb)
2166 gimple *stmt = last_stmt (bb);
2167 edge then_edge;
2168 tree op0, op1;
2169 tree type;
2170 tree val;
2171 enum tree_code cmp;
2172 bitmap visited;
2173 edge_iterator ei;
2174 enum br_predictor predictor;
2176 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2177 return;
2178 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2179 if (then_edge->flags & EDGE_TRUE_VALUE)
2180 break;
2181 op0 = gimple_cond_lhs (stmt);
2182 op1 = gimple_cond_rhs (stmt);
2183 cmp = gimple_cond_code (stmt);
2184 type = TREE_TYPE (op0);
2185 visited = BITMAP_ALLOC (NULL);
2186 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
2187 &predictor);
2188 BITMAP_FREE (visited);
2189 if (val && TREE_CODE (val) == INTEGER_CST)
2191 if (predictor == PRED_BUILTIN_EXPECT)
2193 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2195 gcc_assert (percent >= 0 && percent <= 100);
2196 if (integer_zerop (val))
2197 percent = 100 - percent;
2198 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2200 else
2201 predict_edge_def (then_edge, predictor,
2202 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2204 /* Try "pointer heuristic."
2205 A comparison ptr == 0 is predicted as false.
2206 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2207 if (POINTER_TYPE_P (type))
2209 if (cmp == EQ_EXPR)
2210 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2211 else if (cmp == NE_EXPR)
2212 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2214 else
2216 /* Try "opcode heuristic."
2217 EQ tests are usually false and NE tests are usually true. Also,
2218 most quantities are positive, so we can make the appropriate guesses
2219 about signed comparisons against zero. */
2220 switch (cmp)
2222 case EQ_EXPR:
2223 case UNEQ_EXPR:
2224 /* Floating point comparisons appear to behave in a very
2225 unpredictable way because of the special role of equality tests in
2226 FP code. */
2227 if (FLOAT_TYPE_P (type))
2229 /* Comparisons with 0 are often used for booleans and there is
2230 nothing useful to predict about them. */
2231 else if (integer_zerop (op0) || integer_zerop (op1))
2233 else
2234 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2235 break;
2237 case NE_EXPR:
2238 case LTGT_EXPR:
2239 /* Floating point comparisons appear to behave in a very
2240 unpredictable way because of the special role of equality tests in
2241 FP code. */
2242 if (FLOAT_TYPE_P (type))
2244 /* Comparisons with 0 are often used for booleans and there is
2245 nothing useful to predict about them. */
2246 else if (integer_zerop (op0)
2247 || integer_zerop (op1))
2249 else
2250 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2251 break;
2253 case ORDERED_EXPR:
2254 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2255 break;
2257 case UNORDERED_EXPR:
2258 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2259 break;
2261 case LE_EXPR:
2262 case LT_EXPR:
2263 if (integer_zerop (op1)
2264 || integer_onep (op1)
2265 || integer_all_onesp (op1)
2266 || real_zerop (op1)
2267 || real_onep (op1)
2268 || real_minus_onep (op1))
2269 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2270 break;
2272 case GE_EXPR:
2273 case GT_EXPR:
2274 if (integer_zerop (op1)
2275 || integer_onep (op1)
2276 || integer_all_onesp (op1)
2277 || real_zerop (op1)
2278 || real_onep (op1)
2279 || real_minus_onep (op1))
2280 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2281 break;
2283 default:
2284 break;
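/* Illustrative sketch only, not part of this file: comparisons in user
   code and the heuristic from tree_predict_by_opcode that would fire on
   each.  Names are hypothetical.  */
#if 0
static int
example_opcode_heuristics (int *p, int n)
{
  if (p == 0)	/* Pointer heuristic: EQ on a pointer type is predicted
		   not taken (PRED_TREE_POINTER).  */
    return -1;
  if (n < 0)	/* Opcode heuristic: LT against zero is predicted not
		   taken (PRED_TREE_OPCODE_POSITIVE).  */
    return -1;
  if (n != 42)	/* NE against a non-zero constant is predicted taken
		   (PRED_TREE_OPCODE_NONEQUAL).  */
    n = 42;
  return *p + n;
}
#endif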
2288 /* Try to guess whether the return value indicates an error code. */
2290 static enum br_predictor
2291 return_prediction (tree val, enum prediction *prediction)
2293 /* VOID. */
2294 if (!val)
2295 return PRED_NO_PREDICTION;
2296 /* Different heuristics for pointers and scalars. */
2297 if (POINTER_TYPE_P (TREE_TYPE (val)))
2299 /* NULL is usually not returned. */
2300 if (integer_zerop (val))
2302 *prediction = NOT_TAKEN;
2303 return PRED_NULL_RETURN;
2306 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2308 /* Negative return values are often used to indicate
2309 errors. */
2310 if (TREE_CODE (val) == INTEGER_CST
2311 && tree_int_cst_sgn (val) < 0)
2313 *prediction = NOT_TAKEN;
2314 return PRED_NEGATIVE_RETURN;
2316 /* Constant return values seem to be commonly taken.
2317 Zero/one often represent booleans, so exclude them from the
2318 heuristics. */
2319 if (TREE_CONSTANT (val)
2320 && (!integer_zerop (val) && !integer_onep (val)))
2322 *prediction = NOT_TAKEN;
2323 return PRED_CONST_RETURN;
2326 return PRED_NO_PREDICTION;
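/* Illustrative sketch only, not part of this file: return values and the
   predictor return_prediction would pick for each.  Names are
   hypothetical.  */
#if 0
static void *
example_null_return (int err)
{
  if (err)
    return 0;			/* NULL return: PRED_NULL_RETURN.  */
  return __builtin_malloc (16);
}

static int
example_error_code (int fd)
{
  if (fd < 0)
    return -22;			/* Negative constant: PRED_NEGATIVE_RETURN.  */
  if (fd == 0)
    return 100;			/* Non-zero/one constant: PRED_CONST_RETURN.  */
  return 0;			/* Zero/one are excluded from the heuristics.  */
}
#endif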
2329 /* Find the basic block with the return expression and look up the possible
2330 return value, trying to apply the RETURN_PREDICTION heuristics. */
2331 static void
2332 apply_return_prediction (void)
2334 greturn *return_stmt = NULL;
2335 tree return_val;
2336 edge e;
2337 gphi *phi;
2338 int phi_num_args, i;
2339 enum br_predictor pred;
2340 enum prediction direction;
2341 edge_iterator ei;
2343 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2345 gimple *last = last_stmt (e->src);
2346 if (last
2347 && gimple_code (last) == GIMPLE_RETURN)
2349 return_stmt = as_a <greturn *> (last);
2350 break;
2353 if (!e)
2354 return;
2355 return_val = gimple_return_retval (return_stmt);
2356 if (!return_val)
2357 return;
2358 if (TREE_CODE (return_val) != SSA_NAME
2359 || !SSA_NAME_DEF_STMT (return_val)
2360 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2361 return;
2362 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2363 phi_num_args = gimple_phi_num_args (phi);
2364 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2366 /* Avoid the degenerate case where all return values from the function
2367 belong to the same category (i.e. they are all positive constants),
2368 so we can hardly say anything about them. */
2369 for (i = 1; i < phi_num_args; i++)
2370 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2371 break;
2372 if (i != phi_num_args)
2373 for (i = 0; i < phi_num_args; i++)
2375 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2376 if (pred != PRED_NO_PREDICTION)
2377 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2378 direction);
2382 /* Look for basic blocks that contain unlikely-to-happen events
2383 (such as noreturn calls) and mark all paths leading to execution
2384 of these basic blocks as unlikely. */
2386 static void
2387 tree_bb_level_predictions (void)
2389 basic_block bb;
2390 bool has_return_edges = false;
2391 edge e;
2392 edge_iterator ei;
2394 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2395 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2397 has_return_edges = true;
2398 break;
2401 apply_return_prediction ();
2403 FOR_EACH_BB_FN (bb, cfun)
2405 gimple_stmt_iterator gsi;
2407 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2409 gimple *stmt = gsi_stmt (gsi);
2410 tree decl;
2412 if (is_gimple_call (stmt))
2414 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2415 && has_return_edges)
2416 predict_paths_leading_to (bb, PRED_NORETURN,
2417 NOT_TAKEN);
2418 decl = gimple_call_fndecl (stmt);
2419 if (decl
2420 && lookup_attribute ("cold",
2421 DECL_ATTRIBUTES (decl)))
2422 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2423 NOT_TAKEN);
2425 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2427 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2428 gimple_predict_outcome (stmt));
2429 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2430 hints to callers. */
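/* Illustrative sketch only, not part of this file: calls that the walk
   above turns into block-level hints.  Names are hypothetical.  */
#if 0
extern void example_fatal_error (const char *) __attribute__ ((noreturn));
extern void example_log_rare_event (void) __attribute__ ((cold));

static int
example_bb_level_hints (int x)
{
  if (x < 0)
    example_fatal_error ("negative");	/* noreturn call: paths leading here
					   get PRED_NORETURN, not taken.  */
  if (x == 0)
    example_log_rare_event ();		/* cold callee: PRED_COLD_FUNCTION,
					   not taken.  */
  return x;
}
#endif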
2436 /* Callback for hash_map::traverse, asserts that the pointer map is
2437 empty. */
2439 bool
2440 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2441 void *)
2443 gcc_assert (!value);
2444 return false;
2447 /* Predict branch probabilities and estimate profile for basic block BB. */
2449 static void
2450 tree_estimate_probability_bb (basic_block bb)
2452 edge e;
2453 edge_iterator ei;
2454 gimple *last;
2456 FOR_EACH_EDGE (e, ei, bb->succs)
2458 /* Predict edges to user labels with attributes. */
2459 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2461 gimple_stmt_iterator gi;
2462 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2464 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2465 tree decl;
2467 if (!label_stmt)
2468 break;
2469 decl = gimple_label_label (label_stmt);
2470 if (DECL_ARTIFICIAL (decl))
2471 continue;
2473 /* Finally, we have a user-defined label. */
2474 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2475 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2476 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2477 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2481 /* Predict early returns to be probable, as we've already taken
2482 care of error returns, and other cases are often used for
2483 fast paths through the function.
2485 Since we've already removed the return statements, we are
2486 looking for CFG like:
2488 if (conditional)
2491 goto return_block
2493 some other blocks
2494 return_block:
2495 return_stmt. */
2496 if (e->dest != bb->next_bb
2497 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2498 && single_succ_p (e->dest)
2499 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2500 && (last = last_stmt (e->dest)) != NULL
2501 && gimple_code (last) == GIMPLE_RETURN)
2503 edge e1;
2504 edge_iterator ei1;
2506 if (single_succ_p (bb))
2508 FOR_EACH_EDGE (e1, ei1, bb->preds)
2509 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2510 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2511 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2512 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2514 else
2515 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2516 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2517 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2518 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2521 /* Look for a block we are guarding (i.e. we dominate it,
2522 but it doesn't postdominate us). */
2523 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2524 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2525 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2527 gimple_stmt_iterator bi;
2529 /* The call heuristic claims that a guarded function call
2530 is improbable. This is because such calls are often used
2531 to signal exceptional situations such as printing error
2532 messages. */
2533 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2534 gsi_next (&bi))
2536 gimple *stmt = gsi_stmt (bi);
2537 if (is_gimple_call (stmt)
2538 /* Constant and pure calls are hardly ever used to signal
2539 something exceptional. */
2540 && gimple_has_side_effects (stmt))
2542 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2543 break;
2548 tree_predict_by_opcode (bb);
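/* Illustrative sketch only, not part of this file: user code triggering the
   guarded-call and cold-label heuristics applied above.  Names are
   hypothetical.  */
#if 0
static int
example_guarded_call (int status)
{
  if (status != 0)
    goto unlikely_path;
  return 0;

 unlikely_path:
  __attribute__ ((cold));	/* User label with the cold attribute: edges
				   into this block get PRED_COLD_LABEL.  */
  /* A guarded call with side effects also predicts the branch into this
     block as not taken (PRED_CALL).  */
  __builtin_puts ("unexpected status");
  return -1;
}
#endif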
2551 /* Predict branch probabilities and estimate profile of the tree CFG.
2552 This function can be called from the loop optimizers to recompute
2553 the profile information.
2554 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2556 void
2557 tree_estimate_probability (bool dry_run)
2559 basic_block bb;
2561 add_noreturn_fake_exit_edges ();
2562 connect_infinite_loops_to_exit ();
2563 /* We use loop_niter_by_eval, which requires that the loops have
2564 preheaders. */
2565 create_preheaders (CP_SIMPLE_PREHEADERS);
2566 calculate_dominance_info (CDI_POST_DOMINATORS);
2568 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2569 tree_bb_level_predictions ();
2570 record_loop_exits ();
2572 if (number_of_loops (cfun) > 1)
2573 predict_loops ();
2575 FOR_EACH_BB_FN (bb, cfun)
2576 tree_estimate_probability_bb (bb);
2578 FOR_EACH_BB_FN (bb, cfun)
2579 combine_predictions_for_bb (bb, dry_run);
2581 if (flag_checking)
2582 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2584 delete bb_predictions;
2585 bb_predictions = NULL;
2587 if (!dry_run)
2588 estimate_bb_frequencies (false);
2589 free_dominance_info (CDI_POST_DOMINATORS);
2590 remove_fake_exit_edges ();
2593 /* Predict edges to successors of CUR whose sources are not postdominated by
2594 BB by PRED and recurse to all postdominators. */
2596 static void
2597 predict_paths_for_bb (basic_block cur, basic_block bb,
2598 enum br_predictor pred,
2599 enum prediction taken,
2600 bitmap visited)
2602 edge e;
2603 edge_iterator ei;
2604 basic_block son;
2606 /* We are looking for all edges forming the edge cut induced by
2607 the set of all blocks postdominated by BB. */
2608 FOR_EACH_EDGE (e, ei, cur->preds)
2609 if (e->src->index >= NUM_FIXED_BLOCKS
2610 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2612 edge e2;
2613 edge_iterator ei2;
2614 bool found = false;
2616 /* Ignore fake and EH edges, we predict them as not taken anyway. */
2617 if (e->flags & (EDGE_EH | EDGE_FAKE))
2618 continue;
2619 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2621 /* See if there is an edge from e->src that is not abnormal
2622 and does not lead to BB. */
2623 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2624 if (e2 != e
2625 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2626 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2628 found = true;
2629 break;
2632 /* If there is a non-abnormal path leaving e->src, predict the edge
2633 using the predictor. Otherwise we need to look for paths
2634 leading to e->src.
2636 The second case may lead to an infinite loop when we are predicting
2637 regions that are only reachable by abnormal edges. We simply
2638 prevent visiting a given BB twice. */
2639 if (found)
2641 if (!edge_predicted_by_p (e, pred, taken))
2642 predict_edge_def (e, pred, taken);
2644 else if (bitmap_set_bit (visited, e->src->index))
2645 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2647 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2648 son;
2649 son = next_dom_son (CDI_POST_DOMINATORS, son))
2650 predict_paths_for_bb (son, bb, pred, taken, visited);
2653 /* Set branch probabilities on all paths leading to BB according to
2654 predictor PRED and prediction TAKEN. */
2656 static void
2657 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2658 enum prediction taken)
2660 bitmap visited = BITMAP_ALLOC (NULL);
2661 predict_paths_for_bb (bb, bb, pred, taken, visited);
2662 BITMAP_FREE (visited);
2665 /* Like predict_paths_leading_to but take an edge instead of a basic block. */
2667 static void
2668 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2669 enum prediction taken)
2671 bool has_nonloop_edge = false;
2672 edge_iterator ei;
2673 edge e2;
2675 basic_block bb = e->src;
2676 FOR_EACH_EDGE (e2, ei, bb->succs)
2677 if (e2->dest != e->src && e2->dest != e->dest
2678 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2679 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2681 has_nonloop_edge = true;
2682 break;
2684 if (!has_nonloop_edge)
2686 bitmap visited = BITMAP_ALLOC (NULL);
2687 predict_paths_for_bb (bb, bb, pred, taken, visited);
2688 BITMAP_FREE (visited);
2690 else
2691 predict_edge_def (e, pred, taken);
2694 /* This is used to carry information about basic blocks. It is
2695 attached to the AUX field of the standard CFG block. */
2697 struct block_info
2699 /* Estimated frequency of execution of basic_block. */
2700 sreal frequency;
2702 /* To keep queue of basic blocks to process. */
2703 basic_block next;
2705 /* Number of predecessors we need to visit first. */
2706 int npredecessors;
2709 /* Similar information for edges. */
2710 struct edge_prob_info
2712 /* In case the edge is a loopback edge, the probability that the edge will
2713 be reached provided the header is. The estimated number of iterations of
2714 the loop can then be computed as 1 / (1 - back_edge_prob). */
2715 sreal back_edge_prob;
2716 /* True if the edge is a loopback edge in the natural loop. */
2717 unsigned int back_edge:1;
2720 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2721 #undef EDGE_INFO
2722 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2724 /* Helper function for estimate_bb_frequencies.
2725 Propagate the frequencies in blocks marked in
2726 TOVISIT, starting in HEAD. */
2728 static void
2729 propagate_freq (basic_block head, bitmap tovisit)
2731 basic_block bb;
2732 basic_block last;
2733 unsigned i;
2734 edge e;
2735 basic_block nextbb;
2736 bitmap_iterator bi;
2738 /* For each basic block we need to visit, count the number of its
2739 predecessors we need to visit first. */
2740 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2742 edge_iterator ei;
2743 int count = 0;
2745 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2747 FOR_EACH_EDGE (e, ei, bb->preds)
2749 bool visit = bitmap_bit_p (tovisit, e->src->index);
2751 if (visit && !(e->flags & EDGE_DFS_BACK))
2752 count++;
2753 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2754 fprintf (dump_file,
2755 "Irreducible region hit, ignoring edge to %i->%i\n",
2756 e->src->index, bb->index);
2758 BLOCK_INFO (bb)->npredecessors = count;
2759 /* When the function never returns, we will never process the exit block. */
2760 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2761 bb->count = bb->frequency = 0;
2764 BLOCK_INFO (head)->frequency = 1;
2765 last = head;
2766 for (bb = head; bb; bb = nextbb)
2768 edge_iterator ei;
2769 sreal cyclic_probability = 0;
2770 sreal frequency = 0;
2772 nextbb = BLOCK_INFO (bb)->next;
2773 BLOCK_INFO (bb)->next = NULL;
2775 /* Compute frequency of basic block. */
2776 if (bb != head)
2778 if (flag_checking)
2779 FOR_EACH_EDGE (e, ei, bb->preds)
2780 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2781 || (e->flags & EDGE_DFS_BACK));
2783 FOR_EACH_EDGE (e, ei, bb->preds)
2784 if (EDGE_INFO (e)->back_edge)
2786 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2788 else if (!(e->flags & EDGE_DFS_BACK))
2790 /* frequency += (e->probability
2791 * BLOCK_INFO (e->src)->frequency /
2792 REG_BR_PROB_BASE); */
2794 sreal tmp = e->probability;
2795 tmp *= BLOCK_INFO (e->src)->frequency;
2796 tmp *= real_inv_br_prob_base;
2797 frequency += tmp;
2800 if (cyclic_probability == 0)
2802 BLOCK_INFO (bb)->frequency = frequency;
2804 else
2806 if (cyclic_probability > real_almost_one)
2807 cyclic_probability = real_almost_one;
2809 /* BLOCK_INFO (bb)->frequency = frequency
2810 / (1 - cyclic_probability) */
2812 cyclic_probability = sreal (1) - cyclic_probability;
2813 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2817 bitmap_clear_bit (tovisit, bb->index);
2819 e = find_edge (bb, head);
2820 if (e)
2822 /* EDGE_INFO (e)->back_edge_prob
2823 = ((e->probability * BLOCK_INFO (bb)->frequency)
2824 / REG_BR_PROB_BASE); */
2826 sreal tmp = e->probability;
2827 tmp *= BLOCK_INFO (bb)->frequency;
2828 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2831 /* Propagate to successor blocks. */
2832 FOR_EACH_EDGE (e, ei, bb->succs)
2833 if (!(e->flags & EDGE_DFS_BACK)
2834 && BLOCK_INFO (e->dest)->npredecessors)
2836 BLOCK_INFO (e->dest)->npredecessors--;
2837 if (!BLOCK_INFO (e->dest)->npredecessors)
2839 if (!nextbb)
2840 nextbb = e->dest;
2841 else
2842 BLOCK_INFO (last)->next = e->dest;
2844 last = e->dest;
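/* Illustrative sketch only, not part of this file: the cyclic-probability
   arithmetic above with plain doubles.  A loop header entered with
   frequency 1 whose back edge has probability 0.9 gets frequency
   1 / (1 - 0.9) = 10, i.e. about ten expected iterations.  */
#if 0
static double
example_header_frequency (double entry_freq, double back_edge_prob)
{
  /* Cap probabilities very close to 1, as done above with
     real_almost_one, so the division cannot blow up.  */
  if (back_edge_prob > 0.9999)
    back_edge_prob = 0.9999;
  return entry_freq / (1.0 - back_edge_prob);
}
#endif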
2850 /* Estimate frequencies in loops at the same nest level. */
2852 static void
2853 estimate_loops_at_level (struct loop *first_loop)
2855 struct loop *loop;
2857 for (loop = first_loop; loop; loop = loop->next)
2859 edge e;
2860 basic_block *bbs;
2861 unsigned i;
2862 bitmap tovisit = BITMAP_ALLOC (NULL);
2864 estimate_loops_at_level (loop->inner);
2866 /* Find current loop back edge and mark it. */
2867 e = loop_latch_edge (loop);
2868 EDGE_INFO (e)->back_edge = 1;
2870 bbs = get_loop_body (loop);
2871 for (i = 0; i < loop->num_nodes; i++)
2872 bitmap_set_bit (tovisit, bbs[i]->index);
2873 free (bbs);
2874 propagate_freq (loop->header, tovisit);
2875 BITMAP_FREE (tovisit);
2879 /* Propagate frequencies through the structure of loops. */
2881 static void
2882 estimate_loops (void)
2884 bitmap tovisit = BITMAP_ALLOC (NULL);
2885 basic_block bb;
2887 /* Start by estimating the frequencies in the loops. */
2888 if (number_of_loops (cfun) > 1)
2889 estimate_loops_at_level (current_loops->tree_root->inner);
2891 /* Now propagate the frequencies through all the blocks. */
2892 FOR_ALL_BB_FN (bb, cfun)
2894 bitmap_set_bit (tovisit, bb->index);
2896 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2897 BITMAP_FREE (tovisit);
2900 /* Drop the profile for NODE to guessed, and update its frequency based on
2901 whether it is expected to be hot given the CALL_COUNT. */
2903 static void
2904 drop_profile (struct cgraph_node *node, gcov_type call_count)
2906 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2907 /* In the case where this was called by another function with a
2908 dropped profile, call_count will be 0. Since there are no
2909 non-zero call counts to this function, we don't know for sure
2910 whether it is hot, and therefore it will be marked normal below. */
2911 bool hot = maybe_hot_count_p (NULL, call_count);
2913 if (dump_file)
2914 fprintf (dump_file,
2915 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2916 node->name (), node->order,
2917 hot ? "Function is hot" : "Function is normal");
2918 /* We only expect to miss profiles for functions that are reached
2919 via non-zero call edges in cases where the function may have
2920 been linked from another module or library (COMDATs and extern
2921 templates). See the comments below for handle_missing_profiles.
2922 Also, only warn in cases where the missing counts exceed the
2923 number of training runs. In certain cases with an execv followed
2924 by a no-return call the profile for the no-return call is not
2925 dumped and there can be a mismatch. */
2926 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2927 && call_count > profile_info->runs)
2929 if (flag_profile_correction)
2931 if (dump_file)
2932 fprintf (dump_file,
2933 "Missing counts for called function %s/%i\n",
2934 node->name (), node->order);
2936 else
2937 warning (0, "Missing counts for called function %s/%i",
2938 node->name (), node->order);
2941 profile_status_for_fn (fn)
2942 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2943 node->frequency
2944 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2947 /* In the case of COMDAT routines, multiple object files will contain the same
2948 function and the linker will select one for the binary. In that case
2949 all the other copies from the profile-instrumented binary will be missing
2950 profile counts. Look for cases where this happened, due to non-zero
2951 call counts going to 0-count functions, and drop the profile to guessed
2952 so that we can use the estimated probabilities and avoid optimizing only
2953 for size.
2955 The other case where the profile may be missing is when the routine
2956 is not going to be emitted to the object file, e.g. for "extern template"
2957 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2958 all other cases of non-zero calls to 0-count functions. */
2960 void
2961 handle_missing_profiles (void)
2963 struct cgraph_node *node;
2964 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2965 vec<struct cgraph_node *> worklist;
2966 worklist.create (64);
2968 /* See if a 0-count function has non-0 count callers. In this case we
2969 lost some of the profile. Drop its function profile to PROFILE_GUESSED. */
2970 FOR_EACH_DEFINED_FUNCTION (node)
2972 struct cgraph_edge *e;
2973 gcov_type call_count = 0;
2974 gcov_type max_tp_first_run = 0;
2975 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2977 if (node->count)
2978 continue;
2979 for (e = node->callers; e; e = e->next_caller)
2981 call_count += e->count;
2983 if (e->caller->tp_first_run > max_tp_first_run)
2984 max_tp_first_run = e->caller->tp_first_run;
2987 /* If the time profile is missing, let's assign the maximum that comes
2988 from the caller functions. */
2989 if (!node->tp_first_run && max_tp_first_run)
2990 node->tp_first_run = max_tp_first_run + 1;
2992 if (call_count
2993 && fn && fn->cfg
2994 && (call_count * unlikely_count_fraction >= profile_info->runs))
2996 drop_profile (node, call_count);
2997 worklist.safe_push (node);
3001 /* Propagate the profile dropping to other 0-count COMDATs that are
3002 potentially called by COMDATs we already dropped the profile on. */
3003 while (worklist.length () > 0)
3005 struct cgraph_edge *e;
3007 node = worklist.pop ();
3008 for (e = node->callees; e; e = e->next_callee)
3010 struct cgraph_node *callee = e->callee;
3011 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3013 if (callee->count > 0)
3014 continue;
3015 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
3016 && profile_status_for_fn (fn) == PROFILE_READ)
3018 drop_profile (node, 0);
3019 worklist.safe_push (callee);
3023 worklist.release ();
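/* Illustrative sketch only, not part of this file: the dropping condition
   used above with plain integers.  With a fraction of e.g. 20 and 100
   training runs, a 0-count function is dropped to guessed once its
   callers' summed call count reaches 5, since 5 * 20 >= 100.  */
#if 0
static int
example_should_drop_profile (long call_count, long runs,
			     int unlikely_count_fraction)
{
  /* Caller counts that are large relative to the number of training runs
     indicate a lost profile rather than a genuinely cold function.  */
  return call_count * unlikely_count_fraction >= runs;
}
#endif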
3026 /* Convert counts measured by profile driven feedback to frequencies.
3027 Return nonzero iff there was any nonzero execution count. */
3029 gcov_type
3030 counts_to_freqs (void)
3032 gcov_type count_max, true_count_max = 0;
3033 basic_block bb;
3035 /* Don't overwrite the estimated frequencies when the profile for
3036 the function is missing. We may drop this function to PROFILE_GUESSED
3037 later in drop_profile (). */
3038 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
3039 return 0;
3041 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3042 true_count_max = MAX (bb->count, true_count_max);
3044 count_max = MAX (true_count_max, 1);
3045 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3046 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
3048 return true_count_max;
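/* Illustrative sketch only, not part of this file: the scaling above with
   plain integers.  With BB_FREQ_MAX of 10000 and count_max of 3, a block
   with count 2 gets frequency (2 * 10000 + 1) / 3 = 6667 rather than the
   truncated 6666; the "+ count_max / 2" term rounds to nearest.  */
#if 0
static long
example_count_to_freq (long count, long count_max, long bb_freq_max)
{
  return (count * bb_freq_max + count_max / 2) / count_max;
}
#endif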
3051 /* Return true if the function is likely to be expensive, so there is no point
3052 in optimizing the performance of the prologue, epilogue, or doing inlining at
3053 the expense of code size growth. THRESHOLD is the limit on the number of
3054 instructions the function can execute on average to still be considered not expensive. */
3056 bool
3057 expensive_function_p (int threshold)
3059 unsigned int sum = 0;
3060 basic_block bb;
3061 unsigned int limit;
3063 /* We cannot compute accurately for large thresholds due to scaled
3064 frequencies. */
3065 gcc_assert (threshold <= BB_FREQ_MAX);
3067 /* Frequencies are out of range. This either means that the function contains
3068 an internal loop executing more than BB_FREQ_MAX times, or that profile
3069 feedback is available and the function has not been executed at all. */
3070 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
3071 return true;
3073 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
3074 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
3075 FOR_EACH_BB_FN (bb, cfun)
3077 rtx_insn *insn;
3079 FOR_BB_INSNS (bb, insn)
3080 if (active_insn_p (insn))
3082 sum += bb->frequency;
3083 if (sum > limit)
3084 return true;
3088 return false;
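/* Illustrative sketch only, not part of this file: the same bookkeeping as
   above with plain integers.  If the entry block has frequency 1000 and
   THRESHOLD is 50, the budget is 50000; a block of frequency 200 holding
   300 active instructions alone contributes 200 * 300 = 60000, so the
   function is deemed expensive.  */
#if 0
static int
example_expensive_p (const int *bb_freq, const int *bb_ninsns, int nblocks,
		     int entry_freq, int threshold)
{
  unsigned int limit = entry_freq * threshold;
  unsigned int sum = 0;
  for (int i = 0; i < nblocks; i++)
    {
      /* Each active instruction accounts for its block's frequency.  */
      sum += (unsigned int) bb_freq[i] * bb_ninsns[i];
      if (sum > limit)
	return 1;
    }
  return 0;
}
#endif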
3091 /* Estimate and propagate basic block frequencies using the given branch
3092 probabilities. If FORCE is true, the frequencies are used to estimate
3093 the counts even when there are already non-zero profile counts. */
3095 void
3096 estimate_bb_frequencies (bool force)
3098 basic_block bb;
3099 sreal freq_max;
3101 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
3103 static int real_values_initialized = 0;
3105 if (!real_values_initialized)
3107 real_values_initialized = 1;
3108 real_br_prob_base = REG_BR_PROB_BASE;
3109 real_bb_freq_max = BB_FREQ_MAX;
3110 real_one_half = sreal (1, -1);
3111 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3112 real_almost_one = sreal (1) - real_inv_br_prob_base;
3115 mark_dfs_back_edges ();
3117 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3118 REG_BR_PROB_BASE;
3120 /* Set up block info for each basic block. */
3121 alloc_aux_for_blocks (sizeof (block_info));
3122 alloc_aux_for_edges (sizeof (edge_prob_info));
3123 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3125 edge e;
3126 edge_iterator ei;
3128 FOR_EACH_EDGE (e, ei, bb->succs)
3130 EDGE_INFO (e)->back_edge_prob = e->probability;
3131 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3135 /* First compute frequencies locally for each loop from innermost
3136 to outermost to examine frequencies for back edges. */
3137 estimate_loops ();
3139 freq_max = 0;
3140 FOR_EACH_BB_FN (bb, cfun)
3141 if (freq_max < BLOCK_INFO (bb)->frequency)
3142 freq_max = BLOCK_INFO (bb)->frequency;
3144 freq_max = real_bb_freq_max / freq_max;
3145 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3147 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3148 bb->frequency = tmp.to_int ();
3151 free_aux_for_blocks ();
3152 free_aux_for_edges ();
3154 compute_function_frequency ();
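/* Illustrative sketch only, not part of this file: the final rescaling
   above with plain doubles.  If the most frequent block has propagated
   frequency 25.0 and BB_FREQ_MAX is 10000, every block frequency is
   multiplied by 10000 / 25.0 = 400 and rounded, so the hottest block ends
   up exactly at BB_FREQ_MAX.  */
#if 0
static int
example_scale_frequency (double block_freq, double max_block_freq,
			 int bb_freq_max)
{
  return (int) (block_freq * (bb_freq_max / max_block_freq) + 0.5);
}
#endif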
3157 /* Decide whether the function is hot, cold or unlikely executed. */
3158 void
3159 compute_function_frequency (void)
3161 basic_block bb;
3162 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3164 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3165 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3166 node->only_called_at_startup = true;
3167 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3168 node->only_called_at_exit = true;
3170 if (profile_status_for_fn (cfun) != PROFILE_READ)
3172 int flags = flags_from_decl_or_type (current_function_decl);
3173 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3174 != NULL)
3175 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3176 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3177 != NULL)
3178 node->frequency = NODE_FREQUENCY_HOT;
3179 else if (flags & ECF_NORETURN)
3180 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3181 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3182 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3183 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3184 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3185 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3186 return;
3189 /* Only the first time try to drop the function into unlikely executed.
3190 After inlining, the roundoff errors may confuse us.
3191 The ipa-profile pass will drop functions only called from unlikely
3192 functions to unlikely, and that is most of what we care about. */
3193 if (!cfun->after_inlining)
3194 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3195 FOR_EACH_BB_FN (bb, cfun)
3197 if (maybe_hot_bb_p (cfun, bb))
3199 node->frequency = NODE_FREQUENCY_HOT;
3200 return;
3202 if (!probably_never_executed_bb_p (cfun, bb))
3203 node->frequency = NODE_FREQUENCY_NORMAL;
3207 /* Build PREDICT_EXPR. */
3208 tree
3209 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3211 tree t = build1 (PREDICT_EXPR, void_type_node,
3212 build_int_cst (integer_type_node, predictor));
3213 SET_PREDICT_EXPR_OUTCOME (t, taken);
3214 return t;
3217 const char *
3218 predictor_name (enum br_predictor predictor)
3220 return predictor_info[predictor].name;
3223 /* Predict branch probabilities and estimate profile of the tree CFG. */
3225 namespace {
3227 const pass_data pass_data_profile =
3229 GIMPLE_PASS, /* type */
3230 "profile_estimate", /* name */
3231 OPTGROUP_NONE, /* optinfo_flags */
3232 TV_BRANCH_PROB, /* tv_id */
3233 PROP_cfg, /* properties_required */
3234 0, /* properties_provided */
3235 0, /* properties_destroyed */
3236 0, /* todo_flags_start */
3237 0, /* todo_flags_finish */
3240 class pass_profile : public gimple_opt_pass
3242 public:
3243 pass_profile (gcc::context *ctxt)
3244 : gimple_opt_pass (pass_data_profile, ctxt)
3247 /* opt_pass methods: */
3248 virtual bool gate (function *) { return flag_guess_branch_prob; }
3249 virtual unsigned int execute (function *);
3251 }; // class pass_profile
3253 unsigned int
3254 pass_profile::execute (function *fun)
3256 unsigned nb_loops;
3258 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3259 return 0;
3261 loop_optimizer_init (LOOPS_NORMAL);
3262 if (dump_file && (dump_flags & TDF_DETAILS))
3263 flow_loops_dump (dump_file, NULL, 0);
3265 mark_irreducible_loops ();
3267 nb_loops = number_of_loops (fun);
3268 if (nb_loops > 1)
3269 scev_initialize ();
3271 tree_estimate_probability (false);
3273 if (nb_loops > 1)
3274 scev_finalize ();
3276 loop_optimizer_finalize ();
3277 if (dump_file && (dump_flags & TDF_DETAILS))
3278 gimple_dump_cfg (dump_file, dump_flags);
3279 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3280 profile_status_for_fn (fun) = PROFILE_GUESSED;
3281 return 0;
3284 } // anon namespace
3286 gimple_opt_pass *
3287 make_pass_profile (gcc::context *ctxt)
3289 return new pass_profile (ctxt);
3292 namespace {
3294 const pass_data pass_data_strip_predict_hints =
3296 GIMPLE_PASS, /* type */
3297 "*strip_predict_hints", /* name */
3298 OPTGROUP_NONE, /* optinfo_flags */
3299 TV_BRANCH_PROB, /* tv_id */
3300 PROP_cfg, /* properties_required */
3301 0, /* properties_provided */
3302 0, /* properties_destroyed */
3303 0, /* todo_flags_start */
3304 0, /* todo_flags_finish */
3307 class pass_strip_predict_hints : public gimple_opt_pass
3309 public:
3310 pass_strip_predict_hints (gcc::context *ctxt)
3311 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3314 /* opt_pass methods: */
3315 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3316 virtual unsigned int execute (function *);
3318 }; // class pass_strip_predict_hints
3320 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3321 we no longer need. */
3322 unsigned int
3323 pass_strip_predict_hints::execute (function *fun)
3325 basic_block bb;
3326 gimple *ass_stmt;
3327 tree var;
3328 bool changed = false;
3330 FOR_EACH_BB_FN (bb, fun)
3332 gimple_stmt_iterator bi;
3333 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3335 gimple *stmt = gsi_stmt (bi);
3337 if (gimple_code (stmt) == GIMPLE_PREDICT)
3339 gsi_remove (&bi, true);
3340 changed = true;
3341 continue;
3343 else if (is_gimple_call (stmt))
3345 tree fndecl = gimple_call_fndecl (stmt);
3347 if ((fndecl
3348 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3349 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3350 && gimple_call_num_args (stmt) == 2)
3351 || (gimple_call_internal_p (stmt)
3352 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3354 var = gimple_call_lhs (stmt);
3355 changed = true;
3356 if (var)
3358 ass_stmt
3359 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3360 gsi_replace (&bi, ass_stmt, true);
3362 else
3364 gsi_remove (&bi, true);
3365 continue;
3369 gsi_next (&bi);
3372 return changed ? TODO_cleanup_cfg : 0;
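/* Illustrative sketch only, not part of this file: user code carrying the
   hint this pass strips once branch prediction has consumed it.  After the
   pass, the __builtin_expect call below is replaced by a plain copy of its
   first argument, and any GIMPLE_PREDICT statements are removed.  The
   function name is hypothetical.  */
#if 0
static long
example_strip_expect (long n)
{
  if (__builtin_expect (n == 0, 0))
    return -1;
  return n * 2;
}
#endif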
3375 } // anon namespace
3377 gimple_opt_pass *
3378 make_pass_strip_predict_hints (gcc::context *ctxt)
3380 return new pass_strip_predict_hints (ctxt);
3383 /* Rebuild function frequencies. Passes are in general expected to
3384 maintain the profile by hand; however, in some cases this is not possible:
3385 for example, when inlining several functions with loops, frequencies might
3386 run out of scale and thus need to be recomputed. */
3388 void
3389 rebuild_frequencies (void)
3391 timevar_push (TV_REBUILD_FREQUENCIES);
3393 /* When the max bb count in the function is small, there is a higher
3394 chance that there were truncation errors in the integer scaling
3395 of counts by inlining and other optimizations. This could lead
3396 to incorrect classification of code as being cold when it isn't.
3397 In that case, force the estimation of bb counts/frequencies from the
3398 branch probabilities, rather than computing frequencies from counts,
3399 which may also lead to frequencies incorrectly reduced to 0. There
3400 is less precision in the probabilities, so we only do this for small
3401 max counts. */
3402 gcov_type count_max = 0;
3403 basic_block bb;
3404 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3405 count_max = MAX (bb->count, count_max);
3407 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3408 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3409 && count_max < REG_BR_PROB_BASE/10))
3411 loop_optimizer_init (0);
3412 add_noreturn_fake_exit_edges ();
3413 mark_irreducible_loops ();
3414 connect_infinite_loops_to_exit ();
3415 estimate_bb_frequencies (true);
3416 remove_fake_exit_edges ();
3417 loop_optimizer_finalize ();
3419 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3420 counts_to_freqs ();
3421 else
3422 gcc_unreachable ();
3423 timevar_pop (TV_REBUILD_FREQUENCIES);
3426 /* Perform a dry run of the branch prediction pass and report a comparison of
3427 the predicted and real profile into the dump file. */
3429 void
3430 report_predictor_hitrates (void)
3432 unsigned nb_loops;
3434 loop_optimizer_init (LOOPS_NORMAL);
3435 if (dump_file && (dump_flags & TDF_DETAILS))
3436 flow_loops_dump (dump_file, NULL, 0);
3438 mark_irreducible_loops ();
3440 nb_loops = number_of_loops (cfun);
3441 if (nb_loops > 1)
3442 scev_initialize ();
3444 tree_estimate_probability (true);
3446 if (nb_loops > 1)
3447 scev_finalize ();
3449 loop_optimizer_finalize ();
3452 /* Force edge E to be cold.
3453 If IMPOSSIBLE is true, force the edge to have count and probability 0;
3454 otherwise keep a low probability to represent a possible error in a guess.
3455 This is used e.g. when we predict that a loop likely iterates a given
3456 number of times but we are not 100% sure.
3458 This function locally updates the profile without attempting to keep global
3459 consistency, which cannot be reached in full generality without a full
3460 profile rebuild from probabilities alone. Doing so is not necessarily a good
3461 idea because frequencies and counts may be more realistic than probabilities.
3463 In some cases (such as the elimination of early exits during full loop
3464 unrolling) the caller can ensure that the profile will become consistent
3465 afterwards. */
3467 void
3468 force_edge_cold (edge e, bool impossible)
3470 gcov_type count_sum = 0;
3471 int prob_sum = 0;
3472 edge_iterator ei;
3473 edge e2;
3474 gcov_type old_count = e->count;
3475 int old_probability = e->probability;
3476 gcov_type gcov_scale = REG_BR_PROB_BASE;
3477 int prob_scale = REG_BR_PROB_BASE;
3479 /* If the edge is already improbable or cold, just return. */
3480 if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
3481 && (!impossible || !e->count))
3482 return;
3483 FOR_EACH_EDGE (e2, ei, e->src->succs)
3484 if (e2 != e)
3486 count_sum += e2->count;
3487 prob_sum += e2->probability;
3490 /* If there are other edges out of e->src, redistribute the probability
3491 there. */
3492 if (prob_sum)
3494 e->probability
3495 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
3496 if (old_probability)
3497 e->count = RDIV (e->count * e->probability, old_probability);
3498 else
3499 e->count = MIN (e->count, impossible ? 0 : 1);
3501 if (count_sum)
3502 gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
3503 count_sum);
3504 prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
3505 prob_sum);
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3507 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3508 "probability to other edges.\n",
3509 e->src->index, e->dest->index,
3510 impossible ? "impossible" : "cold");
3511 FOR_EACH_EDGE (e2, ei, e->src->succs)
3512 if (e2 != e)
3514 e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
3515 e2->probability = RDIV (e2->probability * prob_scale,
3516 REG_BR_PROB_BASE);
3519 /* If all edges out of e->src are unlikely, the basic block itself
3520 is unlikely. */
3521 else
3523 e->probability = REG_BR_PROB_BASE;
3525 /* If we did no adjusting, the source basic block has no likely edges
3526 leaving in the other direction. In that case force that bb cold, too.
3527 This in general is a difficult task to do, but handle the special case when
3528 BB has only one predecessor. This is the common case when we are updating
3529 after loop transforms. */
3530 if (!prob_sum && !count_sum && single_pred_p (e->src)
3531 && e->src->frequency > (impossible ? 0 : 1))
3533 int old_frequency = e->src->frequency;
3534 if (dump_file && (dump_flags & TDF_DETAILS))
3535 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
3536 impossible ? "impossible" : "cold");
3537 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
3538 e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
3539 old_frequency);
3540 force_edge_cold (single_pred_edge (e->src), impossible);
3542 else if (dump_file && (dump_flags & TDF_DETAILS)
3543 && maybe_hot_bb_p (cfun, e->src))
3544 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
3545 impossible ? "impossible" : "cold");
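/* Illustrative sketch only, not part of this file: the probability
   redistribution above with plain integers.  Forcing an edge of
   probability 4000 (out of REG_BR_PROB_BASE, 10000) to be impossible
   leaves prob_sum at 6000, so the remaining edges are scaled by
   10000 * 10000 / 6000 and again sum to roughly REG_BR_PROB_BASE.  */
#if 0
static void
example_redistribute_prob (int *probs, int nedges, int victim, int new_prob)
{
  const int base = 10000;	/* Stand-in for REG_BR_PROB_BASE.  */
  int prob_sum = 0;
  for (int i = 0; i < nedges; i++)
    if (i != victim)
      prob_sum += probs[i];
  probs[victim] = new_prob;
  /* Scale the other edges so the outgoing probabilities sum to base.  */
  int scale = (base - new_prob) * base / prob_sum;
  for (int i = 0; i < nedges; i++)
    if (i != victim)
      probs[i] = probs[i] * scale / base;
}
#endif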