Fix bootstrap/PR63632
[official-gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "tree.h"
35 #include "calls.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
41 #include "regs.h"
42 #include "flags.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "profile.h"
50 #include "except.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "expr.h"
54 #include "predict.h"
55 #include "coverage.h"
56 #include "sreal.h"
57 #include "params.h"
58 #include "target.h"
59 #include "cfgloop.h"
60 #include "hash-map.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimple-iterator.h"
67 #include "gimple-ssa.h"
68 #include "cgraph.h"
69 #include "tree-cfg.h"
70 #include "tree-phinodes.h"
71 #include "ssa-iterators.h"
72 #include "tree-ssa-loop-niter.h"
73 #include "tree-ssa-loop.h"
74 #include "tree-pass.h"
75 #include "tree-scalar-evolution.h"
76 #include "cfgloop.h"
78 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
79 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
80 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
81 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
83 static void combine_predictions_for_insn (rtx_insn *, basic_block);
84 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
85 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
86 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
87 static bool can_predict_insn_p (const rtx_insn *);
89 /* Information we hold about each branch predictor.
90 Filled using information from predict.def. */
92 struct predictor_info
94 const char *const name; /* Name used in the debugging dumps. */
95 const int hitrate; /* Expected hitrate used by
96 predict_insn_def call. */
97 const int flags;
100 /* Use the given predictor without Dempster-Shafer theory if it matches
101 using first_match heuristics. */
102 #define PRED_FLAG_FIRST_MATCH 1
104 /* Convert a hit rate given in percent to our fixed-point representation. */
106 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
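/* Illustrative example (assuming REG_BR_PROB_BASE keeps its usual value of
   10000): HITRATE (90) == (90 * 10000 + 50) / 100 == 9000, i.e. a 90% hit
   rate expressed in the fixed-point scale used throughout this file.  */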
108 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
109 static const struct predictor_info predictor_info[]= {
110 #include "predict.def"
112 /* Upper bound on predictors. */
113 {NULL, 0, 0}
115 #undef DEF_PREDICTOR
117 /* Return TRUE if frequency FREQ is considered to be hot. */
119 static inline bool
120 maybe_hot_frequency_p (struct function *fun, int freq)
122 struct cgraph_node *node = cgraph_node::get (fun->decl);
123 if (!profile_info || !flag_branch_probabilities)
125 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
126 return false;
127 if (node->frequency == NODE_FREQUENCY_HOT)
128 return true;
130 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
131 return true;
132 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
133 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
134 return false;
135 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
136 return false;
137 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
138 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
139 return false;
140 return true;
143 static gcov_type min_count = -1;
145 /* Determine the threshold for hot BB counts. */
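/* The threshold comes from the profile working set: assuming the
   hot-bb-count-ws-permille parameter keeps its usual default of 999,
   min_count becomes the smallest counter among the counters that together
   cover 99.9% of all profiled execution.  */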
147 gcov_type
148 get_hot_bb_threshold ()
150 gcov_working_set_t *ws;
151 if (min_count == -1)
153 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
154 gcc_assert (ws);
155 min_count = ws->min_counter;
157 return min_count;
160 /* Set the threshold for hot BB counts. */
162 void
163 set_hot_bb_threshold (gcov_type min)
165 min_count = min;
168 /* Return TRUE if profile count COUNT is considered to be hot. */
170 bool
171 maybe_hot_count_p (struct function *fun, gcov_type count)
173 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
174 return true;
175 /* Code executed at most once is not hot. */
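/* (Illustrative example: with 1000 training runs, a count of 1000 or less
   means the code executed at most about once per run on average, so it is
   not considered hot.)  */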
176 if (profile_info->runs >= count)
177 return false;
178 return (count >= get_hot_bb_threshold ());
181 /* Return true in case BB can be CPU intensive and should be optimized
182 for maximal performance. */
184 bool
185 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
187 gcc_checking_assert (fun);
188 if (profile_status_for_fn (fun) == PROFILE_READ)
189 return maybe_hot_count_p (fun, bb->count);
190 return maybe_hot_frequency_p (fun, bb->frequency);
193 /* Return true in case BB can be CPU intensive and should be optimized
194 for maximal performance. */
196 bool
197 maybe_hot_edge_p (edge e)
199 if (profile_status_for_fn (cfun) == PROFILE_READ)
200 return maybe_hot_count_p (cfun, e->count);
201 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
204 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
205 node frequency, reflect never being executed. */
207 static bool
208 probably_never_executed (struct function *fun,
209 gcov_type count, int frequency)
211 gcc_checking_assert (fun);
212 if (profile_status_for_fn (cfun) == PROFILE_READ)
214 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
215 if (count * unlikely_count_fraction >= profile_info->runs)
216 return false;
217 if (!frequency)
218 return true;
219 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
220 return false;
221 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
223 gcov_type computed_count;
224 /* Check for possibility of overflow, in which case entry bb count
225 is large enough to do the division first without losing much
226 precision. */
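/* Both branches below estimate
      count = frequency * entry_count / entry_frequency
   scaled by unlikely_count_fraction; only the order of the multiplication
   and the division differs, so that the intermediate product does not
   overflow when the entry block count is very large.  */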
227 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
228 REG_BR_PROB_BASE)
230 gcov_type scaled_count
231 = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
232 unlikely_count_fraction;
233 computed_count = RDIV (scaled_count,
234 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
236 else
238 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
239 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
240 computed_count *= frequency * unlikely_count_fraction;
242 if (computed_count >= profile_info->runs)
243 return false;
245 return true;
247 if ((!profile_info || !flag_branch_probabilities)
248 && (cgraph_node::get (fun->decl)->frequency
249 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
250 return true;
251 return false;
255 /* Return true in case BB is probably never executed. */
257 bool
258 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
260 return probably_never_executed (fun, bb->count, bb->frequency);
264 /* Return true in case edge E is probably never executed. */
266 bool
267 probably_never_executed_edge_p (struct function *fun, edge e)
269 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
272 /* Return true when current function should always be optimized for size. */
274 bool
275 optimize_function_for_size_p (struct function *fun)
277 if (optimize_size)
278 return true;
279 if (!fun || !fun->decl)
280 return false;
282 cgraph_node *n = cgraph_node::get (fun->decl);
283 return n && n->optimize_for_size_p ();
286 /* Return true when current function should always be optimized for speed. */
288 bool
289 optimize_function_for_speed_p (struct function *fun)
291 return !optimize_function_for_size_p (fun);
294 /* Return TRUE when BB should be optimized for size. */
296 bool
297 optimize_bb_for_size_p (const_basic_block bb)
299 return (optimize_function_for_size_p (cfun)
300 || (bb && !maybe_hot_bb_p (cfun, bb)));
303 /* Return TRUE when BB should be optimized for speed. */
305 bool
306 optimize_bb_for_speed_p (const_basic_block bb)
308 return !optimize_bb_for_size_p (bb);
311 /* Return TRUE when edge E should be optimized for size. */
313 bool
314 optimize_edge_for_size_p (edge e)
316 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
319 /* Return TRUE when edge E should be optimized for speed. */
321 bool
322 optimize_edge_for_speed_p (edge e)
324 return !optimize_edge_for_size_p (e);
327 /* Return TRUE when the current instruction should be optimized for size. */
329 bool
330 optimize_insn_for_size_p (void)
332 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
335 /* Return TRUE when the current instruction should be optimized for speed. */
337 bool
338 optimize_insn_for_speed_p (void)
340 return !optimize_insn_for_size_p ();
343 /* Return TRUE when LOOP should be optimized for size. */
345 bool
346 optimize_loop_for_size_p (struct loop *loop)
348 return optimize_bb_for_size_p (loop->header);
351 /* Return TRUE when LOOP should be optimized for speed. */
353 bool
354 optimize_loop_for_speed_p (struct loop *loop)
356 return optimize_bb_for_speed_p (loop->header);
359 /* Return TRUE when LOOP nest should be optimized for speed. */
361 bool
362 optimize_loop_nest_for_speed_p (struct loop *loop)
364 struct loop *l = loop;
365 if (optimize_loop_for_speed_p (loop))
366 return true;
367 l = loop->inner;
368 while (l && l != loop)
370 if (optimize_loop_for_speed_p (l))
371 return true;
372 if (l->inner)
373 l = l->inner;
374 else if (l->next)
375 l = l->next;
376 else
378 while (l != loop && !l->next)
379 l = loop_outer (l);
380 if (l != loop)
381 l = l->next;
384 return false;
387 /* Return TRUE when LOOP nest should be optimized for size. */
389 bool
390 optimize_loop_nest_for_size_p (struct loop *loop)
392 return !optimize_loop_nest_for_speed_p (loop);
395 /* Return true when edge E is likely to be well predictable by branch
396 predictor. */
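/* Illustrative example (assuming the predictable-branch-outcome parameter
   keeps its usual default of 2): an edge counts as predictable when its
   probability is at most 2% or at least 98% of REG_BR_PROB_BASE.  */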
398 bool
399 predictable_edge_p (edge e)
401 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
402 return false;
403 if ((e->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
405 || (REG_BR_PROB_BASE - e->probability
406 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
407 return true;
408 return false;
412 /* Set RTL expansion for BB profile. */
414 void
415 rtl_profile_for_bb (basic_block bb)
417 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
420 /* Set RTL expansion for edge profile. */
422 void
423 rtl_profile_for_edge (edge e)
425 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
428 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
429 void
430 default_rtl_profile (void)
432 crtl->maybe_hot_insn_p = true;
435 /* Return true if one of the outgoing edges is already predicted by
436 PREDICTOR. */
438 bool
439 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
441 rtx note;
442 if (!INSN_P (BB_END (bb)))
443 return false;
444 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
445 if (REG_NOTE_KIND (note) == REG_BR_PRED
446 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
447 return true;
448 return false;
451 /* Structure representing predictions in tree level. */
453 struct edge_prediction {
454 struct edge_prediction *ep_next;
455 edge ep_edge;
456 enum br_predictor ep_predictor;
457 int ep_probability;
460 /* This map contains for a basic block the list of predictions for the
461 outgoing edges. */
463 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
465 /* Return true if one of the outgoing edges is already predicted by
466 PREDICTOR. */
468 bool
469 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
471 struct edge_prediction *i;
472 edge_prediction **preds = bb_predictions->get (bb);
474 if (!preds)
475 return false;
477 for (i = *preds; i; i = i->ep_next)
478 if (i->ep_predictor == predictor)
479 return true;
480 return false;
483 /* Return true when the probability of the edge is reliable.
485 The profile guessing code is good at predicting branch outcomes (i.e.
486 taken/not taken); they are predicted right slightly over 75% of the time.
487 It is however notoriously poor at predicting the probability itself.
488 In general the guessed profile appears a lot flatter (with probabilities
489 closer to 50%) than reality, so it is a bad idea to use it to drive
490 optimizations such as those disabling dynamic branch prediction for well
491 predictable branches.
493 There are two exceptions - edges leading to noreturn edges and edges
494 predicted by number of iterations heuristics are predicted well. This macro
495 should be able to distinguish those, but at the moment it simply checks for
496 the noreturn heuristic, which is the only one giving a probability over 99%
497 or below 1%. In the future we might want to propagate reliability
498 information across the CFG if we find it useful in multiple places. */
499 static bool
500 probability_reliable_p (int prob)
502 return (profile_status_for_fn (cfun) == PROFILE_READ
503 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
504 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
507 /* Same predicate as above, working on edges. */
508 bool
509 edge_probability_reliable_p (const_edge e)
511 return probability_reliable_p (e->probability);
514 /* Same predicate as edge_probability_reliable_p, working on notes. */
515 bool
516 br_prob_note_reliable_p (const_rtx note)
518 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
519 return probability_reliable_p (XINT (note, 0));
522 static void
523 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
525 gcc_assert (any_condjump_p (insn));
526 if (!flag_guess_branch_prob)
527 return;
529 add_reg_note (insn, REG_BR_PRED,
530 gen_rtx_CONCAT (VOIDmode,
531 GEN_INT ((int) predictor),
532 GEN_INT ((int) probability)));
535 /* Predict insn by given predictor. */
537 void
538 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
539 enum prediction taken)
541 int probability = predictor_info[(int) predictor].hitrate;
543 if (taken != TAKEN)
544 probability = REG_BR_PROB_BASE - probability;
546 predict_insn (insn, predictor, probability);
549 /* Predict edge E with given probability if possible. */
551 void
552 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
554 rtx_insn *last_insn;
555 last_insn = BB_END (e->src);
557 /* We can store the branch prediction information only about
558 conditional jumps. */
559 if (!any_condjump_p (last_insn))
560 return;
562 /* We always store probability of branching. */
563 if (e->flags & EDGE_FALLTHRU)
564 probability = REG_BR_PROB_BASE - probability;
566 predict_insn (last_insn, predictor, probability);
569 /* Predict edge E with the given PROBABILITY. */
570 void
571 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
573 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
574 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
576 && flag_guess_branch_prob && optimize)
578 struct edge_prediction *i = XNEW (struct edge_prediction);
579 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
581 i->ep_next = preds;
582 preds = i;
583 i->ep_probability = probability;
584 i->ep_predictor = predictor;
585 i->ep_edge = e;
589 /* Remove all predictions on given basic block that are attached
590 to edge E. */
591 void
592 remove_predictions_associated_with_edge (edge e)
594 if (!bb_predictions)
595 return;
597 edge_prediction **preds = bb_predictions->get (e->src);
599 if (preds)
601 struct edge_prediction **prediction = preds;
602 struct edge_prediction *next;
604 while (*prediction)
606 if ((*prediction)->ep_edge == e)
608 next = (*prediction)->ep_next;
609 free (*prediction);
610 *prediction = next;
612 else
613 prediction = &((*prediction)->ep_next);
618 /* Clears the list of predictions stored for BB. */
620 static void
621 clear_bb_predictions (basic_block bb)
623 edge_prediction **preds = bb_predictions->get (bb);
624 struct edge_prediction *pred, *next;
626 if (!preds)
627 return;
629 for (pred = *preds; pred; pred = next)
631 next = pred->ep_next;
632 free (pred);
634 *preds = NULL;
637 /* Return true when we can store a prediction on insn INSN.
638 At the moment we represent predictions only on conditional
639 jumps, not on computed jumps or other complicated cases. */
640 static bool
641 can_predict_insn_p (const rtx_insn *insn)
643 return (JUMP_P (insn)
644 && any_condjump_p (insn)
645 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
648 /* Predict edge E by given predictor if possible. */
650 void
651 predict_edge_def (edge e, enum br_predictor predictor,
652 enum prediction taken)
654 int probability = predictor_info[(int) predictor].hitrate;
656 if (taken != TAKEN)
657 probability = REG_BR_PROB_BASE - probability;
659 predict_edge (e, predictor, probability);
662 /* Invert all branch predictions or probability notes in the INSN. This needs
663 to be done each time we invert the condition used by the jump. */
665 void
666 invert_br_probabilities (rtx insn)
668 rtx note;
670 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
671 if (REG_NOTE_KIND (note) == REG_BR_PROB)
672 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
673 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
674 XEXP (XEXP (note, 0), 1)
675 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
678 /* Dump information about the branch prediction to the output file. */
680 static void
681 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
682 basic_block bb, int used)
684 edge e;
685 edge_iterator ei;
687 if (!file)
688 return;
690 FOR_EACH_EDGE (e, ei, bb->succs)
691 if (! (e->flags & EDGE_FALLTHRU))
692 break;
694 fprintf (file, " %s heuristics%s: %.1f%%",
695 predictor_info[predictor].name,
696 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
698 if (bb->count)
700 fprintf (file, " exec %" PRId64, bb->count);
701 if (e)
703 fprintf (file, " hit %" PRId64, e->count);
704 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
708 fprintf (file, "\n");
711 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
712 evenly and hope for the best. */
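/* E.g. a block with three ordinary successors gives each of them
   (REG_BR_PROB_BASE + 1) / 3, i.e. roughly 33.3%, while EH and fake edges
   get probability 0.  */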
713 static void
714 set_even_probabilities (basic_block bb)
716 int nedges = 0;
717 edge e;
718 edge_iterator ei;
720 FOR_EACH_EDGE (e, ei, bb->succs)
721 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
722 nedges ++;
723 FOR_EACH_EDGE (e, ei, bb->succs)
724 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
725 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
726 else
727 e->probability = 0;
730 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
731 note if not already present. Remove now useless REG_BR_PRED notes. */
733 static void
734 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
736 rtx prob_note;
737 rtx *pnote;
738 rtx note;
739 int best_probability = PROB_EVEN;
740 enum br_predictor best_predictor = END_PREDICTORS;
741 int combined_probability = REG_BR_PROB_BASE / 2;
742 int d;
743 bool first_match = false;
744 bool found = false;
746 if (!can_predict_insn_p (insn))
748 set_even_probabilities (bb);
749 return;
752 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
753 pnote = &REG_NOTES (insn);
754 if (dump_file)
755 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
756 bb->index);
758 /* We implement "first match" heuristics and use the probability guessed
759 by the predictor with the smallest index. */
760 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
761 if (REG_NOTE_KIND (note) == REG_BR_PRED)
763 enum br_predictor predictor = ((enum br_predictor)
764 INTVAL (XEXP (XEXP (note, 0), 0)));
765 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
767 found = true;
768 if (best_predictor > predictor)
769 best_probability = probability, best_predictor = predictor;
771 d = (combined_probability * probability
772 + (REG_BR_PROB_BASE - combined_probability)
773 * (REG_BR_PROB_BASE - probability));
775 /* Use FP math to avoid overflows of 32bit integers. */
776 if (d == 0)
777 /* If one probability is 0% and one 100%, avoid division by zero. */
778 combined_probability = REG_BR_PROB_BASE / 2;
779 else
780 combined_probability = (((double) combined_probability) * probability
781 * REG_BR_PROB_BASE / d + 0.5);
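/* Worked example of the combination formula above (assuming
   REG_BR_PROB_BASE is 10000): combining p1 = 7000 (70%) with p2 = 6000
   (60%) gives d = 7000*6000 + 3000*4000 = 54000000 and a combined
   probability of about 7778, i.e. roughly 78% -- two agreeing predictors
   reinforce each other.  */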
784 /* Decide which heuristic to use. In case we didn't match anything,
785 use the no_prediction heuristic; in case we did match, use either
786 first match or Dempster-Shafer theory depending on the flags. */
788 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
789 first_match = true;
791 if (!found)
792 dump_prediction (dump_file, PRED_NO_PREDICTION,
793 combined_probability, bb, true);
794 else
796 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
797 bb, !first_match);
798 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
799 bb, first_match);
802 if (first_match)
803 combined_probability = best_probability;
804 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
806 while (*pnote)
808 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
810 enum br_predictor predictor = ((enum br_predictor)
811 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
812 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
814 dump_prediction (dump_file, predictor, probability, bb,
815 !first_match || best_predictor == predictor);
816 *pnote = XEXP (*pnote, 1);
818 else
819 pnote = &XEXP (*pnote, 1);
822 if (!prob_note)
824 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
826 /* Save the prediction into the CFG in case we are seeing a non-degenerate
827 conditional jump. */
828 if (!single_succ_p (bb))
830 BRANCH_EDGE (bb)->probability = combined_probability;
831 FALLTHRU_EDGE (bb)->probability
832 = REG_BR_PROB_BASE - combined_probability;
835 else if (!single_succ_p (bb))
837 int prob = XINT (prob_note, 0);
839 BRANCH_EDGE (bb)->probability = prob;
840 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
842 else
843 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
846 /* Combine predictions into single probability and store them into CFG.
847 Remove now useless prediction entries. */
849 static void
850 combine_predictions_for_bb (basic_block bb)
852 int best_probability = PROB_EVEN;
853 enum br_predictor best_predictor = END_PREDICTORS;
854 int combined_probability = REG_BR_PROB_BASE / 2;
855 int d;
856 bool first_match = false;
857 bool found = false;
858 struct edge_prediction *pred;
859 int nedges = 0;
860 edge e, first = NULL, second = NULL;
861 edge_iterator ei;
863 FOR_EACH_EDGE (e, ei, bb->succs)
864 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
866 nedges ++;
867 if (first && !second)
868 second = e;
869 if (!first)
870 first = e;
873 /* When there is no successor or only one choice, prediction is easy.
875 We are lazy for now and predict only basic blocks with two outgoing
876 edges. It is possible to predict the generic case too, but we would have
877 to ignore first match heuristics and do more involved combining. Implement
878 this later. */
879 if (nedges != 2)
881 if (!bb->count)
882 set_even_probabilities (bb);
883 clear_bb_predictions (bb);
884 if (dump_file)
885 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
886 nedges, bb->index);
887 return;
890 if (dump_file)
891 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
893 edge_prediction **preds = bb_predictions->get (bb);
894 if (preds)
896 /* We implement "first match" heuristics and use the probability guessed
897 by the predictor with the smallest index. */
898 for (pred = *preds; pred; pred = pred->ep_next)
900 enum br_predictor predictor = pred->ep_predictor;
901 int probability = pred->ep_probability;
903 if (pred->ep_edge != first)
904 probability = REG_BR_PROB_BASE - probability;
906 found = true;
907 /* First match heuristics would be wildly confused if we predicted
908 both directions. */
909 if (best_predictor > predictor)
911 struct edge_prediction *pred2;
912 int prob = probability;
914 for (pred2 = (struct edge_prediction *) *preds;
915 pred2; pred2 = pred2->ep_next)
916 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
918 int probability2 = pred2->ep_probability;
920 if (pred2->ep_edge != first)
921 probability2 = REG_BR_PROB_BASE - probability2;
923 if ((probability < REG_BR_PROB_BASE / 2) !=
924 (probability2 < REG_BR_PROB_BASE / 2))
925 break;
927 /* If the same predictor later gave better result, go for it! */
928 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
929 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
930 prob = probability2;
932 if (!pred2)
933 best_probability = prob, best_predictor = predictor;
936 d = (combined_probability * probability
937 + (REG_BR_PROB_BASE - combined_probability)
938 * (REG_BR_PROB_BASE - probability));
940 /* Use FP math to avoid overflows of 32bit integers. */
941 if (d == 0)
942 /* If one probability is 0% and one 100%, avoid division by zero. */
943 combined_probability = REG_BR_PROB_BASE / 2;
944 else
945 combined_probability = (((double) combined_probability)
946 * probability
947 * REG_BR_PROB_BASE / d + 0.5);
951 /* Decide which heuristic to use. In case we didn't match anything,
952 use the no_prediction heuristic; in case we did match, use either
953 first match or Dempster-Shafer theory depending on the flags. */
955 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
956 first_match = true;
958 if (!found)
959 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
960 else
962 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
963 !first_match);
964 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
965 first_match);
968 if (first_match)
969 combined_probability = best_probability;
970 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
972 if (preds)
974 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
976 enum br_predictor predictor = pred->ep_predictor;
977 int probability = pred->ep_probability;
979 if (pred->ep_edge != EDGE_SUCC (bb, 0))
980 probability = REG_BR_PROB_BASE - probability;
981 dump_prediction (dump_file, predictor, probability, bb,
982 !first_match || best_predictor == predictor);
985 clear_bb_predictions (bb);
987 if (!bb->count)
989 first->probability = combined_probability;
990 second->probability = REG_BR_PROB_BASE - combined_probability;
994 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
995 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
997 T1 and T2 should be one of the following cases:
998 1. T1 is SSA_NAME, T2 is NULL
999 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1000 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
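/* Illustrative examples (hypothetical SSA names): for i_1 + 2 the function
   returns i_1; for i_1 + 8 it returns NULL because the constant is outside
   [-4, 4]; for i_1 + j_2 it returns NULL because both operands are
   SSA_NAMEs.  */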
1002 static tree
1003 strips_small_constant (tree t1, tree t2)
1005 tree ret = NULL;
1006 int value = 0;
1008 if (!t1)
1009 return NULL;
1010 else if (TREE_CODE (t1) == SSA_NAME)
1011 ret = t1;
1012 else if (tree_fits_shwi_p (t1))
1013 value = tree_to_shwi (t1);
1014 else
1015 return NULL;
1017 if (!t2)
1018 return ret;
1019 else if (tree_fits_shwi_p (t2))
1020 value = tree_to_shwi (t2);
1021 else if (TREE_CODE (t2) == SSA_NAME)
1023 if (ret)
1024 return NULL;
1025 else
1026 ret = t2;
1029 if (value <= 4 && value >= -4)
1030 return ret;
1031 else
1032 return NULL;
1035 /* Return the SSA_NAME in T or T's operands.
1036 Return NULL if SSA_NAME cannot be found. */
1038 static tree
1039 get_base_value (tree t)
1041 if (TREE_CODE (t) == SSA_NAME)
1042 return t;
1044 if (!BINARY_CLASS_P (t))
1045 return NULL;
1047 switch (TREE_OPERAND_LENGTH (t))
1049 case 1:
1050 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1051 case 2:
1052 return strips_small_constant (TREE_OPERAND (t, 0),
1053 TREE_OPERAND (t, 1));
1054 default:
1055 return NULL;
1059 /* Check the compare STMT in LOOP. If it compares an induction
1060 variable to a loop invariant, return true, and save
1061 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1062 Otherwise return false and set LOOP_INVARIANT to NULL. */
1064 static bool
1065 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1066 tree *loop_invariant,
1067 enum tree_code *compare_code,
1068 tree *loop_step,
1069 tree *loop_iv_base)
1071 tree op0, op1, bound, base;
1072 affine_iv iv0, iv1;
1073 enum tree_code code;
1074 tree step;
1076 code = gimple_cond_code (stmt);
1077 *loop_invariant = NULL;
1079 switch (code)
1081 case GT_EXPR:
1082 case GE_EXPR:
1083 case NE_EXPR:
1084 case LT_EXPR:
1085 case LE_EXPR:
1086 case EQ_EXPR:
1087 break;
1089 default:
1090 return false;
1093 op0 = gimple_cond_lhs (stmt);
1094 op1 = gimple_cond_rhs (stmt);
1096 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1097 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1098 return false;
1099 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1100 return false;
1101 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1102 return false;
1103 if (TREE_CODE (iv0.step) != INTEGER_CST
1104 || TREE_CODE (iv1.step) != INTEGER_CST)
1105 return false;
1106 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1107 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1108 return false;
1110 if (integer_zerop (iv0.step))
1112 if (code != NE_EXPR && code != EQ_EXPR)
1113 code = invert_tree_comparison (code, false);
1114 bound = iv0.base;
1115 base = iv1.base;
1116 if (tree_fits_shwi_p (iv1.step))
1117 step = iv1.step;
1118 else
1119 return false;
1121 else
1123 bound = iv1.base;
1124 base = iv0.base;
1125 if (tree_fits_shwi_p (iv0.step))
1126 step = iv0.step;
1127 else
1128 return false;
1131 if (TREE_CODE (bound) != INTEGER_CST)
1132 bound = get_base_value (bound);
1133 if (!bound)
1134 return false;
1135 if (TREE_CODE (base) != INTEGER_CST)
1136 base = get_base_value (base);
1137 if (!base)
1138 return false;
1140 *loop_invariant = bound;
1141 *compare_code = code;
1142 *loop_step = step;
1143 *loop_iv_base = base;
1144 return true;
1147 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1149 static bool
1150 expr_coherent_p (tree t1, tree t2)
1152 gimple stmt;
1153 tree ssa_name_1 = NULL;
1154 tree ssa_name_2 = NULL;
1156 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1157 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1159 if (t1 == t2)
1160 return true;
1162 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1163 return true;
1164 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1165 return false;
1167 /* Check to see if t1 is expressed/defined with t2. */
1168 stmt = SSA_NAME_DEF_STMT (t1);
1169 gcc_assert (stmt != NULL);
1170 if (is_gimple_assign (stmt))
1172 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1173 if (ssa_name_1 && ssa_name_1 == t2)
1174 return true;
1177 /* Check to see if t2 is expressed/defined with t1. */
1178 stmt = SSA_NAME_DEF_STMT (t2);
1179 gcc_assert (stmt != NULL);
1180 if (is_gimple_assign (stmt))
1182 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1183 if (ssa_name_2 && ssa_name_2 == t1)
1184 return true;
1187 /* Compare if t1 and t2's def_stmts are identical. */
1188 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1189 return true;
1190 else
1191 return false;
1194 /* Predict branch probability of BB when BB contains a branch that compares
1195 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1196 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1198 E.g.
1199 for (int i = 0; i < bound; i++) {
1200 if (i < bound - 2)
1201 computation_1();
1202 else
1203 computation_2();
1206 In this loop, we will predict the branch inside the loop to be taken. */
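/* Worked example: for the loop "for (i = 0; i < 100; i++)" with the inner
   test "i < 98" (all constants), the code below computes loop_count = 100
   and compare_count = 98, so the then edge is predicted with probability
   98 * REG_BR_PROB_BASE / 100, i.e. 98%.  */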
1208 static void
1209 predict_iv_comparison (struct loop *loop, basic_block bb,
1210 tree loop_bound_var,
1211 tree loop_iv_base_var,
1212 enum tree_code loop_bound_code,
1213 int loop_bound_step)
1215 gimple stmt;
1216 tree compare_var, compare_base;
1217 enum tree_code compare_code;
1218 tree compare_step_var;
1219 edge then_edge;
1220 edge_iterator ei;
1222 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1223 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1224 || predicted_by_p (bb, PRED_LOOP_EXIT))
1225 return;
1227 stmt = last_stmt (bb);
1228 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1229 return;
1230 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1231 &compare_code,
1232 &compare_step_var,
1233 &compare_base))
1234 return;
1236 /* Find the taken edge. */
1237 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1238 if (then_edge->flags & EDGE_TRUE_VALUE)
1239 break;
1241 /* When comparing an IV to a loop invariant, NE is more likely to be
1242 taken while EQ is more likely to be not-taken. */
1243 if (compare_code == NE_EXPR)
1245 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1246 return;
1248 else if (compare_code == EQ_EXPR)
1250 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1251 return;
1254 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1255 return;
1257 /* If loop bound, base and compare bound are all constants, we can
1258 calculate the probability directly. */
1259 if (tree_fits_shwi_p (loop_bound_var)
1260 && tree_fits_shwi_p (compare_var)
1261 && tree_fits_shwi_p (compare_base))
1263 int probability;
1264 bool overflow, overall_overflow = false;
1265 widest_int compare_count, tem;
1267 /* (loop_bound - base) / compare_step */
1268 tem = wi::sub (wi::to_widest (loop_bound_var),
1269 wi::to_widest (compare_base), SIGNED, &overflow);
1270 overall_overflow |= overflow;
1271 widest_int loop_count = wi::div_trunc (tem,
1272 wi::to_widest (compare_step_var),
1273 SIGNED, &overflow);
1274 overall_overflow |= overflow;
1276 if (!wi::neg_p (wi::to_widest (compare_step_var))
1277 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1279 /* (loop_bound - compare_bound) / compare_step */
1280 tem = wi::sub (wi::to_widest (loop_bound_var),
1281 wi::to_widest (compare_var), SIGNED, &overflow);
1282 overall_overflow |= overflow;
1283 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1284 SIGNED, &overflow);
1285 overall_overflow |= overflow;
1287 else
1289 /* (compare_bound - base) / compare_step */
1290 tem = wi::sub (wi::to_widest (compare_var),
1291 wi::to_widest (compare_base), SIGNED, &overflow);
1292 overall_overflow |= overflow;
1293 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1294 SIGNED, &overflow);
1295 overall_overflow |= overflow;
1297 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1298 ++compare_count;
1299 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1300 ++loop_count;
1301 if (wi::neg_p (compare_count))
1302 compare_count = 0;
1303 if (wi::neg_p (loop_count))
1304 loop_count = 0;
1305 if (loop_count == 0)
1306 probability = 0;
1307 else if (wi::cmps (compare_count, loop_count) == 1)
1308 probability = REG_BR_PROB_BASE;
1309 else
1311 tem = compare_count * REG_BR_PROB_BASE;
1312 tem = wi::udiv_trunc (tem, loop_count);
1313 probability = tem.to_uhwi ();
1316 if (!overall_overflow)
1317 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1319 return;
1322 if (expr_coherent_p (loop_bound_var, compare_var))
1324 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1325 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1326 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1327 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1328 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1329 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330 else if (loop_bound_code == NE_EXPR)
1332 /* If the loop backedge condition is "(i != bound)", we do
1333 the comparison based on the step of IV:
1334 * step < 0 : backedge condition is like (i > bound)
1335 * step > 0 : backedge condition is like (i < bound) */
1336 gcc_assert (loop_bound_step != 0);
1337 if (loop_bound_step > 0
1338 && (compare_code == LT_EXPR
1339 || compare_code == LE_EXPR))
1340 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1341 else if (loop_bound_step < 0
1342 && (compare_code == GT_EXPR
1343 || compare_code == GE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else
1346 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1348 else
1349 /* The branch is predicted not-taken if loop_bound_code is
1350 opposite with compare_code. */
1351 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1353 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1355 /* For cases like:
1356 for (i = s; i < h; i++)
1357 if (i > s + 2) ....
1358 The branch should be predicted taken. */
1359 if (loop_bound_step > 0
1360 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1361 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1362 else if (loop_bound_step < 0
1363 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365 else
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1371 exits result from short-circuit conditions that will generate an
1372 if_tmp. E.g.:
1374 if (foo() || global > 10)
1375 break;
1377 This will be translated into:
1379 BB3:
1380 loop header...
1381 BB4:
1382 if foo() goto BB6 else goto BB5
1383 BB5:
1384 if global > 10 goto BB6 else goto BB7
1385 BB6:
1386 goto BB7
1387 BB7:
1388 iftmp = (PHI 0(BB5), 1(BB6))
1389 if iftmp == 1 goto BB8 else goto BB3
1390 BB8:
1391 outside of the loop...
1393 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1394 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1395 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1396 exits to predict them using PRED_LOOP_EXIT. */
1398 static void
1399 predict_extra_loop_exits (edge exit_edge)
1401 unsigned i;
1402 bool check_value_one;
1403 gimple phi_stmt;
1404 tree cmp_rhs, cmp_lhs;
1405 gimple cmp_stmt = last_stmt (exit_edge->src);
1407 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1408 return;
1409 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1410 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1411 if (!TREE_CONSTANT (cmp_rhs)
1412 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1413 return;
1414 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1415 return;
1417 /* If check_value_one is true, only the phi_args with value '1' will lead
1418 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1419 loop exit. */
1420 check_value_one = (((integer_onep (cmp_rhs))
1421 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1422 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1424 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1425 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1426 return;
1428 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1430 edge e1;
1431 edge_iterator ei;
1432 tree val = gimple_phi_arg_def (phi_stmt, i);
1433 edge e = gimple_phi_arg_edge (phi_stmt, i);
1435 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1436 continue;
1437 if ((check_value_one ^ integer_onep (val)) == 1)
1438 continue;
1439 if (EDGE_COUNT (e->src->succs) != 1)
1441 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1442 continue;
1445 FOR_EACH_EDGE (e1, ei, e->src->preds)
1446 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1450 /* Predict edge probabilities by exploiting loop structure. */
1452 static void
1453 predict_loops (void)
1455 struct loop *loop;
1457 /* Try to predict out blocks in a loop that are not part of a
1458 natural loop. */
1459 FOR_EACH_LOOP (loop, 0)
1461 basic_block bb, *bbs;
1462 unsigned j, n_exits;
1463 vec<edge> exits;
1464 struct tree_niter_desc niter_desc;
1465 edge ex;
1466 struct nb_iter_bound *nb_iter;
1467 enum tree_code loop_bound_code = ERROR_MARK;
1468 tree loop_bound_step = NULL;
1469 tree loop_bound_var = NULL;
1470 tree loop_iv_base = NULL;
1471 gimple stmt = NULL;
1473 exits = get_loop_exit_edges (loop);
1474 n_exits = exits.length ();
1475 if (!n_exits)
1477 exits.release ();
1478 continue;
1481 FOR_EACH_VEC_ELT (exits, j, ex)
1483 tree niter = NULL;
1484 HOST_WIDE_INT nitercst;
1485 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1486 int probability;
1487 enum br_predictor predictor;
1489 predict_extra_loop_exits (ex);
1491 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1492 niter = niter_desc.niter;
1493 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1494 niter = loop_niter_by_eval (loop, ex);
1496 if (TREE_CODE (niter) == INTEGER_CST)
1498 if (tree_fits_uhwi_p (niter)
1499 && max
1500 && compare_tree_int (niter, max - 1) == -1)
1501 nitercst = tree_to_uhwi (niter) + 1;
1502 else
1503 nitercst = max;
1504 predictor = PRED_LOOP_ITERATIONS;
1506 /* If we have just one exit and we can derive some information about
1507 the number of iterations of the loop from the statements inside
1508 the loop, use it to predict this exit. */
1509 else if (n_exits == 1)
1511 nitercst = estimated_stmt_executions_int (loop);
1512 if (nitercst < 0)
1513 continue;
1514 if (nitercst > max)
1515 nitercst = max;
1517 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1519 else
1520 continue;
1522 /* If the prediction for number of iterations is zero, do not
1523 predict the exit edges. */
1524 if (nitercst == 0)
1525 continue;
1527 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
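/* E.g. a loop expected to iterate about 10 times (nitercst == 10) gives
   the exit edge (REG_BR_PROB_BASE + 5) / 10, i.e. roughly a 10% chance of
   leaving the loop on each iteration.  */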
1528 predict_edge (ex, predictor, probability);
1530 exits.release ();
1532 /* Find information about loop bound variables. */
1533 for (nb_iter = loop->bounds; nb_iter;
1534 nb_iter = nb_iter->next)
1535 if (nb_iter->stmt
1536 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1538 stmt = nb_iter->stmt;
1539 break;
1541 if (!stmt && last_stmt (loop->header)
1542 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1543 stmt = last_stmt (loop->header);
1544 if (stmt)
1545 is_comparison_with_loop_invariant_p (stmt, loop,
1546 &loop_bound_var,
1547 &loop_bound_code,
1548 &loop_bound_step,
1549 &loop_iv_base);
1551 bbs = get_loop_body (loop);
1553 for (j = 0; j < loop->num_nodes; j++)
1555 int header_found = 0;
1556 edge e;
1557 edge_iterator ei;
1559 bb = bbs[j];
1561 /* Bypass loop heuristics on continue statements. These
1562 statements construct loops via "non-loop" constructs
1563 in the source language and are better handled
1564 separately. */
1565 if (predicted_by_p (bb, PRED_CONTINUE))
1566 continue;
1568 /* Loop branch heuristics - predict an edge back to a
1569 loop's head as taken. */
1570 if (bb == loop->latch)
1572 e = find_edge (loop->latch, loop->header);
1573 if (e)
1575 header_found = 1;
1576 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1580 /* Loop exit heuristics - predict an edge exiting the loop if the
1581 conditional has no loop header successors as not taken. */
1582 if (!header_found
1583 /* If we already used more reliable loop exit predictors, do not
1584 bother with PRED_LOOP_EXIT. */
1585 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1586 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1588 /* For a loop with many exits we don't want to predict all exits
1589 with a pretty large probability, because if all exits are
1590 considered in a row, the loop would be predicted to iterate
1591 almost never. The code to divide the probability by the number
1592 of exits is very rough. It should compute the number of exits
1593 taken in each path through the function (not the overall number
1594 of exits, which might be a lot higher for loops with wide switch
1595 statements in them) and compute the n-th square root.
1597 We limit the minimal probability to 2% to avoid
1598 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1599 as this was causing a regression in a perl benchmark containing
1600 such a wide loop. */
1602 int probability = ((REG_BR_PROB_BASE
1603 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1604 / n_exits);
1605 if (probability < HITRATE (2))
1606 probability = HITRATE (2);
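/* Illustrative example: if PRED_LOOP_EXIT's hit rate were 90%, a loop with
   four exits would give each exit edge (REG_BR_PROB_BASE - 9000) / 4 = 250,
   i.e. 2.5%, which is still above the 2% floor applied above.  */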
1607 FOR_EACH_EDGE (e, ei, bb->succs)
1608 if (e->dest->index < NUM_FIXED_BLOCKS
1609 || !flow_bb_inside_loop_p (loop, e->dest))
1610 predict_edge (e, PRED_LOOP_EXIT, probability);
1612 if (loop_bound_var)
1613 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1614 loop_bound_code,
1615 tree_to_shwi (loop_bound_step));
1618 /* Free basic blocks from get_loop_body. */
1619 free (bbs);
1623 /* Attempt to predict probabilities of BB outgoing edges using local
1624 properties. */
1625 static void
1626 bb_estimate_probability_locally (basic_block bb)
1628 rtx_insn *last_insn = BB_END (bb);
1629 rtx cond;
1631 if (! can_predict_insn_p (last_insn))
1632 return;
1633 cond = get_condition (last_insn, NULL, false, false);
1634 if (! cond)
1635 return;
1637 /* Try "pointer heuristic."
1638 A comparison ptr == 0 is predicted as false.
1639 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1640 if (COMPARISON_P (cond)
1641 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1642 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1644 if (GET_CODE (cond) == EQ)
1645 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1646 else if (GET_CODE (cond) == NE)
1647 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1649 else
1651 /* Try "opcode heuristic."
1652 EQ tests are usually false and NE tests are usually true. Also,
1653 most quantities are positive, so we can make the appropriate guesses
1654 about signed comparisons against zero. */
1655 switch (GET_CODE (cond))
1657 case CONST_INT:
1658 /* Unconditional branch. */
1659 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1660 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1661 break;
1663 case EQ:
1664 case UNEQ:
1665 /* Floating point comparisons appear to behave in a very
1666 unpredictable way because of the special role of = tests in
1667 FP code. */
1668 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1670 /* Comparisons with 0 are often used for booleans and there is
1671 nothing useful to predict about them. */
1672 else if (XEXP (cond, 1) == const0_rtx
1673 || XEXP (cond, 0) == const0_rtx)
1675 else
1676 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1677 break;
1679 case NE:
1680 case LTGT:
1681 /* Floating point comparisons appear to behave in a very
1682 unpredictable way because of the special role of = tests in
1683 FP code. */
1684 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1686 /* Comparisons with 0 are often used for booleans and there is
1687 nothing useful to predict about them. */
1688 else if (XEXP (cond, 1) == const0_rtx
1689 || XEXP (cond, 0) == const0_rtx)
1691 else
1692 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1693 break;
1695 case ORDERED:
1696 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1697 break;
1699 case UNORDERED:
1700 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1701 break;
1703 case LE:
1704 case LT:
1705 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1706 || XEXP (cond, 1) == constm1_rtx)
1707 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1708 break;
1710 case GE:
1711 case GT:
1712 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1713 || XEXP (cond, 1) == constm1_rtx)
1714 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1715 break;
1717 default:
1718 break;
1722 /* Set edge->probability for each successor edge of BB. */
1723 void
1724 guess_outgoing_edge_probabilities (basic_block bb)
1726 bb_estimate_probability_locally (bb);
1727 combine_predictions_for_insn (BB_END (bb), bb);
1730 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1732 /* Helper function for expr_expected_value. */
1734 static tree
1735 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1736 tree op1, bitmap visited, enum br_predictor *predictor)
1738 gimple def;
1740 if (predictor)
1741 *predictor = PRED_UNCONDITIONAL;
1743 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1745 if (TREE_CONSTANT (op0))
1746 return op0;
1748 if (code != SSA_NAME)
1749 return NULL_TREE;
1751 def = SSA_NAME_DEF_STMT (op0);
1753 /* If we were already here, break the infinite cycle. */
1754 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1755 return NULL;
1757 if (gimple_code (def) == GIMPLE_PHI)
1759 /* All the arguments of the PHI node must have the same expected
1760 constant value. */
1761 int i, n = gimple_phi_num_args (def);
1762 tree val = NULL, new_val;
1764 for (i = 0; i < n; i++)
1766 tree arg = PHI_ARG_DEF (def, i);
1767 enum br_predictor predictor2;
1769 /* If this PHI has itself as an argument, we cannot
1770 determine the expected value of this argument. However,
1771 if we can find an expected constant value for the other
1772 PHI args then we can still be sure that this is
1773 likely a constant. So be optimistic and just
1774 continue with the next argument. */
1775 if (arg == PHI_RESULT (def))
1776 continue;
1778 new_val = expr_expected_value (arg, visited, &predictor2);
1780 /* It is difficult to combine value predictors. Simply assume
1781 that the later predictor is weaker and take its prediction. */
1782 if (predictor && *predictor < predictor2)
1783 *predictor = predictor2;
1784 if (!new_val)
1785 return NULL;
1786 if (!val)
1787 val = new_val;
1788 else if (!operand_equal_p (val, new_val, false))
1789 return NULL;
1791 return val;
1793 if (is_gimple_assign (def))
1795 if (gimple_assign_lhs (def) != op0)
1796 return NULL;
1798 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1799 gimple_assign_rhs1 (def),
1800 gimple_assign_rhs_code (def),
1801 gimple_assign_rhs2 (def),
1802 visited, predictor);
1805 if (is_gimple_call (def))
1807 tree decl = gimple_call_fndecl (def);
1808 if (!decl)
1810 if (gimple_call_internal_p (def)
1811 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1813 gcc_assert (gimple_call_num_args (def) == 3);
1814 tree val = gimple_call_arg (def, 0);
1815 if (TREE_CONSTANT (val))
1816 return val;
1817 if (predictor)
1819 tree val2 = gimple_call_arg (def, 2);
1820 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1821 && tree_fits_uhwi_p (val2)
1822 && tree_to_uhwi (val2) < END_PREDICTORS);
1823 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1825 return gimple_call_arg (def, 1);
1827 return NULL;
1829 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1830 switch (DECL_FUNCTION_CODE (decl))
1832 case BUILT_IN_EXPECT:
1834 tree val;
1835 if (gimple_call_num_args (def) != 2)
1836 return NULL;
1837 val = gimple_call_arg (def, 0);
1838 if (TREE_CONSTANT (val))
1839 return val;
1840 if (predictor)
1841 *predictor = PRED_BUILTIN_EXPECT;
1842 return gimple_call_arg (def, 1);
1845 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1846 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1847 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1848 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1849 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1850 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1851 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1852 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1853 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1854 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1855 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1856 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1857 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1858 /* Assume that any given atomic operation has low contention,
1859 and thus the compare-and-swap operation succeeds. */
1860 if (predictor)
1861 *predictor = PRED_COMPARE_AND_SWAP;
1862 return boolean_true_node;
1863 default:
1864 break;
1868 return NULL;
1871 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1873 tree res;
1874 enum br_predictor predictor2;
1875 op0 = expr_expected_value (op0, visited, predictor);
1876 if (!op0)
1877 return NULL;
1878 op1 = expr_expected_value (op1, visited, &predictor2);
1879 if (predictor && *predictor < predictor2)
1880 *predictor = predictor2;
1881 if (!op1)
1882 return NULL;
1883 res = fold_build2 (code, type, op0, op1);
1884 if (TREE_CONSTANT (res))
1885 return res;
1886 return NULL;
1888 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1890 tree res;
1891 op0 = expr_expected_value (op0, visited, predictor);
1892 if (!op0)
1893 return NULL;
1894 res = fold_build1 (code, type, op0);
1895 if (TREE_CONSTANT (res))
1896 return res;
1897 return NULL;
1899 return NULL;
1902 /* Return the constant EXPR will likely have at execution time, or NULL if
1903 unknown. The function is used by the builtin_expect branch predictor, so
1904 the evidence must come from this construct and possible additional
1906 constant folding. We may want to implement a more involved value guess
1907 (such as value range propagation based prediction), but such tricks shall
1908 go to a new implementation. */
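/* For example (illustrative):

     if (__builtin_expect (ptr == NULL, 0))
       handle_error ();

   here the expected value of the condition is the constant 0, so the call
   to handle_error ends up on the path predicted as unlikely.  */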
1910 static tree
1911 expr_expected_value (tree expr, bitmap visited,
1912 enum br_predictor *predictor)
1914 enum tree_code code;
1915 tree op0, op1;
1917 if (TREE_CONSTANT (expr))
1919 if (predictor)
1920 *predictor = PRED_UNCONDITIONAL;
1921 return expr;
1924 extract_ops_from_tree (expr, &code, &op0, &op1);
1925 return expr_expected_value_1 (TREE_TYPE (expr),
1926 op0, code, op1, visited, predictor);
1929 /* Predict using opcode of the last statement in basic block. */
1930 static void
1931 tree_predict_by_opcode (basic_block bb)
1933 gimple stmt = last_stmt (bb);
1934 edge then_edge;
1935 tree op0, op1;
1936 tree type;
1937 tree val;
1938 enum tree_code cmp;
1939 bitmap visited;
1940 edge_iterator ei;
1941 enum br_predictor predictor;
1943 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1944 return;
1945 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1946 if (then_edge->flags & EDGE_TRUE_VALUE)
1947 break;
1948 op0 = gimple_cond_lhs (stmt);
1949 op1 = gimple_cond_rhs (stmt);
1950 cmp = gimple_cond_code (stmt);
1951 type = TREE_TYPE (op0);
1952 visited = BITMAP_ALLOC (NULL);
1953 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1954 &predictor);
1955 BITMAP_FREE (visited);
1956 if (val && TREE_CODE (val) == INTEGER_CST)
1958 if (predictor == PRED_BUILTIN_EXPECT)
1960 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1962 gcc_assert (percent >= 0 && percent <= 100);
1963 if (integer_zerop (val))
1964 percent = 100 - percent;
1965 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1967 else
1968 predict_edge (then_edge, predictor,
1969 integer_zerop (val) ? NOT_TAKEN : TAKEN);
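/* E.g. assuming the builtin-expect-probability parameter keeps its usual
   default of 90, __builtin_expect (cond, 1) predicts the then edge with
   HITRATE (90) and __builtin_expect (cond, 0) with HITRATE (10).  */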
1971 /* Try "pointer heuristic."
1972 A comparison ptr == 0 is predicted as false.
1973 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1974 if (POINTER_TYPE_P (type))
1976 if (cmp == EQ_EXPR)
1977 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1978 else if (cmp == NE_EXPR)
1979 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1981 else
1983 /* Try "opcode heuristic."
1984 EQ tests are usually false and NE tests are usually true. Also,
1985 most quantities are positive, so we can make the appropriate guesses
1986 about signed comparisons against zero. */
1987 switch (cmp)
1989 case EQ_EXPR:
1990 case UNEQ_EXPR:
1991 /* Floating point comparisons appear to behave in a very
1992 unpredictable way because of the special role of = tests in
1993 FP code. */
1994 if (FLOAT_TYPE_P (type))
1996 /* Comparisons with 0 are often used for booleans and there is
1997 nothing useful to predict about them. */
1998 else if (integer_zerop (op0) || integer_zerop (op1))
2000 else
2001 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2002 break;
2004 case NE_EXPR:
2005 case LTGT_EXPR:
2006 /* Floating point comparisons appear to behave in a very
2007 unpredictable way because of the special role of = tests in
2008 FP code. */
2009 if (FLOAT_TYPE_P (type))
2011 /* Comparisons with 0 are often used for booleans and there is
2012 nothing useful to predict about them. */
2013 else if (integer_zerop (op0)
2014 || integer_zerop (op1))
2016 else
2017 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2018 break;
2020 case ORDERED_EXPR:
2021 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2022 break;
2024 case UNORDERED_EXPR:
2025 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2026 break;
2028 case LE_EXPR:
2029 case LT_EXPR:
2030 if (integer_zerop (op1)
2031 || integer_onep (op1)
2032 || integer_all_onesp (op1)
2033 || real_zerop (op1)
2034 || real_onep (op1)
2035 || real_minus_onep (op1))
2036 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2037 break;
2039 case GE_EXPR:
2040 case GT_EXPR:
2041 if (integer_zerop (op1)
2042 || integer_onep (op1)
2043 || integer_all_onesp (op1)
2044 || real_zerop (op1)
2045 || real_onep (op1)
2046 || real_minus_onep (op1))
2047 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2048 break;
2050 default:
2051 break;
2055 /* Try to guess whether the return value means an error code. */
2057 static enum br_predictor
2058 return_prediction (tree val, enum prediction *prediction)
2060 /* VOID. */
2061 if (!val)
2062 return PRED_NO_PREDICTION;
2063 /* Different heuristics for pointers and scalars. */
2064 if (POINTER_TYPE_P (TREE_TYPE (val)))
2066 /* NULL is usually not returned. */
2067 if (integer_zerop (val))
2069 *prediction = NOT_TAKEN;
2070 return PRED_NULL_RETURN;
2073 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2075 /* Negative return values are often used to indicate
2076 errors. */
2077 if (TREE_CODE (val) == INTEGER_CST
2078 && tree_int_cst_sgn (val) < 0)
2080 *prediction = NOT_TAKEN;
2081 return PRED_NEGATIVE_RETURN;
2083 /* Constant return values seem to be commonly taken.
2084 Zero/one often represent booleans, so exclude them from the
2085 heuristics. */
2086 if (TREE_CONSTANT (val)
2087 && (!integer_zerop (val) && !integer_onep (val)))
2089 *prediction = TAKEN;
2090 return PRED_CONST_RETURN;
2093 return PRED_NO_PREDICTION;
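/* Illustrative summary (not in the original source) of how the heuristics
   above classify return values:

     return NULL;   -> PRED_NULL_RETURN, NOT_TAKEN
     return -1;     -> PRED_NEGATIVE_RETURN, NOT_TAKEN
     return 42;     -> PRED_CONST_RETURN, TAKEN
     return 0;      -> no prediction (0 and 1 usually mean booleans)  */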
2096 /* Find the basic block with the return expression and look up the possible
2097 return value, trying to apply the return_prediction heuristics. */
2098 static void
2099 apply_return_prediction (void)
2101 gimple return_stmt = NULL;
2102 tree return_val;
2103 edge e;
2104 gimple phi;
2105 int phi_num_args, i;
2106 enum br_predictor pred;
2107 enum prediction direction;
2108 edge_iterator ei;
2110 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2112 return_stmt = last_stmt (e->src);
2113 if (return_stmt
2114 && gimple_code (return_stmt) == GIMPLE_RETURN)
2115 break;
2117 if (!e)
2118 return;
2119 return_val = gimple_return_retval (return_stmt);
2120 if (!return_val)
2121 return;
2122 if (TREE_CODE (return_val) != SSA_NAME
2123 || !SSA_NAME_DEF_STMT (return_val)
2124 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2125 return;
2126 phi = SSA_NAME_DEF_STMT (return_val);
2127 phi_num_args = gimple_phi_num_args (phi);
2128 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2130 /* Avoid the degenerate case where all return values from the function
2131 belong to the same category (i.e. they are all positive constants),
2132 so we can hardly say anything about them. */
2133 for (i = 1; i < phi_num_args; i++)
2134 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2135 break;
2136 if (i != phi_num_args)
2137 for (i = 0; i < phi_num_args; i++)
2139 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2140 if (pred != PRED_NO_PREDICTION)
2141 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2142 direction);
2146 /* Look for basic blocks that contain unlikely-to-happen events
2147 (such as noreturn calls) and mark all paths leading to execution
2148 of these basic blocks as unlikely. */
2150 static void
2151 tree_bb_level_predictions (void)
2153 basic_block bb;
2154 bool has_return_edges = false;
2155 edge e;
2156 edge_iterator ei;
2158 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2159 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2161 has_return_edges = true;
2162 break;
2165 apply_return_prediction ();
2167 FOR_EACH_BB_FN (bb, cfun)
2169 gimple_stmt_iterator gsi;
2171 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2173 gimple stmt = gsi_stmt (gsi);
2174 tree decl;
2176 if (is_gimple_call (stmt))
2178 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2179 && has_return_edges)
2180 predict_paths_leading_to (bb, PRED_NORETURN,
2181 NOT_TAKEN);
2182 decl = gimple_call_fndecl (stmt);
2183 if (decl
2184 && lookup_attribute ("cold",
2185 DECL_ATTRIBUTES (decl)))
2186 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2187 NOT_TAKEN);
2189 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2191 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2192 gimple_predict_outcome (stmt));
2193 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2194 hints to callers. */
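/* For illustration (not part of the original source): given

     if (!p)
       abort ();

   the call to abort is ECF_NORETURN, so, provided the function has normal
   return edges, every path leading to that block is predicted as not taken;
   calls to functions declared __attribute__ ((cold)) are handled the same
   way above.  */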
2200 #ifdef ENABLE_CHECKING
2202 /* Callback for hash_map::traverse, asserts that the pointer map is
2203 empty. */
2205 bool
2206 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2207 void *)
2209 gcc_assert (!value);
2210 return false;
2212 #endif
2214 /* Predict branch probabilities and estimate profile for basic block BB. */
2216 static void
2217 tree_estimate_probability_bb (basic_block bb)
2219 edge e;
2220 edge_iterator ei;
2221 gimple last;
2223 FOR_EACH_EDGE (e, ei, bb->succs)
2225 /* Predict edges to user labels with attributes. */
2226 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2228 gimple_stmt_iterator gi;
2229 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2231 gimple stmt = gsi_stmt (gi);
2232 tree decl;
2234 if (gimple_code (stmt) != GIMPLE_LABEL)
2235 break;
2236 decl = gimple_label_label (stmt);
2237 if (DECL_ARTIFICIAL (decl))
2238 continue;
2240 /* Finally, we have a user-defined label. */
2241 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2242 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2243 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2244 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
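/* Illustrative sketch (assumed example, not from the original source):
   a user label marked as unlikely, e.g.

     if (err)
       goto cleanup;
     ...
   cleanup: __attribute__ ((cold));

   causes the edge into the block starting at CLEANUP to be predicted
   as not taken via PRED_COLD_LABEL.  */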
2248 /* Predict early returns to be probable, as we've already taken
2249 care of error returns, and other cases are often used for
2250 fast paths through the function.
2252 Since we've already removed the return statements, we are
2253 looking for a CFG like:
2255 if (conditional)
2258 goto return_block
2260 some other blocks
2261 return_block:
2262 return_stmt. */
2263 if (e->dest != bb->next_bb
2264 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2265 && single_succ_p (e->dest)
2266 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2267 && (last = last_stmt (e->dest)) != NULL
2268 && gimple_code (last) == GIMPLE_RETURN)
2270 edge e1;
2271 edge_iterator ei1;
2273 if (single_succ_p (bb))
2275 FOR_EACH_EDGE (e1, ei1, bb->preds)
2276 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2277 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2278 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2279 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2281 else
2282 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2283 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2284 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2285 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2288 /* Look for a block we are guarding (i.e. we dominate it,
2289 but it doesn't postdominate us). */
2290 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2291 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2292 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2294 gimple_stmt_iterator bi;
2296 /* The call heuristic claims that a guarded function call
2297 is improbable. This is because such calls are often used
2298 to signal exceptional situations such as printing error
2299 messages. */
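/* E.g. (illustration only), in

     if (fd < 0)
       perror ("open");

   the guarded block contains a call with side effects, so the edge into it
   is predicted as not taken by PRED_CALL.  */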
2300 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2301 gsi_next (&bi))
2303 gimple stmt = gsi_stmt (bi);
2304 if (is_gimple_call (stmt)
2305 /* Constant and pure calls are hardly used to signal
2306 something exceptional. */
2307 && gimple_has_side_effects (stmt))
2309 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2310 break;
2315 tree_predict_by_opcode (bb);
2318 /* Predict branch probabilities and estimate profile of the tree CFG.
2319 This function can be called from the loop optimizers to recompute
2320 the profile information. */
2322 void
2323 tree_estimate_probability (void)
2325 basic_block bb;
2327 add_noreturn_fake_exit_edges ();
2328 connect_infinite_loops_to_exit ();
2329 /* We use loop_niter_by_eval, which requires that the loops have
2330 preheaders. */
2331 create_preheaders (CP_SIMPLE_PREHEADERS);
2332 calculate_dominance_info (CDI_POST_DOMINATORS);
2334 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2335 tree_bb_level_predictions ();
2336 record_loop_exits ();
2338 if (number_of_loops (cfun) > 1)
2339 predict_loops ();
2341 FOR_EACH_BB_FN (bb, cfun)
2342 tree_estimate_probability_bb (bb);
2344 FOR_EACH_BB_FN (bb, cfun)
2345 combine_predictions_for_bb (bb);
2347 #ifdef ENABLE_CHECKING
2348 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2349 #endif
2350 delete bb_predictions;
2351 bb_predictions = NULL;
2353 estimate_bb_frequencies (false);
2354 free_dominance_info (CDI_POST_DOMINATORS);
2355 remove_fake_exit_edges ();
2358 /* Predict, by PRED, edges to successors of CUR whose sources are not
2359 postdominated by BB, and recurse to all postdominators. */
2361 static void
2362 predict_paths_for_bb (basic_block cur, basic_block bb,
2363 enum br_predictor pred,
2364 enum prediction taken,
2365 bitmap visited)
2367 edge e;
2368 edge_iterator ei;
2369 basic_block son;
2371 /* We are looking for all edges forming an edge cut induced by the
2372 set of all blocks postdominated by BB. */
2373 FOR_EACH_EDGE (e, ei, cur->preds)
2374 if (e->src->index >= NUM_FIXED_BLOCKS
2375 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2377 edge e2;
2378 edge_iterator ei2;
2379 bool found = false;
2381 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2382 if (e->flags & (EDGE_EH | EDGE_FAKE))
2383 continue;
2384 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2386 /* See if there is an edge from e->src that is not abnormal
2387 and does not lead to BB. */
2388 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2389 if (e2 != e
2390 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2391 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2393 found = true;
2394 break;
2397 /* If there is a non-abnormal path leaving e->src, predict the edge
2398 using the predictor. Otherwise we need to look for paths
2399 leading to e->src.
2401 The second may lead to an infinite loop in the case we are predicting
2402 regions that are only reachable by abnormal edges. We simply
2403 prevent visiting a given BB twice. */
2404 if (found)
2405 predict_edge_def (e, pred, taken);
2406 else if (bitmap_set_bit (visited, e->src->index))
2407 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2409 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2410 son;
2411 son = next_dom_son (CDI_POST_DOMINATORS, son))
2412 predict_paths_for_bb (son, bb, pred, taken, visited);
2415 /* Set branch probabilities on all paths leading to BB according to
2416 PREDiction and whether it is TAKEN. */
2418 static void
2419 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2420 enum prediction taken)
2422 bitmap visited = BITMAP_ALLOC (NULL);
2423 predict_paths_for_bb (bb, bb, pred, taken, visited);
2424 BITMAP_FREE (visited);
2427 /* Like predict_paths_leading_to but take edge instead of basic block. */
2429 static void
2430 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2431 enum prediction taken)
2433 bool has_nonloop_edge = false;
2434 edge_iterator ei;
2435 edge e2;
2437 basic_block bb = e->src;
2438 FOR_EACH_EDGE (e2, ei, bb->succs)
2439 if (e2->dest != e->src && e2->dest != e->dest
2440 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2441 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2443 has_nonloop_edge = true;
2444 break;
2446 if (!has_nonloop_edge)
2448 bitmap visited = BITMAP_ALLOC (NULL);
2449 predict_paths_for_bb (bb, bb, pred, taken, visited);
2450 BITMAP_FREE (visited);
2452 else
2453 predict_edge_def (e, pred, taken);
2456 /* This is used to carry information about basic blocks. It is
2457 attached to the AUX field of the standard CFG block. */
2459 struct block_info
2461 /* Estimated frequency of execution of basic_block. */
2462 sreal frequency;
2464 /* To keep queue of basic blocks to process. */
2465 basic_block next;
2467 /* Number of predecessors we need to visit first. */
2468 int npredecessors;
2471 /* Similar information for edges. */
2472 struct edge_prob_info
2474 /* In case the edge is a loopback edge, the probability that the edge will
2475 be reached provided that the header is. The estimated number of iterations
2476 of the loop can then be computed as 1 / (1 - back_edge_prob). */
2477 sreal back_edge_prob;
2478 /* True if the edge is a loopback edge in the natural loop. */
2479 unsigned int back_edge:1;
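/* Worked example (illustrative): a latch edge with back_edge_prob == 0.9
   corresponds to an estimated 1 / (1 - 0.9) == 10 iterations of the loop
   per entry into its header.  */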
2482 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2483 #undef EDGE_INFO
2484 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2486 /* Helper function for estimate_bb_frequencies.
2487 Propagate the frequencies in blocks marked in
2488 TOVISIT, starting in HEAD. */
2490 static void
2491 propagate_freq (basic_block head, bitmap tovisit)
2493 basic_block bb;
2494 basic_block last;
2495 unsigned i;
2496 edge e;
2497 basic_block nextbb;
2498 bitmap_iterator bi;
2500 /* For each basic block we need to visit, count the number of its
2501 predecessors that we need to visit first. */
2502 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2504 edge_iterator ei;
2505 int count = 0;
2507 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2509 FOR_EACH_EDGE (e, ei, bb->preds)
2511 bool visit = bitmap_bit_p (tovisit, e->src->index);
2513 if (visit && !(e->flags & EDGE_DFS_BACK))
2514 count++;
2515 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2516 fprintf (dump_file,
2517 "Irreducible region hit, ignoring edge to %i->%i\n",
2518 e->src->index, bb->index);
2520 BLOCK_INFO (bb)->npredecessors = count;
2521 /* When the function never returns, we will never process the exit block. */
2522 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2523 bb->count = bb->frequency = 0;
2526 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2527 last = head;
2528 for (bb = head; bb; bb = nextbb)
2530 edge_iterator ei;
2531 sreal cyclic_probability, frequency;
2533 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2534 memcpy (&frequency, &real_zero, sizeof (real_zero));
2536 nextbb = BLOCK_INFO (bb)->next;
2537 BLOCK_INFO (bb)->next = NULL;
2539 /* Compute frequency of basic block. */
2540 if (bb != head)
2542 #ifdef ENABLE_CHECKING
2543 FOR_EACH_EDGE (e, ei, bb->preds)
2544 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2545 || (e->flags & EDGE_DFS_BACK));
2546 #endif
2548 FOR_EACH_EDGE (e, ei, bb->preds)
2549 if (EDGE_INFO (e)->back_edge)
2551 sreal_add (&cyclic_probability, &cyclic_probability,
2552 &EDGE_INFO (e)->back_edge_prob);
2554 else if (!(e->flags & EDGE_DFS_BACK))
2556 sreal tmp;
2558 /* frequency += (e->probability
2559 * BLOCK_INFO (e->src)->frequency /
2560 REG_BR_PROB_BASE); */
2562 sreal_init (&tmp, e->probability, 0);
2563 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2564 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2565 sreal_add (&frequency, &frequency, &tmp);
2568 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2570 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2571 sizeof (frequency));
2573 else
2575 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2577 memcpy (&cyclic_probability, &real_almost_one,
2578 sizeof (real_almost_one));
2581 /* BLOCK_INFO (bb)->frequency = frequency
2582 / (1 - cyclic_probability) */
2584 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2585 sreal_div (&BLOCK_INFO (bb)->frequency,
2586 &frequency, &cyclic_probability);
2590 bitmap_clear_bit (tovisit, bb->index);
2592 e = find_edge (bb, head);
2593 if (e)
2595 sreal tmp;
2597 /* EDGE_INFO (e)->back_edge_prob
2598 = ((e->probability * BLOCK_INFO (bb)->frequency)
2599 / REG_BR_PROB_BASE); */
2601 sreal_init (&tmp, e->probability, 0);
2602 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2603 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2604 &tmp, &real_inv_br_prob_base);
2607 /* Propagate to successor blocks. */
2608 FOR_EACH_EDGE (e, ei, bb->succs)
2609 if (!(e->flags & EDGE_DFS_BACK)
2610 && BLOCK_INFO (e->dest)->npredecessors)
2612 BLOCK_INFO (e->dest)->npredecessors--;
2613 if (!BLOCK_INFO (e->dest)->npredecessors)
2615 if (!nextbb)
2616 nextbb = e->dest;
2617 else
2618 BLOCK_INFO (last)->next = e->dest;
2620 last = e->dest;
2626 /* Estimate frequencies in loops at same nest level. */
2628 static void
2629 estimate_loops_at_level (struct loop *first_loop)
2631 struct loop *loop;
2633 for (loop = first_loop; loop; loop = loop->next)
2635 edge e;
2636 basic_block *bbs;
2637 unsigned i;
2638 bitmap tovisit = BITMAP_ALLOC (NULL);
2640 estimate_loops_at_level (loop->inner);
2642 /* Find current loop back edge and mark it. */
2643 e = loop_latch_edge (loop);
2644 EDGE_INFO (e)->back_edge = 1;
2646 bbs = get_loop_body (loop);
2647 for (i = 0; i < loop->num_nodes; i++)
2648 bitmap_set_bit (tovisit, bbs[i]->index);
2649 free (bbs);
2650 propagate_freq (loop->header, tovisit);
2651 BITMAP_FREE (tovisit);
2655 /* Propagates frequencies through structure of loops. */
2657 static void
2658 estimate_loops (void)
2660 bitmap tovisit = BITMAP_ALLOC (NULL);
2661 basic_block bb;
2663 /* Start by estimating the frequencies in the loops. */
2664 if (number_of_loops (cfun) > 1)
2665 estimate_loops_at_level (current_loops->tree_root->inner);
2667 /* Now propagate the frequencies through all the blocks. */
2668 FOR_ALL_BB_FN (bb, cfun)
2670 bitmap_set_bit (tovisit, bb->index);
2672 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2673 BITMAP_FREE (tovisit);
2676 /* Drop the profile for NODE to guessed, and update its frequency based on
2677 whether it is expected to be hot given the CALL_COUNT. */
2679 static void
2680 drop_profile (struct cgraph_node *node, gcov_type call_count)
2682 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2683 /* In the case where this was called by another function with a
2684 dropped profile, call_count will be 0. Since there are no
2685 non-zero call counts to this function, we don't know for sure
2686 whether it is hot, and therefore it will be marked normal below. */
2687 bool hot = maybe_hot_count_p (NULL, call_count);
2689 if (dump_file)
2690 fprintf (dump_file,
2691 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2692 node->name (), node->order,
2693 hot ? "Function is hot" : "Function is normal");
2694 /* We only expect to miss profiles for functions that are reached
2695 via non-zero call edges in cases where the function may have
2696 been linked from another module or library (COMDATs and extern
2697 templates). See the comments below for handle_missing_profiles.
2698 Also, only warn in cases where the missing counts exceed the
2699 number of training runs. In certain cases with an execv followed
2700 by a no-return call the profile for the no-return call is not
2701 dumped and there can be a mismatch. */
2702 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2703 && call_count > profile_info->runs)
2705 if (flag_profile_correction)
2707 if (dump_file)
2708 fprintf (dump_file,
2709 "Missing counts for called function %s/%i\n",
2710 node->name (), node->order);
2712 else
2713 warning (0, "Missing counts for called function %s/%i",
2714 node->name (), node->order);
2717 profile_status_for_fn (fn)
2718 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2719 node->frequency
2720 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2723 /* In the case of COMDAT routines, multiple object files will contain the same
2724 function and the linker will select one for the binary. In that case
2725 all the other copies from the profile-instrumented binary will be missing
2726 profile counts. Look for cases where this happened, due to non-zero
2727 call counts going to 0-count functions, and drop the profile to guessed
2728 so that we can use the estimated probabilities and avoid optimizing only
2729 for size.
2731 The other case where the profile may be missing is when the routine
2732 is not going to be emitted to the object file, e.g. for "extern template"
2733 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2734 all other cases of non-zero calls to 0-count functions. */
2736 void
2737 handle_missing_profiles (void)
2739 struct cgraph_node *node;
2740 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2741 vec<struct cgraph_node *> worklist;
2742 worklist.create (64);
2744 /* See if 0 count function has non-0 count callers. In this case we
2745 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2746 FOR_EACH_DEFINED_FUNCTION (node)
2748 struct cgraph_edge *e;
2749 gcov_type call_count = 0;
2750 gcov_type max_tp_first_run = 0;
2751 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2753 if (node->count)
2754 continue;
2755 for (e = node->callers; e; e = e->next_caller)
2757 call_count += e->count;
2759 if (e->caller->tp_first_run > max_tp_first_run)
2760 max_tp_first_run = e->caller->tp_first_run;
2763 /* If the time profile is missing, assign the maximum that comes from
2764 the caller functions. */
2765 if (!node->tp_first_run && max_tp_first_run)
2766 node->tp_first_run = max_tp_first_run + 1;
2768 if (call_count
2769 && fn && fn->cfg
2770 && (call_count * unlikely_count_fraction >= profile_info->runs))
2772 drop_profile (node, call_count);
2773 worklist.safe_push (node);
2777 /* Propagate the profile dropping to other 0-count COMDATs that are
2778 potentially called by COMDATs we already dropped the profile on. */
2779 while (worklist.length () > 0)
2781 struct cgraph_edge *e;
2783 node = worklist.pop ();
2784 for (e = node->callees; e; e = e->next_callee)
2786 struct cgraph_node *callee = e->callee;
2787 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2789 if (callee->count > 0)
2790 continue;
2791 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2792 && profile_status_for_fn (fn) == PROFILE_READ)
2794 drop_profile (node, 0);
2795 worklist.safe_push (callee);
2799 worklist.release ();
2802 /* Convert counts measured by profile driven feedback to frequencies.
2803 Return nonzero iff there was any nonzero execution count. */
2806 counts_to_freqs (void)
2808 gcov_type count_max, true_count_max = 0;
2809 basic_block bb;
2811 /* Don't overwrite the estimated frequencies when the profile for
2812 the function is missing. We may drop this function to PROFILE_GUESSED
2813 later in drop_profile (). */
2814 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2815 return 0;
2817 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2818 true_count_max = MAX (bb->count, true_count_max);
2820 count_max = MAX (true_count_max, 1);
2821 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2822 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2824 return true_count_max;
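/* Rough numeric example (not part of the original source): with
   count_max == 1000 and bb->count == 250, the scaling above yields
   bb->frequency == (250 * BB_FREQ_MAX + 500) / 1000, i.e. about a quarter
   of BB_FREQ_MAX; frequencies are thus normalized so the hottest block
   gets BB_FREQ_MAX.  */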
2827 /* Return true if the function is likely to be expensive, so there is no
2828 point in optimizing the performance of the prologue, epilogue or doing
2829 inlining at the expense of code size growth. THRESHOLD is the limit on the
2830 number of instructions the function can execute on average to still be considered not expensive. */
2832 bool
2833 expensive_function_p (int threshold)
2835 unsigned int sum = 0;
2836 basic_block bb;
2837 unsigned int limit;
2839 /* We cannot compute accurately for large thresholds due to scaled
2840 frequencies. */
2841 gcc_assert (threshold <= BB_FREQ_MAX);
2843 /* Frequencies are out of range. This either means that the function contains
2844 an internal loop executing more than BB_FREQ_MAX times or profile feedback
2845 is available and the function has not been executed at all. */
2846 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2847 return true;
2849 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2850 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2851 FOR_EACH_BB_FN (bb, cfun)
2853 rtx_insn *insn;
2855 FOR_BB_INSNS (bb, insn)
2856 if (active_insn_p (insn))
2858 sum += bb->frequency;
2859 if (sum > limit)
2860 return true;
2864 return false;
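/* Illustrative sketch (assumed numbers, not from the original source): with
   the entry block frequency at 1000 and THRESHOLD == 50, LIMIT is 50000.
   Each active insn contributes its block's frequency to SUM, so SUM
   exceeding LIMIT means the function executes more than about 50 insns
   per invocation on average.  */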
2867 /* Estimate and propagate basic block frequencies using the given branch
2868 probabilities. If FORCE is true, the frequencies are used to estimate
2869 the counts even when there are already non-zero profile counts. */
2871 void
2872 estimate_bb_frequencies (bool force)
2874 basic_block bb;
2875 sreal freq_max;
2877 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2879 static int real_values_initialized = 0;
2881 if (!real_values_initialized)
2883 real_values_initialized = 1;
2884 sreal_init (&real_zero, 0, 0);
2885 sreal_init (&real_one, 1, 0);
2886 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2887 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2888 sreal_init (&real_one_half, 1, -1);
2889 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2890 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2893 mark_dfs_back_edges ();
2895 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2896 REG_BR_PROB_BASE;
2898 /* Set up block info for each basic block. */
2899 alloc_aux_for_blocks (sizeof (block_info));
2900 alloc_aux_for_edges (sizeof (edge_prob_info));
2901 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2903 edge e;
2904 edge_iterator ei;
2906 FOR_EACH_EDGE (e, ei, bb->succs)
2908 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2909 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2910 &EDGE_INFO (e)->back_edge_prob,
2911 &real_inv_br_prob_base);
2915 /* First compute frequencies locally for each loop from innermost
2916 to outermost to examine frequencies for back edges. */
2917 estimate_loops ();
2919 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2920 FOR_EACH_BB_FN (bb, cfun)
2921 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2922 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2924 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2925 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2927 sreal tmp;
2929 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2930 sreal_add (&tmp, &tmp, &real_one_half);
2931 bb->frequency = sreal_to_int (&tmp);
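/* Note (added for clarity, not in the original source): at this point
   FREQ_MAX holds BB_FREQ_MAX divided by the largest propagated frequency,
   so the multiplication above rescales every block into the
   0 .. BB_FREQ_MAX range, with real_one_half giving round-to-nearest.  */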
2934 free_aux_for_blocks ();
2935 free_aux_for_edges ();
2937 compute_function_frequency ();
2940 /* Decide whether function is hot, cold or unlikely executed. */
2941 void
2942 compute_function_frequency (void)
2944 basic_block bb;
2945 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2947 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2948 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2949 node->only_called_at_startup = true;
2950 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2951 node->only_called_at_exit = true;
2953 if (profile_status_for_fn (cfun) != PROFILE_READ)
2955 int flags = flags_from_decl_or_type (current_function_decl);
2956 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2957 != NULL)
2958 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2959 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2960 != NULL)
2961 node->frequency = NODE_FREQUENCY_HOT;
2962 else if (flags & ECF_NORETURN)
2963 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2964 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2965 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2966 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2967 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2968 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2969 return;
2972 /* Only the first time try to drop the function into unlikely executed.
2973 After inlining the roundoff errors may confuse us.
2974 The ipa-profile pass will drop functions only called from unlikely
2975 functions to unlikely, and that is most of what we care about. */
2976 if (!cfun->after_inlining)
2977 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2978 FOR_EACH_BB_FN (bb, cfun)
2980 if (maybe_hot_bb_p (cfun, bb))
2982 node->frequency = NODE_FREQUENCY_HOT;
2983 return;
2985 if (!probably_never_executed_bb_p (cfun, bb))
2986 node->frequency = NODE_FREQUENCY_NORMAL;
2990 /* Build PREDICT_EXPR. */
2991 tree
2992 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2994 tree t = build1 (PREDICT_EXPR, void_type_node,
2995 build_int_cst (integer_type_node, predictor));
2996 SET_PREDICT_EXPR_OUTCOME (t, taken);
2997 return t;
3000 const char *
3001 predictor_name (enum br_predictor predictor)
3003 return predictor_info[predictor].name;
3006 /* Predict branch probabilities and estimate profile of the tree CFG. */
3008 namespace {
3010 const pass_data pass_data_profile =
3012 GIMPLE_PASS, /* type */
3013 "profile_estimate", /* name */
3014 OPTGROUP_NONE, /* optinfo_flags */
3015 TV_BRANCH_PROB, /* tv_id */
3016 PROP_cfg, /* properties_required */
3017 0, /* properties_provided */
3018 0, /* properties_destroyed */
3019 0, /* todo_flags_start */
3020 0, /* todo_flags_finish */
3023 class pass_profile : public gimple_opt_pass
3025 public:
3026 pass_profile (gcc::context *ctxt)
3027 : gimple_opt_pass (pass_data_profile, ctxt)
3030 /* opt_pass methods: */
3031 virtual bool gate (function *) { return flag_guess_branch_prob; }
3032 virtual unsigned int execute (function *);
3034 }; // class pass_profile
3036 unsigned int
3037 pass_profile::execute (function *fun)
3039 unsigned nb_loops;
3041 loop_optimizer_init (LOOPS_NORMAL);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 flow_loops_dump (dump_file, NULL, 0);
3045 mark_irreducible_loops ();
3047 nb_loops = number_of_loops (fun);
3048 if (nb_loops > 1)
3049 scev_initialize ();
3051 tree_estimate_probability ();
3053 if (nb_loops > 1)
3054 scev_finalize ();
3056 loop_optimizer_finalize ();
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 gimple_dump_cfg (dump_file, dump_flags);
3059 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3060 profile_status_for_fn (fun) = PROFILE_GUESSED;
3061 return 0;
3064 } // anon namespace
3066 gimple_opt_pass *
3067 make_pass_profile (gcc::context *ctxt)
3069 return new pass_profile (ctxt);
3072 namespace {
3074 const pass_data pass_data_strip_predict_hints =
3076 GIMPLE_PASS, /* type */
3077 "*strip_predict_hints", /* name */
3078 OPTGROUP_NONE, /* optinfo_flags */
3079 TV_BRANCH_PROB, /* tv_id */
3080 PROP_cfg, /* properties_required */
3081 0, /* properties_provided */
3082 0, /* properties_destroyed */
3083 0, /* todo_flags_start */
3084 0, /* todo_flags_finish */
3087 class pass_strip_predict_hints : public gimple_opt_pass
3089 public:
3090 pass_strip_predict_hints (gcc::context *ctxt)
3091 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3094 /* opt_pass methods: */
3095 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3096 virtual unsigned int execute (function *);
3098 }; // class pass_strip_predict_hints
3100 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3101 we no longer need. */
3102 unsigned int
3103 pass_strip_predict_hints::execute (function *fun)
3105 basic_block bb;
3106 gimple ass_stmt;
3107 tree var;
3109 FOR_EACH_BB_FN (bb, fun)
3111 gimple_stmt_iterator bi;
3112 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3114 gimple stmt = gsi_stmt (bi);
3116 if (gimple_code (stmt) == GIMPLE_PREDICT)
3118 gsi_remove (&bi, true);
3119 continue;
3121 else if (is_gimple_call (stmt))
3123 tree fndecl = gimple_call_fndecl (stmt);
3125 if ((fndecl
3126 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3127 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3128 && gimple_call_num_args (stmt) == 2)
3129 || (gimple_call_internal_p (stmt)
3130 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3132 var = gimple_call_lhs (stmt);
3133 if (var)
3135 ass_stmt
3136 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3137 gsi_replace (&bi, ass_stmt, true);
3139 else
3141 gsi_remove (&bi, true);
3142 continue;
3146 gsi_next (&bi);
3149 return 0;
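/* Example of the transformation (illustration only): a statement such as

     tmp = __builtin_expect (cond, 1);

   is rewritten above into the plain copy

     tmp = cond;

   while GIMPLE_PREDICT statements are removed outright, since the hints
   they carried are no longer needed at this point.  */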
3152 } // anon namespace
3154 gimple_opt_pass *
3155 make_pass_strip_predict_hints (gcc::context *ctxt)
3157 return new pass_strip_predict_hints (ctxt);
3160 /* Rebuild function frequencies. Passes are in general expected to
3161 maintain the profile by hand; however, in some cases this is not possible:
3162 for example, when inlining several functions with loops, frequencies might run
3163 out of scale and thus need to be recomputed. */
3165 void
3166 rebuild_frequencies (void)
3168 timevar_push (TV_REBUILD_FREQUENCIES);
3170 /* When the max bb count in the function is small, there is a higher
3171 chance that there were truncation errors in the integer scaling
3172 of counts by inlining and other optimizations. This could lead
3173 to incorrect classification of code as being cold when it isn't.
3174 In that case, force the estimation of bb counts/frequencies from the
3175 branch probabilities, rather than computing frequencies from counts,
3176 which may also lead to frequencies incorrectly reduced to 0. There
3177 is less precision in the probabilities, so we only do this for small
3178 max counts. */
3179 gcov_type count_max = 0;
3180 basic_block bb;
3181 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3182 count_max = MAX (bb->count, count_max);
3184 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3185 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3186 && count_max < REG_BR_PROB_BASE/10))
3188 loop_optimizer_init (0);
3189 add_noreturn_fake_exit_edges ();
3190 mark_irreducible_loops ();
3191 connect_infinite_loops_to_exit ();
3192 estimate_bb_frequencies (true);
3193 remove_fake_exit_edges ();
3194 loop_optimizer_finalize ();
3196 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3197 counts_to_freqs ();
3198 else
3199 gcc_unreachable ();
3200 timevar_pop (TV_REBUILD_FREQUENCIES);