/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "calls.h"
#include "tm_p.h"
#include "cfganal.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "profile.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
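
/* Worked example of the macro above (illustrative; assumes the usual
   REG_BR_PROB_BASE of 10000): HITRATE (99) == (99 * 10000 + 50) / 100
   == 9900, so a 99% expected hitrate is stored as 9900 out of 10000,
   while HITRATE (50) == 5000 represents an even split; the "+ 50" term
   merely rounds the division to nearest.  */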
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
        return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
              / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
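
/* Illustrative sketch of the thresholds above, assuming the documented
   default hot-bb-frequency-fraction of 1000: a block stops being
   considered hot once its frequency falls below 1/1000 of the function
   entry block's frequency, and a function known to execute only once is
   additionally required to reach 2/3 of the entry frequency.  */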
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if profile count COUNT is considered to be hot.  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect never being executed.  */

static bool
probably_never_executed (struct function *fun,
                         gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
	     unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
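
/* Illustrative numbers for the count test above, assuming the documented
   default unlikely-bb-count-fraction of 20: with profile_info->runs ==
   1000 training runs, a block is "probably never executed" only when
   count * 20 < 1000, i.e. it ran fewer than once per 20 training runs.  */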
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}

/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
          || (bb && !maybe_hot_bb_p (cfun, bb)));
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}
/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
          while (l != loop && !l->next)
            l = loop_outer (l);
          if (l != loop)
            l = l->next;
        }
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
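
/* Example of the cutoff above, assuming the documented default
   predictable-branch-outcome of 2 and REG_BR_PROB_BASE of 10000: an edge
   counts as well predictable when its probability is at most
   2 * 10000 / 100 == 200 (2%) or at least 9800 (98%).  */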
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
        && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* Structure representing predictions in tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true when the probability PROB of an edge is reliable.

   The profile guessing code is good at predicting the branch outcome (i.e.
   taken/not taken); it is right slightly over 75% of the time.  It is,
   however, notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn blocks and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment it
   simply checks for the noreturn heuristic, which is the only one giving a
   probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find it useful
   in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
                gen_rtx_CONCAT (VOIDmode,
                                GEN_INT ((int) predictor),
                                GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
       1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
          && any_condjump_p (insn)
          && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec %" PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %" PRId64, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}
/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
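
/* For example (assuming REG_BR_PROB_BASE of 10000), a block with three
   normal successors gets (10000 + 1) / 3 == 3333 on each edge; the
   nedges / 2 term just rounds the division to nearest.  */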
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
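
  /* Worked example of the Dempster-Shafer combination above (in
     REG_BR_PROB_BASE == 10000 units): combining predictions of 7000 (70%)
     and 8000 (80%) gives d = 7000*8000 + 3000*2000 = 62,000,000 and a
     combined probability of 7000 * 8000 * 10000 / 62,000,000 ~= 9032
     (90.3%), i.e. two agreeing predictors reinforce each other.  */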
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
  edge_prediction **preds = bb_predictions->get (bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */
static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }
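
  /* Worked example of the constant case above: for
       for (i = 0; i < 100; i++) if (i < 30) ...
     we get loop_count = (100 - 0) / 1 = 100 and compare_count =
     (30 - 0) / 1 = 30, so the then edge is predicted with probability
     30/100 of REG_BR_PROB_BASE, i.e. 30%.  */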
  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */
static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
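
      /* For instance (assuming REG_BR_PROB_BASE of 10000), an exit from a
	 loop predicted to iterate 100 times receives probability
	 (10000 + 50) / 100 == 100, i.e. roughly a 1% chance of exiting per
	 iteration; the nitercst / 2 term rounds the division to nearest.  */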
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);
      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by the number
		 of exits is very rough.  It should compute the number of
		 exits taken in each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in the perl benchmark
		 containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
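
	  /* Sketch of the arithmetic above: with, say, a 90% hitrate for
	     PRED_LOOP_EXIT and four exits, each exit edge would receive
	     (10000 - 9000) / 4 == 250, safely above the HITRATE (2) == 200
	     floor (both numbers assume REG_BR_PROB_BASE of 10000; the
	     actual hitrate comes from predict.def).  */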
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}
      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
/* Return the constant EXPR will likely have at execution time, or NULL if
   unknown.  The function is used by the builtin_expect branch predictor, so
   the evidence must come from this construct and additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
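
  /* Usage sketch for the branch above: for source like
       if (__builtin_expect (x != 0, 1)) ...
     the then edge would be predicted taken with HITRATE (90) == 9000,
     assuming the documented default builtin-expect-probability of 90
     percent and REG_BR_PROB_BASE of 10000.  */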
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Try to guess whether the return value VAL indicates an error code.  */
2060 static enum br_predictor
2061 return_prediction (tree val, enum prediction *prediction)
2063 /* VOID. */
2064 if (!val)
2065 return PRED_NO_PREDICTION;
2066 /* Different heuristics for pointers and scalars. */
2067 if (POINTER_TYPE_P (TREE_TYPE (val)))
2069 /* NULL is usually not returned. */
2070 if (integer_zerop (val))
2072 *prediction = NOT_TAKEN;
2073 return PRED_NULL_RETURN;
2076 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2078 /* Negative return values are often used to indicate
2079 errors. */
2080 if (TREE_CODE (val) == INTEGER_CST
2081 && tree_int_cst_sgn (val) < 0)
2083 *prediction = NOT_TAKEN;
2084 return PRED_NEGATIVE_RETURN;
/* Constant return values seem to be commonly taken.
   Zero/one often represent booleans, so exclude them from the
   heuristics.  */
2089 if (TREE_CONSTANT (val)
2090 && (!integer_zerop (val) && !integer_onep (val)))
2092 *prediction = TAKEN;
2093 return PRED_CONST_RETURN;
2096 return PRED_NO_PREDICTION;
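/* For illustration (hypothetical user code):

     if (!p)
       return NULL;
     ...
     return -1;

   The NULL return matches PRED_NULL_RETURN and the negative constant
   matches PRED_NEGATIVE_RETURN; both predict the paths reaching them
   as not taken.  */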
/* Find the basic block holding the return expression, look up the
   possible return value, and try to apply the return-prediction
   heuristics to it.  */
2101 static void
2102 apply_return_prediction (void)
2104 greturn *return_stmt = NULL;
2105 tree return_val;
2106 edge e;
2107 gphi *phi;
2108 int phi_num_args, i;
2109 enum br_predictor pred;
2110 enum prediction direction;
2111 edge_iterator ei;
2113 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2115 gimple last = last_stmt (e->src);
2116 if (last
2117 && gimple_code (last) == GIMPLE_RETURN)
2119 return_stmt = as_a <greturn *> (last);
2120 break;
2123 if (!e)
2124 return;
2125 return_val = gimple_return_retval (return_stmt);
2126 if (!return_val)
2127 return;
2128 if (TREE_CODE (return_val) != SSA_NAME
2129 || !SSA_NAME_DEF_STMT (return_val)
2130 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2131 return;
2132 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2133 phi_num_args = gimple_phi_num_args (phi);
2134 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
/* Avoid the degenerate case where all return values from the function
   belong to the same category (i.e. they are all positive constants),
   in which case we can hardly say anything useful about them.  */
2139 for (i = 1; i < phi_num_args; i++)
2140 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2141 break;
2142 if (i != phi_num_args)
2143 for (i = 0; i < phi_num_args; i++)
2145 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2146 if (pred != PRED_NO_PREDICTION)
2147 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2148 direction);
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such basic blocks as unlikely.  */
2156 static void
2157 tree_bb_level_predictions (void)
2159 basic_block bb;
2160 bool has_return_edges = false;
2161 edge e;
2162 edge_iterator ei;
2164 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2165 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2167 has_return_edges = true;
2168 break;
2171 apply_return_prediction ();
2173 FOR_EACH_BB_FN (bb, cfun)
2175 gimple_stmt_iterator gsi;
2177 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2179 gimple stmt = gsi_stmt (gsi);
2180 tree decl;
2182 if (is_gimple_call (stmt))
2184 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2185 && has_return_edges)
2186 predict_paths_leading_to (bb, PRED_NORETURN,
2187 NOT_TAKEN);
2188 decl = gimple_call_fndecl (stmt);
2189 if (decl
2190 && lookup_attribute ("cold",
2191 DECL_ATTRIBUTES (decl)))
2192 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2193 NOT_TAKEN);
2195 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2197 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2198 gimple_predict_outcome (stmt));
2199 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2200 hints to callers. */
2206 #ifdef ENABLE_CHECKING
2208 /* Callback for hash_map::traverse, asserts that the pointer map is
2209 empty. */
2211 bool
2212 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2213 void *)
2215 gcc_assert (!value);
2216 return false;
2218 #endif
2220 /* Predict branch probabilities and estimate profile for basic block BB. */
2222 static void
2223 tree_estimate_probability_bb (basic_block bb)
2225 edge e;
2226 edge_iterator ei;
2227 gimple last;
2229 FOR_EACH_EDGE (e, ei, bb->succs)
2231 /* Predict edges to user labels with attributes. */
2232 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2234 gimple_stmt_iterator gi;
2235 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2237 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2238 tree decl;
2240 if (!label_stmt)
2241 break;
2242 decl = gimple_label_label (label_stmt);
2243 if (DECL_ARTIFICIAL (decl))
2244 continue;
2246 /* Finally, we have a user-defined label. */
2247 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2248 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2249 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2250 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
/* Predict early returns to be probable, as we've already taken
   care of error returns and the remaining cases are often
   fast paths through the function.
2258 Since we've already removed the return statements, we are
2259 looking for CFG like:
if (conditional)
  {
    ...
    goto return_block;
  }
2266 some other blocks
2267 return_block:
2268 return_stmt. */
2269 if (e->dest != bb->next_bb
2270 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2271 && single_succ_p (e->dest)
2272 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2273 && (last = last_stmt (e->dest)) != NULL
2274 && gimple_code (last) == GIMPLE_RETURN)
2276 edge e1;
2277 edge_iterator ei1;
2279 if (single_succ_p (bb))
2281 FOR_EACH_EDGE (e1, ei1, bb->preds)
2282 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2283 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2284 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2285 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2287 else
2288 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2289 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2290 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2291 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
/* Look for a block we are guarding (i.e. we dominate it,
   but it doesn't postdominate us).  */
2296 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2297 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2298 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2300 gimple_stmt_iterator bi;
2302 /* The call heuristic claims that a guarded function call
2303 is improbable. This is because such calls are often used
2304 to signal exceptional situations such as printing error
2305 messages. */
2306 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2307 gsi_next (&bi))
2309 gimple stmt = gsi_stmt (bi);
2310 if (is_gimple_call (stmt)
/* Const and pure calls are rarely used to signal
   something exceptional.  */
2313 && gimple_has_side_effects (stmt))
2315 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2316 break;
2321 tree_predict_by_opcode (bb);
2324 /* Predict branch probabilities and estimate profile of the tree CFG.
2325 This function can be called from the loop optimizers to recompute
2326 the profile information. */
2328 void
2329 tree_estimate_probability (void)
2331 basic_block bb;
2333 add_noreturn_fake_exit_edges ();
2334 connect_infinite_loops_to_exit ();
2335 /* We use loop_niter_by_eval, which requires that the loops have
2336 preheaders. */
2337 create_preheaders (CP_SIMPLE_PREHEADERS);
2338 calculate_dominance_info (CDI_POST_DOMINATORS);
2340 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2341 tree_bb_level_predictions ();
2342 record_loop_exits ();
2344 if (number_of_loops (cfun) > 1)
2345 predict_loops ();
2347 FOR_EACH_BB_FN (bb, cfun)
2348 tree_estimate_probability_bb (bb);
2350 FOR_EACH_BB_FN (bb, cfun)
2351 combine_predictions_for_bb (bb);
2353 #ifdef ENABLE_CHECKING
2354 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2355 #endif
2356 delete bb_predictions;
2357 bb_predictions = NULL;
2359 estimate_bb_frequencies (false);
2360 free_dominance_info (CDI_POST_DOMINATORS);
2361 remove_fake_exit_edges ();
/* Predict, by PRED, the edges entering CUR whose sources are not
   postdominated by BB, and recurse to CUR's children in the
   postdominator tree.  */
2367 static void
2368 predict_paths_for_bb (basic_block cur, basic_block bb,
2369 enum br_predictor pred,
2370 enum prediction taken,
2371 bitmap visited)
2373 edge e;
2374 edge_iterator ei;
2375 basic_block son;
/* We are looking for all edges forming the edge cut induced by the
   set of all blocks postdominated by BB.  */
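/* A sketch for intuition (hypothetical CFG, not taken from a real
   compilation):

        A
       / \
      B   C
      |
      D   <- BB

   B is postdominated by D, but A is not (A can also reach C), so the
   edge A->B lies on the cut and is the edge that gets predicted.  */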
2379 FOR_EACH_EDGE (e, ei, cur->preds)
2380 if (e->src->index >= NUM_FIXED_BLOCKS
2381 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2383 edge e2;
2384 edge_iterator ei2;
2385 bool found = false;
/* Ignore fake and EH edges; we predict them as not taken anyway.  */
2388 if (e->flags & (EDGE_EH | EDGE_FAKE))
2389 continue;
2390 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2392 /* See if there is an edge from e->src that is not abnormal
2393 and does not lead to BB. */
2394 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2395 if (e2 != e
2396 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2397 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2399 found = true;
2400 break;
/* If there is a non-abnormal path leaving e->src, predict the edge
   using the predictor.  Otherwise we need to look for paths
   leading to e->src.

   The second case may lead to an infinite loop when we are predicting
   regions that are only reachable by abnormal edges.  We simply
   prevent visiting a given BB twice.  */
2410 if (found)
2411 predict_edge_def (e, pred, taken);
2412 else if (bitmap_set_bit (visited, e->src->index))
2413 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2415 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2416 son;
2417 son = next_dom_son (CDI_POST_DOMINATORS, son))
2418 predict_paths_for_bb (son, bb, pred, taken, visited);
/* Set branch probabilities of all paths leading to BB according to
   PRED and prediction TAKEN.  */
2424 static void
2425 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2426 enum prediction taken)
2428 bitmap visited = BITMAP_ALLOC (NULL);
2429 predict_paths_for_bb (bb, bb, pred, taken, visited);
2430 BITMAP_FREE (visited);
/* Like predict_paths_leading_to but takes an edge instead of a basic
   block.  */
2435 static void
2436 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2437 enum prediction taken)
2439 bool has_nonloop_edge = false;
2440 edge_iterator ei;
2441 edge e2;
2443 basic_block bb = e->src;
2444 FOR_EACH_EDGE (e2, ei, bb->succs)
2445 if (e2->dest != e->src && e2->dest != e->dest
&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
2447 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2449 has_nonloop_edge = true;
2450 break;
2452 if (!has_nonloop_edge)
2454 bitmap visited = BITMAP_ALLOC (NULL);
2455 predict_paths_for_bb (bb, bb, pred, taken, visited);
2456 BITMAP_FREE (visited);
2458 else
2459 predict_edge_def (e, pred, taken);
2462 /* This is used to carry information about basic blocks. It is
2463 attached to the AUX field of the standard CFG block. */
2465 struct block_info
2467 /* Estimated frequency of execution of basic_block. */
2468 sreal frequency;
/* Used to keep the queue of basic blocks to process.  */
2471 basic_block next;
2473 /* Number of predecessors we need to visit first. */
2474 int npredecessors;
2477 /* Similar information for edges. */
2478 struct edge_prob_info
/* If the edge is a loop back edge, the probability that the edge will
   be reached given that the loop header is.  The estimated number of
   iterations of the loop can then be computed as
   1 / (1 - back_edge_prob).  */
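/* Worked example (illustrative numbers): with back_edge_prob == 0.9,
   the loop is estimated to iterate 1 / (1 - 0.9) == 10 times per
   entry into it.  */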
2483 sreal back_edge_prob;
2484 /* True if the edge is a loopback edge in the natural loop. */
2485 unsigned int back_edge:1;
2488 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2489 #undef EDGE_INFO
2490 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2492 /* Helper function for estimate_bb_frequencies.
2493 Propagate the frequencies in blocks marked in
2494 TOVISIT, starting in HEAD. */
2496 static void
2497 propagate_freq (basic_block head, bitmap tovisit)
2499 basic_block bb;
2500 basic_block last;
2501 unsigned i;
2502 edge e;
2503 basic_block nextbb;
2504 bitmap_iterator bi;
/* For each basic block we need to visit, count the number of its
   predecessors that must be visited first.  */
2508 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2510 edge_iterator ei;
2511 int count = 0;
2513 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2515 FOR_EACH_EDGE (e, ei, bb->preds)
2517 bool visit = bitmap_bit_p (tovisit, e->src->index);
2519 if (visit && !(e->flags & EDGE_DFS_BACK))
2520 count++;
2521 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2522 fprintf (dump_file,
2523 "Irreducible region hit, ignoring edge to %i->%i\n",
2524 e->src->index, bb->index);
2526 BLOCK_INFO (bb)->npredecessors = count;
/* When the function never returns, we will never process the exit
   block.  */
2528 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2529 bb->count = bb->frequency = 0;
2532 BLOCK_INFO (head)->frequency = 1;
2533 last = head;
2534 for (bb = head; bb; bb = nextbb)
2536 edge_iterator ei;
2537 sreal cyclic_probability = 0;
2538 sreal frequency = 0;
2540 nextbb = BLOCK_INFO (bb)->next;
2541 BLOCK_INFO (bb)->next = NULL;
2543 /* Compute frequency of basic block. */
2544 if (bb != head)
2546 #ifdef ENABLE_CHECKING
2547 FOR_EACH_EDGE (e, ei, bb->preds)
2548 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2549 || (e->flags & EDGE_DFS_BACK));
2550 #endif
2552 FOR_EACH_EDGE (e, ei, bb->preds)
2553 if (EDGE_INFO (e)->back_edge)
2555 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2557 else if (!(e->flags & EDGE_DFS_BACK))
2559 /* frequency += (e->probability
2560 * BLOCK_INFO (e->src)->frequency /
2561 REG_BR_PROB_BASE); */
2563 sreal tmp = e->probability;
2564 tmp *= BLOCK_INFO (e->src)->frequency;
2565 tmp *= real_inv_br_prob_base;
2566 frequency += tmp;
2569 if (cyclic_probability == 0)
2571 BLOCK_INFO (bb)->frequency = frequency;
2573 else
2575 if (cyclic_probability > real_almost_one)
2576 cyclic_probability = real_almost_one;
2578 /* BLOCK_INFO (bb)->frequency = frequency
2579 / (1 - cyclic_probability) */
2581 cyclic_probability = sreal (1) - cyclic_probability;
2582 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2586 bitmap_clear_bit (tovisit, bb->index);
2588 e = find_edge (bb, head);
2589 if (e)
2591 /* EDGE_INFO (e)->back_edge_prob
2592 = ((e->probability * BLOCK_INFO (bb)->frequency)
2593 / REG_BR_PROB_BASE); */
2595 sreal tmp = e->probability;
2596 tmp *= BLOCK_INFO (bb)->frequency;
2597 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2600 /* Propagate to successor blocks. */
2601 FOR_EACH_EDGE (e, ei, bb->succs)
2602 if (!(e->flags & EDGE_DFS_BACK)
2603 && BLOCK_INFO (e->dest)->npredecessors)
2605 BLOCK_INFO (e->dest)->npredecessors--;
2606 if (!BLOCK_INFO (e->dest)->npredecessors)
2608 if (!nextbb)
2609 nextbb = e->dest;
2610 else
2611 BLOCK_INFO (last)->next = e->dest;
2613 last = e->dest;
/* Estimate frequencies in loops at the same nest level.  */
2621 static void
2622 estimate_loops_at_level (struct loop *first_loop)
2624 struct loop *loop;
2626 for (loop = first_loop; loop; loop = loop->next)
2628 edge e;
2629 basic_block *bbs;
2630 unsigned i;
2631 bitmap tovisit = BITMAP_ALLOC (NULL);
2633 estimate_loops_at_level (loop->inner);
2635 /* Find current loop back edge and mark it. */
2636 e = loop_latch_edge (loop);
2637 EDGE_INFO (e)->back_edge = 1;
2639 bbs = get_loop_body (loop);
2640 for (i = 0; i < loop->num_nodes; i++)
2641 bitmap_set_bit (tovisit, bbs[i]->index);
2642 free (bbs);
2643 propagate_freq (loop->header, tovisit);
2644 BITMAP_FREE (tovisit);
/* Propagate frequencies through the structure of loops.  */
2650 static void
2651 estimate_loops (void)
2653 bitmap tovisit = BITMAP_ALLOC (NULL);
2654 basic_block bb;
2656 /* Start by estimating the frequencies in the loops. */
2657 if (number_of_loops (cfun) > 1)
2658 estimate_loops_at_level (current_loops->tree_root->inner);
2660 /* Now propagate the frequencies through all the blocks. */
2661 FOR_ALL_BB_FN (bb, cfun)
2663 bitmap_set_bit (tovisit, bb->index);
2665 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2666 BITMAP_FREE (tovisit);
2669 /* Drop the profile for NODE to guessed, and update its frequency based on
2670 whether it is expected to be hot given the CALL_COUNT. */
2672 static void
2673 drop_profile (struct cgraph_node *node, gcov_type call_count)
2675 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2676 /* In the case where this was called by another function with a
2677 dropped profile, call_count will be 0. Since there are no
2678 non-zero call counts to this function, we don't know for sure
2679 whether it is hot, and therefore it will be marked normal below. */
2680 bool hot = maybe_hot_count_p (NULL, call_count);
2682 if (dump_file)
2683 fprintf (dump_file,
2684 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2685 node->name (), node->order,
2686 hot ? "Function is hot" : "Function is normal");
2687 /* We only expect to miss profiles for functions that are reached
2688 via non-zero call edges in cases where the function may have
2689 been linked from another module or library (COMDATs and extern
2690 templates). See the comments below for handle_missing_profiles.
2691 Also, only warn in cases where the missing counts exceed the
2692 number of training runs. In certain cases with an execv followed
2693 by a no-return call the profile for the no-return call is not
2694 dumped and there can be a mismatch. */
2695 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2696 && call_count > profile_info->runs)
2698 if (flag_profile_correction)
2700 if (dump_file)
2701 fprintf (dump_file,
2702 "Missing counts for called function %s/%i\n",
2703 node->name (), node->order);
2705 else
2706 warning (0, "Missing counts for called function %s/%i",
2707 node->name (), node->order);
2710 profile_status_for_fn (fn)
2711 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2712 node->frequency
2713 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2716 /* In the case of COMDAT routines, multiple object files will contain the same
2717 function and the linker will select one for the binary. In that case
all the other copies from the profile-instrumented binary will be missing
2719 profile counts. Look for cases where this happened, due to non-zero
2720 call counts going to 0-count functions, and drop the profile to guessed
2721 so that we can use the estimated probabilities and avoid optimizing only
2722 for size.
2724 The other case where the profile may be missing is when the routine
2725 is not going to be emitted to the object file, e.g. for "extern template"
2726 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2727 all other cases of non-zero calls to 0-count functions. */
2729 void
2730 handle_missing_profiles (void)
2732 struct cgraph_node *node;
2733 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2734 vec<struct cgraph_node *> worklist;
2735 worklist.create (64);
/* See if a 0-count function has non-0-count callers.  In this case we
   lost some profile.  Drop the function's profile to PROFILE_GUESSED.  */
2739 FOR_EACH_DEFINED_FUNCTION (node)
2741 struct cgraph_edge *e;
2742 gcov_type call_count = 0;
2743 gcov_type max_tp_first_run = 0;
2744 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2746 if (node->count)
2747 continue;
2748 for (e = node->callers; e; e = e->next_caller)
2750 call_count += e->count;
2752 if (e->caller->tp_first_run > max_tp_first_run)
2753 max_tp_first_run = e->caller->tp_first_run;
/* If the time profile is missing, assign the maximum that comes from
   the caller functions.  */
2758 if (!node->tp_first_run && max_tp_first_run)
2759 node->tp_first_run = max_tp_first_run + 1;
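/* Illustration of the threshold below (assuming the default value of
   20 for --param unlikely-bb-count-fraction): a 0-count function whose
   callers report at least one call per 20 training runs is treated as
   having lost its profile and is dropped to guessed.  */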
2761 if (call_count
2762 && fn && fn->cfg
2763 && (call_count * unlikely_count_fraction >= profile_info->runs))
2765 drop_profile (node, call_count);
2766 worklist.safe_push (node);
2770 /* Propagate the profile dropping to other 0-count COMDATs that are
2771 potentially called by COMDATs we already dropped the profile on. */
2772 while (worklist.length () > 0)
2774 struct cgraph_edge *e;
2776 node = worklist.pop ();
for (e = node->callees; e; e = e->next_callee)
2779 struct cgraph_node *callee = e->callee;
2780 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2782 if (callee->count > 0)
2783 continue;
2784 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2785 && profile_status_for_fn (fn) == PROFILE_READ)
2787 drop_profile (node, 0);
2788 worklist.safe_push (callee);
2792 worklist.release ();
2795 /* Convert counts measured by profile driven feedback to frequencies.
2796 Return nonzero iff there was any nonzero execution count. */
gcov_type
counts_to_freqs (void)
2801 gcov_type count_max, true_count_max = 0;
2802 basic_block bb;
/* Don't overwrite the estimated frequencies when the profile for
   the function is missing.  We may drop this function to PROFILE_GUESSED
   later in drop_profile ().  */
2807 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2808 return 0;
2810 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2811 true_count_max = MAX (bb->count, true_count_max);
2813 count_max = MAX (true_count_max, 1);
2814 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2815 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
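/* Worked example (illustrative numbers): with count_max == 1000 and
   bb->count == 250, the line above yields
   (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly BB_FREQ_MAX / 4,
   rounded to the nearest integer.  */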
2817 return true_count_max;
/* Return true if the function is likely to be expensive, so there is
   no point in optimizing the performance of the prologue or epilogue, or
   in doing inlining, at the expense of code size growth.  THRESHOLD is
   the limit on the average number of instructions the function may
   execute while still being considered inexpensive.  */
2825 bool
2826 expensive_function_p (int threshold)
2828 unsigned int sum = 0;
2829 basic_block bb;
2830 unsigned int limit;
/* We cannot compute this accurately for large thresholds due to the
   scaled frequencies.  */
2834 gcc_assert (threshold <= BB_FREQ_MAX);
/* Frequencies are out of range.  This either means that the function
   contains an internal loop executing more than BB_FREQ_MAX times, or
   that profile feedback is available but the function has not been
   executed at all.  */
2839 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2840 return true;
2842 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2843 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2844 FOR_EACH_BB_FN (bb, cfun)
2846 rtx_insn *insn;
2848 FOR_BB_INSNS (bb, insn)
2849 if (active_insn_p (insn))
2851 sum += bb->frequency;
2852 if (sum > limit)
2853 return true;
2857 return false;
2860 /* Estimate and propagate basic block frequencies using the given branch
2861 probabilities. If FORCE is true, the frequencies are used to estimate
2862 the counts even when there are already non-zero profile counts. */
2864 void
2865 estimate_bb_frequencies (bool force)
2867 basic_block bb;
2868 sreal freq_max;
2870 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2872 static int real_values_initialized = 0;
2874 if (!real_values_initialized)
2876 real_values_initialized = 1;
2877 real_br_prob_base = REG_BR_PROB_BASE;
2878 real_bb_freq_max = BB_FREQ_MAX;
2879 real_one_half = sreal (1, -1);
2880 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2881 real_almost_one = sreal (1) - real_inv_br_prob_base;
2884 mark_dfs_back_edges ();
2886 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2887 REG_BR_PROB_BASE;
2889 /* Set up block info for each basic block. */
2890 alloc_aux_for_blocks (sizeof (block_info));
2891 alloc_aux_for_edges (sizeof (edge_prob_info));
2892 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2894 edge e;
2895 edge_iterator ei;
2897 FOR_EACH_EDGE (e, ei, bb->succs)
2899 EDGE_INFO (e)->back_edge_prob = e->probability;
2900 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2904 /* First compute frequencies locally for each loop from innermost
2905 to outermost to examine frequencies for back edges. */
2906 estimate_loops ();
2908 freq_max = 0;
2909 FOR_EACH_BB_FN (bb, cfun)
2910 if (freq_max < BLOCK_INFO (bb)->frequency)
2911 freq_max = BLOCK_INFO (bb)->frequency;
2913 freq_max = real_bb_freq_max / freq_max;
2914 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2916 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2917 bb->frequency = tmp.to_int ();
2920 free_aux_for_blocks ();
2921 free_aux_for_edges ();
2923 compute_function_frequency ();
/* Decide whether the function is hot, cold, or unlikely executed.  */
2927 void
2928 compute_function_frequency (void)
2930 basic_block bb;
2931 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2933 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2934 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2935 node->only_called_at_startup = true;
2936 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2937 node->only_called_at_exit = true;
2939 if (profile_status_for_fn (cfun) != PROFILE_READ)
2941 int flags = flags_from_decl_or_type (current_function_decl);
2942 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2943 != NULL)
2944 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2945 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2946 != NULL)
2947 node->frequency = NODE_FREQUENCY_HOT;
2948 else if (flags & ECF_NORETURN)
2949 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2950 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2951 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2952 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2953 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2954 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2955 return;
/* Only try to drop the function into unlikely-executed the first time;
   after inlining, roundoff errors may confuse us.
   The ipa-profile pass will drop functions called only from unlikely
   functions to unlikely, and that is most of what we care about.  */
2962 if (!cfun->after_inlining)
2963 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2964 FOR_EACH_BB_FN (bb, cfun)
2966 if (maybe_hot_bb_p (cfun, bb))
2968 node->frequency = NODE_FREQUENCY_HOT;
2969 return;
2971 if (!probably_never_executed_bb_p (cfun, bb))
2972 node->frequency = NODE_FREQUENCY_NORMAL;
2976 /* Build PREDICT_EXPR. */
2977 tree
2978 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2980 tree t = build1 (PREDICT_EXPR, void_type_node,
2981 build_int_cst (integer_type_node, predictor));
2982 SET_PREDICT_EXPR_OUTCOME (t, taken);
2983 return t;
2986 const char *
2987 predictor_name (enum br_predictor predictor)
2989 return predictor_info[predictor].name;
2992 /* Predict branch probabilities and estimate profile of the tree CFG. */
2994 namespace {
2996 const pass_data pass_data_profile =
2998 GIMPLE_PASS, /* type */
2999 "profile_estimate", /* name */
3000 OPTGROUP_NONE, /* optinfo_flags */
3001 TV_BRANCH_PROB, /* tv_id */
3002 PROP_cfg, /* properties_required */
3003 0, /* properties_provided */
3004 0, /* properties_destroyed */
3005 0, /* todo_flags_start */
3006 0, /* todo_flags_finish */
3009 class pass_profile : public gimple_opt_pass
3011 public:
3012 pass_profile (gcc::context *ctxt)
3013 : gimple_opt_pass (pass_data_profile, ctxt)
3016 /* opt_pass methods: */
3017 virtual bool gate (function *) { return flag_guess_branch_prob; }
3018 virtual unsigned int execute (function *);
3020 }; // class pass_profile
3022 unsigned int
3023 pass_profile::execute (function *fun)
3025 unsigned nb_loops;
3027 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3028 return 0;
3030 loop_optimizer_init (LOOPS_NORMAL);
3031 if (dump_file && (dump_flags & TDF_DETAILS))
3032 flow_loops_dump (dump_file, NULL, 0);
3034 mark_irreducible_loops ();
3036 nb_loops = number_of_loops (fun);
3037 if (nb_loops > 1)
3038 scev_initialize ();
3040 tree_estimate_probability ();
3042 if (nb_loops > 1)
3043 scev_finalize ();
3045 loop_optimizer_finalize ();
3046 if (dump_file && (dump_flags & TDF_DETAILS))
3047 gimple_dump_cfg (dump_file, dump_flags);
3048 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3049 profile_status_for_fn (fun) = PROFILE_GUESSED;
3050 return 0;
3053 } // anon namespace
3055 gimple_opt_pass *
3056 make_pass_profile (gcc::context *ctxt)
3058 return new pass_profile (ctxt);
3061 namespace {
3063 const pass_data pass_data_strip_predict_hints =
3065 GIMPLE_PASS, /* type */
3066 "*strip_predict_hints", /* name */
3067 OPTGROUP_NONE, /* optinfo_flags */
3068 TV_BRANCH_PROB, /* tv_id */
3069 PROP_cfg, /* properties_required */
3070 0, /* properties_provided */
3071 0, /* properties_destroyed */
3072 0, /* todo_flags_start */
3073 0, /* todo_flags_finish */
3076 class pass_strip_predict_hints : public gimple_opt_pass
3078 public:
3079 pass_strip_predict_hints (gcc::context *ctxt)
3080 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3083 /* opt_pass methods: */
3084 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3085 virtual unsigned int execute (function *);
3087 }; // class pass_strip_predict_hints
3089 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3090 we no longer need. */
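/* For example (illustrative GIMPLE, not taken from a real dump):

     tmp_1 = __builtin_expect (cond_2, 1);
     if (tmp_1 != 0) ...

   becomes

     tmp_1 = cond_2;
     if (tmp_1 != 0) ...

   i.e. the call is replaced by a plain copy of its first argument.  */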
3091 unsigned int
3092 pass_strip_predict_hints::execute (function *fun)
3094 basic_block bb;
3095 gimple ass_stmt;
3096 tree var;
3098 FOR_EACH_BB_FN (bb, fun)
3100 gimple_stmt_iterator bi;
3101 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3103 gimple stmt = gsi_stmt (bi);
3105 if (gimple_code (stmt) == GIMPLE_PREDICT)
3107 gsi_remove (&bi, true);
3108 continue;
3110 else if (is_gimple_call (stmt))
3112 tree fndecl = gimple_call_fndecl (stmt);
3114 if ((fndecl
3115 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3116 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3117 && gimple_call_num_args (stmt) == 2)
3118 || (gimple_call_internal_p (stmt)
3119 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3121 var = gimple_call_lhs (stmt);
3122 if (var)
3124 ass_stmt
3125 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3126 gsi_replace (&bi, ass_stmt, true);
3128 else
3130 gsi_remove (&bi, true);
3131 continue;
3135 gsi_next (&bi);
3138 return 0;
3141 } // anon namespace
3143 gimple_opt_pass *
3144 make_pass_strip_predict_hints (gcc::context *ctxt)
3146 return new pass_strip_predict_hints (ctxt);
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies may run out of scale and thus need to be recomputed.  */
3154 void
3155 rebuild_frequencies (void)
3157 timevar_push (TV_REBUILD_FREQUENCIES);
3159 /* When the max bb count in the function is small, there is a higher
3160 chance that there were truncation errors in the integer scaling
3161 of counts by inlining and other optimizations. This could lead
3162 to incorrect classification of code as being cold when it isn't.
3163 In that case, force the estimation of bb counts/frequencies from the
3164 branch probabilities, rather than computing frequencies from counts,
3165 which may also lead to frequencies incorrectly reduced to 0. There
3166 is less precision in the probabilities, so we only do this for small
3167 max counts. */
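/* Concretely (assuming REG_BR_PROB_BASE == 10000), the cutoff below
   re-estimates frequencies whenever the largest count in the function
   is below 1000.  */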
3168 gcov_type count_max = 0;
3169 basic_block bb;
3170 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3171 count_max = MAX (bb->count, count_max);
3173 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3174 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3175 && count_max < REG_BR_PROB_BASE/10))
3177 loop_optimizer_init (0);
3178 add_noreturn_fake_exit_edges ();
3179 mark_irreducible_loops ();
3180 connect_infinite_loops_to_exit ();
3181 estimate_bb_frequencies (true);
3182 remove_fake_exit_edges ();
3183 loop_optimizer_finalize ();
3185 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3186 counts_to_freqs ();
3187 else
3188 gcc_unreachable ();
3189 timevar_pop (TV_REBUILD_FREQUENCIES);