re PR tree-optimization/64454 (optimize (x%5)%5)
[official-gcc.git] / gcc / predict.c
blob1568633561ed0d8073fde26ccf00bc24ae828156
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "calls.h"
46 #include "rtl.h"
47 #include "tm_p.h"
48 #include "hard-reg-set.h"
49 #include "predict.h"
50 #include "function.h"
51 #include "dominance.h"
52 #include "cfg.h"
53 #include "cfganal.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "flags.h"
58 #include "profile.h"
59 #include "except.h"
60 #include "diagnostic-core.h"
61 #include "recog.h"
62 #include "hashtab.h"
63 #include "statistics.h"
64 #include "real.h"
65 #include "fixed-value.h"
66 #include "expmed.h"
67 #include "dojump.h"
68 #include "explow.h"
69 #include "emit-rtl.h"
70 #include "varasm.h"
71 #include "stmt.h"
72 #include "expr.h"
73 #include "coverage.h"
74 #include "sreal.h"
75 #include "params.h"
76 #include "target.h"
77 #include "cfgloop.h"
78 #include "hash-map.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-expr.h"
82 #include "is-a.h"
83 #include "gimple.h"
84 #include "gimple-iterator.h"
85 #include "gimple-ssa.h"
86 #include "plugin-api.h"
87 #include "ipa-ref.h"
88 #include "cgraph.h"
89 #include "tree-cfg.h"
90 #include "tree-phinodes.h"
91 #include "ssa-iterators.h"
92 #include "tree-ssa-loop-niter.h"
93 #include "tree-ssa-loop.h"
94 #include "tree-pass.h"
95 #include "tree-scalar-evolution.h"
97 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
98 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
99 static sreal real_almost_one, real_br_prob_base,
100 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
102 static void combine_predictions_for_insn (rtx_insn *, basic_block);
103 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
104 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
105 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
106 static bool can_predict_insn_p (const rtx_insn *);
108 /* Information we hold about each branch predictor.
109 Filled using information from predict.def. */
111 struct predictor_info
113 const char *const name; /* Name used in the debugging dumps. */
114 const int hitrate; /* Expected hitrate used by
115 predict_insn_def call. */
116 const int flags;
119 /* Use given predictor without Dempster-Shaffer theory if it matches
120 using first_match heuristics. */
121 #define PRED_FLAG_FIRST_MATCH 1
123 /* Recompute hitrate in percent to our representation. */
125 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
127 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
128 static const struct predictor_info predictor_info[]= {
129 #include "predict.def"
131 /* Upper bound on predictors. */
132 {NULL, 0, 0}
134 #undef DEF_PREDICTOR
136 /* Return TRUE if frequency FREQ is considered to be hot. */
138 static inline bool
139 maybe_hot_frequency_p (struct function *fun, int freq)
141 struct cgraph_node *node = cgraph_node::get (fun->decl);
142 if (!profile_info
143 || !opt_for_fn (fun->decl, flag_branch_probabilities))
145 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
146 return false;
147 if (node->frequency == NODE_FREQUENCY_HOT)
148 return true;
150 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
151 return true;
152 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
153 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
154 return false;
155 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
156 return false;
157 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
158 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
159 return false;
160 return true;
163 static gcov_type min_count = -1;
165 /* Determine the threshold for hot BB counts. */
167 gcov_type
168 get_hot_bb_threshold ()
170 gcov_working_set_t *ws;
171 if (min_count == -1)
173 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
174 gcc_assert (ws);
175 min_count = ws->min_counter;
177 return min_count;
180 /* Set the threshold for hot BB counts. */
182 void
183 set_hot_bb_threshold (gcov_type min)
185 min_count = min;
188 /* Return TRUE if frequency FREQ is considered to be hot. */
190 bool
191 maybe_hot_count_p (struct function *fun, gcov_type count)
193 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
194 return true;
195 /* Code executed at most once is not hot. */
196 if (profile_info->runs >= count)
197 return false;
198 return (count >= get_hot_bb_threshold ());
201 /* Return true in case BB can be CPU intensive and should be optimized
202 for maximal performance. */
204 bool
205 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
207 gcc_checking_assert (fun);
208 if (profile_status_for_fn (fun) == PROFILE_READ)
209 return maybe_hot_count_p (fun, bb->count);
210 return maybe_hot_frequency_p (fun, bb->frequency);
213 /* Return true in case BB can be CPU intensive and should be optimized
214 for maximal performance. */
216 bool
217 maybe_hot_edge_p (edge e)
219 if (profile_status_for_fn (cfun) == PROFILE_READ)
220 return maybe_hot_count_p (cfun, e->count);
221 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
224 /* Return true if profile COUNT and FREQUENCY, or function FUN static
225 node frequency reflects never being executed. */
227 static bool
228 probably_never_executed (struct function *fun,
229 gcov_type count, int frequency)
231 gcc_checking_assert (fun);
232 if (profile_status_for_fn (fun) == PROFILE_READ)
234 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
235 if (count * unlikely_count_fraction >= profile_info->runs)
236 return false;
237 if (!frequency)
238 return true;
239 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
240 return false;
241 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
243 gcov_type computed_count;
244 /* Check for possibility of overflow, in which case entry bb count
245 is large enough to do the division first without losing much
246 precision. */
247 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
248 REG_BR_PROB_BASE)
250 gcov_type scaled_count
251 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
252 unlikely_count_fraction;
253 computed_count = RDIV (scaled_count,
254 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
256 else
258 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
259 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
260 computed_count *= frequency * unlikely_count_fraction;
262 if (computed_count >= profile_info->runs)
263 return false;
265 return true;
267 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
268 && (cgraph_node::get (fun->decl)->frequency
269 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
270 return true;
271 return false;
275 /* Return true in case BB is probably never executed. */
277 bool
278 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
280 return probably_never_executed (fun, bb->count, bb->frequency);
284 /* Return true in case edge E is probably never executed. */
286 bool
287 probably_never_executed_edge_p (struct function *fun, edge e)
289 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
292 /* Return true when current function should always be optimized for size. */
294 bool
295 optimize_function_for_size_p (struct function *fun)
297 if (!fun || !fun->decl)
298 return optimize_size;
299 cgraph_node *n = cgraph_node::get (fun->decl);
300 return n && n->optimize_for_size_p ();
303 /* Return true when current function should always be optimized for speed. */
305 bool
306 optimize_function_for_speed_p (struct function *fun)
308 return !optimize_function_for_size_p (fun);
311 /* Return TRUE when BB should be optimized for size. */
313 bool
314 optimize_bb_for_size_p (const_basic_block bb)
316 return (optimize_function_for_size_p (cfun)
317 || (bb && !maybe_hot_bb_p (cfun, bb)));
320 /* Return TRUE when BB should be optimized for speed. */
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
325 return !optimize_bb_for_size_p (bb);
328 /* Return TRUE when BB should be optimized for size. */
330 bool
331 optimize_edge_for_size_p (edge e)
333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
336 /* Return TRUE when BB should be optimized for speed. */
338 bool
339 optimize_edge_for_speed_p (edge e)
341 return !optimize_edge_for_size_p (e);
344 /* Return TRUE when BB should be optimized for size. */
346 bool
347 optimize_insn_for_size_p (void)
349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
352 /* Return TRUE when BB should be optimized for speed. */
354 bool
355 optimize_insn_for_speed_p (void)
357 return !optimize_insn_for_size_p ();
360 /* Return TRUE when LOOP should be optimized for size. */
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
365 return optimize_bb_for_size_p (loop->header);
368 /* Return TRUE when LOOP should be optimized for speed. */
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
373 return optimize_bb_for_speed_p (loop->header);
376 /* Return TRUE when LOOP nest should be optimized for speed. */
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
381 struct loop *l = loop;
382 if (optimize_loop_for_speed_p (loop))
383 return true;
384 l = loop->inner;
385 while (l && l != loop)
387 if (optimize_loop_for_speed_p (l))
388 return true;
389 if (l->inner)
390 l = l->inner;
391 else if (l->next)
392 l = l->next;
393 else
395 while (l != loop && !l->next)
396 l = loop_outer (l);
397 if (l != loop)
398 l = l->next;
401 return false;
404 /* Return TRUE when LOOP nest should be optimized for size. */
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
409 return !optimize_loop_nest_for_speed_p (loop);
412 /* Return true when edge E is likely to be well predictable by branch
413 predictor. */
415 bool
416 predictable_edge_p (edge e)
418 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
419 return false;
420 if ((e->probability
421 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422 || (REG_BR_PROB_BASE - e->probability
423 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424 return true;
425 return false;
429 /* Set RTL expansion for BB profile. */
431 void
432 rtl_profile_for_bb (basic_block bb)
434 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
437 /* Set RTL expansion for edge profile. */
439 void
440 rtl_profile_for_edge (edge e)
442 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
445 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
446 void
447 default_rtl_profile (void)
449 crtl->maybe_hot_insn_p = true;
452 /* Return true if the one of outgoing edges is already predicted by
453 PREDICTOR. */
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
458 rtx note;
459 if (!INSN_P (BB_END (bb)))
460 return false;
461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462 if (REG_NOTE_KIND (note) == REG_BR_PRED
463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464 return true;
465 return false;
468 /* Structure representing predictions in tree level. */
470 struct edge_prediction {
471 struct edge_prediction *ep_next;
472 edge ep_edge;
473 enum br_predictor ep_predictor;
474 int ep_probability;
477 /* This map contains for a basic block the list of predictions for the
478 outgoing edges. */
480 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
482 /* Return true if the one of outgoing edges is already predicted by
483 PREDICTOR. */
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
488 struct edge_prediction *i;
489 edge_prediction **preds = bb_predictions->get (bb);
491 if (!preds)
492 return false;
494 for (i = *preds; i; i = i->ep_next)
495 if (i->ep_predictor == predictor)
496 return true;
497 return false;
500 /* Return true when the probability of edge is reliable.
502 The profile guessing code is good at predicting branch outcome (ie.
503 taken/not taken), that is predicted right slightly over 75% of time.
504 It is however notoriously poor on predicting the probability itself.
505 In general the profile appear a lot flatter (with probabilities closer
506 to 50%) than the reality so it is bad idea to use it to drive optimization
507 such as those disabling dynamic branch prediction for well predictable
508 branches.
510 There are two exceptions - edges leading to noreturn edges and edges
511 predicted by number of iterations heuristics are predicted well. This macro
512 should be able to distinguish those, but at the moment it simply check for
513 noreturn heuristic that is only one giving probability over 99% or bellow
514 1%. In future we might want to propagate reliability information across the
515 CFG if we find this information useful on multiple places. */
516 static bool
517 probability_reliable_p (int prob)
519 return (profile_status_for_fn (cfun) == PROFILE_READ
520 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
521 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
524 /* Same predicate as above, working on edges. */
525 bool
526 edge_probability_reliable_p (const_edge e)
528 return probability_reliable_p (e->probability);
531 /* Same predicate as edge_probability_reliable_p, working on notes. */
532 bool
533 br_prob_note_reliable_p (const_rtx note)
535 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
536 return probability_reliable_p (XINT (note, 0));
539 static void
540 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
542 gcc_assert (any_condjump_p (insn));
543 if (!flag_guess_branch_prob)
544 return;
546 add_reg_note (insn, REG_BR_PRED,
547 gen_rtx_CONCAT (VOIDmode,
548 GEN_INT ((int) predictor),
549 GEN_INT ((int) probability)));
552 /* Predict insn by given predictor. */
554 void
555 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
556 enum prediction taken)
558 int probability = predictor_info[(int) predictor].hitrate;
560 if (taken != TAKEN)
561 probability = REG_BR_PROB_BASE - probability;
563 predict_insn (insn, predictor, probability);
566 /* Predict edge E with given probability if possible. */
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
571 rtx_insn *last_insn;
572 last_insn = BB_END (e->src);
574 /* We can store the branch prediction information only about
575 conditional jumps. */
576 if (!any_condjump_p (last_insn))
577 return;
579 /* We always store probability of branching. */
580 if (e->flags & EDGE_FALLTHRU)
581 probability = REG_BR_PROB_BASE - probability;
583 predict_insn (last_insn, predictor, probability);
586 /* Predict edge E with the given PROBABILITY. */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
590 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
591 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
593 && flag_guess_branch_prob && optimize)
595 struct edge_prediction *i = XNEW (struct edge_prediction);
596 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
598 i->ep_next = preds;
599 preds = i;
600 i->ep_probability = probability;
601 i->ep_predictor = predictor;
602 i->ep_edge = e;
606 /* Remove all predictions on given basic block that are attached
607 to edge E. */
608 void
609 remove_predictions_associated_with_edge (edge e)
611 if (!bb_predictions)
612 return;
614 edge_prediction **preds = bb_predictions->get (e->src);
616 if (preds)
618 struct edge_prediction **prediction = preds;
619 struct edge_prediction *next;
621 while (*prediction)
623 if ((*prediction)->ep_edge == e)
625 next = (*prediction)->ep_next;
626 free (*prediction);
627 *prediction = next;
629 else
630 prediction = &((*prediction)->ep_next);
635 /* Clears the list of predictions stored for BB. */
637 static void
638 clear_bb_predictions (basic_block bb)
640 edge_prediction **preds = bb_predictions->get (bb);
641 struct edge_prediction *pred, *next;
643 if (!preds)
644 return;
646 for (pred = *preds; pred; pred = next)
648 next = pred->ep_next;
649 free (pred);
651 *preds = NULL;
654 /* Return true when we can store prediction on insn INSN.
655 At the moment we represent predictions only on conditional
656 jumps, not at computed jump or other complicated cases. */
657 static bool
658 can_predict_insn_p (const rtx_insn *insn)
660 return (JUMP_P (insn)
661 && any_condjump_p (insn)
662 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
665 /* Predict edge E by given predictor if possible. */
667 void
668 predict_edge_def (edge e, enum br_predictor predictor,
669 enum prediction taken)
671 int probability = predictor_info[(int) predictor].hitrate;
673 if (taken != TAKEN)
674 probability = REG_BR_PROB_BASE - probability;
676 predict_edge (e, predictor, probability);
679 /* Invert all branch predictions or probability notes in the INSN. This needs
680 to be done each time we invert the condition used by the jump. */
682 void
683 invert_br_probabilities (rtx insn)
685 rtx note;
687 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
688 if (REG_NOTE_KIND (note) == REG_BR_PROB)
689 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
690 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
691 XEXP (XEXP (note, 0), 1)
692 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
695 /* Dump information about the branch prediction to the output file. */
697 static void
698 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
699 basic_block bb, int used)
701 edge e;
702 edge_iterator ei;
704 if (!file)
705 return;
707 FOR_EACH_EDGE (e, ei, bb->succs)
708 if (! (e->flags & EDGE_FALLTHRU))
709 break;
711 fprintf (file, " %s heuristics%s: %.1f%%",
712 predictor_info[predictor].name,
713 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
715 if (bb->count)
717 fprintf (file, " exec %" PRId64, bb->count);
718 if (e)
720 fprintf (file, " hit %" PRId64, e->count);
721 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
725 fprintf (file, "\n");
728 /* We can not predict the probabilities of outgoing edges of bb. Set them
729 evenly and hope for the best. */
730 static void
731 set_even_probabilities (basic_block bb)
733 int nedges = 0;
734 edge e;
735 edge_iterator ei;
737 FOR_EACH_EDGE (e, ei, bb->succs)
738 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
739 nedges ++;
740 FOR_EACH_EDGE (e, ei, bb->succs)
741 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
743 else
744 e->probability = 0;
747 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
748 note if not already present. Remove now useless REG_BR_PRED notes. */
750 static void
751 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
753 rtx prob_note;
754 rtx *pnote;
755 rtx note;
756 int best_probability = PROB_EVEN;
757 enum br_predictor best_predictor = END_PREDICTORS;
758 int combined_probability = REG_BR_PROB_BASE / 2;
759 int d;
760 bool first_match = false;
761 bool found = false;
763 if (!can_predict_insn_p (insn))
765 set_even_probabilities (bb);
766 return;
769 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
770 pnote = &REG_NOTES (insn);
771 if (dump_file)
772 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
773 bb->index);
775 /* We implement "first match" heuristics and use probability guessed
776 by predictor with smallest index. */
777 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
778 if (REG_NOTE_KIND (note) == REG_BR_PRED)
780 enum br_predictor predictor = ((enum br_predictor)
781 INTVAL (XEXP (XEXP (note, 0), 0)));
782 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
784 found = true;
785 if (best_predictor > predictor)
786 best_probability = probability, best_predictor = predictor;
788 d = (combined_probability * probability
789 + (REG_BR_PROB_BASE - combined_probability)
790 * (REG_BR_PROB_BASE - probability));
792 /* Use FP math to avoid overflows of 32bit integers. */
793 if (d == 0)
794 /* If one probability is 0% and one 100%, avoid division by zero. */
795 combined_probability = REG_BR_PROB_BASE / 2;
796 else
797 combined_probability = (((double) combined_probability) * probability
798 * REG_BR_PROB_BASE / d + 0.5);
801 /* Decide which heuristic to use. In case we didn't match anything,
802 use no_prediction heuristic, in case we did match, use either
803 first match or Dempster-Shaffer theory depending on the flags. */
805 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
806 first_match = true;
808 if (!found)
809 dump_prediction (dump_file, PRED_NO_PREDICTION,
810 combined_probability, bb, true);
811 else
813 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
814 bb, !first_match);
815 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
816 bb, first_match);
819 if (first_match)
820 combined_probability = best_probability;
821 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
823 while (*pnote)
825 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
827 enum br_predictor predictor = ((enum br_predictor)
828 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
829 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
831 dump_prediction (dump_file, predictor, probability, bb,
832 !first_match || best_predictor == predictor);
833 *pnote = XEXP (*pnote, 1);
835 else
836 pnote = &XEXP (*pnote, 1);
839 if (!prob_note)
841 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
843 /* Save the prediction into CFG in case we are seeing non-degenerated
844 conditional jump. */
845 if (!single_succ_p (bb))
847 BRANCH_EDGE (bb)->probability = combined_probability;
848 FALLTHRU_EDGE (bb)->probability
849 = REG_BR_PROB_BASE - combined_probability;
852 else if (!single_succ_p (bb))
854 int prob = XINT (prob_note, 0);
856 BRANCH_EDGE (bb)->probability = prob;
857 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
859 else
860 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
863 /* Combine predictions into single probability and store them into CFG.
864 Remove now useless prediction entries. */
866 static void
867 combine_predictions_for_bb (basic_block bb)
869 int best_probability = PROB_EVEN;
870 enum br_predictor best_predictor = END_PREDICTORS;
871 int combined_probability = REG_BR_PROB_BASE / 2;
872 int d;
873 bool first_match = false;
874 bool found = false;
875 struct edge_prediction *pred;
876 int nedges = 0;
877 edge e, first = NULL, second = NULL;
878 edge_iterator ei;
880 FOR_EACH_EDGE (e, ei, bb->succs)
881 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
883 nedges ++;
884 if (first && !second)
885 second = e;
886 if (!first)
887 first = e;
890 /* When there is no successor or only one choice, prediction is easy.
892 We are lazy for now and predict only basic blocks with two outgoing
893 edges. It is possible to predict generic case too, but we have to
894 ignore first match heuristics and do more involved combining. Implement
895 this later. */
896 if (nedges != 2)
898 if (!bb->count)
899 set_even_probabilities (bb);
900 clear_bb_predictions (bb);
901 if (dump_file)
902 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
903 nedges, bb->index);
904 return;
907 if (dump_file)
908 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
910 edge_prediction **preds = bb_predictions->get (bb);
911 if (preds)
913 /* We implement "first match" heuristics and use probability guessed
914 by predictor with smallest index. */
915 for (pred = *preds; pred; pred = pred->ep_next)
917 enum br_predictor predictor = pred->ep_predictor;
918 int probability = pred->ep_probability;
920 if (pred->ep_edge != first)
921 probability = REG_BR_PROB_BASE - probability;
923 found = true;
924 /* First match heuristics would be widly confused if we predicted
925 both directions. */
926 if (best_predictor > predictor)
928 struct edge_prediction *pred2;
929 int prob = probability;
931 for (pred2 = (struct edge_prediction *) *preds;
932 pred2; pred2 = pred2->ep_next)
933 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
935 int probability2 = pred->ep_probability;
937 if (pred2->ep_edge != first)
938 probability2 = REG_BR_PROB_BASE - probability2;
940 if ((probability < REG_BR_PROB_BASE / 2) !=
941 (probability2 < REG_BR_PROB_BASE / 2))
942 break;
944 /* If the same predictor later gave better result, go for it! */
945 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
946 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
947 prob = probability2;
949 if (!pred2)
950 best_probability = prob, best_predictor = predictor;
953 d = (combined_probability * probability
954 + (REG_BR_PROB_BASE - combined_probability)
955 * (REG_BR_PROB_BASE - probability));
957 /* Use FP math to avoid overflows of 32bit integers. */
958 if (d == 0)
959 /* If one probability is 0% and one 100%, avoid division by zero. */
960 combined_probability = REG_BR_PROB_BASE / 2;
961 else
962 combined_probability = (((double) combined_probability)
963 * probability
964 * REG_BR_PROB_BASE / d + 0.5);
968 /* Decide which heuristic to use. In case we didn't match anything,
969 use no_prediction heuristic, in case we did match, use either
970 first match or Dempster-Shaffer theory depending on the flags. */
972 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
973 first_match = true;
975 if (!found)
976 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
977 else
979 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
980 !first_match);
981 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
982 first_match);
985 if (first_match)
986 combined_probability = best_probability;
987 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
989 if (preds)
991 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
993 enum br_predictor predictor = pred->ep_predictor;
994 int probability = pred->ep_probability;
996 if (pred->ep_edge != EDGE_SUCC (bb, 0))
997 probability = REG_BR_PROB_BASE - probability;
998 dump_prediction (dump_file, predictor, probability, bb,
999 !first_match || best_predictor == predictor);
1002 clear_bb_predictions (bb);
1004 if (!bb->count)
1006 first->probability = combined_probability;
1007 second->probability = REG_BR_PROB_BASE - combined_probability;
1011 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1012 Return the SSA_NAME if the condition satisfies, NULL otherwise.
1014 T1 and T2 should be one of the following cases:
1015 1. T1 is SSA_NAME, T2 is NULL
1016 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1017 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1019 static tree
1020 strips_small_constant (tree t1, tree t2)
1022 tree ret = NULL;
1023 int value = 0;
1025 if (!t1)
1026 return NULL;
1027 else if (TREE_CODE (t1) == SSA_NAME)
1028 ret = t1;
1029 else if (tree_fits_shwi_p (t1))
1030 value = tree_to_shwi (t1);
1031 else
1032 return NULL;
1034 if (!t2)
1035 return ret;
1036 else if (tree_fits_shwi_p (t2))
1037 value = tree_to_shwi (t2);
1038 else if (TREE_CODE (t2) == SSA_NAME)
1040 if (ret)
1041 return NULL;
1042 else
1043 ret = t2;
1046 if (value <= 4 && value >= -4)
1047 return ret;
1048 else
1049 return NULL;
1052 /* Return the SSA_NAME in T or T's operands.
1053 Return NULL if SSA_NAME cannot be found. */
1055 static tree
1056 get_base_value (tree t)
1058 if (TREE_CODE (t) == SSA_NAME)
1059 return t;
1061 if (!BINARY_CLASS_P (t))
1062 return NULL;
1064 switch (TREE_OPERAND_LENGTH (t))
1066 case 1:
1067 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1068 case 2:
1069 return strips_small_constant (TREE_OPERAND (t, 0),
1070 TREE_OPERAND (t, 1));
1071 default:
1072 return NULL;
1076 /* Check the compare STMT in LOOP. If it compares an induction
1077 variable to a loop invariant, return true, and save
1078 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1079 Otherwise return false and set LOOP_INVAIANT to NULL. */
1081 static bool
1082 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1083 tree *loop_invariant,
1084 enum tree_code *compare_code,
1085 tree *loop_step,
1086 tree *loop_iv_base)
1088 tree op0, op1, bound, base;
1089 affine_iv iv0, iv1;
1090 enum tree_code code;
1091 tree step;
1093 code = gimple_cond_code (stmt);
1094 *loop_invariant = NULL;
1096 switch (code)
1098 case GT_EXPR:
1099 case GE_EXPR:
1100 case NE_EXPR:
1101 case LT_EXPR:
1102 case LE_EXPR:
1103 case EQ_EXPR:
1104 break;
1106 default:
1107 return false;
1110 op0 = gimple_cond_lhs (stmt);
1111 op1 = gimple_cond_rhs (stmt);
1113 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1114 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1115 return false;
1116 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1117 return false;
1118 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1119 return false;
1120 if (TREE_CODE (iv0.step) != INTEGER_CST
1121 || TREE_CODE (iv1.step) != INTEGER_CST)
1122 return false;
1123 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1124 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1125 return false;
1127 if (integer_zerop (iv0.step))
1129 if (code != NE_EXPR && code != EQ_EXPR)
1130 code = invert_tree_comparison (code, false);
1131 bound = iv0.base;
1132 base = iv1.base;
1133 if (tree_fits_shwi_p (iv1.step))
1134 step = iv1.step;
1135 else
1136 return false;
1138 else
1140 bound = iv1.base;
1141 base = iv0.base;
1142 if (tree_fits_shwi_p (iv0.step))
1143 step = iv0.step;
1144 else
1145 return false;
1148 if (TREE_CODE (bound) != INTEGER_CST)
1149 bound = get_base_value (bound);
1150 if (!bound)
1151 return false;
1152 if (TREE_CODE (base) != INTEGER_CST)
1153 base = get_base_value (base);
1154 if (!base)
1155 return false;
1157 *loop_invariant = bound;
1158 *compare_code = code;
1159 *loop_step = step;
1160 *loop_iv_base = base;
1161 return true;
1164 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1166 static bool
1167 expr_coherent_p (tree t1, tree t2)
1169 gimple stmt;
1170 tree ssa_name_1 = NULL;
1171 tree ssa_name_2 = NULL;
1173 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1174 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1176 if (t1 == t2)
1177 return true;
1179 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1180 return true;
1181 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1182 return false;
1184 /* Check to see if t1 is expressed/defined with t2. */
1185 stmt = SSA_NAME_DEF_STMT (t1);
1186 gcc_assert (stmt != NULL);
1187 if (is_gimple_assign (stmt))
1189 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1190 if (ssa_name_1 && ssa_name_1 == t2)
1191 return true;
1194 /* Check to see if t2 is expressed/defined with t1. */
1195 stmt = SSA_NAME_DEF_STMT (t2);
1196 gcc_assert (stmt != NULL);
1197 if (is_gimple_assign (stmt))
1199 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1200 if (ssa_name_2 && ssa_name_2 == t1)
1201 return true;
1204 /* Compare if t1 and t2's def_stmts are identical. */
1205 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1206 return true;
1207 else
1208 return false;
1211 /* Predict branch probability of BB when BB contains a branch that compares
1212 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1213 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1215 E.g.
1216 for (int i = 0; i < bound; i++) {
1217 if (i < bound - 2)
1218 computation_1();
1219 else
1220 computation_2();
1223 In this loop, we will predict the branch inside the loop to be taken. */
1225 static void
1226 predict_iv_comparison (struct loop *loop, basic_block bb,
1227 tree loop_bound_var,
1228 tree loop_iv_base_var,
1229 enum tree_code loop_bound_code,
1230 int loop_bound_step)
1232 gimple stmt;
1233 tree compare_var, compare_base;
1234 enum tree_code compare_code;
1235 tree compare_step_var;
1236 edge then_edge;
1237 edge_iterator ei;
1239 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1240 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1241 || predicted_by_p (bb, PRED_LOOP_EXIT))
1242 return;
1244 stmt = last_stmt (bb);
1245 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1246 return;
1247 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1248 loop, &compare_var,
1249 &compare_code,
1250 &compare_step_var,
1251 &compare_base))
1252 return;
1254 /* Find the taken edge. */
1255 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1256 if (then_edge->flags & EDGE_TRUE_VALUE)
1257 break;
1259 /* When comparing an IV to a loop invariant, NE is more likely to be
1260 taken while EQ is more likely to be not-taken. */
1261 if (compare_code == NE_EXPR)
1263 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1264 return;
1266 else if (compare_code == EQ_EXPR)
1268 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1269 return;
1272 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1273 return;
1275 /* If loop bound, base and compare bound are all constants, we can
1276 calculate the probability directly. */
1277 if (tree_fits_shwi_p (loop_bound_var)
1278 && tree_fits_shwi_p (compare_var)
1279 && tree_fits_shwi_p (compare_base))
1281 int probability;
1282 bool overflow, overall_overflow = false;
1283 widest_int compare_count, tem;
1285 /* (loop_bound - base) / compare_step */
1286 tem = wi::sub (wi::to_widest (loop_bound_var),
1287 wi::to_widest (compare_base), SIGNED, &overflow);
1288 overall_overflow |= overflow;
1289 widest_int loop_count = wi::div_trunc (tem,
1290 wi::to_widest (compare_step_var),
1291 SIGNED, &overflow);
1292 overall_overflow |= overflow;
1294 if (!wi::neg_p (wi::to_widest (compare_step_var))
1295 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1297 /* (loop_bound - compare_bound) / compare_step */
1298 tem = wi::sub (wi::to_widest (loop_bound_var),
1299 wi::to_widest (compare_var), SIGNED, &overflow);
1300 overall_overflow |= overflow;
1301 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1302 SIGNED, &overflow);
1303 overall_overflow |= overflow;
1305 else
1307 /* (compare_bound - base) / compare_step */
1308 tem = wi::sub (wi::to_widest (compare_var),
1309 wi::to_widest (compare_base), SIGNED, &overflow);
1310 overall_overflow |= overflow;
1311 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1312 SIGNED, &overflow);
1313 overall_overflow |= overflow;
1315 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1316 ++compare_count;
1317 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1318 ++loop_count;
1319 if (wi::neg_p (compare_count))
1320 compare_count = 0;
1321 if (wi::neg_p (loop_count))
1322 loop_count = 0;
1323 if (loop_count == 0)
1324 probability = 0;
1325 else if (wi::cmps (compare_count, loop_count) == 1)
1326 probability = REG_BR_PROB_BASE;
1327 else
1329 tem = compare_count * REG_BR_PROB_BASE;
1330 tem = wi::udiv_trunc (tem, loop_count);
1331 probability = tem.to_uhwi ();
1334 if (!overall_overflow)
1335 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1337 return;
1340 if (expr_coherent_p (loop_bound_var, compare_var))
1342 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1343 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1346 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1347 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348 else if (loop_bound_code == NE_EXPR)
1350 /* If the loop backedge condition is "(i != bound)", we do
1351 the comparison based on the step of IV:
1352 * step < 0 : backedge condition is like (i > bound)
1353 * step > 0 : backedge condition is like (i < bound) */
1354 gcc_assert (loop_bound_step != 0);
1355 if (loop_bound_step > 0
1356 && (compare_code == LT_EXPR
1357 || compare_code == LE_EXPR))
1358 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1359 else if (loop_bound_step < 0
1360 && (compare_code == GT_EXPR
1361 || compare_code == GE_EXPR))
1362 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1363 else
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1366 else
1367 /* The branch is predicted not-taken if loop_bound_code is
1368 opposite with compare_code. */
1369 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1371 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1373 /* For cases like:
1374 for (i = s; i < h; i++)
1375 if (i > s + 2) ....
1376 The branch should be predicted taken. */
1377 if (loop_bound_step > 0
1378 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1379 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1380 else if (loop_bound_step < 0
1381 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1382 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383 else
1384 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1388 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1389 exits are resulted from short-circuit conditions that will generate an
1390 if_tmp. E.g.:
1392 if (foo() || global > 10)
1393 break;
1395 This will be translated into:
1397 BB3:
1398 loop header...
1399 BB4:
1400 if foo() goto BB6 else goto BB5
1401 BB5:
1402 if global > 10 goto BB6 else goto BB7
1403 BB6:
1404 goto BB7
1405 BB7:
1406 iftmp = (PHI 0(BB5), 1(BB6))
1407 if iftmp == 1 goto BB8 else goto BB3
1408 BB8:
1409 outside of the loop...
1411 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1412 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1413 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1414 exits to predict them using PRED_LOOP_EXIT. */
1416 static void
1417 predict_extra_loop_exits (edge exit_edge)
1419 unsigned i;
1420 bool check_value_one;
1421 gimple lhs_def_stmt;
1422 gphi *phi_stmt;
1423 tree cmp_rhs, cmp_lhs;
1424 gimple last;
1425 gcond *cmp_stmt;
1427 last = last_stmt (exit_edge->src);
1428 if (!last)
1429 return;
1430 cmp_stmt = dyn_cast <gcond *> (last);
1431 if (!cmp_stmt)
1432 return;
1434 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1435 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1436 if (!TREE_CONSTANT (cmp_rhs)
1437 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1438 return;
1439 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1440 return;
1442 /* If check_value_one is true, only the phi_args with value '1' will lead
1443 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1444 loop exit. */
1445 check_value_one = (((integer_onep (cmp_rhs))
1446 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1447 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1449 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1450 if (!lhs_def_stmt)
1451 return;
1453 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1454 if (!phi_stmt)
1455 return;
1457 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1459 edge e1;
1460 edge_iterator ei;
1461 tree val = gimple_phi_arg_def (phi_stmt, i);
1462 edge e = gimple_phi_arg_edge (phi_stmt, i);
1464 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1465 continue;
1466 if ((check_value_one ^ integer_onep (val)) == 1)
1467 continue;
1468 if (EDGE_COUNT (e->src->succs) != 1)
1470 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1471 continue;
1474 FOR_EACH_EDGE (e1, ei, e->src->preds)
1475 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1479 /* Predict edge probabilities by exploiting loop structure. */
1481 static void
1482 predict_loops (void)
1484 struct loop *loop;
1486 /* Try to predict out blocks in a loop that are not part of a
1487 natural loop. */
1488 FOR_EACH_LOOP (loop, 0)
1490 basic_block bb, *bbs;
1491 unsigned j, n_exits;
1492 vec<edge> exits;
1493 struct tree_niter_desc niter_desc;
1494 edge ex;
1495 struct nb_iter_bound *nb_iter;
1496 enum tree_code loop_bound_code = ERROR_MARK;
1497 tree loop_bound_step = NULL;
1498 tree loop_bound_var = NULL;
1499 tree loop_iv_base = NULL;
1500 gcond *stmt = NULL;
1502 exits = get_loop_exit_edges (loop);
1503 n_exits = exits.length ();
1504 if (!n_exits)
1506 exits.release ();
1507 continue;
1510 FOR_EACH_VEC_ELT (exits, j, ex)
1512 tree niter = NULL;
1513 HOST_WIDE_INT nitercst;
1514 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1515 int probability;
1516 enum br_predictor predictor;
1518 predict_extra_loop_exits (ex);
1520 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1521 niter = niter_desc.niter;
1522 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1523 niter = loop_niter_by_eval (loop, ex);
1525 if (TREE_CODE (niter) == INTEGER_CST)
1527 if (tree_fits_uhwi_p (niter)
1528 && max
1529 && compare_tree_int (niter, max - 1) == -1)
1530 nitercst = tree_to_uhwi (niter) + 1;
1531 else
1532 nitercst = max;
1533 predictor = PRED_LOOP_ITERATIONS;
1535 /* If we have just one exit and we can derive some information about
1536 the number of iterations of the loop from the statements inside
1537 the loop, use it to predict this exit. */
1538 else if (n_exits == 1)
1540 nitercst = estimated_stmt_executions_int (loop);
1541 if (nitercst < 0)
1542 continue;
1543 if (nitercst > max)
1544 nitercst = max;
1546 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1548 else
1549 continue;
1551 /* If the prediction for number of iterations is zero, do not
1552 predict the exit edges. */
1553 if (nitercst == 0)
1554 continue;
1556 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1557 predict_edge (ex, predictor, probability);
1559 exits.release ();
1561 /* Find information about loop bound variables. */
1562 for (nb_iter = loop->bounds; nb_iter;
1563 nb_iter = nb_iter->next)
1564 if (nb_iter->stmt
1565 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1567 stmt = as_a <gcond *> (nb_iter->stmt);
1568 break;
1570 if (!stmt && last_stmt (loop->header)
1571 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1572 stmt = as_a <gcond *> (last_stmt (loop->header));
1573 if (stmt)
1574 is_comparison_with_loop_invariant_p (stmt, loop,
1575 &loop_bound_var,
1576 &loop_bound_code,
1577 &loop_bound_step,
1578 &loop_iv_base);
1580 bbs = get_loop_body (loop);
1582 for (j = 0; j < loop->num_nodes; j++)
1584 int header_found = 0;
1585 edge e;
1586 edge_iterator ei;
1588 bb = bbs[j];
1590 /* Bypass loop heuristics on continue statement. These
1591 statements construct loops via "non-loop" constructs
1592 in the source language and are better to be handled
1593 separately. */
1594 if (predicted_by_p (bb, PRED_CONTINUE))
1595 continue;
1597 /* Loop branch heuristics - predict an edge back to a
1598 loop's head as taken. */
1599 if (bb == loop->latch)
1601 e = find_edge (loop->latch, loop->header);
1602 if (e)
1604 header_found = 1;
1605 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1609 /* Loop exit heuristics - predict an edge exiting the loop if the
1610 conditional has no loop header successors as not taken. */
1611 if (!header_found
1612 /* If we already used more reliable loop exit predictors, do not
1613 bother with PRED_LOOP_EXIT. */
1614 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1615 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1617 /* For loop with many exits we don't want to predict all exits
1618 with the pretty large probability, because if all exits are
1619 considered in row, the loop would be predicted to iterate
1620 almost never. The code to divide probability by number of
1621 exits is very rough. It should compute the number of exits
1622 taken in each patch through function (not the overall number
1623 of exits that might be a lot higher for loops with wide switch
1624 statements in them) and compute n-th square root.
1626 We limit the minimal probability by 2% to avoid
1627 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1628 as this was causing regression in perl benchmark containing such
1629 a wide loop. */
1631 int probability = ((REG_BR_PROB_BASE
1632 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1633 / n_exits);
1634 if (probability < HITRATE (2))
1635 probability = HITRATE (2);
1636 FOR_EACH_EDGE (e, ei, bb->succs)
1637 if (e->dest->index < NUM_FIXED_BLOCKS
1638 || !flow_bb_inside_loop_p (loop, e->dest))
1639 predict_edge (e, PRED_LOOP_EXIT, probability);
1641 if (loop_bound_var)
1642 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1643 loop_bound_code,
1644 tree_to_shwi (loop_bound_step));
1647 /* Free basic blocks from get_loop_body. */
1648 free (bbs);
1652 /* Attempt to predict probabilities of BB outgoing edges using local
1653 properties. */
1654 static void
1655 bb_estimate_probability_locally (basic_block bb)
1657 rtx_insn *last_insn = BB_END (bb);
1658 rtx cond;
1660 if (! can_predict_insn_p (last_insn))
1661 return;
1662 cond = get_condition (last_insn, NULL, false, false);
1663 if (! cond)
1664 return;
1666 /* Try "pointer heuristic."
1667 A comparison ptr == 0 is predicted as false.
1668 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1669 if (COMPARISON_P (cond)
1670 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1671 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1673 if (GET_CODE (cond) == EQ)
1674 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1675 else if (GET_CODE (cond) == NE)
1676 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1678 else
1680 /* Try "opcode heuristic."
1681 EQ tests are usually false and NE tests are usually true. Also,
1682 most quantities are positive, so we can make the appropriate guesses
1683 about signed comparisons against zero. */
1684 switch (GET_CODE (cond))
1686 case CONST_INT:
1687 /* Unconditional branch. */
1688 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1689 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1690 break;
1692 case EQ:
1693 case UNEQ:
1694 /* Floating point comparisons appears to behave in a very
1695 unpredictable way because of special role of = tests in
1696 FP code. */
1697 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1699 /* Comparisons with 0 are often used for booleans and there is
1700 nothing useful to predict about them. */
1701 else if (XEXP (cond, 1) == const0_rtx
1702 || XEXP (cond, 0) == const0_rtx)
1704 else
1705 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1706 break;
1708 case NE:
1709 case LTGT:
1710 /* Floating point comparisons appears to behave in a very
1711 unpredictable way because of special role of = tests in
1712 FP code. */
1713 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1715 /* Comparisons with 0 are often used for booleans and there is
1716 nothing useful to predict about them. */
1717 else if (XEXP (cond, 1) == const0_rtx
1718 || XEXP (cond, 0) == const0_rtx)
1720 else
1721 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1722 break;
1724 case ORDERED:
1725 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1726 break;
1728 case UNORDERED:
1729 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1730 break;
1732 case LE:
1733 case LT:
1734 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1735 || XEXP (cond, 1) == constm1_rtx)
1736 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1737 break;
1739 case GE:
1740 case GT:
1741 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1742 || XEXP (cond, 1) == constm1_rtx)
1743 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1744 break;
1746 default:
1747 break;
1751 /* Set edge->probability for each successor edge of BB. */
1752 void
1753 guess_outgoing_edge_probabilities (basic_block bb)
1755 bb_estimate_probability_locally (bb);
1756 combine_predictions_for_insn (BB_END (bb), bb);
1759 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1761 /* Helper function for expr_expected_value. */
1763 static tree
1764 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1765 tree op1, bitmap visited, enum br_predictor *predictor)
1767 gimple def;
1769 if (predictor)
1770 *predictor = PRED_UNCONDITIONAL;
1772 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1774 if (TREE_CONSTANT (op0))
1775 return op0;
1777 if (code != SSA_NAME)
1778 return NULL_TREE;
1780 def = SSA_NAME_DEF_STMT (op0);
1782 /* If we were already here, break the infinite cycle. */
1783 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1784 return NULL;
1786 if (gimple_code (def) == GIMPLE_PHI)
1788 /* All the arguments of the PHI node must have the same constant
1789 length. */
1790 int i, n = gimple_phi_num_args (def);
1791 tree val = NULL, new_val;
1793 for (i = 0; i < n; i++)
1795 tree arg = PHI_ARG_DEF (def, i);
1796 enum br_predictor predictor2;
1798 /* If this PHI has itself as an argument, we cannot
1799 determine the string length of this argument. However,
1800 if we can find an expected constant value for the other
1801 PHI args then we can still be sure that this is
1802 likely a constant. So be optimistic and just
1803 continue with the next argument. */
1804 if (arg == PHI_RESULT (def))
1805 continue;
1807 new_val = expr_expected_value (arg, visited, &predictor2);
1809 /* It is difficult to combine value predictors. Simply assume
1810 that later predictor is weaker and take its prediction. */
1811 if (predictor && *predictor < predictor2)
1812 *predictor = predictor2;
1813 if (!new_val)
1814 return NULL;
1815 if (!val)
1816 val = new_val;
1817 else if (!operand_equal_p (val, new_val, false))
1818 return NULL;
1820 return val;
1822 if (is_gimple_assign (def))
1824 if (gimple_assign_lhs (def) != op0)
1825 return NULL;
1827 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1828 gimple_assign_rhs1 (def),
1829 gimple_assign_rhs_code (def),
1830 gimple_assign_rhs2 (def),
1831 visited, predictor);
1834 if (is_gimple_call (def))
1836 tree decl = gimple_call_fndecl (def);
1837 if (!decl)
1839 if (gimple_call_internal_p (def)
1840 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1842 gcc_assert (gimple_call_num_args (def) == 3);
1843 tree val = gimple_call_arg (def, 0);
1844 if (TREE_CONSTANT (val))
1845 return val;
1846 if (predictor)
1848 tree val2 = gimple_call_arg (def, 2);
1849 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1850 && tree_fits_uhwi_p (val2)
1851 && tree_to_uhwi (val2) < END_PREDICTORS);
1852 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1854 return gimple_call_arg (def, 1);
1856 return NULL;
1858 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1859 switch (DECL_FUNCTION_CODE (decl))
1861 case BUILT_IN_EXPECT:
1863 tree val;
1864 if (gimple_call_num_args (def) != 2)
1865 return NULL;
1866 val = gimple_call_arg (def, 0);
1867 if (TREE_CONSTANT (val))
1868 return val;
1869 if (predictor)
1870 *predictor = PRED_BUILTIN_EXPECT;
1871 return gimple_call_arg (def, 1);
1874 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1875 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1876 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1877 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1878 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1879 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1880 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1881 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1882 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1883 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1884 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1885 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1886 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1887 /* Assume that any given atomic operation has low contention,
1888 and thus the compare-and-swap operation succeeds. */
1889 if (predictor)
1890 *predictor = PRED_COMPARE_AND_SWAP;
1891 return boolean_true_node;
1892 default:
1893 break;
1897 return NULL;
1900 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1902 tree res;
1903 enum br_predictor predictor2;
1904 op0 = expr_expected_value (op0, visited, predictor);
1905 if (!op0)
1906 return NULL;
1907 op1 = expr_expected_value (op1, visited, &predictor2);
1908 if (predictor && *predictor < predictor2)
1909 *predictor = predictor2;
1910 if (!op1)
1911 return NULL;
1912 res = fold_build2 (code, type, op0, op1);
1913 if (TREE_CONSTANT (res))
1914 return res;
1915 return NULL;
1917 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1919 tree res;
1920 op0 = expr_expected_value (op0, visited, predictor);
1921 if (!op0)
1922 return NULL;
1923 res = fold_build1 (code, type, op0);
1924 if (TREE_CONSTANT (res))
1925 return res;
1926 return NULL;
1928 return NULL;
1931 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1932 The function is used by builtin_expect branch predictor so the evidence
1933 must come from this construct and additional possible constant folding.
1935 We may want to implement more involved value guess (such as value range
1936 propagation based prediction), but such tricks shall go to new
1937 implementation. */
1939 static tree
1940 expr_expected_value (tree expr, bitmap visited,
1941 enum br_predictor *predictor)
1943 enum tree_code code;
1944 tree op0, op1;
1946 if (TREE_CONSTANT (expr))
1948 if (predictor)
1949 *predictor = PRED_UNCONDITIONAL;
1950 return expr;
1953 extract_ops_from_tree (expr, &code, &op0, &op1);
1954 return expr_expected_value_1 (TREE_TYPE (expr),
1955 op0, code, op1, visited, predictor);
1958 /* Predict using opcode of the last statement in basic block. */
1959 static void
1960 tree_predict_by_opcode (basic_block bb)
1962 gimple stmt = last_stmt (bb);
1963 edge then_edge;
1964 tree op0, op1;
1965 tree type;
1966 tree val;
1967 enum tree_code cmp;
1968 bitmap visited;
1969 edge_iterator ei;
1970 enum br_predictor predictor;
1972 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1973 return;
1974 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1975 if (then_edge->flags & EDGE_TRUE_VALUE)
1976 break;
1977 op0 = gimple_cond_lhs (stmt);
1978 op1 = gimple_cond_rhs (stmt);
1979 cmp = gimple_cond_code (stmt);
1980 type = TREE_TYPE (op0);
1981 visited = BITMAP_ALLOC (NULL);
1982 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1983 &predictor);
1984 BITMAP_FREE (visited);
1985 if (val && TREE_CODE (val) == INTEGER_CST)
1987 if (predictor == PRED_BUILTIN_EXPECT)
1989 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1991 gcc_assert (percent >= 0 && percent <= 100);
1992 if (integer_zerop (val))
1993 percent = 100 - percent;
1994 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1996 else
1997 predict_edge (then_edge, predictor,
1998 integer_zerop (val) ? NOT_TAKEN : TAKEN);
2000 /* Try "pointer heuristic."
2001 A comparison ptr == 0 is predicted as false.
2002 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2003 if (POINTER_TYPE_P (type))
2005 if (cmp == EQ_EXPR)
2006 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2007 else if (cmp == NE_EXPR)
2008 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2010 else
2012 /* Try "opcode heuristic."
2013 EQ tests are usually false and NE tests are usually true. Also,
2014 most quantities are positive, so we can make the appropriate guesses
2015 about signed comparisons against zero. */
2016 switch (cmp)
2018 case EQ_EXPR:
2019 case UNEQ_EXPR:
2020 /* Floating point comparisons appears to behave in a very
2021 unpredictable way because of special role of = tests in
2022 FP code. */
2023 if (FLOAT_TYPE_P (type))
2025 /* Comparisons with 0 are often used for booleans and there is
2026 nothing useful to predict about them. */
2027 else if (integer_zerop (op0) || integer_zerop (op1))
2029 else
2030 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2031 break;
2033 case NE_EXPR:
2034 case LTGT_EXPR:
2035 /* Floating point comparisons appears to behave in a very
2036 unpredictable way because of special role of = tests in
2037 FP code. */
2038 if (FLOAT_TYPE_P (type))
2040 /* Comparisons with 0 are often used for booleans and there is
2041 nothing useful to predict about them. */
2042 else if (integer_zerop (op0)
2043 || integer_zerop (op1))
2045 else
2046 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2047 break;
2049 case ORDERED_EXPR:
2050 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2051 break;
2053 case UNORDERED_EXPR:
2054 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2055 break;
2057 case LE_EXPR:
2058 case LT_EXPR:
2059 if (integer_zerop (op1)
2060 || integer_onep (op1)
2061 || integer_all_onesp (op1)
2062 || real_zerop (op1)
2063 || real_onep (op1)
2064 || real_minus_onep (op1))
2065 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2066 break;
2068 case GE_EXPR:
2069 case GT_EXPR:
2070 if (integer_zerop (op1)
2071 || integer_onep (op1)
2072 || integer_all_onesp (op1)
2073 || real_zerop (op1)
2074 || real_onep (op1)
2075 || real_minus_onep (op1))
2076 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2077 break;
2079 default:
2080 break;
2084 /* Try to guess whether the value of return means error code. */
2086 static enum br_predictor
2087 return_prediction (tree val, enum prediction *prediction)
2089 /* VOID. */
2090 if (!val)
2091 return PRED_NO_PREDICTION;
2092 /* Different heuristics for pointers and scalars. */
2093 if (POINTER_TYPE_P (TREE_TYPE (val)))
2095 /* NULL is usually not returned. */
2096 if (integer_zerop (val))
2098 *prediction = NOT_TAKEN;
2099 return PRED_NULL_RETURN;
2102 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2104 /* Negative return values are often used to indicate
2105 errors. */
2106 if (TREE_CODE (val) == INTEGER_CST
2107 && tree_int_cst_sgn (val) < 0)
2109 *prediction = NOT_TAKEN;
2110 return PRED_NEGATIVE_RETURN;
2112 /* Constant return values seems to be commonly taken.
2113 Zero/one often represent booleans so exclude them from the
2114 heuristics. */
2115 if (TREE_CONSTANT (val)
2116 && (!integer_zerop (val) && !integer_onep (val)))
2118 *prediction = TAKEN;
2119 return PRED_CONST_RETURN;
2122 return PRED_NO_PREDICTION;
2125 /* Find the basic block with return expression and look up for possible
2126 return value trying to apply RETURN_PREDICTION heuristics. */
2127 static void
2128 apply_return_prediction (void)
2130 greturn *return_stmt = NULL;
2131 tree return_val;
2132 edge e;
2133 gphi *phi;
2134 int phi_num_args, i;
2135 enum br_predictor pred;
2136 enum prediction direction;
2137 edge_iterator ei;
2139 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2141 gimple last = last_stmt (e->src);
2142 if (last
2143 && gimple_code (last) == GIMPLE_RETURN)
2145 return_stmt = as_a <greturn *> (last);
2146 break;
2149 if (!e)
2150 return;
2151 return_val = gimple_return_retval (return_stmt);
2152 if (!return_val)
2153 return;
2154 if (TREE_CODE (return_val) != SSA_NAME
2155 || !SSA_NAME_DEF_STMT (return_val)
2156 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2157 return;
2158 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2159 phi_num_args = gimple_phi_num_args (phi);
2160 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2162 /* Avoid the degenerate case where all return values form the function
2163 belongs to same category (ie they are all positive constants)
2164 so we can hardly say something about them. */
2165 for (i = 1; i < phi_num_args; i++)
2166 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2167 break;
2168 if (i != phi_num_args)
2169 for (i = 0; i < phi_num_args; i++)
2171 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2172 if (pred != PRED_NO_PREDICTION)
2173 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2174 direction);
2178 /* Look for basic block that contains unlikely to happen events
2179 (such as noreturn calls) and mark all paths leading to execution
2180 of this basic blocks as unlikely. */
2182 static void
2183 tree_bb_level_predictions (void)
2185 basic_block bb;
2186 bool has_return_edges = false;
2187 edge e;
2188 edge_iterator ei;
2190 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2191 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2193 has_return_edges = true;
2194 break;
2197 apply_return_prediction ();
2199 FOR_EACH_BB_FN (bb, cfun)
2201 gimple_stmt_iterator gsi;
2203 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2205 gimple stmt = gsi_stmt (gsi);
2206 tree decl;
2208 if (is_gimple_call (stmt))
2210 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2211 && has_return_edges)
2212 predict_paths_leading_to (bb, PRED_NORETURN,
2213 NOT_TAKEN);
2214 decl = gimple_call_fndecl (stmt);
2215 if (decl
2216 && lookup_attribute ("cold",
2217 DECL_ATTRIBUTES (decl)))
2218 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2219 NOT_TAKEN);
2221 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2223 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2224 gimple_predict_outcome (stmt));
2225 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2226 hints to callers. */
2232 #ifdef ENABLE_CHECKING
2234 /* Callback for hash_map::traverse, asserts that the pointer map is
2235 empty. */
2237 bool
2238 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2239 void *)
2241 gcc_assert (!value);
2242 return false;
2244 #endif
2246 /* Predict branch probabilities and estimate profile for basic block BB. */
2248 static void
2249 tree_estimate_probability_bb (basic_block bb)
2251 edge e;
2252 edge_iterator ei;
2253 gimple last;
2255 FOR_EACH_EDGE (e, ei, bb->succs)
2257 /* Predict edges to user labels with attributes. */
2258 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2260 gimple_stmt_iterator gi;
2261 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2263 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2264 tree decl;
2266 if (!label_stmt)
2267 break;
2268 decl = gimple_label_label (label_stmt);
2269 if (DECL_ARTIFICIAL (decl))
2270 continue;
2272 /* Finally, we have a user-defined label. */
2273 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2274 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2275 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2276 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2280 /* Predict early returns to be probable, as we've already taken
2281 care for error returns and other cases are often used for
2282 fast paths through function.
2284 Since we've already removed the return statements, we are
2285 looking for CFG like:
2287 if (conditional)
2290 goto return_block
2292 some other blocks
2293 return_block:
2294 return_stmt. */
2295 if (e->dest != bb->next_bb
2296 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2297 && single_succ_p (e->dest)
2298 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2299 && (last = last_stmt (e->dest)) != NULL
2300 && gimple_code (last) == GIMPLE_RETURN)
2302 edge e1;
2303 edge_iterator ei1;
2305 if (single_succ_p (bb))
2307 FOR_EACH_EDGE (e1, ei1, bb->preds)
2308 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2309 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2310 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2311 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2313 else
2314 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2315 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2316 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2317 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2320 /* Look for block we are guarding (ie we dominate it,
2321 but it doesn't postdominate us). */
2322 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2323 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2324 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2326 gimple_stmt_iterator bi;
2328 /* The call heuristic claims that a guarded function call
2329 is improbable. This is because such calls are often used
2330 to signal exceptional situations such as printing error
2331 messages. */
2332 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2333 gsi_next (&bi))
2335 gimple stmt = gsi_stmt (bi);
2336 if (is_gimple_call (stmt)
2337 /* Constant and pure calls are hardly used to signalize
2338 something exceptional. */
2339 && gimple_has_side_effects (stmt))
2341 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2342 break;
2347 tree_predict_by_opcode (bb);
2350 /* Predict branch probabilities and estimate profile of the tree CFG.
2351 This function can be called from the loop optimizers to recompute
2352 the profile information. */
2354 void
2355 tree_estimate_probability (void)
2357 basic_block bb;
2359 add_noreturn_fake_exit_edges ();
2360 connect_infinite_loops_to_exit ();
2361 /* We use loop_niter_by_eval, which requires that the loops have
2362 preheaders. */
2363 create_preheaders (CP_SIMPLE_PREHEADERS);
2364 calculate_dominance_info (CDI_POST_DOMINATORS);
2366 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2367 tree_bb_level_predictions ();
2368 record_loop_exits ();
2370 if (number_of_loops (cfun) > 1)
2371 predict_loops ();
2373 FOR_EACH_BB_FN (bb, cfun)
2374 tree_estimate_probability_bb (bb);
2376 FOR_EACH_BB_FN (bb, cfun)
2377 combine_predictions_for_bb (bb);
2379 #ifdef ENABLE_CHECKING
2380 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2381 #endif
2382 delete bb_predictions;
2383 bb_predictions = NULL;
2385 estimate_bb_frequencies (false);
2386 free_dominance_info (CDI_POST_DOMINATORS);
2387 remove_fake_exit_edges ();
2390 /* Predict edges to successors of CUR whose sources are not postdominated by
2391 BB by PRED and recurse to all postdominators. */
2393 static void
2394 predict_paths_for_bb (basic_block cur, basic_block bb,
2395 enum br_predictor pred,
2396 enum prediction taken,
2397 bitmap visited)
2399 edge e;
2400 edge_iterator ei;
2401 basic_block son;
2403 /* We are looking for all edges forming edge cut induced by
2404 set of all blocks postdominated by BB. */
2405 FOR_EACH_EDGE (e, ei, cur->preds)
2406 if (e->src->index >= NUM_FIXED_BLOCKS
2407 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2409 edge e2;
2410 edge_iterator ei2;
2411 bool found = false;
2413 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2414 if (e->flags & (EDGE_EH | EDGE_FAKE))
2415 continue;
2416 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2418 /* See if there is an edge from e->src that is not abnormal
2419 and does not lead to BB. */
2420 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2421 if (e2 != e
2422 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2423 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2425 found = true;
2426 break;
2429 /* If there is non-abnormal path leaving e->src, predict edge
2430 using predictor. Otherwise we need to look for paths
2431 leading to e->src.
2433 The second may lead to infinite loop in the case we are predicitng
2434 regions that are only reachable by abnormal edges. We simply
2435 prevent visiting given BB twice. */
2436 if (found)
2437 predict_edge_def (e, pred, taken);
2438 else if (bitmap_set_bit (visited, e->src->index))
2439 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2441 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2442 son;
2443 son = next_dom_son (CDI_POST_DOMINATORS, son))
2444 predict_paths_for_bb (son, bb, pred, taken, visited);
2447 /* Sets branch probabilities according to PREDiction and
2448 FLAGS. */
2450 static void
2451 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2452 enum prediction taken)
2454 bitmap visited = BITMAP_ALLOC (NULL);
2455 predict_paths_for_bb (bb, bb, pred, taken, visited);
2456 BITMAP_FREE (visited);
2459 /* Like predict_paths_leading_to but take edge instead of basic block. */
2461 static void
2462 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2463 enum prediction taken)
2465 bool has_nonloop_edge = false;
2466 edge_iterator ei;
2467 edge e2;
2469 basic_block bb = e->src;
2470 FOR_EACH_EDGE (e2, ei, bb->succs)
2471 if (e2->dest != e->src && e2->dest != e->dest
2472 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2473 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2475 has_nonloop_edge = true;
2476 break;
2478 if (!has_nonloop_edge)
2480 bitmap visited = BITMAP_ALLOC (NULL);
2481 predict_paths_for_bb (bb, bb, pred, taken, visited);
2482 BITMAP_FREE (visited);
2484 else
2485 predict_edge_def (e, pred, taken);
2488 /* This is used to carry information about basic blocks. It is
2489 attached to the AUX field of the standard CFG block. */
2491 struct block_info
2493 /* Estimated frequency of execution of basic_block. */
2494 sreal frequency;
2496 /* To keep queue of basic blocks to process. */
2497 basic_block next;
2499 /* Number of predecessors we need to visit first. */
2500 int npredecessors;
2503 /* Similar information for edges. */
2504 struct edge_prob_info
2506 /* In case edge is a loopback edge, the probability edge will be reached
2507 in case header is. Estimated number of iterations of the loop can be
2508 then computed as 1 / (1 - back_edge_prob). */
2509 sreal back_edge_prob;
2510 /* True if the edge is a loopback edge in the natural loop. */
2511 unsigned int back_edge:1;
2514 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2515 #undef EDGE_INFO
2516 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2518 /* Helper function for estimate_bb_frequencies.
2519 Propagate the frequencies in blocks marked in
2520 TOVISIT, starting in HEAD. */
2522 static void
2523 propagate_freq (basic_block head, bitmap tovisit)
2525 basic_block bb;
2526 basic_block last;
2527 unsigned i;
2528 edge e;
2529 basic_block nextbb;
2530 bitmap_iterator bi;
2532 /* For each basic block we need to visit count number of his predecessors
2533 we need to visit first. */
2534 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2536 edge_iterator ei;
2537 int count = 0;
2539 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2541 FOR_EACH_EDGE (e, ei, bb->preds)
2543 bool visit = bitmap_bit_p (tovisit, e->src->index);
2545 if (visit && !(e->flags & EDGE_DFS_BACK))
2546 count++;
2547 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2548 fprintf (dump_file,
2549 "Irreducible region hit, ignoring edge to %i->%i\n",
2550 e->src->index, bb->index);
2552 BLOCK_INFO (bb)->npredecessors = count;
2553 /* When function never returns, we will never process exit block. */
2554 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2555 bb->count = bb->frequency = 0;
2558 BLOCK_INFO (head)->frequency = 1;
2559 last = head;
2560 for (bb = head; bb; bb = nextbb)
2562 edge_iterator ei;
2563 sreal cyclic_probability = 0;
2564 sreal frequency = 0;
2566 nextbb = BLOCK_INFO (bb)->next;
2567 BLOCK_INFO (bb)->next = NULL;
2569 /* Compute frequency of basic block. */
2570 if (bb != head)
2572 #ifdef ENABLE_CHECKING
2573 FOR_EACH_EDGE (e, ei, bb->preds)
2574 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2575 || (e->flags & EDGE_DFS_BACK));
2576 #endif
2578 FOR_EACH_EDGE (e, ei, bb->preds)
2579 if (EDGE_INFO (e)->back_edge)
2581 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2583 else if (!(e->flags & EDGE_DFS_BACK))
2585 /* frequency += (e->probability
2586 * BLOCK_INFO (e->src)->frequency /
2587 REG_BR_PROB_BASE); */
2589 sreal tmp = e->probability;
2590 tmp *= BLOCK_INFO (e->src)->frequency;
2591 tmp *= real_inv_br_prob_base;
2592 frequency += tmp;
2595 if (cyclic_probability == 0)
2597 BLOCK_INFO (bb)->frequency = frequency;
2599 else
2601 if (cyclic_probability > real_almost_one)
2602 cyclic_probability = real_almost_one;
2604 /* BLOCK_INFO (bb)->frequency = frequency
2605 / (1 - cyclic_probability) */
2607 cyclic_probability = sreal (1) - cyclic_probability;
2608 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2612 bitmap_clear_bit (tovisit, bb->index);
2614 e = find_edge (bb, head);
2615 if (e)
2617 /* EDGE_INFO (e)->back_edge_prob
2618 = ((e->probability * BLOCK_INFO (bb)->frequency)
2619 / REG_BR_PROB_BASE); */
2621 sreal tmp = e->probability;
2622 tmp *= BLOCK_INFO (bb)->frequency;
2623 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2626 /* Propagate to successor blocks. */
2627 FOR_EACH_EDGE (e, ei, bb->succs)
2628 if (!(e->flags & EDGE_DFS_BACK)
2629 && BLOCK_INFO (e->dest)->npredecessors)
2631 BLOCK_INFO (e->dest)->npredecessors--;
2632 if (!BLOCK_INFO (e->dest)->npredecessors)
2634 if (!nextbb)
2635 nextbb = e->dest;
2636 else
2637 BLOCK_INFO (last)->next = e->dest;
2639 last = e->dest;
2645 /* Estimate frequencies in loops at same nest level. */
2647 static void
2648 estimate_loops_at_level (struct loop *first_loop)
2650 struct loop *loop;
2652 for (loop = first_loop; loop; loop = loop->next)
2654 edge e;
2655 basic_block *bbs;
2656 unsigned i;
2657 bitmap tovisit = BITMAP_ALLOC (NULL);
2659 estimate_loops_at_level (loop->inner);
2661 /* Find current loop back edge and mark it. */
2662 e = loop_latch_edge (loop);
2663 EDGE_INFO (e)->back_edge = 1;
2665 bbs = get_loop_body (loop);
2666 for (i = 0; i < loop->num_nodes; i++)
2667 bitmap_set_bit (tovisit, bbs[i]->index);
2668 free (bbs);
2669 propagate_freq (loop->header, tovisit);
2670 BITMAP_FREE (tovisit);
2674 /* Propagates frequencies through structure of loops. */
2676 static void
2677 estimate_loops (void)
2679 bitmap tovisit = BITMAP_ALLOC (NULL);
2680 basic_block bb;
2682 /* Start by estimating the frequencies in the loops. */
2683 if (number_of_loops (cfun) > 1)
2684 estimate_loops_at_level (current_loops->tree_root->inner);
2686 /* Now propagate the frequencies through all the blocks. */
2687 FOR_ALL_BB_FN (bb, cfun)
2689 bitmap_set_bit (tovisit, bb->index);
2691 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2692 BITMAP_FREE (tovisit);
2695 /* Drop the profile for NODE to guessed, and update its frequency based on
2696 whether it is expected to be hot given the CALL_COUNT. */
2698 static void
2699 drop_profile (struct cgraph_node *node, gcov_type call_count)
2701 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2702 /* In the case where this was called by another function with a
2703 dropped profile, call_count will be 0. Since there are no
2704 non-zero call counts to this function, we don't know for sure
2705 whether it is hot, and therefore it will be marked normal below. */
2706 bool hot = maybe_hot_count_p (NULL, call_count);
2708 if (dump_file)
2709 fprintf (dump_file,
2710 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2711 node->name (), node->order,
2712 hot ? "Function is hot" : "Function is normal");
2713 /* We only expect to miss profiles for functions that are reached
2714 via non-zero call edges in cases where the function may have
2715 been linked from another module or library (COMDATs and extern
2716 templates). See the comments below for handle_missing_profiles.
2717 Also, only warn in cases where the missing counts exceed the
2718 number of training runs. In certain cases with an execv followed
2719 by a no-return call the profile for the no-return call is not
2720 dumped and there can be a mismatch. */
2721 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2722 && call_count > profile_info->runs)
2724 if (flag_profile_correction)
2726 if (dump_file)
2727 fprintf (dump_file,
2728 "Missing counts for called function %s/%i\n",
2729 node->name (), node->order);
2731 else
2732 warning (0, "Missing counts for called function %s/%i",
2733 node->name (), node->order);
2736 profile_status_for_fn (fn)
2737 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2738 node->frequency
2739 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2742 /* In the case of COMDAT routines, multiple object files will contain the same
2743 function and the linker will select one for the binary. In that case
2744 all the other copies from the profile instrument binary will be missing
2745 profile counts. Look for cases where this happened, due to non-zero
2746 call counts going to 0-count functions, and drop the profile to guessed
2747 so that we can use the estimated probabilities and avoid optimizing only
2748 for size.
2750 The other case where the profile may be missing is when the routine
2751 is not going to be emitted to the object file, e.g. for "extern template"
2752 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2753 all other cases of non-zero calls to 0-count functions. */
2755 void
2756 handle_missing_profiles (void)
2758 struct cgraph_node *node;
2759 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2760 vec<struct cgraph_node *> worklist;
2761 worklist.create (64);
2763 /* See if 0 count function has non-0 count callers. In this case we
2764 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2765 FOR_EACH_DEFINED_FUNCTION (node)
2767 struct cgraph_edge *e;
2768 gcov_type call_count = 0;
2769 gcov_type max_tp_first_run = 0;
2770 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2772 if (node->count)
2773 continue;
2774 for (e = node->callers; e; e = e->next_caller)
2776 call_count += e->count;
2778 if (e->caller->tp_first_run > max_tp_first_run)
2779 max_tp_first_run = e->caller->tp_first_run;
2782 /* If time profile is missing, let assign the maximum that comes from
2783 caller functions. */
2784 if (!node->tp_first_run && max_tp_first_run)
2785 node->tp_first_run = max_tp_first_run + 1;
2787 if (call_count
2788 && fn && fn->cfg
2789 && (call_count * unlikely_count_fraction >= profile_info->runs))
2791 drop_profile (node, call_count);
2792 worklist.safe_push (node);
2796 /* Propagate the profile dropping to other 0-count COMDATs that are
2797 potentially called by COMDATs we already dropped the profile on. */
2798 while (worklist.length () > 0)
2800 struct cgraph_edge *e;
2802 node = worklist.pop ();
2803 for (e = node->callees; e; e = e->next_caller)
2805 struct cgraph_node *callee = e->callee;
2806 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2808 if (callee->count > 0)
2809 continue;
2810 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2811 && profile_status_for_fn (fn) == PROFILE_READ)
2813 drop_profile (node, 0);
2814 worklist.safe_push (callee);
2818 worklist.release ();
2821 /* Convert counts measured by profile driven feedback to frequencies.
2822 Return nonzero iff there was any nonzero execution count. */
2825 counts_to_freqs (void)
2827 gcov_type count_max, true_count_max = 0;
2828 basic_block bb;
2830 /* Don't overwrite the estimated frequencies when the profile for
2831 the function is missing. We may drop this function PROFILE_GUESSED
2832 later in drop_profile (). */
2833 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2834 return 0;
2836 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2837 true_count_max = MAX (bb->count, true_count_max);
2839 count_max = MAX (true_count_max, 1);
2840 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2841 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2843 return true_count_max;
2846 /* Return true if function is likely to be expensive, so there is no point to
2847 optimize performance of prologue, epilogue or do inlining at the expense
2848 of code size growth. THRESHOLD is the limit of number of instructions
2849 function can execute at average to be still considered not expensive. */
2851 bool
2852 expensive_function_p (int threshold)
2854 unsigned int sum = 0;
2855 basic_block bb;
2856 unsigned int limit;
2858 /* We can not compute accurately for large thresholds due to scaled
2859 frequencies. */
2860 gcc_assert (threshold <= BB_FREQ_MAX);
2862 /* Frequencies are out of range. This either means that function contains
2863 internal loop executing more than BB_FREQ_MAX times or profile feedback
2864 is available and function has not been executed at all. */
2865 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2866 return true;
2868 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2869 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2870 FOR_EACH_BB_FN (bb, cfun)
2872 rtx_insn *insn;
2874 FOR_BB_INSNS (bb, insn)
2875 if (active_insn_p (insn))
2877 sum += bb->frequency;
2878 if (sum > limit)
2879 return true;
2883 return false;
2886 /* Estimate and propagate basic block frequencies using the given branch
2887 probabilities. If FORCE is true, the frequencies are used to estimate
2888 the counts even when there are already non-zero profile counts. */
2890 void
2891 estimate_bb_frequencies (bool force)
2893 basic_block bb;
2894 sreal freq_max;
2896 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2898 static int real_values_initialized = 0;
2900 if (!real_values_initialized)
2902 real_values_initialized = 1;
2903 real_br_prob_base = REG_BR_PROB_BASE;
2904 real_bb_freq_max = BB_FREQ_MAX;
2905 real_one_half = sreal (1, -1);
2906 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2907 real_almost_one = sreal (1) - real_inv_br_prob_base;
2910 mark_dfs_back_edges ();
2912 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2913 REG_BR_PROB_BASE;
2915 /* Set up block info for each basic block. */
2916 alloc_aux_for_blocks (sizeof (block_info));
2917 alloc_aux_for_edges (sizeof (edge_prob_info));
2918 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2920 edge e;
2921 edge_iterator ei;
2923 FOR_EACH_EDGE (e, ei, bb->succs)
2925 EDGE_INFO (e)->back_edge_prob = e->probability;
2926 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2930 /* First compute frequencies locally for each loop from innermost
2931 to outermost to examine frequencies for back edges. */
2932 estimate_loops ();
2934 freq_max = 0;
2935 FOR_EACH_BB_FN (bb, cfun)
2936 if (freq_max < BLOCK_INFO (bb)->frequency)
2937 freq_max = BLOCK_INFO (bb)->frequency;
2939 freq_max = real_bb_freq_max / freq_max;
2940 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2942 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2943 bb->frequency = tmp.to_int ();
2946 free_aux_for_blocks ();
2947 free_aux_for_edges ();
2949 compute_function_frequency ();
2952 /* Decide whether function is hot, cold or unlikely executed. */
2953 void
2954 compute_function_frequency (void)
2956 basic_block bb;
2957 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2959 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2960 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2961 node->only_called_at_startup = true;
2962 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2963 node->only_called_at_exit = true;
2965 if (profile_status_for_fn (cfun) != PROFILE_READ)
2967 int flags = flags_from_decl_or_type (current_function_decl);
2968 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2969 != NULL)
2970 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2971 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2972 != NULL)
2973 node->frequency = NODE_FREQUENCY_HOT;
2974 else if (flags & ECF_NORETURN)
2975 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2976 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2977 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2978 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2979 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2980 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2981 return;
2984 /* Only first time try to drop function into unlikely executed.
2985 After inlining the roundoff errors may confuse us.
2986 Ipa-profile pass will drop functions only called from unlikely
2987 functions to unlikely and that is most of what we care about. */
2988 if (!cfun->after_inlining)
2989 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2990 FOR_EACH_BB_FN (bb, cfun)
2992 if (maybe_hot_bb_p (cfun, bb))
2994 node->frequency = NODE_FREQUENCY_HOT;
2995 return;
2997 if (!probably_never_executed_bb_p (cfun, bb))
2998 node->frequency = NODE_FREQUENCY_NORMAL;
3002 /* Build PREDICT_EXPR. */
3003 tree
3004 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3006 tree t = build1 (PREDICT_EXPR, void_type_node,
3007 build_int_cst (integer_type_node, predictor));
3008 SET_PREDICT_EXPR_OUTCOME (t, taken);
3009 return t;
3012 const char *
3013 predictor_name (enum br_predictor predictor)
3015 return predictor_info[predictor].name;
3018 /* Predict branch probabilities and estimate profile of the tree CFG. */
3020 namespace {
3022 const pass_data pass_data_profile =
3024 GIMPLE_PASS, /* type */
3025 "profile_estimate", /* name */
3026 OPTGROUP_NONE, /* optinfo_flags */
3027 TV_BRANCH_PROB, /* tv_id */
3028 PROP_cfg, /* properties_required */
3029 0, /* properties_provided */
3030 0, /* properties_destroyed */
3031 0, /* todo_flags_start */
3032 0, /* todo_flags_finish */
3035 class pass_profile : public gimple_opt_pass
3037 public:
3038 pass_profile (gcc::context *ctxt)
3039 : gimple_opt_pass (pass_data_profile, ctxt)
3042 /* opt_pass methods: */
3043 virtual bool gate (function *) { return flag_guess_branch_prob; }
3044 virtual unsigned int execute (function *);
3046 }; // class pass_profile
3048 unsigned int
3049 pass_profile::execute (function *fun)
3051 unsigned nb_loops;
3053 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3054 return 0;
3056 loop_optimizer_init (LOOPS_NORMAL);
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 flow_loops_dump (dump_file, NULL, 0);
3060 mark_irreducible_loops ();
3062 nb_loops = number_of_loops (fun);
3063 if (nb_loops > 1)
3064 scev_initialize ();
3066 tree_estimate_probability ();
3068 if (nb_loops > 1)
3069 scev_finalize ();
3071 loop_optimizer_finalize ();
3072 if (dump_file && (dump_flags & TDF_DETAILS))
3073 gimple_dump_cfg (dump_file, dump_flags);
3074 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3075 profile_status_for_fn (fun) = PROFILE_GUESSED;
3076 return 0;
3079 } // anon namespace
3081 gimple_opt_pass *
3082 make_pass_profile (gcc::context *ctxt)
3084 return new pass_profile (ctxt);
3087 namespace {
3089 const pass_data pass_data_strip_predict_hints =
3091 GIMPLE_PASS, /* type */
3092 "*strip_predict_hints", /* name */
3093 OPTGROUP_NONE, /* optinfo_flags */
3094 TV_BRANCH_PROB, /* tv_id */
3095 PROP_cfg, /* properties_required */
3096 0, /* properties_provided */
3097 0, /* properties_destroyed */
3098 0, /* todo_flags_start */
3099 0, /* todo_flags_finish */
3102 class pass_strip_predict_hints : public gimple_opt_pass
3104 public:
3105 pass_strip_predict_hints (gcc::context *ctxt)
3106 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3109 /* opt_pass methods: */
3110 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3111 virtual unsigned int execute (function *);
3113 }; // class pass_strip_predict_hints
3115 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3116 we no longer need. */
3117 unsigned int
3118 pass_strip_predict_hints::execute (function *fun)
3120 basic_block bb;
3121 gimple ass_stmt;
3122 tree var;
3124 FOR_EACH_BB_FN (bb, fun)
3126 gimple_stmt_iterator bi;
3127 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3129 gimple stmt = gsi_stmt (bi);
3131 if (gimple_code (stmt) == GIMPLE_PREDICT)
3133 gsi_remove (&bi, true);
3134 continue;
3136 else if (is_gimple_call (stmt))
3138 tree fndecl = gimple_call_fndecl (stmt);
3140 if ((fndecl
3141 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3142 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3143 && gimple_call_num_args (stmt) == 2)
3144 || (gimple_call_internal_p (stmt)
3145 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3147 var = gimple_call_lhs (stmt);
3148 if (var)
3150 ass_stmt
3151 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3152 gsi_replace (&bi, ass_stmt, true);
3154 else
3156 gsi_remove (&bi, true);
3157 continue;
3161 gsi_next (&bi);
3164 return 0;
3167 } // anon namespace
3169 gimple_opt_pass *
3170 make_pass_strip_predict_hints (gcc::context *ctxt)
3172 return new pass_strip_predict_hints (ctxt);
3175 /* Rebuild function frequencies. Passes are in general expected to
3176 maintain profile by hand, however in some cases this is not possible:
3177 for example when inlining several functions with loops freuqencies might run
3178 out of scale and thus needs to be recomputed. */
3180 void
3181 rebuild_frequencies (void)
3183 timevar_push (TV_REBUILD_FREQUENCIES);
3185 /* When the max bb count in the function is small, there is a higher
3186 chance that there were truncation errors in the integer scaling
3187 of counts by inlining and other optimizations. This could lead
3188 to incorrect classification of code as being cold when it isn't.
3189 In that case, force the estimation of bb counts/frequencies from the
3190 branch probabilities, rather than computing frequencies from counts,
3191 which may also lead to frequencies incorrectly reduced to 0. There
3192 is less precision in the probabilities, so we only do this for small
3193 max counts. */
3194 gcov_type count_max = 0;
3195 basic_block bb;
3196 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3197 count_max = MAX (bb->count, count_max);
3199 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3200 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3201 && count_max < REG_BR_PROB_BASE/10))
3203 loop_optimizer_init (0);
3204 add_noreturn_fake_exit_edges ();
3205 mark_irreducible_loops ();
3206 connect_infinite_loops_to_exit ();
3207 estimate_bb_frequencies (true);
3208 remove_fake_exit_edges ();
3209 loop_optimizer_finalize ();
3211 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3212 counts_to_freqs ();
3213 else
3214 gcc_unreachable ();
3215 timevar_pop (TV_REBUILD_FREQUENCIES);