gcc/predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "backend.h"
34 #include "rtl.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "cfghooks.h"
38 #include "tree-pass.h"
39 #include "ssa.h"
40 #include "emit-rtl.h"
41 #include "cgraph.h"
42 #include "coverage.h"
43 #include "diagnostic-core.h"
44 #include "gimple-predict.h"
45 #include "fold-const.h"
46 #include "calls.h"
47 #include "cfganal.h"
48 #include "profile.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "cfgloop.h"
52 #include "gimple-iterator.h"
53 #include "tree-cfg.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-scalar-evolution.h"
58 /* real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
59 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
60 static sreal real_almost_one, real_br_prob_base,
61 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
63 static void combine_predictions_for_insn (rtx_insn *, basic_block);
64 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
65 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
66 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
67 static bool can_predict_insn_p (const rtx_insn *);
69 /* Information we hold about each branch predictor.
70 Filled using information from predict.def. */
72 struct predictor_info
74 const char *const name; /* Name used in the debugging dumps. */
75 const int hitrate; /* Expected hitrate used by
76 predict_insn_def call. */
77 const int flags;
80 /* Use the given predictor without Dempster-Shafer combination if it matches
81 using the first_match heuristic. */
82 #define PRED_FLAG_FIRST_MATCH 1
84 /* Convert a hit rate given in percent to our fixed-point representation. */
86 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
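/* Illustrative arithmetic, not part of the original source: with
   REG_BR_PROB_BASE being 10000, HITRATE (90) expands to
   ((int) (90 * 10000 + 50) / 100) == 9000, i.e. a 90% hit rate in the
   fixed-point scale used for all probabilities in this file, and
   HITRATE (50) == 5000 represents an even 50/50 outcome.  */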
88 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
89 static const struct predictor_info predictor_info[]= {
90 #include "predict.def"
92 /* Upper bound on predictors. */
93 {NULL, 0, 0}
95 #undef DEF_PREDICTOR
97 /* Return TRUE if frequency FREQ is considered to be hot. */
99 static inline bool
100 maybe_hot_frequency_p (struct function *fun, int freq)
102 struct cgraph_node *node = cgraph_node::get (fun->decl);
103 if (!profile_info
104 || !opt_for_fn (fun->decl, flag_branch_probabilities))
106 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
107 return false;
108 if (node->frequency == NODE_FREQUENCY_HOT)
109 return true;
111 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
112 return true;
113 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
114 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
115 return false;
116 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
117 return false;
118 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
119 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
120 return false;
121 return true;
124 static gcov_type min_count = -1;
126 /* Determine the threshold for hot BB counts. */
128 gcov_type
129 get_hot_bb_threshold ()
131 gcov_working_set_t *ws;
132 if (min_count == -1)
134 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
135 gcc_assert (ws);
136 min_count = ws->min_counter;
138 return min_count;
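/* Worked example (illustrative; the hot-bb-count-ws-permille default of
   999 is an assumption, check params.def): the working set is the
   smallest set of basic blocks covering 99.9 permille of all profiled
   execution, and the threshold becomes the smallest counter in that set.
   So if blocks with counts >= 120 already cover 99.9% of the dynamic
   execution, any block executed at least 120 times is treated as hot.  */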
141 /* Set the threshold for hot BB counts. */
143 void
144 set_hot_bb_threshold (gcov_type min)
146 min_count = min;
149 /* Return TRUE if profile count COUNT is considered to be hot. */
151 bool
152 maybe_hot_count_p (struct function *fun, gcov_type count)
154 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
155 return true;
156 /* Code executed at most once is not hot. */
157 if (profile_info->runs >= count)
158 return false;
159 return (count >= get_hot_bb_threshold ());
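/* Example of the "executed at most once" test above (illustrative, not
   from the original source): if the training profile consists of 1000
   runs and a block's summed count is 1000 or less, it ran at most about
   once per run on average and is therefore not considered hot, even
   before the working-set threshold is consulted.  */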
162 /* Return true in case BB can be CPU intensive and should be optimized
163 for maximal performance. */
165 bool
166 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
168 gcc_checking_assert (fun);
169 if (profile_status_for_fn (fun) == PROFILE_READ)
170 return maybe_hot_count_p (fun, bb->count);
171 return maybe_hot_frequency_p (fun, bb->frequency);
174 /* Return true in case edge E can be CPU intensive and should be optimized
175 for maximal performance. */
177 bool
178 maybe_hot_edge_p (edge e)
180 if (profile_status_for_fn (cfun) == PROFILE_READ)
181 return maybe_hot_count_p (cfun, e->count);
182 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
185 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
186 node frequency, indicate never being executed. */
188 static bool
189 probably_never_executed (struct function *fun,
190 gcov_type count, int frequency)
192 gcc_checking_assert (fun);
193 if (profile_status_for_fn (fun) == PROFILE_READ)
195 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
196 if (count * unlikely_count_fraction >= profile_info->runs)
197 return false;
198 if (!frequency)
199 return true;
200 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
201 return false;
202 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
204 gcov_type computed_count;
205 /* Check for possibility of overflow, in which case entry bb count
206 is large enough to do the division first without losing much
207 precision. */
208 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
209 REG_BR_PROB_BASE)
211 gcov_type scaled_count
212 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
213 unlikely_count_fraction;
214 computed_count = RDIV (scaled_count,
215 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
217 else
219 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
220 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
221 computed_count *= frequency * unlikely_count_fraction;
223 if (computed_count >= profile_info->runs)
224 return false;
226 return true;
228 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
229 && (cgraph_node::get (fun->decl)->frequency
230 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
231 return true;
232 return false;
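/* Worked example (illustrative; the unlikely-bb-count-fraction default
   of 20 is an assumption, check params.def): with a profile gathered
   over 1000 runs, a block counted 50 times gives 50 * 20 = 1000 >= 1000
   and is immediately considered possibly executed, while a block counted
   49 times passes this first filter and is then subjected to the
   frequency-scaled test above before being declared never executed.  */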
236 /* Return true in case BB is probably never executed. */
238 bool
239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
241 return probably_never_executed (fun, bb->count, bb->frequency);
245 /* Return true in case edge E is probably never executed. */
247 bool
248 probably_never_executed_edge_p (struct function *fun, edge e)
250 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
253 /* Return true when current function should always be optimized for size. */
255 bool
256 optimize_function_for_size_p (struct function *fun)
258 if (!fun || !fun->decl)
259 return optimize_size;
260 cgraph_node *n = cgraph_node::get (fun->decl);
261 return n && n->optimize_for_size_p ();
264 /* Return true when current function should always be optimized for speed. */
266 bool
267 optimize_function_for_speed_p (struct function *fun)
269 return !optimize_function_for_size_p (fun);
272 /* Return the optimization type that should be used for the function FUN. */
274 optimization_type
275 function_optimization_type (struct function *fun)
277 return (optimize_function_for_speed_p (fun)
278 ? OPTIMIZE_FOR_SPEED
279 : OPTIMIZE_FOR_SIZE);
282 /* Return TRUE when BB should be optimized for size. */
284 bool
285 optimize_bb_for_size_p (const_basic_block bb)
287 return (optimize_function_for_size_p (cfun)
288 || (bb && !maybe_hot_bb_p (cfun, bb)));
291 /* Return TRUE when BB should be optimized for speed. */
293 bool
294 optimize_bb_for_speed_p (const_basic_block bb)
296 return !optimize_bb_for_size_p (bb);
299 /* Return the optimization type that should be used for block BB. */
301 optimization_type
302 bb_optimization_type (const_basic_block bb)
304 return (optimize_bb_for_speed_p (bb)
305 ? OPTIMIZE_FOR_SPEED
306 : OPTIMIZE_FOR_SIZE);
309 /* Return TRUE when edge E should be optimized for size. */
311 bool
312 optimize_edge_for_size_p (edge e)
314 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
317 /* Return TRUE when edge E should be optimized for speed. */
319 bool
320 optimize_edge_for_speed_p (edge e)
322 return !optimize_edge_for_size_p (e);
325 /* Return TRUE when the current instruction should be optimized for size. */
327 bool
328 optimize_insn_for_size_p (void)
330 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
333 /* Return TRUE when the current instruction should be optimized for speed. */
335 bool
336 optimize_insn_for_speed_p (void)
338 return !optimize_insn_for_size_p ();
341 /* Return TRUE when LOOP should be optimized for size. */
343 bool
344 optimize_loop_for_size_p (struct loop *loop)
346 return optimize_bb_for_size_p (loop->header);
349 /* Return TRUE when LOOP should be optimized for speed. */
351 bool
352 optimize_loop_for_speed_p (struct loop *loop)
354 return optimize_bb_for_speed_p (loop->header);
357 /* Return TRUE when LOOP nest should be optimized for speed. */
359 bool
360 optimize_loop_nest_for_speed_p (struct loop *loop)
362 struct loop *l = loop;
363 if (optimize_loop_for_speed_p (loop))
364 return true;
365 l = loop->inner;
366 while (l && l != loop)
368 if (optimize_loop_for_speed_p (l))
369 return true;
370 if (l->inner)
371 l = l->inner;
372 else if (l->next)
373 l = l->next;
374 else
376 while (l != loop && !l->next)
377 l = loop_outer (l);
378 if (l != loop)
379 l = l->next;
382 return false;
385 /* Return TRUE when LOOP nest should be optimized for size. */
387 bool
388 optimize_loop_nest_for_size_p (struct loop *loop)
390 return !optimize_loop_nest_for_speed_p (loop);
393 /* Return true when edge E is likely to be well predictable by branch
394 predictor. */
396 bool
397 predictable_edge_p (edge e)
399 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
400 return false;
401 if ((e->probability
402 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
403 || (REG_BR_PROB_BASE - e->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
405 return true;
406 return false;
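/* Illustrative numbers, not from the original source: assuming the
   predictable-branch-outcome parameter keeps its usual default of 2,
   the test above accepts edges whose probability is at most
   2 * 10000 / 100 == 200 (2%) or at least 9800 (98%), i.e. branches a
   hardware predictor would almost always get right.  */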
410 /* Set RTL expansion for BB profile. */
412 void
413 rtl_profile_for_bb (basic_block bb)
415 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
418 /* Set RTL expansion for edge profile. */
420 void
421 rtl_profile_for_edge (edge e)
423 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
426 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
427 void
428 default_rtl_profile (void)
430 crtl->maybe_hot_insn_p = true;
433 /* Return true if one of BB's outgoing edges is already predicted by
434 PREDICTOR. */
436 bool
437 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
439 rtx note;
440 if (!INSN_P (BB_END (bb)))
441 return false;
442 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
443 if (REG_NOTE_KIND (note) == REG_BR_PRED
444 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
445 return true;
446 return false;
449 /* Structure representing predictions in tree level. */
451 struct edge_prediction {
452 struct edge_prediction *ep_next;
453 edge ep_edge;
454 enum br_predictor ep_predictor;
455 int ep_probability;
458 /* This map contains, for each basic block, the list of predictions for its
459 outgoing edges. */
461 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
463 /* Return true if one of BB's outgoing edges is already predicted by
464 PREDICTOR. */
466 bool
467 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
469 struct edge_prediction *i;
470 edge_prediction **preds = bb_predictions->get (bb);
472 if (!preds)
473 return false;
475 for (i = *preds; i; i = i->ep_next)
476 if (i->ep_predictor == predictor)
477 return true;
478 return false;
481 /* Return true when the probability of edge is reliable.
483 The profile guessing code is good at predicting branch outcomes (i.e.
484 taken/not taken); it gets them right slightly over 75% of the time.
485 It is however notoriously poor at predicting the probability itself.
486 In general the guessed profile appears a lot flatter (with probabilities
487 closer to 50%) than reality, so it is a bad idea to use it to drive
488 optimizations such as those disabling dynamic branch prediction for well
489 predictable branches.
491 There are two exceptions - edges leading to noreturn calls and edges
492 predicted by the number-of-iterations heuristics are predicted well. This
493 predicate should be able to distinguish those, but at the moment it simply
494 checks for the noreturn heuristic, which is the only one giving a
495 probability over 99% or below 1%. In the future we might want to propagate
496 reliability information across the CFG if we find it useful in multiple places. */
497 static bool
498 probability_reliable_p (int prob)
500 return (profile_status_for_fn (cfun) == PROFILE_READ
501 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
502 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
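/* Worked example (illustrative): for a guessed profile, HITRATE (1) is
   (1 * 10000 + 50) / 100 == 100 and HITRATE (99) == 9900, so a guessed
   probability counts as reliable only when it is at most 1% or at least
   99%; probabilities read from real profile feedback are always
   considered reliable.  */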
505 /* Same predicate as above, working on edges. */
506 bool
507 edge_probability_reliable_p (const_edge e)
509 return probability_reliable_p (e->probability);
512 /* Same predicate as edge_probability_reliable_p, working on notes. */
513 bool
514 br_prob_note_reliable_p (const_rtx note)
516 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
517 return probability_reliable_p (XINT (note, 0));
520 static void
521 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
523 gcc_assert (any_condjump_p (insn));
524 if (!flag_guess_branch_prob)
525 return;
527 add_reg_note (insn, REG_BR_PRED,
528 gen_rtx_CONCAT (VOIDmode,
529 GEN_INT ((int) predictor),
530 GEN_INT ((int) probability)));
533 /* Predict insn by given predictor. */
535 void
536 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
537 enum prediction taken)
539 int probability = predictor_info[(int) predictor].hitrate;
541 if (taken != TAKEN)
542 probability = REG_BR_PROB_BASE - probability;
544 predict_insn (insn, predictor, probability);
547 /* Predict edge E with given probability if possible. */
549 void
550 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
552 rtx_insn *last_insn;
553 last_insn = BB_END (e->src);
555 /* We can store the branch prediction information only about
556 conditional jumps. */
557 if (!any_condjump_p (last_insn))
558 return;
560 /* We always store probability of branching. */
561 if (e->flags & EDGE_FALLTHRU)
562 probability = REG_BR_PROB_BASE - probability;
564 predict_insn (last_insn, predictor, probability);
567 /* Predict edge E with the given PROBABILITY. */
568 void
569 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
571 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
572 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
574 && flag_guess_branch_prob && optimize)
576 struct edge_prediction *i = XNEW (struct edge_prediction);
577 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
579 i->ep_next = preds;
580 preds = i;
581 i->ep_probability = probability;
582 i->ep_predictor = predictor;
583 i->ep_edge = e;
587 /* Remove all predictions on given basic block that are attached
588 to edge E. */
589 void
590 remove_predictions_associated_with_edge (edge e)
592 if (!bb_predictions)
593 return;
595 edge_prediction **preds = bb_predictions->get (e->src);
597 if (preds)
599 struct edge_prediction **prediction = preds;
600 struct edge_prediction *next;
602 while (*prediction)
604 if ((*prediction)->ep_edge == e)
606 next = (*prediction)->ep_next;
607 free (*prediction);
608 *prediction = next;
610 else
611 prediction = &((*prediction)->ep_next);
616 /* Clears the list of predictions stored for BB. */
618 static void
619 clear_bb_predictions (basic_block bb)
621 edge_prediction **preds = bb_predictions->get (bb);
622 struct edge_prediction *pred, *next;
624 if (!preds)
625 return;
627 for (pred = *preds; pred; pred = next)
629 next = pred->ep_next;
630 free (pred);
632 *preds = NULL;
635 /* Return true when we can store a prediction on insn INSN.
636 At the moment we represent predictions only on conditional
637 jumps, not on computed jumps or other complicated cases. */
638 static bool
639 can_predict_insn_p (const rtx_insn *insn)
641 return (JUMP_P (insn)
642 && any_condjump_p (insn)
643 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
646 /* Predict edge E by given predictor if possible. */
648 void
649 predict_edge_def (edge e, enum br_predictor predictor,
650 enum prediction taken)
652 int probability = predictor_info[(int) predictor].hitrate;
654 if (taken != TAKEN)
655 probability = REG_BR_PROB_BASE - probability;
657 predict_edge (e, predictor, probability);
660 /* Invert all branch predictions or probability notes in the INSN. This needs
661 to be done each time we invert the condition used by the jump. */
663 void
664 invert_br_probabilities (rtx insn)
666 rtx note;
668 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
669 if (REG_NOTE_KIND (note) == REG_BR_PROB)
670 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
671 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
672 XEXP (XEXP (note, 0), 1)
673 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
676 /* Dump information about the branch prediction to the output file. */
678 static void
679 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
680 basic_block bb, int used)
682 edge e;
683 edge_iterator ei;
685 if (!file)
686 return;
688 FOR_EACH_EDGE (e, ei, bb->succs)
689 if (! (e->flags & EDGE_FALLTHRU))
690 break;
692 fprintf (file, " %s heuristics%s: %.1f%%",
693 predictor_info[predictor].name,
694 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
696 if (bb->count)
698 fprintf (file, " exec %" PRId64, bb->count);
699 if (e)
701 fprintf (file, " hit %" PRId64, e->count);
702 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
706 fprintf (file, "\n");
709 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
710 evenly and hope for the best. */
711 static void
712 set_even_probabilities (basic_block bb)
714 int nedges = 0;
715 edge e;
716 edge_iterator ei;
718 FOR_EACH_EDGE (e, ei, bb->succs)
719 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
720 nedges ++;
721 FOR_EACH_EDGE (e, ei, bb->succs)
722 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
723 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
724 else
725 e->probability = 0;
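/* Illustrative example, not part of the original source: for a block
   with three non-EH, non-fake successors the formula above assigns each
   of them (10000 + 1) / 3 == 3333, so the probabilities sum to 9999,
   one unit short of REG_BR_PROB_BASE because of the integer rounding.  */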
728 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
729 note if not already present. Remove now useless REG_BR_PRED notes. */
731 static void
732 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
734 rtx prob_note;
735 rtx *pnote;
736 rtx note;
737 int best_probability = PROB_EVEN;
738 enum br_predictor best_predictor = END_PREDICTORS;
739 int combined_probability = REG_BR_PROB_BASE / 2;
740 int d;
741 bool first_match = false;
742 bool found = false;
744 if (!can_predict_insn_p (insn))
746 set_even_probabilities (bb);
747 return;
750 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
751 pnote = &REG_NOTES (insn);
752 if (dump_file)
753 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
754 bb->index);
756 /* We implement "first match" heuristics and use probability guessed
757 by predictor with smallest index. */
758 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
759 if (REG_NOTE_KIND (note) == REG_BR_PRED)
761 enum br_predictor predictor = ((enum br_predictor)
762 INTVAL (XEXP (XEXP (note, 0), 0)));
763 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
765 found = true;
766 if (best_predictor > predictor)
767 best_probability = probability, best_predictor = predictor;
769 d = (combined_probability * probability
770 + (REG_BR_PROB_BASE - combined_probability)
771 * (REG_BR_PROB_BASE - probability));
773 /* Use FP math to avoid overflows of 32bit integers. */
774 if (d == 0)
775 /* If one probability is 0% and one 100%, avoid division by zero. */
776 combined_probability = REG_BR_PROB_BASE / 2;
777 else
778 combined_probability = (((double) combined_probability) * probability
779 * REG_BR_PROB_BASE / d + 0.5);
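/* Worked example of the Dempster-Shafer style combination above
   (illustrative, not part of the original source): combining an even
   prior of 5000 with a predictor claiming 9000 gives d = 5000 * 9000 +
   5000 * 1000 = 50,000,000 and a combined value of 9000, i.e. an even
   prior is neutral.  Combining 9000 with a second predictor of 8000
   gives d = 72,000,000 + 2,000,000 and roughly 9730, so two agreeing
   predictors reinforce each other beyond either individual hit rate.  */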
782 /* Decide which heuristic to use. In case we didn't match anything,
783 use the no_prediction heuristic; in case we did match, use either
784 first match or Dempster-Shafer theory depending on the flags. */
786 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
787 first_match = true;
789 if (!found)
790 dump_prediction (dump_file, PRED_NO_PREDICTION,
791 combined_probability, bb, true);
792 else
794 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
795 bb, !first_match);
796 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
797 bb, first_match);
800 if (first_match)
801 combined_probability = best_probability;
802 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
804 while (*pnote)
806 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
808 enum br_predictor predictor = ((enum br_predictor)
809 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
810 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
812 dump_prediction (dump_file, predictor, probability, bb,
813 !first_match || best_predictor == predictor);
814 *pnote = XEXP (*pnote, 1);
816 else
817 pnote = &XEXP (*pnote, 1);
820 if (!prob_note)
822 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
824 /* Save the prediction into the CFG in case we are seeing a non-degenerate
825 conditional jump. */
826 if (!single_succ_p (bb))
828 BRANCH_EDGE (bb)->probability = combined_probability;
829 FALLTHRU_EDGE (bb)->probability
830 = REG_BR_PROB_BASE - combined_probability;
833 else if (!single_succ_p (bb))
835 int prob = XINT (prob_note, 0);
837 BRANCH_EDGE (bb)->probability = prob;
838 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
840 else
841 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
844 /* Combine predictions into single probability and store them into CFG.
845 Remove now useless prediction entries.
846 If DRY_RUN is set, only produce dumps and do not modify profile. */
848 static void
849 combine_predictions_for_bb (basic_block bb, bool dry_run)
851 int best_probability = PROB_EVEN;
852 enum br_predictor best_predictor = END_PREDICTORS;
853 int combined_probability = REG_BR_PROB_BASE / 2;
854 int d;
855 bool first_match = false;
856 bool found = false;
857 struct edge_prediction *pred;
858 int nedges = 0;
859 edge e, first = NULL, second = NULL;
860 edge_iterator ei;
862 FOR_EACH_EDGE (e, ei, bb->succs)
863 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
865 nedges ++;
866 if (first && !second)
867 second = e;
868 if (!first)
869 first = e;
872 /* When there is no successor or only one choice, prediction is easy.
874 We are lazy for now and predict only basic blocks with two outgoing
875 edges. It is possible to predict generic case too, but we have to
876 ignore first match heuristics and do more involved combining. Implement
877 this later. */
878 if (nedges != 2)
880 if (!bb->count && !dry_run)
881 set_even_probabilities (bb);
882 clear_bb_predictions (bb);
883 if (dump_file)
884 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
885 nedges, bb->index);
886 return;
889 if (dump_file)
890 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
892 edge_prediction **preds = bb_predictions->get (bb);
893 if (preds)
895 /* We implement "first match" heuristics and use probability guessed
896 by predictor with smallest index. */
897 for (pred = *preds; pred; pred = pred->ep_next)
899 enum br_predictor predictor = pred->ep_predictor;
900 int probability = pred->ep_probability;
902 if (pred->ep_edge != first)
903 probability = REG_BR_PROB_BASE - probability;
905 found = true;
906 /* The first-match heuristic would be wildly confused if we predicted
907 both directions. */
908 if (best_predictor > predictor)
910 struct edge_prediction *pred2;
911 int prob = probability;
913 for (pred2 = (struct edge_prediction *) *preds;
914 pred2; pred2 = pred2->ep_next)
915 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
917 int probability2 = pred2->ep_probability;
919 if (pred2->ep_edge != first)
920 probability2 = REG_BR_PROB_BASE - probability2;
922 if ((probability < REG_BR_PROB_BASE / 2) !=
923 (probability2 < REG_BR_PROB_BASE / 2))
924 break;
926 /* If the same predictor later gave better result, go for it! */
927 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
928 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
929 prob = probability2;
931 if (!pred2)
932 best_probability = prob, best_predictor = predictor;
935 d = (combined_probability * probability
936 + (REG_BR_PROB_BASE - combined_probability)
937 * (REG_BR_PROB_BASE - probability));
939 /* Use FP math to avoid overflows of 32bit integers. */
940 if (d == 0)
941 /* If one probability is 0% and one 100%, avoid division by zero. */
942 combined_probability = REG_BR_PROB_BASE / 2;
943 else
944 combined_probability = (((double) combined_probability)
945 * probability
946 * REG_BR_PROB_BASE / d + 0.5);
950 /* Decide which heuristic to use. In case we didn't match anything,
951 use the no_prediction heuristic; in case we did match, use either
952 first match or Dempster-Shafer theory depending on the flags. */
954 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
955 first_match = true;
957 if (!found)
958 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
959 else
961 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
962 !first_match);
963 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
964 first_match);
967 if (first_match)
968 combined_probability = best_probability;
969 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
971 if (preds)
973 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
975 enum br_predictor predictor = pred->ep_predictor;
976 int probability = pred->ep_probability;
978 if (pred->ep_edge != EDGE_SUCC (bb, 0))
979 probability = REG_BR_PROB_BASE - probability;
980 dump_prediction (dump_file, predictor, probability, bb,
981 !first_match || best_predictor == predictor);
984 clear_bb_predictions (bb);
986 if (!bb->count && !dry_run)
988 first->probability = combined_probability;
989 second->probability = REG_BR_PROB_BASE - combined_probability;
993 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
994 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
996 T1 and T2 should be one of the following cases:
997 1. T1 is SSA_NAME, T2 is NULL
998 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
999 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1001 static tree
1002 strips_small_constant (tree t1, tree t2)
1004 tree ret = NULL;
1005 int value = 0;
1007 if (!t1)
1008 return NULL;
1009 else if (TREE_CODE (t1) == SSA_NAME)
1010 ret = t1;
1011 else if (tree_fits_shwi_p (t1))
1012 value = tree_to_shwi (t1);
1013 else
1014 return NULL;
1016 if (!t2)
1017 return ret;
1018 else if (tree_fits_shwi_p (t2))
1019 value = tree_to_shwi (t2);
1020 else if (TREE_CODE (t2) == SSA_NAME)
1022 if (ret)
1023 return NULL;
1024 else
1025 ret = t2;
1028 if (value <= 4 && value >= -4)
1029 return ret;
1030 else
1031 return NULL;
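/* Illustrative behaviour (i_7 denotes a hypothetical SSA name, not from
   the original source): strips_small_constant (i_7, NULL_TREE) returns
   i_7; so does the call with i_7 and the constant 3, since 3 lies in
   [-4, 4].  With the constant 8 the function returns NULL, and when both
   arguments are SSA_NAMEs it also gives up.  */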
1034 /* Return the SSA_NAME in T or T's operands.
1035 Return NULL if SSA_NAME cannot be found. */
1037 static tree
1038 get_base_value (tree t)
1040 if (TREE_CODE (t) == SSA_NAME)
1041 return t;
1043 if (!BINARY_CLASS_P (t))
1044 return NULL;
1046 switch (TREE_OPERAND_LENGTH (t))
1048 case 1:
1049 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1050 case 2:
1051 return strips_small_constant (TREE_OPERAND (t, 0),
1052 TREE_OPERAND (t, 1));
1053 default:
1054 return NULL;
1058 /* Check the compare STMT in LOOP. If it compares an induction
1059 variable to a loop invariant, return true, and save
1060 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1061 Otherwise return false and set LOOP_INVARIANT to NULL. */
1063 static bool
1064 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1065 tree *loop_invariant,
1066 enum tree_code *compare_code,
1067 tree *loop_step,
1068 tree *loop_iv_base)
1070 tree op0, op1, bound, base;
1071 affine_iv iv0, iv1;
1072 enum tree_code code;
1073 tree step;
1075 code = gimple_cond_code (stmt);
1076 *loop_invariant = NULL;
1078 switch (code)
1080 case GT_EXPR:
1081 case GE_EXPR:
1082 case NE_EXPR:
1083 case LT_EXPR:
1084 case LE_EXPR:
1085 case EQ_EXPR:
1086 break;
1088 default:
1089 return false;
1092 op0 = gimple_cond_lhs (stmt);
1093 op1 = gimple_cond_rhs (stmt);
1095 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1096 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1097 return false;
1098 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1099 return false;
1100 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1101 return false;
1102 if (TREE_CODE (iv0.step) != INTEGER_CST
1103 || TREE_CODE (iv1.step) != INTEGER_CST)
1104 return false;
1105 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1106 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1107 return false;
1109 if (integer_zerop (iv0.step))
1111 if (code != NE_EXPR && code != EQ_EXPR)
1112 code = invert_tree_comparison (code, false);
1113 bound = iv0.base;
1114 base = iv1.base;
1115 if (tree_fits_shwi_p (iv1.step))
1116 step = iv1.step;
1117 else
1118 return false;
1120 else
1122 bound = iv1.base;
1123 base = iv0.base;
1124 if (tree_fits_shwi_p (iv0.step))
1125 step = iv0.step;
1126 else
1127 return false;
1130 if (TREE_CODE (bound) != INTEGER_CST)
1131 bound = get_base_value (bound);
1132 if (!bound)
1133 return false;
1134 if (TREE_CODE (base) != INTEGER_CST)
1135 base = get_base_value (base);
1136 if (!base)
1137 return false;
1139 *loop_invariant = bound;
1140 *compare_code = code;
1141 *loop_step = step;
1142 *loop_iv_base = base;
1143 return true;
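/* Illustrative example, not part of the original source: for a loop
   "for (i = 0; i < n; i++)" whose exit test is the GIMPLE_COND "i < n"
   with n loop-invariant, simple_iv gives iv0 = {base 0, step 1} for i
   and iv1 = {base n, step 0} for n, so the function returns true with
   *loop_invariant = n, *compare_code = LT_EXPR, *loop_step = 1 and
   *loop_iv_base = 0.  */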
1146 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1148 static bool
1149 expr_coherent_p (tree t1, tree t2)
1151 gimple *stmt;
1152 tree ssa_name_1 = NULL;
1153 tree ssa_name_2 = NULL;
1155 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1156 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1158 if (t1 == t2)
1159 return true;
1161 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1162 return true;
1163 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1164 return false;
1166 /* Check to see if t1 is expressed/defined with t2. */
1167 stmt = SSA_NAME_DEF_STMT (t1);
1168 gcc_assert (stmt != NULL);
1169 if (is_gimple_assign (stmt))
1171 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1172 if (ssa_name_1 && ssa_name_1 == t2)
1173 return true;
1176 /* Check to see if t2 is expressed/defined with t1. */
1177 stmt = SSA_NAME_DEF_STMT (t2);
1178 gcc_assert (stmt != NULL);
1179 if (is_gimple_assign (stmt))
1181 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1182 if (ssa_name_2 && ssa_name_2 == t1)
1183 return true;
1186 /* Compare if t1 and t2's def_stmts are identical. */
1187 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1188 return true;
1189 else
1190 return false;
1193 /* Predict branch probability of BB when BB contains a branch that compares
1194 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1195 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1197 E.g.
1198 for (int i = 0; i < bound; i++) {
1199 if (i < bound - 2)
1200 computation_1();
1201 else
1202 computation_2();
1205 In this loop, we will predict the branch inside the loop to be taken. */
1207 static void
1208 predict_iv_comparison (struct loop *loop, basic_block bb,
1209 tree loop_bound_var,
1210 tree loop_iv_base_var,
1211 enum tree_code loop_bound_code,
1212 int loop_bound_step)
1214 gimple *stmt;
1215 tree compare_var, compare_base;
1216 enum tree_code compare_code;
1217 tree compare_step_var;
1218 edge then_edge;
1219 edge_iterator ei;
1221 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1222 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1223 || predicted_by_p (bb, PRED_LOOP_EXIT))
1224 return;
1226 stmt = last_stmt (bb);
1227 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1228 return;
1229 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1230 loop, &compare_var,
1231 &compare_code,
1232 &compare_step_var,
1233 &compare_base))
1234 return;
1236 /* Find the taken edge. */
1237 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1238 if (then_edge->flags & EDGE_TRUE_VALUE)
1239 break;
1241 /* When comparing an IV to a loop invariant, NE is more likely to be
1242 taken while EQ is more likely to be not-taken. */
1243 if (compare_code == NE_EXPR)
1245 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1246 return;
1248 else if (compare_code == EQ_EXPR)
1250 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1251 return;
1254 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1255 return;
1257 /* If loop bound, base and compare bound are all constants, we can
1258 calculate the probability directly. */
1259 if (tree_fits_shwi_p (loop_bound_var)
1260 && tree_fits_shwi_p (compare_var)
1261 && tree_fits_shwi_p (compare_base))
1263 int probability;
1264 bool overflow, overall_overflow = false;
1265 widest_int compare_count, tem;
1267 /* (loop_bound - base) / compare_step */
1268 tem = wi::sub (wi::to_widest (loop_bound_var),
1269 wi::to_widest (compare_base), SIGNED, &overflow);
1270 overall_overflow |= overflow;
1271 widest_int loop_count = wi::div_trunc (tem,
1272 wi::to_widest (compare_step_var),
1273 SIGNED, &overflow);
1274 overall_overflow |= overflow;
1276 if (!wi::neg_p (wi::to_widest (compare_step_var))
1277 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1279 /* (loop_bound - compare_bound) / compare_step */
1280 tem = wi::sub (wi::to_widest (loop_bound_var),
1281 wi::to_widest (compare_var), SIGNED, &overflow);
1282 overall_overflow |= overflow;
1283 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1284 SIGNED, &overflow);
1285 overall_overflow |= overflow;
1287 else
1289 /* (compare_bound - base) / compare_step */
1290 tem = wi::sub (wi::to_widest (compare_var),
1291 wi::to_widest (compare_base), SIGNED, &overflow);
1292 overall_overflow |= overflow;
1293 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1294 SIGNED, &overflow);
1295 overall_overflow |= overflow;
1297 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1298 ++compare_count;
1299 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1300 ++loop_count;
1301 if (wi::neg_p (compare_count))
1302 compare_count = 0;
1303 if (wi::neg_p (loop_count))
1304 loop_count = 0;
1305 if (loop_count == 0)
1306 probability = 0;
1307 else if (wi::cmps (compare_count, loop_count) == 1)
1308 probability = REG_BR_PROB_BASE;
1309 else
1311 tem = compare_count * REG_BR_PROB_BASE;
1312 tem = wi::udiv_trunc (tem, loop_count);
1313 probability = tem.to_uhwi ();
1316 if (!overall_overflow)
1317 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1319 return;
1322 if (expr_coherent_p (loop_bound_var, compare_var))
1324 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1325 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1326 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1327 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1328 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1329 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330 else if (loop_bound_code == NE_EXPR)
1332 /* If the loop backedge condition is "(i != bound)", we do
1333 the comparison based on the step of IV:
1334 * step < 0 : backedge condition is like (i > bound)
1335 * step > 0 : backedge condition is like (i < bound) */
1336 gcc_assert (loop_bound_step != 0);
1337 if (loop_bound_step > 0
1338 && (compare_code == LT_EXPR
1339 || compare_code == LE_EXPR))
1340 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1341 else if (loop_bound_step < 0
1342 && (compare_code == GT_EXPR
1343 || compare_code == GE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else
1346 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1348 else
1349 /* The branch is predicted not-taken if loop_bound_code is
1350 opposite to compare_code. */
1351 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1353 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1355 /* For cases like:
1356 for (i = s; i < h; i++)
1357 if (i > s + 2) ....
1358 The branch should be predicted taken. */
1359 if (loop_bound_step > 0
1360 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1361 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1362 else if (loop_bound_step < 0
1363 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365 else
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
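/* Worked example of the constant case above (illustrative, not part of
   the original source): in "for (i = 0; i < 100; i++) if (i < 30) ..."
   the loop bound is 100, the compared bound 30, the IV base 0 and the
   step 1, so loop_count = 100, compare_count = 30 and the inner branch
   is predicted taken with probability 30 * 10000 / 100 == 3000, i.e.
   the 30% of iterations in which i is still below 30.  */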
1370 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1371 exits result from short-circuit conditions that will generate an
1372 if_tmp. E.g.:
1374 if (foo() || global > 10)
1375 break;
1377 This will be translated into:
1379 BB3:
1380 loop header...
1381 BB4:
1382 if foo() goto BB6 else goto BB5
1383 BB5:
1384 if global > 10 goto BB6 else goto BB7
1385 BB6:
1386 goto BB7
1387 BB7:
1388 iftmp = (PHI 0(BB5), 1(BB6))
1389 if iftmp == 1 goto BB8 else goto BB3
1390 BB8:
1391 outside of the loop...
1393 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1394 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1395 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1396 exits to predict them using PRED_LOOP_EXIT. */
1398 static void
1399 predict_extra_loop_exits (edge exit_edge)
1401 unsigned i;
1402 bool check_value_one;
1403 gimple *lhs_def_stmt;
1404 gphi *phi_stmt;
1405 tree cmp_rhs, cmp_lhs;
1406 gimple *last;
1407 gcond *cmp_stmt;
1409 last = last_stmt (exit_edge->src);
1410 if (!last)
1411 return;
1412 cmp_stmt = dyn_cast <gcond *> (last);
1413 if (!cmp_stmt)
1414 return;
1416 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1417 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1418 if (!TREE_CONSTANT (cmp_rhs)
1419 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1420 return;
1421 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1422 return;
1424 /* If check_value_one is true, only the phi_args with value '1' will lead
1425 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1426 loop exit. */
1427 check_value_one = (((integer_onep (cmp_rhs))
1428 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1429 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
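/* Tracing the CFG sketched in the comment above (illustrative): the exit
   test is "if (iftmp == 1)" and EXIT_EDGE is its true edge, so
   integer_onep (cmp_rhs) == 1, the EQ_EXPR test == 1 and the
   EDGE_TRUE_VALUE test == 1, giving (1 ^ 1) ^ 1 == 1; check_value_one is
   therefore true and only the PHI argument 1 coming from BB6 marks a
   path that leaves the loop.  */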
1431 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1432 if (!lhs_def_stmt)
1433 return;
1435 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1436 if (!phi_stmt)
1437 return;
1439 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1441 edge e1;
1442 edge_iterator ei;
1443 tree val = gimple_phi_arg_def (phi_stmt, i);
1444 edge e = gimple_phi_arg_edge (phi_stmt, i);
1446 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1447 continue;
1448 if ((check_value_one ^ integer_onep (val)) == 1)
1449 continue;
1450 if (EDGE_COUNT (e->src->succs) != 1)
1452 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1453 continue;
1456 FOR_EACH_EDGE (e1, ei, e->src->preds)
1457 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1461 /* Predict edge probabilities by exploiting loop structure. */
1463 static void
1464 predict_loops (void)
1466 struct loop *loop;
1468 /* Try to predict out blocks in a loop that are not part of a
1469 natural loop. */
1470 FOR_EACH_LOOP (loop, 0)
1472 basic_block bb, *bbs;
1473 unsigned j, n_exits;
1474 vec<edge> exits;
1475 struct tree_niter_desc niter_desc;
1476 edge ex;
1477 struct nb_iter_bound *nb_iter;
1478 enum tree_code loop_bound_code = ERROR_MARK;
1479 tree loop_bound_step = NULL;
1480 tree loop_bound_var = NULL;
1481 tree loop_iv_base = NULL;
1482 gcond *stmt = NULL;
1484 exits = get_loop_exit_edges (loop);
1485 n_exits = exits.length ();
1486 if (!n_exits)
1488 exits.release ();
1489 continue;
1492 FOR_EACH_VEC_ELT (exits, j, ex)
1494 tree niter = NULL;
1495 HOST_WIDE_INT nitercst;
1496 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1497 int probability;
1498 enum br_predictor predictor;
1500 predict_extra_loop_exits (ex);
1502 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1503 niter = niter_desc.niter;
1504 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1505 niter = loop_niter_by_eval (loop, ex);
1507 if (TREE_CODE (niter) == INTEGER_CST)
1509 if (tree_fits_uhwi_p (niter)
1510 && max
1511 && compare_tree_int (niter, max - 1) == -1)
1512 nitercst = tree_to_uhwi (niter) + 1;
1513 else
1514 nitercst = max;
1515 predictor = PRED_LOOP_ITERATIONS;
1517 /* If we have just one exit and we can derive some information about
1518 the number of iterations of the loop from the statements inside
1519 the loop, use it to predict this exit. */
1520 else if (n_exits == 1)
1522 nitercst = estimated_stmt_executions_int (loop);
1523 if (nitercst < 0)
1524 continue;
1525 if (nitercst > max)
1526 nitercst = max;
1528 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1530 else
1531 continue;
1533 /* If the prediction for number of iterations is zero, do not
1534 predict the exit edges. */
1535 if (nitercst == 0)
1536 continue;
1538 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1539 predict_edge (ex, predictor, probability);
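/* Illustrative arithmetic, not part of the original source: for an exit
   of a loop expected to run about ten times, nitercst == 10 yields
   probability (10000 + 5) / 10 == 1000, i.e. each execution of the exit
   test has roughly a 10% chance of leaving the loop, which is consistent
   with ten iterations on average.  */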
1541 exits.release ();
1543 /* Find information about loop bound variables. */
1544 for (nb_iter = loop->bounds; nb_iter;
1545 nb_iter = nb_iter->next)
1546 if (nb_iter->stmt
1547 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1549 stmt = as_a <gcond *> (nb_iter->stmt);
1550 break;
1552 if (!stmt && last_stmt (loop->header)
1553 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1554 stmt = as_a <gcond *> (last_stmt (loop->header));
1555 if (stmt)
1556 is_comparison_with_loop_invariant_p (stmt, loop,
1557 &loop_bound_var,
1558 &loop_bound_code,
1559 &loop_bound_step,
1560 &loop_iv_base);
1562 bbs = get_loop_body (loop);
1564 for (j = 0; j < loop->num_nodes; j++)
1566 int header_found = 0;
1567 edge e;
1568 edge_iterator ei;
1570 bb = bbs[j];
1572 /* Bypass loop heuristics on continue statements. These
1573 statements construct loops via "non-loop" constructs
1574 in the source language and are better handled
1575 separately. */
1576 if (predicted_by_p (bb, PRED_CONTINUE))
1577 continue;
1579 /* Loop branch heuristics - predict an edge back to a
1580 loop's head as taken. */
1581 if (bb == loop->latch)
1583 e = find_edge (loop->latch, loop->header);
1584 if (e)
1586 header_found = 1;
1587 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1591 /* Loop exit heuristics - predict an edge exiting the loop as not taken
1592 if the conditional has no successor that is a loop header. */
1593 if (!header_found
1594 /* If we already used more reliable loop exit predictors, do not
1595 bother with PRED_LOOP_EXIT. */
1596 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1597 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1599 /* For a loop with many exits we don't want to predict all exits
1600 with a fairly large probability, because if all exits are
1601 considered in a row, the loop would be predicted to iterate
1602 almost never. The code to divide the probability by the number of
1603 exits is very rough. It should compute the number of exits
1604 taken in each path through the function (not the overall number
1605 of exits, which might be a lot higher for loops with wide switch
1606 statements in them) and compute the n-th root.
1608 We limit the minimal probability to 2% to avoid
1609 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1610 as this was causing a regression in the perl benchmark containing
1611 such a wide loop. */
1613 int probability = ((REG_BR_PROB_BASE
1614 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1615 / n_exits);
1616 if (probability < HITRATE (2))
1617 probability = HITRATE (2);
1618 FOR_EACH_EDGE (e, ei, bb->succs)
1619 if (e->dest->index < NUM_FIXED_BLOCKS
1620 || !flow_bb_inside_loop_p (loop, e->dest))
1621 predict_edge (e, PRED_LOOP_EXIT, probability);
1623 if (loop_bound_var)
1624 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1625 loop_bound_code,
1626 tree_to_shwi (loop_bound_step));
1629 /* Free basic blocks from get_loop_body. */
1630 free (bbs);
1634 /* Attempt to predict probabilities of BB outgoing edges using local
1635 properties. */
1636 static void
1637 bb_estimate_probability_locally (basic_block bb)
1639 rtx_insn *last_insn = BB_END (bb);
1640 rtx cond;
1642 if (! can_predict_insn_p (last_insn))
1643 return;
1644 cond = get_condition (last_insn, NULL, false, false);
1645 if (! cond)
1646 return;
1648 /* Try "pointer heuristic."
1649 A comparison ptr == 0 is predicted as false.
1650 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1651 if (COMPARISON_P (cond)
1652 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1653 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1655 if (GET_CODE (cond) == EQ)
1656 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1657 else if (GET_CODE (cond) == NE)
1658 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1660 else
1662 /* Try "opcode heuristic."
1663 EQ tests are usually false and NE tests are usually true. Also,
1664 most quantities are positive, so we can make the appropriate guesses
1665 about signed comparisons against zero. */
1666 switch (GET_CODE (cond))
1668 case CONST_INT:
1669 /* Unconditional branch. */
1670 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1671 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1672 break;
1674 case EQ:
1675 case UNEQ:
1676 /* Floating point comparisons appear to behave in a very
1677 unpredictable way because of the special role of = tests in
1678 FP code. */
1679 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1681 /* Comparisons with 0 are often used for booleans and there is
1682 nothing useful to predict about them. */
1683 else if (XEXP (cond, 1) == const0_rtx
1684 || XEXP (cond, 0) == const0_rtx)
1686 else
1687 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1688 break;
1690 case NE:
1691 case LTGT:
1692 /* Floating point comparisons appear to behave in a very
1693 unpredictable way because of the special role of = tests in
1694 FP code. */
1695 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1697 /* Comparisons with 0 are often used for booleans and there is
1698 nothing useful to predict about them. */
1699 else if (XEXP (cond, 1) == const0_rtx
1700 || XEXP (cond, 0) == const0_rtx)
1702 else
1703 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1704 break;
1706 case ORDERED:
1707 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1708 break;
1710 case UNORDERED:
1711 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1712 break;
1714 case LE:
1715 case LT:
1716 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1717 || XEXP (cond, 1) == constm1_rtx)
1718 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1719 break;
1721 case GE:
1722 case GT:
1723 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1724 || XEXP (cond, 1) == constm1_rtx)
1725 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1726 break;
1728 default:
1729 break;
1733 /* Set edge->probability for each successor edge of BB. */
1734 void
1735 guess_outgoing_edge_probabilities (basic_block bb)
1737 bb_estimate_probability_locally (bb);
1738 combine_predictions_for_insn (BB_END (bb), bb);
1741 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1743 /* Helper function for expr_expected_value. */
1745 static tree
1746 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1747 tree op1, bitmap visited, enum br_predictor *predictor)
1749 gimple *def;
1751 if (predictor)
1752 *predictor = PRED_UNCONDITIONAL;
1754 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1756 if (TREE_CONSTANT (op0))
1757 return op0;
1759 if (code != SSA_NAME)
1760 return NULL_TREE;
1762 def = SSA_NAME_DEF_STMT (op0);
1764 /* If we were already here, break the infinite cycle. */
1765 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1766 return NULL;
1768 if (gimple_code (def) == GIMPLE_PHI)
1770 /* All the arguments of the PHI node must have the same expected
1771 constant value. */
1772 int i, n = gimple_phi_num_args (def);
1773 tree val = NULL, new_val;
1775 for (i = 0; i < n; i++)
1777 tree arg = PHI_ARG_DEF (def, i);
1778 enum br_predictor predictor2;
1780 /* If this PHI has itself as an argument, we cannot
1781 determine the expected value of this argument. However,
1782 if we can find an expected constant value for the other
1783 PHI args then we can still be sure that this is
1784 likely a constant. So be optimistic and just
1785 continue with the next argument. */
1786 if (arg == PHI_RESULT (def))
1787 continue;
1789 new_val = expr_expected_value (arg, visited, &predictor2);
1791 /* It is difficult to combine value predictors. Simply assume
1792 that later predictor is weaker and take its prediction. */
1793 if (predictor && *predictor < predictor2)
1794 *predictor = predictor2;
1795 if (!new_val)
1796 return NULL;
1797 if (!val)
1798 val = new_val;
1799 else if (!operand_equal_p (val, new_val, false))
1800 return NULL;
1802 return val;
1804 if (is_gimple_assign (def))
1806 if (gimple_assign_lhs (def) != op0)
1807 return NULL;
1809 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1810 gimple_assign_rhs1 (def),
1811 gimple_assign_rhs_code (def),
1812 gimple_assign_rhs2 (def),
1813 visited, predictor);
1816 if (is_gimple_call (def))
1818 tree decl = gimple_call_fndecl (def);
1819 if (!decl)
1821 if (gimple_call_internal_p (def)
1822 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1824 gcc_assert (gimple_call_num_args (def) == 3);
1825 tree val = gimple_call_arg (def, 0);
1826 if (TREE_CONSTANT (val))
1827 return val;
1828 if (predictor)
1830 tree val2 = gimple_call_arg (def, 2);
1831 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1832 && tree_fits_uhwi_p (val2)
1833 && tree_to_uhwi (val2) < END_PREDICTORS);
1834 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1836 return gimple_call_arg (def, 1);
1838 return NULL;
1840 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1841 switch (DECL_FUNCTION_CODE (decl))
1843 case BUILT_IN_EXPECT:
1845 tree val;
1846 if (gimple_call_num_args (def) != 2)
1847 return NULL;
1848 val = gimple_call_arg (def, 0);
1849 if (TREE_CONSTANT (val))
1850 return val;
1851 if (predictor)
1852 *predictor = PRED_BUILTIN_EXPECT;
1853 return gimple_call_arg (def, 1);
1856 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1857 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1858 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1859 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1860 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1861 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1862 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1863 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1864 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1865 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1866 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1867 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1868 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1869 /* Assume that any given atomic operation has low contention,
1870 and thus the compare-and-swap operation succeeds. */
1871 if (predictor)
1872 *predictor = PRED_COMPARE_AND_SWAP;
1873 return boolean_true_node;
1874 default:
1875 break;
1879 return NULL;
1882 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1884 tree res;
1885 enum br_predictor predictor2;
1886 op0 = expr_expected_value (op0, visited, predictor);
1887 if (!op0)
1888 return NULL;
1889 op1 = expr_expected_value (op1, visited, &predictor2);
1890 if (predictor && *predictor < predictor2)
1891 *predictor = predictor2;
1892 if (!op1)
1893 return NULL;
1894 res = fold_build2 (code, type, op0, op1);
1895 if (TREE_CONSTANT (res))
1896 return res;
1897 return NULL;
1899 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1901 tree res;
1902 op0 = expr_expected_value (op0, visited, predictor);
1903 if (!op0)
1904 return NULL;
1905 res = fold_build1 (code, type, op0);
1906 if (TREE_CONSTANT (res))
1907 return res;
1908 return NULL;
1910 return NULL;
1913 /* Return the constant EXPR will likely have at execution time, NULL if unknown.
1914 The function is used by the builtin_expect branch predictor, so the evidence
1915 must come from this construct and possible additional constant folding.
1917 We may want to implement a more involved value guess (such as value range
1918 propagation based prediction), but such tricks should go into a new
1919 implementation. */
1921 static tree
1922 expr_expected_value (tree expr, bitmap visited,
1923 enum br_predictor *predictor)
1925 enum tree_code code;
1926 tree op0, op1;
1928 if (TREE_CONSTANT (expr))
1930 if (predictor)
1931 *predictor = PRED_UNCONDITIONAL;
1932 return expr;
1935 extract_ops_from_tree (expr, &code, &op0, &op1);
1936 return expr_expected_value_1 (TREE_TYPE (expr),
1937 op0, code, op1, visited, predictor);
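/* Typical source-level construct that this machinery folds (illustrative
   user code, not part of this file):

     if (__builtin_expect (ptr == NULL, 0))
       handle_rare_error ();

   expr_expected_value_1 resolves the condition to the expected constant
   0 using the __builtin_expect hint, so tree_predict_by_opcode below
   predicts the error path as unlikely.  */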
1940 /* Predict using opcode of the last statement in basic block. */
1941 static void
1942 tree_predict_by_opcode (basic_block bb)
1944 gimple *stmt = last_stmt (bb);
1945 edge then_edge;
1946 tree op0, op1;
1947 tree type;
1948 tree val;
1949 enum tree_code cmp;
1950 bitmap visited;
1951 edge_iterator ei;
1952 enum br_predictor predictor;
1954 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1955 return;
1956 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1957 if (then_edge->flags & EDGE_TRUE_VALUE)
1958 break;
1959 op0 = gimple_cond_lhs (stmt);
1960 op1 = gimple_cond_rhs (stmt);
1961 cmp = gimple_cond_code (stmt);
1962 type = TREE_TYPE (op0);
1963 visited = BITMAP_ALLOC (NULL);
1964 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1965 &predictor);
1966 BITMAP_FREE (visited);
1967 if (val && TREE_CODE (val) == INTEGER_CST)
1969 if (predictor == PRED_BUILTIN_EXPECT)
1971 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1973 gcc_assert (percent >= 0 && percent <= 100);
1974 if (integer_zerop (val))
1975 percent = 100 - percent;
1976 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1978 else
1979 predict_edge (then_edge, predictor,
1980 integer_zerop (val) ? NOT_TAKEN : TAKEN);
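/* Illustrative numbers (the builtin-expect-probability default of 90 is
   an assumption, check params.def): when the expected value is nonzero
   the then edge gets HITRATE (90) == 9000, and when the expected value
   is zero the percentage is flipped to 10 so the edge gets only
   HITRATE (10) == 1000.  */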
1982 /* Try "pointer heuristic."
1983 A comparison ptr == 0 is predicted as false.
1984 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1985 if (POINTER_TYPE_P (type))
1987 if (cmp == EQ_EXPR)
1988 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1989 else if (cmp == NE_EXPR)
1990 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1992 else
1994 /* Try "opcode heuristic."
1995 EQ tests are usually false and NE tests are usually true. Also,
1996 most quantities are positive, so we can make the appropriate guesses
1997 about signed comparisons against zero. */
1998 switch (cmp)
2000 case EQ_EXPR:
2001 case UNEQ_EXPR:
2002 /* Floating point comparisons appear to behave in a very
2003 unpredictable way because of the special role of = tests in
2004 FP code. */
2005 if (FLOAT_TYPE_P (type))
2007 /* Comparisons with 0 are often used for booleans and there is
2008 nothing useful to predict about them. */
2009 else if (integer_zerop (op0) || integer_zerop (op1))
2011 else
2012 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2013 break;
2015 case NE_EXPR:
2016 case LTGT_EXPR:
2017 /* Floating point comparisons appear to behave in a very
2018 unpredictable way because of the special role of equality tests in
2019 FP code. */
2020 if (FLOAT_TYPE_P (type))
2022 /* Comparisons with 0 are often used for booleans and there is
2023 nothing useful to predict about them. */
2024 else if (integer_zerop (op0)
2025 || integer_zerop (op1))
2027 else
2028 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2029 break;
2031 case ORDERED_EXPR:
2032 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2033 break;
2035 case UNORDERED_EXPR:
2036 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2037 break;
2039 case LE_EXPR:
2040 case LT_EXPR:
2041 if (integer_zerop (op1)
2042 || integer_onep (op1)
2043 || integer_all_onesp (op1)
2044 || real_zerop (op1)
2045 || real_onep (op1)
2046 || real_minus_onep (op1))
2047 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2048 break;
2050 case GE_EXPR:
2051 case GT_EXPR:
2052 if (integer_zerop (op1)
2053 || integer_onep (op1)
2054 || integer_all_onesp (op1)
2055 || real_zerop (op1)
2056 || real_onep (op1)
2057 || real_minus_onep (op1))
2058 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2059 break;
2061 default:
2062 break;
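
/* Editor's illustration (not part of predict.c): hypothetical user code
   showing which of the pointer/opcode heuristics above would fire.  */

#include <stddef.h>

int classify (int *p, int x, double d)
{
  if (p == NULL)   /* pointer EQ: PRED_TREE_POINTER, predicted not taken */
    return -1;
  if (x < 0)       /* LT against zero: PRED_TREE_OPCODE_POSITIVE, not taken */
    return -2;
  if (d == 0.0)    /* floating-point equality: deliberately left unpredicted */
    return -3;
  return x;
}
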
2066 /* Try to guess whether the return value means an error code. */
2068 static enum br_predictor
2069 return_prediction (tree val, enum prediction *prediction)
2071 /* VOID. */
2072 if (!val)
2073 return PRED_NO_PREDICTION;
2074 /* Different heuristics for pointers and scalars. */
2075 if (POINTER_TYPE_P (TREE_TYPE (val)))
2077 /* NULL is usually not returned. */
2078 if (integer_zerop (val))
2080 *prediction = NOT_TAKEN;
2081 return PRED_NULL_RETURN;
2084 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2086 /* Negative return values are often used to indicate
2087 errors. */
2088 if (TREE_CODE (val) == INTEGER_CST
2089 && tree_int_cst_sgn (val) < 0)
2091 *prediction = NOT_TAKEN;
2092 return PRED_NEGATIVE_RETURN;
2094 /* Constant return values seem to be commonly taken.
2095 Zero/one often represent booleans, so exclude them from the
2096 heuristics. */
2097 if (TREE_CONSTANT (val)
2098 && (!integer_zerop (val) && !integer_onep (val)))
2100 *prediction = TAKEN;
2101 return PRED_CONST_RETURN;
2104 return PRED_NO_PREDICTION;
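
/* Editor's illustration (not part of predict.c): hypothetical helpers
   showing the categories return_prediction assigns to return values.  */

#include <stdlib.h>

void *alloc_or_null (size_t n)
{
  if (n == 0)
    return NULL;     /* PRED_NULL_RETURN: NULL is usually not returned */
  return malloc (n);
}

int parse_digit (int c)
{
  if (c < '0' || c > '9')
    return -1;       /* PRED_NEGATIVE_RETURN: negative values often mean errors */
  return c - '0';
}
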
2107 /* Find the basic block with the return expression and look for a possible
2108 return value, trying to apply the RETURN_PREDICTION heuristics. */
2109 static void
2110 apply_return_prediction (void)
2112 greturn *return_stmt = NULL;
2113 tree return_val;
2114 edge e;
2115 gphi *phi;
2116 int phi_num_args, i;
2117 enum br_predictor pred;
2118 enum prediction direction;
2119 edge_iterator ei;
2121 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2123 gimple *last = last_stmt (e->src);
2124 if (last
2125 && gimple_code (last) == GIMPLE_RETURN)
2127 return_stmt = as_a <greturn *> (last);
2128 break;
2131 if (!e)
2132 return;
2133 return_val = gimple_return_retval (return_stmt);
2134 if (!return_val)
2135 return;
2136 if (TREE_CODE (return_val) != SSA_NAME
2137 || !SSA_NAME_DEF_STMT (return_val)
2138 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2139 return;
2140 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2141 phi_num_args = gimple_phi_num_args (phi);
2142 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2144 /* Avoid the degenerate case where all return values from the function
2145 belong to the same category (i.e. they are all positive constants)
2146 so we can hardly say anything about them. */
2147 for (i = 1; i < phi_num_args; i++)
2148 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2149 break;
2150 if (i != phi_num_args)
2151 for (i = 0; i < phi_num_args; i++)
2153 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2154 if (pred != PRED_NO_PREDICTION)
2155 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2156 direction);
2160 /* Look for basic blocks that contain unlikely-to-happen events
2161 (such as noreturn calls) and mark all paths leading to execution
2162 of these basic blocks as unlikely. */
2164 static void
2165 tree_bb_level_predictions (void)
2167 basic_block bb;
2168 bool has_return_edges = false;
2169 edge e;
2170 edge_iterator ei;
2172 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2173 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2175 has_return_edges = true;
2176 break;
2179 apply_return_prediction ();
2181 FOR_EACH_BB_FN (bb, cfun)
2183 gimple_stmt_iterator gsi;
2185 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2187 gimple *stmt = gsi_stmt (gsi);
2188 tree decl;
2190 if (is_gimple_call (stmt))
2192 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2193 && has_return_edges)
2194 predict_paths_leading_to (bb, PRED_NORETURN,
2195 NOT_TAKEN);
2196 decl = gimple_call_fndecl (stmt);
2197 if (decl
2198 && lookup_attribute ("cold",
2199 DECL_ATTRIBUTES (decl)))
2200 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2201 NOT_TAKEN);
2203 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2205 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2206 gimple_predict_outcome (stmt));
2207 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2208 hints to callers. */
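
/* Editor's illustration (not part of predict.c): a noreturn, cold callee
   makes the guarding path unlikely via PRED_NORETURN and PRED_COLD_FUNCTION.
   log_fatal is a made-up helper.  */

#include <stdlib.h>

__attribute__ ((noreturn, cold)) static void
log_fatal (const char *msg)
{
  (void) msg;
  abort ();
}

int checked_div (int a, int b)
{
  if (b == 0)
    log_fatal ("division by zero");
  return a / b;
}
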
2214 /* Callback for hash_map::traverse, asserts that the pointer map is
2215 empty. */
2217 bool
2218 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2219 void *)
2221 gcc_assert (!value);
2222 return false;
2225 /* Predict branch probabilities and estimate profile for basic block BB. */
2227 static void
2228 tree_estimate_probability_bb (basic_block bb)
2230 edge e;
2231 edge_iterator ei;
2232 gimple *last;
2234 FOR_EACH_EDGE (e, ei, bb->succs)
2236 /* Predict edges to user labels with attributes. */
2237 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2239 gimple_stmt_iterator gi;
2240 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2242 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2243 tree decl;
2245 if (!label_stmt)
2246 break;
2247 decl = gimple_label_label (label_stmt);
2248 if (DECL_ARTIFICIAL (decl))
2249 continue;
2251 /* Finally, we have a user-defined label. */
2252 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2253 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2254 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2255 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2259 /* Predict early returns to be probable, as we've already taken
2260 care of error returns, and other cases are often used for
2261 fast paths through the function.
2263 Since we've already removed the return statements, we are
2264 looking for a CFG like:
2266 if (conditional)
2269 goto return_block
2271 some other blocks
2272 return_block:
2273 return_stmt. */
2274 if (e->dest != bb->next_bb
2275 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2276 && single_succ_p (e->dest)
2277 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2278 && (last = last_stmt (e->dest)) != NULL
2279 && gimple_code (last) == GIMPLE_RETURN)
2281 edge e1;
2282 edge_iterator ei1;
2284 if (single_succ_p (bb))
2286 FOR_EACH_EDGE (e1, ei1, bb->preds)
2287 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2288 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2289 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2290 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2292 else
2293 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2294 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2295 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2296 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2299 /* Look for a block we are guarding (i.e. we dominate it,
2300 but it doesn't postdominate us). */
2301 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2302 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2303 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2305 gimple_stmt_iterator bi;
2307 /* The call heuristic claims that a guarded function call
2308 is improbable. This is because such calls are often used
2309 to signal exceptional situations such as printing error
2310 messages. */
2311 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2312 gsi_next (&bi))
2314 gimple *stmt = gsi_stmt (bi);
2315 if (is_gimple_call (stmt)
2316 /* Const and pure calls are hardly ever used to signal
2317 something exceptional. */
2318 && gimple_has_side_effects (stmt))
2320 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2321 break;
2326 tree_predict_by_opcode (bb);
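
/* Editor's illustration (not part of predict.c): on the edge guarding the
   error block, the user-placed cold label triggers PRED_COLD_LABEL and the
   guarded call with side effects triggers PRED_CALL, both predicting the
   branch as not taken.  */

#include <stdio.h>

int process (int fd, int err)
{
  if (err)
    goto error_path;
  return 0;

 error_path:
  __attribute__ ((cold));  /* user label marked cold */
  fprintf (stderr, "error %d on fd %d\n", err, fd);  /* guarded call */
  return -1;
}
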
2329 /* Predict branch probabilities and estimate profile of the tree CFG.
2330 This function can be called from the loop optimizers to recompute
2331 the profile information.
2332 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2334 void
2335 tree_estimate_probability (bool dry_run)
2337 basic_block bb;
2339 add_noreturn_fake_exit_edges ();
2340 connect_infinite_loops_to_exit ();
2341 /* We use loop_niter_by_eval, which requires that the loops have
2342 preheaders. */
2343 create_preheaders (CP_SIMPLE_PREHEADERS);
2344 calculate_dominance_info (CDI_POST_DOMINATORS);
2346 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2347 tree_bb_level_predictions ();
2348 record_loop_exits ();
2350 if (number_of_loops (cfun) > 1)
2351 predict_loops ();
2353 FOR_EACH_BB_FN (bb, cfun)
2354 tree_estimate_probability_bb (bb);
2356 FOR_EACH_BB_FN (bb, cfun)
2357 combine_predictions_for_bb (bb, dry_run);
2359 if (flag_checking)
2360 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2362 delete bb_predictions;
2363 bb_predictions = NULL;
2365 if (!dry_run)
2366 estimate_bb_frequencies (false);
2367 free_dominance_info (CDI_POST_DOMINATORS);
2368 remove_fake_exit_edges ();
2371 /* Predict edges to successors of CUR whose sources are not postdominated by
2372 BB, using predictor PRED, and recurse to all postdominators. */
2374 static void
2375 predict_paths_for_bb (basic_block cur, basic_block bb,
2376 enum br_predictor pred,
2377 enum prediction taken,
2378 bitmap visited)
2380 edge e;
2381 edge_iterator ei;
2382 basic_block son;
2384 /* We are looking for all edges forming edge cut induced by
2385 set of all blocks postdominated by BB. */
2386 FOR_EACH_EDGE (e, ei, cur->preds)
2387 if (e->src->index >= NUM_FIXED_BLOCKS
2388 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2390 edge e2;
2391 edge_iterator ei2;
2392 bool found = false;
2394 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2395 if (e->flags & (EDGE_EH | EDGE_FAKE))
2396 continue;
2397 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2399 /* See if there is an edge from e->src that is not abnormal
2400 and does not lead to BB. */
2401 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2402 if (e2 != e
2403 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2404 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2406 found = true;
2407 break;
2410 /* If there is a non-abnormal path leaving e->src, predict the edge
2411 using the predictor. Otherwise we need to look for paths
2412 leading to e->src.
2414 The second case may lead to an infinite loop if we are predicting
2415 regions that are only reachable by abnormal edges. We simply
2416 prevent visiting a given BB twice. */
2417 if (found)
2418 predict_edge_def (e, pred, taken);
2419 else if (bitmap_set_bit (visited, e->src->index))
2420 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2422 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2423 son;
2424 son = next_dom_son (CDI_POST_DOMINATORS, son))
2425 predict_paths_for_bb (son, bb, pred, taken, visited);
2428 /* Set branch probabilities on all paths leading to BB according to
2429 predictor PRED and prediction TAKEN. */
2431 static void
2432 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2433 enum prediction taken)
2435 bitmap visited = BITMAP_ALLOC (NULL);
2436 predict_paths_for_bb (bb, bb, pred, taken, visited);
2437 BITMAP_FREE (visited);
2440 /* Like predict_paths_leading_to but takes an edge instead of a basic block. */
2442 static void
2443 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2444 enum prediction taken)
2446 bool has_nonloop_edge = false;
2447 edge_iterator ei;
2448 edge e2;
2450 basic_block bb = e->src;
2451 FOR_EACH_EDGE (e2, ei, bb->succs)
2452 if (e2->dest != e->src && e2->dest != e->dest
2453 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2454 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2456 has_nonloop_edge = true;
2457 break;
2459 if (!has_nonloop_edge)
2461 bitmap visited = BITMAP_ALLOC (NULL);
2462 predict_paths_for_bb (bb, bb, pred, taken, visited);
2463 BITMAP_FREE (visited);
2465 else
2466 predict_edge_def (e, pred, taken);
2469 /* This is used to carry information about basic blocks. It is
2470 attached to the AUX field of the standard CFG block. */
2472 struct block_info
2474 /* Estimated frequency of execution of basic_block. */
2475 sreal frequency;
2477 /* To keep queue of basic blocks to process. */
2478 basic_block next;
2480 /* Number of predecessors we need to visit first. */
2481 int npredecessors;
2484 /* Similar information for edges. */
2485 struct edge_prob_info
2487 /* In case the edge is a loopback edge, the probability that the edge will be
2488 reached given that the header is. The estimated number of iterations of the
2489 loop can then be computed as 1 / (1 - back_edge_prob). */
2490 sreal back_edge_prob;
2491 /* True if the edge is a loopback edge in the natural loop. */
2492 unsigned int back_edge:1;
2495 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2496 #undef EDGE_INFO
2497 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2499 /* Helper function for estimate_bb_frequencies.
2500 Propagate the frequencies in blocks marked in
2501 TOVISIT, starting in HEAD. */
2503 static void
2504 propagate_freq (basic_block head, bitmap tovisit)
2506 basic_block bb;
2507 basic_block last;
2508 unsigned i;
2509 edge e;
2510 basic_block nextbb;
2511 bitmap_iterator bi;
2513 /* For each basic block we need to visit, count the number of its
2514 predecessors that we need to visit first. */
2515 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2517 edge_iterator ei;
2518 int count = 0;
2520 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2522 FOR_EACH_EDGE (e, ei, bb->preds)
2524 bool visit = bitmap_bit_p (tovisit, e->src->index);
2526 if (visit && !(e->flags & EDGE_DFS_BACK))
2527 count++;
2528 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2529 fprintf (dump_file,
2530 "Irreducible region hit, ignoring edge to %i->%i\n",
2531 e->src->index, bb->index);
2533 BLOCK_INFO (bb)->npredecessors = count;
2534 /* When function never returns, we will never process exit block. */
2535 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2536 bb->count = bb->frequency = 0;
2539 BLOCK_INFO (head)->frequency = 1;
2540 last = head;
2541 for (bb = head; bb; bb = nextbb)
2543 edge_iterator ei;
2544 sreal cyclic_probability = 0;
2545 sreal frequency = 0;
2547 nextbb = BLOCK_INFO (bb)->next;
2548 BLOCK_INFO (bb)->next = NULL;
2550 /* Compute frequency of basic block. */
2551 if (bb != head)
2553 if (flag_checking)
2554 FOR_EACH_EDGE (e, ei, bb->preds)
2555 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2556 || (e->flags & EDGE_DFS_BACK));
2558 FOR_EACH_EDGE (e, ei, bb->preds)
2559 if (EDGE_INFO (e)->back_edge)
2561 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2563 else if (!(e->flags & EDGE_DFS_BACK))
2565 /* frequency += (e->probability
2566 * BLOCK_INFO (e->src)->frequency /
2567 REG_BR_PROB_BASE); */
2569 sreal tmp = e->probability;
2570 tmp *= BLOCK_INFO (e->src)->frequency;
2571 tmp *= real_inv_br_prob_base;
2572 frequency += tmp;
2575 if (cyclic_probability == 0)
2577 BLOCK_INFO (bb)->frequency = frequency;
2579 else
2581 if (cyclic_probability > real_almost_one)
2582 cyclic_probability = real_almost_one;
2584 /* BLOCK_INFO (bb)->frequency = frequency
2585 / (1 - cyclic_probability) */
2587 cyclic_probability = sreal (1) - cyclic_probability;
2588 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2592 bitmap_clear_bit (tovisit, bb->index);
2594 e = find_edge (bb, head);
2595 if (e)
2597 /* EDGE_INFO (e)->back_edge_prob
2598 = ((e->probability * BLOCK_INFO (bb)->frequency)
2599 / REG_BR_PROB_BASE); */
2601 sreal tmp = e->probability;
2602 tmp *= BLOCK_INFO (bb)->frequency;
2603 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2606 /* Propagate to successor blocks. */
2607 FOR_EACH_EDGE (e, ei, bb->succs)
2608 if (!(e->flags & EDGE_DFS_BACK)
2609 && BLOCK_INFO (e->dest)->npredecessors)
2611 BLOCK_INFO (e->dest)->npredecessors--;
2612 if (!BLOCK_INFO (e->dest)->npredecessors)
2614 if (!nextbb)
2615 nextbb = e->dest;
2616 else
2617 BLOCK_INFO (last)->next = e->dest;
2619 last = e->dest;
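
/* Editor's worked example of the recurrence above: if a loop header is
   entered with frequency 1 and its back edge contributes
   cyclic_probability = 0.9, the header's frequency becomes
       frequency / (1 - cyclic_probability) = 1 / (1 - 0.9) = 10,
   i.e. roughly ten executions of the loop body per entry, matching the
   1 / (1 - back_edge_prob) iteration estimate noted for edge_prob_info.  */
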
2625 /* Estimate frequencies in loops at same nest level. */
2627 static void
2628 estimate_loops_at_level (struct loop *first_loop)
2630 struct loop *loop;
2632 for (loop = first_loop; loop; loop = loop->next)
2634 edge e;
2635 basic_block *bbs;
2636 unsigned i;
2637 bitmap tovisit = BITMAP_ALLOC (NULL);
2639 estimate_loops_at_level (loop->inner);
2641 /* Find current loop back edge and mark it. */
2642 e = loop_latch_edge (loop);
2643 EDGE_INFO (e)->back_edge = 1;
2645 bbs = get_loop_body (loop);
2646 for (i = 0; i < loop->num_nodes; i++)
2647 bitmap_set_bit (tovisit, bbs[i]->index);
2648 free (bbs);
2649 propagate_freq (loop->header, tovisit);
2650 BITMAP_FREE (tovisit);
2654 /* Propagates frequencies through structure of loops. */
2656 static void
2657 estimate_loops (void)
2659 bitmap tovisit = BITMAP_ALLOC (NULL);
2660 basic_block bb;
2662 /* Start by estimating the frequencies in the loops. */
2663 if (number_of_loops (cfun) > 1)
2664 estimate_loops_at_level (current_loops->tree_root->inner);
2666 /* Now propagate the frequencies through all the blocks. */
2667 FOR_ALL_BB_FN (bb, cfun)
2669 bitmap_set_bit (tovisit, bb->index);
2671 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2672 BITMAP_FREE (tovisit);
2675 /* Drop the profile for NODE to guessed, and update its frequency based on
2676 whether it is expected to be hot given the CALL_COUNT. */
2678 static void
2679 drop_profile (struct cgraph_node *node, gcov_type call_count)
2681 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2682 /* In the case where this was called by another function with a
2683 dropped profile, call_count will be 0. Since there are no
2684 non-zero call counts to this function, we don't know for sure
2685 whether it is hot, and therefore it will be marked normal below. */
2686 bool hot = maybe_hot_count_p (NULL, call_count);
2688 if (dump_file)
2689 fprintf (dump_file,
2690 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2691 node->name (), node->order,
2692 hot ? "Function is hot" : "Function is normal");
2693 /* We only expect to miss profiles for functions that are reached
2694 via non-zero call edges in cases where the function may have
2695 been linked from another module or library (COMDATs and extern
2696 templates). See the comments below for handle_missing_profiles.
2697 Also, only warn in cases where the missing counts exceed the
2698 number of training runs. In certain cases with an execv followed
2699 by a no-return call the profile for the no-return call is not
2700 dumped and there can be a mismatch. */
2701 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2702 && call_count > profile_info->runs)
2704 if (flag_profile_correction)
2706 if (dump_file)
2707 fprintf (dump_file,
2708 "Missing counts for called function %s/%i\n",
2709 node->name (), node->order);
2711 else
2712 warning (0, "Missing counts for called function %s/%i",
2713 node->name (), node->order);
2716 profile_status_for_fn (fn)
2717 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2718 node->frequency
2719 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2722 /* In the case of COMDAT routines, multiple object files will contain the same
2723 function and the linker will select one for the binary. In that case
2724 all the other copies from the profile-instrumented binary will be missing
2725 profile counts. Look for cases where this happened, due to non-zero
2726 call counts going to 0-count functions, and drop the profile to guessed
2727 so that we can use the estimated probabilities and avoid optimizing only
2728 for size.
2730 The other case where the profile may be missing is when the routine
2731 is not going to be emitted to the object file, e.g. for "extern template"
2732 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2733 all other cases of non-zero calls to 0-count functions. */
2735 void
2736 handle_missing_profiles (void)
2738 struct cgraph_node *node;
2739 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2740 vec<struct cgraph_node *> worklist;
2741 worklist.create (64);
2743 /* See if 0 count function has non-0 count callers. In this case we
2744 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2745 FOR_EACH_DEFINED_FUNCTION (node)
2747 struct cgraph_edge *e;
2748 gcov_type call_count = 0;
2749 gcov_type max_tp_first_run = 0;
2750 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2752 if (node->count)
2753 continue;
2754 for (e = node->callers; e; e = e->next_caller)
2756 call_count += e->count;
2758 if (e->caller->tp_first_run > max_tp_first_run)
2759 max_tp_first_run = e->caller->tp_first_run;
2762 /* If the time profile is missing, assign the maximum that comes from
2763 the caller functions. */
2764 if (!node->tp_first_run && max_tp_first_run)
2765 node->tp_first_run = max_tp_first_run + 1;
2767 if (call_count
2768 && fn && fn->cfg
2769 && (call_count * unlikely_count_fraction >= profile_info->runs))
2771 drop_profile (node, call_count);
2772 worklist.safe_push (node);
2776 /* Propagate the profile dropping to other 0-count COMDATs that are
2777 potentially called by COMDATs we already dropped the profile on. */
2778 while (worklist.length () > 0)
2780 struct cgraph_edge *e;
2782 node = worklist.pop ();
2783 for (e = node->callees; e; e = e->next_callee)
2785 struct cgraph_node *callee = e->callee;
2786 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2788 if (callee->count > 0)
2789 continue;
2790 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2791 && profile_status_for_fn (fn) == PROFILE_READ)
2793 drop_profile (node, 0);
2794 worklist.safe_push (callee);
2798 worklist.release ();
2801 /* Convert counts measured by profile driven feedback to frequencies.
2802 Return nonzero iff there was any nonzero execution count. */
2804 gcov_type
2805 counts_to_freqs (void)
2807 gcov_type count_max, true_count_max = 0;
2808 basic_block bb;
2810 /* Don't overwrite the estimated frequencies when the profile for
2811 the function is missing. We may drop this function to PROFILE_GUESSED
2812 later in drop_profile (). */
2813 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2814 return 0;
2816 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2817 true_count_max = MAX (bb->count, true_count_max);
2819 count_max = MAX (true_count_max, 1);
2820 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2821 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2823 return true_count_max;
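
/* Editor's worked example of the scaling above, assuming BB_FREQ_MAX is
   10000: a block with count 2 in a function whose hottest block has count 3
   gets frequency (2 * 10000 + 1) / 3 = 20001 / 3 = 6667; the added
   count_max / 2 (here 1) rounds to nearest instead of truncating.  */
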
2826 /* Return true if the function is likely to be expensive, so there is no point
2827 in optimizing the performance of the prologue or epilogue, or doing inlining,
2828 at the expense of code size growth. THRESHOLD is the limit on the number of
2829 instructions the function can execute on average to still be considered not expensive. */
2831 bool
2832 expensive_function_p (int threshold)
2834 unsigned int sum = 0;
2835 basic_block bb;
2836 unsigned int limit;
2838 /* We cannot compute accurately for large thresholds due to scaled
2839 frequencies. */
2840 gcc_assert (threshold <= BB_FREQ_MAX);
2842 /* Frequencies are out of range. This either means that the function contains
2843 an internal loop executing more than BB_FREQ_MAX times or profile feedback
2844 is available and the function has not been executed at all. */
2845 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2846 return true;
2848 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2849 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2850 FOR_EACH_BB_FN (bb, cfun)
2852 rtx_insn *insn;
2854 FOR_BB_INSNS (bb, insn)
2855 if (active_insn_p (insn))
2857 sum += bb->frequency;
2858 if (sum > limit)
2859 return true;
2863 return false;
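
/* Editor's worked example: with an entry-block frequency of 1000 and
   THRESHOLD of 50, the limit is 50000.  A block with frequency 800 that
   contains 70 active insns alone contributes 70 * 800 = 56000 > 50000,
   so the function is reported as expensive.  */
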
2866 /* Estimate and propagate basic block frequencies using the given branch
2867 probabilities. If FORCE is true, the frequencies are used to estimate
2868 the counts even when there are already non-zero profile counts. */
2870 void
2871 estimate_bb_frequencies (bool force)
2873 basic_block bb;
2874 sreal freq_max;
2876 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2878 static int real_values_initialized = 0;
2880 if (!real_values_initialized)
2882 real_values_initialized = 1;
2883 real_br_prob_base = REG_BR_PROB_BASE;
2884 real_bb_freq_max = BB_FREQ_MAX;
2885 real_one_half = sreal (1, -1);
2886 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2887 real_almost_one = sreal (1) - real_inv_br_prob_base;
2890 mark_dfs_back_edges ();
2892 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2893 REG_BR_PROB_BASE;
2895 /* Set up block info for each basic block. */
2896 alloc_aux_for_blocks (sizeof (block_info));
2897 alloc_aux_for_edges (sizeof (edge_prob_info));
2898 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2900 edge e;
2901 edge_iterator ei;
2903 FOR_EACH_EDGE (e, ei, bb->succs)
2905 EDGE_INFO (e)->back_edge_prob = e->probability;
2906 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2910 /* First compute frequencies locally for each loop from innermost
2911 to outermost to examine frequencies for back edges. */
2912 estimate_loops ();
2914 freq_max = 0;
2915 FOR_EACH_BB_FN (bb, cfun)
2916 if (freq_max < BLOCK_INFO (bb)->frequency)
2917 freq_max = BLOCK_INFO (bb)->frequency;
2919 freq_max = real_bb_freq_max / freq_max;
2920 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2922 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2923 bb->frequency = tmp.to_int ();
2926 free_aux_for_blocks ();
2927 free_aux_for_edges ();
2929 compute_function_frequency ();
2932 /* Decide whether function is hot, cold or unlikely executed. */
2933 void
2934 compute_function_frequency (void)
2936 basic_block bb;
2937 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2939 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2940 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2941 node->only_called_at_startup = true;
2942 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2943 node->only_called_at_exit = true;
2945 if (profile_status_for_fn (cfun) != PROFILE_READ)
2947 int flags = flags_from_decl_or_type (current_function_decl);
2948 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2949 != NULL)
2950 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2951 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2952 != NULL)
2953 node->frequency = NODE_FREQUENCY_HOT;
2954 else if (flags & ECF_NORETURN)
2955 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2956 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2957 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2958 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2959 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2960 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2961 return;
2964 /* Only the first time do we try to drop the function into unlikely executed.
2965 After inlining, roundoff errors may confuse us.
2966 The ipa-profile pass will drop functions only called from unlikely
2967 functions to unlikely, and that is most of what we care about. */
2968 if (!cfun->after_inlining)
2969 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2970 FOR_EACH_BB_FN (bb, cfun)
2972 if (maybe_hot_bb_p (cfun, bb))
2974 node->frequency = NODE_FREQUENCY_HOT;
2975 return;
2977 if (!probably_never_executed_bb_p (cfun, bb))
2978 node->frequency = NODE_FREQUENCY_NORMAL;
2982 /* Build PREDICT_EXPR. */
2983 tree
2984 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2986 tree t = build1 (PREDICT_EXPR, void_type_node,
2987 build_int_cst (integer_type_node, predictor));
2988 SET_PREDICT_EXPR_OUTCOME (t, taken);
2989 return t;
2992 const char *
2993 predictor_name (enum br_predictor predictor)
2995 return predictor_info[predictor].name;
2998 /* Predict branch probabilities and estimate profile of the tree CFG. */
3000 namespace {
3002 const pass_data pass_data_profile =
3004 GIMPLE_PASS, /* type */
3005 "profile_estimate", /* name */
3006 OPTGROUP_NONE, /* optinfo_flags */
3007 TV_BRANCH_PROB, /* tv_id */
3008 PROP_cfg, /* properties_required */
3009 0, /* properties_provided */
3010 0, /* properties_destroyed */
3011 0, /* todo_flags_start */
3012 0, /* todo_flags_finish */
3015 class pass_profile : public gimple_opt_pass
3017 public:
3018 pass_profile (gcc::context *ctxt)
3019 : gimple_opt_pass (pass_data_profile, ctxt)
3022 /* opt_pass methods: */
3023 virtual bool gate (function *) { return flag_guess_branch_prob; }
3024 virtual unsigned int execute (function *);
3026 }; // class pass_profile
3028 unsigned int
3029 pass_profile::execute (function *fun)
3031 unsigned nb_loops;
3033 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3034 return 0;
3036 loop_optimizer_init (LOOPS_NORMAL);
3037 if (dump_file && (dump_flags & TDF_DETAILS))
3038 flow_loops_dump (dump_file, NULL, 0);
3040 mark_irreducible_loops ();
3042 nb_loops = number_of_loops (fun);
3043 if (nb_loops > 1)
3044 scev_initialize ();
3046 tree_estimate_probability (false);
3048 if (nb_loops > 1)
3049 scev_finalize ();
3051 loop_optimizer_finalize ();
3052 if (dump_file && (dump_flags & TDF_DETAILS))
3053 gimple_dump_cfg (dump_file, dump_flags);
3054 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3055 profile_status_for_fn (fun) = PROFILE_GUESSED;
3056 return 0;
3059 } // anon namespace
3061 gimple_opt_pass *
3062 make_pass_profile (gcc::context *ctxt)
3064 return new pass_profile (ctxt);
3067 namespace {
3069 const pass_data pass_data_strip_predict_hints =
3071 GIMPLE_PASS, /* type */
3072 "*strip_predict_hints", /* name */
3073 OPTGROUP_NONE, /* optinfo_flags */
3074 TV_BRANCH_PROB, /* tv_id */
3075 PROP_cfg, /* properties_required */
3076 0, /* properties_provided */
3077 0, /* properties_destroyed */
3078 0, /* todo_flags_start */
3079 0, /* todo_flags_finish */
3082 class pass_strip_predict_hints : public gimple_opt_pass
3084 public:
3085 pass_strip_predict_hints (gcc::context *ctxt)
3086 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3089 /* opt_pass methods: */
3090 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3091 virtual unsigned int execute (function *);
3093 }; // class pass_strip_predict_hints
3095 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3096 we no longer need. */
3097 unsigned int
3098 pass_strip_predict_hints::execute (function *fun)
3100 basic_block bb;
3101 gimple *ass_stmt;
3102 tree var;
3104 FOR_EACH_BB_FN (bb, fun)
3106 gimple_stmt_iterator bi;
3107 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3109 gimple *stmt = gsi_stmt (bi);
3111 if (gimple_code (stmt) == GIMPLE_PREDICT)
3113 gsi_remove (&bi, true);
3114 continue;
3116 else if (is_gimple_call (stmt))
3118 tree fndecl = gimple_call_fndecl (stmt);
3120 if ((fndecl
3121 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3122 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3123 && gimple_call_num_args (stmt) == 2)
3124 || (gimple_call_internal_p (stmt)
3125 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3127 var = gimple_call_lhs (stmt);
3128 if (var)
3130 ass_stmt
3131 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3132 gsi_replace (&bi, ass_stmt, true);
3134 else
3136 gsi_remove (&bi, true);
3137 continue;
3141 gsi_next (&bi);
3144 return 0;
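
/* Editor's illustration (not part of predict.c) of the effect of this pass
   on user code: the __builtin_expect call is replaced by a plain copy of
   its first argument once the hint has been consumed.  */

/* Before the pass: */
int before (int cond)
{
  if (__builtin_expect (cond, 1))
    return 1;
  return 0;
}

/* After the pass the body behaves as if it had been written: */
int after (int cond)
{
  if (cond)
    return 1;
  return 0;
}
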
3147 } // anon namespace
3149 gimple_opt_pass *
3150 make_pass_strip_predict_hints (gcc::context *ctxt)
3152 return new pass_strip_predict_hints (ctxt);
3155 /* Rebuild function frequencies. Passes are in general expected to
3156 maintain the profile by hand; however, in some cases this is not possible:
3157 for example, when inlining several functions with loops, frequencies might run
3158 out of scale and thus need to be recomputed. */
3160 void
3161 rebuild_frequencies (void)
3163 timevar_push (TV_REBUILD_FREQUENCIES);
3165 /* When the max bb count in the function is small, there is a higher
3166 chance that there were truncation errors in the integer scaling
3167 of counts by inlining and other optimizations. This could lead
3168 to incorrect classification of code as being cold when it isn't.
3169 In that case, force the estimation of bb counts/frequencies from the
3170 branch probabilities, rather than computing frequencies from counts,
3171 which may also lead to frequencies incorrectly reduced to 0. There
3172 is less precision in the probabilities, so we only do this for small
3173 max counts. */
3174 gcov_type count_max = 0;
3175 basic_block bb;
3176 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3177 count_max = MAX (bb->count, count_max);
3179 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3180 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3181 && count_max < REG_BR_PROB_BASE/10))
3183 loop_optimizer_init (0);
3184 add_noreturn_fake_exit_edges ();
3185 mark_irreducible_loops ();
3186 connect_infinite_loops_to_exit ();
3187 estimate_bb_frequencies (true);
3188 remove_fake_exit_edges ();
3189 loop_optimizer_finalize ();
3191 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3192 counts_to_freqs ();
3193 else
3194 gcc_unreachable ();
3195 timevar_pop (TV_REBUILD_FREQUENCIES);
3198 /* Perform a dry run of the branch prediction pass and report a comparison of
3199 the predicted and real profiles in the dump file. */
3201 void
3202 report_predictor_hitrates (void)
3204 unsigned nb_loops;
3206 loop_optimizer_init (LOOPS_NORMAL);
3207 if (dump_file && (dump_flags & TDF_DETAILS))
3208 flow_loops_dump (dump_file, NULL, 0);
3210 mark_irreducible_loops ();
3212 nb_loops = number_of_loops (cfun);
3213 if (nb_loops > 1)
3214 scev_initialize ();
3216 tree_estimate_probability (true);
3218 if (nb_loops > 1)
3219 scev_finalize ();
3221 loop_optimizer_finalize ();