/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "coverage.h"
#include "diagnostic-core.h"
#include "gimple-predict.h"
#include "fold-const.h"
#include "calls.h"
#include "cfganal.h"
#include "profile.h"
#include "sreal.h"
#include "params.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "gimple-pretty-print.h"
/* Enum with reasons why a predictor is ignored.  */

enum predictor_reason
{
  REASON_NONE,
  REASON_IGNORED,
  REASON_SINGLE_EDGE_DUPLICATE,
  REASON_EDGE_PAIR_DUPLICATE
};

/* String messages for the aforementioned enum.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
			     enum predictor_reason, edge);
static void predict_paths_leading_to (basic_block, enum br_predictor,
				      enum prediction,
				      struct loop *in_loop = NULL);
static void predict_paths_leading_to_edge (edge, enum br_predictor,
					   enum prediction,
					   struct loop *in_loop = NULL);
static bool can_predict_insn_p (const rtx_insn *);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1
/* Translate a hitrate expressed in percent into our fixed-point
   probability representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
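/* For instance, with the REG_BR_PROB_BASE of 10000 that GCC uses,
   HITRATE (75) evaluates to (75 * 10000 + 50) / 100 == 7500; the "+ 50"
   rounds to the nearest unit instead of truncating.  */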
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
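  /* A block counts as hot when its frequency is at least
     1/HOT_BB_FREQUENCY_FRACTION of the entry block's frequency, i.e.
     once per 1000 function entries with the parameter's default of
     1000.  */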
  if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
      < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
    return false;
  return true;
}
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}
/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if profile count COUNT is considered to be hot.  */

bool
maybe_hot_count_p (struct function *, profile_count count)
{
  if (!count.initialized_p ())
    return true;
  /* Code executed at most once is not hot.  */
  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
    return false;
  return (count.to_gcov_type () >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 profile_count count, int)
{
  gcc_checking_assert (fun);
  if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
    {
      if (count == profile_count::zero ())
	return true;

      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
	return false;
      return true;
    }
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}

/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}
/* Return true when function FUN should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return the optimization type that should be used for the function FUN.  */

optimization_type
function_optimization_type (struct function *fun)
{
  return (optimize_function_for_speed_p (fun)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
	  || (bb && !maybe_hot_bb_p (cfun, bb)));
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return the optimization type that should be used for block BB.  */

optimization_type
bb_optimization_type (const_basic_block bb)
{
  return (optimize_bb_for_speed_p (bb)
	  ? OPTIMIZE_FOR_SPEED
	  : OPTIMIZE_FOR_SIZE);
}
/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */
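/* With the default --param predictable-branch-outcome=2 this accepts
   only edges whose probability is within 2% of 0 or of
   REG_BR_PROB_BASE.  */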
bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* Structure representing predictions on the tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true if edge E is already predicted by PREDICTOR with the
   probability that corresponds to the outcome TAKEN.  */

bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than the reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number of iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving a probability over 99% or below 1%.  In the future we might want
   to propagate reliability information across the CFG if we find this
   information useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Filter edge predictions PREDS by a function FILTER.  DATA are passed
   to the filter function.  */

void
filter_predictions (edge_prediction **preds,
		    bool (*filter) (edge_prediction *, void *), void *data)
{
  if (!bb_predictions)
    return;

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*filter) (*prediction, data))
	    prediction = &((*prediction)->ep_next);
	  else
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	}
    }
}
/* Filter function predicate that returns true for an edge prediction P
   if its edge is equal to DATA.  */

bool
equal_edge_p (edge_prediction *p, void *data)
{
  return p->ep_edge == (edge)data;
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);
  filter_predictions (preds, equal_edge_p, e);
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, enum predictor_reason reason = REASON_NONE,
		 edge ep_edge = NULL)
{
  edge e = ep_edge;
  edge_iterator ei;

  if (!file)
    return;

  if (e == NULL)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if (! (e->flags & EDGE_FALLTHRU))
	break;

  char edge_info_str[128];
  if (ep_edge)
    sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
	     ep_edge->dest->index);
  else
    edge_info_str[0] = '\0';

  fprintf (file, "  %s heuristics%s%s: %.1f%%",
	   predictor_info[predictor].name,
	   edge_info_str, reason_messages[reason],
	   probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count.initialized_p ())
    {
      fprintf (file, "  exec ");
      bb->count.dump (file);
      if (e)
	{
	  fprintf (file, " hit ");
	  e->count.dump (file);
	  fprintf (file, " (%.1f%%)", e->count.to_gcov_type() * 100.0
		   / bb->count.to_gcov_type ());
	}
    }

  fprintf (file, "\n");
}
/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  If UNLIKELY_EDGES is not null, the edges
   it contains are given PROB_VERY_UNLIKELY probability, and the remaining
   edges share an even probability.  */
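/* For example, a block with three eligible successors and no unlikely
   edges receives (REG_BR_PROB_BASE + 1) / 3 per edge, i.e. 3333 out of
   10000, so rounding loses at most a unit or two of the base.  */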
static void
set_even_probabilities (basic_block bb,
			hash_set<edge> *unlikely_edges = NULL)
{
  unsigned nedges = 0;
  edge e = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;

  /* Make the distribution even if all edges are unlikely.  */
  unsigned unlikely_count = unlikely_edges ? unlikely_edges->elements () : 0;
  if (unlikely_count == nedges)
    {
      unlikely_edges = NULL;
      unlikely_count = 0;
    }

  unsigned c = nedges - unlikely_count;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	if (unlikely_edges != NULL && unlikely_edges->contains (e))
	  e->probability = PROB_VERY_UNLIKELY;
	else
	  e->probability = (REG_BR_PROB_BASE + c / 2) / c;
      }
    else
      e->probability = 0;
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor
	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
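
	/* E.g. combining two agreeing predictions of 7500 (75%) gives
	   d == 0.75 * 0.75 + 0.25 * 0.25 == 0.625 (scaled by the base
	   squared) and a combined probability of 0.75 * 0.75 / 0.625
	   == 0.9, i.e. 9000: agreeing predictors reinforce each other,
	   while disagreeing ones pull the result back towards 1/2.  */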
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
			 bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{
  static inline hashval_t hash (const edge_prediction *);
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};
/* Calculate hash value of an edge prediction P based on predictor and
   normalized probability.  */
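/* Note that a probability P and its complement REG_BR_PROB_BASE - P hash
   to the same value, so a prediction and the same prediction attached to
   the opposite edge land in the same bucket; predictor_hash::equal below
   matches them.  */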
inline hashval_t
predictor_hash::hash (const edge_prediction *p)
{
  inchash::hash hstate;
  hstate.add_int (p->ep_predictor);

  int prob = p->ep_probability;
  if (prob > REG_BR_PROB_BASE / 2)
    prob = REG_BR_PROB_BASE - prob;

  hstate.add_int (prob);

  return hstate.end ();
}
/* Return true whether edge predictions P1 and P2 use the same predictor and
   have equal (or opposite) probabilities.  */

inline bool
predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
{
  return (p1->ep_predictor == p2->ep_predictor
	  && (p1->ep_probability == p2->ep_probability
	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
}
struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};

/* Return true if edge prediction P is not in DATA hash set.  */

static bool
not_removed_prediction_p (edge_prediction *p, void *data)
{
  hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
  return !remove->contains (p);
}
/* Prune predictions for a basic block BB.  Currently we do the following
   clean-up steps:

   1) remove a pair of predictions that guess the same probability
      (other than 1/2) for both edges
   2) remove duplicates of a prediction that attaches the same probability
      to a single edge

  */
static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.
   If DRY_RUN is set, only produce dumps and do not modify profile.  */

static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     When we have a basic block with more than 2 successors, the situation
     is more complicated as DS theory cannot be used literally.
     More precisely, let's assume we predicted edge e1 with probability p1,
     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
     need to find probability of e.g. m1({b2}), which we don't know.
     The only approximation is to equally distribute 1-p1 to all edges
     different from b1.

     According to numbers we've got from SPEC2006 benchmark, there's only
     one interesting reliable predictor (noreturn call), which can be
     handled with a bit easier approach.  */
  if (nedges != 2)
    {
      hash_set<edge> unlikely_edges (4);

      /* Identify all edges that have a probability close to very unlikely.
	 Doing the same for very likely edges is not worth it, as no such
	 probability occurs in the SPEC2006 benchmark.  */
      edge_prediction **preds = bb_predictions->get (bb);
      if (preds)
	for (pred = *preds; pred; pred = pred->ep_next)
	  if (pred->ep_probability <= PROB_VERY_UNLIKELY)
	    unlikely_edges.add (pred->ep_edge);

      if (!bb->count.initialized_p () && !dry_run)
	set_even_probabilities (bb, &unlikely_edges);
      clear_bb_predictions (bb);
      if (dump_file)
	{
	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
	  if (unlikely_edges.elements () == 0)
	    fprintf (dump_file,
		     "%i edges in bb %i predicted to even probabilities\n",
		     nedges, bb->index);
	  else
	    {
	      fprintf (dump_file,
		       "%i edges in bb %i predicted with some unlikely edges\n",
		       nedges, bb->index);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
		  dump_prediction (dump_file, PRED_COMBINED, e->probability,
				   bb, REASON_NONE, e);
	    }
	}
      return;
    }
  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor
	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
			 !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
			 first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count.initialized_p () && !dry_run)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple *stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Return true if BB is predicted by one of the loop heuristics.  */

static bool
predicted_by_loop_heuristics_p (basic_block bb)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
	|| i->ep_predictor == PRED_LOOP_ITERATIONS
	|| i->ep_predictor == PRED_LOOP_EXIT
	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
      return true;
  return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */
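/* For instance, when all the quantities involved are compile-time
   constants, as in

     for (i = 0; i < 100; i++)
       if (i < 30) ...

   the constant path below computes loop_count == (100 - 0) / 1 == 100
   and compare_count == (30 - 0) / 1 == 30, and predicts the then edge
   with probability 30 * REG_BR_PROB_BASE / 100, i.e. 30%.  */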
static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple *stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_loop_heuristics_p (bb))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken.  It has only 20% hitrate.  */
      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXTRA_EXIT.  */
static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple *lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple *last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;
  basic_block bb;
  hash_set <struct loop *> with_recursion(10);

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      tree decl;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_call (gsi_stmt (gsi))
	    && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
	    && recursive_call_p (current_function_decl, decl))
	  {
	    loop = bb->loop_father;
	    while (loop && !with_recursion.add (loop))
	      loop = loop_outer (loop);
	  }
    }
  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits = 0;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;
      bool recursion = with_recursion.contains (loop);

      exits = get_loop_exit_edges (loop);
      FOR_EACH_VEC_ELT (exits, j, ex)
	if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
	  n_exits ++;
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
		 loop->num, recursion ? " (with recursion)":"", n_exits);
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file,
		   "Loop %d iterates at most %i times.\n", loop->num,
		   (int)max_loop_iterations_int (loop));
	}
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && likely_max_loop_iterations_int (loop) >= 0)
	{
	  fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
		   loop->num, (int)likely_max_loop_iterations_int (loop));
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;
	  widest_int nit;

	  if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
	    continue;
	  /* Loop heuristics do not expect exit conditional to be inside
	     inner loop.  We predict from innermost to outermost loop.  */
	  if (predicted_by_loop_heuristics_p (ex->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Skipping exit %i->%i because "
			 "it is already predicted.\n",
			 ex->src->index, ex->dest->index);
	      continue;
	    }
	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);
	  if (dump_file && (dump_flags & TDF_DETAILS)
	      && TREE_CODE (niter) == INTEGER_CST)
	    {
	      fprintf (dump_file, "Exit %i->%i %d iterates ",
		       ex->src->index, ex->dest->index,
		       loop->num);
	      print_generic_expr (dump_file, niter, TDF_SLIM);
	      fprintf (dump_file, " times.\n");
	    }

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1
		   && estimated_stmt_executions (loop, &nit))
	    {
	      if (wi::gtu_p (nit, max))
		nitercst = max;
	      else
		nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  /* If we have likely upper bound, trust it for very small iteration
	     counts.  Such loops would otherwise get mispredicted by standard
	     LOOP_EXIT heuristics.  */
	  else if (n_exits == 1
		   && likely_max_stmt_executions (loop, &nit)
		   && wi::ltu_p (nit,
				 RDIV (REG_BR_PROB_BASE,
				       REG_BR_PROB_BASE
					 - predictor_info
						[recursion
						 ? PRED_LOOP_EXIT_WITH_RECURSION
						 : PRED_LOOP_EXIT].hitrate)))
	    {
	      nitercst = nit.to_shwi ();
	      predictor = PRED_LOOP_ITERATIONS_MAX;
	    }
	  else
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Nothing known about exit %i->%i.\n",
			 ex->src->index, ex->dest->index);
	      continue;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
		     (int)nitercst, predictor_info[predictor].name);
	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;
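
	  /* An exit from a loop iterating NITERCST times is taken on
	     roughly one iteration in NITERCST; e.g. a loop predicted to
	     iterate 10 times gets its exit edge predicted with
	     probability REG_BR_PROB_BASE / 10, i.e. 10%.  */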
	  probability = RDIV (REG_BR_PROB_BASE, nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);
      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "BB %i predicted by continue.\n",
			 bb->index);
	      continue;
	    }

	  /* If we already used more reliable loop exit predictors, do not
	     bother with PRED_LOOP_EXIT.  */
	  if (!predicted_by_loop_heuristics_p (bb))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken in each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in the perl benchmark
		 containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info
					[recursion
					 ? PRED_LOOP_EXIT_WITH_RECURSION
					 : PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  {
		    if (dump_file && (dump_flags & TDF_DETAILS))
		      fprintf (dump_file,
			       "Predicting exit %i->%i with prob %i.\n",
			       e->src->index, e->dest->index, probability);
		    predict_edge (e,
				  recursion ? PRED_LOOP_EXIT_WITH_RECURSION
				  : PRED_LOOP_EXIT, probability);
		  }
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}
      /* In the following code
	   for (loop1)
	     if (cond)
	       for (loop2)
		 body;
	 guess that cond is unlikely.  */
      if (loop_outer (loop)->num)
	{
	  basic_block bb = NULL;
	  edge preheader_edge = loop_preheader_edge (loop);

	  if (single_pred_p (preheader_edge->src)
	      && single_succ_p (preheader_edge->src))
	    preheader_edge = single_pred_edge (preheader_edge->src);

	  gimple *stmt = last_stmt (preheader_edge->src);
	  /* Pattern match fortran loop preheader:
	     _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
	     _17 = (logical(kind=4)) _16;
	     if (_17 != 0)
	       goto <bb 11>;
	     else
	       goto <bb 13>;

	     Loop guard branch prediction says nothing about duplicated loop
	     headers produced by fortran frontend and in this case we want
	     to predict paths leading to this preheader.  */

	  if (stmt
	      && gimple_code (stmt) == GIMPLE_COND
	      && gimple_cond_code (stmt) == NE_EXPR
	      && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
	      && integer_zerop (gimple_cond_rhs (stmt)))
	    {
	      gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
	      if (gimple_code (call_stmt) == GIMPLE_ASSIGN
		  && gimple_expr_code (call_stmt) == NOP_EXPR
		  && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
		call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
	      if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
		  && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
		  && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
		  && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
		     == PRED_FORTRAN_LOOP_PREHEADER)
		bb = preheader_edge->src;
	    }
	  if (!bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS,
				   loop_outer (loop)->latch, loop->header))
		predict_paths_leading_to_edge (loop_preheader_edge (loop),
					       recursion
					       ? PRED_LOOP_GUARD_WITH_RECURSION
					       : PRED_LOOP_GUARD,
					       NOT_TAKEN,
					       loop_outer (loop));
	    }
	  else
	    {
	      if (!dominated_by_p (CDI_DOMINATORS,
				   loop_outer (loop)->latch, bb))
		predict_paths_leading_to (bb,
					  recursion
					  ? PRED_LOOP_GUARD_WITH_RECURSION
					  : PRED_LOOP_GUARD,
					  NOT_TAKEN,
					  loop_outer (loop));
	    }
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
2042 /* Attempt to predict probabilities of BB outgoing edges using local
2043 properties. */
2044 static void
2045 bb_estimate_probability_locally (basic_block bb)
2047 rtx_insn *last_insn = BB_END (bb);
2048 rtx cond;
2050 if (! can_predict_insn_p (last_insn))
2051 return;
2052 cond = get_condition (last_insn, NULL, false, false);
2053 if (! cond)
2054 return;
2056 /* Try "pointer heuristic."
2057 A comparison ptr == 0 is predicted as false.
2058 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2059 if (COMPARISON_P (cond)
2060 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2061 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2063 if (GET_CODE (cond) == EQ)
2064 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2065 else if (GET_CODE (cond) == NE)
2066 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2068 else
2070 /* Try "opcode heuristic."
2071 EQ tests are usually false and NE tests are usually true. Also,
2072 most quantities are positive, so we can make the appropriate guesses
2073 about signed comparisons against zero. */
2074 switch (GET_CODE (cond))
2076 case CONST_INT:
2077 /* Unconditional branch. */
2078 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2079 cond == const0_rtx ? NOT_TAKEN : TAKEN);
2080 break;
2082 case EQ:
2083 case UNEQ:
2084 /* Floating point comparisons appear to behave in a very
2085 unpredictable way because of the special role of equality
2086 tests in FP code. */
2087 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2089 /* Comparisons with 0 are often used for booleans and there is
2090 nothing useful to predict about them. */
2091 else if (XEXP (cond, 1) == const0_rtx
2092 || XEXP (cond, 0) == const0_rtx)
2094 else
2095 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2096 break;
2098 case NE:
2099 case LTGT:
2100 /* Floating point comparisons appear to behave in a very
2101 unpredictable way because of the special role of equality
2102 tests in FP code. */
2103 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2105 /* Comparisons with 0 are often used for booleans and there is
2106 nothing useful to predict about them. */
2107 else if (XEXP (cond, 1) == const0_rtx
2108 || XEXP (cond, 0) == const0_rtx)
2110 else
2111 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2112 break;
2114 case ORDERED:
2115 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2116 break;
2118 case UNORDERED:
2119 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2120 break;
2122 case LE:
2123 case LT:
2124 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2125 || XEXP (cond, 1) == constm1_rtx)
2126 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2127 break;
2129 case GE:
2130 case GT:
2131 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2132 || XEXP (cond, 1) == constm1_rtx)
2133 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2134 break;
2136 default:
2137 break;
2141 /* Set edge->probability for each successor edge of BB. */
2142 void
2143 guess_outgoing_edge_probabilities (basic_block bb)
2145 bb_estimate_probability_locally (bb);
2146 combine_predictions_for_insn (BB_END (bb), bb);
2149 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
2151 /* Helper function for expr_expected_value. */
2153 static tree
2154 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2155 tree op1, bitmap visited, enum br_predictor *predictor)
2157 gimple *def;
2159 if (predictor)
2160 *predictor = PRED_UNCONDITIONAL;
2162 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2164 if (TREE_CONSTANT (op0))
2165 return op0;
2167 if (code == IMAGPART_EXPR)
2169 if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2171 def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2172 if (is_gimple_call (def)
2173 && gimple_call_internal_p (def)
2174 && (gimple_call_internal_fn (def)
2175 == IFN_ATOMIC_COMPARE_EXCHANGE))
2177 /* Assume that any given atomic operation has low contention,
2178 and thus the compare-and-swap operation succeeds. */
2179 if (predictor)
2180 *predictor = PRED_COMPARE_AND_SWAP;
2181 return build_one_cst (TREE_TYPE (op0));
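	      /* A sketch of the matched IL (the exact statement shape
		 depends on the earlier lowering of the atomic builtin):

		   _1 = ATOMIC_COMPARE_EXCHANGE (addr, expected, desired, ...);
		   _2 = IMAGPART_EXPR <_1>;

		 where the imaginary part of the complex result is the
		 success flag we predict to be 1.  */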
2186 if (code != SSA_NAME)
2187 return NULL_TREE;
2189 def = SSA_NAME_DEF_STMT (op0);
2191 /* If we were already here, break the infinite cycle. */
2192 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
2193 return NULL;
2195 if (gimple_code (def) == GIMPLE_PHI)
2197 /* All the arguments of the PHI node must have the same expected
2198 constant value. */
2199 int i, n = gimple_phi_num_args (def);
2200 tree val = NULL, new_val;
2202 for (i = 0; i < n; i++)
2204 tree arg = PHI_ARG_DEF (def, i);
2205 enum br_predictor predictor2;
2207 /* If this PHI has itself as an argument, we cannot
2208 determine the expected value of this argument. However,
2209 if we can find an expected constant value for the other
2210 PHI args then we can still be sure that this is
2211 likely a constant. So be optimistic and just
2212 continue with the next argument. */
2213 if (arg == PHI_RESULT (def))
2214 continue;
2216 new_val = expr_expected_value (arg, visited, &predictor2);
2218 /* It is difficult to combine value predictors. Simply assume
2219 that the later predictor is weaker and take its prediction. */
2220 if (predictor && *predictor < predictor2)
2221 *predictor = predictor2;
2222 if (!new_val)
2223 return NULL;
2224 if (!val)
2225 val = new_val;
2226 else if (!operand_equal_p (val, new_val, false))
2227 return NULL;
2229 return val;
2231 if (is_gimple_assign (def))
2233 if (gimple_assign_lhs (def) != op0)
2234 return NULL;
2236 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2237 gimple_assign_rhs1 (def),
2238 gimple_assign_rhs_code (def),
2239 gimple_assign_rhs2 (def),
2240 visited, predictor);
2243 if (is_gimple_call (def))
2245 tree decl = gimple_call_fndecl (def);
2246 if (!decl)
2248 if (gimple_call_internal_p (def)
2249 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2251 gcc_assert (gimple_call_num_args (def) == 3);
2252 tree val = gimple_call_arg (def, 0);
2253 if (TREE_CONSTANT (val))
2254 return val;
2255 if (predictor)
2257 tree val2 = gimple_call_arg (def, 2);
2258 gcc_assert (TREE_CODE (val2) == INTEGER_CST
2259 && tree_fits_uhwi_p (val2)
2260 && tree_to_uhwi (val2) < END_PREDICTORS);
2261 *predictor = (enum br_predictor) tree_to_uhwi (val2);
2263 return gimple_call_arg (def, 1);
2265 return NULL;
2267 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2268 switch (DECL_FUNCTION_CODE (decl))
2270 case BUILT_IN_EXPECT:
2272 tree val;
2273 if (gimple_call_num_args (def) != 2)
2274 return NULL;
2275 val = gimple_call_arg (def, 0);
2276 if (TREE_CONSTANT (val))
2277 return val;
2278 if (predictor)
2279 *predictor = PRED_BUILTIN_EXPECT;
2280 return gimple_call_arg (def, 1);
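	      /* For example, source code such as

		   if (__builtin_expect (x > limit, 0))
		     overflow_error ();

		 arrives here with arg 0 holding the tested value and arg 1
		 the expected constant; we return the latter as the likely
		 value (overflow_error is, of course, just an illustration).  */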
2283 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2284 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2285 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2286 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2287 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2288 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2289 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2290 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2291 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2292 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2293 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2294 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2295 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2296 /* Assume that any given atomic operation has low contention,
2297 and thus the compare-and-swap operation succeeds. */
2298 if (predictor)
2299 *predictor = PRED_COMPARE_AND_SWAP;
2300 return boolean_true_node;
2301 default:
2302 break;
2306 return NULL;
2309 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2311 tree res;
2312 enum br_predictor predictor2;
2313 op0 = expr_expected_value (op0, visited, predictor);
2314 if (!op0)
2315 return NULL;
2316 op1 = expr_expected_value (op1, visited, &predictor2);
2317 if (predictor && *predictor < predictor2)
2318 *predictor = predictor2;
2319 if (!op1)
2320 return NULL;
2321 res = fold_build2 (code, type, op0, op1);
2322 if (TREE_CONSTANT (res))
2323 return res;
2324 return NULL;
2326 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2328 tree res;
2329 op0 = expr_expected_value (op0, visited, predictor);
2330 if (!op0)
2331 return NULL;
2332 res = fold_build1 (code, type, op0);
2333 if (TREE_CONSTANT (res))
2334 return res;
2335 return NULL;
2337 return NULL;
2340 /* Return the constant EXPR will likely have at execution time, or NULL
2341 if unknown. The function is used by the builtin_expect branch predictor,
2342 so the evidence must come from this construct and additional possible
2344 constant folding.
2345 We may want to implement a more involved value guess (such as value range
2346 propagation based prediction), but such tricks shall go to a new implementation. */
2348 static tree
2349 expr_expected_value (tree expr, bitmap visited,
2350 enum br_predictor *predictor)
2352 enum tree_code code;
2353 tree op0, op1;
2355 if (TREE_CONSTANT (expr))
2357 if (predictor)
2358 *predictor = PRED_UNCONDITIONAL;
2359 return expr;
2362 extract_ops_from_tree (expr, &code, &op0, &op1);
2363 return expr_expected_value_1 (TREE_TYPE (expr),
2364 op0, code, op1, visited, predictor);
2367 /* Predict using opcode of the last statement in basic block. */
2368 static void
2369 tree_predict_by_opcode (basic_block bb)
2371 gimple *stmt = last_stmt (bb);
2372 edge then_edge;
2373 tree op0, op1;
2374 tree type;
2375 tree val;
2376 enum tree_code cmp;
2377 edge_iterator ei;
2378 enum br_predictor predictor;
2380 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2381 return;
2382 FOR_EACH_EDGE (then_edge, ei, bb->succs)
2383 if (then_edge->flags & EDGE_TRUE_VALUE)
2384 break;
2385 op0 = gimple_cond_lhs (stmt);
2386 op1 = gimple_cond_rhs (stmt);
2387 cmp = gimple_cond_code (stmt);
2388 type = TREE_TYPE (op0);
2389 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (),
2390 &predictor);
2391 if (val && TREE_CODE (val) == INTEGER_CST)
2393 if (predictor == PRED_BUILTIN_EXPECT)
2395 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2397 gcc_assert (percent >= 0 && percent <= 100);
2398 if (integer_zerop (val))
2399 percent = 100 - percent;
2400 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2402 else
2403 predict_edge_def (then_edge, predictor,
2404 integer_zerop (val) ? NOT_TAKEN : TAKEN);
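      /* A worked example with the default builtin-expect-probability of 90:
	 for if (__builtin_expect (cond, 0)) the expected value is 0, so
	 percent becomes 100 - 90 = 10 and the THEN edge is predicted
	 with probability HITRATE (10), i.e. 10%.  */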
2406 /* Try "pointer heuristic."
2407 A comparison ptr == 0 is predicted as false.
2408 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2409 if (POINTER_TYPE_P (type))
2411 if (cmp == EQ_EXPR)
2412 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2413 else if (cmp == NE_EXPR)
2414 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2416 else
2418 /* Try "opcode heuristic."
2419 EQ tests are usually false and NE tests are usually true. Also,
2420 most quantities are positive, so we can make the appropriate guesses
2421 about signed comparisons against zero. */
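      /* For illustration, the cases below make guesses such as:

	   if (a != b)  predicted taken (PRED_TREE_OPCODE_NONEQUAL)
	   if (n < 0)   predicted not taken (PRED_TREE_OPCODE_POSITIVE)
	   if (b == 0)  left alone; comparisons with 0 look like booleans.  */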
2422 switch (cmp)
2424 case EQ_EXPR:
2425 case UNEQ_EXPR:
2426 /* Floating point comparisons appear to behave in a very
2427 unpredictable way because of the special role of equality
2428 tests in FP code. */
2429 if (FLOAT_TYPE_P (type))
2431 /* Comparisons with 0 are often used for booleans and there is
2432 nothing useful to predict about them. */
2433 else if (integer_zerop (op0) || integer_zerop (op1))
2435 else
2436 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2437 break;
2439 case NE_EXPR:
2440 case LTGT_EXPR:
2441 /* Floating point comparisons appear to behave in a very
2442 unpredictable way because of the special role of equality
2443 tests in FP code. */
2444 if (FLOAT_TYPE_P (type))
2446 /* Comparisons with 0 are often used for booleans and there is
2447 nothing useful to predict about them. */
2448 else if (integer_zerop (op0)
2449 || integer_zerop (op1))
2451 else
2452 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2453 break;
2455 case ORDERED_EXPR:
2456 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2457 break;
2459 case UNORDERED_EXPR:
2460 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2461 break;
2463 case LE_EXPR:
2464 case LT_EXPR:
2465 if (integer_zerop (op1)
2466 || integer_onep (op1)
2467 || integer_all_onesp (op1)
2468 || real_zerop (op1)
2469 || real_onep (op1)
2470 || real_minus_onep (op1))
2471 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2472 break;
2474 case GE_EXPR:
2475 case GT_EXPR:
2476 if (integer_zerop (op1)
2477 || integer_onep (op1)
2478 || integer_all_onesp (op1)
2479 || real_zerop (op1)
2480 || real_onep (op1)
2481 || real_minus_onep (op1))
2482 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2483 break;
2485 default:
2486 break;
2490 /* Returns TRUE if STMT is an exit(0)-like statement. */
2492 static bool
2493 is_exit_with_zero_arg (const gimple *stmt)
2495 /* Bail out if this is not exit, _exit or _Exit. */
2496 if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2497 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2498 && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2499 return false;
2501 /* The argument is an integer zero. */
2502 return integer_zerop (gimple_call_arg (stmt, 0));
2505 /* Try to guess whether the return value means an error code. */
2507 static enum br_predictor
2508 return_prediction (tree val, enum prediction *prediction)
2510 /* VOID. */
2511 if (!val)
2512 return PRED_NO_PREDICTION;
2513 /* Different heuristics for pointers and scalars. */
2514 if (POINTER_TYPE_P (TREE_TYPE (val)))
2516 /* NULL is usually not returned. */
2517 if (integer_zerop (val))
2519 *prediction = NOT_TAKEN;
2520 return PRED_NULL_RETURN;
2523 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2525 /* Negative return values are often used to indicate
2526 errors. */
2527 if (TREE_CODE (val) == INTEGER_CST
2528 && tree_int_cst_sgn (val) < 0)
2530 *prediction = NOT_TAKEN;
2531 return PRED_NEGATIVE_RETURN;
2533 /* Constant return values seem to be commonly taken.
2534 Zero/one often represent booleans, so exclude them from the
2535 heuristics. */
2536 if (TREE_CONSTANT (val)
2537 && (!integer_zerop (val) && !integer_onep (val)))
2539 *prediction = NOT_TAKEN;
2540 return PRED_CONST_RETURN;
2543 return PRED_NO_PREDICTION;
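/* For illustration, the heuristics above classify return statements such as

     return NULL;    PRED_NULL_RETURN, the path is predicted not taken
     return -1;      PRED_NEGATIVE_RETURN, not taken
     return 42;      PRED_CONST_RETURN, not taken
     return 0;       no prediction; zero/one usually represent booleans.  */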
2546 /* Find the basic block with return expression and look up for possible
2547 return value trying to apply RETURN_PREDICTION heuristics. */
2548 static void
2549 apply_return_prediction (void)
2551 greturn *return_stmt = NULL;
2552 tree return_val;
2553 edge e;
2554 gphi *phi;
2555 int phi_num_args, i;
2556 enum br_predictor pred;
2557 enum prediction direction;
2558 edge_iterator ei;
2560 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2562 gimple *last = last_stmt (e->src);
2563 if (last
2564 && gimple_code (last) == GIMPLE_RETURN)
2566 return_stmt = as_a <greturn *> (last);
2567 break;
2570 if (!e)
2571 return;
2572 return_val = gimple_return_retval (return_stmt);
2573 if (!return_val)
2574 return;
2575 if (TREE_CODE (return_val) != SSA_NAME
2576 || !SSA_NAME_DEF_STMT (return_val)
2577 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2578 return;
2579 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2580 phi_num_args = gimple_phi_num_args (phi);
2581 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2583 /* Avoid the degenerate case where all return values from the function
2584 belong to the same category (i.e. they are all positive constants)
2585 so we can hardly say anything about them. */
2586 for (i = 1; i < phi_num_args; i++)
2587 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2588 break;
2589 if (i != phi_num_args)
2590 for (i = 0; i < phi_num_args; i++)
2592 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2593 if (pred != PRED_NO_PREDICTION)
2594 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2595 direction);
2599 /* Look for basic blocks that contain unlikely-to-happen events
2600 (such as noreturn calls) and mark all paths leading to execution
2601 of these basic blocks as unlikely. */
2603 static void
2604 tree_bb_level_predictions (void)
2606 basic_block bb;
2607 bool has_return_edges = false;
2608 edge e;
2609 edge_iterator ei;
2611 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2612 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2614 has_return_edges = true;
2615 break;
2618 apply_return_prediction ();
2620 FOR_EACH_BB_FN (bb, cfun)
2622 gimple_stmt_iterator gsi;
2624 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2626 gimple *stmt = gsi_stmt (gsi);
2627 tree decl;
2629 if (is_gimple_call (stmt))
2631 if (gimple_call_noreturn_p (stmt)
2632 && has_return_edges
2633 && !is_exit_with_zero_arg (stmt))
2634 predict_paths_leading_to (bb, PRED_NORETURN,
2635 NOT_TAKEN);
2636 decl = gimple_call_fndecl (stmt);
2637 if (decl
2638 && lookup_attribute ("cold",
2639 DECL_ATTRIBUTES (decl)))
2640 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2641 NOT_TAKEN);
2642 if (decl && recursive_call_p (current_function_decl, decl))
2643 predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
2644 NOT_TAKEN);
2646 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2648 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2649 gimple_predict_outcome (stmt));
2650 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2651 hints to callers. */
2657 /* Callback for hash_map::traverse, asserts that the pointer map is
2658 empty. */
2660 bool
2661 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2662 void *)
2664 gcc_assert (!value);
2665 return false;
2668 /* Predict branch probabilities and estimate profile for basic block BB.
2669 When LOCAL_ONLY is set do not use any global properties of CFG. */
2671 static void
2672 tree_estimate_probability_bb (basic_block bb, bool local_only)
2674 edge e;
2675 edge_iterator ei;
2676 gimple *last;
2678 FOR_EACH_EDGE (e, ei, bb->succs)
2680 /* Predict edges to user labels with attributes. */
2681 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2683 gimple_stmt_iterator gi;
2684 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2686 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2687 tree decl;
2689 if (!label_stmt)
2690 break;
2691 decl = gimple_label_label (label_stmt);
2692 if (DECL_ARTIFICIAL (decl))
2693 continue;
2695 /* Finally, we have a user-defined label. */
2696 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2697 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2698 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2699 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2703 /* Predict early returns to be probable, as we've already taken
2704 care of error returns and other cases are often used for
2705 fast paths through the function.
2707 Since we've already removed the return statements, we are
2708 looking for a CFG like:
2710 if (conditional)
2713 goto return_block
2715 some other blocks
2716 return_block:
2717 return_stmt. */
2718 if (e->dest != bb->next_bb
2719 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2720 && single_succ_p (e->dest)
2721 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2722 && (last = last_stmt (e->dest)) != NULL
2723 && gimple_code (last) == GIMPLE_RETURN)
2725 edge e1;
2726 edge_iterator ei1;
2728 if (single_succ_p (bb))
2730 FOR_EACH_EDGE (e1, ei1, bb->preds)
2731 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2732 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2733 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2734 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2736 else
2737 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2738 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2739 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2740 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2743 /* Look for a block we are guarding (i.e. we dominate it,
2744 but it doesn't postdominate us). */
2745 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2746 && !local_only
2747 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2748 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2750 gimple_stmt_iterator bi;
2752 /* The call heuristic claims that a guarded function call
2753 is improbable. This is because such calls are often used
2754 to signal exceptional situations such as printing error
2755 messages. */
2756 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2757 gsi_next (&bi))
2759 gimple *stmt = gsi_stmt (bi);
2760 if (is_gimple_call (stmt)
2761 && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
2762 /* Constant and pure calls are hardly used to signal
2763 something exceptional. */
2764 && gimple_has_side_effects (stmt))
2766 if (gimple_call_fndecl (stmt))
2767 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2768 else if (virtual_method_call_p (gimple_call_fn (stmt)))
2769 predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
2770 else
2771 predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
2772 break;
2777 tree_predict_by_opcode (bb);
2780 /* Predict branch probabilities and estimate profile of the tree CFG.
2781 This function can be called from the loop optimizers to recompute
2782 the profile information.
2783 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2785 void
2786 tree_estimate_probability (bool dry_run)
2788 basic_block bb;
2790 add_noreturn_fake_exit_edges ();
2791 connect_infinite_loops_to_exit ();
2792 /* We use loop_niter_by_eval, which requires that the loops have
2793 preheaders. */
2794 create_preheaders (CP_SIMPLE_PREHEADERS);
2795 calculate_dominance_info (CDI_POST_DOMINATORS);
2797 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2798 tree_bb_level_predictions ();
2799 record_loop_exits ();
2801 if (number_of_loops (cfun) > 1)
2802 predict_loops ();
2804 FOR_EACH_BB_FN (bb, cfun)
2805 tree_estimate_probability_bb (bb, false);
2807 FOR_EACH_BB_FN (bb, cfun)
2808 combine_predictions_for_bb (bb, dry_run);
2810 if (flag_checking)
2811 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2813 delete bb_predictions;
2814 bb_predictions = NULL;
2816 if (!dry_run)
2817 estimate_bb_frequencies (false);
2818 free_dominance_info (CDI_POST_DOMINATORS);
2819 remove_fake_exit_edges ();
2822 /* Set edge->probability for each successor edge of BB. */
2823 void
2824 tree_guess_outgoing_edge_probabilities (basic_block bb)
2826 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2827 tree_estimate_probability_bb (bb, true);
2828 combine_predictions_for_bb (bb, false);
2829 if (flag_checking)
2830 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2831 delete bb_predictions;
2832 bb_predictions = NULL;
2835 /* Predict edges to successors of CUR whose sources are not postdominated
2836 by BB, using PRED, and recurse to all postdominators. */
2838 static void
2839 predict_paths_for_bb (basic_block cur, basic_block bb,
2840 enum br_predictor pred,
2841 enum prediction taken,
2842 bitmap visited, struct loop *in_loop = NULL)
2844 edge e;
2845 edge_iterator ei;
2846 basic_block son;
2848 /* If we exited the loop or CUR is unconditional in the loop, there is
2849 nothing to do. */
2850 if (in_loop
2851 && (!flow_bb_inside_loop_p (in_loop, cur)
2852 || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
2853 return;
2855 /* We are looking for all edges forming an edge cut induced by the
2856 set of all blocks postdominated by BB. */
2857 FOR_EACH_EDGE (e, ei, cur->preds)
2858 if (e->src->index >= NUM_FIXED_BLOCKS
2859 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2861 edge e2;
2862 edge_iterator ei2;
2863 bool found = false;
2865 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2866 if (e->flags & (EDGE_EH | EDGE_FAKE))
2867 continue;
2868 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2870 /* See if there is an edge from e->src that is not abnormal
2871 and does not lead to BB and does not exit the loop. */
2872 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2873 if (e2 != e
2874 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2875 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
2876 && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
2878 found = true;
2879 break;
2882 /* If there is a non-abnormal path leaving e->src, predict the edge
2883 using the predictor. Otherwise we need to look for paths
2884 leading to e->src.
2886 The second case may lead to an infinite loop when we are predicting
2887 regions that are only reachable by abnormal edges. We simply
2888 prevent visiting a given BB twice. */
2889 if (found)
2891 if (!edge_predicted_by_p (e, pred, taken))
2892 predict_edge_def (e, pred, taken);
2894 else if (bitmap_set_bit (visited, e->src->index))
2895 predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
2897 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2898 son;
2899 son = next_dom_son (CDI_POST_DOMINATORS, son))
2900 predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
2903 /* Set branch probabilities of paths leading to BB according to PRED
2904 and TAKEN, restricted to IN_LOOP if non-NULL. */
2906 static void
2907 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2908 enum prediction taken, struct loop *in_loop)
2910 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
2913 /* Like predict_paths_leading_to but take edge instead of basic block. */
2915 static void
2916 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2917 enum prediction taken, struct loop *in_loop)
2919 bool has_nonloop_edge = false;
2920 edge_iterator ei;
2921 edge e2;
2923 basic_block bb = e->src;
2924 FOR_EACH_EDGE (e2, ei, bb->succs)
2925 if (e2->dest != e->src && e2->dest != e->dest
2926 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2927 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2929 has_nonloop_edge = true;
2930 break;
2932 if (!has_nonloop_edge)
2934 predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
2936 else
2937 predict_edge_def (e, pred, taken);
2940 /* This is used to carry information about basic blocks. It is
2941 attached to the AUX field of the standard CFG block. */
2943 struct block_info
2945 /* Estimated frequency of execution of basic_block. */
2946 sreal frequency;
2948 /* To keep queue of basic blocks to process. */
2949 basic_block next;
2951 /* Number of predecessors we need to visit first. */
2952 int npredecessors;
2955 /* Similar information for edges. */
2956 struct edge_prob_info
2958 /* In case the edge is a loopback edge, the probability that the edge
2959 will be reached provided the header is. The estimated number of
2960 iterations of the loop can then be computed as 1 / (1 - back_edge_prob). */
2961 sreal back_edge_prob;
2962 /* True if the edge is a loopback edge in the natural loop. */
2963 unsigned int back_edge:1;
2966 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2967 #undef EDGE_INFO
2968 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2970 /* Helper function for estimate_bb_frequencies.
2971 Propagate the frequencies in blocks marked in
2972 TOVISIT, starting in HEAD. */
2974 static void
2975 propagate_freq (basic_block head, bitmap tovisit)
2977 basic_block bb;
2978 basic_block last;
2979 unsigned i;
2980 edge e;
2981 basic_block nextbb;
2982 bitmap_iterator bi;
2984 /* For each basic block we need to visit, count the number of its
2985 predecessors that we need to visit first. */
2986 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2988 edge_iterator ei;
2989 int count = 0;
2991 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2993 FOR_EACH_EDGE (e, ei, bb->preds)
2995 bool visit = bitmap_bit_p (tovisit, e->src->index);
2997 if (visit && !(e->flags & EDGE_DFS_BACK))
2998 count++;
2999 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3000 fprintf (dump_file,
3001 "Irreducible region hit, ignoring edge to %i->%i\n",
3002 e->src->index, bb->index);
3004 BLOCK_INFO (bb)->npredecessors = count;
3005 /* When the function never returns, we will never process the exit block. */
3006 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3008 bb->count = profile_count::zero ();
3009 bb->frequency = 0;
3013 BLOCK_INFO (head)->frequency = 1;
3014 last = head;
3015 for (bb = head; bb; bb = nextbb)
3017 edge_iterator ei;
3018 sreal cyclic_probability = 0;
3019 sreal frequency = 0;
3021 nextbb = BLOCK_INFO (bb)->next;
3022 BLOCK_INFO (bb)->next = NULL;
3024 /* Compute frequency of basic block. */
3025 if (bb != head)
3027 if (flag_checking)
3028 FOR_EACH_EDGE (e, ei, bb->preds)
3029 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3030 || (e->flags & EDGE_DFS_BACK));
3032 FOR_EACH_EDGE (e, ei, bb->preds)
3033 if (EDGE_INFO (e)->back_edge)
3035 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3037 else if (!(e->flags & EDGE_DFS_BACK))
3039 /* frequency += (e->probability
3040 * BLOCK_INFO (e->src)->frequency /
3041 REG_BR_PROB_BASE); */
3043 sreal tmp = e->probability;
3044 tmp *= BLOCK_INFO (e->src)->frequency;
3045 tmp *= real_inv_br_prob_base;
3046 frequency += tmp;
3049 if (cyclic_probability == 0)
3051 BLOCK_INFO (bb)->frequency = frequency;
3053 else
3055 if (cyclic_probability > real_almost_one)
3056 cyclic_probability = real_almost_one;
3058 /* BLOCK_INFO (bb)->frequency = frequency
3059 / (1 - cyclic_probability) */
3061 cyclic_probability = sreal (1) - cyclic_probability;
3062 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
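	  /* E.g. an incoming frequency of 1 with cyclic_probability 0.9
	     yields 1 / (1 - 0.9) = 10, matching the expected iteration
	     count noted for back_edge_prob above.  */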
3066 bitmap_clear_bit (tovisit, bb->index);
3068 e = find_edge (bb, head);
3069 if (e)
3071 /* EDGE_INFO (e)->back_edge_prob
3072 = ((e->probability * BLOCK_INFO (bb)->frequency)
3073 / REG_BR_PROB_BASE); */
3075 sreal tmp = e->probability;
3076 tmp *= BLOCK_INFO (bb)->frequency;
3077 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
3080 /* Propagate to successor blocks. */
3081 FOR_EACH_EDGE (e, ei, bb->succs)
3082 if (!(e->flags & EDGE_DFS_BACK)
3083 && BLOCK_INFO (e->dest)->npredecessors)
3085 BLOCK_INFO (e->dest)->npredecessors--;
3086 if (!BLOCK_INFO (e->dest)->npredecessors)
3088 if (!nextbb)
3089 nextbb = e->dest;
3090 else
3091 BLOCK_INFO (last)->next = e->dest;
3093 last = e->dest;
3099 /* Estimate frequencies in loops at same nest level. */
3101 static void
3102 estimate_loops_at_level (struct loop *first_loop)
3104 struct loop *loop;
3106 for (loop = first_loop; loop; loop = loop->next)
3108 edge e;
3109 basic_block *bbs;
3110 unsigned i;
3111 auto_bitmap tovisit;
3113 estimate_loops_at_level (loop->inner);
3115 /* Find current loop back edge and mark it. */
3116 e = loop_latch_edge (loop);
3117 EDGE_INFO (e)->back_edge = 1;
3119 bbs = get_loop_body (loop);
3120 for (i = 0; i < loop->num_nodes; i++)
3121 bitmap_set_bit (tovisit, bbs[i]->index);
3122 free (bbs);
3123 propagate_freq (loop->header, tovisit);
3127 /* Propagates frequencies through structure of loops. */
3129 static void
3130 estimate_loops (void)
3132 auto_bitmap tovisit;
3133 basic_block bb;
3135 /* Start by estimating the frequencies in the loops. */
3136 if (number_of_loops (cfun) > 1)
3137 estimate_loops_at_level (current_loops->tree_root->inner);
3139 /* Now propagate the frequencies through all the blocks. */
3140 FOR_ALL_BB_FN (bb, cfun)
3142 bitmap_set_bit (tovisit, bb->index);
3144 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
3147 /* Drop the profile for NODE to guessed, and update its frequency based on
3148 whether it is expected to be hot given the CALL_COUNT. */
3150 static void
3151 drop_profile (struct cgraph_node *node, profile_count call_count)
3153 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3154 /* In the case where this was called by another function with a
3155 dropped profile, call_count will be 0. Since there are no
3156 non-zero call counts to this function, we don't know for sure
3157 whether it is hot, and therefore it will be marked normal below. */
3158 bool hot = maybe_hot_count_p (NULL, call_count);
3160 if (dump_file)
3161 fprintf (dump_file,
3162 "Dropping 0 profile for %s. %s based on calls.\n",
3163 node->dump_name (),
3164 hot ? "Function is hot" : "Function is normal");
3165 /* We only expect to miss profiles for functions that are reached
3166 via non-zero call edges in cases where the function may have
3167 been linked from another module or library (COMDATs and extern
3168 templates). See the comments below for handle_missing_profiles.
3169 Also, only warn in cases where the missing counts exceed the
3170 number of training runs. In certain cases with an execv followed
3171 by a no-return call the profile for the no-return call is not
3172 dumped and there can be a mismatch. */
3173 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3174 && call_count > profile_info->runs)
3176 if (flag_profile_correction)
3178 if (dump_file)
3179 fprintf (dump_file,
3180 "Missing counts for called function %s\n",
3181 node->dump_name ());
3183 else
3184 warning (0, "Missing counts for called function %s",
3185 node->dump_name ());
3188 profile_status_for_fn (fn)
3189 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3190 node->frequency
3191 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3194 /* In the case of COMDAT routines, multiple object files will contain the same
3195 function and the linker will select one for the binary. In that case
3196 all the other copies from the profile-instrumented binary will be missing
3197 profile counts. Look for cases where this happened, due to non-zero
3198 call counts going to 0-count functions, and drop the profile to guessed
3199 so that we can use the estimated probabilities and avoid optimizing only
3200 for size.
3202 The other case where the profile may be missing is when the routine
3203 is not going to be emitted to the object file, e.g. for "extern template"
3204 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3205 all other cases of non-zero calls to 0-count functions. */
3207 void
3208 handle_missing_profiles (void)
3210 struct cgraph_node *node;
3211 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
3212 auto_vec<struct cgraph_node *, 64> worklist;
3214 /* See if a 0-count function has non-0-count callers. In this case we
3215 lost some profile. Drop its function profile to PROFILE_GUESSED. */
3216 FOR_EACH_DEFINED_FUNCTION (node)
3218 struct cgraph_edge *e;
3219 profile_count call_count = profile_count::zero ();
3220 gcov_type max_tp_first_run = 0;
3221 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3223 if (!(node->count == profile_count::zero ()))
3224 continue;
3225 for (e = node->callers; e; e = e->next_caller)
3227 if (e->count.initialized_p () && e->count > 0)
3229 call_count = call_count + e->count;
3231 if (e->caller->tp_first_run > max_tp_first_run)
3232 max_tp_first_run = e->caller->tp_first_run;
3236 /* If the time profile is missing, assign the maximum that comes from
3237 caller functions. */
3238 if (!node->tp_first_run && max_tp_first_run)
3239 node->tp_first_run = max_tp_first_run + 1;
3241 if (call_count > 0
3242 && fn && fn->cfg
3243 && (call_count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs))
3245 drop_profile (node, call_count);
3246 worklist.safe_push (node);
3250 /* Propagate the profile dropping to other 0-count COMDATs that are
3251 potentially called by COMDATs we already dropped the profile on. */
3252 while (worklist.length () > 0)
3254 struct cgraph_edge *e;
3256 node = worklist.pop ();
3257 for (e = node->callees; e; e = e->next_callee)
3259 struct cgraph_node *callee = e->callee;
3260 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3262 if (callee->count > 0)
3263 continue;
3264 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
3265 && profile_status_for_fn (fn) == PROFILE_READ)
3267 drop_profile (node, profile_count::zero ());
3268 worklist.safe_push (callee);
3274 /* Convert counts measured by profile driven feedback to frequencies.
3275 Return nonzero iff there was any nonzero execution count. */
3277 bool
3278 counts_to_freqs (void)
3280 gcov_type count_max;
3281 profile_count true_count_max = profile_count::zero ();
3282 basic_block bb;
3284 /* Don't overwrite the estimated frequencies when the profile for
3285 the function is missing. We may drop this function to PROFILE_GUESSED
3286 later in drop_profile (). */
3287 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
3288 || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
3289 return 0;
3291 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3292 if (bb->count > true_count_max)
3293 true_count_max = bb->count;
3295 count_max = MAX (true_count_max.to_gcov_type (), 1);
3297 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3298 if (bb->count.initialized_p ())
3299 bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max);
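  /* E.g. a block whose count is half of count_max ends up with a
     frequency of BB_FREQ_MAX / 2.  */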
3301 return !(true_count_max == profile_count::zero ());
3304 /* Return true if the function is likely to be expensive, so there is
3305 no point in optimizing the prologue, epilogue or doing inlining at the
3306 expense of code size growth. THRESHOLD is the limit on the number of
3307 instructions the function can execute on average to still be considered not expensive. */
3309 bool
3310 expensive_function_p (int threshold)
3312 unsigned int sum = 0;
3313 basic_block bb;
3314 unsigned int limit;
3316 /* We cannot compute accurately for large thresholds due to scaled
3317 frequencies. */
3318 gcc_assert (threshold <= BB_FREQ_MAX);
3320 /* Frequencies are out of range. This either means that the function
3321 contains an internal loop executing more than BB_FREQ_MAX times or
3322 profile feedback is available and the function has not been executed at all. */
3323 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
3324 return true;
3326 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
3327 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
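  /* E.g. with an entry block frequency of 100 and THRESHOLD of 50, LIMIT
     is 5000; the loop below then answers true as soon as the
     frequency-weighted number of active insns exceeds 5000, i.e. once
     the function averages more than ~50 executed insns per invocation.  */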
3328 FOR_EACH_BB_FN (bb, cfun)
3330 rtx_insn *insn;
3332 FOR_BB_INSNS (bb, insn)
3333 if (active_insn_p (insn))
3335 sum += bb->frequency;
3336 if (sum > limit)
3337 return true;
3341 return false;
3344 /* Estimate and propagate basic block frequencies using the given branch
3345 probabilities. If FORCE is true, the frequencies are used to estimate
3346 the counts even when there are already non-zero profile counts. */
3348 void
3349 estimate_bb_frequencies (bool force)
3351 basic_block bb;
3352 sreal freq_max;
3354 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
3356 static int real_values_initialized = 0;
3358 if (!real_values_initialized)
3360 real_values_initialized = 1;
3361 real_br_prob_base = REG_BR_PROB_BASE;
3362 real_bb_freq_max = BB_FREQ_MAX;
3363 real_one_half = sreal (1, -1);
3364 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3365 real_almost_one = sreal (1) - real_inv_br_prob_base;
3368 mark_dfs_back_edges ();
3370 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3371 REG_BR_PROB_BASE;
3373 /* Set up block info for each basic block. */
3374 alloc_aux_for_blocks (sizeof (block_info));
3375 alloc_aux_for_edges (sizeof (edge_prob_info));
3376 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3378 edge e;
3379 edge_iterator ei;
3381 FOR_EACH_EDGE (e, ei, bb->succs)
3383 EDGE_INFO (e)->back_edge_prob = e->probability;
3384 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3388 /* First compute frequencies locally for each loop from innermost
3389 to outermost to examine frequencies for back edges. */
3390 estimate_loops ();
3392 freq_max = 0;
3393 FOR_EACH_BB_FN (bb, cfun)
3394 if (freq_max < BLOCK_INFO (bb)->frequency)
3395 freq_max = BLOCK_INFO (bb)->frequency;
3397 freq_max = real_bb_freq_max / freq_max;
3398 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3400 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3401 bb->frequency = tmp.to_int ();
3404 free_aux_for_blocks ();
3405 free_aux_for_edges ();
3407 compute_function_frequency ();
3410 /* Decide whether function is hot, cold or unlikely executed. */
3411 void
3412 compute_function_frequency (void)
3414 basic_block bb;
3415 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3417 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3418 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3419 node->only_called_at_startup = true;
3420 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3421 node->only_called_at_exit = true;
3423 if (profile_status_for_fn (cfun) != PROFILE_READ)
3425 int flags = flags_from_decl_or_type (current_function_decl);
3426 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3427 != NULL)
3428 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3429 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3430 != NULL)
3431 node->frequency = NODE_FREQUENCY_HOT;
3432 else if (flags & ECF_NORETURN)
3433 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3434 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3435 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3436 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3437 || DECL_STATIC_DESTRUCTOR (current_function_decl))
3438 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3439 return;
3442 /* Only the first time try to drop the function into unlikely executed.
3443 After inlining the roundoff errors may confuse us.
3444 The ipa-profile pass will drop functions only called from unlikely
3445 functions to unlikely, and that is most of what we care about. */
3446 if (!cfun->after_inlining)
3447 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3448 FOR_EACH_BB_FN (bb, cfun)
3450 if (maybe_hot_bb_p (cfun, bb))
3452 node->frequency = NODE_FREQUENCY_HOT;
3453 return;
3455 if (!probably_never_executed_bb_p (cfun, bb))
3456 node->frequency = NODE_FREQUENCY_NORMAL;
3460 /* Build PREDICT_EXPR. */
3461 tree
3462 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3464 tree t = build1 (PREDICT_EXPR, void_type_node,
3465 build_int_cst (integer_type_node, predictor));
3466 SET_PREDICT_EXPR_OUTCOME (t, taken);
3467 return t;
3470 const char *
3471 predictor_name (enum br_predictor predictor)
3473 return predictor_info[predictor].name;
3476 /* Predict branch probabilities and estimate profile of the tree CFG. */
3478 namespace {
3480 const pass_data pass_data_profile =
3482 GIMPLE_PASS, /* type */
3483 "profile_estimate", /* name */
3484 OPTGROUP_NONE, /* optinfo_flags */
3485 TV_BRANCH_PROB, /* tv_id */
3486 PROP_cfg, /* properties_required */
3487 0, /* properties_provided */
3488 0, /* properties_destroyed */
3489 0, /* todo_flags_start */
3490 0, /* todo_flags_finish */
3493 class pass_profile : public gimple_opt_pass
3495 public:
3496 pass_profile (gcc::context *ctxt)
3497 : gimple_opt_pass (pass_data_profile, ctxt)
3500 /* opt_pass methods: */
3501 virtual bool gate (function *) { return flag_guess_branch_prob; }
3502 virtual unsigned int execute (function *);
3504 }; // class pass_profile
3506 unsigned int
3507 pass_profile::execute (function *fun)
3509 unsigned nb_loops;
3511 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3512 return 0;
3514 loop_optimizer_init (LOOPS_NORMAL);
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 flow_loops_dump (dump_file, NULL, 0);
3518 mark_irreducible_loops ();
3520 nb_loops = number_of_loops (fun);
3521 if (nb_loops > 1)
3522 scev_initialize ();
3524 tree_estimate_probability (false);
3526 if (nb_loops > 1)
3527 scev_finalize ();
3529 loop_optimizer_finalize ();
3530 if (dump_file && (dump_flags & TDF_DETAILS))
3531 gimple_dump_cfg (dump_file, dump_flags);
3532 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3533 profile_status_for_fn (fun) = PROFILE_GUESSED;
3534 if (dump_file && (dump_flags & TDF_DETAILS))
3536 struct loop *loop;
3537 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3538 if (loop->header->frequency)
3539 fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
3540 loop->num,
3541 (int)expected_loop_iterations_unbounded (loop));
3543 return 0;
3546 } // anon namespace
3548 gimple_opt_pass *
3549 make_pass_profile (gcc::context *ctxt)
3551 return new pass_profile (ctxt);
3554 namespace {
3556 const pass_data pass_data_strip_predict_hints =
3558 GIMPLE_PASS, /* type */
3559 "*strip_predict_hints", /* name */
3560 OPTGROUP_NONE, /* optinfo_flags */
3561 TV_BRANCH_PROB, /* tv_id */
3562 PROP_cfg, /* properties_required */
3563 0, /* properties_provided */
3564 0, /* properties_destroyed */
3565 0, /* todo_flags_start */
3566 0, /* todo_flags_finish */
3569 class pass_strip_predict_hints : public gimple_opt_pass
3571 public:
3572 pass_strip_predict_hints (gcc::context *ctxt)
3573 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3576 /* opt_pass methods: */
3577 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3578 virtual unsigned int execute (function *);
3580 }; // class pass_strip_predict_hints
3582 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3583 we no longer need. */
3584 unsigned int
3585 pass_strip_predict_hints::execute (function *fun)
3587 basic_block bb;
3588 gimple *ass_stmt;
3589 tree var;
3590 bool changed = false;
3592 FOR_EACH_BB_FN (bb, fun)
3594 gimple_stmt_iterator bi;
3595 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3597 gimple *stmt = gsi_stmt (bi);
3599 if (gimple_code (stmt) == GIMPLE_PREDICT)
3601 gsi_remove (&bi, true);
3602 changed = true;
3603 continue;
3605 else if (is_gimple_call (stmt))
3607 tree fndecl = gimple_call_fndecl (stmt);
3609 if ((fndecl
3610 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3611 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3612 && gimple_call_num_args (stmt) == 2)
3613 || (gimple_call_internal_p (stmt)
3614 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3616 var = gimple_call_lhs (stmt);
3617 changed = true;
3618 if (var)
3620 ass_stmt
3621 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3622 gsi_replace (&bi, ass_stmt, true);
3624 else
3626 gsi_remove (&bi, true);
3627 continue;
3631 gsi_next (&bi);
3634 return changed ? TODO_cleanup_cfg : 0;
3637 } // anon namespace
3639 gimple_opt_pass *
3640 make_pass_strip_predict_hints (gcc::context *ctxt)
3642 return new pass_strip_predict_hints (ctxt);
3645 /* Rebuild function frequencies. Passes are in general expected to
3646 maintain the profile by hand; however, in some cases this is not possible:
3647 for example, when inlining several functions with loops, frequencies might
3648 run out of scale and thus need to be recomputed. */
3650 void
3651 rebuild_frequencies (void)
3653 timevar_push (TV_REBUILD_FREQUENCIES);
3655 /* When the max bb count in the function is small, there is a higher
3656 chance that there were truncation errors in the integer scaling
3657 of counts by inlining and other optimizations. This could lead
3658 to incorrect classification of code as being cold when it isn't.
3659 In that case, force the estimation of bb counts/frequencies from the
3660 branch probabilities, rather than computing frequencies from counts,
3661 which may also lead to frequencies incorrectly reduced to 0. There
3662 is less precision in the probabilities, so we only do this for small
3663 max counts. */
3664 profile_count count_max = profile_count::zero ();
3665 basic_block bb;
3666 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3667 if (bb->count > count_max)
3668 count_max = bb->count;
3670 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3671 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3672 && count_max < REG_BR_PROB_BASE / 10))
3674 loop_optimizer_init (0);
3675 add_noreturn_fake_exit_edges ();
3676 mark_irreducible_loops ();
3677 connect_infinite_loops_to_exit ();
3678 estimate_bb_frequencies (true);
3679 remove_fake_exit_edges ();
3680 loop_optimizer_finalize ();
3682 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3683 counts_to_freqs ();
3684 else
3685 gcc_unreachable ();
3686 timevar_pop (TV_REBUILD_FREQUENCIES);
3689 /* Perform a dry run of the branch prediction pass and report a comparison
3690 of the predicted and real profile into the dump file. */
3692 void
3693 report_predictor_hitrates (void)
3695 unsigned nb_loops;
3697 loop_optimizer_init (LOOPS_NORMAL);
3698 if (dump_file && (dump_flags & TDF_DETAILS))
3699 flow_loops_dump (dump_file, NULL, 0);
3701 mark_irreducible_loops ();
3703 nb_loops = number_of_loops (cfun);
3704 if (nb_loops > 1)
3705 scev_initialize ();
3707 tree_estimate_probability (true);
3709 if (nb_loops > 1)
3710 scev_finalize ();
3712 loop_optimizer_finalize ();
3715 /* Force edge E to be cold.
3716 If IMPOSSIBLE is true, force the edge to have count and probability 0;
3717 otherwise keep a low probability to represent a possible error in the
3718 guess. This is used e.g. when we predict a loop to likely iterate a
3719 given number of times but are not 100% sure.
3721 This function locally updates the profile without attempting to keep
3722 global consistency, which cannot be reached in full generality without a
3723 full profile rebuild from probabilities alone. Doing so is not necessarily
3724 a good idea because frequencies and counts may be more realistic than probabilities.
3726 In some cases (such as the elimination of early exits during full loop
3727 unrolling) the caller can ensure that the profile will become consistent
3728 afterwards. */
3730 void
3731 force_edge_cold (edge e, bool impossible)
3733 profile_count count_sum = profile_count::zero ();
3734 int prob_sum = 0;
3735 edge_iterator ei;
3736 edge e2;
3737 profile_count old_count = e->count;
3738 int old_probability = e->probability;
3739 int prob_scale = REG_BR_PROB_BASE;
3741 /* If the edge is already improbable or cold, just return. */
3742 if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
3743 && (!impossible || e->count == profile_count::zero ()))
3744 return;
3745 FOR_EACH_EDGE (e2, ei, e->src->succs)
3746 if (e2 != e)
3748 if (e2->count.initialized_p ())
3749 count_sum += e2->count;
3750 prob_sum += e2->probability;
3753 /* If there are other edges out of e->src, redistribute the probability
3754 there. */
3755 if (prob_sum)
3757 e->probability
3758 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
3759 if (impossible)
3760 e->count = profile_count::zero ();
3761 if (old_probability)
3762 e->count = e->count.apply_scale (e->probability, old_probability);
3763 else
3764 e->count = e->count.apply_scale (1, REG_BR_PROB_BASE);
3766 prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
3767 prob_sum);
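      /* E.g. forcing an edge with probability 6000 (60%) to impossible
	 leaves prob_sum == 4000; prob_scale is then 10000 * 10000 / 4000
	 == 25000, scaling each remaining probability by 2.5 so the
	 outgoing probabilities again sum to REG_BR_PROB_BASE.  */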
3768 if (dump_file && (dump_flags & TDF_DETAILS))
3769 fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3770 "probability to other edges.\n",
3771 e->src->index, e->dest->index,
3772 impossible ? "impossible" : "cold");
3773 profile_count count_sum2 = count_sum + old_count - e->count;
3774 FOR_EACH_EDGE (e2, ei, e->src->succs)
3775 if (e2 != e)
3777 if (count_sum > 0)
3778 e2->count = e2->count.apply_scale (count_sum2, count_sum);
3779 e2->probability = RDIV (e2->probability * prob_scale,
3780 REG_BR_PROB_BASE);
3783 /* If all edges out of e->src are unlikely, the basic block itself
3784 is unlikely. */
3785 else
3787 e->probability = REG_BR_PROB_BASE;
3789 /* If we did no adjustment, the source basic block has no likely edges
3790 leaving in the other direction. In that case force that bb cold, too.
3791 This is in general a difficult task, but handle the special case when
3792 BB has only one predecessor. This is the common case when we are
3793 updating after loop transforms. */
3794 if (!prob_sum && count_sum == profile_count::zero ()
3795 && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1))
3797 int old_frequency = e->src->frequency;
3798 if (dump_file && (dump_flags & TDF_DETAILS))
3799 fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
3800 impossible ? "impossible" : "cold");
3801 e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
3802 if (impossible)
3803 e->src->count = e->count = profile_count::zero ();
3804 else
3805 e->src->count = e->count = e->count.apply_scale (e->src->frequency,
3806 old_frequency);
3807 force_edge_cold (single_pred_edge (e->src), impossible);
3809 else if (dump_file && (dump_flags & TDF_DETAILS)
3810 && maybe_hot_bb_p (cfun, e->src))
3811 fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
3812 impossible ? "impossible" : "cold");