gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "input.h"
  35 #include "alias.h"
  36 #include "symtab.h"
  37 #include "tree.h"
  38 #include "fold-const.h"
  39 #include "calls.h"
  40 #include "rtl.h"
  41 #include "tm_p.h"
  42 #include "hard-reg-set.h"
  43 #include "predict.h"
  44 #include "function.h"
  45 #include "dominance.h"
  46 #include "cfg.h"
  47 #include "cfganal.h"
  48 #include "basic-block.h"
  49 #include "insn-config.h"
  50 #include "regs.h"
  51 #include "flags.h"
  52 #include "profile.h"
  53 #include "except.h"
  54 #include "diagnostic-core.h"
  55 #include "recog.h"
  56 #include "expmed.h"
  57 #include "dojump.h"
  58 #include "explow.h"
  59 #include "emit-rtl.h"
  60 #include "varasm.h"
  61 #include "stmt.h"
  62 #include "expr.h"
  63 #include "coverage.h"
  64 #include "sreal.h"
  65 #include "params.h"
  66 #include "target.h"
  67 #include "cfgloop.h"
  68 #include "tree-ssa-alias.h"
  69 #include "internal-fn.h"
  70 #include "gimple-expr.h"
  71 #include "is-a.h"
  72 #include "gimple.h"
  73 #include "gimple-iterator.h"
  74 #include "gimple-ssa.h"
  75 #include "plugin-api.h"
  76 #include "ipa-ref.h"
  77 #include "cgraph.h"
  78 #include "tree-cfg.h"
  79 #include "tree-phinodes.h"
  80 #include "ssa-iterators.h"
  81 #include "tree-ssa-loop-niter.h"
  82 #include "tree-ssa-loop.h"
  83 #include "tree-pass.h"
  84 #include "tree-scalar-evolution.h"
  85
  86 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  87                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  88 static sreal real_almost_one, real_br_prob_base,
  89              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  90
  91 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  92 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  93 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  94 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  95 static bool can_predict_insn_p (const rtx_insn *);
  96
  97 /* Information we hold about each branch predictor.
  98    Filled using information from predict.def.  */
  99
 100 struct predictor_info
 101 {
 102   const char *const name;       /* Name used in the debugging dumps.  */
 103   const int hitrate;            /* Expected hitrate used by
 104                                    predict_insn_def call.  */
 105   const int flags;
 106 };
 107
 108 /* Use given predictor without Dempster-Shaffer theory if it matches
 109    using first_match heuristics.  */
 110 #define PRED_FLAG_FIRST_MATCH 1
 111
 112 /* Recompute hitrate in percent to our representation.  */
 113
 114 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 115
 116 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 117 static const struct predictor_info predictor_info[]= {
 118 #include "predict.def"
 119
 120   /* Upper bound on predictors.  */
 121   {NULL, 0, 0}
 122 };
 123 #undef DEF_PREDICTOR
 124
 125 /* Return TRUE if frequency FREQ is considered to be hot.  */
 126
 127 static inline bool
 128 maybe_hot_frequency_p (struct function *fun, int freq)
 129 {
 130   struct cgraph_node *node = cgraph_node::get (fun->decl);
 131   if (!profile_info
 132       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 133     {
 134       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 135         return false;
 136       if (node->frequency == NODE_FREQUENCY_HOT)
 137         return true;
 138     }
 139   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 140     return true;
 141   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 142       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 143     return false;
 144   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 145     return false;
 146   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 147               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 148     return false;
 149   return true;
 150 }
 151
 152 static gcov_type min_count = -1;
 153
 154 /* Determine the threshold for hot BB counts.  */
 155
 156 gcov_type
 157 get_hot_bb_threshold ()
 158 {
 159   gcov_working_set_t *ws;
 160   if (min_count == -1)
 161     {
 162       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 163       gcc_assert (ws);
 164       min_count = ws->min_counter;
 165     }
 166   return min_count;
 167 }
 168
 169 /* Set the threshold for hot BB counts.  */
 170
 171 void
 172 set_hot_bb_threshold (gcov_type min)
 173 {
 174   min_count = min;
 175 }
 176
 177 /* Return TRUE if frequency FREQ is considered to be hot.  */
 178
 179 bool
 180 maybe_hot_count_p (struct function *fun, gcov_type count)
 181 {
 182   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 183     return true;
 184   /* Code executed at most once is not hot.  */
 185   if (profile_info->runs >= count)
 186     return false;
 187   return (count >= get_hot_bb_threshold ());
 188 }
 189
 190 /* Return true in case BB can be CPU intensive and should be optimized
 191    for maximal performance.  */
 192
 193 bool
 194 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 195 {
 196   gcc_checking_assert (fun);
 197   if (profile_status_for_fn (fun) == PROFILE_READ)
 198     return maybe_hot_count_p (fun, bb->count);
 199   return maybe_hot_frequency_p (fun, bb->frequency);
 200 }
 201
 202 /* Return true in case BB can be CPU intensive and should be optimized
 203    for maximal performance.  */
 204
 205 bool
 206 maybe_hot_edge_p (edge e)
 207 {
 208   if (profile_status_for_fn (cfun) == PROFILE_READ)
 209     return maybe_hot_count_p (cfun, e->count);
 210   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 211 }
 212
 213 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 214    node frequency reflects never being executed.  */
 215
 216 static bool
 217 probably_never_executed (struct function *fun,
 218                          gcov_type count, int frequency)
 219 {
 220   gcc_checking_assert (fun);
 221   if (profile_status_for_fn (fun) == PROFILE_READ)
 222     {
 223       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 224       if (count * unlikely_count_fraction >= profile_info->runs)
 225         return false;
 226       if (!frequency)
 227         return true;
 228       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 229         return false;
 230       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 231         {
 232           gcov_type computed_count;
 233           /* Check for possibility of overflow, in which case entry bb count
 234              is large enough to do the division first without losing much
 235              precision.  */
 236           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 237               REG_BR_PROB_BASE)
 238             {
 239               gcov_type scaled_count
 240                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 241              unlikely_count_fraction;
 242               computed_count = RDIV (scaled_count,
 243                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 244             }
 245           else
 246             {
 247               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 248                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 249               computed_count *= frequency * unlikely_count_fraction;
 250             }
 251           if (computed_count >= profile_info->runs)
 252             return false;
 253         }
 254       return true;
 255     }
 256   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 257       && (cgraph_node::get (fun->decl)->frequency
 258           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 259     return true;
 260   return false;
 261 }
 262
 263
 264 /* Return true in case BB is probably never executed.  */
 265
 266 bool
 267 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 268 {
 269   return probably_never_executed (fun, bb->count, bb->frequency);
 270 }
 271
 272
 273 /* Return true in case edge E is probably never executed.  */
 274
 275 bool
 276 probably_never_executed_edge_p (struct function *fun, edge e)
 277 {
 278   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 279 }
 280
 281 /* Return true when current function should always be optimized for size.  */
 282
 283 bool
 284 optimize_function_for_size_p (struct function *fun)
 285 {
 286   if (!fun || !fun->decl)
 287     return optimize_size;
 288   cgraph_node *n = cgraph_node::get (fun->decl);
 289   return n && n->optimize_for_size_p ();
 290 }
 291
 292 /* Return true when current function should always be optimized for speed.  */
 293
 294 bool
 295 optimize_function_for_speed_p (struct function *fun)
 296 {
 297   return !optimize_function_for_size_p (fun);
 298 }
 299
 300 /* Return TRUE when BB should be optimized for size.  */
 301
 302 bool
 303 optimize_bb_for_size_p (const_basic_block bb)
 304 {
 305   return (optimize_function_for_size_p (cfun)
 306           || (bb && !maybe_hot_bb_p (cfun, bb)));
 307 }
 308
 309 /* Return TRUE when BB should be optimized for speed.  */
 310
 311 bool
 312 optimize_bb_for_speed_p (const_basic_block bb)
 313 {
 314   return !optimize_bb_for_size_p (bb);
 315 }
 316
 317 /* Return TRUE when BB should be optimized for size.  */
 318
 319 bool
 320 optimize_edge_for_size_p (edge e)
 321 {
 322   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 323 }
 324
 325 /* Return TRUE when BB should be optimized for speed.  */
 326
 327 bool
 328 optimize_edge_for_speed_p (edge e)
 329 {
 330   return !optimize_edge_for_size_p (e);
 331 }
 332
 333 /* Return TRUE when BB should be optimized for size.  */
 334
 335 bool
 336 optimize_insn_for_size_p (void)
 337 {
 338   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 339 }
 340
 341 /* Return TRUE when BB should be optimized for speed.  */
 342
 343 bool
 344 optimize_insn_for_speed_p (void)
 345 {
 346   return !optimize_insn_for_size_p ();
 347 }
 348
 349 /* Return TRUE when LOOP should be optimized for size.  */
 350
 351 bool
 352 optimize_loop_for_size_p (struct loop *loop)
 353 {
 354   return optimize_bb_for_size_p (loop->header);
 355 }
 356
 357 /* Return TRUE when LOOP should be optimized for speed.  */
 358
 359 bool
 360 optimize_loop_for_speed_p (struct loop *loop)
 361 {
 362   return optimize_bb_for_speed_p (loop->header);
 363 }
 364
 365 /* Return TRUE when LOOP nest should be optimized for speed.  */
 366
 367 bool
 368 optimize_loop_nest_for_speed_p (struct loop *loop)
 369 {
 370   struct loop *l = loop;
 371   if (optimize_loop_for_speed_p (loop))
 372     return true;
 373   l = loop->inner;
 374   while (l && l != loop)
 375     {
 376       if (optimize_loop_for_speed_p (l))
 377         return true;
 378       if (l->inner)
 379         l = l->inner;
 380       else if (l->next)
 381         l = l->next;
 382       else
 383         {
 384           while (l != loop && !l->next)
 385             l = loop_outer (l);
 386           if (l != loop)
 387             l = l->next;
 388         }
 389     }
 390   return false;
 391 }
 392
 393 /* Return TRUE when LOOP nest should be optimized for size.  */
 394
 395 bool
 396 optimize_loop_nest_for_size_p (struct loop *loop)
 397 {
 398   return !optimize_loop_nest_for_speed_p (loop);
 399 }
 400
 401 /* Return true when edge E is likely to be well predictable by branch
 402    predictor.  */
 403
 404 bool
 405 predictable_edge_p (edge e)
 406 {
 407   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 408     return false;
 409   if ((e->probability
 410        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 411       || (REG_BR_PROB_BASE - e->probability
 412           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 413     return true;
 414   return false;
 415 }
 416
 417
 418 /* Set RTL expansion for BB profile.  */
 419
 420 void
 421 rtl_profile_for_bb (basic_block bb)
 422 {
 423   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 424 }
 425
 426 /* Set RTL expansion for edge profile.  */
 427
 428 void
 429 rtl_profile_for_edge (edge e)
 430 {
 431   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 432 }
 433
 434 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 435 void
 436 default_rtl_profile (void)
 437 {
 438   crtl->maybe_hot_insn_p = true;
 439 }
 440
 441 /* Return true if the one of outgoing edges is already predicted by
 442    PREDICTOR.  */
 443
 444 bool
 445 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 446 {
 447   rtx note;
 448   if (!INSN_P (BB_END (bb)))
 449     return false;
 450   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 451     if (REG_NOTE_KIND (note) == REG_BR_PRED
 452         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 453       return true;
 454   return false;
 455 }
 456
 457 /*  Structure representing predictions in tree level. */
 458
 459 struct edge_prediction {
 460     struct edge_prediction *ep_next;
 461     edge ep_edge;
 462     enum br_predictor ep_predictor;
 463     int ep_probability;
 464 };
 465
 466 /* This map contains for a basic block the list of predictions for the
 467    outgoing edges.  */
 468
 469 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 470
 471 /* Return true if the one of outgoing edges is already predicted by
 472    PREDICTOR.  */
 473
 474 bool
 475 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 476 {
 477   struct edge_prediction *i;
 478   edge_prediction **preds = bb_predictions->get (bb);
 479
 480   if (!preds)
 481     return false;
 482
 483   for (i = *preds; i; i = i->ep_next)
 484     if (i->ep_predictor == predictor)
 485       return true;
 486   return false;
 487 }
 488
 489 /* Return true when the probability of edge is reliable.
 490
 491    The profile guessing code is good at predicting branch outcome (ie.
 492    taken/not taken), that is predicted right slightly over 75% of time.
 493    It is however notoriously poor on predicting the probability itself.
 494    In general the profile appear a lot flatter (with probabilities closer
 495    to 50%) than the reality so it is bad idea to use it to drive optimization
 496    such as those disabling dynamic branch prediction for well predictable
 497    branches.
 498
 499    There are two exceptions - edges leading to noreturn edges and edges
 500    predicted by number of iterations heuristics are predicted well.  This macro
 501    should be able to distinguish those, but at the moment it simply check for
 502    noreturn heuristic that is only one giving probability over 99% or bellow
 503    1%.  In future we might want to propagate reliability information across the
 504    CFG if we find this information useful on multiple places.   */
 505 static bool
 506 probability_reliable_p (int prob)
 507 {
 508   return (profile_status_for_fn (cfun) == PROFILE_READ
 509           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 510               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 511 }
 512
 513 /* Same predicate as above, working on edges.  */
 514 bool
 515 edge_probability_reliable_p (const_edge e)
 516 {
 517   return probability_reliable_p (e->probability);
 518 }
 519
 520 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 521 bool
 522 br_prob_note_reliable_p (const_rtx note)
 523 {
 524   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 525   return probability_reliable_p (XINT (note, 0));
 526 }
 527
 528 static void
 529 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 530 {
 531   gcc_assert (any_condjump_p (insn));
 532   if (!flag_guess_branch_prob)
 533     return;
 534
 535   add_reg_note (insn, REG_BR_PRED,
 536                 gen_rtx_CONCAT (VOIDmode,
 537                                 GEN_INT ((int) predictor),
 538                                 GEN_INT ((int) probability)));
 539 }
 540
 541 /* Predict insn by given predictor.  */
 542
 543 void
 544 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 545                   enum prediction taken)
 546 {
 547    int probability = predictor_info[(int) predictor].hitrate;
 548
 549    if (taken != TAKEN)
 550      probability = REG_BR_PROB_BASE - probability;
 551
 552    predict_insn (insn, predictor, probability);
 553 }
 554
 555 /* Predict edge E with given probability if possible.  */
 556
 557 void
 558 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 559 {
 560   rtx_insn *last_insn;
 561   last_insn = BB_END (e->src);
 562
 563   /* We can store the branch prediction information only about
 564      conditional jumps.  */
 565   if (!any_condjump_p (last_insn))
 566     return;
 567
 568   /* We always store probability of branching.  */
 569   if (e->flags & EDGE_FALLTHRU)
 570     probability = REG_BR_PROB_BASE - probability;
 571
 572   predict_insn (last_insn, predictor, probability);
 573 }
 574
 575 /* Predict edge E with the given PROBABILITY.  */
 576 void
 577 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 578 {
 579   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 580   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 581        1)
 582       && flag_guess_branch_prob && optimize)
 583     {
 584       struct edge_prediction *i = XNEW (struct edge_prediction);
 585       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 586
 587       i->ep_next = preds;
 588       preds = i;
 589       i->ep_probability = probability;
 590       i->ep_predictor = predictor;
 591       i->ep_edge = e;
 592     }
 593 }
 594
 595 /* Remove all predictions on given basic block that are attached
 596    to edge E.  */
 597 void
 598 remove_predictions_associated_with_edge (edge e)
 599 {
 600   if (!bb_predictions)
 601     return;
 602
 603   edge_prediction **preds = bb_predictions->get (e->src);
 604
 605   if (preds)
 606     {
 607       struct edge_prediction **prediction = preds;
 608       struct edge_prediction *next;
 609
 610       while (*prediction)
 611         {
 612           if ((*prediction)->ep_edge == e)
 613             {
 614               next = (*prediction)->ep_next;
 615               free (*prediction);
 616               *prediction = next;
 617             }
 618           else
 619             prediction = &((*prediction)->ep_next);
 620         }
 621     }
 622 }
 623
 624 /* Clears the list of predictions stored for BB.  */
 625
 626 static void
 627 clear_bb_predictions (basic_block bb)
 628 {
 629   edge_prediction **preds = bb_predictions->get (bb);
 630   struct edge_prediction *pred, *next;
 631
 632   if (!preds)
 633     return;
 634
 635   for (pred = *preds; pred; pred = next)
 636     {
 637       next = pred->ep_next;
 638       free (pred);
 639     }
 640   *preds = NULL;
 641 }
 642
 643 /* Return true when we can store prediction on insn INSN.
 644    At the moment we represent predictions only on conditional
 645    jumps, not at computed jump or other complicated cases.  */
 646 static bool
 647 can_predict_insn_p (const rtx_insn *insn)
 648 {
 649   return (JUMP_P (insn)
 650           && any_condjump_p (insn)
 651           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 652 }
 653
 654 /* Predict edge E by given predictor if possible.  */
 655
 656 void
 657 predict_edge_def (edge e, enum br_predictor predictor,
 658                   enum prediction taken)
 659 {
 660    int probability = predictor_info[(int) predictor].hitrate;
 661
 662    if (taken != TAKEN)
 663      probability = REG_BR_PROB_BASE - probability;
 664
 665    predict_edge (e, predictor, probability);
 666 }
 667
 668 /* Invert all branch predictions or probability notes in the INSN.  This needs
 669    to be done each time we invert the condition used by the jump.  */
 670
 671 void
 672 invert_br_probabilities (rtx insn)
 673 {
 674   rtx note;
 675
 676   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 677     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 678       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 679     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 680       XEXP (XEXP (note, 0), 1)
 681         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 682 }
 683
 684 /* Dump information about the branch prediction to the output file.  */
 685
 686 static void
 687 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 688                  basic_block bb, int used)
 689 {
 690   edge e;
 691   edge_iterator ei;
 692
 693   if (!file)
 694     return;
 695
 696   FOR_EACH_EDGE (e, ei, bb->succs)
 697     if (! (e->flags & EDGE_FALLTHRU))
 698       break;
 699
 700   fprintf (file, "  %s heuristics%s: %.1f%%",
 701            predictor_info[predictor].name,
 702            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 703
 704   if (bb->count)
 705     {
 706       fprintf (file, "  exec %" PRId64, bb->count);
 707       if (e)
 708         {
 709           fprintf (file, " hit %" PRId64, e->count);
 710           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 711         }
 712     }
 713
 714   fprintf (file, "\n");
 715 }
 716
 717 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 718    evenly and hope for the best.  */
 719 static void
 720 set_even_probabilities (basic_block bb)
 721 {
 722   int nedges = 0;
 723   edge e;
 724   edge_iterator ei;
 725
 726   FOR_EACH_EDGE (e, ei, bb->succs)
 727     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 728       nedges ++;
 729   FOR_EACH_EDGE (e, ei, bb->succs)
 730     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 731       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 732     else
 733       e->probability = 0;
 734 }
 735
 736 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 737    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 738
 739 static void
 740 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 741 {
 742   rtx prob_note;
 743   rtx *pnote;
 744   rtx note;
 745   int best_probability = PROB_EVEN;
 746   enum br_predictor best_predictor = END_PREDICTORS;
 747   int combined_probability = REG_BR_PROB_BASE / 2;
 748   int d;
 749   bool first_match = false;
 750   bool found = false;
 751
 752   if (!can_predict_insn_p (insn))
 753     {
 754       set_even_probabilities (bb);
 755       return;
 756     }
 757
 758   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 759   pnote = &REG_NOTES (insn);
 760   if (dump_file)
 761     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 762              bb->index);
 763
 764   /* We implement "first match" heuristics and use probability guessed
 765      by predictor with smallest index.  */
 766   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 767     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 768       {
 769         enum br_predictor predictor = ((enum br_predictor)
 770                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 771         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 772
 773         found = true;
 774         if (best_predictor > predictor)
 775           best_probability = probability, best_predictor = predictor;
 776
 777         d = (combined_probability * probability
 778              + (REG_BR_PROB_BASE - combined_probability)
 779              * (REG_BR_PROB_BASE - probability));
 780
 781         /* Use FP math to avoid overflows of 32bit integers.  */
 782         if (d == 0)
 783           /* If one probability is 0% and one 100%, avoid division by zero.  */
 784           combined_probability = REG_BR_PROB_BASE / 2;
 785         else
 786           combined_probability = (((double) combined_probability) * probability
 787                                   * REG_BR_PROB_BASE / d + 0.5);
 788       }
 789
 790   /* Decide which heuristic to use.  In case we didn't match anything,
 791      use no_prediction heuristic, in case we did match, use either
 792      first match or Dempster-Shaffer theory depending on the flags.  */
 793
 794   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 795     first_match = true;
 796
 797   if (!found)
 798     dump_prediction (dump_file, PRED_NO_PREDICTION,
 799                      combined_probability, bb, true);
 800   else
 801     {
 802       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 803                        bb, !first_match);
 804       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 805                        bb, first_match);
 806     }
 807
 808   if (first_match)
 809     combined_probability = best_probability;
 810   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 811
 812   while (*pnote)
 813     {
 814       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 815         {
 816           enum br_predictor predictor = ((enum br_predictor)
 817                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 818           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 819
 820           dump_prediction (dump_file, predictor, probability, bb,
 821                            !first_match || best_predictor == predictor);
 822           *pnote = XEXP (*pnote, 1);
 823         }
 824       else
 825         pnote = &XEXP (*pnote, 1);
 826     }
 827
 828   if (!prob_note)
 829     {
 830       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 831
 832       /* Save the prediction into CFG in case we are seeing non-degenerated
 833          conditional jump.  */
 834       if (!single_succ_p (bb))
 835         {
 836           BRANCH_EDGE (bb)->probability = combined_probability;
 837           FALLTHRU_EDGE (bb)->probability
 838             = REG_BR_PROB_BASE - combined_probability;
 839         }
 840     }
 841   else if (!single_succ_p (bb))
 842     {
 843       int prob = XINT (prob_note, 0);
 844
 845       BRANCH_EDGE (bb)->probability = prob;
 846       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 847     }
 848   else
 849     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 850 }
 851
 852 /* Combine predictions into single probability and store them into CFG.
 853    Remove now useless prediction entries.  */
 854
 855 static void
 856 combine_predictions_for_bb (basic_block bb)
 857 {
 858   int best_probability = PROB_EVEN;
 859   enum br_predictor best_predictor = END_PREDICTORS;
 860   int combined_probability = REG_BR_PROB_BASE / 2;
 861   int d;
 862   bool first_match = false;
 863   bool found = false;
 864   struct edge_prediction *pred;
 865   int nedges = 0;
 866   edge e, first = NULL, second = NULL;
 867   edge_iterator ei;
 868
 869   FOR_EACH_EDGE (e, ei, bb->succs)
 870     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 871       {
 872         nedges ++;
 873         if (first && !second)
 874           second = e;
 875         if (!first)
 876           first = e;
 877       }
 878
 879   /* When there is no successor or only one choice, prediction is easy.
 880
 881      We are lazy for now and predict only basic blocks with two outgoing
 882      edges.  It is possible to predict generic case too, but we have to
 883      ignore first match heuristics and do more involved combining.  Implement
 884      this later.  */
 885   if (nedges != 2)
 886     {
 887       if (!bb->count)
 888         set_even_probabilities (bb);
 889       clear_bb_predictions (bb);
 890       if (dump_file)
 891         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 892                  nedges, bb->index);
 893       return;
 894     }
 895
 896   if (dump_file)
 897     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 898
 899   edge_prediction **preds = bb_predictions->get (bb);
 900   if (preds)
 901     {
 902       /* We implement "first match" heuristics and use probability guessed
 903          by predictor with smallest index.  */
 904       for (pred = *preds; pred; pred = pred->ep_next)
 905         {
 906           enum br_predictor predictor = pred->ep_predictor;
 907           int probability = pred->ep_probability;
 908
 909           if (pred->ep_edge != first)
 910             probability = REG_BR_PROB_BASE - probability;
 911
 912           found = true;
 913           /* First match heuristics would be widly confused if we predicted
 914              both directions.  */
 915           if (best_predictor > predictor)
 916             {
 917               struct edge_prediction *pred2;
 918               int prob = probability;
 919
 920               for (pred2 = (struct edge_prediction *) *preds;
 921                    pred2; pred2 = pred2->ep_next)
 922                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 923                  {
 924                    int probability2 = pred->ep_probability;
 925
 926                    if (pred2->ep_edge != first)
 927                      probability2 = REG_BR_PROB_BASE - probability2;
 928
 929                    if ((probability < REG_BR_PROB_BASE / 2) !=
 930                        (probability2 < REG_BR_PROB_BASE / 2))
 931                      break;
 932
 933                    /* If the same predictor later gave better result, go for it! */
 934                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 935                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 936                      prob = probability2;
 937                  }
 938               if (!pred2)
 939                 best_probability = prob, best_predictor = predictor;
 940             }
 941
 942           d = (combined_probability * probability
 943                + (REG_BR_PROB_BASE - combined_probability)
 944                * (REG_BR_PROB_BASE - probability));
 945
 946           /* Use FP math to avoid overflows of 32bit integers.  */
 947           if (d == 0)
 948             /* If one probability is 0% and one 100%, avoid division by zero.  */
 949             combined_probability = REG_BR_PROB_BASE / 2;
 950           else
 951             combined_probability = (((double) combined_probability)
 952                                     * probability
 953                                     * REG_BR_PROB_BASE / d + 0.5);
 954         }
 955     }
 956
 957   /* Decide which heuristic to use.  In case we didn't match anything,
 958      use no_prediction heuristic, in case we did match, use either
 959      first match or Dempster-Shaffer theory depending on the flags.  */
 960
 961   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 962     first_match = true;
 963
 964   if (!found)
 965     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 966   else
 967     {
 968       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 969                        !first_match);
 970       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 971                        first_match);
 972     }
 973
 974   if (first_match)
 975     combined_probability = best_probability;
 976   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 977
 978   if (preds)
 979     {
 980       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 981         {
 982           enum br_predictor predictor = pred->ep_predictor;
 983           int probability = pred->ep_probability;
 984
 985           if (pred->ep_edge != EDGE_SUCC (bb, 0))
 986             probability = REG_BR_PROB_BASE - probability;
 987           dump_prediction (dump_file, predictor, probability, bb,
 988                            !first_match || best_predictor == predictor);
 989         }
 990     }
 991   clear_bb_predictions (bb);
 992
 993   if (!bb->count)
 994     {
 995       first->probability = combined_probability;
 996       second->probability = REG_BR_PROB_BASE - combined_probability;
 997     }
 998 }
 999
1000 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1001    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1002
1003    T1 and T2 should be one of the following cases:
1004      1. T1 is SSA_NAME, T2 is NULL
1005      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1006      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1007
1008 static tree
1009 strips_small_constant (tree t1, tree t2)
1010 {
1011   tree ret = NULL;
1012   int value = 0;
1013
1014   if (!t1)
1015     return NULL;
1016   else if (TREE_CODE (t1) == SSA_NAME)
1017     ret = t1;
1018   else if (tree_fits_shwi_p (t1))
1019     value = tree_to_shwi (t1);
1020   else
1021     return NULL;
1022
1023   if (!t2)
1024     return ret;
1025   else if (tree_fits_shwi_p (t2))
1026     value = tree_to_shwi (t2);
1027   else if (TREE_CODE (t2) == SSA_NAME)
1028     {
1029       if (ret)
1030         return NULL;
1031       else
1032         ret = t2;
1033     }
1034
1035   if (value <= 4 && value >= -4)
1036     return ret;
1037   else
1038     return NULL;
1039 }
1040
1041 /* Return the SSA_NAME in T or T's operands.
1042    Return NULL if SSA_NAME cannot be found.  */
1043
1044 static tree
1045 get_base_value (tree t)
1046 {
1047   if (TREE_CODE (t) == SSA_NAME)
1048     return t;
1049
1050   if (!BINARY_CLASS_P (t))
1051     return NULL;
1052
1053   switch (TREE_OPERAND_LENGTH (t))
1054     {
1055     case 1:
1056       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1057     case 2:
1058       return strips_small_constant (TREE_OPERAND (t, 0),
1059                                     TREE_OPERAND (t, 1));
1060     default:
1061       return NULL;
1062     }
1063 }
1064
1065 /* Check the compare STMT in LOOP. If it compares an induction
1066    variable to a loop invariant, return true, and save
1067    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1068    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1069
1070 static bool
1071 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1072                                      tree *loop_invariant,
1073                                      enum tree_code *compare_code,
1074                                      tree *loop_step,
1075                                      tree *loop_iv_base)
1076 {
1077   tree op0, op1, bound, base;
1078   affine_iv iv0, iv1;
1079   enum tree_code code;
1080   tree step;
1081
1082   code = gimple_cond_code (stmt);
1083   *loop_invariant = NULL;
1084
1085   switch (code)
1086     {
1087     case GT_EXPR:
1088     case GE_EXPR:
1089     case NE_EXPR:
1090     case LT_EXPR:
1091     case LE_EXPR:
1092     case EQ_EXPR:
1093       break;
1094
1095     default:
1096       return false;
1097     }
1098
1099   op0 = gimple_cond_lhs (stmt);
1100   op1 = gimple_cond_rhs (stmt);
1101
1102   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1103        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1104     return false;
1105   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1106     return false;
1107   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1108     return false;
1109   if (TREE_CODE (iv0.step) != INTEGER_CST
1110       || TREE_CODE (iv1.step) != INTEGER_CST)
1111     return false;
1112   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1113       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1114     return false;
1115
1116   if (integer_zerop (iv0.step))
1117     {
1118       if (code != NE_EXPR && code != EQ_EXPR)
1119         code = invert_tree_comparison (code, false);
1120       bound = iv0.base;
1121       base = iv1.base;
1122       if (tree_fits_shwi_p (iv1.step))
1123         step = iv1.step;
1124       else
1125         return false;
1126     }
1127   else
1128     {
1129       bound = iv1.base;
1130       base = iv0.base;
1131       if (tree_fits_shwi_p (iv0.step))
1132         step = iv0.step;
1133       else
1134         return false;
1135     }
1136
1137   if (TREE_CODE (bound) != INTEGER_CST)
1138     bound = get_base_value (bound);
1139   if (!bound)
1140     return false;
1141   if (TREE_CODE (base) != INTEGER_CST)
1142     base = get_base_value (base);
1143   if (!base)
1144     return false;
1145
1146   *loop_invariant = bound;
1147   *compare_code = code;
1148   *loop_step = step;
1149   *loop_iv_base = base;
1150   return true;
1151 }
1152
1153 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1154
1155 static bool
1156 expr_coherent_p (tree t1, tree t2)
1157 {
1158   gimple stmt;
1159   tree ssa_name_1 = NULL;
1160   tree ssa_name_2 = NULL;
1161
1162   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1163   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1164
1165   if (t1 == t2)
1166     return true;
1167
1168   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1169     return true;
1170   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1171     return false;
1172
1173   /* Check to see if t1 is expressed/defined with t2.  */
1174   stmt = SSA_NAME_DEF_STMT (t1);
1175   gcc_assert (stmt != NULL);
1176   if (is_gimple_assign (stmt))
1177     {
1178       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1179       if (ssa_name_1 && ssa_name_1 == t2)
1180         return true;
1181     }
1182
1183   /* Check to see if t2 is expressed/defined with t1.  */
1184   stmt = SSA_NAME_DEF_STMT (t2);
1185   gcc_assert (stmt != NULL);
1186   if (is_gimple_assign (stmt))
1187     {
1188       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1189       if (ssa_name_2 && ssa_name_2 == t1)
1190         return true;
1191     }
1192
1193   /* Compare if t1 and t2's def_stmts are identical.  */
1194   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1195     return true;
1196   else
1197     return false;
1198 }
1199
1200 /* Predict branch probability of BB when BB contains a branch that compares
1201    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1202    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1203
1204    E.g.
1205      for (int i = 0; i < bound; i++) {
1206        if (i < bound - 2)
1207          computation_1();
1208        else
1209          computation_2();
1210      }
1211
1212   In this loop, we will predict the branch inside the loop to be taken.  */
1213
1214 static void
1215 predict_iv_comparison (struct loop *loop, basic_block bb,
1216                        tree loop_bound_var,
1217                        tree loop_iv_base_var,
1218                        enum tree_code loop_bound_code,
1219                        int loop_bound_step)
1220 {
1221   gimple stmt;
1222   tree compare_var, compare_base;
1223   enum tree_code compare_code;
1224   tree compare_step_var;
1225   edge then_edge;
1226   edge_iterator ei;
1227
1228   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1229       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1230       || predicted_by_p (bb, PRED_LOOP_EXIT))
1231     return;
1232
1233   stmt = last_stmt (bb);
1234   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1235     return;
1236   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1237                                             loop, &compare_var,
1238                                             &compare_code,
1239                                             &compare_step_var,
1240                                             &compare_base))
1241     return;
1242
1243   /* Find the taken edge.  */
1244   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1245     if (then_edge->flags & EDGE_TRUE_VALUE)
1246       break;
1247
1248   /* When comparing an IV to a loop invariant, NE is more likely to be
1249      taken while EQ is more likely to be not-taken.  */
1250   if (compare_code == NE_EXPR)
1251     {
1252       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1253       return;
1254     }
1255   else if (compare_code == EQ_EXPR)
1256     {
1257       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1258       return;
1259     }
1260
1261   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1262     return;
1263
1264   /* If loop bound, base and compare bound are all constants, we can
1265      calculate the probability directly.  */
1266   if (tree_fits_shwi_p (loop_bound_var)
1267       && tree_fits_shwi_p (compare_var)
1268       && tree_fits_shwi_p (compare_base))
1269     {
1270       int probability;
1271       bool overflow, overall_overflow = false;
1272       widest_int compare_count, tem;
1273
1274       /* (loop_bound - base) / compare_step */
1275       tem = wi::sub (wi::to_widest (loop_bound_var),
1276                      wi::to_widest (compare_base), SIGNED, &overflow);
1277       overall_overflow |= overflow;
1278       widest_int loop_count = wi::div_trunc (tem,
1279                                              wi::to_widest (compare_step_var),
1280                                              SIGNED, &overflow);
1281       overall_overflow |= overflow;
1282
1283       if (!wi::neg_p (wi::to_widest (compare_step_var))
1284           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1285         {
1286           /* (loop_bound - compare_bound) / compare_step */
1287           tem = wi::sub (wi::to_widest (loop_bound_var),
1288                          wi::to_widest (compare_var), SIGNED, &overflow);
1289           overall_overflow |= overflow;
1290           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1291                                          SIGNED, &overflow);
1292           overall_overflow |= overflow;
1293         }
1294       else
1295         {
1296           /* (compare_bound - base) / compare_step */
1297           tem = wi::sub (wi::to_widest (compare_var),
1298                          wi::to_widest (compare_base), SIGNED, &overflow);
1299           overall_overflow |= overflow;
1300           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1301                                          SIGNED, &overflow);
1302           overall_overflow |= overflow;
1303         }
1304       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1305         ++compare_count;
1306       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1307         ++loop_count;
1308       if (wi::neg_p (compare_count))
1309         compare_count = 0;
1310       if (wi::neg_p (loop_count))
1311         loop_count = 0;
1312       if (loop_count == 0)
1313         probability = 0;
1314       else if (wi::cmps (compare_count, loop_count) == 1)
1315         probability = REG_BR_PROB_BASE;
1316       else
1317         {
1318           tem = compare_count * REG_BR_PROB_BASE;
1319           tem = wi::udiv_trunc (tem, loop_count);
1320           probability = tem.to_uhwi ();
1321         }
1322
1323       if (!overall_overflow)
1324         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1325
1326       return;
1327     }
1328
1329   if (expr_coherent_p (loop_bound_var, compare_var))
1330     {
1331       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1332           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1333         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1334       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1335                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1336         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1337       else if (loop_bound_code == NE_EXPR)
1338         {
1339           /* If the loop backedge condition is "(i != bound)", we do
1340              the comparison based on the step of IV:
1341              * step < 0 : backedge condition is like (i > bound)
1342              * step > 0 : backedge condition is like (i < bound)  */
1343           gcc_assert (loop_bound_step != 0);
1344           if (loop_bound_step > 0
1345               && (compare_code == LT_EXPR
1346                   || compare_code == LE_EXPR))
1347             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348           else if (loop_bound_step < 0
1349                    && (compare_code == GT_EXPR
1350                        || compare_code == GE_EXPR))
1351             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1352           else
1353             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1354         }
1355       else
1356         /* The branch is predicted not-taken if loop_bound_code is
1357            opposite with compare_code.  */
1358         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1359     }
1360   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1361     {
1362       /* For cases like:
1363            for (i = s; i < h; i++)
1364              if (i > s + 2) ....
1365          The branch should be predicted taken.  */
1366       if (loop_bound_step > 0
1367           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1368         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1369       else if (loop_bound_step < 0
1370                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1371         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1372       else
1373         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1374     }
1375 }
1376
1377 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1378    exits are resulted from short-circuit conditions that will generate an
1379    if_tmp. E.g.:
1380
1381    if (foo() || global > 10)
1382      break;
1383
1384    This will be translated into:
1385
1386    BB3:
1387      loop header...
1388    BB4:
1389      if foo() goto BB6 else goto BB5
1390    BB5:
1391      if global > 10 goto BB6 else goto BB7
1392    BB6:
1393      goto BB7
1394    BB7:
1395      iftmp = (PHI 0(BB5), 1(BB6))
1396      if iftmp == 1 goto BB8 else goto BB3
1397    BB8:
1398      outside of the loop...
1399
1400    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1401    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1402    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1403    exits to predict them using PRED_LOOP_EXIT.  */
1404
1405 static void
1406 predict_extra_loop_exits (edge exit_edge)
1407 {
1408   unsigned i;
1409   bool check_value_one;
1410   gimple lhs_def_stmt;
1411   gphi *phi_stmt;
1412   tree cmp_rhs, cmp_lhs;
1413   gimple last;
1414   gcond *cmp_stmt;
1415
1416   last = last_stmt (exit_edge->src);
1417   if (!last)
1418     return;
1419   cmp_stmt = dyn_cast <gcond *> (last);
1420   if (!cmp_stmt)
1421     return;
1422
1423   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1424   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1425   if (!TREE_CONSTANT (cmp_rhs)
1426       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1427     return;
1428   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1429     return;
1430
1431   /* If check_value_one is true, only the phi_args with value '1' will lead
1432      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1433      loop exit.  */
1434   check_value_one = (((integer_onep (cmp_rhs))
1435                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1436                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1437
1438   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1439   if (!lhs_def_stmt)
1440     return;
1441
1442   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1443   if (!phi_stmt)
1444     return;
1445
1446   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1447     {
1448       edge e1;
1449       edge_iterator ei;
1450       tree val = gimple_phi_arg_def (phi_stmt, i);
1451       edge e = gimple_phi_arg_edge (phi_stmt, i);
1452
1453       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1454         continue;
1455       if ((check_value_one ^ integer_onep (val)) == 1)
1456         continue;
1457       if (EDGE_COUNT (e->src->succs) != 1)
1458         {
1459           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1460           continue;
1461         }
1462
1463       FOR_EACH_EDGE (e1, ei, e->src->preds)
1464         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1465     }
1466 }
1467
1468 /* Predict edge probabilities by exploiting loop structure.  */
1469
1470 static void
1471 predict_loops (void)
1472 {
1473   struct loop *loop;
1474
1475   /* Try to predict out blocks in a loop that are not part of a
1476      natural loop.  */
1477   FOR_EACH_LOOP (loop, 0)
1478     {
1479       basic_block bb, *bbs;
1480       unsigned j, n_exits;
1481       vec<edge> exits;
1482       struct tree_niter_desc niter_desc;
1483       edge ex;
1484       struct nb_iter_bound *nb_iter;
1485       enum tree_code loop_bound_code = ERROR_MARK;
1486       tree loop_bound_step = NULL;
1487       tree loop_bound_var = NULL;
1488       tree loop_iv_base = NULL;
1489       gcond *stmt = NULL;
1490
1491       exits = get_loop_exit_edges (loop);
1492       n_exits = exits.length ();
1493       if (!n_exits)
1494         {
1495           exits.release ();
1496           continue;
1497         }
1498
1499       FOR_EACH_VEC_ELT (exits, j, ex)
1500         {
1501           tree niter = NULL;
1502           HOST_WIDE_INT nitercst;
1503           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1504           int probability;
1505           enum br_predictor predictor;
1506
1507           predict_extra_loop_exits (ex);
1508
1509           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1510             niter = niter_desc.niter;
1511           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1512             niter = loop_niter_by_eval (loop, ex);
1513
1514           if (TREE_CODE (niter) == INTEGER_CST)
1515             {
1516               if (tree_fits_uhwi_p (niter)
1517                   && max
1518                   && compare_tree_int (niter, max - 1) == -1)
1519                 nitercst = tree_to_uhwi (niter) + 1;
1520               else
1521                 nitercst = max;
1522               predictor = PRED_LOOP_ITERATIONS;
1523             }
1524           /* If we have just one exit and we can derive some information about
1525              the number of iterations of the loop from the statements inside
1526              the loop, use it to predict this exit.  */
1527           else if (n_exits == 1)
1528             {
1529               nitercst = estimated_stmt_executions_int (loop);
1530               if (nitercst < 0)
1531                 continue;
1532               if (nitercst > max)
1533                 nitercst = max;
1534
1535               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1536             }
1537           else
1538             continue;
1539
1540           /* If the prediction for number of iterations is zero, do not
1541              predict the exit edges.  */
1542           if (nitercst == 0)
1543             continue;
1544
1545           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1546           predict_edge (ex, predictor, probability);
1547         }
1548       exits.release ();
1549
1550       /* Find information about loop bound variables.  */
1551       for (nb_iter = loop->bounds; nb_iter;
1552            nb_iter = nb_iter->next)
1553         if (nb_iter->stmt
1554             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1555           {
1556             stmt = as_a <gcond *> (nb_iter->stmt);
1557             break;
1558           }
1559       if (!stmt && last_stmt (loop->header)
1560           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1561         stmt = as_a <gcond *> (last_stmt (loop->header));
1562       if (stmt)
1563         is_comparison_with_loop_invariant_p (stmt, loop,
1564                                              &loop_bound_var,
1565                                              &loop_bound_code,
1566                                              &loop_bound_step,
1567                                              &loop_iv_base);
1568
1569       bbs = get_loop_body (loop);
1570
1571       for (j = 0; j < loop->num_nodes; j++)
1572         {
1573           int header_found = 0;
1574           edge e;
1575           edge_iterator ei;
1576
1577           bb = bbs[j];
1578
1579           /* Bypass loop heuristics on continue statement.  These
1580              statements construct loops via "non-loop" constructs
1581              in the source language and are better to be handled
1582              separately.  */
1583           if (predicted_by_p (bb, PRED_CONTINUE))
1584             continue;
1585
1586           /* Loop branch heuristics - predict an edge back to a
1587              loop's head as taken.  */
1588           if (bb == loop->latch)
1589             {
1590               e = find_edge (loop->latch, loop->header);
1591               if (e)
1592                 {
1593                   header_found = 1;
1594                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1595                 }
1596             }
1597
1598           /* Loop exit heuristics - predict an edge exiting the loop if the
1599              conditional has no loop header successors as not taken.  */
1600           if (!header_found
1601               /* If we already used more reliable loop exit predictors, do not
1602                  bother with PRED_LOOP_EXIT.  */
1603               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1604               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1605             {
1606               /* For loop with many exits we don't want to predict all exits
1607                  with the pretty large probability, because if all exits are
1608                  considered in row, the loop would be predicted to iterate
1609                  almost never.  The code to divide probability by number of
1610                  exits is very rough.  It should compute the number of exits
1611                  taken in each patch through function (not the overall number
1612                  of exits that might be a lot higher for loops with wide switch
1613                  statements in them) and compute n-th square root.
1614
1615                  We limit the minimal probability by 2% to avoid
1616                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1617                  as this was causing regression in perl benchmark containing such
1618                  a wide loop.  */
1619
1620               int probability = ((REG_BR_PROB_BASE
1621                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1622                                  / n_exits);
1623               if (probability < HITRATE (2))
1624                 probability = HITRATE (2);
1625               FOR_EACH_EDGE (e, ei, bb->succs)
1626                 if (e->dest->index < NUM_FIXED_BLOCKS
1627                     || !flow_bb_inside_loop_p (loop, e->dest))
1628                   predict_edge (e, PRED_LOOP_EXIT, probability);
1629             }
1630           if (loop_bound_var)
1631             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1632                                    loop_bound_code,
1633                                    tree_to_shwi (loop_bound_step));
1634         }
1635
1636       /* Free basic blocks from get_loop_body.  */
1637       free (bbs);
1638     }
1639 }
1640
1641 /* Attempt to predict probabilities of BB outgoing edges using local
1642    properties.  */
1643 static void
1644 bb_estimate_probability_locally (basic_block bb)
1645 {
1646   rtx_insn *last_insn = BB_END (bb);
1647   rtx cond;
1648
1649   if (! can_predict_insn_p (last_insn))
1650     return;
1651   cond = get_condition (last_insn, NULL, false, false);
1652   if (! cond)
1653     return;
1654
1655   /* Try "pointer heuristic."
1656      A comparison ptr == 0 is predicted as false.
1657      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1658   if (COMPARISON_P (cond)
1659       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1660           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1661     {
1662       if (GET_CODE (cond) == EQ)
1663         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1664       else if (GET_CODE (cond) == NE)
1665         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1666     }
1667   else
1668
1669   /* Try "opcode heuristic."
1670      EQ tests are usually false and NE tests are usually true. Also,
1671      most quantities are positive, so we can make the appropriate guesses
1672      about signed comparisons against zero.  */
1673     switch (GET_CODE (cond))
1674       {
1675       case CONST_INT:
1676         /* Unconditional branch.  */
1677         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1678                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1679         break;
1680
1681       case EQ:
1682       case UNEQ:
1683         /* Floating point comparisons appears to behave in a very
1684            unpredictable way because of special role of = tests in
1685            FP code.  */
1686         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1687           ;
1688         /* Comparisons with 0 are often used for booleans and there is
1689            nothing useful to predict about them.  */
1690         else if (XEXP (cond, 1) == const0_rtx
1691                  || XEXP (cond, 0) == const0_rtx)
1692           ;
1693         else
1694           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1695         break;
1696
1697       case NE:
1698       case LTGT:
1699         /* Floating point comparisons appears to behave in a very
1700            unpredictable way because of special role of = tests in
1701            FP code.  */
1702         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1703           ;
1704         /* Comparisons with 0 are often used for booleans and there is
1705            nothing useful to predict about them.  */
1706         else if (XEXP (cond, 1) == const0_rtx
1707                  || XEXP (cond, 0) == const0_rtx)
1708           ;
1709         else
1710           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1711         break;
1712
1713       case ORDERED:
1714         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1715         break;
1716
1717       case UNORDERED:
1718         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1719         break;
1720
1721       case LE:
1722       case LT:
1723         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1724             || XEXP (cond, 1) == constm1_rtx)
1725           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1726         break;
1727
1728       case GE:
1729       case GT:
1730         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1731             || XEXP (cond, 1) == constm1_rtx)
1732           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1733         break;
1734
1735       default:
1736         break;
1737       }
1738 }
1739
1740 /* Set edge->probability for each successor edge of BB.  */
1741 void
1742 guess_outgoing_edge_probabilities (basic_block bb)
1743 {
1744   bb_estimate_probability_locally (bb);
1745   combine_predictions_for_insn (BB_END (bb), bb);
1746 }
1747 \f
1748 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1749
1750 /* Helper function for expr_expected_value.  */
1751
1752 static tree
1753 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1754                        tree op1, bitmap visited, enum br_predictor *predictor)
1755 {
1756   gimple def;
1757
1758   if (predictor)
1759     *predictor = PRED_UNCONDITIONAL;
1760
1761   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1762     {
1763       if (TREE_CONSTANT (op0))
1764         return op0;
1765
1766       if (code != SSA_NAME)
1767         return NULL_TREE;
1768
1769       def = SSA_NAME_DEF_STMT (op0);
1770
1771       /* If we were already here, break the infinite cycle.  */
1772       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1773         return NULL;
1774
1775       if (gimple_code (def) == GIMPLE_PHI)
1776         {
1777           /* All the arguments of the PHI node must have the same constant
1778              length.  */
1779           int i, n = gimple_phi_num_args (def);
1780           tree val = NULL, new_val;
1781
1782           for (i = 0; i < n; i++)
1783             {
1784               tree arg = PHI_ARG_DEF (def, i);
1785               enum br_predictor predictor2;
1786
1787               /* If this PHI has itself as an argument, we cannot
1788                  determine the string length of this argument.  However,
1789                  if we can find an expected constant value for the other
1790                  PHI args then we can still be sure that this is
1791                  likely a constant.  So be optimistic and just
1792                  continue with the next argument.  */
1793               if (arg == PHI_RESULT (def))
1794                 continue;
1795
1796               new_val = expr_expected_value (arg, visited, &predictor2);
1797
1798               /* It is difficult to combine value predictors.  Simply assume
1799                  that later predictor is weaker and take its prediction.  */
1800               if (predictor && *predictor < predictor2)
1801                 *predictor = predictor2;
1802               if (!new_val)
1803                 return NULL;
1804               if (!val)
1805                 val = new_val;
1806               else if (!operand_equal_p (val, new_val, false))
1807                 return NULL;
1808             }
1809           return val;
1810         }
1811       if (is_gimple_assign (def))
1812         {
1813           if (gimple_assign_lhs (def) != op0)
1814             return NULL;
1815
1816           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1817                                         gimple_assign_rhs1 (def),
1818                                         gimple_assign_rhs_code (def),
1819                                         gimple_assign_rhs2 (def),
1820                                         visited, predictor);
1821         }
1822
1823       if (is_gimple_call (def))
1824         {
1825           tree decl = gimple_call_fndecl (def);
1826           if (!decl)
1827             {
1828               if (gimple_call_internal_p (def)
1829                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1830                 {
1831                   gcc_assert (gimple_call_num_args (def) == 3);
1832                   tree val = gimple_call_arg (def, 0);
1833                   if (TREE_CONSTANT (val))
1834                     return val;
1835                   if (predictor)
1836                     {
1837                       tree val2 = gimple_call_arg (def, 2);
1838                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1839                                   && tree_fits_uhwi_p (val2)
1840                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1841                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1842                     }
1843                   return gimple_call_arg (def, 1);
1844                 }
1845               return NULL;
1846             }
1847           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1848             switch (DECL_FUNCTION_CODE (decl))
1849               {
1850               case BUILT_IN_EXPECT:
1851                 {
1852                   tree val;
1853                   if (gimple_call_num_args (def) != 2)
1854                     return NULL;
1855                   val = gimple_call_arg (def, 0);
1856                   if (TREE_CONSTANT (val))
1857                     return val;
1858                   if (predictor)
1859                     *predictor = PRED_BUILTIN_EXPECT;
1860                   return gimple_call_arg (def, 1);
1861                 }
1862
1863               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1864               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1865               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1866               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1867               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1868               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1869               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1870               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1871               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1872               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1873               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1874               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1875               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1876                 /* Assume that any given atomic operation has low contention,
1877                    and thus the compare-and-swap operation succeeds.  */
1878                 if (predictor)
1879                   *predictor = PRED_COMPARE_AND_SWAP;
1880                 return boolean_true_node;
1881               default:
1882                 break;
1883             }
1884         }
1885
1886       return NULL;
1887     }
1888
1889   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1890     {
1891       tree res;
1892       enum br_predictor predictor2;
1893       op0 = expr_expected_value (op0, visited, predictor);
1894       if (!op0)
1895         return NULL;
1896       op1 = expr_expected_value (op1, visited, &predictor2);
1897       if (predictor && *predictor < predictor2)
1898         *predictor = predictor2;
1899       if (!op1)
1900         return NULL;
1901       res = fold_build2 (code, type, op0, op1);
1902       if (TREE_CONSTANT (res))
1903         return res;
1904       return NULL;
1905     }
1906   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1907     {
1908       tree res;
1909       op0 = expr_expected_value (op0, visited, predictor);
1910       if (!op0)
1911         return NULL;
1912       res = fold_build1 (code, type, op0);
1913       if (TREE_CONSTANT (res))
1914         return res;
1915       return NULL;
1916     }
1917   return NULL;
1918 }
1919
1920 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1921    The function is used by builtin_expect branch predictor so the evidence
1922    must come from this construct and additional possible constant folding.
1923
1924    We may want to implement more involved value guess (such as value range
1925    propagation based prediction), but such tricks shall go to new
1926    implementation.  */
1927
1928 static tree
1929 expr_expected_value (tree expr, bitmap visited,
1930                      enum br_predictor *predictor)
1931 {
1932   enum tree_code code;
1933   tree op0, op1;
1934
1935   if (TREE_CONSTANT (expr))
1936     {
1937       if (predictor)
1938         *predictor = PRED_UNCONDITIONAL;
1939       return expr;
1940     }
1941
1942   extract_ops_from_tree (expr, &code, &op0, &op1);
1943   return expr_expected_value_1 (TREE_TYPE (expr),
1944                                 op0, code, op1, visited, predictor);
1945 }
1946 \f
1947 /* Predict using opcode of the last statement in basic block.  */
1948 static void
1949 tree_predict_by_opcode (basic_block bb)
1950 {
1951   gimple stmt = last_stmt (bb);
1952   edge then_edge;
1953   tree op0, op1;
1954   tree type;
1955   tree val;
1956   enum tree_code cmp;
1957   bitmap visited;
1958   edge_iterator ei;
1959   enum br_predictor predictor;
1960
1961   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1962     return;
1963   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1964     if (then_edge->flags & EDGE_TRUE_VALUE)
1965       break;
1966   op0 = gimple_cond_lhs (stmt);
1967   op1 = gimple_cond_rhs (stmt);
1968   cmp = gimple_cond_code (stmt);
1969   type = TREE_TYPE (op0);
1970   visited = BITMAP_ALLOC (NULL);
1971   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1972                                &predictor);
1973   BITMAP_FREE (visited);
1974   if (val && TREE_CODE (val) == INTEGER_CST)
1975     {
1976       if (predictor == PRED_BUILTIN_EXPECT)
1977         {
1978           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1979
1980           gcc_assert (percent >= 0 && percent <= 100);
1981           if (integer_zerop (val))
1982             percent = 100 - percent;
1983           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1984         }
1985       else
1986         predict_edge (then_edge, predictor,
1987                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
1988     }
1989   /* Try "pointer heuristic."
1990      A comparison ptr == 0 is predicted as false.
1991      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1992   if (POINTER_TYPE_P (type))
1993     {
1994       if (cmp == EQ_EXPR)
1995         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1996       else if (cmp == NE_EXPR)
1997         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1998     }
1999   else
2000
2001   /* Try "opcode heuristic."
2002      EQ tests are usually false and NE tests are usually true. Also,
2003      most quantities are positive, so we can make the appropriate guesses
2004      about signed comparisons against zero.  */
2005     switch (cmp)
2006       {
2007       case EQ_EXPR:
2008       case UNEQ_EXPR:
2009         /* Floating point comparisons appears to behave in a very
2010            unpredictable way because of special role of = tests in
2011            FP code.  */
2012         if (FLOAT_TYPE_P (type))
2013           ;
2014         /* Comparisons with 0 are often used for booleans and there is
2015            nothing useful to predict about them.  */
2016         else if (integer_zerop (op0) || integer_zerop (op1))
2017           ;
2018         else
2019           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2020         break;
2021
2022       case NE_EXPR:
2023       case LTGT_EXPR:
2024         /* Floating point comparisons appears to behave in a very
2025            unpredictable way because of special role of = tests in
2026            FP code.  */
2027         if (FLOAT_TYPE_P (type))
2028           ;
2029         /* Comparisons with 0 are often used for booleans and there is
2030            nothing useful to predict about them.  */
2031         else if (integer_zerop (op0)
2032                  || integer_zerop (op1))
2033           ;
2034         else
2035           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2036         break;
2037
2038       case ORDERED_EXPR:
2039         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2040         break;
2041
2042       case UNORDERED_EXPR:
2043         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2044         break;
2045
2046       case LE_EXPR:
2047       case LT_EXPR:
2048         if (integer_zerop (op1)
2049             || integer_onep (op1)
2050             || integer_all_onesp (op1)
2051             || real_zerop (op1)
2052             || real_onep (op1)
2053             || real_minus_onep (op1))
2054           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2055         break;
2056
2057       case GE_EXPR:
2058       case GT_EXPR:
2059         if (integer_zerop (op1)
2060             || integer_onep (op1)
2061             || integer_all_onesp (op1)
2062             || real_zerop (op1)
2063             || real_onep (op1)
2064             || real_minus_onep (op1))
2065           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2066         break;
2067
2068       default:
2069         break;
2070       }
2071 }
2072
2073 /* Try to guess whether the value of return means error code.  */
2074
2075 static enum br_predictor
2076 return_prediction (tree val, enum prediction *prediction)
2077 {
2078   /* VOID.  */
2079   if (!val)
2080     return PRED_NO_PREDICTION;
2081   /* Different heuristics for pointers and scalars.  */
2082   if (POINTER_TYPE_P (TREE_TYPE (val)))
2083     {
2084       /* NULL is usually not returned.  */
2085       if (integer_zerop (val))
2086         {
2087           *prediction = NOT_TAKEN;
2088           return PRED_NULL_RETURN;
2089         }
2090     }
2091   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2092     {
2093       /* Negative return values are often used to indicate
2094          errors.  */
2095       if (TREE_CODE (val) == INTEGER_CST
2096           && tree_int_cst_sgn (val) < 0)
2097         {
2098           *prediction = NOT_TAKEN;
2099           return PRED_NEGATIVE_RETURN;
2100         }
2101       /* Constant return values seems to be commonly taken.
2102          Zero/one often represent booleans so exclude them from the
2103          heuristics.  */
2104       if (TREE_CONSTANT (val)
2105           && (!integer_zerop (val) && !integer_onep (val)))
2106         {
2107           *prediction = TAKEN;
2108           return PRED_CONST_RETURN;
2109         }
2110     }
2111   return PRED_NO_PREDICTION;
2112 }
2113
2114 /* Find the basic block with return expression and look up for possible
2115    return value trying to apply RETURN_PREDICTION heuristics.  */
2116 static void
2117 apply_return_prediction (void)
2118 {
2119   greturn *return_stmt = NULL;
2120   tree return_val;
2121   edge e;
2122   gphi *phi;
2123   int phi_num_args, i;
2124   enum br_predictor pred;
2125   enum prediction direction;
2126   edge_iterator ei;
2127
2128   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2129     {
2130       gimple last = last_stmt (e->src);
2131       if (last
2132           && gimple_code (last) == GIMPLE_RETURN)
2133         {
2134           return_stmt = as_a <greturn *> (last);
2135           break;
2136         }
2137     }
2138   if (!e)
2139     return;
2140   return_val = gimple_return_retval (return_stmt);
2141   if (!return_val)
2142     return;
2143   if (TREE_CODE (return_val) != SSA_NAME
2144       || !SSA_NAME_DEF_STMT (return_val)
2145       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2146     return;
2147   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2148   phi_num_args = gimple_phi_num_args (phi);
2149   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2150
2151   /* Avoid the degenerate case where all return values form the function
2152      belongs to same category (ie they are all positive constants)
2153      so we can hardly say something about them.  */
2154   for (i = 1; i < phi_num_args; i++)
2155     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2156       break;
2157   if (i != phi_num_args)
2158     for (i = 0; i < phi_num_args; i++)
2159       {
2160         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2161         if (pred != PRED_NO_PREDICTION)
2162           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2163                                          direction);
2164       }
2165 }
2166
2167 /* Look for basic block that contains unlikely to happen events
2168    (such as noreturn calls) and mark all paths leading to execution
2169    of this basic blocks as unlikely.  */
2170
2171 static void
2172 tree_bb_level_predictions (void)
2173 {
2174   basic_block bb;
2175   bool has_return_edges = false;
2176   edge e;
2177   edge_iterator ei;
2178
2179   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2180     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2181       {
2182         has_return_edges = true;
2183         break;
2184       }
2185
2186   apply_return_prediction ();
2187
2188   FOR_EACH_BB_FN (bb, cfun)
2189     {
2190       gimple_stmt_iterator gsi;
2191
2192       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2193         {
2194           gimple stmt = gsi_stmt (gsi);
2195           tree decl;
2196
2197           if (is_gimple_call (stmt))
2198             {
2199               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2200                   && has_return_edges)
2201                 predict_paths_leading_to (bb, PRED_NORETURN,
2202                                           NOT_TAKEN);
2203               decl = gimple_call_fndecl (stmt);
2204               if (decl
2205                   && lookup_attribute ("cold",
2206                                        DECL_ATTRIBUTES (decl)))
2207                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2208                                           NOT_TAKEN);
2209             }
2210           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2211             {
2212               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2213                                         gimple_predict_outcome (stmt));
2214               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2215                  hints to callers.  */
2216             }
2217         }
2218     }
2219 }
2220
#ifdef ENABLE_CHECKING

/* hash_map::traverse callback used to check that the prediction map
   holds no leftover data: asserts that the entry's VALUE is null.
   The key and the user-data argument are ignored.  Always returns
   false.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
                 void *)
{
  gcc_assert (value == NULL);
  return false;
}
#endif
2234
2235 /* Predict branch probabilities and estimate profile for basic block BB.  */
2236
2237 static void
2238 tree_estimate_probability_bb (basic_block bb)
2239 {
2240   edge e;
2241   edge_iterator ei;
2242   gimple last;
2243
2244   FOR_EACH_EDGE (e, ei, bb->succs)
2245     {
2246       /* Predict edges to user labels with attributes.  */
2247       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2248         {
2249           gimple_stmt_iterator gi;
2250           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2251             {
2252               glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2253               tree decl;
2254
2255               if (!label_stmt)
2256                 break;
2257               decl = gimple_label_label (label_stmt);
2258               if (DECL_ARTIFICIAL (decl))
2259                 continue;
2260
2261               /* Finally, we have a user-defined label.  */
2262               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2263                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2264               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2265                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2266             }
2267         }
2268
2269       /* Predict early returns to be probable, as we've already taken
2270          care for error returns and other cases are often used for
2271          fast paths through function.
2272
2273          Since we've already removed the return statements, we are
2274          looking for CFG like:
2275
2276          if (conditional)
2277          {
2278          ..
2279          goto return_block
2280          }
2281          some other blocks
2282          return_block:
2283          return_stmt.  */
2284       if (e->dest != bb->next_bb
2285           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2286           && single_succ_p (e->dest)
2287           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2288           && (last = last_stmt (e->dest)) != NULL
2289           && gimple_code (last) == GIMPLE_RETURN)
2290         {
2291           edge e1;
2292           edge_iterator ei1;
2293
2294           if (single_succ_p (bb))
2295             {
2296               FOR_EACH_EDGE (e1, ei1, bb->preds)
2297                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2298                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2299                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2300                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2301             }
2302           else
2303             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2304                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2305                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2306               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2307         }
2308
2309       /* Look for block we are guarding (ie we dominate it,
2310          but it doesn't postdominate us).  */
2311       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2312           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2313           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2314         {
2315           gimple_stmt_iterator bi;
2316
2317           /* The call heuristic claims that a guarded function call
2318              is improbable.  This is because such calls are often used
2319              to signal exceptional situations such as printing error
2320              messages.  */
2321           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2322                gsi_next (&bi))
2323             {
2324               gimple stmt = gsi_stmt (bi);
2325               if (is_gimple_call (stmt)
2326                   /* Constant and pure calls are hardly used to signalize
2327                      something exceptional.  */
2328                   && gimple_has_side_effects (stmt))
2329                 {
2330                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2331                   break;
2332                 }
2333             }
2334         }
2335     }
2336   tree_predict_by_opcode (bb);
2337 }
2338
2339 /* Predict branch probabilities and estimate profile of the tree CFG.
2340    This function can be called from the loop optimizers to recompute
2341    the profile information.  */
     /* The body is a fixed sequence: prepare the CFG and post-dominator
        information, run the block-level, loop and per-block predictors
        into the bb_predictions map, combine the predictions per block,
        release the map, estimate block frequencies and finally undo the
        preparation.  */
2342
2343 void
2344 tree_estimate_probability (void)
2345 {
2346   basic_block bb;
2347
       /* NOTE(review): presumably these two calls add the fake exit edges
          that remove_fake_exit_edges () drops again at the end of this
          function -- confirm against their definitions.  */
2348   add_noreturn_fake_exit_edges ();
2349   connect_infinite_loops_to_exit ();
2350   /* We use loop_niter_by_eval, which requires that the loops have
2351      preheaders.  */
2352   create_preheaders (CP_SIMPLE_PREHEADERS);
       /* Post-dominator information is computed here and freed below,
          after the block frequencies have been estimated.  */
2353   calculate_dominance_info (CDI_POST_DOMINATORS);
2354
       /* The prediction map lives only while the predictors below run; it
          is deleted and reset to NULL once predictions are combined.  */
2355   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2356   tree_bb_level_predictions ();
2357   record_loop_exits ();
2358
       /* predict_loops is only run when more than one loop is reported.  */
2359   if (number_of_loops (cfun) > 1)
2360     predict_loops ();
2361
       /* First collect predictions for every block, then combine them in
          a second walk over all blocks.  */
2362   FOR_EACH_BB_FN (bb, cfun)
2363     tree_estimate_probability_bb (bb);
2364
2365   FOR_EACH_BB_FN (bb, cfun)
2366     combine_predictions_for_bb (bb);
2367
       /* In checking builds, assert via assert_is_empty that every value
          still stored in the map is null.  */
2368 #ifdef ENABLE_CHECKING
2369   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2370 #endif
2371   delete bb_predictions;
2372   bb_predictions = NULL;
2373
2374   estimate_bb_frequencies (false);
2375   free_dominance_info (CDI_POST_DOMINATORS);
2376   remove_fake_exit_edges ();
2377 }
2378 \f
2379 /* Predict edges to successors of CUR whose sources are not postdominated by
2380    BB by PRED and recurse to all postdominators.  */
2381
2382 static void
2383 predict_paths_for_bb (basic_block cur, basic_block bb,
2384                       enum br_predictor pred,
2385                       enum prediction taken,
2386                       bitmap visited)
2387 {
2388   edge e;
2389   edge_iterator ei;
2390   basic_block son;
2391
2392   /* We are looking for all edges forming edge cut induced by
2393      set of all blocks postdominated by BB.  */
2394   FOR_EACH_EDGE (e, ei, cur->preds)
2395     if (e->src->index >= NUM_FIXED_BLOCKS
2396         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2397     {
2398       edge e2;
2399       edge_iterator ei2;
2400       bool found = false;
2401
2402       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2403       if (e->flags & (EDGE_EH | EDGE_FAKE))
2404         continue;
2405       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2406
2407       /* See if there is an edge from e->src that is not abnormal
2408          and does not lead to BB.  */
2409       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2410         if (e2 != e
2411             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2412             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2413           {
2414             found = true;
2415             break;
2416           }
2417
2418       /* If there is non-abnormal path leaving e->src, predict edge
2419          using predictor.  Otherwise we need to look for paths
2420          leading to e->src.
2421
2422          The second may lead to infinite loop in the case we are predicitng
2423          regions that are only reachable by abnormal edges.  We simply
2424          prevent visiting given BB twice.  */
2425       if (found)
2426         predict_edge_def (e, pred, taken);
2427       else if (bitmap_set_bit (visited, e->src->index))
2428         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2429     }
2430   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2431        son;
2432        son = next_dom_son (CDI_POST_DOMINATORS, son))
2433     predict_paths_for_bb (son, bb, pred, taken, visited);
2434 }
2435
2436 /* Sets branch probabilities according to PREDiction and
2437    FLAGS.  */
2438
2439 static void
2440 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2441                           enum prediction taken)
2442 {
2443   bitmap visited = BITMAP_ALLOC (NULL);
2444   predict_paths_for_bb (bb, bb, pred, taken, visited);
2445   BITMAP_FREE (visited);
2446 }
2447
2448 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2449
2450 static void
2451 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2452                                enum prediction taken)
2453 {
2454   bool has_nonloop_edge = false;
2455   edge_iterator ei;
2456   edge e2;
2457
2458   basic_block bb = e->src;
2459   FOR_EACH_EDGE (e2, ei, bb->succs)
2460     if (e2->dest != e->src && e2->dest != e->dest
2461         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2462         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2463       {
2464         has_nonloop_edge = true;
2465         break;
2466       }
2467   if (!has_nonloop_edge)
2468     {
2469       bitmap visited = BITMAP_ALLOC (NULL);
2470       predict_paths_for_bb (bb, bb, pred, taken, visited);
2471       BITMAP_FREE (visited);
2472     }
2473   else
2474     predict_edge_def (e, pred, taken);
2475 }
2476 \f
2477 /* This is used to carry information about basic blocks.  It is
2478    attached to the AUX field of the standard CFG block.  */
     /* Accessed through the BLOCK_INFO macro, which casts a block's aux
        pointer to this type.  */
2479
2480 struct block_info
2481 {
2482   /* Estimated frequency of execution of basic_block.  */
       /* propagate_freq sets this to 1 for its head block and computes it
          for every other block from the probability-weighted frequencies
          of its predecessors.  */
2483   sreal frequency;
2484
2485   /* To keep queue of basic blocks to process.  */
       /* propagate_freq reads this link to find the next block of its
          work list and resets it to NULL when the block is taken off.  */
2486   basic_block next;
2487
2488   /* Number of predecessors we need to visit first.  */
       /* propagate_freq initializes this to the number of incoming edges
          that come from a block still to be visited and are not flagged
          EDGE_DFS_BACK; it is decremented as predecessors are done.  */
2489   int npredecessors;
2490 };
2491
2492 /* Similar information for edges.  */
     /* Accessed through the EDGE_INFO macro, which casts an edge's aux
        pointer to this type.  */
2493 struct edge_prob_info
2494 {
2495   /* In case edge is a loopback edge, the probability edge will be reached
2496      in case header is.  Estimated number of iterations of the loop can be
2497      then computed as 1 / (1 - back_edge_prob).  */
       /* propagate_freq stores this for an edge from a processed block to
          its head block, as the edge probability times the source block's
          frequency, scaled by real_inv_br_prob_base.  */
2498   sreal back_edge_prob;
2499   /* True if the edge is a loopback edge in the natural loop.  */
       /* estimate_loops_at_level sets this on each loop's latch edge.  */
2500   unsigned int back_edge:1;
2501 };
2502
     /* View the aux pointer of basic block B as a block_info, and the aux
        pointer of edge E as an edge_prob_info.  No check is made that aux
        really points to such an object.
        NOTE(review): EDGE_INFO is #undef'd first, presumably because an
        included header already defines a macro of that name -- confirm.  */
2503 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2504 #undef EDGE_INFO
2505 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2506
2507 /* Helper function for estimate_bb_frequencies.
2508    Propagate the frequencies in blocks marked in
2509    TOVISIT, starting in HEAD.  */
     /* HEAD is given frequency 1.  Every other block gets the sum of its
        predecessors' frequencies weighted by edge probability, divided by
        (1 - cyclic probability) when edges marked as back edges enter it.
        A block is processed only after all of its predecessors in TOVISIT
        that reach it over non-EDGE_DFS_BACK edges have been processed.
        The bit of each processed block is cleared from TOVISIT.  Results
        are stored in BLOCK_INFO (bb)->frequency and, for edges leading
        back to HEAD, in EDGE_INFO (e)->back_edge_prob.  */
2510
2511 static void
2512 propagate_freq (basic_block head, bitmap tovisit)
2513 {
2514   basic_block bb;
2515   basic_block last;
2516   unsigned i;
2517   edge e;
2518   basic_block nextbb;
2519   bitmap_iterator bi;
2520
2521   /* For each basic block we need to visit count number of his predecessors
2522      we need to visit first.  */
2523   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2524     {
2525       edge_iterator ei;
2526       int count = 0;
2527
2528       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2529
2530       FOR_EACH_EDGE (e, ei, bb->preds)
2531         {
2532           bool visit = bitmap_bit_p (tovisit, e->src->index);
2533
               /* Only forward edges from blocks still to be visited count.
                  An EDGE_DFS_BACK edge that was not marked as a loop back
                  edge is ignored and merely reported in the dump file.  */
2534           if (visit && !(e->flags & EDGE_DFS_BACK))
2535             count++;
2536           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2537             fprintf (dump_file,
2538                      "Irreducible region hit, ignoring edge to %i->%i\n",
2539                      e->src->index, bb->index);
2540         }
2541       BLOCK_INFO (bb)->npredecessors = count;
2542       /* When function never returns, we will never process exit block.  */
2543       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2544         bb->count = bb->frequency = 0;
2545     }
2546
       /* The work list is threaded through BLOCK_INFO (...)->next.  It
          starts with HEAD alone; LAST tracks the block most recently
          appended.  */
2547   BLOCK_INFO (head)->frequency = 1;
2548   last = head;
2549   for (bb = head; bb; bb = nextbb)
2550     {
2551       edge_iterator ei;
2552       sreal cyclic_probability = 0;
2553       sreal frequency = 0;
2554
           /* Take BB off the list before its successors are appended.  */
2555       nextbb = BLOCK_INFO (bb)->next;
2556       BLOCK_INFO (bb)->next = NULL;
2557
2558       /* Compute frequency of basic block.  */
           /* HEAD keeps the frequency 1 assigned above.  */
2559       if (bb != head)
2560         {
2561 #ifdef ENABLE_CHECKING
2562           FOR_EACH_EDGE (e, ei, bb->preds)
2563             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2564                         || (e->flags & EDGE_DFS_BACK));
2565 #endif
2566
               /* Edges marked as back edges contribute to the cyclic
                  probability; other non-DFS-back edges contribute their
                  share of the source block's frequency.  */
2567           FOR_EACH_EDGE (e, ei, bb->preds)
2568             if (EDGE_INFO (e)->back_edge)
2569               {
2570                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2571               }
2572             else if (!(e->flags & EDGE_DFS_BACK))
2573               {
2574                 /*  frequency += (e->probability
2575                                   * BLOCK_INFO (e->src)->frequency /
2576                                   REG_BR_PROB_BASE);  */
2577
2578                 sreal tmp = e->probability;
2579                 tmp *= BLOCK_INFO (e->src)->frequency;
2580                 tmp *= real_inv_br_prob_base;
2581                 frequency += tmp;
2582               }
2583
2584           if (cyclic_probability == 0)
2585             {
2586               BLOCK_INFO (bb)->frequency = frequency;
2587             }
2588           else
2589             {
                   /* Clamp to real_almost_one before forming
                      1 - cyclic_probability, which is the divisor below.  */
2590               if (cyclic_probability > real_almost_one)
2591                 cyclic_probability = real_almost_one;
2592
2593               /* BLOCK_INFO (bb)->frequency = frequency
2594                                               / (1 - cyclic_probability) */
2595
2596               cyclic_probability = sreal (1) - cyclic_probability;
2597               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2598             }
2599         }
2600
2601       bitmap_clear_bit (tovisit, bb->index);
2602
           /* If BB has an edge back to HEAD, record the probability mass
              that edge carries.  */
2603       e = find_edge (bb, head);
2604       if (e)
2605         {
2606           /* EDGE_INFO (e)->back_edge_prob
2607              = ((e->probability * BLOCK_INFO (bb)->frequency)
2608              / REG_BR_PROB_BASE); */
2609
2610           sreal tmp = e->probability;
2611           tmp *= BLOCK_INFO (bb)->frequency;
2612           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2613         }
2614
2615       /* Propagate to successor blocks.  */
           /* A successor is appended to the work list once its count of
              outstanding predecessors drops to zero.  Successors whose
              count is already zero are left alone.  */
2616       FOR_EACH_EDGE (e, ei, bb->succs)
2617         if (!(e->flags & EDGE_DFS_BACK)
2618             && BLOCK_INFO (e->dest)->npredecessors)
2619           {
2620             BLOCK_INFO (e->dest)->npredecessors--;
2621             if (!BLOCK_INFO (e->dest)->npredecessors)
2622               {
2623                 if (!nextbb)
2624                   nextbb = e->dest;
2625                 else
2626                   BLOCK_INFO (last)->next = e->dest;
2627
2628                 last = e->dest;
2629               }
2630           }
2631     }
2632 }
2633
2634 /* Estimate frequencies in loops at same nest level.  */
2635
2636 static void
2637 estimate_loops_at_level (struct loop *first_loop)
2638 {
2639   struct loop *loop;
2640
2641   for (loop = first_loop; loop; loop = loop->next)
2642     {
2643       edge e;
2644       basic_block *bbs;
2645       unsigned i;
2646       bitmap tovisit = BITMAP_ALLOC (NULL);
2647
2648       estimate_loops_at_level (loop->inner);
2649
2650       /* Find current loop back edge and mark it.  */
2651       e = loop_latch_edge (loop);
2652       EDGE_INFO (e)->back_edge = 1;
2653
2654       bbs = get_loop_body (loop);
2655       for (i = 0; i < loop->num_nodes; i++)
2656         bitmap_set_bit (tovisit, bbs[i]->index);
2657       free (bbs);
2658       propagate_freq (loop->header, tovisit);
2659       BITMAP_FREE (tovisit);
2660     }
2661 }
2662
2663 /* Propagates frequencies through structure of loops.  */
2664
2665 static void
2666 estimate_loops (void)
2667 {
2668   bitmap tovisit = BITMAP_ALLOC (NULL);
2669   basic_block bb;
2670
2671   /* Start by estimating the frequencies in the loops.  */
2672   if (number_of_loops (cfun) > 1)
2673     estimate_loops_at_level (current_loops->tree_root->inner);
2674
2675   /* Now propagate the frequencies through all the blocks.  */
2676   FOR_ALL_BB_FN (bb, cfun)
2677     {
2678       bitmap_set_bit (tovisit, bb->index);
2679     }
2680   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2681   BITMAP_FREE (tovisit);
2682 }
2683
2684 /* Drop the profile for NODE to guessed, and update its frequency based on
2685    whether it is expected to be hot given the CALL_COUNT.  */
2686
2687 static void
2688 drop_profile (struct cgraph_node *node, gcov_type call_count)
2689 {
2690   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2691   /* In the case where this was called by another function with a
2692      dropped profile, call_count will be 0. Since there are no
2693      non-zero call counts to this function, we don't know for sure
2694      whether it is hot, and therefore it will be marked normal below.  */
2695   bool hot = maybe_hot_count_p (NULL, call_count);
2696
2697   if (dump_file)
2698     fprintf (dump_file,
2699              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2700              node->name (), node->order,
2701              hot ? "Function is hot" : "Function is normal");
2702   /* We only expect to miss profiles for functions that are reached
2703      via non-zero call edges in cases where the function may have
2704      been linked from another module or library (COMDATs and extern
2705      templates). See the comments below for handle_missing_profiles.
2706      Also, only warn in cases where the missing counts exceed the
2707      number of training runs. In certain cases with an execv followed
2708      by a no-return call the profile for the no-return call is not
2709      dumped and there can be a mismatch.  */
2710   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2711       && call_count > profile_info->runs)
2712     {
2713       if (flag_profile_correction)
2714         {
2715           if (dump_file)
2716             fprintf (dump_file,
2717                      "Missing counts for called function %s/%i\n",
2718                      node->name (), node->order);
2719         }
2720       else
2721         warning (0, "Missing counts for called function %s/%i",
2722                  node->name (), node->order);
2723     }
2724
2725   profile_status_for_fn (fn)
2726       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2727   node->frequency
2728       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2729 }
2730
2731 /* In the case of COMDAT routines, multiple object files will contain the same
2732    function and the linker will select one for the binary. In that case
2733    all the other copies from the profile instrument binary will be missing
2734    profile counts. Look for cases where this happened, due to non-zero
2735    call counts going to 0-count functions, and drop the profile to guessed
2736    so that we can use the estimated probabilities and avoid optimizing only
2737    for size.
2738
2739    The other case where the profile may be missing is when the routine
2740    is not going to be emitted to the object file, e.g. for "extern template"
2741    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2742    all other cases of non-zero calls to 0-count functions.  */
2743
2744 void
2745 handle_missing_profiles (void)
2746 {
2747   struct cgraph_node *node;
2748   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2749   vec<struct cgraph_node *> worklist;
2750   worklist.create (64);
2751
2752   /* See if 0 count function has non-0 count callers.  In this case we
2753      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2754   FOR_EACH_DEFINED_FUNCTION (node)
2755     {
2756       struct cgraph_edge *e;
2757       gcov_type call_count = 0;
2758       gcov_type max_tp_first_run = 0;
2759       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2760
2761       if (node->count)
2762         continue;
2763       for (e = node->callers; e; e = e->next_caller)
2764       {
2765         call_count += e->count;
2766
2767         if (e->caller->tp_first_run > max_tp_first_run)
2768           max_tp_first_run = e->caller->tp_first_run;
2769       }
2770
2771       /* If time profile is missing, let assign the maximum that comes from
2772          caller functions.  */
2773       if (!node->tp_first_run && max_tp_first_run)
2774         node->tp_first_run = max_tp_first_run + 1;
2775
2776       if (call_count
2777           && fn && fn->cfg
2778           && (call_count * unlikely_count_fraction >= profile_info->runs))
2779         {
2780           drop_profile (node, call_count);
2781           worklist.safe_push (node);
2782         }
2783     }
2784
2785   /* Propagate the profile dropping to other 0-count COMDATs that are
2786      potentially called by COMDATs we already dropped the profile on.  */
2787   while (worklist.length () > 0)
2788     {
2789       struct cgraph_edge *e;
2790
2791       node = worklist.pop ();
2792       for (e = node->callees; e; e = e->next_caller)
2793         {
2794           struct cgraph_node *callee = e->callee;
2795           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2796
2797           if (callee->count > 0)
2798             continue;
2799           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2800               && profile_status_for_fn (fn) == PROFILE_READ)
2801             {
2802               drop_profile (node, 0);
2803               worklist.safe_push (callee);
2804             }
2805         }
2806     }
2807   worklist.release ();
2808 }
2809
2810 /* Convert counts measured by profile driven feedback to frequencies.
2811    Return nonzero iff there was any nonzero execution count.  */
2812
2813 int
2814 counts_to_freqs (void)
2815 {
2816   gcov_type count_max, true_count_max = 0;
2817   basic_block bb;
2818
2819   /* Don't overwrite the estimated frequencies when the profile for
2820      the function is missing.  We may drop this function PROFILE_GUESSED
2821      later in drop_profile ().  */
2822   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2823     return 0;
2824
2825   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2826     true_count_max = MAX (bb->count, true_count_max);
2827
2828   count_max = MAX (true_count_max, 1);
2829   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2830     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2831
2832   return true_count_max;
2833 }
2834
2835 /* Return true if function is likely to be expensive, so there is no point to
2836    optimize performance of prologue, epilogue or do inlining at the expense
2837    of code size growth.  THRESHOLD is the limit of number of instructions
2838    function can execute at average to be still considered not expensive.  */
2839
2840 bool
2841 expensive_function_p (int threshold)
2842 {
2843   unsigned int sum = 0;
2844   basic_block bb;
2845   unsigned int limit;
2846
2847   /* We can not compute accurately for large thresholds due to scaled
2848      frequencies.  */
2849   gcc_assert (threshold <= BB_FREQ_MAX);
2850
2851   /* Frequencies are out of range.  This either means that function contains
2852      internal loop executing more than BB_FREQ_MAX times or profile feedback
2853      is available and function has not been executed at all.  */
2854   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2855     return true;
2856
2857   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2858   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2859   FOR_EACH_BB_FN (bb, cfun)
2860     {
2861       rtx_insn *insn;
2862
2863       FOR_BB_INSNS (bb, insn)
2864         if (active_insn_p (insn))
2865           {
2866             sum += bb->frequency;
2867             if (sum > limit)
2868               return true;
2869         }
2870     }
2871
2872   return false;
2873 }
2874
2875 /* Estimate and propagate basic block frequencies using the given branch
2876    probabilities.  If FORCE is true, the frequencies are used to estimate
2877    the counts even when there are already non-zero profile counts.  */
2878
2879 void
2880 estimate_bb_frequencies (bool force)
2881 {
2882   basic_block bb;
2883   sreal freq_max;
2884
2885   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2886     {
2887       static int real_values_initialized = 0;
2888
2889       if (!real_values_initialized)
2890         {
2891           real_values_initialized = 1;
2892           real_br_prob_base = REG_BR_PROB_BASE;
2893           real_bb_freq_max = BB_FREQ_MAX;
2894           real_one_half = sreal (1, -1);
2895           real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2896           real_almost_one = sreal (1) - real_inv_br_prob_base;
2897         }
2898
2899       mark_dfs_back_edges ();
2900
2901       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2902          REG_BR_PROB_BASE;
2903
2904       /* Set up block info for each basic block.  */
2905       alloc_aux_for_blocks (sizeof (block_info));
2906       alloc_aux_for_edges (sizeof (edge_prob_info));
2907       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2908         {
2909           edge e;
2910           edge_iterator ei;
2911
2912           FOR_EACH_EDGE (e, ei, bb->succs)
2913             {
2914               EDGE_INFO (e)->back_edge_prob = e->probability;
2915               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2916             }
2917         }
2918
2919       /* First compute frequencies locally for each loop from innermost
2920          to outermost to examine frequencies for back edges.  */
2921       estimate_loops ();
2922
2923       freq_max = 0;
2924       FOR_EACH_BB_FN (bb, cfun)
2925         if (freq_max < BLOCK_INFO (bb)->frequency)
2926           freq_max = BLOCK_INFO (bb)->frequency;
2927
2928       freq_max = real_bb_freq_max / freq_max;
2929       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2930         {
2931           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2932           bb->frequency = tmp.to_int ();
2933         }
2934
2935       free_aux_for_blocks ();
2936       free_aux_for_edges ();
2937     }
2938   compute_function_frequency ();
2939 }
2940
2941 /* Decide whether function is hot, cold or unlikely executed.  */
2942 void
2943 compute_function_frequency (void)
2944 {
2945   basic_block bb;
2946   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2947
2948   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2949       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2950     node->only_called_at_startup = true;
2951   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2952     node->only_called_at_exit = true;
2953
2954   if (profile_status_for_fn (cfun) != PROFILE_READ)
2955     {
2956       int flags = flags_from_decl_or_type (current_function_decl);
2957       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2958           != NULL)
2959         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2960       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2961                != NULL)
2962         node->frequency = NODE_FREQUENCY_HOT;
2963       else if (flags & ECF_NORETURN)
2964         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2965       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2966         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2967       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2968                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2969         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2970       return;
2971     }
2972
2973   /* Only first time try to drop function into unlikely executed.
2974      After inlining the roundoff errors may confuse us.
2975      Ipa-profile pass will drop functions only called from unlikely
2976      functions to unlikely and that is most of what we care about.  */
2977   if (!cfun->after_inlining)
2978     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2979   FOR_EACH_BB_FN (bb, cfun)
2980     {
2981       if (maybe_hot_bb_p (cfun, bb))
2982         {
2983           node->frequency = NODE_FREQUENCY_HOT;
2984           return;
2985         }
2986       if (!probably_never_executed_bb_p (cfun, bb))
2987         node->frequency = NODE_FREQUENCY_NORMAL;
2988     }
2989 }
2990
2991 /* Build PREDICT_EXPR.  */
2992 tree
2993 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2994 {
2995   tree t = build1 (PREDICT_EXPR, void_type_node,
2996                    build_int_cst (integer_type_node, predictor));
2997   SET_PREDICT_EXPR_OUTCOME (t, taken);
2998   return t;
2999 }
3000
3001 const char *
3002 predictor_name (enum br_predictor predictor)
3003 {
3004   return predictor_info[predictor].name;
3005 }
3006
3007 /* Predict branch probabilities and estimate profile of the tree CFG. */
3008
3009 namespace {
3010
/* Descriptor of the "profile_estimate" GIMPLE pass: it is accounted to
   TV_BRANCH_PROB, requires PROP_cfg, and provides or destroys no
   properties and requests no TODO flags.  */
const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3023
3024 class pass_profile : public gimple_opt_pass
3025 {
3026 public:
3027   pass_profile (gcc::context *ctxt)
3028     : gimple_opt_pass (pass_data_profile, ctxt)
3029   {}
3030
3031   /* opt_pass methods: */
3032   virtual bool gate (function *) { return flag_guess_branch_prob; }
3033   virtual unsigned int execute (function *);
3034
3035 }; // class pass_profile
3036
3037 unsigned int
3038 pass_profile::execute (function *fun)
3039 {
3040   unsigned nb_loops;
3041
3042   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3043     return 0;
3044
3045   loop_optimizer_init (LOOPS_NORMAL);
3046   if (dump_file && (dump_flags & TDF_DETAILS))
3047     flow_loops_dump (dump_file, NULL, 0);
3048
3049   mark_irreducible_loops ();
3050
3051   nb_loops = number_of_loops (fun);
3052   if (nb_loops > 1)
3053     scev_initialize ();
3054
3055   tree_estimate_probability ();
3056
3057   if (nb_loops > 1)
3058     scev_finalize ();
3059
3060   loop_optimizer_finalize ();
3061   if (dump_file && (dump_flags & TDF_DETAILS))
3062     gimple_dump_cfg (dump_file, dump_flags);
3063  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3064     profile_status_for_fn (fun) = PROFILE_GUESSED;
3065   return 0;
3066 }
3067
3068 } // anon namespace
3069
3070 gimple_opt_pass *
3071 make_pass_profile (gcc::context *ctxt)
3072 {
3073   return new pass_profile (ctxt);
3074 }
3075
3076 namespace {
3077
/* Descriptor of the "*strip_predict_hints" GIMPLE pass: it is accounted to
   TV_BRANCH_PROB, requires PROP_cfg, and provides or destroys no
   properties and requests no TODO flags.  */
const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3090
3091 class pass_strip_predict_hints : public gimple_opt_pass
3092 {
3093 public:
3094   pass_strip_predict_hints (gcc::context *ctxt)
3095     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3096   {}
3097
3098   /* opt_pass methods: */
3099   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3100   virtual unsigned int execute (function *);
3101
3102 }; // class pass_strip_predict_hints
3103
3104 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3105    we no longer need.  */
3106 unsigned int
3107 pass_strip_predict_hints::execute (function *fun)
3108 {
3109   basic_block bb;
3110   gimple ass_stmt;
3111   tree var;
3112
3113   FOR_EACH_BB_FN (bb, fun)
3114     {
3115       gimple_stmt_iterator bi;
3116       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3117         {
3118           gimple stmt = gsi_stmt (bi);
3119
3120           if (gimple_code (stmt) == GIMPLE_PREDICT)
3121             {
3122               gsi_remove (&bi, true);
3123               continue;
3124             }
3125           else if (is_gimple_call (stmt))
3126             {
3127               tree fndecl = gimple_call_fndecl (stmt);
3128
3129               if ((fndecl
3130                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3131                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3132                    && gimple_call_num_args (stmt) == 2)
3133                   || (gimple_call_internal_p (stmt)
3134                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3135                 {
3136                   var = gimple_call_lhs (stmt);
3137                   if (var)
3138                     {
3139                       ass_stmt
3140                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3141                       gsi_replace (&bi, ass_stmt, true);
3142                     }
3143                   else
3144                     {
3145                       gsi_remove (&bi, true);
3146                       continue;
3147                     }
3148                 }
3149             }
3150           gsi_next (&bi);
3151         }
3152     }
3153   return 0;
3154 }
3155
3156 } // anon namespace
3157
3158 gimple_opt_pass *
3159 make_pass_strip_predict_hints (gcc::context *ctxt)
3160 {
3161   return new pass_strip_predict_hints (ctxt);
3162 }
3163
3164 /* Rebuild function frequencies.  Passes are in general expected to
3165    maintain profile by hand, however in some cases this is not possible:
3166    for example when inlining several functions with loops freuqencies might run
3167    out of scale and thus needs to be recomputed.  */
3168
3169 void
3170 rebuild_frequencies (void)
3171 {
3172   timevar_push (TV_REBUILD_FREQUENCIES);
3173
3174   /* When the max bb count in the function is small, there is a higher
3175      chance that there were truncation errors in the integer scaling
3176      of counts by inlining and other optimizations. This could lead
3177      to incorrect classification of code as being cold when it isn't.
3178      In that case, force the estimation of bb counts/frequencies from the
3179      branch probabilities, rather than computing frequencies from counts,
3180      which may also lead to frequencies incorrectly reduced to 0. There
3181      is less precision in the probabilities, so we only do this for small
3182      max counts.  */
3183   gcov_type count_max = 0;
3184   basic_block bb;
3185   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3186     count_max = MAX (bb->count, count_max);
3187
3188   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3189       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3190           && count_max < REG_BR_PROB_BASE/10))
3191     {
3192       loop_optimizer_init (0);
3193       add_noreturn_fake_exit_edges ();
3194       mark_irreducible_loops ();
3195       connect_infinite_loops_to_exit ();
3196       estimate_bb_frequencies (true);
3197       remove_fake_exit_edges ();
3198       loop_optimizer_finalize ();
3199     }
3200   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3201     counts_to_freqs ();
3202   else
3203     gcc_unreachable ();
3204   timevar_pop (TV_REBUILD_FREQUENCIES);
3205 }