gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "calls.h"
  36 #include "rtl.h"
  37 #include "tm_p.h"
  38 #include "hard-reg-set.h"
  39 #include "predict.h"
  40 #include "vec.h"
  41 #include "hashtab.h"
  42 #include "hash-set.h"
  43 #include "machmode.h"
  44 #include "input.h"
  45 #include "function.h"
  46 #include "dominance.h"
  47 #include "cfg.h"
  48 #include "cfganal.h"
  49 #include "basic-block.h"
  50 #include "insn-config.h"
  51 #include "regs.h"
  52 #include "flags.h"
  53 #include "profile.h"
  54 #include "except.h"
  55 #include "diagnostic-core.h"
  56 #include "recog.h"
  57 #include "expr.h"
  58 #include "coverage.h"
  59 #include "sreal.h"
  60 #include "params.h"
  61 #include "target.h"
  62 #include "cfgloop.h"
  63 #include "hash-map.h"
  64 #include "tree-ssa-alias.h"
  65 #include "internal-fn.h"
  66 #include "gimple-expr.h"
  67 #include "is-a.h"
  68 #include "gimple.h"
  69 #include "gimple-iterator.h"
  70 #include "gimple-ssa.h"
  71 #include "plugin-api.h"
  72 #include "ipa-ref.h"
  73 #include "cgraph.h"
  74 #include "tree-cfg.h"
  75 #include "tree-phinodes.h"
  76 #include "ssa-iterators.h"
  77 #include "tree-ssa-loop-niter.h"
  78 #include "tree-ssa-loop.h"
  79 #include "tree-pass.h"
  80 #include "tree-scalar-evolution.h"
  81 #include "cfgloop.h"
  82
  83 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  84                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  85 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  86              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  87
  88 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  89 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  90 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  91 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  92 static bool can_predict_insn_p (const rtx_insn *);
  93
  94 /* Information we hold about each branch predictor.
  95    Filled using information from predict.def.  */
  96
  97 struct predictor_info
  98 {
  99   const char *const name;       /* Name used in the debugging dumps.  */
 100   const int hitrate;            /* Expected hitrate used by
 101                                    predict_insn_def call.  */
 102   const int flags;
 103 };
 104
 105 /* Use given predictor without Dempster-Shaffer theory if it matches
 106    using first_match heuristics.  */
 107 #define PRED_FLAG_FIRST_MATCH 1
 108
 109 /* Recompute hitrate in percent to our representation.  */
 110
 111 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 112
 113 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 114 static const struct predictor_info predictor_info[]= {
 115 #include "predict.def"
 116
 117   /* Upper bound on predictors.  */
 118   {NULL, 0, 0}
 119 };
 120 #undef DEF_PREDICTOR
 121
 122 /* Return TRUE if frequency FREQ is considered to be hot.  */
 123
 124 static inline bool
 125 maybe_hot_frequency_p (struct function *fun, int freq)
 126 {
 127   struct cgraph_node *node = cgraph_node::get (fun->decl);
 128   if (!profile_info || !flag_branch_probabilities)
 129     {
 130       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 131         return false;
 132       if (node->frequency == NODE_FREQUENCY_HOT)
 133         return true;
 134     }
 135   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 136     return true;
 137   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 138       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 139     return false;
 140   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 141     return false;
 142   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 143               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 144     return false;
 145   return true;
 146 }
 147
 148 static gcov_type min_count = -1;
 149
 150 /* Determine the threshold for hot BB counts.  */
 151
 152 gcov_type
 153 get_hot_bb_threshold ()
 154 {
 155   gcov_working_set_t *ws;
 156   if (min_count == -1)
 157     {
 158       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 159       gcc_assert (ws);
 160       min_count = ws->min_counter;
 161     }
 162   return min_count;
 163 }
 164
 165 /* Set the threshold for hot BB counts.  */
 166
 167 void
 168 set_hot_bb_threshold (gcov_type min)
 169 {
 170   min_count = min;
 171 }
 172
 173 /* Return TRUE if frequency FREQ is considered to be hot.  */
 174
 175 bool
 176 maybe_hot_count_p (struct function *fun, gcov_type count)
 177 {
 178   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 179     return true;
 180   /* Code executed at most once is not hot.  */
 181   if (profile_info->runs >= count)
 182     return false;
 183   return (count >= get_hot_bb_threshold ());
 184 }
 185
 186 /* Return true in case BB can be CPU intensive and should be optimized
 187    for maximal performance.  */
 188
 189 bool
 190 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 191 {
 192   gcc_checking_assert (fun);
 193   if (profile_status_for_fn (fun) == PROFILE_READ)
 194     return maybe_hot_count_p (fun, bb->count);
 195   return maybe_hot_frequency_p (fun, bb->frequency);
 196 }
 197
 198 /* Return true in case BB can be CPU intensive and should be optimized
 199    for maximal performance.  */
 200
 201 bool
 202 maybe_hot_edge_p (edge e)
 203 {
 204   if (profile_status_for_fn (cfun) == PROFILE_READ)
 205     return maybe_hot_count_p (cfun, e->count);
 206   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 207 }
 208
 209 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 210    node frequency reflects never being executed.  */
 211
 212 static bool
 213 probably_never_executed (struct function *fun,
 214                          gcov_type count, int frequency)
 215 {
 216   gcc_checking_assert (fun);
 217   if (profile_status_for_fn (cfun) == PROFILE_READ)
 218     {
 219       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 220       if (count * unlikely_count_fraction >= profile_info->runs)
 221         return false;
 222       if (!frequency)
 223         return true;
 224       if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
 225         return false;
 226       if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
 227         {
 228           gcov_type computed_count;
 229           /* Check for possibility of overflow, in which case entry bb count
 230              is large enough to do the division first without losing much
 231              precision.  */
 232           if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
 233               REG_BR_PROB_BASE)
 234             {
 235               gcov_type scaled_count
 236                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
 237              unlikely_count_fraction;
 238               computed_count = RDIV (scaled_count,
 239                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 240             }
 241           else
 242             {
 243               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
 244                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 245               computed_count *= frequency * unlikely_count_fraction;
 246             }
 247           if (computed_count >= profile_info->runs)
 248             return false;
 249         }
 250       return true;
 251     }
 252   if ((!profile_info || !flag_branch_probabilities)
 253       && (cgraph_node::get (fun->decl)->frequency
 254           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 255     return true;
 256   return false;
 257 }
 258
 259
 260 /* Return true in case BB is probably never executed.  */
 261
 262 bool
 263 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 264 {
 265   return probably_never_executed (fun, bb->count, bb->frequency);
 266 }
 267
 268
 269 /* Return true in case edge E is probably never executed.  */
 270
 271 bool
 272 probably_never_executed_edge_p (struct function *fun, edge e)
 273 {
 274   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 275 }
 276
 277 /* Return true when current function should always be optimized for size.  */
 278
 279 bool
 280 optimize_function_for_size_p (struct function *fun)
 281 {
 282   if (optimize_size)
 283     return true;
 284   if (!fun || !fun->decl)
 285     return false;
 286
 287   cgraph_node *n = cgraph_node::get (fun->decl);
 288   return n && n->optimize_for_size_p ();
 289 }
 290
 291 /* Return true when current function should always be optimized for speed.  */
 292
 293 bool
 294 optimize_function_for_speed_p (struct function *fun)
 295 {
 296   return !optimize_function_for_size_p (fun);
 297 }
 298
 299 /* Return TRUE when BB should be optimized for size.  */
 300
 301 bool
 302 optimize_bb_for_size_p (const_basic_block bb)
 303 {
 304   return (optimize_function_for_size_p (cfun)
 305           || (bb && !maybe_hot_bb_p (cfun, bb)));
 306 }
 307
 308 /* Return TRUE when BB should be optimized for speed.  */
 309
 310 bool
 311 optimize_bb_for_speed_p (const_basic_block bb)
 312 {
 313   return !optimize_bb_for_size_p (bb);
 314 }
 315
 316 /* Return TRUE when BB should be optimized for size.  */
 317
 318 bool
 319 optimize_edge_for_size_p (edge e)
 320 {
 321   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 322 }
 323
 324 /* Return TRUE when BB should be optimized for speed.  */
 325
 326 bool
 327 optimize_edge_for_speed_p (edge e)
 328 {
 329   return !optimize_edge_for_size_p (e);
 330 }
 331
 332 /* Return TRUE when BB should be optimized for size.  */
 333
 334 bool
 335 optimize_insn_for_size_p (void)
 336 {
 337   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 338 }
 339
 340 /* Return TRUE when BB should be optimized for speed.  */
 341
 342 bool
 343 optimize_insn_for_speed_p (void)
 344 {
 345   return !optimize_insn_for_size_p ();
 346 }
 347
 348 /* Return TRUE when LOOP should be optimized for size.  */
 349
 350 bool
 351 optimize_loop_for_size_p (struct loop *loop)
 352 {
 353   return optimize_bb_for_size_p (loop->header);
 354 }
 355
 356 /* Return TRUE when LOOP should be optimized for speed.  */
 357
 358 bool
 359 optimize_loop_for_speed_p (struct loop *loop)
 360 {
 361   return optimize_bb_for_speed_p (loop->header);
 362 }
 363
 364 /* Return TRUE when LOOP nest should be optimized for speed.  */
 365
 366 bool
 367 optimize_loop_nest_for_speed_p (struct loop *loop)
 368 {
 369   struct loop *l = loop;
 370   if (optimize_loop_for_speed_p (loop))
 371     return true;
 372   l = loop->inner;
 373   while (l && l != loop)
 374     {
 375       if (optimize_loop_for_speed_p (l))
 376         return true;
 377       if (l->inner)
 378         l = l->inner;
 379       else if (l->next)
 380         l = l->next;
 381       else
 382         {
 383           while (l != loop && !l->next)
 384             l = loop_outer (l);
 385           if (l != loop)
 386             l = l->next;
 387         }
 388     }
 389   return false;
 390 }
 391
 392 /* Return TRUE when LOOP nest should be optimized for size.  */
 393
 394 bool
 395 optimize_loop_nest_for_size_p (struct loop *loop)
 396 {
 397   return !optimize_loop_nest_for_speed_p (loop);
 398 }
 399
 400 /* Return true when edge E is likely to be well predictable by branch
 401    predictor.  */
 402
 403 bool
 404 predictable_edge_p (edge e)
 405 {
 406   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 407     return false;
 408   if ((e->probability
 409        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 410       || (REG_BR_PROB_BASE - e->probability
 411           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 412     return true;
 413   return false;
 414 }
 415
 416
 417 /* Set RTL expansion for BB profile.  */
 418
 419 void
 420 rtl_profile_for_bb (basic_block bb)
 421 {
 422   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 423 }
 424
 425 /* Set RTL expansion for edge profile.  */
 426
 427 void
 428 rtl_profile_for_edge (edge e)
 429 {
 430   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 431 }
 432
 433 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 434 void
 435 default_rtl_profile (void)
 436 {
 437   crtl->maybe_hot_insn_p = true;
 438 }
 439
 440 /* Return true if the one of outgoing edges is already predicted by
 441    PREDICTOR.  */
 442
 443 bool
 444 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 445 {
 446   rtx note;
 447   if (!INSN_P (BB_END (bb)))
 448     return false;
 449   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 450     if (REG_NOTE_KIND (note) == REG_BR_PRED
 451         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 452       return true;
 453   return false;
 454 }
 455
 456 /*  Structure representing predictions in tree level. */
 457
 458 struct edge_prediction {
 459     struct edge_prediction *ep_next;
 460     edge ep_edge;
 461     enum br_predictor ep_predictor;
 462     int ep_probability;
 463 };
 464
 465 /* This map contains for a basic block the list of predictions for the
 466    outgoing edges.  */
 467
 468 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 469
 470 /* Return true if the one of outgoing edges is already predicted by
 471    PREDICTOR.  */
 472
 473 bool
 474 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 475 {
 476   struct edge_prediction *i;
 477   edge_prediction **preds = bb_predictions->get (bb);
 478
 479   if (!preds)
 480     return false;
 481
 482   for (i = *preds; i; i = i->ep_next)
 483     if (i->ep_predictor == predictor)
 484       return true;
 485   return false;
 486 }
 487
 488 /* Return true when the probability of edge is reliable.
 489
 490    The profile guessing code is good at predicting branch outcome (ie.
 491    taken/not taken), that is predicted right slightly over 75% of time.
 492    It is however notoriously poor on predicting the probability itself.
 493    In general the profile appear a lot flatter (with probabilities closer
 494    to 50%) than the reality so it is bad idea to use it to drive optimization
 495    such as those disabling dynamic branch prediction for well predictable
 496    branches.
 497
 498    There are two exceptions - edges leading to noreturn edges and edges
 499    predicted by number of iterations heuristics are predicted well.  This macro
 500    should be able to distinguish those, but at the moment it simply check for
 501    noreturn heuristic that is only one giving probability over 99% or bellow
 502    1%.  In future we might want to propagate reliability information across the
 503    CFG if we find this information useful on multiple places.   */
 504 static bool
 505 probability_reliable_p (int prob)
 506 {
 507   return (profile_status_for_fn (cfun) == PROFILE_READ
 508           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 509               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 510 }
 511
 512 /* Same predicate as above, working on edges.  */
 513 bool
 514 edge_probability_reliable_p (const_edge e)
 515 {
 516   return probability_reliable_p (e->probability);
 517 }
 518
 519 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 520 bool
 521 br_prob_note_reliable_p (const_rtx note)
 522 {
 523   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 524   return probability_reliable_p (XINT (note, 0));
 525 }
 526
 527 static void
 528 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 529 {
 530   gcc_assert (any_condjump_p (insn));
 531   if (!flag_guess_branch_prob)
 532     return;
 533
 534   add_reg_note (insn, REG_BR_PRED,
 535                 gen_rtx_CONCAT (VOIDmode,
 536                                 GEN_INT ((int) predictor),
 537                                 GEN_INT ((int) probability)));
 538 }
 539
 540 /* Predict insn by given predictor.  */
 541
 542 void
 543 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 544                   enum prediction taken)
 545 {
 546    int probability = predictor_info[(int) predictor].hitrate;
 547
 548    if (taken != TAKEN)
 549      probability = REG_BR_PROB_BASE - probability;
 550
 551    predict_insn (insn, predictor, probability);
 552 }
 553
 554 /* Predict edge E with given probability if possible.  */
 555
 556 void
 557 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 558 {
 559   rtx_insn *last_insn;
 560   last_insn = BB_END (e->src);
 561
 562   /* We can store the branch prediction information only about
 563      conditional jumps.  */
 564   if (!any_condjump_p (last_insn))
 565     return;
 566
 567   /* We always store probability of branching.  */
 568   if (e->flags & EDGE_FALLTHRU)
 569     probability = REG_BR_PROB_BASE - probability;
 570
 571   predict_insn (last_insn, predictor, probability);
 572 }
 573
 574 /* Predict edge E with the given PROBABILITY.  */
 575 void
 576 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 577 {
 578   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 579   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 580        1)
 581       && flag_guess_branch_prob && optimize)
 582     {
 583       struct edge_prediction *i = XNEW (struct edge_prediction);
 584       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 585
 586       i->ep_next = preds;
 587       preds = i;
 588       i->ep_probability = probability;
 589       i->ep_predictor = predictor;
 590       i->ep_edge = e;
 591     }
 592 }
 593
 594 /* Remove all predictions on given basic block that are attached
 595    to edge E.  */
 596 void
 597 remove_predictions_associated_with_edge (edge e)
 598 {
 599   if (!bb_predictions)
 600     return;
 601
 602   edge_prediction **preds = bb_predictions->get (e->src);
 603
 604   if (preds)
 605     {
 606       struct edge_prediction **prediction = preds;
 607       struct edge_prediction *next;
 608
 609       while (*prediction)
 610         {
 611           if ((*prediction)->ep_edge == e)
 612             {
 613               next = (*prediction)->ep_next;
 614               free (*prediction);
 615               *prediction = next;
 616             }
 617           else
 618             prediction = &((*prediction)->ep_next);
 619         }
 620     }
 621 }
 622
 623 /* Clears the list of predictions stored for BB.  */
 624
 625 static void
 626 clear_bb_predictions (basic_block bb)
 627 {
 628   edge_prediction **preds = bb_predictions->get (bb);
 629   struct edge_prediction *pred, *next;
 630
 631   if (!preds)
 632     return;
 633
 634   for (pred = *preds; pred; pred = next)
 635     {
 636       next = pred->ep_next;
 637       free (pred);
 638     }
 639   *preds = NULL;
 640 }
 641
 642 /* Return true when we can store prediction on insn INSN.
 643    At the moment we represent predictions only on conditional
 644    jumps, not at computed jump or other complicated cases.  */
 645 static bool
 646 can_predict_insn_p (const rtx_insn *insn)
 647 {
 648   return (JUMP_P (insn)
 649           && any_condjump_p (insn)
 650           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 651 }
 652
 653 /* Predict edge E by given predictor if possible.  */
 654
 655 void
 656 predict_edge_def (edge e, enum br_predictor predictor,
 657                   enum prediction taken)
 658 {
 659    int probability = predictor_info[(int) predictor].hitrate;
 660
 661    if (taken != TAKEN)
 662      probability = REG_BR_PROB_BASE - probability;
 663
 664    predict_edge (e, predictor, probability);
 665 }
 666
 667 /* Invert all branch predictions or probability notes in the INSN.  This needs
 668    to be done each time we invert the condition used by the jump.  */
 669
 670 void
 671 invert_br_probabilities (rtx insn)
 672 {
 673   rtx note;
 674
 675   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 676     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 677       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 678     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 679       XEXP (XEXP (note, 0), 1)
 680         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 681 }
 682
 683 /* Dump information about the branch prediction to the output file.  */
 684
 685 static void
 686 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 687                  basic_block bb, int used)
 688 {
 689   edge e;
 690   edge_iterator ei;
 691
 692   if (!file)
 693     return;
 694
 695   FOR_EACH_EDGE (e, ei, bb->succs)
 696     if (! (e->flags & EDGE_FALLTHRU))
 697       break;
 698
 699   fprintf (file, "  %s heuristics%s: %.1f%%",
 700            predictor_info[predictor].name,
 701            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 702
 703   if (bb->count)
 704     {
 705       fprintf (file, "  exec %"PRId64, bb->count);
 706       if (e)
 707         {
 708           fprintf (file, " hit %"PRId64, e->count);
 709           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 710         }
 711     }
 712
 713   fprintf (file, "\n");
 714 }
 715
 716 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 717    evenly and hope for the best.  */
 718 static void
 719 set_even_probabilities (basic_block bb)
 720 {
 721   int nedges = 0;
 722   edge e;
 723   edge_iterator ei;
 724
 725   FOR_EACH_EDGE (e, ei, bb->succs)
 726     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 727       nedges ++;
 728   FOR_EACH_EDGE (e, ei, bb->succs)
 729     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 730       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 731     else
 732       e->probability = 0;
 733 }
 734
 735 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 736    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 737
 738 static void
 739 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 740 {
 741   rtx prob_note;
 742   rtx *pnote;
 743   rtx note;
 744   int best_probability = PROB_EVEN;
 745   enum br_predictor best_predictor = END_PREDICTORS;
 746   int combined_probability = REG_BR_PROB_BASE / 2;
 747   int d;
 748   bool first_match = false;
 749   bool found = false;
 750
 751   if (!can_predict_insn_p (insn))
 752     {
 753       set_even_probabilities (bb);
 754       return;
 755     }
 756
 757   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 758   pnote = &REG_NOTES (insn);
 759   if (dump_file)
 760     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 761              bb->index);
 762
 763   /* We implement "first match" heuristics and use probability guessed
 764      by predictor with smallest index.  */
 765   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 766     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 767       {
 768         enum br_predictor predictor = ((enum br_predictor)
 769                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 770         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 771
 772         found = true;
 773         if (best_predictor > predictor)
 774           best_probability = probability, best_predictor = predictor;
 775
 776         d = (combined_probability * probability
 777              + (REG_BR_PROB_BASE - combined_probability)
 778              * (REG_BR_PROB_BASE - probability));
 779
 780         /* Use FP math to avoid overflows of 32bit integers.  */
 781         if (d == 0)
 782           /* If one probability is 0% and one 100%, avoid division by zero.  */
 783           combined_probability = REG_BR_PROB_BASE / 2;
 784         else
 785           combined_probability = (((double) combined_probability) * probability
 786                                   * REG_BR_PROB_BASE / d + 0.5);
 787       }
 788
 789   /* Decide which heuristic to use.  In case we didn't match anything,
 790      use no_prediction heuristic, in case we did match, use either
 791      first match or Dempster-Shaffer theory depending on the flags.  */
 792
 793   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 794     first_match = true;
 795
 796   if (!found)
 797     dump_prediction (dump_file, PRED_NO_PREDICTION,
 798                      combined_probability, bb, true);
 799   else
 800     {
 801       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 802                        bb, !first_match);
 803       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 804                        bb, first_match);
 805     }
 806
 807   if (first_match)
 808     combined_probability = best_probability;
 809   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 810
 811   while (*pnote)
 812     {
 813       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 814         {
 815           enum br_predictor predictor = ((enum br_predictor)
 816                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 817           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 818
 819           dump_prediction (dump_file, predictor, probability, bb,
 820                            !first_match || best_predictor == predictor);
 821           *pnote = XEXP (*pnote, 1);
 822         }
 823       else
 824         pnote = &XEXP (*pnote, 1);
 825     }
 826
 827   if (!prob_note)
 828     {
 829       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 830
 831       /* Save the prediction into CFG in case we are seeing non-degenerated
 832          conditional jump.  */
 833       if (!single_succ_p (bb))
 834         {
 835           BRANCH_EDGE (bb)->probability = combined_probability;
 836           FALLTHRU_EDGE (bb)->probability
 837             = REG_BR_PROB_BASE - combined_probability;
 838         }
 839     }
 840   else if (!single_succ_p (bb))
 841     {
 842       int prob = XINT (prob_note, 0);
 843
 844       BRANCH_EDGE (bb)->probability = prob;
 845       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 846     }
 847   else
 848     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 849 }
 850
 851 /* Combine predictions into single probability and store them into CFG.
 852    Remove now useless prediction entries.  */
 853
 854 static void
 855 combine_predictions_for_bb (basic_block bb)
 856 {
 857   int best_probability = PROB_EVEN;
 858   enum br_predictor best_predictor = END_PREDICTORS;
 859   int combined_probability = REG_BR_PROB_BASE / 2;
 860   int d;
 861   bool first_match = false;
 862   bool found = false;
 863   struct edge_prediction *pred;
 864   int nedges = 0;
 865   edge e, first = NULL, second = NULL;
 866   edge_iterator ei;
 867
 868   FOR_EACH_EDGE (e, ei, bb->succs)
 869     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 870       {
 871         nedges ++;
 872         if (first && !second)
 873           second = e;
 874         if (!first)
 875           first = e;
 876       }
 877
 878   /* When there is no successor or only one choice, prediction is easy.
 879
 880      We are lazy for now and predict only basic blocks with two outgoing
 881      edges.  It is possible to predict generic case too, but we have to
 882      ignore first match heuristics and do more involved combining.  Implement
 883      this later.  */
 884   if (nedges != 2)
 885     {
 886       if (!bb->count)
 887         set_even_probabilities (bb);
 888       clear_bb_predictions (bb);
 889       if (dump_file)
 890         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 891                  nedges, bb->index);
 892       return;
 893     }
 894
 895   if (dump_file)
 896     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 897
 898   edge_prediction **preds = bb_predictions->get (bb);
 899   if (preds)
 900     {
 901       /* We implement "first match" heuristics and use probability guessed
 902          by predictor with smallest index.  */
 903       for (pred = *preds; pred; pred = pred->ep_next)
 904         {
 905           enum br_predictor predictor = pred->ep_predictor;
 906           int probability = pred->ep_probability;
 907
 908           if (pred->ep_edge != first)
 909             probability = REG_BR_PROB_BASE - probability;
 910
 911           found = true;
 912           /* First match heuristics would be widly confused if we predicted
 913              both directions.  */
 914           if (best_predictor > predictor)
 915             {
 916               struct edge_prediction *pred2;
 917               int prob = probability;
 918
 919               for (pred2 = (struct edge_prediction *) *preds;
 920                    pred2; pred2 = pred2->ep_next)
 921                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 922                  {
 923                    int probability2 = pred->ep_probability;
 924
 925                    if (pred2->ep_edge != first)
 926                      probability2 = REG_BR_PROB_BASE - probability2;
 927
 928                    if ((probability < REG_BR_PROB_BASE / 2) !=
 929                        (probability2 < REG_BR_PROB_BASE / 2))
 930                      break;
 931
 932                    /* If the same predictor later gave better result, go for it! */
 933                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 934                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 935                      prob = probability2;
 936                  }
 937               if (!pred2)
 938                 best_probability = prob, best_predictor = predictor;
 939             }
 940
 941           d = (combined_probability * probability
 942                + (REG_BR_PROB_BASE - combined_probability)
 943                * (REG_BR_PROB_BASE - probability));
 944
 945           /* Use FP math to avoid overflows of 32bit integers.  */
 946           if (d == 0)
 947             /* If one probability is 0% and one 100%, avoid division by zero.  */
 948             combined_probability = REG_BR_PROB_BASE / 2;
 949           else
 950             combined_probability = (((double) combined_probability)
 951                                     * probability
 952                                     * REG_BR_PROB_BASE / d + 0.5);
 953         }
 954     }
 955
 956   /* Decide which heuristic to use.  In case we didn't match anything,
 957      use no_prediction heuristic, in case we did match, use either
 958      first match or Dempster-Shaffer theory depending on the flags.  */
 959
 960   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 961     first_match = true;
 962
 963   if (!found)
 964     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 965   else
 966     {
 967       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 968                        !first_match);
 969       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 970                        first_match);
 971     }
 972
 973   if (first_match)
 974     combined_probability = best_probability;
 975   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 976
 977   if (preds)
 978     {
 979       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 980         {
 981           enum br_predictor predictor = pred->ep_predictor;
 982           int probability = pred->ep_probability;
 983
 984           if (pred->ep_edge != EDGE_SUCC (bb, 0))
 985             probability = REG_BR_PROB_BASE - probability;
 986           dump_prediction (dump_file, predictor, probability, bb,
 987                            !first_match || best_predictor == predictor);
 988         }
 989     }
 990   clear_bb_predictions (bb);
 991
 992   if (!bb->count)
 993     {
 994       first->probability = combined_probability;
 995       second->probability = REG_BR_PROB_BASE - combined_probability;
 996     }
 997 }
 998
 999 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1000    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1001
1002    T1 and T2 should be one of the following cases:
1003      1. T1 is SSA_NAME, T2 is NULL
1004      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1005      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1006
1007 static tree
1008 strips_small_constant (tree t1, tree t2)
1009 {
1010   tree ret = NULL;
1011   int value = 0;
1012
1013   if (!t1)
1014     return NULL;
1015   else if (TREE_CODE (t1) == SSA_NAME)
1016     ret = t1;
1017   else if (tree_fits_shwi_p (t1))
1018     value = tree_to_shwi (t1);
1019   else
1020     return NULL;
1021
1022   if (!t2)
1023     return ret;
1024   else if (tree_fits_shwi_p (t2))
1025     value = tree_to_shwi (t2);
1026   else if (TREE_CODE (t2) == SSA_NAME)
1027     {
1028       if (ret)
1029         return NULL;
1030       else
1031         ret = t2;
1032     }
1033
1034   if (value <= 4 && value >= -4)
1035     return ret;
1036   else
1037     return NULL;
1038 }
1039
1040 /* Return the SSA_NAME in T or T's operands.
1041    Return NULL if SSA_NAME cannot be found.  */
1042
1043 static tree
1044 get_base_value (tree t)
1045 {
1046   if (TREE_CODE (t) == SSA_NAME)
1047     return t;
1048
1049   if (!BINARY_CLASS_P (t))
1050     return NULL;
1051
1052   switch (TREE_OPERAND_LENGTH (t))
1053     {
1054     case 1:
1055       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1056     case 2:
1057       return strips_small_constant (TREE_OPERAND (t, 0),
1058                                     TREE_OPERAND (t, 1));
1059     default:
1060       return NULL;
1061     }
1062 }
1063
1064 /* Check the compare STMT in LOOP. If it compares an induction
1065    variable to a loop invariant, return true, and save
1066    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1067    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1068
1069 static bool
1070 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1071                                      tree *loop_invariant,
1072                                      enum tree_code *compare_code,
1073                                      tree *loop_step,
1074                                      tree *loop_iv_base)
1075 {
1076   tree op0, op1, bound, base;
1077   affine_iv iv0, iv1;
1078   enum tree_code code;
1079   tree step;
1080
1081   code = gimple_cond_code (stmt);
1082   *loop_invariant = NULL;
1083
1084   switch (code)
1085     {
1086     case GT_EXPR:
1087     case GE_EXPR:
1088     case NE_EXPR:
1089     case LT_EXPR:
1090     case LE_EXPR:
1091     case EQ_EXPR:
1092       break;
1093
1094     default:
1095       return false;
1096     }
1097
1098   op0 = gimple_cond_lhs (stmt);
1099   op1 = gimple_cond_rhs (stmt);
1100
1101   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1102        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1103     return false;
1104   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1105     return false;
1106   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1107     return false;
1108   if (TREE_CODE (iv0.step) != INTEGER_CST
1109       || TREE_CODE (iv1.step) != INTEGER_CST)
1110     return false;
1111   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1112       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1113     return false;
1114
1115   if (integer_zerop (iv0.step))
1116     {
1117       if (code != NE_EXPR && code != EQ_EXPR)
1118         code = invert_tree_comparison (code, false);
1119       bound = iv0.base;
1120       base = iv1.base;
1121       if (tree_fits_shwi_p (iv1.step))
1122         step = iv1.step;
1123       else
1124         return false;
1125     }
1126   else
1127     {
1128       bound = iv1.base;
1129       base = iv0.base;
1130       if (tree_fits_shwi_p (iv0.step))
1131         step = iv0.step;
1132       else
1133         return false;
1134     }
1135
1136   if (TREE_CODE (bound) != INTEGER_CST)
1137     bound = get_base_value (bound);
1138   if (!bound)
1139     return false;
1140   if (TREE_CODE (base) != INTEGER_CST)
1141     base = get_base_value (base);
1142   if (!base)
1143     return false;
1144
1145   *loop_invariant = bound;
1146   *compare_code = code;
1147   *loop_step = step;
1148   *loop_iv_base = base;
1149   return true;
1150 }
1151
1152 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1153
1154 static bool
1155 expr_coherent_p (tree t1, tree t2)
1156 {
1157   gimple stmt;
1158   tree ssa_name_1 = NULL;
1159   tree ssa_name_2 = NULL;
1160
1161   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1162   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1163
1164   if (t1 == t2)
1165     return true;
1166
1167   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1168     return true;
1169   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1170     return false;
1171
1172   /* Check to see if t1 is expressed/defined with t2.  */
1173   stmt = SSA_NAME_DEF_STMT (t1);
1174   gcc_assert (stmt != NULL);
1175   if (is_gimple_assign (stmt))
1176     {
1177       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1178       if (ssa_name_1 && ssa_name_1 == t2)
1179         return true;
1180     }
1181
1182   /* Check to see if t2 is expressed/defined with t1.  */
1183   stmt = SSA_NAME_DEF_STMT (t2);
1184   gcc_assert (stmt != NULL);
1185   if (is_gimple_assign (stmt))
1186     {
1187       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1188       if (ssa_name_2 && ssa_name_2 == t1)
1189         return true;
1190     }
1191
1192   /* Compare if t1 and t2's def_stmts are identical.  */
1193   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1194     return true;
1195   else
1196     return false;
1197 }
1198
1199 /* Predict branch probability of BB when BB contains a branch that compares
1200    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1201    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1202
1203    E.g.
1204      for (int i = 0; i < bound; i++) {
1205        if (i < bound - 2)
1206          computation_1();
1207        else
1208          computation_2();
1209      }
1210
1211   In this loop, we will predict the branch inside the loop to be taken.  */
1212
1213 static void
1214 predict_iv_comparison (struct loop *loop, basic_block bb,
1215                        tree loop_bound_var,
1216                        tree loop_iv_base_var,
1217                        enum tree_code loop_bound_code,
1218                        int loop_bound_step)
1219 {
1220   gimple stmt;
1221   tree compare_var, compare_base;
1222   enum tree_code compare_code;
1223   tree compare_step_var;
1224   edge then_edge;
1225   edge_iterator ei;
1226
1227   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1228       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1229       || predicted_by_p (bb, PRED_LOOP_EXIT))
1230     return;
1231
1232   stmt = last_stmt (bb);
1233   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1234     return;
1235   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1236                                             loop, &compare_var,
1237                                             &compare_code,
1238                                             &compare_step_var,
1239                                             &compare_base))
1240     return;
1241
1242   /* Find the taken edge.  */
1243   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1244     if (then_edge->flags & EDGE_TRUE_VALUE)
1245       break;
1246
1247   /* When comparing an IV to a loop invariant, NE is more likely to be
1248      taken while EQ is more likely to be not-taken.  */
1249   if (compare_code == NE_EXPR)
1250     {
1251       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1252       return;
1253     }
1254   else if (compare_code == EQ_EXPR)
1255     {
1256       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1257       return;
1258     }
1259
1260   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1261     return;
1262
1263   /* If loop bound, base and compare bound are all constants, we can
1264      calculate the probability directly.  */
1265   if (tree_fits_shwi_p (loop_bound_var)
1266       && tree_fits_shwi_p (compare_var)
1267       && tree_fits_shwi_p (compare_base))
1268     {
1269       int probability;
1270       bool overflow, overall_overflow = false;
1271       widest_int compare_count, tem;
1272
1273       /* (loop_bound - base) / compare_step */
1274       tem = wi::sub (wi::to_widest (loop_bound_var),
1275                      wi::to_widest (compare_base), SIGNED, &overflow);
1276       overall_overflow |= overflow;
1277       widest_int loop_count = wi::div_trunc (tem,
1278                                              wi::to_widest (compare_step_var),
1279                                              SIGNED, &overflow);
1280       overall_overflow |= overflow;
1281
1282       if (!wi::neg_p (wi::to_widest (compare_step_var))
1283           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1284         {
1285           /* (loop_bound - compare_bound) / compare_step */
1286           tem = wi::sub (wi::to_widest (loop_bound_var),
1287                          wi::to_widest (compare_var), SIGNED, &overflow);
1288           overall_overflow |= overflow;
1289           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1290                                          SIGNED, &overflow);
1291           overall_overflow |= overflow;
1292         }
1293       else
1294         {
1295           /* (compare_bound - base) / compare_step */
1296           tem = wi::sub (wi::to_widest (compare_var),
1297                          wi::to_widest (compare_base), SIGNED, &overflow);
1298           overall_overflow |= overflow;
1299           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1300                                          SIGNED, &overflow);
1301           overall_overflow |= overflow;
1302         }
1303       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1304         ++compare_count;
1305       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1306         ++loop_count;
1307       if (wi::neg_p (compare_count))
1308         compare_count = 0;
1309       if (wi::neg_p (loop_count))
1310         loop_count = 0;
1311       if (loop_count == 0)
1312         probability = 0;
1313       else if (wi::cmps (compare_count, loop_count) == 1)
1314         probability = REG_BR_PROB_BASE;
1315       else
1316         {
1317           tem = compare_count * REG_BR_PROB_BASE;
1318           tem = wi::udiv_trunc (tem, loop_count);
1319           probability = tem.to_uhwi ();
1320         }
1321
1322       if (!overall_overflow)
1323         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1324
1325       return;
1326     }
1327
1328   if (expr_coherent_p (loop_bound_var, compare_var))
1329     {
1330       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1331           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1332         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1333       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1334                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1335         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1336       else if (loop_bound_code == NE_EXPR)
1337         {
1338           /* If the loop backedge condition is "(i != bound)", we do
1339              the comparison based on the step of IV:
1340              * step < 0 : backedge condition is like (i > bound)
1341              * step > 0 : backedge condition is like (i < bound)  */
1342           gcc_assert (loop_bound_step != 0);
1343           if (loop_bound_step > 0
1344               && (compare_code == LT_EXPR
1345                   || compare_code == LE_EXPR))
1346             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1347           else if (loop_bound_step < 0
1348                    && (compare_code == GT_EXPR
1349                        || compare_code == GE_EXPR))
1350             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1351           else
1352             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1353         }
1354       else
1355         /* The branch is predicted not-taken if loop_bound_code is
1356            opposite with compare_code.  */
1357         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1358     }
1359   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1360     {
1361       /* For cases like:
1362            for (i = s; i < h; i++)
1363              if (i > s + 2) ....
1364          The branch should be predicted taken.  */
1365       if (loop_bound_step > 0
1366           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1367         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1368       else if (loop_bound_step < 0
1369                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1370         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1371       else
1372         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1373     }
1374 }
1375
1376 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1377    exits are resulted from short-circuit conditions that will generate an
1378    if_tmp. E.g.:
1379
1380    if (foo() || global > 10)
1381      break;
1382
1383    This will be translated into:
1384
1385    BB3:
1386      loop header...
1387    BB4:
1388      if foo() goto BB6 else goto BB5
1389    BB5:
1390      if global > 10 goto BB6 else goto BB7
1391    BB6:
1392      goto BB7
1393    BB7:
1394      iftmp = (PHI 0(BB5), 1(BB6))
1395      if iftmp == 1 goto BB8 else goto BB3
1396    BB8:
1397      outside of the loop...
1398
1399    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1400    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1401    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1402    exits to predict them using PRED_LOOP_EXIT.  */
1403
1404 static void
1405 predict_extra_loop_exits (edge exit_edge)
1406 {
1407   unsigned i;
1408   bool check_value_one;
1409   gimple lhs_def_stmt;
1410   gphi *phi_stmt;
1411   tree cmp_rhs, cmp_lhs;
1412   gimple last;
1413   gcond *cmp_stmt;
1414
1415   last = last_stmt (exit_edge->src);
1416   if (!last)
1417     return;
1418   cmp_stmt = dyn_cast <gcond *> (last);
1419   if (!cmp_stmt)
1420     return;
1421
1422   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1423   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1424   if (!TREE_CONSTANT (cmp_rhs)
1425       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1426     return;
1427   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1428     return;
1429
1430   /* If check_value_one is true, only the phi_args with value '1' will lead
1431      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1432      loop exit.  */
1433   check_value_one = (((integer_onep (cmp_rhs))
1434                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1435                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1436
1437   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1438   if (!lhs_def_stmt)
1439     return;
1440
1441   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1442   if (!phi_stmt)
1443     return;
1444
1445   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1446     {
1447       edge e1;
1448       edge_iterator ei;
1449       tree val = gimple_phi_arg_def (phi_stmt, i);
1450       edge e = gimple_phi_arg_edge (phi_stmt, i);
1451
1452       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1453         continue;
1454       if ((check_value_one ^ integer_onep (val)) == 1)
1455         continue;
1456       if (EDGE_COUNT (e->src->succs) != 1)
1457         {
1458           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1459           continue;
1460         }
1461
1462       FOR_EACH_EDGE (e1, ei, e->src->preds)
1463         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1464     }
1465 }
1466
1467 /* Predict edge probabilities by exploiting loop structure.  */
1468
1469 static void
1470 predict_loops (void)
1471 {
1472   struct loop *loop;
1473
1474   /* Try to predict out blocks in a loop that are not part of a
1475      natural loop.  */
1476   FOR_EACH_LOOP (loop, 0)
1477     {
1478       basic_block bb, *bbs;
1479       unsigned j, n_exits;
1480       vec<edge> exits;
1481       struct tree_niter_desc niter_desc;
1482       edge ex;
1483       struct nb_iter_bound *nb_iter;
1484       enum tree_code loop_bound_code = ERROR_MARK;
1485       tree loop_bound_step = NULL;
1486       tree loop_bound_var = NULL;
1487       tree loop_iv_base = NULL;
1488       gcond *stmt = NULL;
1489
1490       exits = get_loop_exit_edges (loop);
1491       n_exits = exits.length ();
1492       if (!n_exits)
1493         {
1494           exits.release ();
1495           continue;
1496         }
1497
1498       FOR_EACH_VEC_ELT (exits, j, ex)
1499         {
1500           tree niter = NULL;
1501           HOST_WIDE_INT nitercst;
1502           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1503           int probability;
1504           enum br_predictor predictor;
1505
1506           predict_extra_loop_exits (ex);
1507
1508           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1509             niter = niter_desc.niter;
1510           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1511             niter = loop_niter_by_eval (loop, ex);
1512
1513           if (TREE_CODE (niter) == INTEGER_CST)
1514             {
1515               if (tree_fits_uhwi_p (niter)
1516                   && max
1517                   && compare_tree_int (niter, max - 1) == -1)
1518                 nitercst = tree_to_uhwi (niter) + 1;
1519               else
1520                 nitercst = max;
1521               predictor = PRED_LOOP_ITERATIONS;
1522             }
1523           /* If we have just one exit and we can derive some information about
1524              the number of iterations of the loop from the statements inside
1525              the loop, use it to predict this exit.  */
1526           else if (n_exits == 1)
1527             {
1528               nitercst = estimated_stmt_executions_int (loop);
1529               if (nitercst < 0)
1530                 continue;
1531               if (nitercst > max)
1532                 nitercst = max;
1533
1534               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1535             }
1536           else
1537             continue;
1538
1539           /* If the prediction for number of iterations is zero, do not
1540              predict the exit edges.  */
1541           if (nitercst == 0)
1542             continue;
1543
1544           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1545           predict_edge (ex, predictor, probability);
1546         }
1547       exits.release ();
1548
1549       /* Find information about loop bound variables.  */
1550       for (nb_iter = loop->bounds; nb_iter;
1551            nb_iter = nb_iter->next)
1552         if (nb_iter->stmt
1553             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1554           {
1555             stmt = as_a <gcond *> (nb_iter->stmt);
1556             break;
1557           }
1558       if (!stmt && last_stmt (loop->header)
1559           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1560         stmt = as_a <gcond *> (last_stmt (loop->header));
1561       if (stmt)
1562         is_comparison_with_loop_invariant_p (stmt, loop,
1563                                              &loop_bound_var,
1564                                              &loop_bound_code,
1565                                              &loop_bound_step,
1566                                              &loop_iv_base);
1567
1568       bbs = get_loop_body (loop);
1569
1570       for (j = 0; j < loop->num_nodes; j++)
1571         {
1572           int header_found = 0;
1573           edge e;
1574           edge_iterator ei;
1575
1576           bb = bbs[j];
1577
1578           /* Bypass loop heuristics on continue statement.  These
1579              statements construct loops via "non-loop" constructs
1580              in the source language and are better to be handled
1581              separately.  */
1582           if (predicted_by_p (bb, PRED_CONTINUE))
1583             continue;
1584
1585           /* Loop branch heuristics - predict an edge back to a
1586              loop's head as taken.  */
1587           if (bb == loop->latch)
1588             {
1589               e = find_edge (loop->latch, loop->header);
1590               if (e)
1591                 {
1592                   header_found = 1;
1593                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1594                 }
1595             }
1596
1597           /* Loop exit heuristics - predict an edge exiting the loop if the
1598              conditional has no loop header successors as not taken.  */
1599           if (!header_found
1600               /* If we already used more reliable loop exit predictors, do not
1601                  bother with PRED_LOOP_EXIT.  */
1602               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1603               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1604             {
1605               /* For loop with many exits we don't want to predict all exits
1606                  with the pretty large probability, because if all exits are
1607                  considered in row, the loop would be predicted to iterate
1608                  almost never.  The code to divide probability by number of
1609                  exits is very rough.  It should compute the number of exits
1610                  taken in each patch through function (not the overall number
1611                  of exits that might be a lot higher for loops with wide switch
1612                  statements in them) and compute n-th square root.
1613
1614                  We limit the minimal probability by 2% to avoid
1615                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1616                  as this was causing regression in perl benchmark containing such
1617                  a wide loop.  */
1618
1619               int probability = ((REG_BR_PROB_BASE
1620                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1621                                  / n_exits);
1622               if (probability < HITRATE (2))
1623                 probability = HITRATE (2);
1624               FOR_EACH_EDGE (e, ei, bb->succs)
1625                 if (e->dest->index < NUM_FIXED_BLOCKS
1626                     || !flow_bb_inside_loop_p (loop, e->dest))
1627                   predict_edge (e, PRED_LOOP_EXIT, probability);
1628             }
1629           if (loop_bound_var)
1630             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1631                                    loop_bound_code,
1632                                    tree_to_shwi (loop_bound_step));
1633         }
1634
1635       /* Free basic blocks from get_loop_body.  */
1636       free (bbs);
1637     }
1638 }
1639
1640 /* Attempt to predict probabilities of BB outgoing edges using local
1641    properties.  */
1642 static void
1643 bb_estimate_probability_locally (basic_block bb)
1644 {
1645   rtx_insn *last_insn = BB_END (bb);
1646   rtx cond;
1647
1648   if (! can_predict_insn_p (last_insn))
1649     return;
1650   cond = get_condition (last_insn, NULL, false, false);
1651   if (! cond)
1652     return;
1653
1654   /* Try "pointer heuristic."
1655      A comparison ptr == 0 is predicted as false.
1656      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1657   if (COMPARISON_P (cond)
1658       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1659           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1660     {
1661       if (GET_CODE (cond) == EQ)
1662         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1663       else if (GET_CODE (cond) == NE)
1664         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1665     }
1666   else
1667
1668   /* Try "opcode heuristic."
1669      EQ tests are usually false and NE tests are usually true. Also,
1670      most quantities are positive, so we can make the appropriate guesses
1671      about signed comparisons against zero.  */
1672     switch (GET_CODE (cond))
1673       {
1674       case CONST_INT:
1675         /* Unconditional branch.  */
1676         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1677                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1678         break;
1679
1680       case EQ:
1681       case UNEQ:
1682         /* Floating point comparisons appears to behave in a very
1683            unpredictable way because of special role of = tests in
1684            FP code.  */
1685         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1686           ;
1687         /* Comparisons with 0 are often used for booleans and there is
1688            nothing useful to predict about them.  */
1689         else if (XEXP (cond, 1) == const0_rtx
1690                  || XEXP (cond, 0) == const0_rtx)
1691           ;
1692         else
1693           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1694         break;
1695
1696       case NE:
1697       case LTGT:
1698         /* Floating point comparisons appears to behave in a very
1699            unpredictable way because of special role of = tests in
1700            FP code.  */
1701         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1702           ;
1703         /* Comparisons with 0 are often used for booleans and there is
1704            nothing useful to predict about them.  */
1705         else if (XEXP (cond, 1) == const0_rtx
1706                  || XEXP (cond, 0) == const0_rtx)
1707           ;
1708         else
1709           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1710         break;
1711
1712       case ORDERED:
1713         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1714         break;
1715
1716       case UNORDERED:
1717         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1718         break;
1719
1720       case LE:
1721       case LT:
1722         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1723             || XEXP (cond, 1) == constm1_rtx)
1724           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1725         break;
1726
1727       case GE:
1728       case GT:
1729         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1730             || XEXP (cond, 1) == constm1_rtx)
1731           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1732         break;
1733
1734       default:
1735         break;
1736       }
1737 }
1738
1739 /* Set edge->probability for each successor edge of BB.  */
1740 void
1741 guess_outgoing_edge_probabilities (basic_block bb)
1742 {
1743   bb_estimate_probability_locally (bb);
1744   combine_predictions_for_insn (BB_END (bb), bb);
1745 }
1746 \f
1747 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1748
1749 /* Helper function for expr_expected_value.  */
1750
1751 static tree
1752 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1753                        tree op1, bitmap visited, enum br_predictor *predictor)
1754 {
1755   gimple def;
1756
1757   if (predictor)
1758     *predictor = PRED_UNCONDITIONAL;
1759
1760   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1761     {
1762       if (TREE_CONSTANT (op0))
1763         return op0;
1764
1765       if (code != SSA_NAME)
1766         return NULL_TREE;
1767
1768       def = SSA_NAME_DEF_STMT (op0);
1769
1770       /* If we were already here, break the infinite cycle.  */
1771       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1772         return NULL;
1773
1774       if (gimple_code (def) == GIMPLE_PHI)
1775         {
1776           /* All the arguments of the PHI node must have the same constant
1777              length.  */
1778           int i, n = gimple_phi_num_args (def);
1779           tree val = NULL, new_val;
1780
1781           for (i = 0; i < n; i++)
1782             {
1783               tree arg = PHI_ARG_DEF (def, i);
1784               enum br_predictor predictor2;
1785
1786               /* If this PHI has itself as an argument, we cannot
1787                  determine the string length of this argument.  However,
1788                  if we can find an expected constant value for the other
1789                  PHI args then we can still be sure that this is
1790                  likely a constant.  So be optimistic and just
1791                  continue with the next argument.  */
1792               if (arg == PHI_RESULT (def))
1793                 continue;
1794
1795               new_val = expr_expected_value (arg, visited, &predictor2);
1796
1797               /* It is difficult to combine value predictors.  Simply assume
1798                  that later predictor is weaker and take its prediction.  */
1799               if (predictor && *predictor < predictor2)
1800                 *predictor = predictor2;
1801               if (!new_val)
1802                 return NULL;
1803               if (!val)
1804                 val = new_val;
1805               else if (!operand_equal_p (val, new_val, false))
1806                 return NULL;
1807             }
1808           return val;
1809         }
1810       if (is_gimple_assign (def))
1811         {
1812           if (gimple_assign_lhs (def) != op0)
1813             return NULL;
1814
1815           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1816                                         gimple_assign_rhs1 (def),
1817                                         gimple_assign_rhs_code (def),
1818                                         gimple_assign_rhs2 (def),
1819                                         visited, predictor);
1820         }
1821
1822       if (is_gimple_call (def))
1823         {
1824           tree decl = gimple_call_fndecl (def);
1825           if (!decl)
1826             {
1827               if (gimple_call_internal_p (def)
1828                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1829                 {
1830                   gcc_assert (gimple_call_num_args (def) == 3);
1831                   tree val = gimple_call_arg (def, 0);
1832                   if (TREE_CONSTANT (val))
1833                     return val;
1834                   if (predictor)
1835                     {
1836                       tree val2 = gimple_call_arg (def, 2);
1837                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1838                                   && tree_fits_uhwi_p (val2)
1839                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1840                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1841                     }
1842                   return gimple_call_arg (def, 1);
1843                 }
1844               return NULL;
1845             }
1846           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1847             switch (DECL_FUNCTION_CODE (decl))
1848               {
1849               case BUILT_IN_EXPECT:
1850                 {
1851                   tree val;
1852                   if (gimple_call_num_args (def) != 2)
1853                     return NULL;
1854                   val = gimple_call_arg (def, 0);
1855                   if (TREE_CONSTANT (val))
1856                     return val;
1857                   if (predictor)
1858                     *predictor = PRED_BUILTIN_EXPECT;
1859                   return gimple_call_arg (def, 1);
1860                 }
1861
1862               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1863               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1864               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1865               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1866               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1867               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1868               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1869               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1870               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1871               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1872               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1873               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1874               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1875                 /* Assume that any given atomic operation has low contention,
1876                    and thus the compare-and-swap operation succeeds.  */
1877                 if (predictor)
1878                   *predictor = PRED_COMPARE_AND_SWAP;
1879                 return boolean_true_node;
1880               default:
1881                 break;
1882             }
1883         }
1884
1885       return NULL;
1886     }
1887
1888   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1889     {
1890       tree res;
1891       enum br_predictor predictor2;
1892       op0 = expr_expected_value (op0, visited, predictor);
1893       if (!op0)
1894         return NULL;
1895       op1 = expr_expected_value (op1, visited, &predictor2);
1896       if (predictor && *predictor < predictor2)
1897         *predictor = predictor2;
1898       if (!op1)
1899         return NULL;
1900       res = fold_build2 (code, type, op0, op1);
1901       if (TREE_CONSTANT (res))
1902         return res;
1903       return NULL;
1904     }
1905   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1906     {
1907       tree res;
1908       op0 = expr_expected_value (op0, visited, predictor);
1909       if (!op0)
1910         return NULL;
1911       res = fold_build1 (code, type, op0);
1912       if (TREE_CONSTANT (res))
1913         return res;
1914       return NULL;
1915     }
1916   return NULL;
1917 }
1918
1919 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1920    The function is used by builtin_expect branch predictor so the evidence
1921    must come from this construct and additional possible constant folding.
1922
1923    We may want to implement more involved value guess (such as value range
1924    propagation based prediction), but such tricks shall go to new
1925    implementation.  */
1926
1927 static tree
1928 expr_expected_value (tree expr, bitmap visited,
1929                      enum br_predictor *predictor)
1930 {
1931   enum tree_code code;
1932   tree op0, op1;
1933
1934   if (TREE_CONSTANT (expr))
1935     {
1936       if (predictor)
1937         *predictor = PRED_UNCONDITIONAL;
1938       return expr;
1939     }
1940
1941   extract_ops_from_tree (expr, &code, &op0, &op1);
1942   return expr_expected_value_1 (TREE_TYPE (expr),
1943                                 op0, code, op1, visited, predictor);
1944 }
1945 \f
1946 /* Predict using opcode of the last statement in basic block.  */
1947 static void
1948 tree_predict_by_opcode (basic_block bb)
1949 {
1950   gimple stmt = last_stmt (bb);
1951   edge then_edge;
1952   tree op0, op1;
1953   tree type;
1954   tree val;
1955   enum tree_code cmp;
1956   bitmap visited;
1957   edge_iterator ei;
1958   enum br_predictor predictor;
1959
1960   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1961     return;
1962   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1963     if (then_edge->flags & EDGE_TRUE_VALUE)
1964       break;
1965   op0 = gimple_cond_lhs (stmt);
1966   op1 = gimple_cond_rhs (stmt);
1967   cmp = gimple_cond_code (stmt);
1968   type = TREE_TYPE (op0);
1969   visited = BITMAP_ALLOC (NULL);
1970   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1971                                &predictor);
1972   BITMAP_FREE (visited);
1973   if (val && TREE_CODE (val) == INTEGER_CST)
1974     {
1975       if (predictor == PRED_BUILTIN_EXPECT)
1976         {
1977           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1978
1979           gcc_assert (percent >= 0 && percent <= 100);
1980           if (integer_zerop (val))
1981             percent = 100 - percent;
1982           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1983         }
1984       else
1985         predict_edge (then_edge, predictor,
1986                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
1987     }
1988   /* Try "pointer heuristic."
1989      A comparison ptr == 0 is predicted as false.
1990      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1991   if (POINTER_TYPE_P (type))
1992     {
1993       if (cmp == EQ_EXPR)
1994         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1995       else if (cmp == NE_EXPR)
1996         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1997     }
1998   else
1999
2000   /* Try "opcode heuristic."
2001      EQ tests are usually false and NE tests are usually true. Also,
2002      most quantities are positive, so we can make the appropriate guesses
2003      about signed comparisons against zero.  */
2004     switch (cmp)
2005       {
2006       case EQ_EXPR:
2007       case UNEQ_EXPR:
2008         /* Floating point comparisons appears to behave in a very
2009            unpredictable way because of special role of = tests in
2010            FP code.  */
2011         if (FLOAT_TYPE_P (type))
2012           ;
2013         /* Comparisons with 0 are often used for booleans and there is
2014            nothing useful to predict about them.  */
2015         else if (integer_zerop (op0) || integer_zerop (op1))
2016           ;
2017         else
2018           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2019         break;
2020
2021       case NE_EXPR:
2022       case LTGT_EXPR:
2023         /* Floating point comparisons appears to behave in a very
2024            unpredictable way because of special role of = tests in
2025            FP code.  */
2026         if (FLOAT_TYPE_P (type))
2027           ;
2028         /* Comparisons with 0 are often used for booleans and there is
2029            nothing useful to predict about them.  */
2030         else if (integer_zerop (op0)
2031                  || integer_zerop (op1))
2032           ;
2033         else
2034           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2035         break;
2036
2037       case ORDERED_EXPR:
2038         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2039         break;
2040
2041       case UNORDERED_EXPR:
2042         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2043         break;
2044
2045       case LE_EXPR:
2046       case LT_EXPR:
2047         if (integer_zerop (op1)
2048             || integer_onep (op1)
2049             || integer_all_onesp (op1)
2050             || real_zerop (op1)
2051             || real_onep (op1)
2052             || real_minus_onep (op1))
2053           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2054         break;
2055
2056       case GE_EXPR:
2057       case GT_EXPR:
2058         if (integer_zerop (op1)
2059             || integer_onep (op1)
2060             || integer_all_onesp (op1)
2061             || real_zerop (op1)
2062             || real_onep (op1)
2063             || real_minus_onep (op1))
2064           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2065         break;
2066
2067       default:
2068         break;
2069       }
2070 }
2071
2072 /* Try to guess whether the value of return means error code.  */
2073
2074 static enum br_predictor
2075 return_prediction (tree val, enum prediction *prediction)
2076 {
2077   /* VOID.  */
2078   if (!val)
2079     return PRED_NO_PREDICTION;
2080   /* Different heuristics for pointers and scalars.  */
2081   if (POINTER_TYPE_P (TREE_TYPE (val)))
2082     {
2083       /* NULL is usually not returned.  */
2084       if (integer_zerop (val))
2085         {
2086           *prediction = NOT_TAKEN;
2087           return PRED_NULL_RETURN;
2088         }
2089     }
2090   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2091     {
2092       /* Negative return values are often used to indicate
2093          errors.  */
2094       if (TREE_CODE (val) == INTEGER_CST
2095           && tree_int_cst_sgn (val) < 0)
2096         {
2097           *prediction = NOT_TAKEN;
2098           return PRED_NEGATIVE_RETURN;
2099         }
2100       /* Constant return values seems to be commonly taken.
2101          Zero/one often represent booleans so exclude them from the
2102          heuristics.  */
2103       if (TREE_CONSTANT (val)
2104           && (!integer_zerop (val) && !integer_onep (val)))
2105         {
2106           *prediction = TAKEN;
2107           return PRED_CONST_RETURN;
2108         }
2109     }
2110   return PRED_NO_PREDICTION;
2111 }
2112
2113 /* Find the basic block with return expression and look up for possible
2114    return value trying to apply RETURN_PREDICTION heuristics.  */
2115 static void
2116 apply_return_prediction (void)
2117 {
2118   greturn *return_stmt = NULL;
2119   tree return_val;
2120   edge e;
2121   gphi *phi;
2122   int phi_num_args, i;
2123   enum br_predictor pred;
2124   enum prediction direction;
2125   edge_iterator ei;
2126
2127   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2128     {
2129       gimple last = last_stmt (e->src);
2130       if (last
2131           && gimple_code (last) == GIMPLE_RETURN)
2132         {
2133           return_stmt = as_a <greturn *> (last);
2134           break;
2135         }
2136     }
2137   if (!e)
2138     return;
2139   return_val = gimple_return_retval (return_stmt);
2140   if (!return_val)
2141     return;
2142   if (TREE_CODE (return_val) != SSA_NAME
2143       || !SSA_NAME_DEF_STMT (return_val)
2144       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2145     return;
2146   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2147   phi_num_args = gimple_phi_num_args (phi);
2148   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2149
2150   /* Avoid the degenerate case where all return values form the function
2151      belongs to same category (ie they are all positive constants)
2152      so we can hardly say something about them.  */
2153   for (i = 1; i < phi_num_args; i++)
2154     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2155       break;
2156   if (i != phi_num_args)
2157     for (i = 0; i < phi_num_args; i++)
2158       {
2159         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2160         if (pred != PRED_NO_PREDICTION)
2161           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2162                                          direction);
2163       }
2164 }
2165
2166 /* Look for basic block that contains unlikely to happen events
2167    (such as noreturn calls) and mark all paths leading to execution
2168    of this basic blocks as unlikely.  */
2169
2170 static void
2171 tree_bb_level_predictions (void)
2172 {
2173   basic_block bb;
2174   bool has_return_edges = false;
2175   edge e;
2176   edge_iterator ei;
2177
2178   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2179     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2180       {
2181         has_return_edges = true;
2182         break;
2183       }
2184
2185   apply_return_prediction ();
2186
2187   FOR_EACH_BB_FN (bb, cfun)
2188     {
2189       gimple_stmt_iterator gsi;
2190
2191       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2192         {
2193           gimple stmt = gsi_stmt (gsi);
2194           tree decl;
2195
2196           if (is_gimple_call (stmt))
2197             {
2198               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2199                   && has_return_edges)
2200                 predict_paths_leading_to (bb, PRED_NORETURN,
2201                                           NOT_TAKEN);
2202               decl = gimple_call_fndecl (stmt);
2203               if (decl
2204                   && lookup_attribute ("cold",
2205                                        DECL_ATTRIBUTES (decl)))
2206                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2207                                           NOT_TAKEN);
2208             }
2209           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2210             {
2211               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2212                                         gimple_predict_outcome (stmt));
2213               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2214                  hints to callers.  */
2215             }
2216         }
2217     }
2218 }
2219
2220 #ifdef ENABLE_CHECKING
2221
2222 /* Callback for hash_map::traverse, asserts that the pointer map is
2223    empty.  */
2224
2225 bool
2226 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2227                  void *)
2228 {
2229   gcc_assert (!value);
2230   return false;
2231 }
2232 #endif
2233
2234 /* Predict branch probabilities and estimate profile for basic block BB.  */
2235
2236 static void
2237 tree_estimate_probability_bb (basic_block bb)
2238 {
2239   edge e;
2240   edge_iterator ei;
2241   gimple last;
2242
2243   FOR_EACH_EDGE (e, ei, bb->succs)
2244     {
2245       /* Predict edges to user labels with attributes.  */
2246       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2247         {
2248           gimple_stmt_iterator gi;
2249           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2250             {
2251               glabel *label_stmt =
2252                 dyn_cast <glabel *> (gsi_stmt (gi));
2253               tree decl;
2254
2255               if (!label_stmt)
2256                 break;
2257               decl = gimple_label_label (label_stmt);
2258               if (DECL_ARTIFICIAL (decl))
2259                 continue;
2260
2261               /* Finally, we have a user-defined label.  */
2262               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2263                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2264               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2265                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2266             }
2267         }
2268
2269       /* Predict early returns to be probable, as we've already taken
2270          care for error returns and other cases are often used for
2271          fast paths through function.
2272
2273          Since we've already removed the return statements, we are
2274          looking for CFG like:
2275
2276          if (conditional)
2277          {
2278          ..
2279          goto return_block
2280          }
2281          some other blocks
2282          return_block:
2283          return_stmt.  */
2284       if (e->dest != bb->next_bb
2285           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2286           && single_succ_p (e->dest)
2287           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2288           && (last = last_stmt (e->dest)) != NULL
2289           && gimple_code (last) == GIMPLE_RETURN)
2290         {
2291           edge e1;
2292           edge_iterator ei1;
2293
2294           if (single_succ_p (bb))
2295             {
2296               FOR_EACH_EDGE (e1, ei1, bb->preds)
2297                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2298                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2299                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2300                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2301             }
2302           else
2303             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2304                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2305                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2306               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2307         }
2308
2309       /* Look for block we are guarding (ie we dominate it,
2310          but it doesn't postdominate us).  */
2311       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2312           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2313           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2314         {
2315           gimple_stmt_iterator bi;
2316
2317           /* The call heuristic claims that a guarded function call
2318              is improbable.  This is because such calls are often used
2319              to signal exceptional situations such as printing error
2320              messages.  */
2321           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2322                gsi_next (&bi))
2323             {
2324               gimple stmt = gsi_stmt (bi);
2325               if (is_gimple_call (stmt)
2326                   /* Constant and pure calls are hardly used to signalize
2327                      something exceptional.  */
2328                   && gimple_has_side_effects (stmt))
2329                 {
2330                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2331                   break;
2332                 }
2333             }
2334         }
2335     }
2336   tree_predict_by_opcode (bb);
2337 }
2338
2339 /* Predict branch probabilities and estimate profile of the tree CFG.
2340    This function can be called from the loop optimizers to recompute
2341    the profile information.  */
2342
2343 void
2344 tree_estimate_probability (void)
2345 {
2346   basic_block bb;
2347
2348   add_noreturn_fake_exit_edges ();
2349   connect_infinite_loops_to_exit ();
2350   /* We use loop_niter_by_eval, which requires that the loops have
2351      preheaders.  */
2352   create_preheaders (CP_SIMPLE_PREHEADERS);
2353   calculate_dominance_info (CDI_POST_DOMINATORS);
2354
2355   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2356   tree_bb_level_predictions ();
2357   record_loop_exits ();
2358
2359   if (number_of_loops (cfun) > 1)
2360     predict_loops ();
2361
2362   FOR_EACH_BB_FN (bb, cfun)
2363     tree_estimate_probability_bb (bb);
2364
2365   FOR_EACH_BB_FN (bb, cfun)
2366     combine_predictions_for_bb (bb);
2367
2368 #ifdef ENABLE_CHECKING
2369   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2370 #endif
2371   delete bb_predictions;
2372   bb_predictions = NULL;
2373
2374   estimate_bb_frequencies (false);
2375   free_dominance_info (CDI_POST_DOMINATORS);
2376   remove_fake_exit_edges ();
2377 }
2378 \f
2379 /* Predict edges to successors of CUR whose sources are not postdominated by
2380    BB by PRED and recurse to all postdominators.  */
2381
2382 static void
2383 predict_paths_for_bb (basic_block cur, basic_block bb,
2384                       enum br_predictor pred,
2385                       enum prediction taken,
2386                       bitmap visited)
2387 {
2388   edge e;
2389   edge_iterator ei;
2390   basic_block son;
2391
2392   /* We are looking for all edges forming edge cut induced by
2393      set of all blocks postdominated by BB.  */
2394   FOR_EACH_EDGE (e, ei, cur->preds)
2395     if (e->src->index >= NUM_FIXED_BLOCKS
2396         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2397     {
2398       edge e2;
2399       edge_iterator ei2;
2400       bool found = false;
2401
2402       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2403       if (e->flags & (EDGE_EH | EDGE_FAKE))
2404         continue;
2405       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2406
2407       /* See if there is an edge from e->src that is not abnormal
2408          and does not lead to BB.  */
2409       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2410         if (e2 != e
2411             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2412             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2413           {
2414             found = true;
2415             break;
2416           }
2417
2418       /* If there is non-abnormal path leaving e->src, predict edge
2419          using predictor.  Otherwise we need to look for paths
2420          leading to e->src.
2421
2422          The second may lead to infinite loop in the case we are predicitng
2423          regions that are only reachable by abnormal edges.  We simply
2424          prevent visiting given BB twice.  */
2425       if (found)
2426         predict_edge_def (e, pred, taken);
2427       else if (bitmap_set_bit (visited, e->src->index))
2428         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2429     }
2430   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2431        son;
2432        son = next_dom_son (CDI_POST_DOMINATORS, son))
2433     predict_paths_for_bb (son, bb, pred, taken, visited);
2434 }
2435
2436 /* Sets branch probabilities according to PREDiction and
2437    FLAGS.  */
2438
2439 static void
2440 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2441                           enum prediction taken)
2442 {
2443   bitmap visited = BITMAP_ALLOC (NULL);
2444   predict_paths_for_bb (bb, bb, pred, taken, visited);
2445   BITMAP_FREE (visited);
2446 }
2447
2448 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2449
2450 static void
2451 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2452                                enum prediction taken)
2453 {
2454   bool has_nonloop_edge = false;
2455   edge_iterator ei;
2456   edge e2;
2457
2458   basic_block bb = e->src;
2459   FOR_EACH_EDGE (e2, ei, bb->succs)
2460     if (e2->dest != e->src && e2->dest != e->dest
2461         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2462         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2463       {
2464         has_nonloop_edge = true;
2465         break;
2466       }
2467   if (!has_nonloop_edge)
2468     {
2469       bitmap visited = BITMAP_ALLOC (NULL);
2470       predict_paths_for_bb (bb, bb, pred, taken, visited);
2471       BITMAP_FREE (visited);
2472     }
2473   else
2474     predict_edge_def (e, pred, taken);
2475 }
2476 \f
2477 /* This is used to carry information about basic blocks.  It is
2478    attached to the AUX field of the standard CFG block.  */
2479
2480 struct block_info
2481 {
2482   /* Estimated frequency of execution of basic_block.  */
2483   sreal frequency;
2484
2485   /* To keep queue of basic blocks to process.  */
2486   basic_block next;
2487
2488   /* Number of predecessors we need to visit first.  */
2489   int npredecessors;
2490 };
2491
2492 /* Similar information for edges.  */
2493 struct edge_prob_info
2494 {
2495   /* In case edge is a loopback edge, the probability edge will be reached
2496      in case header is.  Estimated number of iterations of the loop can be
2497      then computed as 1 / (1 - back_edge_prob).  */
2498   sreal back_edge_prob;
2499   /* True if the edge is a loopback edge in the natural loop.  */
2500   unsigned int back_edge:1;
2501 };
2502
2503 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2504 #undef EDGE_INFO
2505 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2506
2507 /* Helper function for estimate_bb_frequencies.
2508    Propagate the frequencies in blocks marked in
2509    TOVISIT, starting in HEAD.  */
2510
2511 static void
2512 propagate_freq (basic_block head, bitmap tovisit)
2513 {
2514   basic_block bb;
2515   basic_block last;
2516   unsigned i;
2517   edge e;
2518   basic_block nextbb;
2519   bitmap_iterator bi;
2520
2521   /* For each basic block we need to visit count number of his predecessors
2522      we need to visit first.  */
2523   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2524     {
2525       edge_iterator ei;
2526       int count = 0;
2527
2528       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2529
2530       FOR_EACH_EDGE (e, ei, bb->preds)
2531         {
2532           bool visit = bitmap_bit_p (tovisit, e->src->index);
2533
2534           if (visit && !(e->flags & EDGE_DFS_BACK))
2535             count++;
2536           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2537             fprintf (dump_file,
2538                      "Irreducible region hit, ignoring edge to %i->%i\n",
2539                      e->src->index, bb->index);
2540         }
2541       BLOCK_INFO (bb)->npredecessors = count;
2542       /* When function never returns, we will never process exit block.  */
2543       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2544         bb->count = bb->frequency = 0;
2545     }
2546
2547   BLOCK_INFO (head)->frequency = real_one;
2548   last = head;
2549   for (bb = head; bb; bb = nextbb)
2550     {
2551       edge_iterator ei;
2552       sreal cyclic_probability = real_zero;
2553       sreal frequency = real_zero;
2554
2555       nextbb = BLOCK_INFO (bb)->next;
2556       BLOCK_INFO (bb)->next = NULL;
2557
2558       /* Compute frequency of basic block.  */
2559       if (bb != head)
2560         {
2561 #ifdef ENABLE_CHECKING
2562           FOR_EACH_EDGE (e, ei, bb->preds)
2563             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2564                         || (e->flags & EDGE_DFS_BACK));
2565 #endif
2566
2567           FOR_EACH_EDGE (e, ei, bb->preds)
2568             if (EDGE_INFO (e)->back_edge)
2569               {
2570                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2571               }
2572             else if (!(e->flags & EDGE_DFS_BACK))
2573               {
2574                 /*  frequency += (e->probability
2575                                   * BLOCK_INFO (e->src)->frequency /
2576                                   REG_BR_PROB_BASE);  */
2577
2578                 sreal tmp (e->probability, 0);
2579                 tmp *= BLOCK_INFO (e->src)->frequency;
2580                 tmp *= real_inv_br_prob_base;
2581                 frequency += tmp;
2582               }
2583
2584           if (cyclic_probability == real_zero)
2585             {
2586               BLOCK_INFO (bb)->frequency = frequency;
2587             }
2588           else
2589             {
2590               if (cyclic_probability > real_almost_one)
2591                 cyclic_probability = real_almost_one;
2592
2593               /* BLOCK_INFO (bb)->frequency = frequency
2594                                               / (1 - cyclic_probability) */
2595
2596               cyclic_probability = real_one - cyclic_probability;
2597               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2598             }
2599         }
2600
2601       bitmap_clear_bit (tovisit, bb->index);
2602
2603       e = find_edge (bb, head);
2604       if (e)
2605         {
2606           /* EDGE_INFO (e)->back_edge_prob
2607              = ((e->probability * BLOCK_INFO (bb)->frequency)
2608              / REG_BR_PROB_BASE); */
2609
2610           sreal tmp (e->probability, 0);
2611           tmp *= BLOCK_INFO (bb)->frequency;
2612           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2613         }
2614
2615       /* Propagate to successor blocks.  */
2616       FOR_EACH_EDGE (e, ei, bb->succs)
2617         if (!(e->flags & EDGE_DFS_BACK)
2618             && BLOCK_INFO (e->dest)->npredecessors)
2619           {
2620             BLOCK_INFO (e->dest)->npredecessors--;
2621             if (!BLOCK_INFO (e->dest)->npredecessors)
2622               {
2623                 if (!nextbb)
2624                   nextbb = e->dest;
2625                 else
2626                   BLOCK_INFO (last)->next = e->dest;
2627
2628                 last = e->dest;
2629               }
2630           }
2631     }
2632 }
2633
2634 /* Estimate frequencies in loops at same nest level.  */
2635
2636 static void
2637 estimate_loops_at_level (struct loop *first_loop)
2638 {
2639   struct loop *loop;
2640
2641   for (loop = first_loop; loop; loop = loop->next)
2642     {
2643       edge e;
2644       basic_block *bbs;
2645       unsigned i;
2646       bitmap tovisit = BITMAP_ALLOC (NULL);
2647
2648       estimate_loops_at_level (loop->inner);
2649
2650       /* Find current loop back edge and mark it.  */
2651       e = loop_latch_edge (loop);
2652       EDGE_INFO (e)->back_edge = 1;
2653
2654       bbs = get_loop_body (loop);
2655       for (i = 0; i < loop->num_nodes; i++)
2656         bitmap_set_bit (tovisit, bbs[i]->index);
2657       free (bbs);
2658       propagate_freq (loop->header, tovisit);
2659       BITMAP_FREE (tovisit);
2660     }
2661 }
2662
2663 /* Propagates frequencies through structure of loops.  */
2664
2665 static void
2666 estimate_loops (void)
2667 {
2668   bitmap tovisit = BITMAP_ALLOC (NULL);
2669   basic_block bb;
2670
2671   /* Start by estimating the frequencies in the loops.  */
2672   if (number_of_loops (cfun) > 1)
2673     estimate_loops_at_level (current_loops->tree_root->inner);
2674
2675   /* Now propagate the frequencies through all the blocks.  */
2676   FOR_ALL_BB_FN (bb, cfun)
2677     {
2678       bitmap_set_bit (tovisit, bb->index);
2679     }
2680   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2681   BITMAP_FREE (tovisit);
2682 }
2683
2684 /* Drop the profile for NODE to guessed, and update its frequency based on
2685    whether it is expected to be hot given the CALL_COUNT.  */
2686
2687 static void
2688 drop_profile (struct cgraph_node *node, gcov_type call_count)
2689 {
2690   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2691   /* In the case where this was called by another function with a
2692      dropped profile, call_count will be 0. Since there are no
2693      non-zero call counts to this function, we don't know for sure
2694      whether it is hot, and therefore it will be marked normal below.  */
2695   bool hot = maybe_hot_count_p (NULL, call_count);
2696
2697   if (dump_file)
2698     fprintf (dump_file,
2699              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2700              node->name (), node->order,
2701              hot ? "Function is hot" : "Function is normal");
2702   /* We only expect to miss profiles for functions that are reached
2703      via non-zero call edges in cases where the function may have
2704      been linked from another module or library (COMDATs and extern
2705      templates). See the comments below for handle_missing_profiles.
2706      Also, only warn in cases where the missing counts exceed the
2707      number of training runs. In certain cases with an execv followed
2708      by a no-return call the profile for the no-return call is not
2709      dumped and there can be a mismatch.  */
2710   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2711       && call_count > profile_info->runs)
2712     {
2713       if (flag_profile_correction)
2714         {
2715           if (dump_file)
2716             fprintf (dump_file,
2717                      "Missing counts for called function %s/%i\n",
2718                      node->name (), node->order);
2719         }
2720       else
2721         warning (0, "Missing counts for called function %s/%i",
2722                  node->name (), node->order);
2723     }
2724
2725   profile_status_for_fn (fn)
2726       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2727   node->frequency
2728       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2729 }
2730
2731 /* In the case of COMDAT routines, multiple object files will contain the same
2732    function and the linker will select one for the binary. In that case
2733    all the other copies from the profile instrument binary will be missing
2734    profile counts. Look for cases where this happened, due to non-zero
2735    call counts going to 0-count functions, and drop the profile to guessed
2736    so that we can use the estimated probabilities and avoid optimizing only
2737    for size.
2738
2739    The other case where the profile may be missing is when the routine
2740    is not going to be emitted to the object file, e.g. for "extern template"
2741    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2742    all other cases of non-zero calls to 0-count functions.  */
2743
2744 void
2745 handle_missing_profiles (void)
2746 {
2747   struct cgraph_node *node;
2748   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2749   vec<struct cgraph_node *> worklist;
2750   worklist.create (64);
2751
2752   /* See if 0 count function has non-0 count callers.  In this case we
2753      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2754   FOR_EACH_DEFINED_FUNCTION (node)
2755     {
2756       struct cgraph_edge *e;
2757       gcov_type call_count = 0;
2758       gcov_type max_tp_first_run = 0;
2759       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2760
2761       if (node->count)
2762         continue;
2763       for (e = node->callers; e; e = e->next_caller)
2764       {
2765         call_count += e->count;
2766
2767         if (e->caller->tp_first_run > max_tp_first_run)
2768           max_tp_first_run = e->caller->tp_first_run;
2769       }
2770
2771       /* If time profile is missing, let assign the maximum that comes from
2772          caller functions.  */
2773       if (!node->tp_first_run && max_tp_first_run)
2774         node->tp_first_run = max_tp_first_run + 1;
2775
2776       if (call_count
2777           && fn && fn->cfg
2778           && (call_count * unlikely_count_fraction >= profile_info->runs))
2779         {
2780           drop_profile (node, call_count);
2781           worklist.safe_push (node);
2782         }
2783     }
2784
2785   /* Propagate the profile dropping to other 0-count COMDATs that are
2786      potentially called by COMDATs we already dropped the profile on.  */
2787   while (worklist.length () > 0)
2788     {
2789       struct cgraph_edge *e;
2790
2791       node = worklist.pop ();
2792       for (e = node->callees; e; e = e->next_caller)
2793         {
2794           struct cgraph_node *callee = e->callee;
2795           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2796
2797           if (callee->count > 0)
2798             continue;
2799           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2800               && profile_status_for_fn (fn) == PROFILE_READ)
2801             {
2802               drop_profile (node, 0);
2803               worklist.safe_push (callee);
2804             }
2805         }
2806     }
2807   worklist.release ();
2808 }
2809
2810 /* Convert counts measured by profile driven feedback to frequencies.
2811    Return nonzero iff there was any nonzero execution count.  */
2812
2813 int
2814 counts_to_freqs (void)
2815 {
2816   gcov_type count_max, true_count_max = 0;
2817   basic_block bb;
2818
2819   /* Don't overwrite the estimated frequencies when the profile for
2820      the function is missing.  We may drop this function PROFILE_GUESSED
2821      later in drop_profile ().  */
2822   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2823     return 0;
2824
2825   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2826     true_count_max = MAX (bb->count, true_count_max);
2827
2828   count_max = MAX (true_count_max, 1);
2829   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2830     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2831
2832   return true_count_max;
2833 }
2834
2835 /* Return true if function is likely to be expensive, so there is no point to
2836    optimize performance of prologue, epilogue or do inlining at the expense
2837    of code size growth.  THRESHOLD is the limit of number of instructions
2838    function can execute at average to be still considered not expensive.  */
2839
2840 bool
2841 expensive_function_p (int threshold)
2842 {
2843   unsigned int sum = 0;
2844   basic_block bb;
2845   unsigned int limit;
2846
2847   /* We can not compute accurately for large thresholds due to scaled
2848      frequencies.  */
2849   gcc_assert (threshold <= BB_FREQ_MAX);
2850
2851   /* Frequencies are out of range.  This either means that function contains
2852      internal loop executing more than BB_FREQ_MAX times or profile feedback
2853      is available and function has not been executed at all.  */
2854   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2855     return true;
2856
2857   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2858   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2859   FOR_EACH_BB_FN (bb, cfun)
2860     {
2861       rtx_insn *insn;
2862
2863       FOR_BB_INSNS (bb, insn)
2864         if (active_insn_p (insn))
2865           {
2866             sum += bb->frequency;
2867             if (sum > limit)
2868               return true;
2869         }
2870     }
2871
2872   return false;
2873 }
2874
2875 /* Estimate and propagate basic block frequencies using the given branch
2876    probabilities.  If FORCE is true, the frequencies are used to estimate
2877    the counts even when there are already non-zero profile counts.  */
2878
2879 void
2880 estimate_bb_frequencies (bool force)
2881 {
2882   basic_block bb;
2883   sreal freq_max;
2884
2885   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2886     {
2887       static int real_values_initialized = 0;
2888
2889       if (!real_values_initialized)
2890         {
2891           real_values_initialized = 1;
2892           real_zero = sreal (0, 0);
2893           real_one = sreal (1, 0);
2894           real_br_prob_base = sreal (REG_BR_PROB_BASE, 0);
2895           real_bb_freq_max = sreal (BB_FREQ_MAX, 0);
2896           real_one_half = sreal (1, -1);
2897           real_inv_br_prob_base = real_one / real_br_prob_base;
2898           real_almost_one = real_one - real_inv_br_prob_base;
2899         }
2900
2901       mark_dfs_back_edges ();
2902
2903       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2904          REG_BR_PROB_BASE;
2905
2906       /* Set up block info for each basic block.  */
2907       alloc_aux_for_blocks (sizeof (block_info));
2908       alloc_aux_for_edges (sizeof (edge_prob_info));
2909       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2910         {
2911           edge e;
2912           edge_iterator ei;
2913
2914           FOR_EACH_EDGE (e, ei, bb->succs)
2915             {
2916               EDGE_INFO (e)->back_edge_prob = sreal (e->probability, 0);
2917               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2918             }
2919         }
2920
2921       /* First compute frequencies locally for each loop from innermost
2922          to outermost to examine frequencies for back edges.  */
2923       estimate_loops ();
2924
2925       freq_max = real_zero;
2926       FOR_EACH_BB_FN (bb, cfun)
2927         if (freq_max < BLOCK_INFO (bb)->frequency)
2928           freq_max = BLOCK_INFO (bb)->frequency;
2929
2930       freq_max = real_bb_freq_max / freq_max;
2931       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2932         {
2933           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2934           bb->frequency = tmp.to_int ();
2935         }
2936
2937       free_aux_for_blocks ();
2938       free_aux_for_edges ();
2939     }
2940   compute_function_frequency ();
2941 }
2942
2943 /* Decide whether function is hot, cold or unlikely executed.  */
2944 void
2945 compute_function_frequency (void)
2946 {
2947   basic_block bb;
2948   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2949
2950   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2951       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2952     node->only_called_at_startup = true;
2953   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2954     node->only_called_at_exit = true;
2955
2956   if (profile_status_for_fn (cfun) != PROFILE_READ)
2957     {
2958       int flags = flags_from_decl_or_type (current_function_decl);
2959       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2960           != NULL)
2961         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2962       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2963                != NULL)
2964         node->frequency = NODE_FREQUENCY_HOT;
2965       else if (flags & ECF_NORETURN)
2966         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2967       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2968         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2969       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2970                || DECL_STATIC_DESTRUCTOR (current_function_decl))
2971         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2972       return;
2973     }
2974
2975   /* Only first time try to drop function into unlikely executed.
2976      After inlining the roundoff errors may confuse us.
2977      Ipa-profile pass will drop functions only called from unlikely
2978      functions to unlikely and that is most of what we care about.  */
2979   if (!cfun->after_inlining)
2980     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2981   FOR_EACH_BB_FN (bb, cfun)
2982     {
2983       if (maybe_hot_bb_p (cfun, bb))
2984         {
2985           node->frequency = NODE_FREQUENCY_HOT;
2986           return;
2987         }
2988       if (!probably_never_executed_bb_p (cfun, bb))
2989         node->frequency = NODE_FREQUENCY_NORMAL;
2990     }
2991 }
2992
2993 /* Build PREDICT_EXPR.  */
2994 tree
2995 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2996 {
2997   tree t = build1 (PREDICT_EXPR, void_type_node,
2998                    build_int_cst (integer_type_node, predictor));
2999   SET_PREDICT_EXPR_OUTCOME (t, taken);
3000   return t;
3001 }
3002
3003 const char *
3004 predictor_name (enum br_predictor predictor)
3005 {
3006   return predictor_info[predictor].name;
3007 }
3008
3009 /* Predict branch probabilities and estimate profile of the tree CFG. */
3010
3011 namespace {
3012
3013 const pass_data pass_data_profile =
3014 {
3015   GIMPLE_PASS, /* type */
3016   "profile_estimate", /* name */
3017   OPTGROUP_NONE, /* optinfo_flags */
3018   TV_BRANCH_PROB, /* tv_id */
3019   PROP_cfg, /* properties_required */
3020   0, /* properties_provided */
3021   0, /* properties_destroyed */
3022   0, /* todo_flags_start */
3023   0, /* todo_flags_finish */
3024 };
3025
3026 class pass_profile : public gimple_opt_pass
3027 {
3028 public:
3029   pass_profile (gcc::context *ctxt)
3030     : gimple_opt_pass (pass_data_profile, ctxt)
3031   {}
3032
3033   /* opt_pass methods: */
3034   virtual bool gate (function *) { return flag_guess_branch_prob; }
3035   virtual unsigned int execute (function *);
3036
3037 }; // class pass_profile
3038
3039 unsigned int
3040 pass_profile::execute (function *fun)
3041 {
3042   unsigned nb_loops;
3043
3044   loop_optimizer_init (LOOPS_NORMAL);
3045   if (dump_file && (dump_flags & TDF_DETAILS))
3046     flow_loops_dump (dump_file, NULL, 0);
3047
3048   mark_irreducible_loops ();
3049
3050   nb_loops = number_of_loops (fun);
3051   if (nb_loops > 1)
3052     scev_initialize ();
3053
3054   tree_estimate_probability ();
3055
3056   if (nb_loops > 1)
3057     scev_finalize ();
3058
3059   loop_optimizer_finalize ();
3060   if (dump_file && (dump_flags & TDF_DETAILS))
3061     gimple_dump_cfg (dump_file, dump_flags);
3062  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3063     profile_status_for_fn (fun) = PROFILE_GUESSED;
3064   return 0;
3065 }
3066
3067 } // anon namespace
3068
3069 gimple_opt_pass *
3070 make_pass_profile (gcc::context *ctxt)
3071 {
3072   return new pass_profile (ctxt);
3073 }
3074
3075 namespace {
3076
3077 const pass_data pass_data_strip_predict_hints =
3078 {
3079   GIMPLE_PASS, /* type */
3080   "*strip_predict_hints", /* name */
3081   OPTGROUP_NONE, /* optinfo_flags */
3082   TV_BRANCH_PROB, /* tv_id */
3083   PROP_cfg, /* properties_required */
3084   0, /* properties_provided */
3085   0, /* properties_destroyed */
3086   0, /* todo_flags_start */
3087   0, /* todo_flags_finish */
3088 };
3089
3090 class pass_strip_predict_hints : public gimple_opt_pass
3091 {
3092 public:
3093   pass_strip_predict_hints (gcc::context *ctxt)
3094     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3095   {}
3096
3097   /* opt_pass methods: */
3098   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3099   virtual unsigned int execute (function *);
3100
3101 }; // class pass_strip_predict_hints
3102
3103 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3104    we no longer need.  */
3105 unsigned int
3106 pass_strip_predict_hints::execute (function *fun)
3107 {
3108   basic_block bb;
3109   gimple ass_stmt;
3110   tree var;
3111
3112   FOR_EACH_BB_FN (bb, fun)
3113     {
3114       gimple_stmt_iterator bi;
3115       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3116         {
3117           gimple stmt = gsi_stmt (bi);
3118
3119           if (gimple_code (stmt) == GIMPLE_PREDICT)
3120             {
3121               gsi_remove (&bi, true);
3122               continue;
3123             }
3124           else if (is_gimple_call (stmt))
3125             {
3126               tree fndecl = gimple_call_fndecl (stmt);
3127
3128               if ((fndecl
3129                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3130                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3131                    && gimple_call_num_args (stmt) == 2)
3132                   || (gimple_call_internal_p (stmt)
3133                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3134                 {
3135                   var = gimple_call_lhs (stmt);
3136                   if (var)
3137                     {
3138                       ass_stmt
3139                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3140                       gsi_replace (&bi, ass_stmt, true);
3141                     }
3142                   else
3143                     {
3144                       gsi_remove (&bi, true);
3145                       continue;
3146                     }
3147                 }
3148             }
3149           gsi_next (&bi);
3150         }
3151     }
3152   return 0;
3153 }
3154
3155 } // anon namespace
3156
3157 gimple_opt_pass *
3158 make_pass_strip_predict_hints (gcc::context *ctxt)
3159 {
3160   return new pass_strip_predict_hints (ctxt);
3161 }
3162
3163 /* Rebuild function frequencies.  Passes are in general expected to
3164    maintain profile by hand, however in some cases this is not possible:
3165    for example when inlining several functions with loops freuqencies might run
3166    out of scale and thus needs to be recomputed.  */
3167
3168 void
3169 rebuild_frequencies (void)
3170 {
3171   timevar_push (TV_REBUILD_FREQUENCIES);
3172
3173   /* When the max bb count in the function is small, there is a higher
3174      chance that there were truncation errors in the integer scaling
3175      of counts by inlining and other optimizations. This could lead
3176      to incorrect classification of code as being cold when it isn't.
3177      In that case, force the estimation of bb counts/frequencies from the
3178      branch probabilities, rather than computing frequencies from counts,
3179      which may also lead to frequencies incorrectly reduced to 0. There
3180      is less precision in the probabilities, so we only do this for small
3181      max counts.  */
3182   gcov_type count_max = 0;
3183   basic_block bb;
3184   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3185     count_max = MAX (bb->count, count_max);
3186
3187   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3188       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3189           && count_max < REG_BR_PROB_BASE/10))
3190     {
3191       loop_optimizer_init (0);
3192       add_noreturn_fake_exit_edges ();
3193       mark_irreducible_loops ();
3194       connect_infinite_loops_to_exit ();
3195       estimate_bb_frequencies (true);
3196       remove_fake_exit_edges ();
3197       loop_optimizer_finalize ();
3198     }
3199   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3200     counts_to_freqs ();
3201   else
3202     gcc_unreachable ();
3203   timevar_pop (TV_REBUILD_FREQUENCIES);
3204 }