/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "calls.h"
#include "tm_p.h"
#include "cfganal.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "profile.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
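
/* Worked example of the macro above (illustrative; assumes the usual
   REG_BR_PROB_BASE of 10000): HITRATE (99) == (99 * 10000 + 50) / 100
   == 9900, so a 99% expected hitrate is stored as 9900 out of 10000,
   while HITRATE (50) == 5000 represents an even split; the "+ 50" term
   merely rounds the division to nearest.  */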
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
        return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
              / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
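
/* Illustrative sketch of the thresholds above, assuming the documented
   default hot-bb-frequency-fraction of 1000: a block stops being
   considered hot once its frequency falls below 1/1000 of the function
   entry block's frequency, and a function known to execute only once is
   additionally required to reach 2/3 of the entry frequency.  */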
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}
/* Return TRUE if profile count COUNT is considered to be hot.  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect never being executed.  */

static bool
probably_never_executed (struct function *fun,
                         gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
	     unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
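
/* Illustrative numbers for the count test above, assuming the documented
   default unlikely-bb-count-fraction of 20: with profile_info->runs ==
   1000 training runs, a block is "probably never executed" only when
   count * 20 < 1000, i.e. it ran fewer than once per 20 training runs.  */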
/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}

/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}
/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}
/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
          || (bb && !maybe_hot_bb_p (cfun, bb)));
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}
/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}
/* Return TRUE when the current insn should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current insn should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}
/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
          while (l != loop && !l->next)
            l = loop_outer (l);
          if (l != loop)
            l = l->next;
        }
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}
/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
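
/* Example of the cutoff above, assuming the documented default
   predictable-branch-outcome of 2 and REG_BR_PROB_BASE of 10000: an edge
   counts as well predictable when its probability is at most
   2 * 10000 / 100 == 200 (2%) or at least 9800 (98%).  */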
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
        && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
/* Structure representing predictions in tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
/* Return true when the probability PROB of an edge is reliable.

   The profile guessing code is good at predicting the branch outcome (i.e.
   taken/not taken); it is right slightly over 75% of the time.  It is,
   however, notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn blocks and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment it
   simply checks for the noreturn heuristic, which is the only one giving a
   probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find it useful
   in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
                gen_rtx_CONCAT (VOIDmode,
                                GEN_INT ((int) predictor),
                                GEN_INT ((int) probability)));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
       1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}
/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
          && any_condjump_p (insn)
          && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec %" PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %" PRId64, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}
/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
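
/* For example (assuming REG_BR_PROB_BASE of 10000), a block with three
   normal successors gets (10000 + 1) / 3 == 3333 on each edge; the
   nedges / 2 term just rounds the division to nearest.  */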
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
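
  /* Worked example of the Dempster-Shafer combination above (in
     REG_BR_PROB_BASE == 10000 units): combining predictions of 7000 (70%)
     and 8000 (80%) gives d = 7000*8000 + 3000*2000 = 62,000,000 and a
     combined probability of 7000 * 8000 * 10000 / 62,000,000 ~= 9032
     (90.3%), i.e. two agreeing predictors reinforce each other.  */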
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}
/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
  edge_prediction **preds = bb_predictions->get (bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it!  */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }
  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}
/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */
static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
	  ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
	compare_count = 0;
      if (wi::neg_p (loop_count))
	loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      if (!overall_overflow)
	predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }
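
  /* Worked example of the constant case above: for
       for (i = 0; i < 100; i++) if (i < 30) ...
     we get loop_count = (100 - 0) / 1 = 100 and compare_count =
     (30 - 0) / 1 = 30, so the then edge is predicted with probability
     30/100 of REG_BR_PROB_BASE, i.e. 30%.  */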
  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */
static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}
/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
	  exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
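
      /* For instance (assuming REG_BR_PROB_BASE of 10000), an exit from a
	 loop predicted to iterate 100 times receives probability
	 (10000 + 50) / 100 == 100, i.e. roughly a 1% chance of exiting per
	 iteration; the nitercst / 2 term rounds the division to nearest.  */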
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);
      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
		 with the pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by the number
		 of exits is very rough.  It should compute the number of
		 exits taken in each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in the perl benchmark
		 containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
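
	  /* Sketch of the arithmetic above: with, say, a 90% hitrate for
	     PRED_LOOP_EXIT and four exits, each exit edge would receive
	     (10000 - 9000) / 4 == 250, safely above the HITRATE (2) == 200
	     floor (both numbers assume REG_BR_PROB_BASE of 10000; the
	     actual hitrate comes from predict.def).  */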
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}
static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}
      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
/* Return the constant EXPR will likely have at execution time, or NULL if
   unknown.  The function is used by the builtin_expect branch predictor, so
   the evidence must come from this construct and additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}
/* Predict using opcode of the last statement in basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
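
  /* Usage sketch for the branch above: for source like
       if (__builtin_expect (x != 0, 1)) ...
     the then edge would be predicted taken with HITRATE (90) == 9000,
     assuming the documented default builtin-expect-probability of 90
     percent and REG_BR_PROB_BASE of 10000.  */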
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
/* Try to guess whether the return value VAL indicates an error code.  */
2060 static enum br_predictor
2061 return_prediction (tree val, enum prediction *prediction)
2063 /* VOID. */
2064 if (!val)
2065 return PRED_NO_PREDICTION;
2066 /* Different heuristics for pointers and scalars. */
2067 if (POINTER_TYPE_P (TREE_TYPE (val)))
2069 /* NULL is usually not returned. */
2070 if (integer_zerop (val))
2072 *prediction = NOT_TAKEN;
2073 return PRED_NULL_RETURN;
2076 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2078 /* Negative return values are often used to indicate
2079 errors. */
2080 if (TREE_CODE (val) == INTEGER_CST
2081 && tree_int_cst_sgn (val) < 0)
2083 *prediction = NOT_TAKEN;
2084 return PRED_NEGATIVE_RETURN;
/* Constant return values seem to be commonly taken.
   Zero/one often represent booleans, so exclude them from the
   heuristics.  */
2089 if (TREE_CONSTANT (val)
2090 && (!integer_zerop (val) && !integer_onep (val)))
2092 *prediction = TAKEN;
2093 return PRED_CONST_RETURN;
2096 return PRED_NO_PREDICTION;
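/* For illustration (hypothetical user code):

     if (!p)
       return NULL;
     ...
     return -1;

   The NULL return matches PRED_NULL_RETURN and the negative constant
   matches PRED_NEGATIVE_RETURN; both predict the paths reaching them
   as not taken.  */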
/* Find the basic block holding the return expression, look up the
   possible return value, and try to apply the return-prediction
   heuristics to it.  */
2101 static void
2102 apply_return_prediction (void)
2104 greturn *return_stmt = NULL;
2105 tree return_val;
2106 edge e;
2107 gphi *phi;
2108 int phi_num_args, i;
2109 enum br_predictor pred;
2110 enum prediction direction;
2111 edge_iterator ei;
2113 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2115 gimple last = last_stmt (e->src);
2116 if (last
2117 && gimple_code (last) == GIMPLE_RETURN)
2119 return_stmt = as_a <greturn *> (last);
2120 break;
2123 if (!e)
2124 return;
2125 return_val = gimple_return_retval (return_stmt);
2126 if (!return_val)
2127 return;
2128 if (TREE_CODE (return_val) != SSA_NAME
2129 || !SSA_NAME_DEF_STMT (return_val)
2130 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2131 return;
2132 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2133 phi_num_args = gimple_phi_num_args (phi);
2134 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
/* Avoid the degenerate case where all return values from the function
   belong to the same category (i.e. they are all positive constants),
   in which case we can hardly say anything useful about them.  */
2139 for (i = 1; i < phi_num_args; i++)
2140 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2141 break;
2142 if (i != phi_num_args)
2143 for (i = 0; i < phi_num_args; i++)
2145 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2146 if (pred != PRED_NO_PREDICTION)
2147 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2148 direction);
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such basic blocks as unlikely.  */
2156 static void
2157 tree_bb_level_predictions (void)
2159 basic_block bb;
2160 bool has_return_edges = false;
2161 edge e;
2162 edge_iterator ei;
2164 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2165 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2167 has_return_edges = true;
2168 break;
2171 apply_return_prediction ();
2173 FOR_EACH_BB_FN (bb, cfun)
2175 gimple_stmt_iterator gsi;
2177 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2179 gimple stmt = gsi_stmt (gsi);
2180 tree decl;
2182 if (is_gimple_call (stmt))
2184 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2185 && has_return_edges)
2186 predict_paths_leading_to (bb, PRED_NORETURN,
2187 NOT_TAKEN);
2188 decl = gimple_call_fndecl (stmt);
2189 if (decl
2190 && lookup_attribute ("cold",
2191 DECL_ATTRIBUTES (decl)))
2192 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2193 NOT_TAKEN);
2195 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2197 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2198 gimple_predict_outcome (stmt));
2199 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2200 hints to callers. */
2206 #ifdef ENABLE_CHECKING
2208 /* Callback for hash_map::traverse, asserts that the pointer map is
2209 empty. */
2211 bool
2212 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2213 void *)
2215 gcc_assert (!value);
2216 return false;
2218 #endif
2220 /* Predict branch probabilities and estimate profile for basic block BB. */
2222 static void
2223 tree_estimate_probability_bb (basic_block bb)
2225 edge e;
2226 edge_iterator ei;
2227 gimple last;
2229 FOR_EACH_EDGE (e, ei, bb->succs)
2231 /* Predict edges to user labels with attributes. */
2232 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2234 gimple_stmt_iterator gi;
2235 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2237 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2238 tree decl;
2240 if (!label_stmt)
2241 break;
2242 decl = gimple_label_label (label_stmt);
2243 if (DECL_ARTIFICIAL (decl))
2244 continue;
2246 /* Finally, we have a user-defined label. */
2247 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2248 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2249 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2250 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
/* Predict early returns to be probable, as we've already taken
   care of error returns and the remaining cases are often
   fast paths through the function.
2258 Since we've already removed the return statements, we are
2259 looking for CFG like:
if (conditional)
  {
    ...
    goto return_block;
  }
2266 some other blocks
2267 return_block:
2268 return_stmt. */
2269 if (e->dest != bb->next_bb
2270 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2271 && single_succ_p (e->dest)
2272 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2273 && (last = last_stmt (e->dest)) != NULL
2274 && gimple_code (last) == GIMPLE_RETURN)
2276 edge e1;
2277 edge_iterator ei1;
2279 if (single_succ_p (bb))
2281 FOR_EACH_EDGE (e1, ei1, bb->preds)
2282 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2283 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2284 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2285 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2287 else
2288 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2289 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2290 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2291 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
/* Look for a block we are guarding (i.e. we dominate it,
   but it doesn't postdominate us).  */
2296 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2297 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2298 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2300 gimple_stmt_iterator bi;
2302 /* The call heuristic claims that a guarded function call
2303 is improbable. This is because such calls are often used
2304 to signal exceptional situations such as printing error
2305 messages. */
2306 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2307 gsi_next (&bi))
2309 gimple stmt = gsi_stmt (bi);
2310 if (is_gimple_call (stmt)
/* Const and pure calls are rarely used to signal
   something exceptional.  */
2313 && gimple_has_side_effects (stmt))
2315 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2316 break;
2321 tree_predict_by_opcode (bb);
2324 /* Predict branch probabilities and estimate profile of the tree CFG.
2325 This function can be called from the loop optimizers to recompute
2326 the profile information. */
2328 void
2329 tree_estimate_probability (void)
2331 basic_block bb;
2333 add_noreturn_fake_exit_edges ();
2334 connect_infinite_loops_to_exit ();
2335 /* We use loop_niter_by_eval, which requires that the loops have
2336 preheaders. */
2337 create_preheaders (CP_SIMPLE_PREHEADERS);
2338 calculate_dominance_info (CDI_POST_DOMINATORS);
2340 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2341 tree_bb_level_predictions ();
2342 record_loop_exits ();
2344 if (number_of_loops (cfun) > 1)
2345 predict_loops ();
2347 FOR_EACH_BB_FN (bb, cfun)
2348 tree_estimate_probability_bb (bb);
2350 FOR_EACH_BB_FN (bb, cfun)
2351 combine_predictions_for_bb (bb);
2353 #ifdef ENABLE_CHECKING
2354 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2355 #endif
2356 delete bb_predictions;
2357 bb_predictions = NULL;
2359 estimate_bb_frequencies (false);
2360 free_dominance_info (CDI_POST_DOMINATORS);
2361 remove_fake_exit_edges ();
/* Predict, by PRED, the edges entering CUR whose sources are not
   postdominated by BB, and recurse to CUR's children in the
   postdominator tree.  */
2367 static void
2368 predict_paths_for_bb (basic_block cur, basic_block bb,
2369 enum br_predictor pred,
2370 enum prediction taken,
2371 bitmap visited)
2373 edge e;
2374 edge_iterator ei;
2375 basic_block son;
/* We are looking for all edges forming the edge cut induced by the
   set of all blocks postdominated by BB.  */
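/* A sketch for intuition (hypothetical CFG, not taken from a real
   compilation):

        A
       / \
      B   C
      |
      D   <- BB

   B is postdominated by D, but A is not (A can also reach C), so the
   edge A->B lies on the cut and is the edge that gets predicted.  */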
2379 FOR_EACH_EDGE (e, ei, cur->preds)
2380 if (e->src->index >= NUM_FIXED_BLOCKS
2381 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2383 edge e2;
2384 edge_iterator ei2;
2385 bool found = false;
/* Ignore fake and EH edges; we predict them as not taken anyway.  */
2388 if (e->flags & (EDGE_EH | EDGE_FAKE))
2389 continue;
2390 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2392 /* See if there is an edge from e->src that is not abnormal
2393 and does not lead to BB. */
2394 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2395 if (e2 != e
2396 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2397 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2399 found = true;
2400 break;
/* If there is a non-abnormal path leaving e->src, predict the edge
   using the predictor.  Otherwise we need to look for paths
   leading to e->src.

   The second case may lead to an infinite loop when we are predicting
   regions that are only reachable by abnormal edges.  We simply
   prevent visiting a given BB twice.  */
2410 if (found)
2411 predict_edge_def (e, pred, taken);
2412 else if (bitmap_set_bit (visited, e->src->index))
2413 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2415 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2416 son;
2417 son = next_dom_son (CDI_POST_DOMINATORS, son))
2418 predict_paths_for_bb (son, bb, pred, taken, visited);
/* Set branch probabilities of all paths leading to BB according to
   PRED and prediction TAKEN.  */
2424 static void
2425 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2426 enum prediction taken)
2428 bitmap visited = BITMAP_ALLOC (NULL);
2429 predict_paths_for_bb (bb, bb, pred, taken, visited);
2430 BITMAP_FREE (visited);
/* Like predict_paths_leading_to but takes an edge instead of a basic
   block.  */
2435 static void
2436 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2437 enum prediction taken)
2439 bool has_nonloop_edge = false;
2440 edge_iterator ei;
2441 edge e2;
2443 basic_block bb = e->src;
2444 FOR_EACH_EDGE (e2, ei, bb->succs)
2445 if (e2->dest != e->src && e2->dest != e->dest
&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
2447 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2449 has_nonloop_edge = true;
2450 break;
2452 if (!has_nonloop_edge)
2454 bitmap visited = BITMAP_ALLOC (NULL);
2455 predict_paths_for_bb (bb, bb, pred, taken, visited);
2456 BITMAP_FREE (visited);
2458 else
2459 predict_edge_def (e, pred, taken);
2462 /* This is used to carry information about basic blocks. It is
2463 attached to the AUX field of the standard CFG block. */
2465 struct block_info
2467 /* Estimated frequency of execution of basic_block. */
2468 sreal frequency;
/* Used to keep the queue of basic blocks to process.  */
2471 basic_block next;
2473 /* Number of predecessors we need to visit first. */
2474 int npredecessors;
2477 /* Similar information for edges. */
2478 struct edge_prob_info
/* If the edge is a loop back edge, the probability that the edge will
   be reached given that the loop header is.  The estimated number of
   iterations of the loop can then be computed as
   1 / (1 - back_edge_prob).  */
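/* Worked example (illustrative numbers): with back_edge_prob == 0.9,
   the loop is estimated to iterate 1 / (1 - 0.9) == 10 times per
   entry into it.  */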
2483 sreal back_edge_prob;
2484 /* True if the edge is a loopback edge in the natural loop. */
2485 unsigned int back_edge:1;
2488 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2489 #undef EDGE_INFO
2490 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2492 /* Helper function for estimate_bb_frequencies.
2493 Propagate the frequencies in blocks marked in
2494 TOVISIT, starting in HEAD. */
2496 static void
2497 propagate_freq (basic_block head, bitmap tovisit)
2499 basic_block bb;
2500 basic_block last;
2501 unsigned i;
2502 edge e;
2503 basic_block nextbb;
2504 bitmap_iterator bi;
/* For each basic block we need to visit, count the number of its
   predecessors that must be visited first.  */
2508 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2510 edge_iterator ei;
2511 int count = 0;
2513 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2515 FOR_EACH_EDGE (e, ei, bb->preds)
2517 bool visit = bitmap_bit_p (tovisit, e->src->index);
2519 if (visit && !(e->flags & EDGE_DFS_BACK))
2520 count++;
2521 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2522 fprintf (dump_file,
2523 "Irreducible region hit, ignoring edge to %i->%i\n",
2524 e->src->index, bb->index);
2526 BLOCK_INFO (bb)->npredecessors = count;
/* When the function never returns, we will never process the exit
   block.  */
2528 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2529 bb->count = bb->frequency = 0;
2532 BLOCK_INFO (head)->frequency = 1;
2533 last = head;
2534 for (bb = head; bb; bb = nextbb)
2536 edge_iterator ei;
2537 sreal cyclic_probability = 0;
2538 sreal frequency = 0;
2540 nextbb = BLOCK_INFO (bb)->next;
2541 BLOCK_INFO (bb)->next = NULL;
2543 /* Compute frequency of basic block. */
2544 if (bb != head)
2546 #ifdef ENABLE_CHECKING
2547 FOR_EACH_EDGE (e, ei, bb->preds)
2548 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2549 || (e->flags & EDGE_DFS_BACK));
2550 #endif
2552 FOR_EACH_EDGE (e, ei, bb->preds)
2553 if (EDGE_INFO (e)->back_edge)
2555 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2557 else if (!(e->flags & EDGE_DFS_BACK))
2559 /* frequency += (e->probability
2560 * BLOCK_INFO (e->src)->frequency /
2561 REG_BR_PROB_BASE); */
2563 sreal tmp = e->probability;
2564 tmp *= BLOCK_INFO (e->src)->frequency;
2565 tmp *= real_inv_br_prob_base;
2566 frequency += tmp;
2569 if (cyclic_probability == 0)
2571 BLOCK_INFO (bb)->frequency = frequency;
2573 else
2575 if (cyclic_probability > real_almost_one)
2576 cyclic_probability = real_almost_one;
2578 /* BLOCK_INFO (bb)->frequency = frequency
2579 / (1 - cyclic_probability) */
2581 cyclic_probability = sreal (1) - cyclic_probability;
2582 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2586 bitmap_clear_bit (tovisit, bb->index);
2588 e = find_edge (bb, head);
2589 if (e)
2591 /* EDGE_INFO (e)->back_edge_prob
2592 = ((e->probability * BLOCK_INFO (bb)->frequency)
2593 / REG_BR_PROB_BASE); */
2595 sreal tmp = e->probability;
2596 tmp *= BLOCK_INFO (bb)->frequency;
2597 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2600 /* Propagate to successor blocks. */
2601 FOR_EACH_EDGE (e, ei, bb->succs)
2602 if (!(e->flags & EDGE_DFS_BACK)
2603 && BLOCK_INFO (e->dest)->npredecessors)
2605 BLOCK_INFO (e->dest)->npredecessors--;
2606 if (!BLOCK_INFO (e->dest)->npredecessors)
2608 if (!nextbb)
2609 nextbb = e->dest;
2610 else
2611 BLOCK_INFO (last)->next = e->dest;
2613 last = e->dest;
/* Estimate frequencies in loops at the same nest level.  */
2621 static void
2622 estimate_loops_at_level (struct loop *first_loop)
2624 struct loop *loop;
2626 for (loop = first_loop; loop; loop = loop->next)
2628 edge e;
2629 basic_block *bbs;
2630 unsigned i;
2631 bitmap tovisit = BITMAP_ALLOC (NULL);
2633 estimate_loops_at_level (loop->inner);
2635 /* Find current loop back edge and mark it. */
2636 e = loop_latch_edge (loop);
2637 EDGE_INFO (e)->back_edge = 1;
2639 bbs = get_loop_body (loop);
2640 for (i = 0; i < loop->num_nodes; i++)
2641 bitmap_set_bit (tovisit, bbs[i]->index);
2642 free (bbs);
2643 propagate_freq (loop->header, tovisit);
2644 BITMAP_FREE (tovisit);
/* Propagate frequencies through the structure of loops.  */
2650 static void
2651 estimate_loops (void)
2653 bitmap tovisit = BITMAP_ALLOC (NULL);
2654 basic_block bb;
2656 /* Start by estimating the frequencies in the loops. */
2657 if (number_of_loops (cfun) > 1)
2658 estimate_loops_at_level (current_loops->tree_root->inner);
2660 /* Now propagate the frequencies through all the blocks. */
2661 FOR_ALL_BB_FN (bb, cfun)
2663 bitmap_set_bit (tovisit, bb->index);
2665 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2666 BITMAP_FREE (tovisit);
2669 /* Drop the profile for NODE to guessed, and update its frequency based on
2670 whether it is expected to be hot given the CALL_COUNT. */
2672 static void
2673 drop_profile (struct cgraph_node *node, gcov_type call_count)
2675 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2676 /* In the case where this was called by another function with a
2677 dropped profile, call_count will be 0. Since there are no
2678 non-zero call counts to this function, we don't know for sure
2679 whether it is hot, and therefore it will be marked normal below. */
2680 bool hot = maybe_hot_count_p (NULL, call_count);
2682 if (dump_file)
2683 fprintf (dump_file,
2684 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2685 node->name (), node->order,
2686 hot ? "Function is hot" : "Function is normal");
2687 /* We only expect to miss profiles for functions that are reached
2688 via non-zero call edges in cases where the function may have
2689 been linked from another module or library (COMDATs and extern
2690 templates). See the comments below for handle_missing_profiles.
2691 Also, only warn in cases where the missing counts exceed the
2692 number of training runs. In certain cases with an execv followed
2693 by a no-return call the profile for the no-return call is not
2694 dumped and there can be a mismatch. */
2695 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2696 && call_count > profile_info->runs)
2698 if (flag_profile_correction)
2700 if (dump_file)
2701 fprintf (dump_file,
2702 "Missing counts for called function %s/%i\n",
2703 node->name (), node->order);
2705 else
2706 warning (0, "Missing counts for called function %s/%i",
2707 node->name (), node->order);
2710 profile_status_for_fn (fn)
2711 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2712 node->frequency
2713 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2716 /* In the case of COMDAT routines, multiple object files will contain the same
2717 function and the linker will select one for the binary. In that case
all the other copies from the profile-instrumented binary will be missing
2719 profile counts. Look for cases where this happened, due to non-zero
2720 call counts going to 0-count functions, and drop the profile to guessed
2721 so that we can use the estimated probabilities and avoid optimizing only
2722 for size.
2724 The other case where the profile may be missing is when the routine
2725 is not going to be emitted to the object file, e.g. for "extern template"
2726 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2727 all other cases of non-zero calls to 0-count functions. */
2729 void
2730 handle_missing_profiles (void)
2732 struct cgraph_node *node;
2733 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2734 vec<struct cgraph_node *> worklist;
2735 worklist.create (64);
/* See if a 0-count function has non-0-count callers.  In this case we
   lost some profile.  Drop the function's profile to PROFILE_GUESSED.  */
2739 FOR_EACH_DEFINED_FUNCTION (node)
2741 struct cgraph_edge *e;
2742 gcov_type call_count = 0;
2743 gcov_type max_tp_first_run = 0;
2744 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2746 if (node->count)
2747 continue;
2748 for (e = node->callers; e; e = e->next_caller)
2750 call_count += e->count;
2752 if (e->caller->tp_first_run > max_tp_first_run)
2753 max_tp_first_run = e->caller->tp_first_run;
/* If the time profile is missing, assign the maximum that comes from
   the caller functions.  */
2758 if (!node->tp_first_run && max_tp_first_run)
2759 node->tp_first_run = max_tp_first_run + 1;
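/* Illustration of the threshold below (assuming the default value of
   20 for --param unlikely-bb-count-fraction): a 0-count function whose
   callers report at least one call per 20 training runs is treated as
   having lost its profile and is dropped to guessed.  */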
2761 if (call_count
2762 && fn && fn->cfg
2763 && (call_count * unlikely_count_fraction >= profile_info->runs))
2765 drop_profile (node, call_count);
2766 worklist.safe_push (node);
2770 /* Propagate the profile dropping to other 0-count COMDATs that are
2771 potentially called by COMDATs we already dropped the profile on. */
2772 while (worklist.length () > 0)
2774 struct cgraph_edge *e;
2776 node = worklist.pop ();
for (e = node->callees; e; e = e->next_callee)
2779 struct cgraph_node *callee = e->callee;
2780 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2782 if (callee->count > 0)
2783 continue;
2784 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2785 && profile_status_for_fn (fn) == PROFILE_READ)
2787 drop_profile (node, 0);
2788 worklist.safe_push (callee);
2792 worklist.release ();
2795 /* Convert counts measured by profile driven feedback to frequencies.
2796 Return nonzero iff there was any nonzero execution count. */
gcov_type
counts_to_freqs (void)
2801 gcov_type count_max, true_count_max = 0;
2802 basic_block bb;
/* Don't overwrite the estimated frequencies when the profile for
   the function is missing.  We may drop this function to PROFILE_GUESSED
   later in drop_profile ().  */
2807 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2808 return 0;
2810 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2811 true_count_max = MAX (bb->count, true_count_max);
2813 count_max = MAX (true_count_max, 1);
2814 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2815 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
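/* Worked example (illustrative numbers): with count_max == 1000 and
   bb->count == 250, the line above yields
   (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly BB_FREQ_MAX / 4,
   rounded to the nearest integer.  */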
2817 return true_count_max;
/* Return true if the function is likely to be expensive, so there is
   no point in optimizing the performance of the prologue or epilogue, or
   in doing inlining, at the expense of code size growth.  THRESHOLD is
   the limit on the average number of instructions the function may
   execute while still being considered inexpensive.  */
2825 bool
2826 expensive_function_p (int threshold)
2828 unsigned int sum = 0;
2829 basic_block bb;
2830 unsigned int limit;
/* We cannot compute this accurately for large thresholds due to the
   scaled frequencies.  */
2834 gcc_assert (threshold <= BB_FREQ_MAX);
/* Frequencies are out of range.  This either means that the function
   contains an internal loop executing more than BB_FREQ_MAX times, or
   that profile feedback is available but the function has not been
   executed at all.  */
2839 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2840 return true;
2842 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2843 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2844 FOR_EACH_BB_FN (bb, cfun)
2846 rtx_insn *insn;
2848 FOR_BB_INSNS (bb, insn)
2849 if (active_insn_p (insn))
2851 sum += bb->frequency;
2852 if (sum > limit)
2853 return true;
2857 return false;
2860 /* Estimate and propagate basic block frequencies using the given branch
2861 probabilities. If FORCE is true, the frequencies are used to estimate
2862 the counts even when there are already non-zero profile counts. */
2864 void
2865 estimate_bb_frequencies (bool force)
2867 basic_block bb;
2868 sreal freq_max;
2870 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2872 static int real_values_initialized = 0;
2874 if (!real_values_initialized)
2876 real_values_initialized = 1;
2877 real_br_prob_base = REG_BR_PROB_BASE;
2878 real_bb_freq_max = BB_FREQ_MAX;
2879 real_one_half = sreal (1, -1);
2880 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2881 real_almost_one = sreal (1) - real_inv_br_prob_base;
2884 mark_dfs_back_edges ();
2886 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2887 REG_BR_PROB_BASE;
2889 /* Set up block info for each basic block. */
2890 alloc_aux_for_blocks (sizeof (block_info));
2891 alloc_aux_for_edges (sizeof (edge_prob_info));
2892 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2894 edge e;
2895 edge_iterator ei;
2897 FOR_EACH_EDGE (e, ei, bb->succs)
2899 EDGE_INFO (e)->back_edge_prob = e->probability;
2900 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2904 /* First compute frequencies locally for each loop from innermost
2905 to outermost to examine frequencies for back edges. */
2906 estimate_loops ();
2908 freq_max = 0;
2909 FOR_EACH_BB_FN (bb, cfun)
2910 if (freq_max < BLOCK_INFO (bb)->frequency)
2911 freq_max = BLOCK_INFO (bb)->frequency;
2913 freq_max = real_bb_freq_max / freq_max;
2914 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2916 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2917 bb->frequency = tmp.to_int ();
2920 free_aux_for_blocks ();
2921 free_aux_for_edges ();
2923 compute_function_frequency ();
/* Decide whether the function is hot, cold, or unlikely executed.  */
2927 void
2928 compute_function_frequency (void)
2930 basic_block bb;
2931 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2933 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2934 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2935 node->only_called_at_startup = true;
2936 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2937 node->only_called_at_exit = true;
2939 if (profile_status_for_fn (cfun) != PROFILE_READ)
2941 int flags = flags_from_decl_or_type (current_function_decl);
2942 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2943 != NULL)
2944 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2945 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2946 != NULL)
2947 node->frequency = NODE_FREQUENCY_HOT;
2948 else if (flags & ECF_NORETURN)
2949 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2950 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2951 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2952 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2953 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2954 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2955 return;
/* Only try to drop the function into unlikely-executed the first time;
   after inlining, roundoff errors may confuse us.
   The ipa-profile pass will drop functions called only from unlikely
   functions to unlikely, and that is most of what we care about.  */
2962 if (!cfun->after_inlining)
2963 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2964 FOR_EACH_BB_FN (bb, cfun)
2966 if (maybe_hot_bb_p (cfun, bb))
2968 node->frequency = NODE_FREQUENCY_HOT;
2969 return;
2971 if (!probably_never_executed_bb_p (cfun, bb))
2972 node->frequency = NODE_FREQUENCY_NORMAL;
2976 /* Build PREDICT_EXPR. */
2977 tree
2978 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2980 tree t = build1 (PREDICT_EXPR, void_type_node,
2981 build_int_cst (integer_type_node, predictor));
2982 SET_PREDICT_EXPR_OUTCOME (t, taken);
2983 return t;
2986 const char *
2987 predictor_name (enum br_predictor predictor)
2989 return predictor_info[predictor].name;
2992 /* Predict branch probabilities and estimate profile of the tree CFG. */
2994 namespace {
2996 const pass_data pass_data_profile =
2998 GIMPLE_PASS, /* type */
2999 "profile_estimate", /* name */
3000 OPTGROUP_NONE, /* optinfo_flags */
3001 TV_BRANCH_PROB, /* tv_id */
3002 PROP_cfg, /* properties_required */
3003 0, /* properties_provided */
3004 0, /* properties_destroyed */
3005 0, /* todo_flags_start */
3006 0, /* todo_flags_finish */
3009 class pass_profile : public gimple_opt_pass
3011 public:
3012 pass_profile (gcc::context *ctxt)
3013 : gimple_opt_pass (pass_data_profile, ctxt)
3016 /* opt_pass methods: */
3017 virtual bool gate (function *) { return flag_guess_branch_prob; }
3018 virtual unsigned int execute (function *);
3020 }; // class pass_profile
3022 unsigned int
3023 pass_profile::execute (function *fun)
3025 unsigned nb_loops;
3027 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3028 return 0;
3030 loop_optimizer_init (LOOPS_NORMAL);
3031 if (dump_file && (dump_flags & TDF_DETAILS))
3032 flow_loops_dump (dump_file, NULL, 0);
3034 mark_irreducible_loops ();
3036 nb_loops = number_of_loops (fun);
3037 if (nb_loops > 1)
3038 scev_initialize ();
3040 tree_estimate_probability ();
3042 if (nb_loops > 1)
3043 scev_finalize ();
3045 loop_optimizer_finalize ();
3046 if (dump_file && (dump_flags & TDF_DETAILS))
3047 gimple_dump_cfg (dump_file, dump_flags);
3048 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3049 profile_status_for_fn (fun) = PROFILE_GUESSED;
3050 return 0;
3053 } // anon namespace
3055 gimple_opt_pass *
3056 make_pass_profile (gcc::context *ctxt)
3058 return new pass_profile (ctxt);
3061 namespace {
3063 const pass_data pass_data_strip_predict_hints =
3065 GIMPLE_PASS, /* type */
3066 "*strip_predict_hints", /* name */
3067 OPTGROUP_NONE, /* optinfo_flags */
3068 TV_BRANCH_PROB, /* tv_id */
3069 PROP_cfg, /* properties_required */
3070 0, /* properties_provided */
3071 0, /* properties_destroyed */
3072 0, /* todo_flags_start */
3073 0, /* todo_flags_finish */
3076 class pass_strip_predict_hints : public gimple_opt_pass
3078 public:
3079 pass_strip_predict_hints (gcc::context *ctxt)
3080 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3083 /* opt_pass methods: */
3084 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3085 virtual unsigned int execute (function *);
3087 }; // class pass_strip_predict_hints
3089 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3090 we no longer need. */
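/* For example (illustrative GIMPLE, not taken from a real dump):

     tmp_1 = __builtin_expect (cond_2, 1);
     if (tmp_1 != 0) ...

   becomes

     tmp_1 = cond_2;
     if (tmp_1 != 0) ...

   i.e. the call is replaced by a plain copy of its first argument.  */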
3091 unsigned int
3092 pass_strip_predict_hints::execute (function *fun)
3094 basic_block bb;
3095 gimple ass_stmt;
3096 tree var;
3098 FOR_EACH_BB_FN (bb, fun)
3100 gimple_stmt_iterator bi;
3101 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3103 gimple stmt = gsi_stmt (bi);
3105 if (gimple_code (stmt) == GIMPLE_PREDICT)
3107 gsi_remove (&bi, true);
3108 continue;
3110 else if (is_gimple_call (stmt))
3112 tree fndecl = gimple_call_fndecl (stmt);
3114 if ((fndecl
3115 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3116 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3117 && gimple_call_num_args (stmt) == 2)
3118 || (gimple_call_internal_p (stmt)
3119 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3121 var = gimple_call_lhs (stmt);
3122 if (var)
3124 ass_stmt
3125 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3126 gsi_replace (&bi, ass_stmt, true);
3128 else
3130 gsi_remove (&bi, true);
3131 continue;
3135 gsi_next (&bi);
3138 return 0;
3141 } // anon namespace
3143 gimple_opt_pass *
3144 make_pass_strip_predict_hints (gcc::context *ctxt)
3146 return new pass_strip_predict_hints (ctxt);
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies may run out of scale and thus need to be recomputed.  */
3154 void
3155 rebuild_frequencies (void)
3157 timevar_push (TV_REBUILD_FREQUENCIES);
3159 /* When the max bb count in the function is small, there is a higher
3160 chance that there were truncation errors in the integer scaling
3161 of counts by inlining and other optimizations. This could lead
3162 to incorrect classification of code as being cold when it isn't.
3163 In that case, force the estimation of bb counts/frequencies from the
3164 branch probabilities, rather than computing frequencies from counts,
3165 which may also lead to frequencies incorrectly reduced to 0. There
3166 is less precision in the probabilities, so we only do this for small
3167 max counts. */
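/* Concretely (assuming REG_BR_PROB_BASE == 10000), the cutoff below
   re-estimates frequencies whenever the largest count in the function
   is below 1000.  */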
3168 gcov_type count_max = 0;
3169 basic_block bb;
3170 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3171 count_max = MAX (bb->count, count_max);
3173 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3174 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3175 && count_max < REG_BR_PROB_BASE/10))
3177 loop_optimizer_init (0);
3178 add_noreturn_fake_exit_edges ();
3179 mark_irreducible_loops ();
3180 connect_infinite_loops_to_exit ();
3181 estimate_bb_frequencies (true);
3182 remove_fake_exit_edges ();
3183 loop_optimizer_finalize ();
3185 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3186 counts_to_freqs ();
3187 else
3188 gcc_unreachable ();
3189 timevar_pop (TV_REBUILD_FREQUENCIES);