gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "backend.h"
  34 #include "rtl.h"
  35 #include "tree.h"
  36 #include "gimple.h"
  37 #include "cfghooks.h"
  38 #include "tree-pass.h"
  39 #include "ssa.h"
  40 #include "emit-rtl.h"
  41 #include "cgraph.h"
  42 #include "coverage.h"
  43 #include "diagnostic-core.h"
  44 #include "gimple-predict.h"
  45 #include "fold-const.h"
  46 #include "calls.h"
  47 #include "cfganal.h"
  48 #include "profile.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "cfgloop.h"
  52 #include "gimple-iterator.h"
  53 #include "tree-cfg.h"
  54 #include "tree-ssa-loop-niter.h"
  55 #include "tree-ssa-loop.h"
  56 #include "tree-scalar-evolution.h"
  57
  58 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  59                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  60 static sreal real_almost_one, real_br_prob_base,
  61              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  62
  63 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  64 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  65 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  66 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  67 static bool can_predict_insn_p (const rtx_insn *);
  68
  69 /* Information we hold about each branch predictor.
  70    Filled using information from predict.def.  */
  71
  72 struct predictor_info
  73 {
  74   const char *const name;       /* Name used in the debugging dumps.  */
  75   const int hitrate;            /* Expected hitrate used by
  76                                    predict_insn_def call.  */
  77   const int flags;
  78 };
  79
  80 /* Use given predictor without Dempster-Shaffer theory if it matches
  81    using first_match heuristics.  */
  82 #define PRED_FLAG_FIRST_MATCH 1
  83
  84 /* Recompute hitrate in percent to our representation.  */
  85
  86 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
  87
  88 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
  89 static const struct predictor_info predictor_info[]= {
  90 #include "predict.def"
  91
  92   /* Upper bound on predictors.  */
  93   {NULL, 0, 0}
  94 };
  95 #undef DEF_PREDICTOR
  96
  97 /* Return TRUE if frequency FREQ is considered to be hot.  */
  98
  99 static inline bool
 100 maybe_hot_frequency_p (struct function *fun, int freq)
 101 {
 102   struct cgraph_node *node = cgraph_node::get (fun->decl);
 103   if (!profile_info
 104       || !opt_for_fn (fun->decl, flag_branch_probabilities))
 105     {
 106       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 107         return false;
 108       if (node->frequency == NODE_FREQUENCY_HOT)
 109         return true;
 110     }
 111   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 112     return true;
 113   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 114       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 115     return false;
 116   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 117     return false;
 118   if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
 119       < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 120     return false;
 121   return true;
 122 }
 123
 124 static gcov_type min_count = -1;
 125
 126 /* Determine the threshold for hot BB counts.  */
 127
 128 gcov_type
 129 get_hot_bb_threshold ()
 130 {
 131   gcov_working_set_t *ws;
 132   if (min_count == -1)
 133     {
 134       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 135       gcc_assert (ws);
 136       min_count = ws->min_counter;
 137     }
 138   return min_count;
 139 }
 140
 141 /* Set the threshold for hot BB counts.  */
 142
 143 void
 144 set_hot_bb_threshold (gcov_type min)
 145 {
 146   min_count = min;
 147 }
 148
 149 /* Return TRUE if frequency FREQ is considered to be hot.  */
 150
 151 bool
 152 maybe_hot_count_p (struct function *fun, gcov_type count)
 153 {
 154   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 155     return true;
 156   /* Code executed at most once is not hot.  */
 157   if (profile_info->runs >= count)
 158     return false;
 159   return (count >= get_hot_bb_threshold ());
 160 }
 161
 162 /* Return true in case BB can be CPU intensive and should be optimized
 163    for maximal performance.  */
 164
 165 bool
 166 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 167 {
 168   gcc_checking_assert (fun);
 169   if (profile_status_for_fn (fun) == PROFILE_READ)
 170     return maybe_hot_count_p (fun, bb->count);
 171   return maybe_hot_frequency_p (fun, bb->frequency);
 172 }
 173
 174 /* Return true in case BB can be CPU intensive and should be optimized
 175    for maximal performance.  */
 176
 177 bool
 178 maybe_hot_edge_p (edge e)
 179 {
 180   if (profile_status_for_fn (cfun) == PROFILE_READ)
 181     return maybe_hot_count_p (cfun, e->count);
 182   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 183 }
 184
 185 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 186    node frequency reflects never being executed.  */
 187
 188 static bool
 189 probably_never_executed (struct function *fun,
 190                          gcov_type count, int frequency)
 191 {
 192   gcc_checking_assert (fun);
 193   if (profile_status_for_fn (fun) == PROFILE_READ)
 194     {
 195       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 196       if (count * unlikely_count_fraction >= profile_info->runs)
 197         return false;
 198       if (!frequency)
 199         return true;
 200       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
 201         return false;
 202       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
 203         {
 204           gcov_type computed_count;
 205           /* Check for possibility of overflow, in which case entry bb count
 206              is large enough to do the division first without losing much
 207              precision.  */
 208           if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
 209               REG_BR_PROB_BASE)
 210             {
 211               gcov_type scaled_count
 212                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
 213              unlikely_count_fraction;
 214               computed_count = RDIV (scaled_count,
 215                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 216             }
 217           else
 218             {
 219               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
 220                                      ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
 221               computed_count *= frequency * unlikely_count_fraction;
 222             }
 223           if (computed_count >= profile_info->runs)
 224             return false;
 225         }
 226       return true;
 227     }
 228   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
 229       && (cgraph_node::get (fun->decl)->frequency
 230           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 231     return true;
 232   return false;
 233 }
 234
 235
 236 /* Return true in case BB is probably never executed.  */
 237
 238 bool
 239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 240 {
 241   return probably_never_executed (fun, bb->count, bb->frequency);
 242 }
 243
 244
 245 /* Return true in case edge E is probably never executed.  */
 246
 247 bool
 248 probably_never_executed_edge_p (struct function *fun, edge e)
 249 {
 250   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 251 }
 252
 253 /* Return true when current function should always be optimized for size.  */
 254
 255 bool
 256 optimize_function_for_size_p (struct function *fun)
 257 {
 258   if (!fun || !fun->decl)
 259     return optimize_size;
 260   cgraph_node *n = cgraph_node::get (fun->decl);
 261   return n && n->optimize_for_size_p ();
 262 }
 263
 264 /* Return true when current function should always be optimized for speed.  */
 265
 266 bool
 267 optimize_function_for_speed_p (struct function *fun)
 268 {
 269   return !optimize_function_for_size_p (fun);
 270 }
 271
 272 /* Return the optimization type that should be used for the function FUN.  */
 273
 274 optimization_type
 275 function_optimization_type (struct function *fun)
 276 {
 277   return (optimize_function_for_speed_p (fun)
 278           ? OPTIMIZE_FOR_SPEED
 279           : OPTIMIZE_FOR_SIZE);
 280 }
 281
 282 /* Return TRUE when BB should be optimized for size.  */
 283
 284 bool
 285 optimize_bb_for_size_p (const_basic_block bb)
 286 {
 287   return (optimize_function_for_size_p (cfun)
 288           || (bb && !maybe_hot_bb_p (cfun, bb)));
 289 }
 290
 291 /* Return TRUE when BB should be optimized for speed.  */
 292
 293 bool
 294 optimize_bb_for_speed_p (const_basic_block bb)
 295 {
 296   return !optimize_bb_for_size_p (bb);
 297 }
 298
 299 /* Return the optimization type that should be used for block BB.  */
 300
 301 optimization_type
 302 bb_optimization_type (const_basic_block bb)
 303 {
 304   return (optimize_bb_for_speed_p (bb)
 305           ? OPTIMIZE_FOR_SPEED
 306           : OPTIMIZE_FOR_SIZE);
 307 }
 308
 309 /* Return TRUE when BB should be optimized for size.  */
 310
 311 bool
 312 optimize_edge_for_size_p (edge e)
 313 {
 314   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 315 }
 316
 317 /* Return TRUE when BB should be optimized for speed.  */
 318
 319 bool
 320 optimize_edge_for_speed_p (edge e)
 321 {
 322   return !optimize_edge_for_size_p (e);
 323 }
 324
 325 /* Return TRUE when BB should be optimized for size.  */
 326
 327 bool
 328 optimize_insn_for_size_p (void)
 329 {
 330   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 331 }
 332
 333 /* Return TRUE when BB should be optimized for speed.  */
 334
 335 bool
 336 optimize_insn_for_speed_p (void)
 337 {
 338   return !optimize_insn_for_size_p ();
 339 }
 340
 341 /* Return TRUE when LOOP should be optimized for size.  */
 342
 343 bool
 344 optimize_loop_for_size_p (struct loop *loop)
 345 {
 346   return optimize_bb_for_size_p (loop->header);
 347 }
 348
 349 /* Return TRUE when LOOP should be optimized for speed.  */
 350
 351 bool
 352 optimize_loop_for_speed_p (struct loop *loop)
 353 {
 354   return optimize_bb_for_speed_p (loop->header);
 355 }
 356
 357 /* Return TRUE when LOOP nest should be optimized for speed.  */
 358
 359 bool
 360 optimize_loop_nest_for_speed_p (struct loop *loop)
 361 {
 362   struct loop *l = loop;
 363   if (optimize_loop_for_speed_p (loop))
 364     return true;
 365   l = loop->inner;
 366   while (l && l != loop)
 367     {
 368       if (optimize_loop_for_speed_p (l))
 369         return true;
 370       if (l->inner)
 371         l = l->inner;
 372       else if (l->next)
 373         l = l->next;
 374       else
 375         {
 376           while (l != loop && !l->next)
 377             l = loop_outer (l);
 378           if (l != loop)
 379             l = l->next;
 380         }
 381     }
 382   return false;
 383 }
 384
 385 /* Return TRUE when LOOP nest should be optimized for size.  */
 386
 387 bool
 388 optimize_loop_nest_for_size_p (struct loop *loop)
 389 {
 390   return !optimize_loop_nest_for_speed_p (loop);
 391 }
 392
 393 /* Return true when edge E is likely to be well predictable by branch
 394    predictor.  */
 395
 396 bool
 397 predictable_edge_p (edge e)
 398 {
 399   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 400     return false;
 401   if ((e->probability
 402        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 403       || (REG_BR_PROB_BASE - e->probability
 404           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 405     return true;
 406   return false;
 407 }
 408
 409
 410 /* Set RTL expansion for BB profile.  */
 411
 412 void
 413 rtl_profile_for_bb (basic_block bb)
 414 {
 415   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 416 }
 417
 418 /* Set RTL expansion for edge profile.  */
 419
 420 void
 421 rtl_profile_for_edge (edge e)
 422 {
 423   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 424 }
 425
 426 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 427 void
 428 default_rtl_profile (void)
 429 {
 430   crtl->maybe_hot_insn_p = true;
 431 }
 432
 433 /* Return true if the one of outgoing edges is already predicted by
 434    PREDICTOR.  */
 435
 436 bool
 437 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 438 {
 439   rtx note;
 440   if (!INSN_P (BB_END (bb)))
 441     return false;
 442   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 443     if (REG_NOTE_KIND (note) == REG_BR_PRED
 444         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 445       return true;
 446   return false;
 447 }
 448
 449 /*  Structure representing predictions in tree level. */
 450
 451 struct edge_prediction {
 452     struct edge_prediction *ep_next;
 453     edge ep_edge;
 454     enum br_predictor ep_predictor;
 455     int ep_probability;
 456 };
 457
 458 /* This map contains for a basic block the list of predictions for the
 459    outgoing edges.  */
 460
 461 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 462
 463 /* Return true if the one of outgoing edges is already predicted by
 464    PREDICTOR.  */
 465
 466 bool
 467 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 468 {
 469   struct edge_prediction *i;
 470   edge_prediction **preds = bb_predictions->get (bb);
 471
 472   if (!preds)
 473     return false;
 474
 475   for (i = *preds; i; i = i->ep_next)
 476     if (i->ep_predictor == predictor)
 477       return true;
 478   return false;
 479 }
 480
 481 /* Return true if the one of outgoing edges is already predicted by
 482    PREDICTOR for edge E predicted as TAKEN.  */
 483
 484 bool
 485 edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
 486 {
 487   struct edge_prediction *i;
 488   basic_block bb = e->src;
 489   edge_prediction **preds = bb_predictions->get (bb);
 490   if (!preds)
 491     return false;
 492
 493   int probability = predictor_info[(int) predictor].hitrate;
 494
 495   if (taken != TAKEN)
 496     probability = REG_BR_PROB_BASE - probability;
 497
 498   for (i = *preds; i; i = i->ep_next)
 499     if (i->ep_predictor == predictor
 500         && i->ep_edge == e
 501         && i->ep_probability == probability)
 502       return true;
 503   return false;
 504 }
 505
 506 /* Return true when the probability of edge is reliable.
 507
 508    The profile guessing code is good at predicting branch outcome (ie.
 509    taken/not taken), that is predicted right slightly over 75% of time.
 510    It is however notoriously poor on predicting the probability itself.
 511    In general the profile appear a lot flatter (with probabilities closer
 512    to 50%) than the reality so it is bad idea to use it to drive optimization
 513    such as those disabling dynamic branch prediction for well predictable
 514    branches.
 515
 516    There are two exceptions - edges leading to noreturn edges and edges
 517    predicted by number of iterations heuristics are predicted well.  This macro
 518    should be able to distinguish those, but at the moment it simply check for
 519    noreturn heuristic that is only one giving probability over 99% or bellow
 520    1%.  In future we might want to propagate reliability information across the
 521    CFG if we find this information useful on multiple places.   */
 522 static bool
 523 probability_reliable_p (int prob)
 524 {
 525   return (profile_status_for_fn (cfun) == PROFILE_READ
 526           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 527               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 528 }
 529
 530 /* Same predicate as above, working on edges.  */
 531 bool
 532 edge_probability_reliable_p (const_edge e)
 533 {
 534   return probability_reliable_p (e->probability);
 535 }
 536
 537 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 538 bool
 539 br_prob_note_reliable_p (const_rtx note)
 540 {
 541   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 542   return probability_reliable_p (XINT (note, 0));
 543 }
 544
 545 static void
 546 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 547 {
 548   gcc_assert (any_condjump_p (insn));
 549   if (!flag_guess_branch_prob)
 550     return;
 551
 552   add_reg_note (insn, REG_BR_PRED,
 553                 gen_rtx_CONCAT (VOIDmode,
 554                                 GEN_INT ((int) predictor),
 555                                 GEN_INT ((int) probability)));
 556 }
 557
 558 /* Predict insn by given predictor.  */
 559
 560 void
 561 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 562                   enum prediction taken)
 563 {
 564    int probability = predictor_info[(int) predictor].hitrate;
 565
 566    if (taken != TAKEN)
 567      probability = REG_BR_PROB_BASE - probability;
 568
 569    predict_insn (insn, predictor, probability);
 570 }
 571
 572 /* Predict edge E with given probability if possible.  */
 573
 574 void
 575 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 576 {
 577   rtx_insn *last_insn;
 578   last_insn = BB_END (e->src);
 579
 580   /* We can store the branch prediction information only about
 581      conditional jumps.  */
 582   if (!any_condjump_p (last_insn))
 583     return;
 584
 585   /* We always store probability of branching.  */
 586   if (e->flags & EDGE_FALLTHRU)
 587     probability = REG_BR_PROB_BASE - probability;
 588
 589   predict_insn (last_insn, predictor, probability);
 590 }
 591
 592 /* Predict edge E with the given PROBABILITY.  */
 593 void
 594 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 595 {
 596   if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 597       && EDGE_COUNT (e->src->succs) > 1
 598       && flag_guess_branch_prob
 599       && optimize)
 600     {
 601       struct edge_prediction *i = XNEW (struct edge_prediction);
 602       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 603
 604       i->ep_next = preds;
 605       preds = i;
 606       i->ep_probability = probability;
 607       i->ep_predictor = predictor;
 608       i->ep_edge = e;
 609     }
 610 }
 611
 612 /* Remove all predictions on given basic block that are attached
 613    to edge E.  */
 614 void
 615 remove_predictions_associated_with_edge (edge e)
 616 {
 617   if (!bb_predictions)
 618     return;
 619
 620   edge_prediction **preds = bb_predictions->get (e->src);
 621
 622   if (preds)
 623     {
 624       struct edge_prediction **prediction = preds;
 625       struct edge_prediction *next;
 626
 627       while (*prediction)
 628         {
 629           if ((*prediction)->ep_edge == e)
 630             {
 631               next = (*prediction)->ep_next;
 632               free (*prediction);
 633               *prediction = next;
 634             }
 635           else
 636             prediction = &((*prediction)->ep_next);
 637         }
 638     }
 639 }
 640
 641 /* Clears the list of predictions stored for BB.  */
 642
 643 static void
 644 clear_bb_predictions (basic_block bb)
 645 {
 646   edge_prediction **preds = bb_predictions->get (bb);
 647   struct edge_prediction *pred, *next;
 648
 649   if (!preds)
 650     return;
 651
 652   for (pred = *preds; pred; pred = next)
 653     {
 654       next = pred->ep_next;
 655       free (pred);
 656     }
 657   *preds = NULL;
 658 }
 659
 660 /* Return true when we can store prediction on insn INSN.
 661    At the moment we represent predictions only on conditional
 662    jumps, not at computed jump or other complicated cases.  */
 663 static bool
 664 can_predict_insn_p (const rtx_insn *insn)
 665 {
 666   return (JUMP_P (insn)
 667           && any_condjump_p (insn)
 668           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 669 }
 670
 671 /* Predict edge E by given predictor if possible.  */
 672
 673 void
 674 predict_edge_def (edge e, enum br_predictor predictor,
 675                   enum prediction taken)
 676 {
 677    int probability = predictor_info[(int) predictor].hitrate;
 678
 679    if (taken != TAKEN)
 680      probability = REG_BR_PROB_BASE - probability;
 681
 682    predict_edge (e, predictor, probability);
 683 }
 684
 685 /* Invert all branch predictions or probability notes in the INSN.  This needs
 686    to be done each time we invert the condition used by the jump.  */
 687
 688 void
 689 invert_br_probabilities (rtx insn)
 690 {
 691   rtx note;
 692
 693   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 694     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 695       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 696     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 697       XEXP (XEXP (note, 0), 1)
 698         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 699 }
 700
 701 /* Dump information about the branch prediction to the output file.  */
 702
 703 static void
 704 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 705                  basic_block bb, int used)
 706 {
 707   edge e;
 708   edge_iterator ei;
 709
 710   if (!file)
 711     return;
 712
 713   FOR_EACH_EDGE (e, ei, bb->succs)
 714     if (! (e->flags & EDGE_FALLTHRU))
 715       break;
 716
 717   fprintf (file, "  %s heuristics%s: %.1f%%",
 718            predictor_info[predictor].name,
 719            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 720
 721   if (bb->count)
 722     {
 723       fprintf (file, "  exec %" PRId64, bb->count);
 724       if (e)
 725         {
 726           fprintf (file, " hit %" PRId64, e->count);
 727           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 728         }
 729     }
 730
 731   fprintf (file, "\n");
 732 }
 733
 734 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 735    evenly and hope for the best.  */
 736 static void
 737 set_even_probabilities (basic_block bb)
 738 {
 739   int nedges = 0;
 740   edge e;
 741   edge_iterator ei;
 742
 743   FOR_EACH_EDGE (e, ei, bb->succs)
 744     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 745       nedges ++;
 746   FOR_EACH_EDGE (e, ei, bb->succs)
 747     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 748       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 749     else
 750       e->probability = 0;
 751 }
 752
 753 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 754    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 755
 756 static void
 757 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 758 {
 759   rtx prob_note;
 760   rtx *pnote;
 761   rtx note;
 762   int best_probability = PROB_EVEN;
 763   enum br_predictor best_predictor = END_PREDICTORS;
 764   int combined_probability = REG_BR_PROB_BASE / 2;
 765   int d;
 766   bool first_match = false;
 767   bool found = false;
 768
 769   if (!can_predict_insn_p (insn))
 770     {
 771       set_even_probabilities (bb);
 772       return;
 773     }
 774
 775   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 776   pnote = &REG_NOTES (insn);
 777   if (dump_file)
 778     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 779              bb->index);
 780
 781   /* We implement "first match" heuristics and use probability guessed
 782      by predictor with smallest index.  */
 783   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 784     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 785       {
 786         enum br_predictor predictor = ((enum br_predictor)
 787                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 788         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 789
 790         found = true;
 791         if (best_predictor > predictor)
 792           best_probability = probability, best_predictor = predictor;
 793
 794         d = (combined_probability * probability
 795              + (REG_BR_PROB_BASE - combined_probability)
 796              * (REG_BR_PROB_BASE - probability));
 797
 798         /* Use FP math to avoid overflows of 32bit integers.  */
 799         if (d == 0)
 800           /* If one probability is 0% and one 100%, avoid division by zero.  */
 801           combined_probability = REG_BR_PROB_BASE / 2;
 802         else
 803           combined_probability = (((double) combined_probability) * probability
 804                                   * REG_BR_PROB_BASE / d + 0.5);
 805       }
 806
 807   /* Decide which heuristic to use.  In case we didn't match anything,
 808      use no_prediction heuristic, in case we did match, use either
 809      first match or Dempster-Shaffer theory depending on the flags.  */
 810
 811   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 812     first_match = true;
 813
 814   if (!found)
 815     dump_prediction (dump_file, PRED_NO_PREDICTION,
 816                      combined_probability, bb, true);
 817   else
 818     {
 819       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 820                        bb, !first_match);
 821       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 822                        bb, first_match);
 823     }
 824
 825   if (first_match)
 826     combined_probability = best_probability;
 827   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 828
 829   while (*pnote)
 830     {
 831       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 832         {
 833           enum br_predictor predictor = ((enum br_predictor)
 834                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 835           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 836
 837           dump_prediction (dump_file, predictor, probability, bb,
 838                            !first_match || best_predictor == predictor);
 839           *pnote = XEXP (*pnote, 1);
 840         }
 841       else
 842         pnote = &XEXP (*pnote, 1);
 843     }
 844
 845   if (!prob_note)
 846     {
 847       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 848
 849       /* Save the prediction into CFG in case we are seeing non-degenerated
 850          conditional jump.  */
 851       if (!single_succ_p (bb))
 852         {
 853           BRANCH_EDGE (bb)->probability = combined_probability;
 854           FALLTHRU_EDGE (bb)->probability
 855             = REG_BR_PROB_BASE - combined_probability;
 856         }
 857     }
 858   else if (!single_succ_p (bb))
 859     {
 860       int prob = XINT (prob_note, 0);
 861
 862       BRANCH_EDGE (bb)->probability = prob;
 863       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 864     }
 865   else
 866     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 867 }
 868
 869 /* Combine predictions into single probability and store them into CFG.
 870    Remove now useless prediction entries.
 871    If DRY_RUN is set, only produce dumps and do not modify profile.  */
 872
 873 static void
 874 combine_predictions_for_bb (basic_block bb, bool dry_run)
 875 {
 876   int best_probability = PROB_EVEN;
 877   enum br_predictor best_predictor = END_PREDICTORS;
 878   int combined_probability = REG_BR_PROB_BASE / 2;
 879   int d;
 880   bool first_match = false;
 881   bool found = false;
 882   struct edge_prediction *pred;
 883   int nedges = 0;
 884   edge e, first = NULL, second = NULL;
 885   edge_iterator ei;
 886
 887   FOR_EACH_EDGE (e, ei, bb->succs)
 888     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 889       {
 890         nedges ++;
 891         if (first && !second)
 892           second = e;
 893         if (!first)
 894           first = e;
 895       }
 896
 897   /* When there is no successor or only one choice, prediction is easy.
 898
 899      We are lazy for now and predict only basic blocks with two outgoing
 900      edges.  It is possible to predict generic case too, but we have to
 901      ignore first match heuristics and do more involved combining.  Implement
 902      this later.  */
 903   if (nedges != 2)
 904     {
 905       if (!bb->count && !dry_run)
 906         set_even_probabilities (bb);
 907       clear_bb_predictions (bb);
 908       if (dump_file)
 909         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 910                  nedges, bb->index);
 911       return;
 912     }
 913
 914   if (dump_file)
 915     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 916
 917   edge_prediction **preds = bb_predictions->get (bb);
 918   if (preds)
 919     {
 920       /* We implement "first match" heuristics and use probability guessed
 921          by predictor with smallest index.  */
 922       for (pred = *preds; pred; pred = pred->ep_next)
 923         {
 924           enum br_predictor predictor = pred->ep_predictor;
 925           int probability = pred->ep_probability;
 926
 927           if (pred->ep_edge != first)
 928             probability = REG_BR_PROB_BASE - probability;
 929
 930           found = true;
 931           /* First match heuristics would be widly confused if we predicted
 932              both directions.  */
 933           if (best_predictor > predictor)
 934             {
 935               struct edge_prediction *pred2;
 936               int prob = probability;
 937
 938               for (pred2 = (struct edge_prediction *) *preds;
 939                    pred2; pred2 = pred2->ep_next)
 940                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 941                  {
 942                    int probability2 = pred2->ep_probability;
 943
 944                    if (pred2->ep_edge != first)
 945                      probability2 = REG_BR_PROB_BASE - probability2;
 946
 947                    if ((probability < REG_BR_PROB_BASE / 2) !=
 948                        (probability2 < REG_BR_PROB_BASE / 2))
 949                      break;
 950
 951                    /* If the same predictor later gave better result, go for it! */
 952                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 953                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 954                      prob = probability2;
 955                  }
 956               if (!pred2)
 957                 best_probability = prob, best_predictor = predictor;
 958             }
 959
 960           d = (combined_probability * probability
 961                + (REG_BR_PROB_BASE - combined_probability)
 962                * (REG_BR_PROB_BASE - probability));
 963
 964           /* Use FP math to avoid overflows of 32bit integers.  */
 965           if (d == 0)
 966             /* If one probability is 0% and one 100%, avoid division by zero.  */
 967             combined_probability = REG_BR_PROB_BASE / 2;
 968           else
 969             combined_probability = (((double) combined_probability)
 970                                     * probability
 971                                     * REG_BR_PROB_BASE / d + 0.5);
 972         }
 973     }
 974
 975   /* Decide which heuristic to use.  In case we didn't match anything,
 976      use no_prediction heuristic, in case we did match, use either
 977      first match or Dempster-Shaffer theory depending on the flags.  */
 978
 979   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 980     first_match = true;
 981
 982   if (!found)
 983     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 984   else
 985     {
 986       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 987                        !first_match);
 988       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 989                        first_match);
 990     }
 991
 992   if (first_match)
 993     combined_probability = best_probability;
 994   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 995
 996   if (preds)
 997     {
 998       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 999         {
1000           enum br_predictor predictor = pred->ep_predictor;
1001           int probability = pred->ep_probability;
1002
1003           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1004             probability = REG_BR_PROB_BASE - probability;
1005           dump_prediction (dump_file, predictor, probability, bb,
1006                            !first_match || best_predictor == predictor);
1007         }
1008     }
1009   clear_bb_predictions (bb);
1010
1011   if (!bb->count && !dry_run)
1012     {
1013       first->probability = combined_probability;
1014       second->probability = REG_BR_PROB_BASE - combined_probability;
1015     }
1016 }
1017
1018 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1019    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1020
1021    T1 and T2 should be one of the following cases:
1022      1. T1 is SSA_NAME, T2 is NULL
1023      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1024      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1025
1026 static tree
1027 strips_small_constant (tree t1, tree t2)
1028 {
1029   tree ret = NULL;
1030   int value = 0;
1031
1032   if (!t1)
1033     return NULL;
1034   else if (TREE_CODE (t1) == SSA_NAME)
1035     ret = t1;
1036   else if (tree_fits_shwi_p (t1))
1037     value = tree_to_shwi (t1);
1038   else
1039     return NULL;
1040
1041   if (!t2)
1042     return ret;
1043   else if (tree_fits_shwi_p (t2))
1044     value = tree_to_shwi (t2);
1045   else if (TREE_CODE (t2) == SSA_NAME)
1046     {
1047       if (ret)
1048         return NULL;
1049       else
1050         ret = t2;
1051     }
1052
1053   if (value <= 4 && value >= -4)
1054     return ret;
1055   else
1056     return NULL;
1057 }
1058
1059 /* Return the SSA_NAME in T or T's operands.
1060    Return NULL if SSA_NAME cannot be found.  */
1061
1062 static tree
1063 get_base_value (tree t)
1064 {
1065   if (TREE_CODE (t) == SSA_NAME)
1066     return t;
1067
1068   if (!BINARY_CLASS_P (t))
1069     return NULL;
1070
1071   switch (TREE_OPERAND_LENGTH (t))
1072     {
1073     case 1:
1074       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1075     case 2:
1076       return strips_small_constant (TREE_OPERAND (t, 0),
1077                                     TREE_OPERAND (t, 1));
1078     default:
1079       return NULL;
1080     }
1081 }
1082
1083 /* Check the compare STMT in LOOP. If it compares an induction
1084    variable to a loop invariant, return true, and save
1085    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1086    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1087
1088 static bool
1089 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1090                                      tree *loop_invariant,
1091                                      enum tree_code *compare_code,
1092                                      tree *loop_step,
1093                                      tree *loop_iv_base)
1094 {
1095   tree op0, op1, bound, base;
1096   affine_iv iv0, iv1;
1097   enum tree_code code;
1098   tree step;
1099
1100   code = gimple_cond_code (stmt);
1101   *loop_invariant = NULL;
1102
1103   switch (code)
1104     {
1105     case GT_EXPR:
1106     case GE_EXPR:
1107     case NE_EXPR:
1108     case LT_EXPR:
1109     case LE_EXPR:
1110     case EQ_EXPR:
1111       break;
1112
1113     default:
1114       return false;
1115     }
1116
1117   op0 = gimple_cond_lhs (stmt);
1118   op1 = gimple_cond_rhs (stmt);
1119
1120   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1121        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1122     return false;
1123   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1124     return false;
1125   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1126     return false;
1127   if (TREE_CODE (iv0.step) != INTEGER_CST
1128       || TREE_CODE (iv1.step) != INTEGER_CST)
1129     return false;
1130   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1131       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1132     return false;
1133
1134   if (integer_zerop (iv0.step))
1135     {
1136       if (code != NE_EXPR && code != EQ_EXPR)
1137         code = invert_tree_comparison (code, false);
1138       bound = iv0.base;
1139       base = iv1.base;
1140       if (tree_fits_shwi_p (iv1.step))
1141         step = iv1.step;
1142       else
1143         return false;
1144     }
1145   else
1146     {
1147       bound = iv1.base;
1148       base = iv0.base;
1149       if (tree_fits_shwi_p (iv0.step))
1150         step = iv0.step;
1151       else
1152         return false;
1153     }
1154
1155   if (TREE_CODE (bound) != INTEGER_CST)
1156     bound = get_base_value (bound);
1157   if (!bound)
1158     return false;
1159   if (TREE_CODE (base) != INTEGER_CST)
1160     base = get_base_value (base);
1161   if (!base)
1162     return false;
1163
1164   *loop_invariant = bound;
1165   *compare_code = code;
1166   *loop_step = step;
1167   *loop_iv_base = base;
1168   return true;
1169 }
1170
1171 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1172
1173 static bool
1174 expr_coherent_p (tree t1, tree t2)
1175 {
1176   gimple *stmt;
1177   tree ssa_name_1 = NULL;
1178   tree ssa_name_2 = NULL;
1179
1180   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1181   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1182
1183   if (t1 == t2)
1184     return true;
1185
1186   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1187     return true;
1188   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1189     return false;
1190
1191   /* Check to see if t1 is expressed/defined with t2.  */
1192   stmt = SSA_NAME_DEF_STMT (t1);
1193   gcc_assert (stmt != NULL);
1194   if (is_gimple_assign (stmt))
1195     {
1196       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1197       if (ssa_name_1 && ssa_name_1 == t2)
1198         return true;
1199     }
1200
1201   /* Check to see if t2 is expressed/defined with t1.  */
1202   stmt = SSA_NAME_DEF_STMT (t2);
1203   gcc_assert (stmt != NULL);
1204   if (is_gimple_assign (stmt))
1205     {
1206       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1207       if (ssa_name_2 && ssa_name_2 == t1)
1208         return true;
1209     }
1210
1211   /* Compare if t1 and t2's def_stmts are identical.  */
1212   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1213     return true;
1214   else
1215     return false;
1216 }
1217
1218 /* Return true if E is predicted by one of loop heuristics.  */
1219
1220 static bool
1221 predicted_by_loop_heuristics_p (basic_block bb)
1222 {
1223   struct edge_prediction *i;
1224   edge_prediction **preds = bb_predictions->get (bb);
1225
1226   if (!preds)
1227     return false;
1228
1229   for (i = *preds; i; i = i->ep_next)
1230     if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1231         || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1232         || i->ep_predictor == PRED_LOOP_ITERATIONS
1233         || i->ep_predictor == PRED_LOOP_EXIT
1234         || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1235       return true;
1236   return false;
1237 }
1238
1239 /* Predict branch probability of BB when BB contains a branch that compares
1240    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1241    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1242
1243    E.g.
1244      for (int i = 0; i < bound; i++) {
1245        if (i < bound - 2)
1246          computation_1();
1247        else
1248          computation_2();
1249      }
1250
1251   In this loop, we will predict the branch inside the loop to be taken.  */
1252
1253 static void
1254 predict_iv_comparison (struct loop *loop, basic_block bb,
1255                        tree loop_bound_var,
1256                        tree loop_iv_base_var,
1257                        enum tree_code loop_bound_code,
1258                        int loop_bound_step)
1259 {
1260   gimple *stmt;
1261   tree compare_var, compare_base;
1262   enum tree_code compare_code;
1263   tree compare_step_var;
1264   edge then_edge;
1265   edge_iterator ei;
1266
1267   if (predicted_by_loop_heuristics_p (bb))
1268     return;
1269
1270   stmt = last_stmt (bb);
1271   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1272     return;
1273   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1274                                             loop, &compare_var,
1275                                             &compare_code,
1276                                             &compare_step_var,
1277                                             &compare_base))
1278     return;
1279
1280   /* Find the taken edge.  */
1281   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1282     if (then_edge->flags & EDGE_TRUE_VALUE)
1283       break;
1284
1285   /* When comparing an IV to a loop invariant, NE is more likely to be
1286      taken while EQ is more likely to be not-taken.  */
1287   if (compare_code == NE_EXPR)
1288     {
1289       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1290       return;
1291     }
1292   else if (compare_code == EQ_EXPR)
1293     {
1294       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1295       return;
1296     }
1297
1298   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1299     return;
1300
1301   /* If loop bound, base and compare bound are all constants, we can
1302      calculate the probability directly.  */
1303   if (tree_fits_shwi_p (loop_bound_var)
1304       && tree_fits_shwi_p (compare_var)
1305       && tree_fits_shwi_p (compare_base))
1306     {
1307       int probability;
1308       bool overflow, overall_overflow = false;
1309       widest_int compare_count, tem;
1310
1311       /* (loop_bound - base) / compare_step */
1312       tem = wi::sub (wi::to_widest (loop_bound_var),
1313                      wi::to_widest (compare_base), SIGNED, &overflow);
1314       overall_overflow |= overflow;
1315       widest_int loop_count = wi::div_trunc (tem,
1316                                              wi::to_widest (compare_step_var),
1317                                              SIGNED, &overflow);
1318       overall_overflow |= overflow;
1319
1320       if (!wi::neg_p (wi::to_widest (compare_step_var))
1321           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1322         {
1323           /* (loop_bound - compare_bound) / compare_step */
1324           tem = wi::sub (wi::to_widest (loop_bound_var),
1325                          wi::to_widest (compare_var), SIGNED, &overflow);
1326           overall_overflow |= overflow;
1327           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1328                                          SIGNED, &overflow);
1329           overall_overflow |= overflow;
1330         }
1331       else
1332         {
1333           /* (compare_bound - base) / compare_step */
1334           tem = wi::sub (wi::to_widest (compare_var),
1335                          wi::to_widest (compare_base), SIGNED, &overflow);
1336           overall_overflow |= overflow;
1337           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1338                                          SIGNED, &overflow);
1339           overall_overflow |= overflow;
1340         }
1341       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1342         ++compare_count;
1343       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1344         ++loop_count;
1345       if (wi::neg_p (compare_count))
1346         compare_count = 0;
1347       if (wi::neg_p (loop_count))
1348         loop_count = 0;
1349       if (loop_count == 0)
1350         probability = 0;
1351       else if (wi::cmps (compare_count, loop_count) == 1)
1352         probability = REG_BR_PROB_BASE;
1353       else
1354         {
1355           tem = compare_count * REG_BR_PROB_BASE;
1356           tem = wi::udiv_trunc (tem, loop_count);
1357           probability = tem.to_uhwi ();
1358         }
1359
1360       if (!overall_overflow)
1361         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1362
1363       return;
1364     }
1365
1366   if (expr_coherent_p (loop_bound_var, compare_var))
1367     {
1368       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1369           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1370         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1371       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1372                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1373         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1374       else if (loop_bound_code == NE_EXPR)
1375         {
1376           /* If the loop backedge condition is "(i != bound)", we do
1377              the comparison based on the step of IV:
1378              * step < 0 : backedge condition is like (i > bound)
1379              * step > 0 : backedge condition is like (i < bound)  */
1380           gcc_assert (loop_bound_step != 0);
1381           if (loop_bound_step > 0
1382               && (compare_code == LT_EXPR
1383                   || compare_code == LE_EXPR))
1384             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1385           else if (loop_bound_step < 0
1386                    && (compare_code == GT_EXPR
1387                        || compare_code == GE_EXPR))
1388             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1389           else
1390             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1391         }
1392       else
1393         /* The branch is predicted not-taken if loop_bound_code is
1394            opposite with compare_code.  */
1395         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1396     }
1397   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1398     {
1399       /* For cases like:
1400            for (i = s; i < h; i++)
1401              if (i > s + 2) ....
1402          The branch should be predicted taken.  */
1403       if (loop_bound_step > 0
1404           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1405         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1406       else if (loop_bound_step < 0
1407                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1408         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1409       else
1410         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1411     }
1412 }
1413
1414 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1415    exits are resulted from short-circuit conditions that will generate an
1416    if_tmp. E.g.:
1417
1418    if (foo() || global > 10)
1419      break;
1420
1421    This will be translated into:
1422
1423    BB3:
1424      loop header...
1425    BB4:
1426      if foo() goto BB6 else goto BB5
1427    BB5:
1428      if global > 10 goto BB6 else goto BB7
1429    BB6:
1430      goto BB7
1431    BB7:
1432      iftmp = (PHI 0(BB5), 1(BB6))
1433      if iftmp == 1 goto BB8 else goto BB3
1434    BB8:
1435      outside of the loop...
1436
1437    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1438    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1439    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1440    exits to predict them using PRED_LOOP_EXTRA_EXIT.  */
1441
1442 static void
1443 predict_extra_loop_exits (edge exit_edge)
1444 {
1445   unsigned i;
1446   bool check_value_one;
1447   gimple *lhs_def_stmt;
1448   gphi *phi_stmt;
1449   tree cmp_rhs, cmp_lhs;
1450   gimple *last;
1451   gcond *cmp_stmt;
1452
1453   last = last_stmt (exit_edge->src);
1454   if (!last)
1455     return;
1456   cmp_stmt = dyn_cast <gcond *> (last);
1457   if (!cmp_stmt)
1458     return;
1459
1460   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1461   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1462   if (!TREE_CONSTANT (cmp_rhs)
1463       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1464     return;
1465   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1466     return;
1467
1468   /* If check_value_one is true, only the phi_args with value '1' will lead
1469      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1470      loop exit.  */
1471   check_value_one = (((integer_onep (cmp_rhs))
1472                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1473                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1474
1475   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1476   if (!lhs_def_stmt)
1477     return;
1478
1479   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1480   if (!phi_stmt)
1481     return;
1482
1483   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1484     {
1485       edge e1;
1486       edge_iterator ei;
1487       tree val = gimple_phi_arg_def (phi_stmt, i);
1488       edge e = gimple_phi_arg_edge (phi_stmt, i);
1489
1490       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1491         continue;
1492       if ((check_value_one ^ integer_onep (val)) == 1)
1493         continue;
1494       if (EDGE_COUNT (e->src->succs) != 1)
1495         {
1496           predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1497           continue;
1498         }
1499
1500       FOR_EACH_EDGE (e1, ei, e->src->preds)
1501         predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1502     }
1503 }
1504
1505
1506 /* Predict edge probabilities by exploiting loop structure.  */
1507
1508 static void
1509 predict_loops (void)
1510 {
1511   struct loop *loop;
1512
1513   /* Try to predict out blocks in a loop that are not part of a
1514      natural loop.  */
1515   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1516     {
1517       basic_block bb, *bbs;
1518       unsigned j, n_exits = 0;
1519       vec<edge> exits;
1520       struct tree_niter_desc niter_desc;
1521       edge ex;
1522       struct nb_iter_bound *nb_iter;
1523       enum tree_code loop_bound_code = ERROR_MARK;
1524       tree loop_bound_step = NULL;
1525       tree loop_bound_var = NULL;
1526       tree loop_iv_base = NULL;
1527       gcond *stmt = NULL;
1528
1529       exits = get_loop_exit_edges (loop);
1530       FOR_EACH_VEC_ELT (exits, j, ex)
1531         if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
1532           n_exits ++;
1533       if (!n_exits)
1534         {
1535           exits.release ();
1536           continue;
1537         }
1538
1539       FOR_EACH_VEC_ELT (exits, j, ex)
1540         {
1541           tree niter = NULL;
1542           HOST_WIDE_INT nitercst;
1543           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1544           int probability;
1545           enum br_predictor predictor;
1546           widest_int nit;
1547
1548           if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1549             continue;
1550           /* Loop heuristics do not expect exit conditional to be inside
1551              inner loop.  We predict from innermost to outermost loop.  */
1552           if (predicted_by_loop_heuristics_p (ex->src))
1553             continue;
1554           predict_extra_loop_exits (ex);
1555
1556           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1557             niter = niter_desc.niter;
1558           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1559             niter = loop_niter_by_eval (loop, ex);
1560
1561           if (TREE_CODE (niter) == INTEGER_CST)
1562             {
1563               if (tree_fits_uhwi_p (niter)
1564                   && max
1565                   && compare_tree_int (niter, max - 1) == -1)
1566                 nitercst = tree_to_uhwi (niter) + 1;
1567               else
1568                 nitercst = max;
1569               predictor = PRED_LOOP_ITERATIONS;
1570             }
1571           /* If we have just one exit and we can derive some information about
1572              the number of iterations of the loop from the statements inside
1573              the loop, use it to predict this exit.  */
1574           else if (n_exits == 1
1575                    && estimated_stmt_executions (loop, &nit))
1576             {
1577               if (wi::gtu_p (nit, max))
1578                 nitercst = max;
1579               else
1580                 nitercst = nit.to_shwi ();
1581               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1582             }
1583           /* If we have likely upper bound, trust it for very small iteration
1584              counts.  Such loops would otherwise get mispredicted by standard
1585              LOOP_EXIT heuristics.  */
1586           else if (n_exits == 1
1587                    && likely_max_stmt_executions (loop, &nit)
1588                    && wi::ltu_p (nit,
1589                                  RDIV (REG_BR_PROB_BASE,
1590                                        REG_BR_PROB_BASE
1591                                          - predictor_info
1592                                                  [PRED_LOOP_EXIT].hitrate)))
1593             {
1594               nitercst = nit.to_shwi ();
1595               predictor = PRED_LOOP_ITERATIONS_MAX;
1596             }
1597           else
1598             continue;
1599
1600           gcc_checking_assert (nitercst);
1601           probability = RDIV (REG_BR_PROB_BASE, nitercst);
1602           predict_edge (ex, predictor, probability);
1603         }
1604       exits.release ();
1605
1606       /* Find information about loop bound variables.  */
1607       for (nb_iter = loop->bounds; nb_iter;
1608            nb_iter = nb_iter->next)
1609         if (nb_iter->stmt
1610             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1611           {
1612             stmt = as_a <gcond *> (nb_iter->stmt);
1613             break;
1614           }
1615       if (!stmt && last_stmt (loop->header)
1616           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1617         stmt = as_a <gcond *> (last_stmt (loop->header));
1618       if (stmt)
1619         is_comparison_with_loop_invariant_p (stmt, loop,
1620                                              &loop_bound_var,
1621                                              &loop_bound_code,
1622                                              &loop_bound_step,
1623                                              &loop_iv_base);
1624
1625       bbs = get_loop_body (loop);
1626
1627       for (j = 0; j < loop->num_nodes; j++)
1628         {
1629           int header_found = 0;
1630           edge e;
1631           edge_iterator ei;
1632
1633           bb = bbs[j];
1634
1635           /* Bypass loop heuristics on continue statement.  These
1636              statements construct loops via "non-loop" constructs
1637              in the source language and are better to be handled
1638              separately.  */
1639           if (predicted_by_p (bb, PRED_CONTINUE))
1640             continue;
1641
1642           /* Loop branch heuristics - predict an edge back to a
1643              loop's head as taken.  */
1644           if (bb == loop->latch)
1645             {
1646               e = find_edge (loop->latch, loop->header);
1647               if (e)
1648                 {
1649                   header_found = 1;
1650                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1651                 }
1652             }
1653
1654           /* Loop exit heuristics - predict an edge exiting the loop if the
1655              conditional has no loop header successors as not taken.  */
1656           if (!header_found
1657               /* If we already used more reliable loop exit predictors, do not
1658                  bother with PRED_LOOP_EXIT.  */
1659               && !predicted_by_loop_heuristics_p (bb))
1660             {
1661               /* For loop with many exits we don't want to predict all exits
1662                  with the pretty large probability, because if all exits are
1663                  considered in row, the loop would be predicted to iterate
1664                  almost never.  The code to divide probability by number of
1665                  exits is very rough.  It should compute the number of exits
1666                  taken in each patch through function (not the overall number
1667                  of exits that might be a lot higher for loops with wide switch
1668                  statements in them) and compute n-th square root.
1669
1670                  We limit the minimal probability by 2% to avoid
1671                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1672                  as this was causing regression in perl benchmark containing such
1673                  a wide loop.  */
1674
1675               int probability = ((REG_BR_PROB_BASE
1676                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1677                                  / n_exits);
1678               if (probability < HITRATE (2))
1679                 probability = HITRATE (2);
1680               FOR_EACH_EDGE (e, ei, bb->succs)
1681                 if (e->dest->index < NUM_FIXED_BLOCKS
1682                     || !flow_bb_inside_loop_p (loop, e->dest))
1683                   predict_edge (e, PRED_LOOP_EXIT, probability);
1684             }
1685           if (loop_bound_var)
1686             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1687                                    loop_bound_code,
1688                                    tree_to_shwi (loop_bound_step));
1689         }
1690
1691       /* Free basic blocks from get_loop_body.  */
1692       free (bbs);
1693     }
1694 }
1695
1696 /* Attempt to predict probabilities of BB outgoing edges using local
1697    properties.  */
1698 static void
1699 bb_estimate_probability_locally (basic_block bb)
1700 {
1701   rtx_insn *last_insn = BB_END (bb);
1702   rtx cond;
1703
1704   if (! can_predict_insn_p (last_insn))
1705     return;
1706   cond = get_condition (last_insn, NULL, false, false);
1707   if (! cond)
1708     return;
1709
1710   /* Try "pointer heuristic."
1711      A comparison ptr == 0 is predicted as false.
1712      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1713   if (COMPARISON_P (cond)
1714       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1715           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1716     {
1717       if (GET_CODE (cond) == EQ)
1718         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1719       else if (GET_CODE (cond) == NE)
1720         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1721     }
1722   else
1723
1724   /* Try "opcode heuristic."
1725      EQ tests are usually false and NE tests are usually true. Also,
1726      most quantities are positive, so we can make the appropriate guesses
1727      about signed comparisons against zero.  */
1728     switch (GET_CODE (cond))
1729       {
1730       case CONST_INT:
1731         /* Unconditional branch.  */
1732         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1733                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1734         break;
1735
1736       case EQ:
1737       case UNEQ:
1738         /* Floating point comparisons appears to behave in a very
1739            unpredictable way because of special role of = tests in
1740            FP code.  */
1741         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1742           ;
1743         /* Comparisons with 0 are often used for booleans and there is
1744            nothing useful to predict about them.  */
1745         else if (XEXP (cond, 1) == const0_rtx
1746                  || XEXP (cond, 0) == const0_rtx)
1747           ;
1748         else
1749           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1750         break;
1751
1752       case NE:
1753       case LTGT:
1754         /* Floating point comparisons appears to behave in a very
1755            unpredictable way because of special role of = tests in
1756            FP code.  */
1757         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1758           ;
1759         /* Comparisons with 0 are often used for booleans and there is
1760            nothing useful to predict about them.  */
1761         else if (XEXP (cond, 1) == const0_rtx
1762                  || XEXP (cond, 0) == const0_rtx)
1763           ;
1764         else
1765           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1766         break;
1767
1768       case ORDERED:
1769         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1770         break;
1771
1772       case UNORDERED:
1773         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1774         break;
1775
1776       case LE:
1777       case LT:
1778         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1779             || XEXP (cond, 1) == constm1_rtx)
1780           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1781         break;
1782
1783       case GE:
1784       case GT:
1785         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1786             || XEXP (cond, 1) == constm1_rtx)
1787           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1788         break;
1789
1790       default:
1791         break;
1792       }
1793 }
1794
1795 /* Set edge->probability for each successor edge of BB.  */
1796 void
1797 guess_outgoing_edge_probabilities (basic_block bb)
1798 {
1799   bb_estimate_probability_locally (bb);
1800   combine_predictions_for_insn (BB_END (bb), bb);
1801 }
1802 \f
1803 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1804
1805 /* Helper function for expr_expected_value.  */
1806
1807 static tree
1808 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1809                        tree op1, bitmap visited, enum br_predictor *predictor)
1810 {
1811   gimple *def;
1812
1813   if (predictor)
1814     *predictor = PRED_UNCONDITIONAL;
1815
1816   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1817     {
1818       if (TREE_CONSTANT (op0))
1819         return op0;
1820
1821       if (code != SSA_NAME)
1822         return NULL_TREE;
1823
1824       def = SSA_NAME_DEF_STMT (op0);
1825
1826       /* If we were already here, break the infinite cycle.  */
1827       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1828         return NULL;
1829
1830       if (gimple_code (def) == GIMPLE_PHI)
1831         {
1832           /* All the arguments of the PHI node must have the same constant
1833              length.  */
1834           int i, n = gimple_phi_num_args (def);
1835           tree val = NULL, new_val;
1836
1837           for (i = 0; i < n; i++)
1838             {
1839               tree arg = PHI_ARG_DEF (def, i);
1840               enum br_predictor predictor2;
1841
1842               /* If this PHI has itself as an argument, we cannot
1843                  determine the string length of this argument.  However,
1844                  if we can find an expected constant value for the other
1845                  PHI args then we can still be sure that this is
1846                  likely a constant.  So be optimistic and just
1847                  continue with the next argument.  */
1848               if (arg == PHI_RESULT (def))
1849                 continue;
1850
1851               new_val = expr_expected_value (arg, visited, &predictor2);
1852
1853               /* It is difficult to combine value predictors.  Simply assume
1854                  that later predictor is weaker and take its prediction.  */
1855               if (predictor && *predictor < predictor2)
1856                 *predictor = predictor2;
1857               if (!new_val)
1858                 return NULL;
1859               if (!val)
1860                 val = new_val;
1861               else if (!operand_equal_p (val, new_val, false))
1862                 return NULL;
1863             }
1864           return val;
1865         }
1866       if (is_gimple_assign (def))
1867         {
1868           if (gimple_assign_lhs (def) != op0)
1869             return NULL;
1870
1871           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1872                                         gimple_assign_rhs1 (def),
1873                                         gimple_assign_rhs_code (def),
1874                                         gimple_assign_rhs2 (def),
1875                                         visited, predictor);
1876         }
1877
1878       if (is_gimple_call (def))
1879         {
1880           tree decl = gimple_call_fndecl (def);
1881           if (!decl)
1882             {
1883               if (gimple_call_internal_p (def)
1884                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1885                 {
1886                   gcc_assert (gimple_call_num_args (def) == 3);
1887                   tree val = gimple_call_arg (def, 0);
1888                   if (TREE_CONSTANT (val))
1889                     return val;
1890                   if (predictor)
1891                     {
1892                       tree val2 = gimple_call_arg (def, 2);
1893                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1894                                   && tree_fits_uhwi_p (val2)
1895                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1896                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1897                     }
1898                   return gimple_call_arg (def, 1);
1899                 }
1900               return NULL;
1901             }
1902           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1903             switch (DECL_FUNCTION_CODE (decl))
1904               {
1905               case BUILT_IN_EXPECT:
1906                 {
1907                   tree val;
1908                   if (gimple_call_num_args (def) != 2)
1909                     return NULL;
1910                   val = gimple_call_arg (def, 0);
1911                   if (TREE_CONSTANT (val))
1912                     return val;
1913                   if (predictor)
1914                     *predictor = PRED_BUILTIN_EXPECT;
1915                   return gimple_call_arg (def, 1);
1916                 }
1917
1918               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1919               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1920               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1921               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1922               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1923               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1924               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1925               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1926               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1927               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1928               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1929               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1930               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1931                 /* Assume that any given atomic operation has low contention,
1932                    and thus the compare-and-swap operation succeeds.  */
1933                 if (predictor)
1934                   *predictor = PRED_COMPARE_AND_SWAP;
1935                 return boolean_true_node;
1936               default:
1937                 break;
1938             }
1939         }
1940
1941       return NULL;
1942     }
1943
1944   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1945     {
1946       tree res;
1947       enum br_predictor predictor2;
1948       op0 = expr_expected_value (op0, visited, predictor);
1949       if (!op0)
1950         return NULL;
1951       op1 = expr_expected_value (op1, visited, &predictor2);
1952       if (predictor && *predictor < predictor2)
1953         *predictor = predictor2;
1954       if (!op1)
1955         return NULL;
1956       res = fold_build2 (code, type, op0, op1);
1957       if (TREE_CONSTANT (res))
1958         return res;
1959       return NULL;
1960     }
1961   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1962     {
1963       tree res;
1964       op0 = expr_expected_value (op0, visited, predictor);
1965       if (!op0)
1966         return NULL;
1967       res = fold_build1 (code, type, op0);
1968       if (TREE_CONSTANT (res))
1969         return res;
1970       return NULL;
1971     }
1972   return NULL;
1973 }
1974
1975 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1976    The function is used by builtin_expect branch predictor so the evidence
1977    must come from this construct and additional possible constant folding.
1978
1979    We may want to implement more involved value guess (such as value range
1980    propagation based prediction), but such tricks shall go to new
1981    implementation.  */
1982
1983 static tree
1984 expr_expected_value (tree expr, bitmap visited,
1985                      enum br_predictor *predictor)
1986 {
1987   enum tree_code code;
1988   tree op0, op1;
1989
1990   if (TREE_CONSTANT (expr))
1991     {
1992       if (predictor)
1993         *predictor = PRED_UNCONDITIONAL;
1994       return expr;
1995     }
1996
1997   extract_ops_from_tree (expr, &code, &op0, &op1);
1998   return expr_expected_value_1 (TREE_TYPE (expr),
1999                                 op0, code, op1, visited, predictor);
2000 }
2001 \f
2002 /* Predict using opcode of the last statement in basic block.  */
2003 static void
2004 tree_predict_by_opcode (basic_block bb)
2005 {
2006   gimple *stmt = last_stmt (bb);
2007   edge then_edge;
2008   tree op0, op1;
2009   tree type;
2010   tree val;
2011   enum tree_code cmp;
2012   bitmap visited;
2013   edge_iterator ei;
2014   enum br_predictor predictor;
2015
2016   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2017     return;
2018   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2019     if (then_edge->flags & EDGE_TRUE_VALUE)
2020       break;
2021   op0 = gimple_cond_lhs (stmt);
2022   op1 = gimple_cond_rhs (stmt);
2023   cmp = gimple_cond_code (stmt);
2024   type = TREE_TYPE (op0);
2025   visited = BITMAP_ALLOC (NULL);
2026   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
2027                                &predictor);
2028   BITMAP_FREE (visited);
2029   if (val && TREE_CODE (val) == INTEGER_CST)
2030     {
2031       if (predictor == PRED_BUILTIN_EXPECT)
2032         {
2033           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2034
2035           gcc_assert (percent >= 0 && percent <= 100);
2036           if (integer_zerop (val))
2037             percent = 100 - percent;
2038           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2039         }
2040       else
2041         predict_edge (then_edge, predictor,
2042                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
2043     }
2044   /* Try "pointer heuristic."
2045      A comparison ptr == 0 is predicted as false.
2046      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2047   if (POINTER_TYPE_P (type))
2048     {
2049       if (cmp == EQ_EXPR)
2050         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2051       else if (cmp == NE_EXPR)
2052         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2053     }
2054   else
2055
2056   /* Try "opcode heuristic."
2057      EQ tests are usually false and NE tests are usually true. Also,
2058      most quantities are positive, so we can make the appropriate guesses
2059      about signed comparisons against zero.  */
2060     switch (cmp)
2061       {
2062       case EQ_EXPR:
2063       case UNEQ_EXPR:
2064         /* Floating point comparisons appears to behave in a very
2065            unpredictable way because of special role of = tests in
2066            FP code.  */
2067         if (FLOAT_TYPE_P (type))
2068           ;
2069         /* Comparisons with 0 are often used for booleans and there is
2070            nothing useful to predict about them.  */
2071         else if (integer_zerop (op0) || integer_zerop (op1))
2072           ;
2073         else
2074           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2075         break;
2076
2077       case NE_EXPR:
2078       case LTGT_EXPR:
2079         /* Floating point comparisons appears to behave in a very
2080            unpredictable way because of special role of = tests in
2081            FP code.  */
2082         if (FLOAT_TYPE_P (type))
2083           ;
2084         /* Comparisons with 0 are often used for booleans and there is
2085            nothing useful to predict about them.  */
2086         else if (integer_zerop (op0)
2087                  || integer_zerop (op1))
2088           ;
2089         else
2090           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2091         break;
2092
2093       case ORDERED_EXPR:
2094         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2095         break;
2096
2097       case UNORDERED_EXPR:
2098         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2099         break;
2100
2101       case LE_EXPR:
2102       case LT_EXPR:
2103         if (integer_zerop (op1)
2104             || integer_onep (op1)
2105             || integer_all_onesp (op1)
2106             || real_zerop (op1)
2107             || real_onep (op1)
2108             || real_minus_onep (op1))
2109           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2110         break;
2111
2112       case GE_EXPR:
2113       case GT_EXPR:
2114         if (integer_zerop (op1)
2115             || integer_onep (op1)
2116             || integer_all_onesp (op1)
2117             || real_zerop (op1)
2118             || real_onep (op1)
2119             || real_minus_onep (op1))
2120           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2121         break;
2122
2123       default:
2124         break;
2125       }
2126 }
2127
2128 /* Try to guess whether the value of return means error code.  */
2129
2130 static enum br_predictor
2131 return_prediction (tree val, enum prediction *prediction)
2132 {
2133   /* VOID.  */
2134   if (!val)
2135     return PRED_NO_PREDICTION;
2136   /* Different heuristics for pointers and scalars.  */
2137   if (POINTER_TYPE_P (TREE_TYPE (val)))
2138     {
2139       /* NULL is usually not returned.  */
2140       if (integer_zerop (val))
2141         {
2142           *prediction = NOT_TAKEN;
2143           return PRED_NULL_RETURN;
2144         }
2145     }
2146   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2147     {
2148       /* Negative return values are often used to indicate
2149          errors.  */
2150       if (TREE_CODE (val) == INTEGER_CST
2151           && tree_int_cst_sgn (val) < 0)
2152         {
2153           *prediction = NOT_TAKEN;
2154           return PRED_NEGATIVE_RETURN;
2155         }
2156       /* Constant return values seems to be commonly taken.
2157          Zero/one often represent booleans so exclude them from the
2158          heuristics.  */
2159       if (TREE_CONSTANT (val)
2160           && (!integer_zerop (val) && !integer_onep (val)))
2161         {
2162           *prediction = TAKEN;
2163           return PRED_CONST_RETURN;
2164         }
2165     }
2166   return PRED_NO_PREDICTION;
2167 }
2168
2169 /* Find the basic block with return expression and look up for possible
2170    return value trying to apply RETURN_PREDICTION heuristics.  */
2171 static void
2172 apply_return_prediction (void)
2173 {
2174   greturn *return_stmt = NULL;
2175   tree return_val;
2176   edge e;
2177   gphi *phi;
2178   int phi_num_args, i;
2179   enum br_predictor pred;
2180   enum prediction direction;
2181   edge_iterator ei;
2182
2183   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2184     {
2185       gimple *last = last_stmt (e->src);
2186       if (last
2187           && gimple_code (last) == GIMPLE_RETURN)
2188         {
2189           return_stmt = as_a <greturn *> (last);
2190           break;
2191         }
2192     }
2193   if (!e)
2194     return;
2195   return_val = gimple_return_retval (return_stmt);
2196   if (!return_val)
2197     return;
2198   if (TREE_CODE (return_val) != SSA_NAME
2199       || !SSA_NAME_DEF_STMT (return_val)
2200       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2201     return;
2202   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2203   phi_num_args = gimple_phi_num_args (phi);
2204   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2205
2206   /* Avoid the degenerate case where all return values form the function
2207      belongs to same category (ie they are all positive constants)
2208      so we can hardly say something about them.  */
2209   for (i = 1; i < phi_num_args; i++)
2210     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2211       break;
2212   if (i != phi_num_args)
2213     for (i = 0; i < phi_num_args; i++)
2214       {
2215         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2216         if (pred != PRED_NO_PREDICTION)
2217           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2218                                          direction);
2219       }
2220 }
2221
2222 /* Look for basic block that contains unlikely to happen events
2223    (such as noreturn calls) and mark all paths leading to execution
2224    of this basic blocks as unlikely.  */
2225
2226 static void
2227 tree_bb_level_predictions (void)
2228 {
2229   basic_block bb;
2230   bool has_return_edges = false;
2231   edge e;
2232   edge_iterator ei;
2233
2234   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2235     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2236       {
2237         has_return_edges = true;
2238         break;
2239       }
2240
2241   apply_return_prediction ();
2242
2243   FOR_EACH_BB_FN (bb, cfun)
2244     {
2245       gimple_stmt_iterator gsi;
2246
2247       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2248         {
2249           gimple *stmt = gsi_stmt (gsi);
2250           tree decl;
2251
2252           if (is_gimple_call (stmt))
2253             {
2254               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2255                   && has_return_edges)
2256                 predict_paths_leading_to (bb, PRED_NORETURN,
2257                                           NOT_TAKEN);
2258               decl = gimple_call_fndecl (stmt);
2259               if (decl
2260                   && lookup_attribute ("cold",
2261                                        DECL_ATTRIBUTES (decl)))
2262                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2263                                           NOT_TAKEN);
2264             }
2265           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2266             {
2267               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2268                                         gimple_predict_outcome (stmt));
2269               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2270                  hints to callers.  */
2271             }
2272         }
2273     }
2274 }
2275
2276 /* Callback for hash_map::traverse, asserts that the pointer map is
2277    empty.  */
2278
2279 bool
2280 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2281                  void *)
2282 {
2283   gcc_assert (!value);
2284   return false;
2285 }
2286
2287 /* Predict branch probabilities and estimate profile for basic block BB.  */
2288
2289 static void
2290 tree_estimate_probability_bb (basic_block bb)
2291 {
2292   edge e;
2293   edge_iterator ei;
2294   gimple *last;
2295
2296   FOR_EACH_EDGE (e, ei, bb->succs)
2297     {
2298       /* Predict edges to user labels with attributes.  */
2299       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2300         {
2301           gimple_stmt_iterator gi;
2302           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2303             {
2304               glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2305               tree decl;
2306
2307               if (!label_stmt)
2308                 break;
2309               decl = gimple_label_label (label_stmt);
2310               if (DECL_ARTIFICIAL (decl))
2311                 continue;
2312
2313               /* Finally, we have a user-defined label.  */
2314               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2315                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2316               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2317                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2318             }
2319         }
2320
2321       /* Predict early returns to be probable, as we've already taken
2322          care for error returns and other cases are often used for
2323          fast paths through function.
2324
2325          Since we've already removed the return statements, we are
2326          looking for CFG like:
2327
2328          if (conditional)
2329          {
2330          ..
2331          goto return_block
2332          }
2333          some other blocks
2334          return_block:
2335          return_stmt.  */
2336       if (e->dest != bb->next_bb
2337           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2338           && single_succ_p (e->dest)
2339           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2340           && (last = last_stmt (e->dest)) != NULL
2341           && gimple_code (last) == GIMPLE_RETURN)
2342         {
2343           edge e1;
2344           edge_iterator ei1;
2345
2346           if (single_succ_p (bb))
2347             {
2348               FOR_EACH_EDGE (e1, ei1, bb->preds)
2349                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2350                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2351                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2352                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2353             }
2354           else
2355             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2356                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2357                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2358               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2359         }
2360
2361       /* Look for block we are guarding (ie we dominate it,
2362          but it doesn't postdominate us).  */
2363       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2364           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2365           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2366         {
2367           gimple_stmt_iterator bi;
2368
2369           /* The call heuristic claims that a guarded function call
2370              is improbable.  This is because such calls are often used
2371              to signal exceptional situations such as printing error
2372              messages.  */
2373           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2374                gsi_next (&bi))
2375             {
2376               gimple *stmt = gsi_stmt (bi);
2377               if (is_gimple_call (stmt)
2378                   /* Constant and pure calls are hardly used to signalize
2379                      something exceptional.  */
2380                   && gimple_has_side_effects (stmt))
2381                 {
2382                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2383                   break;
2384                 }
2385             }
2386         }
2387     }
2388   tree_predict_by_opcode (bb);
2389 }
2390
2391 /* Predict branch probabilities and estimate profile of the tree CFG.
2392    This function can be called from the loop optimizers to recompute
2393    the profile information.
2394    If DRY_RUN is set, do not modify CFG and only produce dump files.  */
2395
2396 void
2397 tree_estimate_probability (bool dry_run)
2398 {
2399   basic_block bb;
2400
2401   add_noreturn_fake_exit_edges ();
2402   connect_infinite_loops_to_exit ();
2403   /* We use loop_niter_by_eval, which requires that the loops have
2404      preheaders.  */
2405   create_preheaders (CP_SIMPLE_PREHEADERS);
2406   calculate_dominance_info (CDI_POST_DOMINATORS);
2407
2408   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2409   tree_bb_level_predictions ();
2410   record_loop_exits ();
2411
2412   if (number_of_loops (cfun) > 1)
2413     predict_loops ();
2414
2415   FOR_EACH_BB_FN (bb, cfun)
2416     tree_estimate_probability_bb (bb);
2417
2418   FOR_EACH_BB_FN (bb, cfun)
2419     combine_predictions_for_bb (bb, dry_run);
2420
2421   if (flag_checking)
2422     bb_predictions->traverse<void *, assert_is_empty> (NULL);
2423
2424   delete bb_predictions;
2425   bb_predictions = NULL;
2426
2427   if (!dry_run)
2428     estimate_bb_frequencies (false);
2429   free_dominance_info (CDI_POST_DOMINATORS);
2430   remove_fake_exit_edges ();
2431 }
2432 \f
2433 /* Predict edges to successors of CUR whose sources are not postdominated by
2434    BB by PRED and recurse to all postdominators.  */
2435
2436 static void
2437 predict_paths_for_bb (basic_block cur, basic_block bb,
2438                       enum br_predictor pred,
2439                       enum prediction taken,
2440                       bitmap visited)
2441 {
2442   edge e;
2443   edge_iterator ei;
2444   basic_block son;
2445
2446   /* We are looking for all edges forming edge cut induced by
2447      set of all blocks postdominated by BB.  */
2448   FOR_EACH_EDGE (e, ei, cur->preds)
2449     if (e->src->index >= NUM_FIXED_BLOCKS
2450         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2451     {
2452       edge e2;
2453       edge_iterator ei2;
2454       bool found = false;
2455
2456       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2457       if (e->flags & (EDGE_EH | EDGE_FAKE))
2458         continue;
2459       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2460
2461       /* See if there is an edge from e->src that is not abnormal
2462          and does not lead to BB.  */
2463       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2464         if (e2 != e
2465             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2466             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2467           {
2468             found = true;
2469             break;
2470           }
2471
2472       /* If there is non-abnormal path leaving e->src, predict edge
2473          using predictor.  Otherwise we need to look for paths
2474          leading to e->src.
2475
2476          The second may lead to infinite loop in the case we are predicitng
2477          regions that are only reachable by abnormal edges.  We simply
2478          prevent visiting given BB twice.  */
2479       if (found)
2480         {
2481           if (!edge_predicted_by_p (e, pred, taken))
2482             predict_edge_def (e, pred, taken);
2483         }
2484       else if (bitmap_set_bit (visited, e->src->index))
2485         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2486     }
2487   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2488        son;
2489        son = next_dom_son (CDI_POST_DOMINATORS, son))
2490     predict_paths_for_bb (son, bb, pred, taken, visited);
2491 }
2492
2493 /* Sets branch probabilities according to PREDiction and
2494    FLAGS.  */
2495
2496 static void
2497 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2498                           enum prediction taken)
2499 {
2500   bitmap visited = BITMAP_ALLOC (NULL);
2501   predict_paths_for_bb (bb, bb, pred, taken, visited);
2502   BITMAP_FREE (visited);
2503 }
2504
2505 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2506
2507 static void
2508 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2509                                enum prediction taken)
2510 {
2511   bool has_nonloop_edge = false;
2512   edge_iterator ei;
2513   edge e2;
2514
2515   basic_block bb = e->src;
2516   FOR_EACH_EDGE (e2, ei, bb->succs)
2517     if (e2->dest != e->src && e2->dest != e->dest
2518         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2519         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2520       {
2521         has_nonloop_edge = true;
2522         break;
2523       }
2524   if (!has_nonloop_edge)
2525     {
2526       bitmap visited = BITMAP_ALLOC (NULL);
2527       predict_paths_for_bb (bb, bb, pred, taken, visited);
2528       BITMAP_FREE (visited);
2529     }
2530   else
2531     predict_edge_def (e, pred, taken);
2532 }
2533 \f
2534 /* This is used to carry information about basic blocks.  It is
2535    attached to the AUX field of the standard CFG block.  */
2536
2537 struct block_info
2538 {
2539   /* Estimated frequency of execution of basic_block.  */
2540   sreal frequency;
2541
2542   /* To keep queue of basic blocks to process.  */
2543   basic_block next;
2544
2545   /* Number of predecessors we need to visit first.  */
2546   int npredecessors;
2547 };
2548
2549 /* Similar information for edges.  */
2550 struct edge_prob_info
2551 {
2552   /* In case edge is a loopback edge, the probability edge will be reached
2553      in case header is.  Estimated number of iterations of the loop can be
2554      then computed as 1 / (1 - back_edge_prob).  */
2555   sreal back_edge_prob;
2556   /* True if the edge is a loopback edge in the natural loop.  */
2557   unsigned int back_edge:1;
2558 };
2559
2560 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2561 #undef EDGE_INFO
2562 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2563
2564 /* Helper function for estimate_bb_frequencies.
2565    Propagate the frequencies in blocks marked in
2566    TOVISIT, starting in HEAD.  */
2567
2568 static void
2569 propagate_freq (basic_block head, bitmap tovisit)
2570 {
2571   basic_block bb;
2572   basic_block last;
2573   unsigned i;
2574   edge e;
2575   basic_block nextbb;
2576   bitmap_iterator bi;
2577
2578   /* For each basic block we need to visit count number of his predecessors
2579      we need to visit first.  */
2580   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2581     {
2582       edge_iterator ei;
2583       int count = 0;
2584
2585       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2586
2587       FOR_EACH_EDGE (e, ei, bb->preds)
2588         {
2589           bool visit = bitmap_bit_p (tovisit, e->src->index);
2590
2591           if (visit && !(e->flags & EDGE_DFS_BACK))
2592             count++;
2593           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2594             fprintf (dump_file,
2595                      "Irreducible region hit, ignoring edge to %i->%i\n",
2596                      e->src->index, bb->index);
2597         }
2598       BLOCK_INFO (bb)->npredecessors = count;
2599       /* When function never returns, we will never process exit block.  */
2600       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2601         bb->count = bb->frequency = 0;
2602     }
2603
2604   BLOCK_INFO (head)->frequency = 1;
2605   last = head;
2606   for (bb = head; bb; bb = nextbb)
2607     {
2608       edge_iterator ei;
2609       sreal cyclic_probability = 0;
2610       sreal frequency = 0;
2611
2612       nextbb = BLOCK_INFO (bb)->next;
2613       BLOCK_INFO (bb)->next = NULL;
2614
2615       /* Compute frequency of basic block.  */
2616       if (bb != head)
2617         {
2618           if (flag_checking)
2619             FOR_EACH_EDGE (e, ei, bb->preds)
2620               gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2621                           || (e->flags & EDGE_DFS_BACK));
2622
2623           FOR_EACH_EDGE (e, ei, bb->preds)
2624             if (EDGE_INFO (e)->back_edge)
2625               {
2626                 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2627               }
2628             else if (!(e->flags & EDGE_DFS_BACK))
2629               {
2630                 /*  frequency += (e->probability
2631                                   * BLOCK_INFO (e->src)->frequency /
2632                                   REG_BR_PROB_BASE);  */
2633
2634                 sreal tmp = e->probability;
2635                 tmp *= BLOCK_INFO (e->src)->frequency;
2636                 tmp *= real_inv_br_prob_base;
2637                 frequency += tmp;
2638               }
2639
2640           if (cyclic_probability == 0)
2641             {
2642               BLOCK_INFO (bb)->frequency = frequency;
2643             }
2644           else
2645             {
2646               if (cyclic_probability > real_almost_one)
2647                 cyclic_probability = real_almost_one;
2648
2649               /* BLOCK_INFO (bb)->frequency = frequency
2650                                               / (1 - cyclic_probability) */
2651
2652               cyclic_probability = sreal (1) - cyclic_probability;
2653               BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2654             }
2655         }
2656
2657       bitmap_clear_bit (tovisit, bb->index);
2658
2659       e = find_edge (bb, head);
2660       if (e)
2661         {
2662           /* EDGE_INFO (e)->back_edge_prob
2663              = ((e->probability * BLOCK_INFO (bb)->frequency)
2664              / REG_BR_PROB_BASE); */
2665
2666           sreal tmp = e->probability;
2667           tmp *= BLOCK_INFO (bb)->frequency;
2668           EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2669         }
2670
2671       /* Propagate to successor blocks.  */
2672       FOR_EACH_EDGE (e, ei, bb->succs)
2673         if (!(e->flags & EDGE_DFS_BACK)
2674             && BLOCK_INFO (e->dest)->npredecessors)
2675           {
2676             BLOCK_INFO (e->dest)->npredecessors--;
2677             if (!BLOCK_INFO (e->dest)->npredecessors)
2678               {
2679                 if (!nextbb)
2680                   nextbb = e->dest;
2681                 else
2682                   BLOCK_INFO (last)->next = e->dest;
2683
2684                 last = e->dest;
2685               }
2686           }
2687     }
2688 }
2689
2690 /* Estimate frequencies in loops at same nest level.  */
2691
2692 static void
2693 estimate_loops_at_level (struct loop *first_loop)
2694 {
2695   struct loop *loop;
2696
2697   for (loop = first_loop; loop; loop = loop->next)
2698     {
2699       edge e;
2700       basic_block *bbs;
2701       unsigned i;
2702       bitmap tovisit = BITMAP_ALLOC (NULL);
2703
2704       estimate_loops_at_level (loop->inner);
2705
2706       /* Find current loop back edge and mark it.  */
2707       e = loop_latch_edge (loop);
2708       EDGE_INFO (e)->back_edge = 1;
2709
2710       bbs = get_loop_body (loop);
2711       for (i = 0; i < loop->num_nodes; i++)
2712         bitmap_set_bit (tovisit, bbs[i]->index);
2713       free (bbs);
2714       propagate_freq (loop->header, tovisit);
2715       BITMAP_FREE (tovisit);
2716     }
2717 }
2718
2719 /* Propagates frequencies through structure of loops.  */
2720
2721 static void
2722 estimate_loops (void)
2723 {
2724   bitmap tovisit = BITMAP_ALLOC (NULL);
2725   basic_block bb;
2726
2727   /* Start by estimating the frequencies in the loops.  */
2728   if (number_of_loops (cfun) > 1)
2729     estimate_loops_at_level (current_loops->tree_root->inner);
2730
2731   /* Now propagate the frequencies through all the blocks.  */
2732   FOR_ALL_BB_FN (bb, cfun)
2733     {
2734       bitmap_set_bit (tovisit, bb->index);
2735     }
2736   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2737   BITMAP_FREE (tovisit);
2738 }
2739
2740 /* Drop the profile for NODE to guessed, and update its frequency based on
2741    whether it is expected to be hot given the CALL_COUNT.  */
2742
2743 static void
2744 drop_profile (struct cgraph_node *node, gcov_type call_count)
2745 {
2746   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2747   /* In the case where this was called by another function with a
2748      dropped profile, call_count will be 0. Since there are no
2749      non-zero call counts to this function, we don't know for sure
2750      whether it is hot, and therefore it will be marked normal below.  */
2751   bool hot = maybe_hot_count_p (NULL, call_count);
2752
2753   if (dump_file)
2754     fprintf (dump_file,
2755              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2756              node->name (), node->order,
2757              hot ? "Function is hot" : "Function is normal");
2758   /* We only expect to miss profiles for functions that are reached
2759      via non-zero call edges in cases where the function may have
2760      been linked from another module or library (COMDATs and extern
2761      templates). See the comments below for handle_missing_profiles.
2762      Also, only warn in cases where the missing counts exceed the
2763      number of training runs. In certain cases with an execv followed
2764      by a no-return call the profile for the no-return call is not
2765      dumped and there can be a mismatch.  */
2766   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2767       && call_count > profile_info->runs)
2768     {
2769       if (flag_profile_correction)
2770         {
2771           if (dump_file)
2772             fprintf (dump_file,
2773                      "Missing counts for called function %s/%i\n",
2774                      node->name (), node->order);
2775         }
2776       else
2777         warning (0, "Missing counts for called function %s/%i",
2778                  node->name (), node->order);
2779     }
2780
2781   profile_status_for_fn (fn)
2782       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2783   node->frequency
2784       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2785 }
2786
2787 /* In the case of COMDAT routines, multiple object files will contain the same
2788    function and the linker will select one for the binary. In that case
2789    all the other copies from the profile instrument binary will be missing
2790    profile counts. Look for cases where this happened, due to non-zero
2791    call counts going to 0-count functions, and drop the profile to guessed
2792    so that we can use the estimated probabilities and avoid optimizing only
2793    for size.
2794
2795    The other case where the profile may be missing is when the routine
2796    is not going to be emitted to the object file, e.g. for "extern template"
2797    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2798    all other cases of non-zero calls to 0-count functions.  */
2799
2800 void
2801 handle_missing_profiles (void)
2802 {
2803   struct cgraph_node *node;
2804   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2805   vec<struct cgraph_node *> worklist;
2806   worklist.create (64);
2807
2808   /* See if 0 count function has non-0 count callers.  In this case we
2809      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2810   FOR_EACH_DEFINED_FUNCTION (node)
2811     {
2812       struct cgraph_edge *e;
2813       gcov_type call_count = 0;
2814       gcov_type max_tp_first_run = 0;
2815       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2816
2817       if (node->count)
2818         continue;
2819       for (e = node->callers; e; e = e->next_caller)
2820       {
2821         call_count += e->count;
2822
2823         if (e->caller->tp_first_run > max_tp_first_run)
2824           max_tp_first_run = e->caller->tp_first_run;
2825       }
2826
2827       /* If time profile is missing, let assign the maximum that comes from
2828          caller functions.  */
2829       if (!node->tp_first_run && max_tp_first_run)
2830         node->tp_first_run = max_tp_first_run + 1;
2831
2832       if (call_count
2833           && fn && fn->cfg
2834           && (call_count * unlikely_count_fraction >= profile_info->runs))
2835         {
2836           drop_profile (node, call_count);
2837           worklist.safe_push (node);
2838         }
2839     }
2840
2841   /* Propagate the profile dropping to other 0-count COMDATs that are
2842      potentially called by COMDATs we already dropped the profile on.  */
2843   while (worklist.length () > 0)
2844     {
2845       struct cgraph_edge *e;
2846
2847       node = worklist.pop ();
2848       for (e = node->callees; e; e = e->next_caller)
2849         {
2850           struct cgraph_node *callee = e->callee;
2851           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2852
2853           if (callee->count > 0)
2854             continue;
2855           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2856               && profile_status_for_fn (fn) == PROFILE_READ)
2857             {
2858               drop_profile (node, 0);
2859               worklist.safe_push (callee);
2860             }
2861         }
2862     }
2863   worklist.release ();
2864 }
2865
2866 /* Convert counts measured by profile driven feedback to frequencies.
2867    Return nonzero iff there was any nonzero execution count.  */
2868
2869 int
2870 counts_to_freqs (void)
2871 {
2872   gcov_type count_max, true_count_max = 0;
2873   basic_block bb;
2874
2875   /* Don't overwrite the estimated frequencies when the profile for
2876      the function is missing.  We may drop this function PROFILE_GUESSED
2877      later in drop_profile ().  */
2878   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2879     return 0;
2880
2881   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2882     true_count_max = MAX (bb->count, true_count_max);
2883
2884   count_max = MAX (true_count_max, 1);
2885   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2886     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2887
2888   return true_count_max;
2889 }
2890
2891 /* Return true if function is likely to be expensive, so there is no point to
2892    optimize performance of prologue, epilogue or do inlining at the expense
2893    of code size growth.  THRESHOLD is the limit of number of instructions
2894    function can execute at average to be still considered not expensive.  */
2895
2896 bool
2897 expensive_function_p (int threshold)
2898 {
2899   unsigned int sum = 0;
2900   basic_block bb;
2901   unsigned int limit;
2902
2903   /* We can not compute accurately for large thresholds due to scaled
2904      frequencies.  */
2905   gcc_assert (threshold <= BB_FREQ_MAX);
2906
2907   /* Frequencies are out of range.  This either means that function contains
2908      internal loop executing more than BB_FREQ_MAX times or profile feedback
2909      is available and function has not been executed at all.  */
2910   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2911     return true;
2912
2913   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2914   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2915   FOR_EACH_BB_FN (bb, cfun)
2916     {
2917       rtx_insn *insn;
2918
2919       FOR_BB_INSNS (bb, insn)
2920         if (active_insn_p (insn))
2921           {
2922             sum += bb->frequency;
2923             if (sum > limit)
2924               return true;
2925         }
2926     }
2927
2928   return false;
2929 }
2930
2931 /* Estimate and propagate basic block frequencies using the given branch
2932    probabilities.  If FORCE is true, the frequencies are used to estimate
2933    the counts even when there are already non-zero profile counts.  */
2934
2935 void
2936 estimate_bb_frequencies (bool force)
2937 {
2938   basic_block bb;
2939   sreal freq_max;
2940
2941   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2942     {
2943       static int real_values_initialized = 0;
2944
2945       if (!real_values_initialized)
2946         {
2947           real_values_initialized = 1;
2948           real_br_prob_base = REG_BR_PROB_BASE;
2949           real_bb_freq_max = BB_FREQ_MAX;
2950           real_one_half = sreal (1, -1);
2951           real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2952           real_almost_one = sreal (1) - real_inv_br_prob_base;
2953         }
2954
2955       mark_dfs_back_edges ();
2956
2957       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2958          REG_BR_PROB_BASE;
2959
2960       /* Set up block info for each basic block.  */
2961       alloc_aux_for_blocks (sizeof (block_info));
2962       alloc_aux_for_edges (sizeof (edge_prob_info));
2963       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2964         {
2965           edge e;
2966           edge_iterator ei;
2967
2968           FOR_EACH_EDGE (e, ei, bb->succs)
2969             {
2970               EDGE_INFO (e)->back_edge_prob = e->probability;
2971               EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2972             }
2973         }
2974
2975       /* First compute frequencies locally for each loop from innermost
2976          to outermost to examine frequencies for back edges.  */
2977       estimate_loops ();
2978
2979       freq_max = 0;
2980       FOR_EACH_BB_FN (bb, cfun)
2981         if (freq_max < BLOCK_INFO (bb)->frequency)
2982           freq_max = BLOCK_INFO (bb)->frequency;
2983
2984       freq_max = real_bb_freq_max / freq_max;
2985       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2986         {
2987           sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2988           bb->frequency = tmp.to_int ();
2989         }
2990
2991       free_aux_for_blocks ();
2992       free_aux_for_edges ();
2993     }
2994   compute_function_frequency ();
2995 }
2996
2997 /* Decide whether function is hot, cold or unlikely executed.  */
2998 void
2999 compute_function_frequency (void)
3000 {
3001   basic_block bb;
3002   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3003
3004   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3005       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3006     node->only_called_at_startup = true;
3007   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3008     node->only_called_at_exit = true;
3009
3010   if (profile_status_for_fn (cfun) != PROFILE_READ)
3011     {
3012       int flags = flags_from_decl_or_type (current_function_decl);
3013       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3014           != NULL)
3015         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3016       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3017                != NULL)
3018         node->frequency = NODE_FREQUENCY_HOT;
3019       else if (flags & ECF_NORETURN)
3020         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3021       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3022         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3023       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3024                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3025         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3026       return;
3027     }
3028
3029   /* Only first time try to drop function into unlikely executed.
3030      After inlining the roundoff errors may confuse us.
3031      Ipa-profile pass will drop functions only called from unlikely
3032      functions to unlikely and that is most of what we care about.  */
3033   if (!cfun->after_inlining)
3034     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3035   FOR_EACH_BB_FN (bb, cfun)
3036     {
3037       if (maybe_hot_bb_p (cfun, bb))
3038         {
3039           node->frequency = NODE_FREQUENCY_HOT;
3040           return;
3041         }
3042       if (!probably_never_executed_bb_p (cfun, bb))
3043         node->frequency = NODE_FREQUENCY_NORMAL;
3044     }
3045 }
3046
3047 /* Build PREDICT_EXPR.  */
3048 tree
3049 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3050 {
3051   tree t = build1 (PREDICT_EXPR, void_type_node,
3052                    build_int_cst (integer_type_node, predictor));
3053   SET_PREDICT_EXPR_OUTCOME (t, taken);
3054   return t;
3055 }
3056
3057 const char *
3058 predictor_name (enum br_predictor predictor)
3059 {
3060   return predictor_info[predictor].name;
3061 }
3062
3063 /* Predict branch probabilities and estimate profile of the tree CFG. */
3064
3065 namespace {
3066
3067 const pass_data pass_data_profile =
3068 {
3069   GIMPLE_PASS, /* type */
3070   "profile_estimate", /* name */
3071   OPTGROUP_NONE, /* optinfo_flags */
3072   TV_BRANCH_PROB, /* tv_id */
3073   PROP_cfg, /* properties_required */
3074   0, /* properties_provided */
3075   0, /* properties_destroyed */
3076   0, /* todo_flags_start */
3077   0, /* todo_flags_finish */
3078 };
3079
3080 class pass_profile : public gimple_opt_pass
3081 {
3082 public:
3083   pass_profile (gcc::context *ctxt)
3084     : gimple_opt_pass (pass_data_profile, ctxt)
3085   {}
3086
3087   /* opt_pass methods: */
3088   virtual bool gate (function *) { return flag_guess_branch_prob; }
3089   virtual unsigned int execute (function *);
3090
3091 }; // class pass_profile
3092
3093 unsigned int
3094 pass_profile::execute (function *fun)
3095 {
3096   unsigned nb_loops;
3097
3098   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3099     return 0;
3100
3101   loop_optimizer_init (LOOPS_NORMAL);
3102   if (dump_file && (dump_flags & TDF_DETAILS))
3103     flow_loops_dump (dump_file, NULL, 0);
3104
3105   mark_irreducible_loops ();
3106
3107   nb_loops = number_of_loops (fun);
3108   if (nb_loops > 1)
3109     scev_initialize ();
3110
3111   tree_estimate_probability (false);
3112
3113   if (nb_loops > 1)
3114     scev_finalize ();
3115
3116   loop_optimizer_finalize ();
3117   if (dump_file && (dump_flags & TDF_DETAILS))
3118     gimple_dump_cfg (dump_file, dump_flags);
3119  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3120     profile_status_for_fn (fun) = PROFILE_GUESSED;
3121   return 0;
3122 }
3123
3124 } // anon namespace
3125
3126 gimple_opt_pass *
3127 make_pass_profile (gcc::context *ctxt)
3128 {
3129   return new pass_profile (ctxt);
3130 }
3131
3132 namespace {
3133
3134 const pass_data pass_data_strip_predict_hints =
3135 {
3136   GIMPLE_PASS, /* type */
3137   "*strip_predict_hints", /* name */
3138   OPTGROUP_NONE, /* optinfo_flags */
3139   TV_BRANCH_PROB, /* tv_id */
3140   PROP_cfg, /* properties_required */
3141   0, /* properties_provided */
3142   0, /* properties_destroyed */
3143   0, /* todo_flags_start */
3144   0, /* todo_flags_finish */
3145 };
3146
3147 class pass_strip_predict_hints : public gimple_opt_pass
3148 {
3149 public:
3150   pass_strip_predict_hints (gcc::context *ctxt)
3151     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3152   {}
3153
3154   /* opt_pass methods: */
3155   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3156   virtual unsigned int execute (function *);
3157
3158 }; // class pass_strip_predict_hints
3159
3160 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3161    we no longer need.  */
3162 unsigned int
3163 pass_strip_predict_hints::execute (function *fun)
3164 {
3165   basic_block bb;
3166   gimple *ass_stmt;
3167   tree var;
3168
3169   FOR_EACH_BB_FN (bb, fun)
3170     {
3171       gimple_stmt_iterator bi;
3172       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3173         {
3174           gimple *stmt = gsi_stmt (bi);
3175
3176           if (gimple_code (stmt) == GIMPLE_PREDICT)
3177             {
3178               gsi_remove (&bi, true);
3179               continue;
3180             }
3181           else if (is_gimple_call (stmt))
3182             {
3183               tree fndecl = gimple_call_fndecl (stmt);
3184
3185               if ((fndecl
3186                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3187                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3188                    && gimple_call_num_args (stmt) == 2)
3189                   || (gimple_call_internal_p (stmt)
3190                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3191                 {
3192                   var = gimple_call_lhs (stmt);
3193                   if (var)
3194                     {
3195                       ass_stmt
3196                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3197                       gsi_replace (&bi, ass_stmt, true);
3198                     }
3199                   else
3200                     {
3201                       gsi_remove (&bi, true);
3202                       continue;
3203                     }
3204                 }
3205             }
3206           gsi_next (&bi);
3207         }
3208     }
3209   return 0;
3210 }
3211
3212 } // anon namespace
3213
3214 gimple_opt_pass *
3215 make_pass_strip_predict_hints (gcc::context *ctxt)
3216 {
3217   return new pass_strip_predict_hints (ctxt);
3218 }
3219
3220 /* Rebuild function frequencies.  Passes are in general expected to
3221    maintain profile by hand, however in some cases this is not possible:
3222    for example when inlining several functions with loops freuqencies might run
3223    out of scale and thus needs to be recomputed.  */
3224
3225 void
3226 rebuild_frequencies (void)
3227 {
3228   timevar_push (TV_REBUILD_FREQUENCIES);
3229
3230   /* When the max bb count in the function is small, there is a higher
3231      chance that there were truncation errors in the integer scaling
3232      of counts by inlining and other optimizations. This could lead
3233      to incorrect classification of code as being cold when it isn't.
3234      In that case, force the estimation of bb counts/frequencies from the
3235      branch probabilities, rather than computing frequencies from counts,
3236      which may also lead to frequencies incorrectly reduced to 0. There
3237      is less precision in the probabilities, so we only do this for small
3238      max counts.  */
3239   gcov_type count_max = 0;
3240   basic_block bb;
3241   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3242     count_max = MAX (bb->count, count_max);
3243
3244   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3245       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3246           && count_max < REG_BR_PROB_BASE/10))
3247     {
3248       loop_optimizer_init (0);
3249       add_noreturn_fake_exit_edges ();
3250       mark_irreducible_loops ();
3251       connect_infinite_loops_to_exit ();
3252       estimate_bb_frequencies (true);
3253       remove_fake_exit_edges ();
3254       loop_optimizer_finalize ();
3255     }
3256   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3257     counts_to_freqs ();
3258   else
3259     gcc_unreachable ();
3260   timevar_pop (TV_REBUILD_FREQUENCIES);
3261 }
3262
3263 /* Perform a dry run of the branch prediction pass and report comparsion of
3264    the predicted and real profile into the dump file.  */
3265
3266 void
3267 report_predictor_hitrates (void)
3268 {
3269   unsigned nb_loops;
3270
3271   loop_optimizer_init (LOOPS_NORMAL);
3272   if (dump_file && (dump_flags & TDF_DETAILS))
3273     flow_loops_dump (dump_file, NULL, 0);
3274
3275   mark_irreducible_loops ();
3276
3277   nb_loops = number_of_loops (cfun);
3278   if (nb_loops > 1)
3279     scev_initialize ();
3280
3281   tree_estimate_probability (true);
3282
3283   if (nb_loops > 1)
3284     scev_finalize ();
3285
3286   loop_optimizer_finalize ();
3287 }
3288
3289 /* Force edge E to be cold.
3290    If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
3291    keep low probability to represent possible error in a guess.  This is used
3292    i.e. in case we predict loop to likely iterate given number of times but
3293    we are not 100% sure.
3294
3295    This function locally updates profile without attempt to keep global
3296    consistency which can not be reached in full generality without full profile
3297    rebuild from probabilities alone.  Doing so is not necessarily a good idea
3298    because frequencies and counts may be more realistic then probabilities.
3299
3300    In some cases (such as for elimination of early exits during full loop
3301    unrolling) the caller can ensure that profile will get consistent
3302    afterwards.  */
3303
3304 void
3305 force_edge_cold (edge e, bool impossible)
3306 {
3307   gcov_type count_sum = 0;
3308   int prob_sum = 0;
3309   edge_iterator ei;
3310   edge e2;
3311   gcov_type old_count = e->count;
3312   int old_probability = e->probability;
3313   gcov_type gcov_scale = REG_BR_PROB_BASE;
3314   int prob_scale = REG_BR_PROB_BASE;
3315
3316   /* If edge is already improbably or cold, just return.  */
3317   if (e->probability <= impossible ? PROB_VERY_UNLIKELY : 0
3318       && (!impossible || !e->count))
3319     return;
3320   FOR_EACH_EDGE (e2, ei, e->src->succs)
3321     if (e2 != e)
3322       {
3323         count_sum += e2->count;
3324         prob_sum += e2->probability;
3325       }
3326
3327   /* If there are other edges out of e->src, redistribute probabilitity
3328      there.  */
3329   if (prob_sum)
3330     {
3331       e->probability
3332          = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
3333       if (old_probability)
3334         e->count = RDIV (e->count * e->probability, old_probability);
3335       else
3336         e->count = MIN (e->count, impossible ? 0 : 1);
3337
3338       if (count_sum)
3339         gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
3340                            count_sum);
3341       prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
3342                          prob_sum);
3343       if (dump_file && (dump_flags & TDF_DETAILS))
3344         fprintf (dump_file, "Making edge %i->%i %s by redistributing "
3345                  "probability to other edges.\n",
3346                  e->src->index, e->dest->index,
3347                  impossible ? "imposisble" : "cold");
3348       FOR_EACH_EDGE (e2, ei, e->src->succs)
3349         if (e2 != e)
3350           {
3351             e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
3352             e2->probability = RDIV (e2->probability * prob_scale,
3353                                     REG_BR_PROB_BASE);
3354           }
3355     }
3356   /* If all edges out of e->src are unlikely, the basic block itself
3357      is unlikely.  */
3358   else
3359     {
3360       e->probability = REG_BR_PROB_BASE;
3361
3362       /* If we did not adjusting, the source basic block has no likely edeges
3363          leaving other direction. In that case force that bb cold, too.
3364          This in general is difficult task to do, but handle special case when
3365          BB has only one predecestor.  This is common case when we are updating
3366          after loop transforms.  */
3367       if (!prob_sum && !count_sum && single_pred_p (e->src)
3368           && e->src->frequency > (impossible ? 0 : 1))
3369         {
3370           int old_frequency = e->src->frequency;
3371           if (dump_file && (dump_flags & TDF_DETAILS))
3372             fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
3373                      impossible ? "imposisble" : "cold");
3374           e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
3375           e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
3376                                            old_frequency);
3377           force_edge_cold (single_pred_edge (e->src), impossible);
3378         }
3379       else if (dump_file && (dump_flags & TDF_DETAILS)
3380                && maybe_hot_bb_p (cfun, e->src))
3381         fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
3382                  impossible ? "imposisble" : "cold");
3383     }
3384 }