gcc/predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "backend.h"
34 #include "rtl.h"
35 #include "tree.h"
36 #include "gimple.h"
37 #include "cfghooks.h"
38 #include "tree-pass.h"
39 #include "ssa.h"
40 #include "emit-rtl.h"
41 #include "cgraph.h"
42 #include "coverage.h"
43 #include "diagnostic-core.h"
44 #include "gimple-predict.h"
45 #include "fold-const.h"
46 #include "calls.h"
47 #include "cfganal.h"
48 #include "profile.h"
49 #include "sreal.h"
50 #include "params.h"
51 #include "cfgloop.h"
52 #include "gimple-iterator.h"
53 #include "tree-cfg.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "tree-ssa-loop.h"
56 #include "tree-scalar-evolution.h"
58 /* real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
59 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
60 static sreal real_almost_one, real_br_prob_base,
61 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
63 static void combine_predictions_for_insn (rtx_insn *, basic_block);
64 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
65 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
66 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
67 static bool can_predict_insn_p (const rtx_insn *);
69 /* Information we hold about each branch predictor.
70 Filled using information from predict.def. */
72 struct predictor_info
74 const char *const name; /* Name used in the debugging dumps. */
75 const int hitrate; /* Expected hitrate used by
76 predict_insn_def call. */
77 const int flags;
80 /* Use the given predictor without Dempster-Shafer combination if it matches
81 using the first_match heuristic. */
82 #define PRED_FLAG_FIRST_MATCH 1
84 /* Convert a hit rate given in percent to our fixed-point representation. */
86 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
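/* Illustrative arithmetic, not part of the original source: with
   REG_BR_PROB_BASE being 10000, HITRATE (90) expands to
   ((int) (90 * 10000 + 50) / 100) == 9000, i.e. a 90% hit rate in the
   fixed-point scale used for all probabilities in this file, and
   HITRATE (50) == 5000 represents an even 50/50 outcome.  */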
88 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
89 static const struct predictor_info predictor_info[]= {
90 #include "predict.def"
92 /* Upper bound on predictors. */
93 {NULL, 0, 0}
95 #undef DEF_PREDICTOR
97 /* Return TRUE if frequency FREQ is considered to be hot. */
99 static inline bool
100 maybe_hot_frequency_p (struct function *fun, int freq)
102 struct cgraph_node *node = cgraph_node::get (fun->decl);
103 if (!profile_info
104 || !opt_for_fn (fun->decl, flag_branch_probabilities))
106 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
107 return false;
108 if (node->frequency == NODE_FREQUENCY_HOT)
109 return true;
111 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
112 return true;
113 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
114 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
115 return false;
116 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
117 return false;
118 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
119 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
120 return false;
121 return true;
124 static gcov_type min_count = -1;
126 /* Determine the threshold for hot BB counts. */
128 gcov_type
129 get_hot_bb_threshold ()
131 gcov_working_set_t *ws;
132 if (min_count == -1)
134 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
135 gcc_assert (ws);
136 min_count = ws->min_counter;
138 return min_count;
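/* Worked example (illustrative; the hot-bb-count-ws-permille default of
   999 is an assumption, check params.def): the working set is the
   smallest set of basic blocks covering 99.9 permille of all profiled
   execution, and the threshold becomes the smallest counter in that set.
   So if blocks with counts >= 120 already cover 99.9% of the dynamic
   execution, any block executed at least 120 times is treated as hot.  */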
141 /* Set the threshold for hot BB counts. */
143 void
144 set_hot_bb_threshold (gcov_type min)
146 min_count = min;
149 /* Return TRUE if profile count COUNT is considered to be hot. */
151 bool
152 maybe_hot_count_p (struct function *fun, gcov_type count)
154 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
155 return true;
156 /* Code executed at most once is not hot. */
157 if (profile_info->runs >= count)
158 return false;
159 return (count >= get_hot_bb_threshold ());
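/* Example of the "executed at most once" test above (illustrative, not
   from the original source): if the training profile consists of 1000
   runs and a block's summed count is 1000 or less, it ran at most about
   once per run on average and is therefore not considered hot, even
   before the working-set threshold is consulted.  */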
162 /* Return true in case BB can be CPU intensive and should be optimized
163 for maximal performance. */
165 bool
166 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
168 gcc_checking_assert (fun);
169 if (profile_status_for_fn (fun) == PROFILE_READ)
170 return maybe_hot_count_p (fun, bb->count);
171 return maybe_hot_frequency_p (fun, bb->frequency);
174 /* Return true in case edge E can be CPU intensive and should be optimized
175 for maximal performance. */
177 bool
178 maybe_hot_edge_p (edge e)
180 if (profile_status_for_fn (cfun) == PROFILE_READ)
181 return maybe_hot_count_p (cfun, e->count);
182 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
185 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
186 node frequency, indicate never being executed. */
188 static bool
189 probably_never_executed (struct function *fun,
190 gcov_type count, int frequency)
192 gcc_checking_assert (fun);
193 if (profile_status_for_fn (fun) == PROFILE_READ)
195 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
196 if (count * unlikely_count_fraction >= profile_info->runs)
197 return false;
198 if (!frequency)
199 return true;
200 if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
201 return false;
202 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
204 gcov_type computed_count;
205 /* Check for possibility of overflow, in which case entry bb count
206 is large enough to do the division first without losing much
207 precision. */
208 if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
209 REG_BR_PROB_BASE)
211 gcov_type scaled_count
212 = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
213 unlikely_count_fraction;
214 computed_count = RDIV (scaled_count,
215 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
217 else
219 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
220 ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
221 computed_count *= frequency * unlikely_count_fraction;
223 if (computed_count >= profile_info->runs)
224 return false;
226 return true;
228 if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
229 && (cgraph_node::get (fun->decl)->frequency
230 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
231 return true;
232 return false;
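/* Worked example (illustrative; the unlikely-bb-count-fraction default
   of 20 is an assumption, check params.def): with a profile gathered
   over 1000 runs, a block counted 50 times gives 50 * 20 = 1000 >= 1000
   and is immediately considered possibly executed, while a block counted
   49 times passes this first filter and is then subjected to the
   frequency-scaled test above before being declared never executed.  */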
236 /* Return true in case BB is probably never executed. */
238 bool
239 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
241 return probably_never_executed (fun, bb->count, bb->frequency);
245 /* Return true in case edge E is probably never executed. */
247 bool
248 probably_never_executed_edge_p (struct function *fun, edge e)
250 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
253 /* Return true when current function should always be optimized for size. */
255 bool
256 optimize_function_for_size_p (struct function *fun)
258 if (!fun || !fun->decl)
259 return optimize_size;
260 cgraph_node *n = cgraph_node::get (fun->decl);
261 return n && n->optimize_for_size_p ();
264 /* Return true when current function should always be optimized for speed. */
266 bool
267 optimize_function_for_speed_p (struct function *fun)
269 return !optimize_function_for_size_p (fun);
272 /* Return the optimization type that should be used for the function FUN. */
274 optimization_type
275 function_optimization_type (struct function *fun)
277 return (optimize_function_for_speed_p (fun)
278 ? OPTIMIZE_FOR_SPEED
279 : OPTIMIZE_FOR_SIZE);
282 /* Return TRUE when BB should be optimized for size. */
284 bool
285 optimize_bb_for_size_p (const_basic_block bb)
287 return (optimize_function_for_size_p (cfun)
288 || (bb && !maybe_hot_bb_p (cfun, bb)));
291 /* Return TRUE when BB should be optimized for speed. */
293 bool
294 optimize_bb_for_speed_p (const_basic_block bb)
296 return !optimize_bb_for_size_p (bb);
299 /* Return the optimization type that should be used for block BB. */
301 optimization_type
302 bb_optimization_type (const_basic_block bb)
304 return (optimize_bb_for_speed_p (bb)
305 ? OPTIMIZE_FOR_SPEED
306 : OPTIMIZE_FOR_SIZE);
309 /* Return TRUE when edge E should be optimized for size. */
311 bool
312 optimize_edge_for_size_p (edge e)
314 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
317 /* Return TRUE when edge E should be optimized for speed. */
319 bool
320 optimize_edge_for_speed_p (edge e)
322 return !optimize_edge_for_size_p (e);
325 /* Return TRUE when the current instruction should be optimized for size. */
327 bool
328 optimize_insn_for_size_p (void)
330 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
333 /* Return TRUE when the current instruction should be optimized for speed. */
335 bool
336 optimize_insn_for_speed_p (void)
338 return !optimize_insn_for_size_p ();
341 /* Return TRUE when LOOP should be optimized for size. */
343 bool
344 optimize_loop_for_size_p (struct loop *loop)
346 return optimize_bb_for_size_p (loop->header);
349 /* Return TRUE when LOOP should be optimized for speed. */
351 bool
352 optimize_loop_for_speed_p (struct loop *loop)
354 return optimize_bb_for_speed_p (loop->header);
357 /* Return TRUE when LOOP nest should be optimized for speed. */
359 bool
360 optimize_loop_nest_for_speed_p (struct loop *loop)
362 struct loop *l = loop;
363 if (optimize_loop_for_speed_p (loop))
364 return true;
365 l = loop->inner;
366 while (l && l != loop)
368 if (optimize_loop_for_speed_p (l))
369 return true;
370 if (l->inner)
371 l = l->inner;
372 else if (l->next)
373 l = l->next;
374 else
376 while (l != loop && !l->next)
377 l = loop_outer (l);
378 if (l != loop)
379 l = l->next;
382 return false;
385 /* Return TRUE when LOOP nest should be optimized for size. */
387 bool
388 optimize_loop_nest_for_size_p (struct loop *loop)
390 return !optimize_loop_nest_for_speed_p (loop);
393 /* Return true when edge E is likely to be well predictable by branch
394 predictor. */
396 bool
397 predictable_edge_p (edge e)
399 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
400 return false;
401 if ((e->probability
402 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
403 || (REG_BR_PROB_BASE - e->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
405 return true;
406 return false;
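/* Illustrative numbers, not from the original source: assuming the
   predictable-branch-outcome parameter keeps its usual default of 2,
   the test above accepts edges whose probability is at most
   2 * 10000 / 100 == 200 (2%) or at least 9800 (98%), i.e. branches a
   hardware predictor would almost always get right.  */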
410 /* Set RTL expansion for BB profile. */
412 void
413 rtl_profile_for_bb (basic_block bb)
415 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
418 /* Set RTL expansion for edge profile. */
420 void
421 rtl_profile_for_edge (edge e)
423 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
426 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
427 void
428 default_rtl_profile (void)
430 crtl->maybe_hot_insn_p = true;
433 /* Return true if one of BB's outgoing edges is already predicted by
434 PREDICTOR. */
436 bool
437 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
439 rtx note;
440 if (!INSN_P (BB_END (bb)))
441 return false;
442 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
443 if (REG_NOTE_KIND (note) == REG_BR_PRED
444 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
445 return true;
446 return false;
449 /* Structure representing predictions in tree level. */
451 struct edge_prediction {
452 struct edge_prediction *ep_next;
453 edge ep_edge;
454 enum br_predictor ep_predictor;
455 int ep_probability;
458 /* This map contains, for each basic block, the list of predictions for its
459 outgoing edges. */
461 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
463 /* Return true if one of BB's outgoing edges is already predicted by
464 PREDICTOR. */
466 bool
467 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
469 struct edge_prediction *i;
470 edge_prediction **preds = bb_predictions->get (bb);
472 if (!preds)
473 return false;
475 for (i = *preds; i; i = i->ep_next)
476 if (i->ep_predictor == predictor)
477 return true;
478 return false;
481 /* Return true when the probability of edge is reliable.
483 The profile guessing code is good at predicting branch outcomes (i.e.
484 taken/not taken); it gets them right slightly over 75% of the time.
485 It is however notoriously poor at predicting the probability itself.
486 In general the guessed profile appears a lot flatter (with probabilities
487 closer to 50%) than reality, so it is a bad idea to use it to drive
488 optimizations such as those disabling dynamic branch prediction for well
489 predictable branches.
491 There are two exceptions - edges leading to noreturn calls and edges
492 predicted by the number-of-iterations heuristics are predicted well. This
493 predicate should be able to distinguish those, but at the moment it simply
494 checks for the noreturn heuristic, which is the only one giving a
495 probability over 99% or below 1%. In the future we might want to propagate
496 reliability information across the CFG if we find it useful in multiple places. */
497 static bool
498 probability_reliable_p (int prob)
500 return (profile_status_for_fn (cfun) == PROFILE_READ
501 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
502 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
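/* Worked example (illustrative): for a guessed profile, HITRATE (1) is
   (1 * 10000 + 50) / 100 == 100 and HITRATE (99) == 9900, so a guessed
   probability counts as reliable only when it is at most 1% or at least
   99%; probabilities read from real profile feedback are always
   considered reliable.  */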
505 /* Same predicate as above, working on edges. */
506 bool
507 edge_probability_reliable_p (const_edge e)
509 return probability_reliable_p (e->probability);
512 /* Same predicate as edge_probability_reliable_p, working on notes. */
513 bool
514 br_prob_note_reliable_p (const_rtx note)
516 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
517 return probability_reliable_p (XINT (note, 0));
520 static void
521 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
523 gcc_assert (any_condjump_p (insn));
524 if (!flag_guess_branch_prob)
525 return;
527 add_reg_note (insn, REG_BR_PRED,
528 gen_rtx_CONCAT (VOIDmode,
529 GEN_INT ((int) predictor),
530 GEN_INT ((int) probability)));
533 /* Predict insn by given predictor. */
535 void
536 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
537 enum prediction taken)
539 int probability = predictor_info[(int) predictor].hitrate;
541 if (taken != TAKEN)
542 probability = REG_BR_PROB_BASE - probability;
544 predict_insn (insn, predictor, probability);
547 /* Predict edge E with given probability if possible. */
549 void
550 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
552 rtx_insn *last_insn;
553 last_insn = BB_END (e->src);
555 /* We can store the branch prediction information only about
556 conditional jumps. */
557 if (!any_condjump_p (last_insn))
558 return;
560 /* We always store probability of branching. */
561 if (e->flags & EDGE_FALLTHRU)
562 probability = REG_BR_PROB_BASE - probability;
564 predict_insn (last_insn, predictor, probability);
567 /* Predict edge E with the given PROBABILITY. */
568 void
569 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
571 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
572 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
574 && flag_guess_branch_prob && optimize)
576 struct edge_prediction *i = XNEW (struct edge_prediction);
577 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
579 i->ep_next = preds;
580 preds = i;
581 i->ep_probability = probability;
582 i->ep_predictor = predictor;
583 i->ep_edge = e;
587 /* Remove all predictions on given basic block that are attached
588 to edge E. */
589 void
590 remove_predictions_associated_with_edge (edge e)
592 if (!bb_predictions)
593 return;
595 edge_prediction **preds = bb_predictions->get (e->src);
597 if (preds)
599 struct edge_prediction **prediction = preds;
600 struct edge_prediction *next;
602 while (*prediction)
604 if ((*prediction)->ep_edge == e)
606 next = (*prediction)->ep_next;
607 free (*prediction);
608 *prediction = next;
610 else
611 prediction = &((*prediction)->ep_next);
616 /* Clears the list of predictions stored for BB. */
618 static void
619 clear_bb_predictions (basic_block bb)
621 edge_prediction **preds = bb_predictions->get (bb);
622 struct edge_prediction *pred, *next;
624 if (!preds)
625 return;
627 for (pred = *preds; pred; pred = next)
629 next = pred->ep_next;
630 free (pred);
632 *preds = NULL;
635 /* Return true when we can store a prediction on insn INSN.
636 At the moment we represent predictions only on conditional
637 jumps, not on computed jumps or other complicated cases. */
638 static bool
639 can_predict_insn_p (const rtx_insn *insn)
641 return (JUMP_P (insn)
642 && any_condjump_p (insn)
643 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
646 /* Predict edge E by given predictor if possible. */
648 void
649 predict_edge_def (edge e, enum br_predictor predictor,
650 enum prediction taken)
652 int probability = predictor_info[(int) predictor].hitrate;
654 if (taken != TAKEN)
655 probability = REG_BR_PROB_BASE - probability;
657 predict_edge (e, predictor, probability);
660 /* Invert all branch predictions or probability notes in the INSN. This needs
661 to be done each time we invert the condition used by the jump. */
663 void
664 invert_br_probabilities (rtx insn)
666 rtx note;
668 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
669 if (REG_NOTE_KIND (note) == REG_BR_PROB)
670 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
671 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
672 XEXP (XEXP (note, 0), 1)
673 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
676 /* Dump information about the branch prediction to the output file. */
678 static void
679 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
680 basic_block bb, int used)
682 edge e;
683 edge_iterator ei;
685 if (!file)
686 return;
688 FOR_EACH_EDGE (e, ei, bb->succs)
689 if (! (e->flags & EDGE_FALLTHRU))
690 break;
692 fprintf (file, " %s heuristics%s: %.1f%%",
693 predictor_info[predictor].name,
694 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
696 if (bb->count)
698 fprintf (file, " exec %" PRId64, bb->count);
699 if (e)
701 fprintf (file, " hit %" PRId64, e->count);
702 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
706 fprintf (file, "\n");
709 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
710 evenly and hope for the best. */
711 static void
712 set_even_probabilities (basic_block bb)
714 int nedges = 0;
715 edge e;
716 edge_iterator ei;
718 FOR_EACH_EDGE (e, ei, bb->succs)
719 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
720 nedges ++;
721 FOR_EACH_EDGE (e, ei, bb->succs)
722 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
723 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
724 else
725 e->probability = 0;
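/* Illustrative example, not part of the original source: for a block
   with three non-EH, non-fake successors the formula above assigns each
   of them (10000 + 1) / 3 == 3333, so the probabilities sum to 9999,
   one unit short of REG_BR_PROB_BASE because of the integer rounding.  */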
728 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
729 note if not already present. Remove now useless REG_BR_PRED notes. */
731 static void
732 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
734 rtx prob_note;
735 rtx *pnote;
736 rtx note;
737 int best_probability = PROB_EVEN;
738 enum br_predictor best_predictor = END_PREDICTORS;
739 int combined_probability = REG_BR_PROB_BASE / 2;
740 int d;
741 bool first_match = false;
742 bool found = false;
744 if (!can_predict_insn_p (insn))
746 set_even_probabilities (bb);
747 return;
750 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
751 pnote = &REG_NOTES (insn);
752 if (dump_file)
753 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
754 bb->index);
756 /* We implement "first match" heuristics and use probability guessed
757 by predictor with smallest index. */
758 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
759 if (REG_NOTE_KIND (note) == REG_BR_PRED)
761 enum br_predictor predictor = ((enum br_predictor)
762 INTVAL (XEXP (XEXP (note, 0), 0)));
763 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
765 found = true;
766 if (best_predictor > predictor)
767 best_probability = probability, best_predictor = predictor;
769 d = (combined_probability * probability
770 + (REG_BR_PROB_BASE - combined_probability)
771 * (REG_BR_PROB_BASE - probability));
773 /* Use FP math to avoid overflows of 32bit integers. */
774 if (d == 0)
775 /* If one probability is 0% and one 100%, avoid division by zero. */
776 combined_probability = REG_BR_PROB_BASE / 2;
777 else
778 combined_probability = (((double) combined_probability) * probability
779 * REG_BR_PROB_BASE / d + 0.5);
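/* Worked example of the Dempster-Shafer style combination above
   (illustrative, not part of the original source): combining an even
   prior of 5000 with a predictor claiming 9000 gives d = 5000 * 9000 +
   5000 * 1000 = 50,000,000 and a combined value of 9000, i.e. an even
   prior is neutral.  Combining 9000 with a second predictor of 8000
   gives d = 72,000,000 + 2,000,000 and roughly 9730, so two agreeing
   predictors reinforce each other beyond either individual hit rate.  */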
782 /* Decide which heuristic to use. In case we didn't match anything,
783 use the no_prediction heuristic; in case we did match, use either
784 first match or Dempster-Shafer theory depending on the flags. */
786 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
787 first_match = true;
789 if (!found)
790 dump_prediction (dump_file, PRED_NO_PREDICTION,
791 combined_probability, bb, true);
792 else
794 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
795 bb, !first_match);
796 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
797 bb, first_match);
800 if (first_match)
801 combined_probability = best_probability;
802 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
804 while (*pnote)
806 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
808 enum br_predictor predictor = ((enum br_predictor)
809 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
810 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
812 dump_prediction (dump_file, predictor, probability, bb,
813 !first_match || best_predictor == predictor);
814 *pnote = XEXP (*pnote, 1);
816 else
817 pnote = &XEXP (*pnote, 1);
820 if (!prob_note)
822 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
824 /* Save the prediction into the CFG in case we are seeing a non-degenerate
825 conditional jump. */
826 if (!single_succ_p (bb))
828 BRANCH_EDGE (bb)->probability = combined_probability;
829 FALLTHRU_EDGE (bb)->probability
830 = REG_BR_PROB_BASE - combined_probability;
833 else if (!single_succ_p (bb))
835 int prob = XINT (prob_note, 0);
837 BRANCH_EDGE (bb)->probability = prob;
838 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
840 else
841 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
844 /* Combine predictions into single probability and store them into CFG.
845 Remove now useless prediction entries.
846 If DRY_RUN is set, only produce dumps and do not modify profile. */
848 static void
849 combine_predictions_for_bb (basic_block bb, bool dry_run)
851 int best_probability = PROB_EVEN;
852 enum br_predictor best_predictor = END_PREDICTORS;
853 int combined_probability = REG_BR_PROB_BASE / 2;
854 int d;
855 bool first_match = false;
856 bool found = false;
857 struct edge_prediction *pred;
858 int nedges = 0;
859 edge e, first = NULL, second = NULL;
860 edge_iterator ei;
862 FOR_EACH_EDGE (e, ei, bb->succs)
863 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
865 nedges ++;
866 if (first && !second)
867 second = e;
868 if (!first)
869 first = e;
872 /* When there is no successor or only one choice, prediction is easy.
874 We are lazy for now and predict only basic blocks with two outgoing
875 edges. It is possible to predict generic case too, but we have to
876 ignore first match heuristics and do more involved combining. Implement
877 this later. */
878 if (nedges != 2)
880 if (!bb->count && !dry_run)
881 set_even_probabilities (bb);
882 clear_bb_predictions (bb);
883 if (dump_file)
884 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
885 nedges, bb->index);
886 return;
889 if (dump_file)
890 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
892 edge_prediction **preds = bb_predictions->get (bb);
893 if (preds)
895 /* We implement "first match" heuristics and use probability guessed
896 by predictor with smallest index. */
897 for (pred = *preds; pred; pred = pred->ep_next)
899 enum br_predictor predictor = pred->ep_predictor;
900 int probability = pred->ep_probability;
902 if (pred->ep_edge != first)
903 probability = REG_BR_PROB_BASE - probability;
905 found = true;
906 /* The first-match heuristic would be wildly confused if we predicted
907 both directions. */
908 if (best_predictor > predictor)
910 struct edge_prediction *pred2;
911 int prob = probability;
913 for (pred2 = (struct edge_prediction *) *preds;
914 pred2; pred2 = pred2->ep_next)
915 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
917 int probability2 = pred2->ep_probability;
919 if (pred2->ep_edge != first)
920 probability2 = REG_BR_PROB_BASE - probability2;
922 if ((probability < REG_BR_PROB_BASE / 2) !=
923 (probability2 < REG_BR_PROB_BASE / 2))
924 break;
926 /* If the same predictor later gave better result, go for it! */
927 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
928 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
929 prob = probability2;
931 if (!pred2)
932 best_probability = prob, best_predictor = predictor;
935 d = (combined_probability * probability
936 + (REG_BR_PROB_BASE - combined_probability)
937 * (REG_BR_PROB_BASE - probability));
939 /* Use FP math to avoid overflows of 32bit integers. */
940 if (d == 0)
941 /* If one probability is 0% and one 100%, avoid division by zero. */
942 combined_probability = REG_BR_PROB_BASE / 2;
943 else
944 combined_probability = (((double) combined_probability)
945 * probability
946 * REG_BR_PROB_BASE / d + 0.5);
950 /* Decide which heuristic to use. In case we didn't match anything,
951 use the no_prediction heuristic; in case we did match, use either
952 first match or Dempster-Shafer theory depending on the flags. */
954 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
955 first_match = true;
957 if (!found)
958 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
959 else
961 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
962 !first_match);
963 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
964 first_match);
967 if (first_match)
968 combined_probability = best_probability;
969 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
971 if (preds)
973 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
975 enum br_predictor predictor = pred->ep_predictor;
976 int probability = pred->ep_probability;
978 if (pred->ep_edge != EDGE_SUCC (bb, 0))
979 probability = REG_BR_PROB_BASE - probability;
980 dump_prediction (dump_file, predictor, probability, bb,
981 !first_match || best_predictor == predictor);
984 clear_bb_predictions (bb);
986 if (!bb->count && !dry_run)
988 first->probability = combined_probability;
989 second->probability = REG_BR_PROB_BASE - combined_probability;
993 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
994 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
996 T1 and T2 should be one of the following cases:
997 1. T1 is SSA_NAME, T2 is NULL
998 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
999 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1001 static tree
1002 strips_small_constant (tree t1, tree t2)
1004 tree ret = NULL;
1005 int value = 0;
1007 if (!t1)
1008 return NULL;
1009 else if (TREE_CODE (t1) == SSA_NAME)
1010 ret = t1;
1011 else if (tree_fits_shwi_p (t1))
1012 value = tree_to_shwi (t1);
1013 else
1014 return NULL;
1016 if (!t2)
1017 return ret;
1018 else if (tree_fits_shwi_p (t2))
1019 value = tree_to_shwi (t2);
1020 else if (TREE_CODE (t2) == SSA_NAME)
1022 if (ret)
1023 return NULL;
1024 else
1025 ret = t2;
1028 if (value <= 4 && value >= -4)
1029 return ret;
1030 else
1031 return NULL;
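/* Illustrative behaviour (i_7 denotes a hypothetical SSA name, not from
   the original source): strips_small_constant (i_7, NULL_TREE) returns
   i_7; so does the call with i_7 and the constant 3, since 3 lies in
   [-4, 4].  With the constant 8 the function returns NULL, and when both
   arguments are SSA_NAMEs it also gives up.  */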
1034 /* Return the SSA_NAME in T or T's operands.
1035 Return NULL if SSA_NAME cannot be found. */
1037 static tree
1038 get_base_value (tree t)
1040 if (TREE_CODE (t) == SSA_NAME)
1041 return t;
1043 if (!BINARY_CLASS_P (t))
1044 return NULL;
1046 switch (TREE_OPERAND_LENGTH (t))
1048 case 1:
1049 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1050 case 2:
1051 return strips_small_constant (TREE_OPERAND (t, 0),
1052 TREE_OPERAND (t, 1));
1053 default:
1054 return NULL;
1058 /* Check the compare STMT in LOOP. If it compares an induction
1059 variable to a loop invariant, return true, and save
1060 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1061 Otherwise return false and set LOOP_INVARIANT to NULL. */
1063 static bool
1064 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1065 tree *loop_invariant,
1066 enum tree_code *compare_code,
1067 tree *loop_step,
1068 tree *loop_iv_base)
1070 tree op0, op1, bound, base;
1071 affine_iv iv0, iv1;
1072 enum tree_code code;
1073 tree step;
1075 code = gimple_cond_code (stmt);
1076 *loop_invariant = NULL;
1078 switch (code)
1080 case GT_EXPR:
1081 case GE_EXPR:
1082 case NE_EXPR:
1083 case LT_EXPR:
1084 case LE_EXPR:
1085 case EQ_EXPR:
1086 break;
1088 default:
1089 return false;
1092 op0 = gimple_cond_lhs (stmt);
1093 op1 = gimple_cond_rhs (stmt);
1095 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1096 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1097 return false;
1098 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1099 return false;
1100 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1101 return false;
1102 if (TREE_CODE (iv0.step) != INTEGER_CST
1103 || TREE_CODE (iv1.step) != INTEGER_CST)
1104 return false;
1105 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1106 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1107 return false;
1109 if (integer_zerop (iv0.step))
1111 if (code != NE_EXPR && code != EQ_EXPR)
1112 code = invert_tree_comparison (code, false);
1113 bound = iv0.base;
1114 base = iv1.base;
1115 if (tree_fits_shwi_p (iv1.step))
1116 step = iv1.step;
1117 else
1118 return false;
1120 else
1122 bound = iv1.base;
1123 base = iv0.base;
1124 if (tree_fits_shwi_p (iv0.step))
1125 step = iv0.step;
1126 else
1127 return false;
1130 if (TREE_CODE (bound) != INTEGER_CST)
1131 bound = get_base_value (bound);
1132 if (!bound)
1133 return false;
1134 if (TREE_CODE (base) != INTEGER_CST)
1135 base = get_base_value (base);
1136 if (!base)
1137 return false;
1139 *loop_invariant = bound;
1140 *compare_code = code;
1141 *loop_step = step;
1142 *loop_iv_base = base;
1143 return true;
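/* Illustrative example, not part of the original source: for a loop
   "for (i = 0; i < n; i++)" whose exit test is the GIMPLE_COND "i < n"
   with n loop-invariant, simple_iv gives iv0 = {base 0, step 1} for i
   and iv1 = {base n, step 0} for n, so the function returns true with
   *loop_invariant = n, *compare_code = LT_EXPR, *loop_step = 1 and
   *loop_iv_base = 0.  */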
1146 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1148 static bool
1149 expr_coherent_p (tree t1, tree t2)
1151 gimple *stmt;
1152 tree ssa_name_1 = NULL;
1153 tree ssa_name_2 = NULL;
1155 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1156 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1158 if (t1 == t2)
1159 return true;
1161 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1162 return true;
1163 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1164 return false;
1166 /* Check to see if t1 is expressed/defined with t2. */
1167 stmt = SSA_NAME_DEF_STMT (t1);
1168 gcc_assert (stmt != NULL);
1169 if (is_gimple_assign (stmt))
1171 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1172 if (ssa_name_1 && ssa_name_1 == t2)
1173 return true;
1176 /* Check to see if t2 is expressed/defined with t1. */
1177 stmt = SSA_NAME_DEF_STMT (t2);
1178 gcc_assert (stmt != NULL);
1179 if (is_gimple_assign (stmt))
1181 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1182 if (ssa_name_2 && ssa_name_2 == t1)
1183 return true;
1186 /* Compare if t1 and t2's def_stmts are identical. */
1187 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1188 return true;
1189 else
1190 return false;
1193 /* Predict branch probability of BB when BB contains a branch that compares
1194 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1195 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1197 E.g.
1198 for (int i = 0; i < bound; i++) {
1199 if (i < bound - 2)
1200 computation_1();
1201 else
1202 computation_2();
1205 In this loop, we will predict the branch inside the loop to be taken. */
1207 static void
1208 predict_iv_comparison (struct loop *loop, basic_block bb,
1209 tree loop_bound_var,
1210 tree loop_iv_base_var,
1211 enum tree_code loop_bound_code,
1212 int loop_bound_step)
1214 gimple *stmt;
1215 tree compare_var, compare_base;
1216 enum tree_code compare_code;
1217 tree compare_step_var;
1218 edge then_edge;
1219 edge_iterator ei;
1221 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1222 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1223 || predicted_by_p (bb, PRED_LOOP_EXIT))
1224 return;
1226 stmt = last_stmt (bb);
1227 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1228 return;
1229 if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1230 loop, &compare_var,
1231 &compare_code,
1232 &compare_step_var,
1233 &compare_base))
1234 return;
1236 /* Find the taken edge. */
1237 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1238 if (then_edge->flags & EDGE_TRUE_VALUE)
1239 break;
1241 /* When comparing an IV to a loop invariant, NE is more likely to be
1242 taken while EQ is more likely to be not-taken. */
1243 if (compare_code == NE_EXPR)
1245 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1246 return;
1248 else if (compare_code == EQ_EXPR)
1250 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1251 return;
1254 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1255 return;
1257 /* If loop bound, base and compare bound are all constants, we can
1258 calculate the probability directly. */
1259 if (tree_fits_shwi_p (loop_bound_var)
1260 && tree_fits_shwi_p (compare_var)
1261 && tree_fits_shwi_p (compare_base))
1263 int probability;
1264 bool overflow, overall_overflow = false;
1265 widest_int compare_count, tem;
1267 /* (loop_bound - base) / compare_step */
1268 tem = wi::sub (wi::to_widest (loop_bound_var),
1269 wi::to_widest (compare_base), SIGNED, &overflow);
1270 overall_overflow |= overflow;
1271 widest_int loop_count = wi::div_trunc (tem,
1272 wi::to_widest (compare_step_var),
1273 SIGNED, &overflow);
1274 overall_overflow |= overflow;
1276 if (!wi::neg_p (wi::to_widest (compare_step_var))
1277 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1279 /* (loop_bound - compare_bound) / compare_step */
1280 tem = wi::sub (wi::to_widest (loop_bound_var),
1281 wi::to_widest (compare_var), SIGNED, &overflow);
1282 overall_overflow |= overflow;
1283 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1284 SIGNED, &overflow);
1285 overall_overflow |= overflow;
1287 else
1289 /* (compare_bound - base) / compare_step */
1290 tem = wi::sub (wi::to_widest (compare_var),
1291 wi::to_widest (compare_base), SIGNED, &overflow);
1292 overall_overflow |= overflow;
1293 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1294 SIGNED, &overflow);
1295 overall_overflow |= overflow;
1297 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1298 ++compare_count;
1299 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1300 ++loop_count;
1301 if (wi::neg_p (compare_count))
1302 compare_count = 0;
1303 if (wi::neg_p (loop_count))
1304 loop_count = 0;
1305 if (loop_count == 0)
1306 probability = 0;
1307 else if (wi::cmps (compare_count, loop_count) == 1)
1308 probability = REG_BR_PROB_BASE;
1309 else
1311 tem = compare_count * REG_BR_PROB_BASE;
1312 tem = wi::udiv_trunc (tem, loop_count);
1313 probability = tem.to_uhwi ();
1316 if (!overall_overflow)
1317 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1319 return;
1322 if (expr_coherent_p (loop_bound_var, compare_var))
1324 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1325 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1326 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1327 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1328 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1329 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330 else if (loop_bound_code == NE_EXPR)
1332 /* If the loop backedge condition is "(i != bound)", we do
1333 the comparison based on the step of IV:
1334 * step < 0 : backedge condition is like (i > bound)
1335 * step > 0 : backedge condition is like (i < bound) */
1336 gcc_assert (loop_bound_step != 0);
1337 if (loop_bound_step > 0
1338 && (compare_code == LT_EXPR
1339 || compare_code == LE_EXPR))
1340 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1341 else if (loop_bound_step < 0
1342 && (compare_code == GT_EXPR
1343 || compare_code == GE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else
1346 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1348 else
1349 /* The branch is predicted not-taken if loop_bound_code is
1350 opposite to compare_code. */
1351 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1353 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1355 /* For cases like:
1356 for (i = s; i < h; i++)
1357 if (i > s + 2) ....
1358 The branch should be predicted taken. */
1359 if (loop_bound_step > 0
1360 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1361 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1362 else if (loop_bound_step < 0
1363 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365 else
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
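/* Worked example of the constant case above (illustrative, not part of
   the original source): in "for (i = 0; i < 100; i++) if (i < 30) ..."
   the loop bound is 100, the compared bound 30, the IV base 0 and the
   step 1, so loop_count = 100, compare_count = 30 and the inner branch
   is predicted taken with probability 30 * 10000 / 100 == 3000, i.e.
   the 30% of iterations in which i is still below 30.  */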
1370 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1371 exits result from short-circuit conditions that will generate an
1372 if_tmp. E.g.:
1374 if (foo() || global > 10)
1375 break;
1377 This will be translated into:
1379 BB3:
1380 loop header...
1381 BB4:
1382 if foo() goto BB6 else goto BB5
1383 BB5:
1384 if global > 10 goto BB6 else goto BB7
1385 BB6:
1386 goto BB7
1387 BB7:
1388 iftmp = (PHI 0(BB5), 1(BB6))
1389 if iftmp == 1 goto BB8 else goto BB3
1390 BB8:
1391 outside of the loop...
1393 The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1394 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1395 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1396 exits to predict them using PRED_LOOP_EXIT. */
1398 static void
1399 predict_extra_loop_exits (edge exit_edge)
1401 unsigned i;
1402 bool check_value_one;
1403 gimple *lhs_def_stmt;
1404 gphi *phi_stmt;
1405 tree cmp_rhs, cmp_lhs;
1406 gimple *last;
1407 gcond *cmp_stmt;
1409 last = last_stmt (exit_edge->src);
1410 if (!last)
1411 return;
1412 cmp_stmt = dyn_cast <gcond *> (last);
1413 if (!cmp_stmt)
1414 return;
1416 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1417 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1418 if (!TREE_CONSTANT (cmp_rhs)
1419 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1420 return;
1421 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1422 return;
1424 /* If check_value_one is true, only the phi_args with value '1' will lead
1425 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1426 loop exit. */
1427 check_value_one = (((integer_onep (cmp_rhs))
1428 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1429 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
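/* Tracing the CFG sketched in the comment above (illustrative): the exit
   test is "if (iftmp == 1)" and EXIT_EDGE is its true edge, so
   integer_onep (cmp_rhs) == 1, the EQ_EXPR test == 1 and the
   EDGE_TRUE_VALUE test == 1, giving (1 ^ 1) ^ 1 == 1; check_value_one is
   therefore true and only the PHI argument 1 coming from BB6 marks a
   path that leaves the loop.  */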
1431 lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1432 if (!lhs_def_stmt)
1433 return;
1435 phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1436 if (!phi_stmt)
1437 return;
1439 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1441 edge e1;
1442 edge_iterator ei;
1443 tree val = gimple_phi_arg_def (phi_stmt, i);
1444 edge e = gimple_phi_arg_edge (phi_stmt, i);
1446 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1447 continue;
1448 if ((check_value_one ^ integer_onep (val)) == 1)
1449 continue;
1450 if (EDGE_COUNT (e->src->succs) != 1)
1452 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1453 continue;
1456 FOR_EACH_EDGE (e1, ei, e->src->preds)
1457 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1461 /* Predict edge probabilities by exploiting loop structure. */
1463 static void
1464 predict_loops (void)
1466 struct loop *loop;
1468 /* Try to predict out blocks in a loop that are not part of a
1469 natural loop. */
1470 FOR_EACH_LOOP (loop, 0)
1472 basic_block bb, *bbs;
1473 unsigned j, n_exits;
1474 vec<edge> exits;
1475 struct tree_niter_desc niter_desc;
1476 edge ex;
1477 struct nb_iter_bound *nb_iter;
1478 enum tree_code loop_bound_code = ERROR_MARK;
1479 tree loop_bound_step = NULL;
1480 tree loop_bound_var = NULL;
1481 tree loop_iv_base = NULL;
1482 gcond *stmt = NULL;
1484 exits = get_loop_exit_edges (loop);
1485 n_exits = exits.length ();
1486 if (!n_exits)
1488 exits.release ();
1489 continue;
1492 FOR_EACH_VEC_ELT (exits, j, ex)
1494 tree niter = NULL;
1495 HOST_WIDE_INT nitercst;
1496 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1497 int probability;
1498 enum br_predictor predictor;
1500 predict_extra_loop_exits (ex);
1502 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1503 niter = niter_desc.niter;
1504 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1505 niter = loop_niter_by_eval (loop, ex);
1507 if (TREE_CODE (niter) == INTEGER_CST)
1509 if (tree_fits_uhwi_p (niter)
1510 && max
1511 && compare_tree_int (niter, max - 1) == -1)
1512 nitercst = tree_to_uhwi (niter) + 1;
1513 else
1514 nitercst = max;
1515 predictor = PRED_LOOP_ITERATIONS;
1517 /* If we have just one exit and we can derive some information about
1518 the number of iterations of the loop from the statements inside
1519 the loop, use it to predict this exit. */
1520 else if (n_exits == 1)
1522 nitercst = estimated_stmt_executions_int (loop);
1523 if (nitercst < 0)
1524 continue;
1525 if (nitercst > max)
1526 nitercst = max;
1528 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1530 else
1531 continue;
1533 /* If the prediction for number of iterations is zero, do not
1534 predict the exit edges. */
1535 if (nitercst == 0)
1536 continue;
1538 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1539 predict_edge (ex, predictor, probability);
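/* Illustrative arithmetic, not part of the original source: for an exit
   of a loop expected to run about ten times, nitercst == 10 yields
   probability (10000 + 5) / 10 == 1000, i.e. each execution of the exit
   test has roughly a 10% chance of leaving the loop, which is consistent
   with ten iterations on average.  */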
1541 exits.release ();
1543 /* Find information about loop bound variables. */
1544 for (nb_iter = loop->bounds; nb_iter;
1545 nb_iter = nb_iter->next)
1546 if (nb_iter->stmt
1547 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1549 stmt = as_a <gcond *> (nb_iter->stmt);
1550 break;
1552 if (!stmt && last_stmt (loop->header)
1553 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1554 stmt = as_a <gcond *> (last_stmt (loop->header));
1555 if (stmt)
1556 is_comparison_with_loop_invariant_p (stmt, loop,
1557 &loop_bound_var,
1558 &loop_bound_code,
1559 &loop_bound_step,
1560 &loop_iv_base);
1562 bbs = get_loop_body (loop);
1564 for (j = 0; j < loop->num_nodes; j++)
1566 int header_found = 0;
1567 edge e;
1568 edge_iterator ei;
1570 bb = bbs[j];
1572 /* Bypass loop heuristics on continue statements. These
1573 statements construct loops via "non-loop" constructs
1574 in the source language and are better handled
1575 separately. */
1576 if (predicted_by_p (bb, PRED_CONTINUE))
1577 continue;
1579 /* Loop branch heuristics - predict an edge back to a
1580 loop's head as taken. */
1581 if (bb == loop->latch)
1583 e = find_edge (loop->latch, loop->header);
1584 if (e)
1586 header_found = 1;
1587 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1591 /* Loop exit heuristics - predict an edge exiting the loop as not taken
1592 if the conditional has no successor that is a loop header. */
1593 if (!header_found
1594 /* If we already used more reliable loop exit predictors, do not
1595 bother with PRED_LOOP_EXIT. */
1596 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1597 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1599 /* For a loop with many exits we don't want to predict all exits
1600 with a fairly large probability, because if all exits are
1601 considered in a row, the loop would be predicted to iterate
1602 almost never. The code to divide the probability by the number of
1603 exits is very rough. It should compute the number of exits
1604 taken in each path through the function (not the overall number
1605 of exits, which might be a lot higher for loops with wide switch
1606 statements in them) and compute the n-th root.
1608 We limit the minimal probability to 2% to avoid
1609 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1610 as this was causing a regression in the perl benchmark containing
1611 such a wide loop. */
1613 int probability = ((REG_BR_PROB_BASE
1614 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1615 / n_exits);
1616 if (probability < HITRATE (2))
1617 probability = HITRATE (2);
1618 FOR_EACH_EDGE (e, ei, bb->succs)
1619 if (e->dest->index < NUM_FIXED_BLOCKS
1620 || !flow_bb_inside_loop_p (loop, e->dest))
1621 predict_edge (e, PRED_LOOP_EXIT, probability);
1623 if (loop_bound_var)
1624 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1625 loop_bound_code,
1626 tree_to_shwi (loop_bound_step));
1629 /* Free basic blocks from get_loop_body. */
1630 free (bbs);
1634 /* Attempt to predict probabilities of BB outgoing edges using local
1635 properties. */
1636 static void
1637 bb_estimate_probability_locally (basic_block bb)
1639 rtx_insn *last_insn = BB_END (bb);
1640 rtx cond;
1642 if (! can_predict_insn_p (last_insn))
1643 return;
1644 cond = get_condition (last_insn, NULL, false, false);
1645 if (! cond)
1646 return;
1648 /* Try "pointer heuristic."
1649 A comparison ptr == 0 is predicted as false.
1650 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1651 if (COMPARISON_P (cond)
1652 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1653 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1655 if (GET_CODE (cond) == EQ)
1656 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1657 else if (GET_CODE (cond) == NE)
1658 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1660 else
1662 /* Try "opcode heuristic."
1663 EQ tests are usually false and NE tests are usually true. Also,
1664 most quantities are positive, so we can make the appropriate guesses
1665 about signed comparisons against zero. */
1666 switch (GET_CODE (cond))
1668 case CONST_INT:
1669 /* Unconditional branch. */
1670 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1671 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1672 break;
1674 case EQ:
1675 case UNEQ:
1676 /* Floating point comparisons appear to behave in a very
1677 unpredictable way because of the special role of = tests in
1678 FP code. */
1679 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1681 /* Comparisons with 0 are often used for booleans and there is
1682 nothing useful to predict about them. */
1683 else if (XEXP (cond, 1) == const0_rtx
1684 || XEXP (cond, 0) == const0_rtx)
1686 else
1687 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1688 break;
1690 case NE:
1691 case LTGT:
1692 /* Floating point comparisons appear to behave in a very
1693 unpredictable way because of the special role of = tests in
1694 FP code. */
1695 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1697 /* Comparisons with 0 are often used for booleans and there is
1698 nothing useful to predict about them. */
1699 else if (XEXP (cond, 1) == const0_rtx
1700 || XEXP (cond, 0) == const0_rtx)
1702 else
1703 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1704 break;
1706 case ORDERED:
1707 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1708 break;
1710 case UNORDERED:
1711 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1712 break;
1714 case LE:
1715 case LT:
1716 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1717 || XEXP (cond, 1) == constm1_rtx)
1718 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1719 break;
1721 case GE:
1722 case GT:
1723 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1724 || XEXP (cond, 1) == constm1_rtx)
1725 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1726 break;
1728 default:
1729 break;
1733 /* Set edge->probability for each successor edge of BB. */
1734 void
1735 guess_outgoing_edge_probabilities (basic_block bb)
1737 bb_estimate_probability_locally (bb);
1738 combine_predictions_for_insn (BB_END (bb), bb);
1741 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1743 /* Helper function for expr_expected_value. */
1745 static tree
1746 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1747 tree op1, bitmap visited, enum br_predictor *predictor)
1749 gimple *def;
1751 if (predictor)
1752 *predictor = PRED_UNCONDITIONAL;
1754 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1756 if (TREE_CONSTANT (op0))
1757 return op0;
1759 if (code != SSA_NAME)
1760 return NULL_TREE;
1762 def = SSA_NAME_DEF_STMT (op0);
1764 /* If we were already here, break the infinite cycle. */
1765 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1766 return NULL;
1768 if (gimple_code (def) == GIMPLE_PHI)
1770 /* All the arguments of the PHI node must have the same expected
1771 constant value. */
1772 int i, n = gimple_phi_num_args (def);
1773 tree val = NULL, new_val;
1775 for (i = 0; i < n; i++)
1777 tree arg = PHI_ARG_DEF (def, i);
1778 enum br_predictor predictor2;
1780 /* If this PHI has itself as an argument, we cannot
1781 determine the expected value of this argument. However,
1782 if we can find an expected constant value for the other
1783 PHI args then we can still be sure that this is
1784 likely a constant. So be optimistic and just
1785 continue with the next argument. */
1786 if (arg == PHI_RESULT (def))
1787 continue;
1789 new_val = expr_expected_value (arg, visited, &predictor2);
1791 /* It is difficult to combine value predictors. Simply assume
1792 that later predictor is weaker and take its prediction. */
1793 if (predictor && *predictor < predictor2)
1794 *predictor = predictor2;
1795 if (!new_val)
1796 return NULL;
1797 if (!val)
1798 val = new_val;
1799 else if (!operand_equal_p (val, new_val, false))
1800 return NULL;
1802 return val;
1804 if (is_gimple_assign (def))
1806 if (gimple_assign_lhs (def) != op0)
1807 return NULL;
1809 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1810 gimple_assign_rhs1 (def),
1811 gimple_assign_rhs_code (def),
1812 gimple_assign_rhs2 (def),
1813 visited, predictor);
1816 if (is_gimple_call (def))
1818 tree decl = gimple_call_fndecl (def);
1819 if (!decl)
1821 if (gimple_call_internal_p (def)
1822 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1824 gcc_assert (gimple_call_num_args (def) == 3);
1825 tree val = gimple_call_arg (def, 0);
1826 if (TREE_CONSTANT (val))
1827 return val;
1828 if (predictor)
1830 tree val2 = gimple_call_arg (def, 2);
1831 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1832 && tree_fits_uhwi_p (val2)
1833 && tree_to_uhwi (val2) < END_PREDICTORS);
1834 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1836 return gimple_call_arg (def, 1);
1838 return NULL;
1840 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1841 switch (DECL_FUNCTION_CODE (decl))
1843 case BUILT_IN_EXPECT:
1845 tree val;
1846 if (gimple_call_num_args (def) != 2)
1847 return NULL;
1848 val = gimple_call_arg (def, 0);
1849 if (TREE_CONSTANT (val))
1850 return val;
1851 if (predictor)
1852 *predictor = PRED_BUILTIN_EXPECT;
1853 return gimple_call_arg (def, 1);
1856 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1857 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1858 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1859 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1860 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1861 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1862 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1863 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1864 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1865 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1866 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1867 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1868 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1869 /* Assume that any given atomic operation has low contention,
1870 and thus the compare-and-swap operation succeeds. */
1871 if (predictor)
1872 *predictor = PRED_COMPARE_AND_SWAP;
1873 return boolean_true_node;
1874 default:
1875 break;
1879 return NULL;
1882 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1884 tree res;
1885 enum br_predictor predictor2;
1886 op0 = expr_expected_value (op0, visited, predictor);
1887 if (!op0)
1888 return NULL;
1889 op1 = expr_expected_value (op1, visited, &predictor2);
1890 if (predictor && *predictor < predictor2)
1891 *predictor = predictor2;
1892 if (!op1)
1893 return NULL;
1894 res = fold_build2 (code, type, op0, op1);
1895 if (TREE_CONSTANT (res))
1896 return res;
1897 return NULL;
1899 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1901 tree res;
1902 op0 = expr_expected_value (op0, visited, predictor);
1903 if (!op0)
1904 return NULL;
1905 res = fold_build1 (code, type, op0);
1906 if (TREE_CONSTANT (res))
1907 return res;
1908 return NULL;
1910 return NULL;
1913 /* Return the constant EXPR will likely have at execution time, NULL if unknown.
1914 The function is used by the builtin_expect branch predictor, so the evidence
1915 must come from this construct and possible additional constant folding.
1917 We may want to implement a more involved value guess (such as value range
1918 propagation based prediction), but such tricks should go into a new
1919 implementation. */
1921 static tree
1922 expr_expected_value (tree expr, bitmap visited,
1923 enum br_predictor *predictor)
1925 enum tree_code code;
1926 tree op0, op1;
1928 if (TREE_CONSTANT (expr))
1930 if (predictor)
1931 *predictor = PRED_UNCONDITIONAL;
1932 return expr;
1935 extract_ops_from_tree (expr, &code, &op0, &op1);
1936 return expr_expected_value_1 (TREE_TYPE (expr),
1937 op0, code, op1, visited, predictor);
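/* Typical source-level construct that this machinery folds (illustrative
   user code, not part of this file):

     if (__builtin_expect (ptr == NULL, 0))
       handle_rare_error ();

   expr_expected_value_1 resolves the condition to the expected constant
   0 using the __builtin_expect hint, so tree_predict_by_opcode below
   predicts the error path as unlikely.  */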
1940 /* Predict using opcode of the last statement in basic block. */
1941 static void
1942 tree_predict_by_opcode (basic_block bb)
1944 gimple *stmt = last_stmt (bb);
1945 edge then_edge;
1946 tree op0, op1;
1947 tree type;
1948 tree val;
1949 enum tree_code cmp;
1950 bitmap visited;
1951 edge_iterator ei;
1952 enum br_predictor predictor;
1954 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1955 return;
1956 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1957 if (then_edge->flags & EDGE_TRUE_VALUE)
1958 break;
1959 op0 = gimple_cond_lhs (stmt);
1960 op1 = gimple_cond_rhs (stmt);
1961 cmp = gimple_cond_code (stmt);
1962 type = TREE_TYPE (op0);
1963 visited = BITMAP_ALLOC (NULL);
1964 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1965 &predictor);
1966 BITMAP_FREE (visited);
1967 if (val && TREE_CODE (val) == INTEGER_CST)
1969 if (predictor == PRED_BUILTIN_EXPECT)
1971 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1973 gcc_assert (percent >= 0 && percent <= 100);
1974 if (integer_zerop (val))
1975 percent = 100 - percent;
1976 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1978 else
1979 predict_edge (then_edge, predictor,
1980 integer_zerop (val) ? NOT_TAKEN : TAKEN);
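/* Illustrative numbers (the builtin-expect-probability default of 90 is
   an assumption, check params.def): when the expected value is nonzero
   the then edge gets HITRATE (90) == 9000, and when the expected value
   is zero the percentage is flipped to 10 so the edge gets only
   HITRATE (10) == 1000.  */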
1982 /* Try "pointer heuristic."
1983 A comparison ptr == 0 is predicted as false.
1984 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1985 if (POINTER_TYPE_P (type))
1987 if (cmp == EQ_EXPR)
1988 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1989 else if (cmp == NE_EXPR)
1990 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1992 else
1994 /* Try "opcode heuristic."
1995 EQ tests are usually false and NE tests are usually true. Also,
1996 most quantities are positive, so we can make the appropriate guesses
1997 about signed comparisons against zero. */
1998 switch (cmp)
2000 case EQ_EXPR:
2001 case UNEQ_EXPR:
2002 /* Floating point comparisons appear to behave in a very
2003 unpredictable way because of the special role of = tests in
2004 FP code. */
2005 if (FLOAT_TYPE_P (type))
2007 /* Comparisons with 0 are often used for booleans and there is
2008 nothing useful to predict about them. */
2009 else if (integer_zerop (op0) || integer_zerop (op1))
2011 else
2012 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2013 break;
2015 case NE_EXPR:
2016 case LTGT_EXPR:
2017 /* Floating point comparisons appear to behave in a very
2018 unpredictable way because of the special role of equality tests in
2019 FP code. */
2020 if (FLOAT_TYPE_P (type))
2022 /* Comparisons with 0 are often used for booleans and there is
2023 nothing useful to predict about them. */
2024 else if (integer_zerop (op0)
2025 || integer_zerop (op1))
2027 else
2028 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2029 break;
2031 case ORDERED_EXPR:
2032 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2033 break;
2035 case UNORDERED_EXPR:
2036 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2037 break;
2039 case LE_EXPR:
2040 case LT_EXPR:
2041 if (integer_zerop (op1)
2042 || integer_onep (op1)
2043 || integer_all_onesp (op1)
2044 || real_zerop (op1)
2045 || real_onep (op1)
2046 || real_minus_onep (op1))
2047 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2048 break;
2050 case GE_EXPR:
2051 case GT_EXPR:
2052 if (integer_zerop (op1)
2053 || integer_onep (op1)
2054 || integer_all_onesp (op1)
2055 || real_zerop (op1)
2056 || real_onep (op1)
2057 || real_minus_onep (op1))
2058 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2059 break;
2061 default:
2062 break;
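
/* Editor's illustration (not part of predict.c): hypothetical user code
   showing which of the pointer/opcode heuristics above would fire.  */

#include <stddef.h>

int classify (int *p, int x, double d)
{
  if (p == NULL)   /* pointer EQ: PRED_TREE_POINTER, predicted not taken */
    return -1;
  if (x < 0)       /* LT against zero: PRED_TREE_OPCODE_POSITIVE, not taken */
    return -2;
  if (d == 0.0)    /* floating-point equality: deliberately left unpredicted */
    return -3;
  return x;
}
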
2066 /* Try to guess whether the return value means an error code. */
2068 static enum br_predictor
2069 return_prediction (tree val, enum prediction *prediction)
2071 /* VOID. */
2072 if (!val)
2073 return PRED_NO_PREDICTION;
2074 /* Different heuristics for pointers and scalars. */
2075 if (POINTER_TYPE_P (TREE_TYPE (val)))
2077 /* NULL is usually not returned. */
2078 if (integer_zerop (val))
2080 *prediction = NOT_TAKEN;
2081 return PRED_NULL_RETURN;
2084 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2086 /* Negative return values are often used to indicate
2087 errors. */
2088 if (TREE_CODE (val) == INTEGER_CST
2089 && tree_int_cst_sgn (val) < 0)
2091 *prediction = NOT_TAKEN;
2092 return PRED_NEGATIVE_RETURN;
2094 /* Constant return values seem to be commonly taken.
2095 Zero/one often represent booleans, so exclude them from the
2096 heuristics. */
2097 if (TREE_CONSTANT (val)
2098 && (!integer_zerop (val) && !integer_onep (val)))
2100 *prediction = TAKEN;
2101 return PRED_CONST_RETURN;
2104 return PRED_NO_PREDICTION;
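
/* Editor's illustration (not part of predict.c): hypothetical helpers
   showing the categories return_prediction assigns to return values.  */

#include <stdlib.h>

void *alloc_or_null (size_t n)
{
  if (n == 0)
    return NULL;     /* PRED_NULL_RETURN: NULL is usually not returned */
  return malloc (n);
}

int parse_digit (int c)
{
  if (c < '0' || c > '9')
    return -1;       /* PRED_NEGATIVE_RETURN: negative values often mean errors */
  return c - '0';
}
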
2107 /* Find the basic block with the return expression and look for a possible
2108 return value, trying to apply the RETURN_PREDICTION heuristics. */
2109 static void
2110 apply_return_prediction (void)
2112 greturn *return_stmt = NULL;
2113 tree return_val;
2114 edge e;
2115 gphi *phi;
2116 int phi_num_args, i;
2117 enum br_predictor pred;
2118 enum prediction direction;
2119 edge_iterator ei;
2121 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2123 gimple *last = last_stmt (e->src);
2124 if (last
2125 && gimple_code (last) == GIMPLE_RETURN)
2127 return_stmt = as_a <greturn *> (last);
2128 break;
2131 if (!e)
2132 return;
2133 return_val = gimple_return_retval (return_stmt);
2134 if (!return_val)
2135 return;
2136 if (TREE_CODE (return_val) != SSA_NAME
2137 || !SSA_NAME_DEF_STMT (return_val)
2138 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2139 return;
2140 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2141 phi_num_args = gimple_phi_num_args (phi);
2142 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2144 /* Avoid the degenerate case where all return values from the function
2145 belong to the same category (i.e. they are all positive constants)
2146 so we can hardly say anything about them. */
2147 for (i = 1; i < phi_num_args; i++)
2148 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2149 break;
2150 if (i != phi_num_args)
2151 for (i = 0; i < phi_num_args; i++)
2153 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2154 if (pred != PRED_NO_PREDICTION)
2155 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2156 direction);
2160 /* Look for basic blocks that contain unlikely-to-happen events
2161 (such as noreturn calls) and mark all paths leading to execution
2162 of these basic blocks as unlikely. */
2164 static void
2165 tree_bb_level_predictions (void)
2167 basic_block bb;
2168 bool has_return_edges = false;
2169 edge e;
2170 edge_iterator ei;
2172 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2173 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2175 has_return_edges = true;
2176 break;
2179 apply_return_prediction ();
2181 FOR_EACH_BB_FN (bb, cfun)
2183 gimple_stmt_iterator gsi;
2185 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2187 gimple *stmt = gsi_stmt (gsi);
2188 tree decl;
2190 if (is_gimple_call (stmt))
2192 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2193 && has_return_edges)
2194 predict_paths_leading_to (bb, PRED_NORETURN,
2195 NOT_TAKEN);
2196 decl = gimple_call_fndecl (stmt);
2197 if (decl
2198 && lookup_attribute ("cold",
2199 DECL_ATTRIBUTES (decl)))
2200 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2201 NOT_TAKEN);
2203 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2205 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2206 gimple_predict_outcome (stmt));
2207 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2208 hints to callers. */
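
/* Editor's illustration (not part of predict.c): a noreturn, cold callee
   makes the guarding path unlikely via PRED_NORETURN and PRED_COLD_FUNCTION.
   log_fatal is a made-up helper.  */

#include <stdlib.h>

__attribute__ ((noreturn, cold)) static void
log_fatal (const char *msg)
{
  (void) msg;
  abort ();
}

int checked_div (int a, int b)
{
  if (b == 0)
    log_fatal ("division by zero");
  return a / b;
}
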
2214 /* Callback for hash_map::traverse, asserts that the pointer map is
2215 empty. */
2217 bool
2218 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2219 void *)
2221 gcc_assert (!value);
2222 return false;
2225 /* Predict branch probabilities and estimate profile for basic block BB. */
2227 static void
2228 tree_estimate_probability_bb (basic_block bb)
2230 edge e;
2231 edge_iterator ei;
2232 gimple *last;
2234 FOR_EACH_EDGE (e, ei, bb->succs)
2236 /* Predict edges to user labels with attributes. */
2237 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2239 gimple_stmt_iterator gi;
2240 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2242 glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2243 tree decl;
2245 if (!label_stmt)
2246 break;
2247 decl = gimple_label_label (label_stmt);
2248 if (DECL_ARTIFICIAL (decl))
2249 continue;
2251 /* Finally, we have a user-defined label. */
2252 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2253 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2254 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2255 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2259 /* Predict early returns to be probable, as we've already taken
2260 care of error returns, and other cases are often used for
2261 fast paths through the function.
2263 Since we've already removed the return statements, we are
2264 looking for a CFG like:
2266 if (conditional)
2269 goto return_block
2271 some other blocks
2272 return_block:
2273 return_stmt. */
2274 if (e->dest != bb->next_bb
2275 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2276 && single_succ_p (e->dest)
2277 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2278 && (last = last_stmt (e->dest)) != NULL
2279 && gimple_code (last) == GIMPLE_RETURN)
2281 edge e1;
2282 edge_iterator ei1;
2284 if (single_succ_p (bb))
2286 FOR_EACH_EDGE (e1, ei1, bb->preds)
2287 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2288 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2289 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2290 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2292 else
2293 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2294 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2295 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2296 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2299 /* Look for a block we are guarding (i.e. we dominate it,
2300 but it doesn't postdominate us). */
2301 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2302 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2303 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2305 gimple_stmt_iterator bi;
2307 /* The call heuristic claims that a guarded function call
2308 is improbable. This is because such calls are often used
2309 to signal exceptional situations such as printing error
2310 messages. */
2311 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2312 gsi_next (&bi))
2314 gimple *stmt = gsi_stmt (bi);
2315 if (is_gimple_call (stmt)
2316 /* Const and pure calls are hardly ever used to signal
2317 something exceptional. */
2318 && gimple_has_side_effects (stmt))
2320 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2321 break;
2326 tree_predict_by_opcode (bb);
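
/* Editor's illustration (not part of predict.c): on the edge guarding the
   error block, the user-placed cold label triggers PRED_COLD_LABEL and the
   guarded call with side effects triggers PRED_CALL, both predicting the
   branch as not taken.  */

#include <stdio.h>

int process (int fd, int err)
{
  if (err)
    goto error_path;
  return 0;

 error_path:
  __attribute__ ((cold));  /* user label marked cold */
  fprintf (stderr, "error %d on fd %d\n", err, fd);  /* guarded call */
  return -1;
}
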
2329 /* Predict branch probabilities and estimate profile of the tree CFG.
2330 This function can be called from the loop optimizers to recompute
2331 the profile information.
2332 If DRY_RUN is set, do not modify CFG and only produce dump files. */
2334 void
2335 tree_estimate_probability (bool dry_run)
2337 basic_block bb;
2339 add_noreturn_fake_exit_edges ();
2340 connect_infinite_loops_to_exit ();
2341 /* We use loop_niter_by_eval, which requires that the loops have
2342 preheaders. */
2343 create_preheaders (CP_SIMPLE_PREHEADERS);
2344 calculate_dominance_info (CDI_POST_DOMINATORS);
2346 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2347 tree_bb_level_predictions ();
2348 record_loop_exits ();
2350 if (number_of_loops (cfun) > 1)
2351 predict_loops ();
2353 FOR_EACH_BB_FN (bb, cfun)
2354 tree_estimate_probability_bb (bb);
2356 FOR_EACH_BB_FN (bb, cfun)
2357 combine_predictions_for_bb (bb, dry_run);
2359 if (flag_checking)
2360 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2362 delete bb_predictions;
2363 bb_predictions = NULL;
2365 if (!dry_run)
2366 estimate_bb_frequencies (false);
2367 free_dominance_info (CDI_POST_DOMINATORS);
2368 remove_fake_exit_edges ();
2371 /* Predict edges to successors of CUR whose sources are not postdominated by
2372 BB, using predictor PRED, and recurse to all postdominators. */
2374 static void
2375 predict_paths_for_bb (basic_block cur, basic_block bb,
2376 enum br_predictor pred,
2377 enum prediction taken,
2378 bitmap visited)
2380 edge e;
2381 edge_iterator ei;
2382 basic_block son;
2384 /* We are looking for all edges forming edge cut induced by
2385 set of all blocks postdominated by BB. */
2386 FOR_EACH_EDGE (e, ei, cur->preds)
2387 if (e->src->index >= NUM_FIXED_BLOCKS
2388 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2390 edge e2;
2391 edge_iterator ei2;
2392 bool found = false;
2394 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2395 if (e->flags & (EDGE_EH | EDGE_FAKE))
2396 continue;
2397 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2399 /* See if there is an edge from e->src that is not abnormal
2400 and does not lead to BB. */
2401 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2402 if (e2 != e
2403 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2404 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2406 found = true;
2407 break;
2410 /* If there is a non-abnormal path leaving e->src, predict the edge
2411 using the predictor. Otherwise we need to look for paths
2412 leading to e->src.
2414 The second case may lead to an infinite loop if we are predicting
2415 regions that are only reachable by abnormal edges. We simply
2416 prevent visiting a given BB twice. */
2417 if (found)
2418 predict_edge_def (e, pred, taken);
2419 else if (bitmap_set_bit (visited, e->src->index))
2420 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2422 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2423 son;
2424 son = next_dom_son (CDI_POST_DOMINATORS, son))
2425 predict_paths_for_bb (son, bb, pred, taken, visited);
2428 /* Set branch probabilities on all paths leading to BB according to
2429 predictor PRED and prediction TAKEN. */
2431 static void
2432 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2433 enum prediction taken)
2435 bitmap visited = BITMAP_ALLOC (NULL);
2436 predict_paths_for_bb (bb, bb, pred, taken, visited);
2437 BITMAP_FREE (visited);
2440 /* Like predict_paths_leading_to but takes an edge instead of a basic block. */
2442 static void
2443 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2444 enum prediction taken)
2446 bool has_nonloop_edge = false;
2447 edge_iterator ei;
2448 edge e2;
2450 basic_block bb = e->src;
2451 FOR_EACH_EDGE (e2, ei, bb->succs)
2452 if (e2->dest != e->src && e2->dest != e->dest
2453 && !(e->flags & (EDGE_EH | EDGE_FAKE))
2454 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2456 has_nonloop_edge = true;
2457 break;
2459 if (!has_nonloop_edge)
2461 bitmap visited = BITMAP_ALLOC (NULL);
2462 predict_paths_for_bb (bb, bb, pred, taken, visited);
2463 BITMAP_FREE (visited);
2465 else
2466 predict_edge_def (e, pred, taken);
2469 /* This is used to carry information about basic blocks. It is
2470 attached to the AUX field of the standard CFG block. */
2472 struct block_info
2474 /* Estimated frequency of execution of basic_block. */
2475 sreal frequency;
2477 /* To keep queue of basic blocks to process. */
2478 basic_block next;
2480 /* Number of predecessors we need to visit first. */
2481 int npredecessors;
2484 /* Similar information for edges. */
2485 struct edge_prob_info
2487 /* In case the edge is a loopback edge, the probability that the edge will be
2488 reached given that the header is. The estimated number of iterations of the
2489 loop can then be computed as 1 / (1 - back_edge_prob). */
2490 sreal back_edge_prob;
2491 /* True if the edge is a loopback edge in the natural loop. */
2492 unsigned int back_edge:1;
2495 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2496 #undef EDGE_INFO
2497 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2499 /* Helper function for estimate_bb_frequencies.
2500 Propagate the frequencies in blocks marked in
2501 TOVISIT, starting in HEAD. */
2503 static void
2504 propagate_freq (basic_block head, bitmap tovisit)
2506 basic_block bb;
2507 basic_block last;
2508 unsigned i;
2509 edge e;
2510 basic_block nextbb;
2511 bitmap_iterator bi;
2513 /* For each basic block we need to visit, count the number of its
2514 predecessors that we need to visit first. */
2515 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2517 edge_iterator ei;
2518 int count = 0;
2520 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2522 FOR_EACH_EDGE (e, ei, bb->preds)
2524 bool visit = bitmap_bit_p (tovisit, e->src->index);
2526 if (visit && !(e->flags & EDGE_DFS_BACK))
2527 count++;
2528 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2529 fprintf (dump_file,
2530 "Irreducible region hit, ignoring edge to %i->%i\n",
2531 e->src->index, bb->index);
2533 BLOCK_INFO (bb)->npredecessors = count;
2534 /* When function never returns, we will never process exit block. */
2535 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2536 bb->count = bb->frequency = 0;
2539 BLOCK_INFO (head)->frequency = 1;
2540 last = head;
2541 for (bb = head; bb; bb = nextbb)
2543 edge_iterator ei;
2544 sreal cyclic_probability = 0;
2545 sreal frequency = 0;
2547 nextbb = BLOCK_INFO (bb)->next;
2548 BLOCK_INFO (bb)->next = NULL;
2550 /* Compute frequency of basic block. */
2551 if (bb != head)
2553 if (flag_checking)
2554 FOR_EACH_EDGE (e, ei, bb->preds)
2555 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2556 || (e->flags & EDGE_DFS_BACK));
2558 FOR_EACH_EDGE (e, ei, bb->preds)
2559 if (EDGE_INFO (e)->back_edge)
2561 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2563 else if (!(e->flags & EDGE_DFS_BACK))
2565 /* frequency += (e->probability
2566 * BLOCK_INFO (e->src)->frequency /
2567 REG_BR_PROB_BASE); */
2569 sreal tmp = e->probability;
2570 tmp *= BLOCK_INFO (e->src)->frequency;
2571 tmp *= real_inv_br_prob_base;
2572 frequency += tmp;
2575 if (cyclic_probability == 0)
2577 BLOCK_INFO (bb)->frequency = frequency;
2579 else
2581 if (cyclic_probability > real_almost_one)
2582 cyclic_probability = real_almost_one;
2584 /* BLOCK_INFO (bb)->frequency = frequency
2585 / (1 - cyclic_probability) */
2587 cyclic_probability = sreal (1) - cyclic_probability;
2588 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2592 bitmap_clear_bit (tovisit, bb->index);
2594 e = find_edge (bb, head);
2595 if (e)
2597 /* EDGE_INFO (e)->back_edge_prob
2598 = ((e->probability * BLOCK_INFO (bb)->frequency)
2599 / REG_BR_PROB_BASE); */
2601 sreal tmp = e->probability;
2602 tmp *= BLOCK_INFO (bb)->frequency;
2603 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2606 /* Propagate to successor blocks. */
2607 FOR_EACH_EDGE (e, ei, bb->succs)
2608 if (!(e->flags & EDGE_DFS_BACK)
2609 && BLOCK_INFO (e->dest)->npredecessors)
2611 BLOCK_INFO (e->dest)->npredecessors--;
2612 if (!BLOCK_INFO (e->dest)->npredecessors)
2614 if (!nextbb)
2615 nextbb = e->dest;
2616 else
2617 BLOCK_INFO (last)->next = e->dest;
2619 last = e->dest;
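
/* Editor's worked example of the recurrence above: if a loop header is
   entered with frequency 1 and its back edge contributes
   cyclic_probability = 0.9, the header's frequency becomes
       frequency / (1 - cyclic_probability) = 1 / (1 - 0.9) = 10,
   i.e. roughly ten executions of the loop body per entry, matching the
   1 / (1 - back_edge_prob) iteration estimate noted for edge_prob_info.  */
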
2625 /* Estimate frequencies in loops at same nest level. */
2627 static void
2628 estimate_loops_at_level (struct loop *first_loop)
2630 struct loop *loop;
2632 for (loop = first_loop; loop; loop = loop->next)
2634 edge e;
2635 basic_block *bbs;
2636 unsigned i;
2637 bitmap tovisit = BITMAP_ALLOC (NULL);
2639 estimate_loops_at_level (loop->inner);
2641 /* Find current loop back edge and mark it. */
2642 e = loop_latch_edge (loop);
2643 EDGE_INFO (e)->back_edge = 1;
2645 bbs = get_loop_body (loop);
2646 for (i = 0; i < loop->num_nodes; i++)
2647 bitmap_set_bit (tovisit, bbs[i]->index);
2648 free (bbs);
2649 propagate_freq (loop->header, tovisit);
2650 BITMAP_FREE (tovisit);
2654 /* Propagates frequencies through structure of loops. */
2656 static void
2657 estimate_loops (void)
2659 bitmap tovisit = BITMAP_ALLOC (NULL);
2660 basic_block bb;
2662 /* Start by estimating the frequencies in the loops. */
2663 if (number_of_loops (cfun) > 1)
2664 estimate_loops_at_level (current_loops->tree_root->inner);
2666 /* Now propagate the frequencies through all the blocks. */
2667 FOR_ALL_BB_FN (bb, cfun)
2669 bitmap_set_bit (tovisit, bb->index);
2671 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2672 BITMAP_FREE (tovisit);
2675 /* Drop the profile for NODE to guessed, and update its frequency based on
2676 whether it is expected to be hot given the CALL_COUNT. */
2678 static void
2679 drop_profile (struct cgraph_node *node, gcov_type call_count)
2681 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2682 /* In the case where this was called by another function with a
2683 dropped profile, call_count will be 0. Since there are no
2684 non-zero call counts to this function, we don't know for sure
2685 whether it is hot, and therefore it will be marked normal below. */
2686 bool hot = maybe_hot_count_p (NULL, call_count);
2688 if (dump_file)
2689 fprintf (dump_file,
2690 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2691 node->name (), node->order,
2692 hot ? "Function is hot" : "Function is normal");
2693 /* We only expect to miss profiles for functions that are reached
2694 via non-zero call edges in cases where the function may have
2695 been linked from another module or library (COMDATs and extern
2696 templates). See the comments below for handle_missing_profiles.
2697 Also, only warn in cases where the missing counts exceed the
2698 number of training runs. In certain cases with an execv followed
2699 by a no-return call the profile for the no-return call is not
2700 dumped and there can be a mismatch. */
2701 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2702 && call_count > profile_info->runs)
2704 if (flag_profile_correction)
2706 if (dump_file)
2707 fprintf (dump_file,
2708 "Missing counts for called function %s/%i\n",
2709 node->name (), node->order);
2711 else
2712 warning (0, "Missing counts for called function %s/%i",
2713 node->name (), node->order);
2716 profile_status_for_fn (fn)
2717 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2718 node->frequency
2719 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2722 /* In the case of COMDAT routines, multiple object files will contain the same
2723 function and the linker will select one for the binary. In that case
2724 all the other copies from the profile-instrumented binary will be missing
2725 profile counts. Look for cases where this happened, due to non-zero
2726 call counts going to 0-count functions, and drop the profile to guessed
2727 so that we can use the estimated probabilities and avoid optimizing only
2728 for size.
2730 The other case where the profile may be missing is when the routine
2731 is not going to be emitted to the object file, e.g. for "extern template"
2732 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2733 all other cases of non-zero calls to 0-count functions. */
2735 void
2736 handle_missing_profiles (void)
2738 struct cgraph_node *node;
2739 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2740 vec<struct cgraph_node *> worklist;
2741 worklist.create (64);
2743 /* See if 0 count function has non-0 count callers. In this case we
2744 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2745 FOR_EACH_DEFINED_FUNCTION (node)
2747 struct cgraph_edge *e;
2748 gcov_type call_count = 0;
2749 gcov_type max_tp_first_run = 0;
2750 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2752 if (node->count)
2753 continue;
2754 for (e = node->callers; e; e = e->next_caller)
2756 call_count += e->count;
2758 if (e->caller->tp_first_run > max_tp_first_run)
2759 max_tp_first_run = e->caller->tp_first_run;
2762 /* If the time profile is missing, assign the maximum that comes from
2763 the caller functions. */
2764 if (!node->tp_first_run && max_tp_first_run)
2765 node->tp_first_run = max_tp_first_run + 1;
2767 if (call_count
2768 && fn && fn->cfg
2769 && (call_count * unlikely_count_fraction >= profile_info->runs))
2771 drop_profile (node, call_count);
2772 worklist.safe_push (node);
2776 /* Propagate the profile dropping to other 0-count COMDATs that are
2777 potentially called by COMDATs we already dropped the profile on. */
2778 while (worklist.length () > 0)
2780 struct cgraph_edge *e;
2782 node = worklist.pop ();
2783 for (e = node->callees; e; e = e->next_callee)
2785 struct cgraph_node *callee = e->callee;
2786 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2788 if (callee->count > 0)
2789 continue;
2790 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2791 && profile_status_for_fn (fn) == PROFILE_READ)
2793 drop_profile (node, 0);
2794 worklist.safe_push (callee);
2798 worklist.release ();
2801 /* Convert counts measured by profile driven feedback to frequencies.
2802 Return nonzero iff there was any nonzero execution count. */
2804 gcov_type
2805 counts_to_freqs (void)
2807 gcov_type count_max, true_count_max = 0;
2808 basic_block bb;
2810 /* Don't overwrite the estimated frequencies when the profile for
2811 the function is missing. We may drop this function to PROFILE_GUESSED
2812 later in drop_profile (). */
2813 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2814 return 0;
2816 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2817 true_count_max = MAX (bb->count, true_count_max);
2819 count_max = MAX (true_count_max, 1);
2820 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2821 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2823 return true_count_max;
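
/* Editor's worked example of the scaling above, assuming BB_FREQ_MAX is
   10000: a block with count 2 in a function whose hottest block has count 3
   gets frequency (2 * 10000 + 1) / 3 = 20001 / 3 = 6667; the added
   count_max / 2 (here 1) rounds to nearest instead of truncating.  */
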
2826 /* Return true if the function is likely to be expensive, so there is no point
2827 in optimizing the performance of the prologue or epilogue, or doing inlining,
2828 at the expense of code size growth. THRESHOLD is the limit on the number of
2829 instructions the function can execute on average to still be considered not expensive. */
2831 bool
2832 expensive_function_p (int threshold)
2834 unsigned int sum = 0;
2835 basic_block bb;
2836 unsigned int limit;
2838 /* We cannot compute accurately for large thresholds due to scaled
2839 frequencies. */
2840 gcc_assert (threshold <= BB_FREQ_MAX);
2842 /* Frequencies are out of range. This either means that the function contains
2843 an internal loop executing more than BB_FREQ_MAX times or profile feedback
2844 is available and the function has not been executed at all. */
2845 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2846 return true;
2848 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2849 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2850 FOR_EACH_BB_FN (bb, cfun)
2852 rtx_insn *insn;
2854 FOR_BB_INSNS (bb, insn)
2855 if (active_insn_p (insn))
2857 sum += bb->frequency;
2858 if (sum > limit)
2859 return true;
2863 return false;
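
/* Editor's worked example: with an entry-block frequency of 1000 and
   THRESHOLD of 50, the limit is 50000.  A block with frequency 800 that
   contains 70 active insns alone contributes 70 * 800 = 56000 > 50000,
   so the function is reported as expensive.  */
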
2866 /* Estimate and propagate basic block frequencies using the given branch
2867 probabilities. If FORCE is true, the frequencies are used to estimate
2868 the counts even when there are already non-zero profile counts. */
2870 void
2871 estimate_bb_frequencies (bool force)
2873 basic_block bb;
2874 sreal freq_max;
2876 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2878 static int real_values_initialized = 0;
2880 if (!real_values_initialized)
2882 real_values_initialized = 1;
2883 real_br_prob_base = REG_BR_PROB_BASE;
2884 real_bb_freq_max = BB_FREQ_MAX;
2885 real_one_half = sreal (1, -1);
2886 real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2887 real_almost_one = sreal (1) - real_inv_br_prob_base;
2890 mark_dfs_back_edges ();
2892 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2893 REG_BR_PROB_BASE;
2895 /* Set up block info for each basic block. */
2896 alloc_aux_for_blocks (sizeof (block_info));
2897 alloc_aux_for_edges (sizeof (edge_prob_info));
2898 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2900 edge e;
2901 edge_iterator ei;
2903 FOR_EACH_EDGE (e, ei, bb->succs)
2905 EDGE_INFO (e)->back_edge_prob = e->probability;
2906 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2910 /* First compute frequencies locally for each loop from innermost
2911 to outermost to examine frequencies for back edges. */
2912 estimate_loops ();
2914 freq_max = 0;
2915 FOR_EACH_BB_FN (bb, cfun)
2916 if (freq_max < BLOCK_INFO (bb)->frequency)
2917 freq_max = BLOCK_INFO (bb)->frequency;
2919 freq_max = real_bb_freq_max / freq_max;
2920 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2922 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2923 bb->frequency = tmp.to_int ();
2926 free_aux_for_blocks ();
2927 free_aux_for_edges ();
2929 compute_function_frequency ();
2932 /* Decide whether function is hot, cold or unlikely executed. */
2933 void
2934 compute_function_frequency (void)
2936 basic_block bb;
2937 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2939 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2940 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2941 node->only_called_at_startup = true;
2942 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2943 node->only_called_at_exit = true;
2945 if (profile_status_for_fn (cfun) != PROFILE_READ)
2947 int flags = flags_from_decl_or_type (current_function_decl);
2948 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2949 != NULL)
2950 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2951 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2952 != NULL)
2953 node->frequency = NODE_FREQUENCY_HOT;
2954 else if (flags & ECF_NORETURN)
2955 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2956 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2957 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2958 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2959 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2960 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2961 return;
2964 /* Only the first time do we try to drop the function into unlikely executed.
2965 After inlining, roundoff errors may confuse us.
2966 The ipa-profile pass will drop functions only called from unlikely
2967 functions to unlikely, and that is most of what we care about. */
2968 if (!cfun->after_inlining)
2969 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2970 FOR_EACH_BB_FN (bb, cfun)
2972 if (maybe_hot_bb_p (cfun, bb))
2974 node->frequency = NODE_FREQUENCY_HOT;
2975 return;
2977 if (!probably_never_executed_bb_p (cfun, bb))
2978 node->frequency = NODE_FREQUENCY_NORMAL;
2982 /* Build PREDICT_EXPR. */
2983 tree
2984 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2986 tree t = build1 (PREDICT_EXPR, void_type_node,
2987 build_int_cst (integer_type_node, predictor));
2988 SET_PREDICT_EXPR_OUTCOME (t, taken);
2989 return t;
2992 const char *
2993 predictor_name (enum br_predictor predictor)
2995 return predictor_info[predictor].name;
2998 /* Predict branch probabilities and estimate profile of the tree CFG. */
3000 namespace {
3002 const pass_data pass_data_profile =
3004 GIMPLE_PASS, /* type */
3005 "profile_estimate", /* name */
3006 OPTGROUP_NONE, /* optinfo_flags */
3007 TV_BRANCH_PROB, /* tv_id */
3008 PROP_cfg, /* properties_required */
3009 0, /* properties_provided */
3010 0, /* properties_destroyed */
3011 0, /* todo_flags_start */
3012 0, /* todo_flags_finish */
3015 class pass_profile : public gimple_opt_pass
3017 public:
3018 pass_profile (gcc::context *ctxt)
3019 : gimple_opt_pass (pass_data_profile, ctxt)
3022 /* opt_pass methods: */
3023 virtual bool gate (function *) { return flag_guess_branch_prob; }
3024 virtual unsigned int execute (function *);
3026 }; // class pass_profile
3028 unsigned int
3029 pass_profile::execute (function *fun)
3031 unsigned nb_loops;
3033 if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3034 return 0;
3036 loop_optimizer_init (LOOPS_NORMAL);
3037 if (dump_file && (dump_flags & TDF_DETAILS))
3038 flow_loops_dump (dump_file, NULL, 0);
3040 mark_irreducible_loops ();
3042 nb_loops = number_of_loops (fun);
3043 if (nb_loops > 1)
3044 scev_initialize ();
3046 tree_estimate_probability (false);
3048 if (nb_loops > 1)
3049 scev_finalize ();
3051 loop_optimizer_finalize ();
3052 if (dump_file && (dump_flags & TDF_DETAILS))
3053 gimple_dump_cfg (dump_file, dump_flags);
3054 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3055 profile_status_for_fn (fun) = PROFILE_GUESSED;
3056 return 0;
3059 } // anon namespace
3061 gimple_opt_pass *
3062 make_pass_profile (gcc::context *ctxt)
3064 return new pass_profile (ctxt);
3067 namespace {
3069 const pass_data pass_data_strip_predict_hints =
3071 GIMPLE_PASS, /* type */
3072 "*strip_predict_hints", /* name */
3073 OPTGROUP_NONE, /* optinfo_flags */
3074 TV_BRANCH_PROB, /* tv_id */
3075 PROP_cfg, /* properties_required */
3076 0, /* properties_provided */
3077 0, /* properties_destroyed */
3078 0, /* todo_flags_start */
3079 0, /* todo_flags_finish */
3082 class pass_strip_predict_hints : public gimple_opt_pass
3084 public:
3085 pass_strip_predict_hints (gcc::context *ctxt)
3086 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3089 /* opt_pass methods: */
3090 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3091 virtual unsigned int execute (function *);
3093 }; // class pass_strip_predict_hints
3095 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3096 we no longer need. */
3097 unsigned int
3098 pass_strip_predict_hints::execute (function *fun)
3100 basic_block bb;
3101 gimple *ass_stmt;
3102 tree var;
3104 FOR_EACH_BB_FN (bb, fun)
3106 gimple_stmt_iterator bi;
3107 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3109 gimple *stmt = gsi_stmt (bi);
3111 if (gimple_code (stmt) == GIMPLE_PREDICT)
3113 gsi_remove (&bi, true);
3114 continue;
3116 else if (is_gimple_call (stmt))
3118 tree fndecl = gimple_call_fndecl (stmt);
3120 if ((fndecl
3121 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3122 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3123 && gimple_call_num_args (stmt) == 2)
3124 || (gimple_call_internal_p (stmt)
3125 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3127 var = gimple_call_lhs (stmt);
3128 if (var)
3130 ass_stmt
3131 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3132 gsi_replace (&bi, ass_stmt, true);
3134 else
3136 gsi_remove (&bi, true);
3137 continue;
3141 gsi_next (&bi);
3144 return 0;
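
/* Editor's illustration (not part of predict.c) of the effect of this pass
   on user code: the __builtin_expect call is replaced by a plain copy of
   its first argument once the hint has been consumed.  */

/* Before the pass: */
int before (int cond)
{
  if (__builtin_expect (cond, 1))
    return 1;
  return 0;
}

/* After the pass the body behaves as if it had been written: */
int after (int cond)
{
  if (cond)
    return 1;
  return 0;
}
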
3147 } // anon namespace
3149 gimple_opt_pass *
3150 make_pass_strip_predict_hints (gcc::context *ctxt)
3152 return new pass_strip_predict_hints (ctxt);
3155 /* Rebuild function frequencies. Passes are in general expected to
3156 maintain the profile by hand; however, in some cases this is not possible:
3157 for example, when inlining several functions with loops, frequencies might run
3158 out of scale and thus need to be recomputed. */
3160 void
3161 rebuild_frequencies (void)
3163 timevar_push (TV_REBUILD_FREQUENCIES);
3165 /* When the max bb count in the function is small, there is a higher
3166 chance that there were truncation errors in the integer scaling
3167 of counts by inlining and other optimizations. This could lead
3168 to incorrect classification of code as being cold when it isn't.
3169 In that case, force the estimation of bb counts/frequencies from the
3170 branch probabilities, rather than computing frequencies from counts,
3171 which may also lead to frequencies incorrectly reduced to 0. There
3172 is less precision in the probabilities, so we only do this for small
3173 max counts. */
3174 gcov_type count_max = 0;
3175 basic_block bb;
3176 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3177 count_max = MAX (bb->count, count_max);
3179 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3180 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3181 && count_max < REG_BR_PROB_BASE/10))
3183 loop_optimizer_init (0);
3184 add_noreturn_fake_exit_edges ();
3185 mark_irreducible_loops ();
3186 connect_infinite_loops_to_exit ();
3187 estimate_bb_frequencies (true);
3188 remove_fake_exit_edges ();
3189 loop_optimizer_finalize ();
3191 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3192 counts_to_freqs ();
3193 else
3194 gcc_unreachable ();
3195 timevar_pop (TV_REBUILD_FREQUENCIES);
3198 /* Perform a dry run of the branch prediction pass and report a comparison of
3199 the predicted and real profiles in the dump file. */
3201 void
3202 report_predictor_hitrates (void)
3204 unsigned nb_loops;
3206 loop_optimizer_init (LOOPS_NORMAL);
3207 if (dump_file && (dump_flags & TDF_DETAILS))
3208 flow_loops_dump (dump_file, NULL, 0);
3210 mark_irreducible_loops ();
3212 nb_loops = number_of_loops (cfun);
3213 if (nb_loops > 1)
3214 scev_initialize ();
3216 tree_estimate_probability (true);
3218 if (nb_loops > 1)
3219 scev_finalize ();
3221 loop_optimizer_finalize ();