Fix bootstrap/PR63632
[official-gcc.git] / gcc / predict.c
1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* References:
22 [1] "Branch Prediction for Free"
23 Ball and Larus; PLDI '93.
24 [2] "Static Branch Frequency and Program Profile Analysis"
25 Wu and Larus; MICRO-27.
26 [3] "Corpus-based Static Branch Prediction"
27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "tree.h"
35 #include "calls.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
41 #include "regs.h"
42 #include "flags.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "profile.h"
50 #include "except.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "expr.h"
54 #include "predict.h"
55 #include "coverage.h"
56 #include "sreal.h"
57 #include "params.h"
58 #include "target.h"
59 #include "cfgloop.h"
60 #include "hash-map.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimple-iterator.h"
67 #include "gimple-ssa.h"
68 #include "cgraph.h"
69 #include "tree-cfg.h"
70 #include "tree-phinodes.h"
71 #include "ssa-iterators.h"
72 #include "tree-ssa-loop-niter.h"
73 #include "tree-ssa-loop.h"
74 #include "tree-pass.h"
75 #include "tree-scalar-evolution.h"
76 #include "cfgloop.h"
78 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
79 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
80 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
81 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
83 static void combine_predictions_for_insn (rtx_insn *, basic_block);
84 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
85 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
86 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
87 static bool can_predict_insn_p (const rtx_insn *);
89 /* Information we hold about each branch predictor.
90 Filled using information from predict.def. */
92 struct predictor_info
94 const char *const name; /* Name used in the debugging dumps. */
95 const int hitrate; /* Expected hitrate used by
96 predict_insn_def call. */
97 const int flags;
100 /* Use the given predictor without Dempster-Shafer theory if it matches
101 using first_match heuristics. */
102 #define PRED_FLAG_FIRST_MATCH 1
104 /* Convert a hit rate given in percent to our fixed-point representation. */
106 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
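/* Illustrative example (assuming REG_BR_PROB_BASE keeps its usual value of
   10000): HITRATE (90) == (90 * 10000 + 50) / 100 == 9000, i.e. a 90% hit
   rate expressed in the fixed-point scale used throughout this file.  */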
108 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
109 static const struct predictor_info predictor_info[]= {
110 #include "predict.def"
112 /* Upper bound on predictors. */
113 {NULL, 0, 0}
115 #undef DEF_PREDICTOR
117 /* Return TRUE if frequency FREQ is considered to be hot. */
119 static inline bool
120 maybe_hot_frequency_p (struct function *fun, int freq)
122 struct cgraph_node *node = cgraph_node::get (fun->decl);
123 if (!profile_info || !flag_branch_probabilities)
125 if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
126 return false;
127 if (node->frequency == NODE_FREQUENCY_HOT)
128 return true;
130 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
131 return true;
132 if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
133 && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
134 return false;
135 if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
136 return false;
137 if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
138 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
139 return false;
140 return true;
143 static gcov_type min_count = -1;
145 /* Determine the threshold for hot BB counts. */
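/* The threshold comes from the profile working set: assuming the
   hot-bb-count-ws-permille parameter keeps its usual default of 999,
   min_count becomes the smallest counter among the counters that together
   cover 99.9% of all profiled execution.  */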
147 gcov_type
148 get_hot_bb_threshold ()
150 gcov_working_set_t *ws;
151 if (min_count == -1)
153 ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
154 gcc_assert (ws);
155 min_count = ws->min_counter;
157 return min_count;
160 /* Set the threshold for hot BB counts. */
162 void
163 set_hot_bb_threshold (gcov_type min)
165 min_count = min;
168 /* Return TRUE if profile count COUNT is considered to be hot. */
170 bool
171 maybe_hot_count_p (struct function *fun, gcov_type count)
173 if (fun && profile_status_for_fn (fun) != PROFILE_READ)
174 return true;
175 /* Code executed at most once is not hot. */
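/* (Illustrative example: with 1000 training runs, a count of 1000 or less
   means the code executed at most about once per run on average, so it is
   not considered hot.)  */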
176 if (profile_info->runs >= count)
177 return false;
178 return (count >= get_hot_bb_threshold ());
181 /* Return true in case BB can be CPU intensive and should be optimized
182 for maximal performance. */
184 bool
185 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
187 gcc_checking_assert (fun);
188 if (profile_status_for_fn (fun) == PROFILE_READ)
189 return maybe_hot_count_p (fun, bb->count);
190 return maybe_hot_frequency_p (fun, bb->frequency);
193 /* Return true in case BB can be CPU intensive and should be optimized
194 for maximal performance. */
196 bool
197 maybe_hot_edge_p (edge e)
199 if (profile_status_for_fn (cfun) == PROFILE_READ)
200 return maybe_hot_count_p (cfun, e->count);
201 return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
204 /* Return true if profile COUNT and FREQUENCY, or function FUN's static
205 node frequency, reflect never being executed. */
207 static bool
208 probably_never_executed (struct function *fun,
209 gcov_type count, int frequency)
211 gcc_checking_assert (fun);
212 if (profile_status_for_fn (cfun) == PROFILE_READ)
214 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
215 if (count * unlikely_count_fraction >= profile_info->runs)
216 return false;
217 if (!frequency)
218 return true;
219 if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
220 return false;
221 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
223 gcov_type computed_count;
224 /* Check for possibility of overflow, in which case entry bb count
225 is large enough to do the division first without losing much
226 precision. */
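/* Both branches below estimate
      count = frequency * entry_count / entry_frequency
   scaled by unlikely_count_fraction; only the order of the multiplication
   and the division differs, so that the intermediate product does not
   overflow when the entry block count is very large.  */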
227 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
228 REG_BR_PROB_BASE)
230 gcov_type scaled_count
231 = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
232 unlikely_count_fraction;
233 computed_count = RDIV (scaled_count,
234 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
236 else
238 computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
239 ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
240 computed_count *= frequency * unlikely_count_fraction;
242 if (computed_count >= profile_info->runs)
243 return false;
245 return true;
247 if ((!profile_info || !flag_branch_probabilities)
248 && (cgraph_node::get (fun->decl)->frequency
249 == NODE_FREQUENCY_UNLIKELY_EXECUTED))
250 return true;
251 return false;
255 /* Return true in case BB is probably never executed. */
257 bool
258 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
260 return probably_never_executed (fun, bb->count, bb->frequency);
264 /* Return true in case edge E is probably never executed. */
266 bool
267 probably_never_executed_edge_p (struct function *fun, edge e)
269 return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
272 /* Return true when current function should always be optimized for size. */
274 bool
275 optimize_function_for_size_p (struct function *fun)
277 if (optimize_size)
278 return true;
279 if (!fun || !fun->decl)
280 return false;
282 cgraph_node *n = cgraph_node::get (fun->decl);
283 return n && n->optimize_for_size_p ();
286 /* Return true when current function should always be optimized for speed. */
288 bool
289 optimize_function_for_speed_p (struct function *fun)
291 return !optimize_function_for_size_p (fun);
294 /* Return TRUE when BB should be optimized for size. */
296 bool
297 optimize_bb_for_size_p (const_basic_block bb)
299 return (optimize_function_for_size_p (cfun)
300 || (bb && !maybe_hot_bb_p (cfun, bb)));
303 /* Return TRUE when BB should be optimized for speed. */
305 bool
306 optimize_bb_for_speed_p (const_basic_block bb)
308 return !optimize_bb_for_size_p (bb);
311 /* Return TRUE when edge E should be optimized for size. */
313 bool
314 optimize_edge_for_size_p (edge e)
316 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
319 /* Return TRUE when edge E should be optimized for speed. */
321 bool
322 optimize_edge_for_speed_p (edge e)
324 return !optimize_edge_for_size_p (e);
327 /* Return TRUE when the current instruction should be optimized for size. */
329 bool
330 optimize_insn_for_size_p (void)
332 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
335 /* Return TRUE when the current instruction should be optimized for speed. */
337 bool
338 optimize_insn_for_speed_p (void)
340 return !optimize_insn_for_size_p ();
343 /* Return TRUE when LOOP should be optimized for size. */
345 bool
346 optimize_loop_for_size_p (struct loop *loop)
348 return optimize_bb_for_size_p (loop->header);
351 /* Return TRUE when LOOP should be optimized for speed. */
353 bool
354 optimize_loop_for_speed_p (struct loop *loop)
356 return optimize_bb_for_speed_p (loop->header);
359 /* Return TRUE when LOOP nest should be optimized for speed. */
361 bool
362 optimize_loop_nest_for_speed_p (struct loop *loop)
364 struct loop *l = loop;
365 if (optimize_loop_for_speed_p (loop))
366 return true;
367 l = loop->inner;
368 while (l && l != loop)
370 if (optimize_loop_for_speed_p (l))
371 return true;
372 if (l->inner)
373 l = l->inner;
374 else if (l->next)
375 l = l->next;
376 else
378 while (l != loop && !l->next)
379 l = loop_outer (l);
380 if (l != loop)
381 l = l->next;
384 return false;
387 /* Return TRUE when LOOP nest should be optimized for size. */
389 bool
390 optimize_loop_nest_for_size_p (struct loop *loop)
392 return !optimize_loop_nest_for_speed_p (loop);
395 /* Return true when edge E is likely to be well predictable by branch
396 predictor. */
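/* Illustrative example (assuming the predictable-branch-outcome parameter
   keeps its usual default of 2): an edge counts as predictable when its
   probability is at most 2% or at least 98% of REG_BR_PROB_BASE.  */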
398 bool
399 predictable_edge_p (edge e)
401 if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
402 return false;
403 if ((e->probability
404 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
405 || (REG_BR_PROB_BASE - e->probability
406 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
407 return true;
408 return false;
412 /* Set RTL expansion for BB profile. */
414 void
415 rtl_profile_for_bb (basic_block bb)
417 crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
420 /* Set RTL expansion for edge profile. */
422 void
423 rtl_profile_for_edge (edge e)
425 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
428 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
429 void
430 default_rtl_profile (void)
432 crtl->maybe_hot_insn_p = true;
435 /* Return true if one of the outgoing edges is already predicted by
436 PREDICTOR. */
438 bool
439 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
441 rtx note;
442 if (!INSN_P (BB_END (bb)))
443 return false;
444 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
445 if (REG_NOTE_KIND (note) == REG_BR_PRED
446 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
447 return true;
448 return false;
451 /* Structure representing predictions in tree level. */
453 struct edge_prediction {
454 struct edge_prediction *ep_next;
455 edge ep_edge;
456 enum br_predictor ep_predictor;
457 int ep_probability;
460 /* This map contains for a basic block the list of predictions for the
461 outgoing edges. */
463 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
465 /* Return true if one of the outgoing edges is already predicted by
466 PREDICTOR. */
468 bool
469 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
471 struct edge_prediction *i;
472 edge_prediction **preds = bb_predictions->get (bb);
474 if (!preds)
475 return false;
477 for (i = *preds; i; i = i->ep_next)
478 if (i->ep_predictor == predictor)
479 return true;
480 return false;
483 /* Return true when the probability of the edge is reliable.
485 The profile guessing code is good at predicting branch outcomes (i.e.
486 taken/not taken); they are predicted right slightly over 75% of the time.
487 It is however notoriously poor at predicting the probability itself.
488 In general the guessed profile appears a lot flatter (with probabilities
489 closer to 50%) than reality, so it is a bad idea to use it to drive
490 optimizations such as those disabling dynamic branch prediction for well
491 predictable branches.
493 There are two exceptions - edges leading to noreturn edges and edges
494 predicted by number of iterations heuristics are predicted well. This macro
495 should be able to distinguish those, but at the moment it simply checks for
496 the noreturn heuristic, which is the only one giving a probability over 99%
497 or below 1%. In the future we might want to propagate reliability
498 information across the CFG if we find it useful in multiple places. */
499 static bool
500 probability_reliable_p (int prob)
502 return (profile_status_for_fn (cfun) == PROFILE_READ
503 || (profile_status_for_fn (cfun) == PROFILE_GUESSED
504 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
507 /* Same predicate as above, working on edges. */
508 bool
509 edge_probability_reliable_p (const_edge e)
511 return probability_reliable_p (e->probability);
514 /* Same predicate as edge_probability_reliable_p, working on notes. */
515 bool
516 br_prob_note_reliable_p (const_rtx note)
518 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
519 return probability_reliable_p (XINT (note, 0));
522 static void
523 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
525 gcc_assert (any_condjump_p (insn));
526 if (!flag_guess_branch_prob)
527 return;
529 add_reg_note (insn, REG_BR_PRED,
530 gen_rtx_CONCAT (VOIDmode,
531 GEN_INT ((int) predictor),
532 GEN_INT ((int) probability)));
535 /* Predict insn by given predictor. */
537 void
538 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
539 enum prediction taken)
541 int probability = predictor_info[(int) predictor].hitrate;
543 if (taken != TAKEN)
544 probability = REG_BR_PROB_BASE - probability;
546 predict_insn (insn, predictor, probability);
549 /* Predict edge E with given probability if possible. */
551 void
552 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
554 rtx_insn *last_insn;
555 last_insn = BB_END (e->src);
557 /* We can store the branch prediction information only about
558 conditional jumps. */
559 if (!any_condjump_p (last_insn))
560 return;
562 /* We always store probability of branching. */
563 if (e->flags & EDGE_FALLTHRU)
564 probability = REG_BR_PROB_BASE - probability;
566 predict_insn (last_insn, predictor, probability);
569 /* Predict edge E with the given PROBABILITY. */
570 void
571 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
573 gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
574 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
576 && flag_guess_branch_prob && optimize)
578 struct edge_prediction *i = XNEW (struct edge_prediction);
579 edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
581 i->ep_next = preds;
582 preds = i;
583 i->ep_probability = probability;
584 i->ep_predictor = predictor;
585 i->ep_edge = e;
589 /* Remove all predictions on given basic block that are attached
590 to edge E. */
591 void
592 remove_predictions_associated_with_edge (edge e)
594 if (!bb_predictions)
595 return;
597 edge_prediction **preds = bb_predictions->get (e->src);
599 if (preds)
601 struct edge_prediction **prediction = preds;
602 struct edge_prediction *next;
604 while (*prediction)
606 if ((*prediction)->ep_edge == e)
608 next = (*prediction)->ep_next;
609 free (*prediction);
610 *prediction = next;
612 else
613 prediction = &((*prediction)->ep_next);
618 /* Clears the list of predictions stored for BB. */
620 static void
621 clear_bb_predictions (basic_block bb)
623 edge_prediction **preds = bb_predictions->get (bb);
624 struct edge_prediction *pred, *next;
626 if (!preds)
627 return;
629 for (pred = *preds; pred; pred = next)
631 next = pred->ep_next;
632 free (pred);
634 *preds = NULL;
637 /* Return true when we can store a prediction on insn INSN.
638 At the moment we represent predictions only on conditional
639 jumps, not on computed jumps or other complicated cases. */
640 static bool
641 can_predict_insn_p (const rtx_insn *insn)
643 return (JUMP_P (insn)
644 && any_condjump_p (insn)
645 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
648 /* Predict edge E by given predictor if possible. */
650 void
651 predict_edge_def (edge e, enum br_predictor predictor,
652 enum prediction taken)
654 int probability = predictor_info[(int) predictor].hitrate;
656 if (taken != TAKEN)
657 probability = REG_BR_PROB_BASE - probability;
659 predict_edge (e, predictor, probability);
662 /* Invert all branch predictions or probability notes in the INSN. This needs
663 to be done each time we invert the condition used by the jump. */
665 void
666 invert_br_probabilities (rtx insn)
668 rtx note;
670 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
671 if (REG_NOTE_KIND (note) == REG_BR_PROB)
672 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
673 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
674 XEXP (XEXP (note, 0), 1)
675 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
678 /* Dump information about the branch prediction to the output file. */
680 static void
681 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
682 basic_block bb, int used)
684 edge e;
685 edge_iterator ei;
687 if (!file)
688 return;
690 FOR_EACH_EDGE (e, ei, bb->succs)
691 if (! (e->flags & EDGE_FALLTHRU))
692 break;
694 fprintf (file, " %s heuristics%s: %.1f%%",
695 predictor_info[predictor].name,
696 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
698 if (bb->count)
700 fprintf (file, " exec %" PRId64, bb->count);
701 if (e)
703 fprintf (file, " hit %" PRId64, e->count);
704 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
708 fprintf (file, "\n");
711 /* We cannot predict the probabilities of the outgoing edges of BB. Set them
712 evenly and hope for the best. */
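/* E.g. a block with three ordinary successors gives each of them
   (REG_BR_PROB_BASE + 1) / 3, i.e. roughly 33.3%, while EH and fake edges
   get probability 0.  */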
713 static void
714 set_even_probabilities (basic_block bb)
716 int nedges = 0;
717 edge e;
718 edge_iterator ei;
720 FOR_EACH_EDGE (e, ei, bb->succs)
721 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
722 nedges ++;
723 FOR_EACH_EDGE (e, ei, bb->succs)
724 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
725 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
726 else
727 e->probability = 0;
730 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
731 note if not already present. Remove now useless REG_BR_PRED notes. */
733 static void
734 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
736 rtx prob_note;
737 rtx *pnote;
738 rtx note;
739 int best_probability = PROB_EVEN;
740 enum br_predictor best_predictor = END_PREDICTORS;
741 int combined_probability = REG_BR_PROB_BASE / 2;
742 int d;
743 bool first_match = false;
744 bool found = false;
746 if (!can_predict_insn_p (insn))
748 set_even_probabilities (bb);
749 return;
752 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
753 pnote = &REG_NOTES (insn);
754 if (dump_file)
755 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
756 bb->index);
758 /* We implement "first match" heuristics and use the probability guessed
759 by the predictor with the smallest index. */
760 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
761 if (REG_NOTE_KIND (note) == REG_BR_PRED)
763 enum br_predictor predictor = ((enum br_predictor)
764 INTVAL (XEXP (XEXP (note, 0), 0)));
765 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
767 found = true;
768 if (best_predictor > predictor)
769 best_probability = probability, best_predictor = predictor;
771 d = (combined_probability * probability
772 + (REG_BR_PROB_BASE - combined_probability)
773 * (REG_BR_PROB_BASE - probability));
775 /* Use FP math to avoid overflows of 32bit integers. */
776 if (d == 0)
777 /* If one probability is 0% and one 100%, avoid division by zero. */
778 combined_probability = REG_BR_PROB_BASE / 2;
779 else
780 combined_probability = (((double) combined_probability) * probability
781 * REG_BR_PROB_BASE / d + 0.5);
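/* Worked example of the combination formula above (assuming
   REG_BR_PROB_BASE is 10000): combining p1 = 7000 (70%) with p2 = 6000
   (60%) gives d = 7000*6000 + 3000*4000 = 54000000 and a combined
   probability of about 7778, i.e. roughly 78% -- two agreeing predictors
   reinforce each other.  */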
784 /* Decide which heuristic to use. In case we didn't match anything,
785 use the no_prediction heuristic; in case we did match, use either
786 first match or Dempster-Shafer theory depending on the flags. */
788 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
789 first_match = true;
791 if (!found)
792 dump_prediction (dump_file, PRED_NO_PREDICTION,
793 combined_probability, bb, true);
794 else
796 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
797 bb, !first_match);
798 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
799 bb, first_match);
802 if (first_match)
803 combined_probability = best_probability;
804 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
806 while (*pnote)
808 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
810 enum br_predictor predictor = ((enum br_predictor)
811 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
812 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
814 dump_prediction (dump_file, predictor, probability, bb,
815 !first_match || best_predictor == predictor);
816 *pnote = XEXP (*pnote, 1);
818 else
819 pnote = &XEXP (*pnote, 1);
822 if (!prob_note)
824 add_int_reg_note (insn, REG_BR_PROB, combined_probability);
826 /* Save the prediction into the CFG in case we are seeing a non-degenerate
827 conditional jump. */
828 if (!single_succ_p (bb))
830 BRANCH_EDGE (bb)->probability = combined_probability;
831 FALLTHRU_EDGE (bb)->probability
832 = REG_BR_PROB_BASE - combined_probability;
835 else if (!single_succ_p (bb))
837 int prob = XINT (prob_note, 0);
839 BRANCH_EDGE (bb)->probability = prob;
840 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
842 else
843 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
846 /* Combine predictions into single probability and store them into CFG.
847 Remove now useless prediction entries. */
849 static void
850 combine_predictions_for_bb (basic_block bb)
852 int best_probability = PROB_EVEN;
853 enum br_predictor best_predictor = END_PREDICTORS;
854 int combined_probability = REG_BR_PROB_BASE / 2;
855 int d;
856 bool first_match = false;
857 bool found = false;
858 struct edge_prediction *pred;
859 int nedges = 0;
860 edge e, first = NULL, second = NULL;
861 edge_iterator ei;
863 FOR_EACH_EDGE (e, ei, bb->succs)
864 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
866 nedges ++;
867 if (first && !second)
868 second = e;
869 if (!first)
870 first = e;
873 /* When there is no successor or only one choice, prediction is easy.
875 We are lazy for now and predict only basic blocks with two outgoing
876 edges. It is possible to predict the generic case too, but we would have
877 to ignore first match heuristics and do more involved combining. Implement
878 this later. */
879 if (nedges != 2)
881 if (!bb->count)
882 set_even_probabilities (bb);
883 clear_bb_predictions (bb);
884 if (dump_file)
885 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
886 nedges, bb->index);
887 return;
890 if (dump_file)
891 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
893 edge_prediction **preds = bb_predictions->get (bb);
894 if (preds)
896 /* We implement "first match" heuristics and use the probability guessed
897 by the predictor with the smallest index. */
898 for (pred = *preds; pred; pred = pred->ep_next)
900 enum br_predictor predictor = pred->ep_predictor;
901 int probability = pred->ep_probability;
903 if (pred->ep_edge != first)
904 probability = REG_BR_PROB_BASE - probability;
906 found = true;
907 /* First match heuristics would be wildly confused if we predicted
908 both directions. */
909 if (best_predictor > predictor)
911 struct edge_prediction *pred2;
912 int prob = probability;
914 for (pred2 = (struct edge_prediction *) *preds;
915 pred2; pred2 = pred2->ep_next)
916 if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
918 int probability2 = pred2->ep_probability;
920 if (pred2->ep_edge != first)
921 probability2 = REG_BR_PROB_BASE - probability2;
923 if ((probability < REG_BR_PROB_BASE / 2) !=
924 (probability2 < REG_BR_PROB_BASE / 2))
925 break;
927 /* If the same predictor later gave better result, go for it! */
928 if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
929 || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
930 prob = probability2;
932 if (!pred2)
933 best_probability = prob, best_predictor = predictor;
936 d = (combined_probability * probability
937 + (REG_BR_PROB_BASE - combined_probability)
938 * (REG_BR_PROB_BASE - probability));
940 /* Use FP math to avoid overflows of 32bit integers. */
941 if (d == 0)
942 /* If one probability is 0% and one 100%, avoid division by zero. */
943 combined_probability = REG_BR_PROB_BASE / 2;
944 else
945 combined_probability = (((double) combined_probability)
946 * probability
947 * REG_BR_PROB_BASE / d + 0.5);
951 /* Decide which heuristic to use. In case we didn't match anything,
952 use the no_prediction heuristic; in case we did match, use either
953 first match or Dempster-Shafer theory depending on the flags. */
955 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
956 first_match = true;
958 if (!found)
959 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
960 else
962 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
963 !first_match);
964 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
965 first_match);
968 if (first_match)
969 combined_probability = best_probability;
970 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
972 if (preds)
974 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
976 enum br_predictor predictor = pred->ep_predictor;
977 int probability = pred->ep_probability;
979 if (pred->ep_edge != EDGE_SUCC (bb, 0))
980 probability = REG_BR_PROB_BASE - probability;
981 dump_prediction (dump_file, predictor, probability, bb,
982 !first_match || best_predictor == predictor);
985 clear_bb_predictions (bb);
987 if (!bb->count)
989 first->probability = combined_probability;
990 second->probability = REG_BR_PROB_BASE - combined_probability;
994 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
995 Return the SSA_NAME if the condition is satisfied, NULL otherwise.
997 T1 and T2 should be one of the following cases:
998 1. T1 is SSA_NAME, T2 is NULL
999 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1000 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
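/* Illustrative examples (hypothetical SSA names): for i_1 + 2 the function
   returns i_1; for i_1 + 8 it returns NULL because the constant is outside
   [-4, 4]; for i_1 + j_2 it returns NULL because both operands are
   SSA_NAMEs.  */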
1002 static tree
1003 strips_small_constant (tree t1, tree t2)
1005 tree ret = NULL;
1006 int value = 0;
1008 if (!t1)
1009 return NULL;
1010 else if (TREE_CODE (t1) == SSA_NAME)
1011 ret = t1;
1012 else if (tree_fits_shwi_p (t1))
1013 value = tree_to_shwi (t1);
1014 else
1015 return NULL;
1017 if (!t2)
1018 return ret;
1019 else if (tree_fits_shwi_p (t2))
1020 value = tree_to_shwi (t2);
1021 else if (TREE_CODE (t2) == SSA_NAME)
1023 if (ret)
1024 return NULL;
1025 else
1026 ret = t2;
1029 if (value <= 4 && value >= -4)
1030 return ret;
1031 else
1032 return NULL;
1035 /* Return the SSA_NAME in T or T's operands.
1036 Return NULL if SSA_NAME cannot be found. */
1038 static tree
1039 get_base_value (tree t)
1041 if (TREE_CODE (t) == SSA_NAME)
1042 return t;
1044 if (!BINARY_CLASS_P (t))
1045 return NULL;
1047 switch (TREE_OPERAND_LENGTH (t))
1049 case 1:
1050 return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1051 case 2:
1052 return strips_small_constant (TREE_OPERAND (t, 0),
1053 TREE_OPERAND (t, 1));
1054 default:
1055 return NULL;
1059 /* Check the compare STMT in LOOP. If it compares an induction
1060 variable to a loop invariant, return true, and save
1061 LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1062 Otherwise return false and set LOOP_INVARIANT to NULL. */
1064 static bool
1065 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1066 tree *loop_invariant,
1067 enum tree_code *compare_code,
1068 tree *loop_step,
1069 tree *loop_iv_base)
1071 tree op0, op1, bound, base;
1072 affine_iv iv0, iv1;
1073 enum tree_code code;
1074 tree step;
1076 code = gimple_cond_code (stmt);
1077 *loop_invariant = NULL;
1079 switch (code)
1081 case GT_EXPR:
1082 case GE_EXPR:
1083 case NE_EXPR:
1084 case LT_EXPR:
1085 case LE_EXPR:
1086 case EQ_EXPR:
1087 break;
1089 default:
1090 return false;
1093 op0 = gimple_cond_lhs (stmt);
1094 op1 = gimple_cond_rhs (stmt);
1096 if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1097 || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1098 return false;
1099 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1100 return false;
1101 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1102 return false;
1103 if (TREE_CODE (iv0.step) != INTEGER_CST
1104 || TREE_CODE (iv1.step) != INTEGER_CST)
1105 return false;
1106 if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1107 || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1108 return false;
1110 if (integer_zerop (iv0.step))
1112 if (code != NE_EXPR && code != EQ_EXPR)
1113 code = invert_tree_comparison (code, false);
1114 bound = iv0.base;
1115 base = iv1.base;
1116 if (tree_fits_shwi_p (iv1.step))
1117 step = iv1.step;
1118 else
1119 return false;
1121 else
1123 bound = iv1.base;
1124 base = iv0.base;
1125 if (tree_fits_shwi_p (iv0.step))
1126 step = iv0.step;
1127 else
1128 return false;
1131 if (TREE_CODE (bound) != INTEGER_CST)
1132 bound = get_base_value (bound);
1133 if (!bound)
1134 return false;
1135 if (TREE_CODE (base) != INTEGER_CST)
1136 base = get_base_value (base);
1137 if (!base)
1138 return false;
1140 *loop_invariant = bound;
1141 *compare_code = code;
1142 *loop_step = step;
1143 *loop_iv_base = base;
1144 return true;
1147 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1149 static bool
1150 expr_coherent_p (tree t1, tree t2)
1152 gimple stmt;
1153 tree ssa_name_1 = NULL;
1154 tree ssa_name_2 = NULL;
1156 gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1157 gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1159 if (t1 == t2)
1160 return true;
1162 if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1163 return true;
1164 if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1165 return false;
1167 /* Check to see if t1 is expressed/defined with t2. */
1168 stmt = SSA_NAME_DEF_STMT (t1);
1169 gcc_assert (stmt != NULL);
1170 if (is_gimple_assign (stmt))
1172 ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1173 if (ssa_name_1 && ssa_name_1 == t2)
1174 return true;
1177 /* Check to see if t2 is expressed/defined with t1. */
1178 stmt = SSA_NAME_DEF_STMT (t2);
1179 gcc_assert (stmt != NULL);
1180 if (is_gimple_assign (stmt))
1182 ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1183 if (ssa_name_2 && ssa_name_2 == t1)
1184 return true;
1187 /* Compare if t1 and t2's def_stmts are identical. */
1188 if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1189 return true;
1190 else
1191 return false;
1194 /* Predict branch probability of BB when BB contains a branch that compares
1195 an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1196 loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1198 E.g.
1199 for (int i = 0; i < bound; i++) {
1200 if (i < bound - 2)
1201 computation_1();
1202 else
1203 computation_2();
1206 In this loop, we will predict the branch inside the loop to be taken. */
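/* Worked example: for the loop "for (i = 0; i < 100; i++)" with the inner
   test "i < 98" (all constants), the code below computes loop_count = 100
   and compare_count = 98, so the then edge is predicted with probability
   98 * REG_BR_PROB_BASE / 100, i.e. 98%.  */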
1208 static void
1209 predict_iv_comparison (struct loop *loop, basic_block bb,
1210 tree loop_bound_var,
1211 tree loop_iv_base_var,
1212 enum tree_code loop_bound_code,
1213 int loop_bound_step)
1215 gimple stmt;
1216 tree compare_var, compare_base;
1217 enum tree_code compare_code;
1218 tree compare_step_var;
1219 edge then_edge;
1220 edge_iterator ei;
1222 if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1223 || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1224 || predicted_by_p (bb, PRED_LOOP_EXIT))
1225 return;
1227 stmt = last_stmt (bb);
1228 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1229 return;
1230 if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1231 &compare_code,
1232 &compare_step_var,
1233 &compare_base))
1234 return;
1236 /* Find the taken edge. */
1237 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1238 if (then_edge->flags & EDGE_TRUE_VALUE)
1239 break;
1241 /* When comparing an IV to a loop invariant, NE is more likely to be
1242 taken while EQ is more likely to be not-taken. */
1243 if (compare_code == NE_EXPR)
1245 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1246 return;
1248 else if (compare_code == EQ_EXPR)
1250 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1251 return;
1254 if (!expr_coherent_p (loop_iv_base_var, compare_base))
1255 return;
1257 /* If loop bound, base and compare bound are all constants, we can
1258 calculate the probability directly. */
1259 if (tree_fits_shwi_p (loop_bound_var)
1260 && tree_fits_shwi_p (compare_var)
1261 && tree_fits_shwi_p (compare_base))
1263 int probability;
1264 bool overflow, overall_overflow = false;
1265 widest_int compare_count, tem;
1267 /* (loop_bound - base) / compare_step */
1268 tem = wi::sub (wi::to_widest (loop_bound_var),
1269 wi::to_widest (compare_base), SIGNED, &overflow);
1270 overall_overflow |= overflow;
1271 widest_int loop_count = wi::div_trunc (tem,
1272 wi::to_widest (compare_step_var),
1273 SIGNED, &overflow);
1274 overall_overflow |= overflow;
1276 if (!wi::neg_p (wi::to_widest (compare_step_var))
1277 ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1279 /* (loop_bound - compare_bound) / compare_step */
1280 tem = wi::sub (wi::to_widest (loop_bound_var),
1281 wi::to_widest (compare_var), SIGNED, &overflow);
1282 overall_overflow |= overflow;
1283 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1284 SIGNED, &overflow);
1285 overall_overflow |= overflow;
1287 else
1289 /* (compare_bound - base) / compare_step */
1290 tem = wi::sub (wi::to_widest (compare_var),
1291 wi::to_widest (compare_base), SIGNED, &overflow);
1292 overall_overflow |= overflow;
1293 compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1294 SIGNED, &overflow);
1295 overall_overflow |= overflow;
1297 if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1298 ++compare_count;
1299 if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1300 ++loop_count;
1301 if (wi::neg_p (compare_count))
1302 compare_count = 0;
1303 if (wi::neg_p (loop_count))
1304 loop_count = 0;
1305 if (loop_count == 0)
1306 probability = 0;
1307 else if (wi::cmps (compare_count, loop_count) == 1)
1308 probability = REG_BR_PROB_BASE;
1309 else
1311 tem = compare_count * REG_BR_PROB_BASE;
1312 tem = wi::udiv_trunc (tem, loop_count);
1313 probability = tem.to_uhwi ();
1316 if (!overall_overflow)
1317 predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1319 return;
1322 if (expr_coherent_p (loop_bound_var, compare_var))
1324 if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1325 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1326 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1327 else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1328 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1329 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1330 else if (loop_bound_code == NE_EXPR)
1332 /* If the loop backedge condition is "(i != bound)", we do
1333 the comparison based on the step of IV:
1334 * step < 0 : backedge condition is like (i > bound)
1335 * step > 0 : backedge condition is like (i < bound) */
1336 gcc_assert (loop_bound_step != 0);
1337 if (loop_bound_step > 0
1338 && (compare_code == LT_EXPR
1339 || compare_code == LE_EXPR))
1340 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1341 else if (loop_bound_step < 0
1342 && (compare_code == GT_EXPR
1343 || compare_code == GE_EXPR))
1344 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345 else
1346 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1348 else
1349 /* The branch is predicted not-taken if loop_bound_code is
1350 opposite with compare_code. */
1351 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1353 else if (expr_coherent_p (loop_iv_base_var, compare_var))
1355 /* For cases like:
1356 for (i = s; i < h; i++)
1357 if (i > s + 2) ....
1358 The branch should be predicted taken. */
1359 if (loop_bound_step > 0
1360 && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1361 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1362 else if (loop_bound_step < 0
1363 && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1364 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1365 else
1366 predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1371 exits result from short-circuit conditions that will generate an
1372 if_tmp. E.g.:
1374 if (foo() || global > 10)
1375 break;
1377 This will be translated into:
1379 BB3:
1380 loop header...
1381 BB4:
1382 if foo() goto BB6 else goto BB5
1383 BB5:
1384 if global > 10 goto BB6 else goto BB7
1385 BB6:
1386 goto BB7
1387 BB7:
1388 iftmp = (PHI 0(BB5), 1(BB6))
1389 if iftmp == 1 goto BB8 else goto BB3
1390 BB8:
1391 outside of the loop...
1393 The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1394 From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1395 exits. This function takes BB7->BB8 as input, and finds out the extra loop
1396 exits to predict them using PRED_LOOP_EXIT. */
1398 static void
1399 predict_extra_loop_exits (edge exit_edge)
1401 unsigned i;
1402 bool check_value_one;
1403 gimple phi_stmt;
1404 tree cmp_rhs, cmp_lhs;
1405 gimple cmp_stmt = last_stmt (exit_edge->src);
1407 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1408 return;
1409 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1410 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1411 if (!TREE_CONSTANT (cmp_rhs)
1412 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1413 return;
1414 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1415 return;
1417 /* If check_value_one is true, only the phi_args with value '1' will lead
1418 to loop exit. Otherwise, only the phi_args with value '0' will lead to
1419 loop exit. */
1420 check_value_one = (((integer_onep (cmp_rhs))
1421 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1422 ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1424 phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1425 if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1426 return;
1428 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1430 edge e1;
1431 edge_iterator ei;
1432 tree val = gimple_phi_arg_def (phi_stmt, i);
1433 edge e = gimple_phi_arg_edge (phi_stmt, i);
1435 if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1436 continue;
1437 if ((check_value_one ^ integer_onep (val)) == 1)
1438 continue;
1439 if (EDGE_COUNT (e->src->succs) != 1)
1441 predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1442 continue;
1445 FOR_EACH_EDGE (e1, ei, e->src->preds)
1446 predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1450 /* Predict edge probabilities by exploiting loop structure. */
1452 static void
1453 predict_loops (void)
1455 struct loop *loop;
1457 /* Try to predict out blocks in a loop that are not part of a
1458 natural loop. */
1459 FOR_EACH_LOOP (loop, 0)
1461 basic_block bb, *bbs;
1462 unsigned j, n_exits;
1463 vec<edge> exits;
1464 struct tree_niter_desc niter_desc;
1465 edge ex;
1466 struct nb_iter_bound *nb_iter;
1467 enum tree_code loop_bound_code = ERROR_MARK;
1468 tree loop_bound_step = NULL;
1469 tree loop_bound_var = NULL;
1470 tree loop_iv_base = NULL;
1471 gimple stmt = NULL;
1473 exits = get_loop_exit_edges (loop);
1474 n_exits = exits.length ();
1475 if (!n_exits)
1477 exits.release ();
1478 continue;
1481 FOR_EACH_VEC_ELT (exits, j, ex)
1483 tree niter = NULL;
1484 HOST_WIDE_INT nitercst;
1485 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1486 int probability;
1487 enum br_predictor predictor;
1489 predict_extra_loop_exits (ex);
1491 if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1492 niter = niter_desc.niter;
1493 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1494 niter = loop_niter_by_eval (loop, ex);
1496 if (TREE_CODE (niter) == INTEGER_CST)
1498 if (tree_fits_uhwi_p (niter)
1499 && max
1500 && compare_tree_int (niter, max - 1) == -1)
1501 nitercst = tree_to_uhwi (niter) + 1;
1502 else
1503 nitercst = max;
1504 predictor = PRED_LOOP_ITERATIONS;
1506 /* If we have just one exit and we can derive some information about
1507 the number of iterations of the loop from the statements inside
1508 the loop, use it to predict this exit. */
1509 else if (n_exits == 1)
1511 nitercst = estimated_stmt_executions_int (loop);
1512 if (nitercst < 0)
1513 continue;
1514 if (nitercst > max)
1515 nitercst = max;
1517 predictor = PRED_LOOP_ITERATIONS_GUESSED;
1519 else
1520 continue;
1522 /* If the prediction for number of iterations is zero, do not
1523 predict the exit edges. */
1524 if (nitercst == 0)
1525 continue;
1527 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
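/* E.g. a loop expected to iterate about 10 times (nitercst == 10) gives
   the exit edge (REG_BR_PROB_BASE + 5) / 10, i.e. roughly a 10% chance of
   leaving the loop on each iteration.  */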
1528 predict_edge (ex, predictor, probability);
1530 exits.release ();
1532 /* Find information about loop bound variables. */
1533 for (nb_iter = loop->bounds; nb_iter;
1534 nb_iter = nb_iter->next)
1535 if (nb_iter->stmt
1536 && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1538 stmt = nb_iter->stmt;
1539 break;
1541 if (!stmt && last_stmt (loop->header)
1542 && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1543 stmt = last_stmt (loop->header);
1544 if (stmt)
1545 is_comparison_with_loop_invariant_p (stmt, loop,
1546 &loop_bound_var,
1547 &loop_bound_code,
1548 &loop_bound_step,
1549 &loop_iv_base);
1551 bbs = get_loop_body (loop);
1553 for (j = 0; j < loop->num_nodes; j++)
1555 int header_found = 0;
1556 edge e;
1557 edge_iterator ei;
1559 bb = bbs[j];
1561 /* Bypass loop heuristics on continue statements. These
1562 statements construct loops via "non-loop" constructs
1563 in the source language and are better handled
1564 separately. */
1565 if (predicted_by_p (bb, PRED_CONTINUE))
1566 continue;
1568 /* Loop branch heuristics - predict an edge back to a
1569 loop's head as taken. */
1570 if (bb == loop->latch)
1572 e = find_edge (loop->latch, loop->header);
1573 if (e)
1575 header_found = 1;
1576 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1580 /* Loop exit heuristics - predict an edge exiting the loop if the
1581 conditional has no loop header successors as not taken. */
1582 if (!header_found
1583 /* If we already used more reliable loop exit predictors, do not
1584 bother with PRED_LOOP_EXIT. */
1585 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1586 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1588 /* For a loop with many exits we don't want to predict all exits
1589 with a pretty large probability, because if all exits are
1590 considered in a row, the loop would be predicted to iterate
1591 almost never. The code to divide the probability by the number
1592 of exits is very rough. It should compute the number of exits
1593 taken in each path through the function (not the overall number
1594 of exits, which might be a lot higher for loops with wide switch
1595 statements in them) and compute the n-th square root.
1597 We limit the minimal probability to 2% to avoid
1598 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1599 as this was causing a regression in a perl benchmark containing
1600 such a wide loop. */
1602 int probability = ((REG_BR_PROB_BASE
1603 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1604 / n_exits);
1605 if (probability < HITRATE (2))
1606 probability = HITRATE (2);
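/* Illustrative example: if PRED_LOOP_EXIT's hit rate were 90%, a loop with
   four exits would give each exit edge (REG_BR_PROB_BASE - 9000) / 4 = 250,
   i.e. 2.5%, which is still above the 2% floor applied above.  */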
1607 FOR_EACH_EDGE (e, ei, bb->succs)
1608 if (e->dest->index < NUM_FIXED_BLOCKS
1609 || !flow_bb_inside_loop_p (loop, e->dest))
1610 predict_edge (e, PRED_LOOP_EXIT, probability);
1612 if (loop_bound_var)
1613 predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1614 loop_bound_code,
1615 tree_to_shwi (loop_bound_step));
1618 /* Free basic blocks from get_loop_body. */
1619 free (bbs);
1623 /* Attempt to predict probabilities of BB outgoing edges using local
1624 properties. */
1625 static void
1626 bb_estimate_probability_locally (basic_block bb)
1628 rtx_insn *last_insn = BB_END (bb);
1629 rtx cond;
1631 if (! can_predict_insn_p (last_insn))
1632 return;
1633 cond = get_condition (last_insn, NULL, false, false);
1634 if (! cond)
1635 return;
1637 /* Try "pointer heuristic."
1638 A comparison ptr == 0 is predicted as false.
1639 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1640 if (COMPARISON_P (cond)
1641 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1642 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1644 if (GET_CODE (cond) == EQ)
1645 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1646 else if (GET_CODE (cond) == NE)
1647 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1649 else
1651 /* Try "opcode heuristic."
1652 EQ tests are usually false and NE tests are usually true. Also,
1653 most quantities are positive, so we can make the appropriate guesses
1654 about signed comparisons against zero. */
1655 switch (GET_CODE (cond))
1657 case CONST_INT:
1658 /* Unconditional branch. */
1659 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1660 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1661 break;
1663 case EQ:
1664 case UNEQ:
1665 /* Floating point comparisons appear to behave in a very
1666 unpredictable way because of the special role of = tests in
1667 FP code. */
1668 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1670 /* Comparisons with 0 are often used for booleans and there is
1671 nothing useful to predict about them. */
1672 else if (XEXP (cond, 1) == const0_rtx
1673 || XEXP (cond, 0) == const0_rtx)
1675 else
1676 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1677 break;
1679 case NE:
1680 case LTGT:
1681 /* Floating point comparisons appear to behave in a very
1682 unpredictable way because of the special role of = tests in
1683 FP code. */
1684 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1686 /* Comparisons with 0 are often used for booleans and there is
1687 nothing useful to predict about them. */
1688 else if (XEXP (cond, 1) == const0_rtx
1689 || XEXP (cond, 0) == const0_rtx)
1691 else
1692 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1693 break;
1695 case ORDERED:
1696 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1697 break;
1699 case UNORDERED:
1700 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1701 break;
1703 case LE:
1704 case LT:
1705 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1706 || XEXP (cond, 1) == constm1_rtx)
1707 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1708 break;
1710 case GE:
1711 case GT:
1712 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1713 || XEXP (cond, 1) == constm1_rtx)
1714 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1715 break;
1717 default:
1718 break;
1722 /* Set edge->probability for each successor edge of BB. */
1723 void
1724 guess_outgoing_edge_probabilities (basic_block bb)
1726 bb_estimate_probability_locally (bb);
1727 combine_predictions_for_insn (BB_END (bb), bb);
1730 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1732 /* Helper function for expr_expected_value. */
1734 static tree
1735 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1736 tree op1, bitmap visited, enum br_predictor *predictor)
1738 gimple def;
1740 if (predictor)
1741 *predictor = PRED_UNCONDITIONAL;
1743 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1745 if (TREE_CONSTANT (op0))
1746 return op0;
1748 if (code != SSA_NAME)
1749 return NULL_TREE;
1751 def = SSA_NAME_DEF_STMT (op0);
1753 /* If we were already here, break the infinite cycle. */
1754 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1755 return NULL;
1757 if (gimple_code (def) == GIMPLE_PHI)
1759 /* All the arguments of the PHI node must have the same expected
1760 constant value. */
1761 int i, n = gimple_phi_num_args (def);
1762 tree val = NULL, new_val;
1764 for (i = 0; i < n; i++)
1766 tree arg = PHI_ARG_DEF (def, i);
1767 enum br_predictor predictor2;
1769 /* If this PHI has itself as an argument, we cannot
1770 determine the expected value of this argument. However,
1771 if we can find an expected constant value for the other
1772 PHI args then we can still be sure that this is
1773 likely a constant. So be optimistic and just
1774 continue with the next argument. */
1775 if (arg == PHI_RESULT (def))
1776 continue;
1778 new_val = expr_expected_value (arg, visited, &predictor2);
1780 /* It is difficult to combine value predictors. Simply assume
1781 that the later predictor is weaker and take its prediction. */
1782 if (predictor && *predictor < predictor2)
1783 *predictor = predictor2;
1784 if (!new_val)
1785 return NULL;
1786 if (!val)
1787 val = new_val;
1788 else if (!operand_equal_p (val, new_val, false))
1789 return NULL;
1791 return val;
1793 if (is_gimple_assign (def))
1795 if (gimple_assign_lhs (def) != op0)
1796 return NULL;
1798 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1799 gimple_assign_rhs1 (def),
1800 gimple_assign_rhs_code (def),
1801 gimple_assign_rhs2 (def),
1802 visited, predictor);
1805 if (is_gimple_call (def))
1807 tree decl = gimple_call_fndecl (def);
1808 if (!decl)
1810 if (gimple_call_internal_p (def)
1811 && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1813 gcc_assert (gimple_call_num_args (def) == 3);
1814 tree val = gimple_call_arg (def, 0);
1815 if (TREE_CONSTANT (val))
1816 return val;
1817 if (predictor)
1819 tree val2 = gimple_call_arg (def, 2);
1820 gcc_assert (TREE_CODE (val2) == INTEGER_CST
1821 && tree_fits_uhwi_p (val2)
1822 && tree_to_uhwi (val2) < END_PREDICTORS);
1823 *predictor = (enum br_predictor) tree_to_uhwi (val2);
1825 return gimple_call_arg (def, 1);
1827 return NULL;
1829 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1830 switch (DECL_FUNCTION_CODE (decl))
1832 case BUILT_IN_EXPECT:
1834 tree val;
1835 if (gimple_call_num_args (def) != 2)
1836 return NULL;
1837 val = gimple_call_arg (def, 0);
1838 if (TREE_CONSTANT (val))
1839 return val;
1840 if (predictor)
1841 *predictor = PRED_BUILTIN_EXPECT;
1842 return gimple_call_arg (def, 1);
1845 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1846 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1847 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1848 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1849 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1850 case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1851 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1852 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1853 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1854 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1855 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1856 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1857 case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1858 /* Assume that any given atomic operation has low contention,
1859 and thus the compare-and-swap operation succeeds. */
1860 if (predictor)
1861 *predictor = PRED_COMPARE_AND_SWAP;
1862 return boolean_true_node;
1863 default:
1864 break;
1868 return NULL;
1871 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1873 tree res;
1874 enum br_predictor predictor2;
1875 op0 = expr_expected_value (op0, visited, predictor);
1876 if (!op0)
1877 return NULL;
1878 op1 = expr_expected_value (op1, visited, &predictor2);
1879 if (predictor && *predictor < predictor2)
1880 *predictor = predictor2;
1881 if (!op1)
1882 return NULL;
1883 res = fold_build2 (code, type, op0, op1);
1884 if (TREE_CONSTANT (res))
1885 return res;
1886 return NULL;
1888 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1890 tree res;
1891 op0 = expr_expected_value (op0, visited, predictor);
1892 if (!op0)
1893 return NULL;
1894 res = fold_build1 (code, type, op0);
1895 if (TREE_CONSTANT (res))
1896 return res;
1897 return NULL;
1899 return NULL;
1902 /* Return the constant EXPR will likely have at execution time, or NULL if
1903 unknown. The function is used by the builtin_expect branch predictor, so
1904 the evidence must come from this construct and possible additional
1906 constant folding. We may want to implement a more involved value guess
1907 (such as value range propagation based prediction), but such tricks shall
1908 go to a new implementation. */
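/* For example (illustrative):

     if (__builtin_expect (ptr == NULL, 0))
       handle_error ();

   here the expected value of the condition is the constant 0, so the call
   to handle_error ends up on the path predicted as unlikely.  */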
1910 static tree
1911 expr_expected_value (tree expr, bitmap visited,
1912 enum br_predictor *predictor)
1914 enum tree_code code;
1915 tree op0, op1;
1917 if (TREE_CONSTANT (expr))
1919 if (predictor)
1920 *predictor = PRED_UNCONDITIONAL;
1921 return expr;
1924 extract_ops_from_tree (expr, &code, &op0, &op1);
1925 return expr_expected_value_1 (TREE_TYPE (expr),
1926 op0, code, op1, visited, predictor);
1929 /* Predict using opcode of the last statement in basic block. */
1930 static void
1931 tree_predict_by_opcode (basic_block bb)
1933 gimple stmt = last_stmt (bb);
1934 edge then_edge;
1935 tree op0, op1;
1936 tree type;
1937 tree val;
1938 enum tree_code cmp;
1939 bitmap visited;
1940 edge_iterator ei;
1941 enum br_predictor predictor;
1943 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1944 return;
1945 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1946 if (then_edge->flags & EDGE_TRUE_VALUE)
1947 break;
1948 op0 = gimple_cond_lhs (stmt);
1949 op1 = gimple_cond_rhs (stmt);
1950 cmp = gimple_cond_code (stmt);
1951 type = TREE_TYPE (op0);
1952 visited = BITMAP_ALLOC (NULL);
1953 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1954 &predictor);
1955 BITMAP_FREE (visited);
1956 if (val && TREE_CODE (val) == INTEGER_CST)
1958 if (predictor == PRED_BUILTIN_EXPECT)
1960 int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1962 gcc_assert (percent >= 0 && percent <= 100);
1963 if (integer_zerop (val))
1964 percent = 100 - percent;
1965 predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1967 else
1968 predict_edge (then_edge, predictor,
1969 integer_zerop (val) ? NOT_TAKEN : TAKEN);
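/* E.g. assuming the builtin-expect-probability parameter keeps its usual
   default of 90, __builtin_expect (cond, 1) predicts the then edge with
   HITRATE (90) and __builtin_expect (cond, 0) with HITRATE (10).  */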
1971 /* Try "pointer heuristic."
1972 A comparison ptr == 0 is predicted as false.
1973 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1974 if (POINTER_TYPE_P (type))
1976 if (cmp == EQ_EXPR)
1977 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1978 else if (cmp == NE_EXPR)
1979 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1981 else
1983 /* Try "opcode heuristic."
1984 EQ tests are usually false and NE tests are usually true. Also,
1985 most quantities are positive, so we can make the appropriate guesses
1986 about signed comparisons against zero. */
1987 switch (cmp)
1989 case EQ_EXPR:
1990 case UNEQ_EXPR:
1991 /* Floating point comparisons appear to behave in a very
1992 unpredictable way because of the special role of = tests in
1993 FP code. */
1994 if (FLOAT_TYPE_P (type))
1996 /* Comparisons with 0 are often used for booleans and there is
1997 nothing useful to predict about them. */
1998 else if (integer_zerop (op0) || integer_zerop (op1))
2000 else
2001 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2002 break;
2004 case NE_EXPR:
2005 case LTGT_EXPR:
2006 /* Floating point comparisons appear to behave in a very
2007 unpredictable way because of the special role of = tests in
2008 FP code. */
2009 if (FLOAT_TYPE_P (type))
2011 /* Comparisons with 0 are often used for booleans and there is
2012 nothing useful to predict about them. */
2013 else if (integer_zerop (op0)
2014 || integer_zerop (op1))
2016 else
2017 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2018 break;
2020 case ORDERED_EXPR:
2021 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2022 break;
2024 case UNORDERED_EXPR:
2025 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2026 break;
2028 case LE_EXPR:
2029 case LT_EXPR:
2030 if (integer_zerop (op1)
2031 || integer_onep (op1)
2032 || integer_all_onesp (op1)
2033 || real_zerop (op1)
2034 || real_onep (op1)
2035 || real_minus_onep (op1))
2036 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2037 break;
2039 case GE_EXPR:
2040 case GT_EXPR:
2041 if (integer_zerop (op1)
2042 || integer_onep (op1)
2043 || integer_all_onesp (op1)
2044 || real_zerop (op1)
2045 || real_onep (op1)
2046 || real_minus_onep (op1))
2047 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2048 break;
2050 default:
2051 break;
2055 /* Try to guess whether the return value means an error code. */
2057 static enum br_predictor
2058 return_prediction (tree val, enum prediction *prediction)
2060 /* VOID. */
2061 if (!val)
2062 return PRED_NO_PREDICTION;
2063 /* Different heuristics for pointers and scalars. */
2064 if (POINTER_TYPE_P (TREE_TYPE (val)))
2066 /* NULL is usually not returned. */
2067 if (integer_zerop (val))
2069 *prediction = NOT_TAKEN;
2070 return PRED_NULL_RETURN;
2073 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2075 /* Negative return values are often used to indicate
2076 errors. */
2077 if (TREE_CODE (val) == INTEGER_CST
2078 && tree_int_cst_sgn (val) < 0)
2080 *prediction = NOT_TAKEN;
2081 return PRED_NEGATIVE_RETURN;
2083 /* Constant return values seem to be commonly taken.
2084 Zero/one often represent booleans, so exclude them from the
2085 heuristics. */
2086 if (TREE_CONSTANT (val)
2087 && (!integer_zerop (val) && !integer_onep (val)))
2089 *prediction = TAKEN;
2090 return PRED_CONST_RETURN;
2093 return PRED_NO_PREDICTION;
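/* Illustrative summary (not in the original source) of how the heuristics
   above classify return values:

     return NULL;   -> PRED_NULL_RETURN, NOT_TAKEN
     return -1;     -> PRED_NEGATIVE_RETURN, NOT_TAKEN
     return 42;     -> PRED_CONST_RETURN, TAKEN
     return 0;      -> no prediction (0 and 1 usually mean booleans)  */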
2096 /* Find the basic block with the return expression and look up the possible
2097 return value, trying to apply the return_prediction heuristics. */
2098 static void
2099 apply_return_prediction (void)
2101 gimple return_stmt = NULL;
2102 tree return_val;
2103 edge e;
2104 gimple phi;
2105 int phi_num_args, i;
2106 enum br_predictor pred;
2107 enum prediction direction;
2108 edge_iterator ei;
2110 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2112 return_stmt = last_stmt (e->src);
2113 if (return_stmt
2114 && gimple_code (return_stmt) == GIMPLE_RETURN)
2115 break;
2117 if (!e)
2118 return;
2119 return_val = gimple_return_retval (return_stmt);
2120 if (!return_val)
2121 return;
2122 if (TREE_CODE (return_val) != SSA_NAME
2123 || !SSA_NAME_DEF_STMT (return_val)
2124 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2125 return;
2126 phi = SSA_NAME_DEF_STMT (return_val);
2127 phi_num_args = gimple_phi_num_args (phi);
2128 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2130 /* Avoid the degenerate case where all return values from the function
2131 belong to the same category (i.e. they are all positive constants),
2132 so we can hardly say anything about them. */
2133 for (i = 1; i < phi_num_args; i++)
2134 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2135 break;
2136 if (i != phi_num_args)
2137 for (i = 0; i < phi_num_args; i++)
2139 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2140 if (pred != PRED_NO_PREDICTION)
2141 predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2142 direction);
2146 /* Look for basic blocks that contain unlikely-to-happen events
2147 (such as noreturn calls) and mark all paths leading to execution
2148 of these basic blocks as unlikely. */
2150 static void
2151 tree_bb_level_predictions (void)
2153 basic_block bb;
2154 bool has_return_edges = false;
2155 edge e;
2156 edge_iterator ei;
2158 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2159 if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2161 has_return_edges = true;
2162 break;
2165 apply_return_prediction ();
2167 FOR_EACH_BB_FN (bb, cfun)
2169 gimple_stmt_iterator gsi;
2171 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2173 gimple stmt = gsi_stmt (gsi);
2174 tree decl;
2176 if (is_gimple_call (stmt))
2178 if ((gimple_call_flags (stmt) & ECF_NORETURN)
2179 && has_return_edges)
2180 predict_paths_leading_to (bb, PRED_NORETURN,
2181 NOT_TAKEN);
2182 decl = gimple_call_fndecl (stmt);
2183 if (decl
2184 && lookup_attribute ("cold",
2185 DECL_ATTRIBUTES (decl)))
2186 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2187 NOT_TAKEN);
2189 else if (gimple_code (stmt) == GIMPLE_PREDICT)
2191 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2192 gimple_predict_outcome (stmt));
2193 /* Keep GIMPLE_PREDICT around so early inlining will propagate
2194 hints to callers. */
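/* For illustration (not part of the original source): given

     if (!p)
       abort ();

   the call to abort is ECF_NORETURN, so, provided the function has normal
   return edges, every path leading to that block is predicted as not taken;
   calls to functions declared __attribute__ ((cold)) are handled the same
   way above.  */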
2200 #ifdef ENABLE_CHECKING
2202 /* Callback for hash_map::traverse, asserts that the pointer map is
2203 empty. */
2205 bool
2206 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2207 void *)
2209 gcc_assert (!value);
2210 return false;
2212 #endif
2214 /* Predict branch probabilities and estimate profile for basic block BB. */
2216 static void
2217 tree_estimate_probability_bb (basic_block bb)
2219 edge e;
2220 edge_iterator ei;
2221 gimple last;
2223 FOR_EACH_EDGE (e, ei, bb->succs)
2225 /* Predict edges to user labels with attributes. */
2226 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2228 gimple_stmt_iterator gi;
2229 for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2231 gimple stmt = gsi_stmt (gi);
2232 tree decl;
2234 if (gimple_code (stmt) != GIMPLE_LABEL)
2235 break;
2236 decl = gimple_label_label (stmt);
2237 if (DECL_ARTIFICIAL (decl))
2238 continue;
2240 /* Finally, we have a user-defined label. */
2241 if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2242 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2243 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2244 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
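/* Illustrative sketch (assumed example, not from the original source):
   a user label marked as unlikely, e.g.

     if (err)
       goto cleanup;
     ...
   cleanup: __attribute__ ((cold));

   causes the edge into the block starting at CLEANUP to be predicted
   as not taken via PRED_COLD_LABEL.  */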
2248 /* Predict early returns to be probable, as we've already taken
2249 care of error returns, and other cases are often used for
2250 fast paths through the function.
2252 Since we've already removed the return statements, we are
2253 looking for a CFG like:
2255 if (conditional)
2258 goto return_block
2260 some other blocks
2261 return_block:
2262 return_stmt. */
2263 if (e->dest != bb->next_bb
2264 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2265 && single_succ_p (e->dest)
2266 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2267 && (last = last_stmt (e->dest)) != NULL
2268 && gimple_code (last) == GIMPLE_RETURN)
2270 edge e1;
2271 edge_iterator ei1;
2273 if (single_succ_p (bb))
2275 FOR_EACH_EDGE (e1, ei1, bb->preds)
2276 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2277 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2278 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2279 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2281 else
2282 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2283 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2284 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2285 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2288 /* Look for a block we are guarding (i.e. we dominate it,
2289 but it doesn't postdominate us). */
2290 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2291 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2292 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2294 gimple_stmt_iterator bi;
2296 /* The call heuristic claims that a guarded function call
2297 is improbable. This is because such calls are often used
2298 to signal exceptional situations such as printing error
2299 messages. */
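/* E.g. (illustration only), in

     if (fd < 0)
       perror ("open");

   the guarded block contains a call with side effects, so the edge into it
   is predicted as not taken by PRED_CALL.  */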
2300 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2301 gsi_next (&bi))
2303 gimple stmt = gsi_stmt (bi);
2304 if (is_gimple_call (stmt)
2305 /* Constant and pure calls are hardly used to signal
2306 something exceptional. */
2307 && gimple_has_side_effects (stmt))
2309 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2310 break;
2315 tree_predict_by_opcode (bb);
2318 /* Predict branch probabilities and estimate profile of the tree CFG.
2319 This function can be called from the loop optimizers to recompute
2320 the profile information. */
2322 void
2323 tree_estimate_probability (void)
2325 basic_block bb;
2327 add_noreturn_fake_exit_edges ();
2328 connect_infinite_loops_to_exit ();
2329 /* We use loop_niter_by_eval, which requires that the loops have
2330 preheaders. */
2331 create_preheaders (CP_SIMPLE_PREHEADERS);
2332 calculate_dominance_info (CDI_POST_DOMINATORS);
2334 bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2335 tree_bb_level_predictions ();
2336 record_loop_exits ();
2338 if (number_of_loops (cfun) > 1)
2339 predict_loops ();
2341 FOR_EACH_BB_FN (bb, cfun)
2342 tree_estimate_probability_bb (bb);
2344 FOR_EACH_BB_FN (bb, cfun)
2345 combine_predictions_for_bb (bb);
2347 #ifdef ENABLE_CHECKING
2348 bb_predictions->traverse<void *, assert_is_empty> (NULL);
2349 #endif
2350 delete bb_predictions;
2351 bb_predictions = NULL;
2353 estimate_bb_frequencies (false);
2354 free_dominance_info (CDI_POST_DOMINATORS);
2355 remove_fake_exit_edges ();
2358 /* Predict, by PRED, edges to successors of CUR whose sources are not
2359 postdominated by BB, and recurse to all postdominators. */
2361 static void
2362 predict_paths_for_bb (basic_block cur, basic_block bb,
2363 enum br_predictor pred,
2364 enum prediction taken,
2365 bitmap visited)
2367 edge e;
2368 edge_iterator ei;
2369 basic_block son;
2371 /* We are looking for all edges forming an edge cut induced by the
2372 set of all blocks postdominated by BB. */
2373 FOR_EACH_EDGE (e, ei, cur->preds)
2374 if (e->src->index >= NUM_FIXED_BLOCKS
2375 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2377 edge e2;
2378 edge_iterator ei2;
2379 bool found = false;
2381 /* Ignore fake edges and eh, we predict them as not taken anyway. */
2382 if (e->flags & (EDGE_EH | EDGE_FAKE))
2383 continue;
2384 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2386 /* See if there is an edge from e->src that is not abnormal
2387 and does not lead to BB. */
2388 FOR_EACH_EDGE (e2, ei2, e->src->succs)
2389 if (e2 != e
2390 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2391 && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2393 found = true;
2394 break;
2397 /* If there is a non-abnormal path leaving e->src, predict the edge
2398 using the predictor. Otherwise we need to look for paths
2399 leading to e->src.
2401 The second may lead to an infinite loop in the case we are predicting
2402 regions that are only reachable by abnormal edges. We simply
2403 prevent visiting a given BB twice. */
2404 if (found)
2405 predict_edge_def (e, pred, taken);
2406 else if (bitmap_set_bit (visited, e->src->index))
2407 predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2409 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2410 son;
2411 son = next_dom_son (CDI_POST_DOMINATORS, son))
2412 predict_paths_for_bb (son, bb, pred, taken, visited);
2415 /* Set branch probabilities on all paths leading to BB according to
2416 PREDiction and whether it is TAKEN. */
2418 static void
2419 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2420 enum prediction taken)
2422 bitmap visited = BITMAP_ALLOC (NULL);
2423 predict_paths_for_bb (bb, bb, pred, taken, visited);
2424 BITMAP_FREE (visited);
2427 /* Like predict_paths_leading_to but take edge instead of basic block. */
2429 static void
2430 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2431 enum prediction taken)
2433 bool has_nonloop_edge = false;
2434 edge_iterator ei;
2435 edge e2;
2437 basic_block bb = e->src;
2438 FOR_EACH_EDGE (e2, ei, bb->succs)
2439 if (e2->dest != e->src && e2->dest != e->dest
2440 && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2441 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2443 has_nonloop_edge = true;
2444 break;
2446 if (!has_nonloop_edge)
2448 bitmap visited = BITMAP_ALLOC (NULL);
2449 predict_paths_for_bb (bb, bb, pred, taken, visited);
2450 BITMAP_FREE (visited);
2452 else
2453 predict_edge_def (e, pred, taken);
2456 /* This is used to carry information about basic blocks. It is
2457 attached to the AUX field of the standard CFG block. */
2459 struct block_info
2461 /* Estimated frequency of execution of basic_block. */
2462 sreal frequency;
2464 /* To keep queue of basic blocks to process. */
2465 basic_block next;
2467 /* Number of predecessors we need to visit first. */
2468 int npredecessors;
2471 /* Similar information for edges. */
2472 struct edge_prob_info
2474 /* In case the edge is a loopback edge, the probability that the edge will
2475 be reached provided that the header is. The estimated number of iterations
2476 of the loop can then be computed as 1 / (1 - back_edge_prob). */
2477 sreal back_edge_prob;
2478 /* True if the edge is a loopback edge in the natural loop. */
2479 unsigned int back_edge:1;
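/* Worked example (illustrative): a latch edge with back_edge_prob == 0.9
   corresponds to an estimated 1 / (1 - 0.9) == 10 iterations of the loop
   per entry into its header.  */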
2482 #define BLOCK_INFO(B) ((block_info *) (B)->aux)
2483 #undef EDGE_INFO
2484 #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
2486 /* Helper function for estimate_bb_frequencies.
2487 Propagate the frequencies in blocks marked in
2488 TOVISIT, starting in HEAD. */
2490 static void
2491 propagate_freq (basic_block head, bitmap tovisit)
2493 basic_block bb;
2494 basic_block last;
2495 unsigned i;
2496 edge e;
2497 basic_block nextbb;
2498 bitmap_iterator bi;
2500 /* For each basic block we need to visit, count the number of its
2501 predecessors that we need to visit first. */
2502 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2504 edge_iterator ei;
2505 int count = 0;
2507 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2509 FOR_EACH_EDGE (e, ei, bb->preds)
2511 bool visit = bitmap_bit_p (tovisit, e->src->index);
2513 if (visit && !(e->flags & EDGE_DFS_BACK))
2514 count++;
2515 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2516 fprintf (dump_file,
2517 "Irreducible region hit, ignoring edge to %i->%i\n",
2518 e->src->index, bb->index);
2520 BLOCK_INFO (bb)->npredecessors = count;
2521 /* When the function never returns, we will never process the exit block. */
2522 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2523 bb->count = bb->frequency = 0;
2526 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2527 last = head;
2528 for (bb = head; bb; bb = nextbb)
2530 edge_iterator ei;
2531 sreal cyclic_probability, frequency;
2533 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2534 memcpy (&frequency, &real_zero, sizeof (real_zero));
2536 nextbb = BLOCK_INFO (bb)->next;
2537 BLOCK_INFO (bb)->next = NULL;
2539 /* Compute frequency of basic block. */
2540 if (bb != head)
2542 #ifdef ENABLE_CHECKING
2543 FOR_EACH_EDGE (e, ei, bb->preds)
2544 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2545 || (e->flags & EDGE_DFS_BACK));
2546 #endif
2548 FOR_EACH_EDGE (e, ei, bb->preds)
2549 if (EDGE_INFO (e)->back_edge)
2551 sreal_add (&cyclic_probability, &cyclic_probability,
2552 &EDGE_INFO (e)->back_edge_prob);
2554 else if (!(e->flags & EDGE_DFS_BACK))
2556 sreal tmp;
2558 /* frequency += (e->probability
2559 * BLOCK_INFO (e->src)->frequency /
2560 REG_BR_PROB_BASE); */
2562 sreal_init (&tmp, e->probability, 0);
2563 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2564 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2565 sreal_add (&frequency, &frequency, &tmp);
2568 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2570 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2571 sizeof (frequency));
2573 else
2575 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2577 memcpy (&cyclic_probability, &real_almost_one,
2578 sizeof (real_almost_one));
2581 /* BLOCK_INFO (bb)->frequency = frequency
2582 / (1 - cyclic_probability) */
2584 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2585 sreal_div (&BLOCK_INFO (bb)->frequency,
2586 &frequency, &cyclic_probability);
2590 bitmap_clear_bit (tovisit, bb->index);
2592 e = find_edge (bb, head);
2593 if (e)
2595 sreal tmp;
2597 /* EDGE_INFO (e)->back_edge_prob
2598 = ((e->probability * BLOCK_INFO (bb)->frequency)
2599 / REG_BR_PROB_BASE); */
2601 sreal_init (&tmp, e->probability, 0);
2602 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2603 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2604 &tmp, &real_inv_br_prob_base);
2607 /* Propagate to successor blocks. */
2608 FOR_EACH_EDGE (e, ei, bb->succs)
2609 if (!(e->flags & EDGE_DFS_BACK)
2610 && BLOCK_INFO (e->dest)->npredecessors)
2612 BLOCK_INFO (e->dest)->npredecessors--;
2613 if (!BLOCK_INFO (e->dest)->npredecessors)
2615 if (!nextbb)
2616 nextbb = e->dest;
2617 else
2618 BLOCK_INFO (last)->next = e->dest;
2620 last = e->dest;
2626 /* Estimate frequencies in loops at same nest level. */
2628 static void
2629 estimate_loops_at_level (struct loop *first_loop)
2631 struct loop *loop;
2633 for (loop = first_loop; loop; loop = loop->next)
2635 edge e;
2636 basic_block *bbs;
2637 unsigned i;
2638 bitmap tovisit = BITMAP_ALLOC (NULL);
2640 estimate_loops_at_level (loop->inner);
2642 /* Find current loop back edge and mark it. */
2643 e = loop_latch_edge (loop);
2644 EDGE_INFO (e)->back_edge = 1;
2646 bbs = get_loop_body (loop);
2647 for (i = 0; i < loop->num_nodes; i++)
2648 bitmap_set_bit (tovisit, bbs[i]->index);
2649 free (bbs);
2650 propagate_freq (loop->header, tovisit);
2651 BITMAP_FREE (tovisit);
2655 /* Propagates frequencies through structure of loops. */
2657 static void
2658 estimate_loops (void)
2660 bitmap tovisit = BITMAP_ALLOC (NULL);
2661 basic_block bb;
2663 /* Start by estimating the frequencies in the loops. */
2664 if (number_of_loops (cfun) > 1)
2665 estimate_loops_at_level (current_loops->tree_root->inner);
2667 /* Now propagate the frequencies through all the blocks. */
2668 FOR_ALL_BB_FN (bb, cfun)
2670 bitmap_set_bit (tovisit, bb->index);
2672 propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2673 BITMAP_FREE (tovisit);
2676 /* Drop the profile for NODE to guessed, and update its frequency based on
2677 whether it is expected to be hot given the CALL_COUNT. */
2679 static void
2680 drop_profile (struct cgraph_node *node, gcov_type call_count)
2682 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2683 /* In the case where this was called by another function with a
2684 dropped profile, call_count will be 0. Since there are no
2685 non-zero call counts to this function, we don't know for sure
2686 whether it is hot, and therefore it will be marked normal below. */
2687 bool hot = maybe_hot_count_p (NULL, call_count);
2689 if (dump_file)
2690 fprintf (dump_file,
2691 "Dropping 0 profile for %s/%i. %s based on calls.\n",
2692 node->name (), node->order,
2693 hot ? "Function is hot" : "Function is normal");
2694 /* We only expect to miss profiles for functions that are reached
2695 via non-zero call edges in cases where the function may have
2696 been linked from another module or library (COMDATs and extern
2697 templates). See the comments below for handle_missing_profiles.
2698 Also, only warn in cases where the missing counts exceed the
2699 number of training runs. In certain cases with an execv followed
2700 by a no-return call the profile for the no-return call is not
2701 dumped and there can be a mismatch. */
2702 if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2703 && call_count > profile_info->runs)
2705 if (flag_profile_correction)
2707 if (dump_file)
2708 fprintf (dump_file,
2709 "Missing counts for called function %s/%i\n",
2710 node->name (), node->order);
2712 else
2713 warning (0, "Missing counts for called function %s/%i",
2714 node->name (), node->order);
2717 profile_status_for_fn (fn)
2718 = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2719 node->frequency
2720 = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2723 /* In the case of COMDAT routines, multiple object files will contain the same
2724 function and the linker will select one for the binary. In that case
2725 all the other copies from the profile-instrumented binary will be missing
2726 profile counts. Look for cases where this happened, due to non-zero
2727 call counts going to 0-count functions, and drop the profile to guessed
2728 so that we can use the estimated probabilities and avoid optimizing only
2729 for size.
2731 The other case where the profile may be missing is when the routine
2732 is not going to be emitted to the object file, e.g. for "extern template"
2733 class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2734 all other cases of non-zero calls to 0-count functions. */
2736 void
2737 handle_missing_profiles (void)
2739 struct cgraph_node *node;
2740 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2741 vec<struct cgraph_node *> worklist;
2742 worklist.create (64);
2744 /* See if 0 count function has non-0 count callers. In this case we
2745 lost some profile. Drop its function profile to PROFILE_GUESSED. */
2746 FOR_EACH_DEFINED_FUNCTION (node)
2748 struct cgraph_edge *e;
2749 gcov_type call_count = 0;
2750 gcov_type max_tp_first_run = 0;
2751 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2753 if (node->count)
2754 continue;
2755 for (e = node->callers; e; e = e->next_caller)
2757 call_count += e->count;
2759 if (e->caller->tp_first_run > max_tp_first_run)
2760 max_tp_first_run = e->caller->tp_first_run;
2763 /* If the time profile is missing, assign the maximum that comes from
2764 the caller functions. */
2765 if (!node->tp_first_run && max_tp_first_run)
2766 node->tp_first_run = max_tp_first_run + 1;
2768 if (call_count
2769 && fn && fn->cfg
2770 && (call_count * unlikely_count_fraction >= profile_info->runs))
2772 drop_profile (node, call_count);
2773 worklist.safe_push (node);
2777 /* Propagate the profile dropping to other 0-count COMDATs that are
2778 potentially called by COMDATs we already dropped the profile on. */
2779 while (worklist.length () > 0)
2781 struct cgraph_edge *e;
2783 node = worklist.pop ();
2784 for (e = node->callees; e; e = e->next_callee)
2786 struct cgraph_node *callee = e->callee;
2787 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2789 if (callee->count > 0)
2790 continue;
2791 if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2792 && profile_status_for_fn (fn) == PROFILE_READ)
2794 drop_profile (node, 0);
2795 worklist.safe_push (callee);
2799 worklist.release ();
2802 /* Convert counts measured by profile driven feedback to frequencies.
2803 Return nonzero iff there was any nonzero execution count. */
2806 counts_to_freqs (void)
2808 gcov_type count_max, true_count_max = 0;
2809 basic_block bb;
2811 /* Don't overwrite the estimated frequencies when the profile for
2812 the function is missing. We may drop this function to PROFILE_GUESSED
2813 later in drop_profile (). */
2814 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2815 return 0;
2817 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2818 true_count_max = MAX (bb->count, true_count_max);
2820 count_max = MAX (true_count_max, 1);
2821 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2822 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2824 return true_count_max;
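/* Rough numeric example (not part of the original source): with
   count_max == 1000 and bb->count == 250, the scaling above yields
   bb->frequency == (250 * BB_FREQ_MAX + 500) / 1000, i.e. about a quarter
   of BB_FREQ_MAX; frequencies are thus normalized so the hottest block
   gets BB_FREQ_MAX.  */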
2827 /* Return true if the function is likely to be expensive, so there is no
2828 point in optimizing the performance of the prologue, epilogue or doing
2829 inlining at the expense of code size growth. THRESHOLD is the limit on the
2830 number of instructions the function can execute on average to still be considered not expensive. */
2832 bool
2833 expensive_function_p (int threshold)
2835 unsigned int sum = 0;
2836 basic_block bb;
2837 unsigned int limit;
2839 /* We cannot compute accurately for large thresholds due to scaled
2840 frequencies. */
2841 gcc_assert (threshold <= BB_FREQ_MAX);
2843 /* Frequencies are out of range. This either means that the function contains
2844 an internal loop executing more than BB_FREQ_MAX times or profile feedback
2845 is available and the function has not been executed at all. */
2846 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2847 return true;
2849 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
2850 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2851 FOR_EACH_BB_FN (bb, cfun)
2853 rtx_insn *insn;
2855 FOR_BB_INSNS (bb, insn)
2856 if (active_insn_p (insn))
2858 sum += bb->frequency;
2859 if (sum > limit)
2860 return true;
2864 return false;
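/* Illustrative sketch (assumed numbers, not from the original source): with
   the entry block frequency at 1000 and THRESHOLD == 50, LIMIT is 50000.
   Each active insn contributes its block's frequency to SUM, so SUM
   exceeding LIMIT means the function executes more than about 50 insns
   per invocation on average.  */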
2867 /* Estimate and propagate basic block frequencies using the given branch
2868 probabilities. If FORCE is true, the frequencies are used to estimate
2869 the counts even when there are already non-zero profile counts. */
2871 void
2872 estimate_bb_frequencies (bool force)
2874 basic_block bb;
2875 sreal freq_max;
2877 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2879 static int real_values_initialized = 0;
2881 if (!real_values_initialized)
2883 real_values_initialized = 1;
2884 sreal_init (&real_zero, 0, 0);
2885 sreal_init (&real_one, 1, 0);
2886 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2887 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2888 sreal_init (&real_one_half, 1, -1);
2889 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2890 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2893 mark_dfs_back_edges ();
2895 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2896 REG_BR_PROB_BASE;
2898 /* Set up block info for each basic block. */
2899 alloc_aux_for_blocks (sizeof (block_info));
2900 alloc_aux_for_edges (sizeof (edge_prob_info));
2901 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2903 edge e;
2904 edge_iterator ei;
2906 FOR_EACH_EDGE (e, ei, bb->succs)
2908 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2909 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2910 &EDGE_INFO (e)->back_edge_prob,
2911 &real_inv_br_prob_base);
2915 /* First compute frequencies locally for each loop from innermost
2916 to outermost to examine frequencies for back edges. */
2917 estimate_loops ();
2919 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2920 FOR_EACH_BB_FN (bb, cfun)
2921 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2922 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2924 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2925 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2927 sreal tmp;
2929 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2930 sreal_add (&tmp, &tmp, &real_one_half);
2931 bb->frequency = sreal_to_int (&tmp);
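/* Note (added for clarity, not in the original source): at this point
   FREQ_MAX holds BB_FREQ_MAX divided by the largest propagated frequency,
   so the multiplication above rescales every block into the
   0 .. BB_FREQ_MAX range, with real_one_half giving round-to-nearest.  */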
2934 free_aux_for_blocks ();
2935 free_aux_for_edges ();
2937 compute_function_frequency ();
2940 /* Decide whether function is hot, cold or unlikely executed. */
2941 void
2942 compute_function_frequency (void)
2944 basic_block bb;
2945 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2947 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2948 || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2949 node->only_called_at_startup = true;
2950 if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2951 node->only_called_at_exit = true;
2953 if (profile_status_for_fn (cfun) != PROFILE_READ)
2955 int flags = flags_from_decl_or_type (current_function_decl);
2956 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2957 != NULL)
2958 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2959 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2960 != NULL)
2961 node->frequency = NODE_FREQUENCY_HOT;
2962 else if (flags & ECF_NORETURN)
2963 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2964 else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2965 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2966 else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2967 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2968 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2969 return;
2972 /* Only the first time try to drop the function into unlikely executed.
2973 After inlining the roundoff errors may confuse us.
2974 The ipa-profile pass will drop functions only called from unlikely
2975 functions to unlikely, and that is most of what we care about. */
2976 if (!cfun->after_inlining)
2977 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2978 FOR_EACH_BB_FN (bb, cfun)
2980 if (maybe_hot_bb_p (cfun, bb))
2982 node->frequency = NODE_FREQUENCY_HOT;
2983 return;
2985 if (!probably_never_executed_bb_p (cfun, bb))
2986 node->frequency = NODE_FREQUENCY_NORMAL;
2990 /* Build PREDICT_EXPR. */
2991 tree
2992 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2994 tree t = build1 (PREDICT_EXPR, void_type_node,
2995 build_int_cst (integer_type_node, predictor));
2996 SET_PREDICT_EXPR_OUTCOME (t, taken);
2997 return t;
3000 const char *
3001 predictor_name (enum br_predictor predictor)
3003 return predictor_info[predictor].name;
3006 /* Predict branch probabilities and estimate profile of the tree CFG. */
3008 namespace {
3010 const pass_data pass_data_profile =
3012 GIMPLE_PASS, /* type */
3013 "profile_estimate", /* name */
3014 OPTGROUP_NONE, /* optinfo_flags */
3015 TV_BRANCH_PROB, /* tv_id */
3016 PROP_cfg, /* properties_required */
3017 0, /* properties_provided */
3018 0, /* properties_destroyed */
3019 0, /* todo_flags_start */
3020 0, /* todo_flags_finish */
3023 class pass_profile : public gimple_opt_pass
3025 public:
3026 pass_profile (gcc::context *ctxt)
3027 : gimple_opt_pass (pass_data_profile, ctxt)
3030 /* opt_pass methods: */
3031 virtual bool gate (function *) { return flag_guess_branch_prob; }
3032 virtual unsigned int execute (function *);
3034 }; // class pass_profile
3036 unsigned int
3037 pass_profile::execute (function *fun)
3039 unsigned nb_loops;
3041 loop_optimizer_init (LOOPS_NORMAL);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 flow_loops_dump (dump_file, NULL, 0);
3045 mark_irreducible_loops ();
3047 nb_loops = number_of_loops (fun);
3048 if (nb_loops > 1)
3049 scev_initialize ();
3051 tree_estimate_probability ();
3053 if (nb_loops > 1)
3054 scev_finalize ();
3056 loop_optimizer_finalize ();
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 gimple_dump_cfg (dump_file, dump_flags);
3059 if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3060 profile_status_for_fn (fun) = PROFILE_GUESSED;
3061 return 0;
3064 } // anon namespace
3066 gimple_opt_pass *
3067 make_pass_profile (gcc::context *ctxt)
3069 return new pass_profile (ctxt);
3072 namespace {
3074 const pass_data pass_data_strip_predict_hints =
3076 GIMPLE_PASS, /* type */
3077 "*strip_predict_hints", /* name */
3078 OPTGROUP_NONE, /* optinfo_flags */
3079 TV_BRANCH_PROB, /* tv_id */
3080 PROP_cfg, /* properties_required */
3081 0, /* properties_provided */
3082 0, /* properties_destroyed */
3083 0, /* todo_flags_start */
3084 0, /* todo_flags_finish */
3087 class pass_strip_predict_hints : public gimple_opt_pass
3089 public:
3090 pass_strip_predict_hints (gcc::context *ctxt)
3091 : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3094 /* opt_pass methods: */
3095 opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3096 virtual unsigned int execute (function *);
3098 }; // class pass_strip_predict_hints
3100 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3101 we no longer need. */
3102 unsigned int
3103 pass_strip_predict_hints::execute (function *fun)
3105 basic_block bb;
3106 gimple ass_stmt;
3107 tree var;
3109 FOR_EACH_BB_FN (bb, fun)
3111 gimple_stmt_iterator bi;
3112 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3114 gimple stmt = gsi_stmt (bi);
3116 if (gimple_code (stmt) == GIMPLE_PREDICT)
3118 gsi_remove (&bi, true);
3119 continue;
3121 else if (is_gimple_call (stmt))
3123 tree fndecl = gimple_call_fndecl (stmt);
3125 if ((fndecl
3126 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3127 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3128 && gimple_call_num_args (stmt) == 2)
3129 || (gimple_call_internal_p (stmt)
3130 && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3132 var = gimple_call_lhs (stmt);
3133 if (var)
3135 ass_stmt
3136 = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3137 gsi_replace (&bi, ass_stmt, true);
3139 else
3141 gsi_remove (&bi, true);
3142 continue;
3146 gsi_next (&bi);
3149 return 0;
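/* Example of the transformation (illustration only): a statement such as

     tmp = __builtin_expect (cond, 1);

   is rewritten above into the plain copy

     tmp = cond;

   while GIMPLE_PREDICT statements are removed outright, since the hints
   they carried are no longer needed at this point.  */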
3152 } // anon namespace
3154 gimple_opt_pass *
3155 make_pass_strip_predict_hints (gcc::context *ctxt)
3157 return new pass_strip_predict_hints (ctxt);
3160 /* Rebuild function frequencies. Passes are in general expected to
3161 maintain the profile by hand; however, in some cases this is not possible:
3162 for example, when inlining several functions with loops, frequencies might run
3163 out of scale and thus need to be recomputed. */
3165 void
3166 rebuild_frequencies (void)
3168 timevar_push (TV_REBUILD_FREQUENCIES);
3170 /* When the max bb count in the function is small, there is a higher
3171 chance that there were truncation errors in the integer scaling
3172 of counts by inlining and other optimizations. This could lead
3173 to incorrect classification of code as being cold when it isn't.
3174 In that case, force the estimation of bb counts/frequencies from the
3175 branch probabilities, rather than computing frequencies from counts,
3176 which may also lead to frequencies incorrectly reduced to 0. There
3177 is less precision in the probabilities, so we only do this for small
3178 max counts. */
3179 gcov_type count_max = 0;
3180 basic_block bb;
3181 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3182 count_max = MAX (bb->count, count_max);
3184 if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3185 || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3186 && count_max < REG_BR_PROB_BASE/10))
3188 loop_optimizer_init (0);
3189 add_noreturn_fake_exit_edges ();
3190 mark_irreducible_loops ();
3191 connect_infinite_loops_to_exit ();
3192 estimate_bb_frequencies (true);
3193 remove_fake_exit_edges ();
3194 loop_optimizer_finalize ();
3196 else if (profile_status_for_fn (cfun) == PROFILE_READ)
3197 counts_to_freqs ();
3198 else
3199 gcc_unreachable ();
3200 timevar_pop (TV_REBUILD_FREQUENCIES);