gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "calls.h"
  36 #include "rtl.h"
  37 #include "tm_p.h"
  38 #include "hard-reg-set.h"
  39 #include "basic-block.h"
  40 #include "insn-config.h"
  41 #include "regs.h"
  42 #include "flags.h"
  43 #include "hashtab.h"
  44 #include "hash-set.h"
  45 #include "vec.h"
  46 #include "machmode.h"
  47 #include "input.h"
  48 #include "function.h"
  49 #include "except.h"
  50 #include "diagnostic-core.h"
  51 #include "recog.h"
  52 #include "expr.h"
  53 #include "predict.h"
  54 #include "coverage.h"
  55 #include "sreal.h"
  56 #include "params.h"
  57 #include "target.h"
  58 #include "cfgloop.h"
  59 #include "hash-map.h"
  60 #include "tree-ssa-alias.h"
  61 #include "internal-fn.h"
  62 #include "gimple-expr.h"
  63 #include "is-a.h"
  64 #include "gimple.h"
  65 #include "gimple-iterator.h"
  66 #include "gimple-ssa.h"
  67 #include "cgraph.h"
  68 #include "tree-cfg.h"
  69 #include "tree-phinodes.h"
  70 #include "ssa-iterators.h"
  71 #include "tree-ssa-loop-niter.h"
  72 #include "tree-ssa-loop.h"
  73 #include "tree-pass.h"
  74 #include "tree-scalar-evolution.h"
  75 #include "cfgloop.h"
  76
  77 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  78                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  79 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  80              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  81
  82 static void combine_predictions_for_insn (rtx_insn *, basic_block);
  83 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  84 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  85 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  86 static bool can_predict_insn_p (const rtx_insn *);
  87
  88 /* Information we hold about each branch predictor.
  89    Filled using information from predict.def.  */
  90
  91 struct predictor_info
  92 {
  93   const char *const name;       /* Name used in the debugging dumps.  */
  94   const int hitrate;            /* Expected hitrate used by
  95                                    predict_insn_def call.  */
  96   const int flags;
  97 };
  98
  99 /* Use given predictor without Dempster-Shaffer theory if it matches
 100    using first_match heuristics.  */
 101 #define PRED_FLAG_FIRST_MATCH 1
 102
 103 /* Recompute hitrate in percent to our representation.  */
 104
 105 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
 106
 107 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
 108 static const struct predictor_info predictor_info[]= {
 109 #include "predict.def"
 110
 111   /* Upper bound on predictors.  */
 112   {NULL, 0, 0}
 113 };
 114 #undef DEF_PREDICTOR
 115
 116 /* Return TRUE if frequency FREQ is considered to be hot.  */
 117
 118 static inline bool
 119 maybe_hot_frequency_p (struct function *fun, int freq)
 120 {
 121   struct cgraph_node *node = cgraph_node::get (fun->decl);
 122   if (!profile_info || !flag_branch_probabilities)
 123     {
 124       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 125         return false;
 126       if (node->frequency == NODE_FREQUENCY_HOT)
 127         return true;
 128     }
 129   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 130     return true;
 131   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 132       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
 133     return false;
 134   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 135     return false;
 136   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
 137               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 138     return false;
 139   return true;
 140 }
 141
 142 static gcov_type min_count = -1;
 143
 144 /* Determine the threshold for hot BB counts.  */
 145
 146 gcov_type
 147 get_hot_bb_threshold ()
 148 {
 149   gcov_working_set_t *ws;
 150   if (min_count == -1)
 151     {
 152       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 153       gcc_assert (ws);
 154       min_count = ws->min_counter;
 155     }
 156   return min_count;
 157 }
 158
 159 /* Set the threshold for hot BB counts.  */
 160
 161 void
 162 set_hot_bb_threshold (gcov_type min)
 163 {
 164   min_count = min;
 165 }
 166
 167 /* Return TRUE if frequency FREQ is considered to be hot.  */
 168
 169 static inline bool
 170 maybe_hot_count_p (struct function *fun, gcov_type count)
 171 {
 172   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 173     return true;
 174   /* Code executed at most once is not hot.  */
 175   if (profile_info->runs >= count)
 176     return false;
 177   return (count >= get_hot_bb_threshold ());
 178 }
 179
 180 /* Return true in case BB can be CPU intensive and should be optimized
 181    for maximal performance.  */
 182
 183 bool
 184 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 185 {
 186   gcc_checking_assert (fun);
 187   if (profile_status_for_fn (fun) == PROFILE_READ)
 188     return maybe_hot_count_p (fun, bb->count);
 189   return maybe_hot_frequency_p (fun, bb->frequency);
 190 }
 191
 192 /* Return true if the call can be hot.  */
 193
 194 bool
 195 cgraph_edge::maybe_hot_p (void)
 196 {
 197   if (profile_info && flag_branch_probabilities
 198       && !maybe_hot_count_p (NULL, count))
 199     return false;
 200   if (caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 201       || (callee
 202           && callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 203     return false;
 204   if (caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 205       && (callee
 206           && callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 207     return false;
 208   if (optimize_size)
 209     return false;
 210   if (caller->frequency == NODE_FREQUENCY_HOT)
 211     return true;
 212   if (caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 213       && frequency < CGRAPH_FREQ_BASE * 3 / 2)
 214     return false;
 215   if (flag_guess_branch_prob)
 216     {
 217       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 218           || frequency <= (CGRAPH_FREQ_BASE
 219                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 220         return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Return true in case BB can be CPU intensive and should be optimized
 226    for maximal performance.  */
 227
 228 bool
 229 maybe_hot_edge_p (edge e)
 230 {
 231   if (profile_status_for_fn (cfun) == PROFILE_READ)
 232     return maybe_hot_count_p (cfun, e->count);
 233   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 234 }
 235
 236
 237
 238 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 239    node frequency reflects never being executed.  */
 240
 241 static bool
 242 probably_never_executed (struct function *fun,
 243                          gcov_type count, int frequency)
 244 {
 245   gcc_checking_assert (fun);
 246   if (profile_status_for_fn (cfun) == PROFILE_READ)
 247     {
 248       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 249       if (count * unlikely_count_fraction >= profile_info->runs)
 250         return false;
 251       if (!frequency)
 252         return true;
 253       if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
 254         return false;
 255       if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
 256         {
 257           gcov_type computed_count;
 258           /* Check for possibility of overflow, in which case entry bb count
 259              is large enough to do the division first without losing much
 260              precision.  */
 261           if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
 262               REG_BR_PROB_BASE)
 263             {
 264               gcov_type scaled_count
 265                   = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
 266              unlikely_count_fraction;
 267               computed_count = RDIV (scaled_count,
 268                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 269             }
 270           else
 271             {
 272               computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
 273                                      ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
 274               computed_count *= frequency * unlikely_count_fraction;
 275             }
 276           if (computed_count >= profile_info->runs)
 277             return false;
 278         }
 279       return true;
 280     }
 281   if ((!profile_info || !flag_branch_probabilities)
 282       && (cgraph_node::get (fun->decl)->frequency
 283           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 284     return true;
 285   return false;
 286 }
 287
 288
 289 /* Return true in case BB is probably never executed.  */
 290
 291 bool
 292 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 293 {
 294   return probably_never_executed (fun, bb->count, bb->frequency);
 295 }
 296
 297
 298 /* Return true in case edge E is probably never executed.  */
 299
 300 bool
 301 probably_never_executed_edge_p (struct function *fun, edge e)
 302 {
 303   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 304 }
 305
 306 /* Return true if function should be optimized for size.  */
 307
 308 bool
 309 cgraph_node::optimize_for_size_p (void)
 310 {
 311   if (optimize_size)
 312     return true;
 313   if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 314     return true;
 315   else
 316     return false;
 317 }
 318
 319 /* Return true when current function should always be optimized for size.  */
 320
 321 bool
 322 optimize_function_for_size_p (struct function *fun)
 323 {
 324   if (optimize_size)
 325     return true;
 326   if (!fun || !fun->decl)
 327     return false;
 328
 329   cgraph_node *n = cgraph_node::get (fun->decl);
 330   return n && n->optimize_for_size_p ();
 331 }
 332
 333 /* Return true when current function should always be optimized for speed.  */
 334
 335 bool
 336 optimize_function_for_speed_p (struct function *fun)
 337 {
 338   return !optimize_function_for_size_p (fun);
 339 }
 340
 341 /* Return TRUE when BB should be optimized for size.  */
 342
 343 bool
 344 optimize_bb_for_size_p (const_basic_block bb)
 345 {
 346   return (optimize_function_for_size_p (cfun)
 347           || (bb && !maybe_hot_bb_p (cfun, bb)));
 348 }
 349
 350 /* Return TRUE when BB should be optimized for speed.  */
 351
 352 bool
 353 optimize_bb_for_speed_p (const_basic_block bb)
 354 {
 355   return !optimize_bb_for_size_p (bb);
 356 }
 357
 358 /* Return TRUE when BB should be optimized for size.  */
 359
 360 bool
 361 optimize_edge_for_size_p (edge e)
 362 {
 363   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 364 }
 365
 366 /* Return TRUE when BB should be optimized for speed.  */
 367
 368 bool
 369 optimize_edge_for_speed_p (edge e)
 370 {
 371   return !optimize_edge_for_size_p (e);
 372 }
 373
 374 /* Return TRUE when BB should be optimized for size.  */
 375
 376 bool
 377 optimize_insn_for_size_p (void)
 378 {
 379   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 380 }
 381
 382 /* Return TRUE when BB should be optimized for speed.  */
 383
 384 bool
 385 optimize_insn_for_speed_p (void)
 386 {
 387   return !optimize_insn_for_size_p ();
 388 }
 389
 390 /* Return TRUE when LOOP should be optimized for size.  */
 391
 392 bool
 393 optimize_loop_for_size_p (struct loop *loop)
 394 {
 395   return optimize_bb_for_size_p (loop->header);
 396 }
 397
 398 /* Return TRUE when LOOP should be optimized for speed.  */
 399
 400 bool
 401 optimize_loop_for_speed_p (struct loop *loop)
 402 {
 403   return optimize_bb_for_speed_p (loop->header);
 404 }
 405
 406 /* Return TRUE when LOOP nest should be optimized for speed.  */
 407
 408 bool
 409 optimize_loop_nest_for_speed_p (struct loop *loop)
 410 {
 411   struct loop *l = loop;
 412   if (optimize_loop_for_speed_p (loop))
 413     return true;
 414   l = loop->inner;
 415   while (l && l != loop)
 416     {
 417       if (optimize_loop_for_speed_p (l))
 418         return true;
 419       if (l->inner)
 420         l = l->inner;
 421       else if (l->next)
 422         l = l->next;
 423       else
 424         {
 425           while (l != loop && !l->next)
 426             l = loop_outer (l);
 427           if (l != loop)
 428             l = l->next;
 429         }
 430     }
 431   return false;
 432 }
 433
 434 /* Return TRUE when LOOP nest should be optimized for size.  */
 435
 436 bool
 437 optimize_loop_nest_for_size_p (struct loop *loop)
 438 {
 439   return !optimize_loop_nest_for_speed_p (loop);
 440 }
 441
 442 /* Return true when edge E is likely to be well predictable by branch
 443    predictor.  */
 444
 445 bool
 446 predictable_edge_p (edge e)
 447 {
 448   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 449     return false;
 450   if ((e->probability
 451        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 452       || (REG_BR_PROB_BASE - e->probability
 453           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 454     return true;
 455   return false;
 456 }
 457
 458
 459 /* Set RTL expansion for BB profile.  */
 460
 461 void
 462 rtl_profile_for_bb (basic_block bb)
 463 {
 464   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 465 }
 466
 467 /* Set RTL expansion for edge profile.  */
 468
 469 void
 470 rtl_profile_for_edge (edge e)
 471 {
 472   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 473 }
 474
 475 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 476 void
 477 default_rtl_profile (void)
 478 {
 479   crtl->maybe_hot_insn_p = true;
 480 }
 481
 482 /* Return true if the one of outgoing edges is already predicted by
 483    PREDICTOR.  */
 484
 485 bool
 486 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 487 {
 488   rtx note;
 489   if (!INSN_P (BB_END (bb)))
 490     return false;
 491   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 492     if (REG_NOTE_KIND (note) == REG_BR_PRED
 493         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 494       return true;
 495   return false;
 496 }
 497
 498 /*  Structure representing predictions in tree level. */
 499
 500 struct edge_prediction {
 501     struct edge_prediction *ep_next;
 502     edge ep_edge;
 503     enum br_predictor ep_predictor;
 504     int ep_probability;
 505 };
 506
 507 /* This map contains for a basic block the list of predictions for the
 508    outgoing edges.  */
 509
 510 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
 511
 512 /* Return true if the one of outgoing edges is already predicted by
 513    PREDICTOR.  */
 514
 515 bool
 516 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 517 {
 518   struct edge_prediction *i;
 519   edge_prediction **preds = bb_predictions->get (bb);
 520
 521   if (!preds)
 522     return false;
 523
 524   for (i = *preds; i; i = i->ep_next)
 525     if (i->ep_predictor == predictor)
 526       return true;
 527   return false;
 528 }
 529
 530 /* Return true when the probability of edge is reliable.
 531
 532    The profile guessing code is good at predicting branch outcome (ie.
 533    taken/not taken), that is predicted right slightly over 75% of time.
 534    It is however notoriously poor on predicting the probability itself.
 535    In general the profile appear a lot flatter (with probabilities closer
 536    to 50%) than the reality so it is bad idea to use it to drive optimization
 537    such as those disabling dynamic branch prediction for well predictable
 538    branches.
 539
 540    There are two exceptions - edges leading to noreturn edges and edges
 541    predicted by number of iterations heuristics are predicted well.  This macro
 542    should be able to distinguish those, but at the moment it simply check for
 543    noreturn heuristic that is only one giving probability over 99% or bellow
 544    1%.  In future we might want to propagate reliability information across the
 545    CFG if we find this information useful on multiple places.   */
 546 static bool
 547 probability_reliable_p (int prob)
 548 {
 549   return (profile_status_for_fn (cfun) == PROFILE_READ
 550           || (profile_status_for_fn (cfun) == PROFILE_GUESSED
 551               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 552 }
 553
 554 /* Same predicate as above, working on edges.  */
 555 bool
 556 edge_probability_reliable_p (const_edge e)
 557 {
 558   return probability_reliable_p (e->probability);
 559 }
 560
 561 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 562 bool
 563 br_prob_note_reliable_p (const_rtx note)
 564 {
 565   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 566   return probability_reliable_p (XINT (note, 0));
 567 }
 568
 569 static void
 570 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
 571 {
 572   gcc_assert (any_condjump_p (insn));
 573   if (!flag_guess_branch_prob)
 574     return;
 575
 576   add_reg_note (insn, REG_BR_PRED,
 577                 gen_rtx_CONCAT (VOIDmode,
 578                                 GEN_INT ((int) predictor),
 579                                 GEN_INT ((int) probability)));
 580 }
 581
 582 /* Predict insn by given predictor.  */
 583
 584 void
 585 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
 586                   enum prediction taken)
 587 {
 588    int probability = predictor_info[(int) predictor].hitrate;
 589
 590    if (taken != TAKEN)
 591      probability = REG_BR_PROB_BASE - probability;
 592
 593    predict_insn (insn, predictor, probability);
 594 }
 595
 596 /* Predict edge E with given probability if possible.  */
 597
 598 void
 599 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 600 {
 601   rtx_insn *last_insn;
 602   last_insn = BB_END (e->src);
 603
 604   /* We can store the branch prediction information only about
 605      conditional jumps.  */
 606   if (!any_condjump_p (last_insn))
 607     return;
 608
 609   /* We always store probability of branching.  */
 610   if (e->flags & EDGE_FALLTHRU)
 611     probability = REG_BR_PROB_BASE - probability;
 612
 613   predict_insn (last_insn, predictor, probability);
 614 }
 615
 616 /* Predict edge E with the given PROBABILITY.  */
 617 void
 618 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 619 {
 620   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
 621   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
 622        1)
 623       && flag_guess_branch_prob && optimize)
 624     {
 625       struct edge_prediction *i = XNEW (struct edge_prediction);
 626       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
 627
 628       i->ep_next = preds;
 629       preds = i;
 630       i->ep_probability = probability;
 631       i->ep_predictor = predictor;
 632       i->ep_edge = e;
 633     }
 634 }
 635
 636 /* Remove all predictions on given basic block that are attached
 637    to edge E.  */
 638 void
 639 remove_predictions_associated_with_edge (edge e)
 640 {
 641   if (!bb_predictions)
 642     return;
 643
 644   edge_prediction **preds = bb_predictions->get (e->src);
 645
 646   if (preds)
 647     {
 648       struct edge_prediction **prediction = preds;
 649       struct edge_prediction *next;
 650
 651       while (*prediction)
 652         {
 653           if ((*prediction)->ep_edge == e)
 654             {
 655               next = (*prediction)->ep_next;
 656               free (*prediction);
 657               *prediction = next;
 658             }
 659           else
 660             prediction = &((*prediction)->ep_next);
 661         }
 662     }
 663 }
 664
 665 /* Clears the list of predictions stored for BB.  */
 666
 667 static void
 668 clear_bb_predictions (basic_block bb)
 669 {
 670   edge_prediction **preds = bb_predictions->get (bb);
 671   struct edge_prediction *pred, *next;
 672
 673   if (!preds)
 674     return;
 675
 676   for (pred = *preds; pred; pred = next)
 677     {
 678       next = pred->ep_next;
 679       free (pred);
 680     }
 681   *preds = NULL;
 682 }
 683
 684 /* Return true when we can store prediction on insn INSN.
 685    At the moment we represent predictions only on conditional
 686    jumps, not at computed jump or other complicated cases.  */
 687 static bool
 688 can_predict_insn_p (const rtx_insn *insn)
 689 {
 690   return (JUMP_P (insn)
 691           && any_condjump_p (insn)
 692           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 693 }
 694
 695 /* Predict edge E by given predictor if possible.  */
 696
 697 void
 698 predict_edge_def (edge e, enum br_predictor predictor,
 699                   enum prediction taken)
 700 {
 701    int probability = predictor_info[(int) predictor].hitrate;
 702
 703    if (taken != TAKEN)
 704      probability = REG_BR_PROB_BASE - probability;
 705
 706    predict_edge (e, predictor, probability);
 707 }
 708
 709 /* Invert all branch predictions or probability notes in the INSN.  This needs
 710    to be done each time we invert the condition used by the jump.  */
 711
 712 void
 713 invert_br_probabilities (rtx insn)
 714 {
 715   rtx note;
 716
 717   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 718     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 719       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
 720     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 721       XEXP (XEXP (note, 0), 1)
 722         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 723 }
 724
 725 /* Dump information about the branch prediction to the output file.  */
 726
 727 static void
 728 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 729                  basic_block bb, int used)
 730 {
 731   edge e;
 732   edge_iterator ei;
 733
 734   if (!file)
 735     return;
 736
 737   FOR_EACH_EDGE (e, ei, bb->succs)
 738     if (! (e->flags & EDGE_FALLTHRU))
 739       break;
 740
 741   fprintf (file, "  %s heuristics%s: %.1f%%",
 742            predictor_info[predictor].name,
 743            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 744
 745   if (bb->count)
 746     {
 747       fprintf (file, "  exec %"PRId64, bb->count);
 748       if (e)
 749         {
 750           fprintf (file, " hit %"PRId64, e->count);
 751           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 752         }
 753     }
 754
 755   fprintf (file, "\n");
 756 }
 757
 758 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 759    evenly and hope for the best.  */
 760 static void
 761 set_even_probabilities (basic_block bb)
 762 {
 763   int nedges = 0;
 764   edge e;
 765   edge_iterator ei;
 766
 767   FOR_EACH_EDGE (e, ei, bb->succs)
 768     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 769       nedges ++;
 770   FOR_EACH_EDGE (e, ei, bb->succs)
 771     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 772       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 773     else
 774       e->probability = 0;
 775 }
 776
 777 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 778    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 779
 780 static void
 781 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
 782 {
 783   rtx prob_note;
 784   rtx *pnote;
 785   rtx note;
 786   int best_probability = PROB_EVEN;
 787   enum br_predictor best_predictor = END_PREDICTORS;
 788   int combined_probability = REG_BR_PROB_BASE / 2;
 789   int d;
 790   bool first_match = false;
 791   bool found = false;
 792
 793   if (!can_predict_insn_p (insn))
 794     {
 795       set_even_probabilities (bb);
 796       return;
 797     }
 798
 799   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 800   pnote = &REG_NOTES (insn);
 801   if (dump_file)
 802     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 803              bb->index);
 804
 805   /* We implement "first match" heuristics and use probability guessed
 806      by predictor with smallest index.  */
 807   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 808     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 809       {
 810         enum br_predictor predictor = ((enum br_predictor)
 811                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 812         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 813
 814         found = true;
 815         if (best_predictor > predictor)
 816           best_probability = probability, best_predictor = predictor;
 817
 818         d = (combined_probability * probability
 819              + (REG_BR_PROB_BASE - combined_probability)
 820              * (REG_BR_PROB_BASE - probability));
 821
 822         /* Use FP math to avoid overflows of 32bit integers.  */
 823         if (d == 0)
 824           /* If one probability is 0% and one 100%, avoid division by zero.  */
 825           combined_probability = REG_BR_PROB_BASE / 2;
 826         else
 827           combined_probability = (((double) combined_probability) * probability
 828                                   * REG_BR_PROB_BASE / d + 0.5);
 829       }
 830
 831   /* Decide which heuristic to use.  In case we didn't match anything,
 832      use no_prediction heuristic, in case we did match, use either
 833      first match or Dempster-Shaffer theory depending on the flags.  */
 834
 835   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 836     first_match = true;
 837
 838   if (!found)
 839     dump_prediction (dump_file, PRED_NO_PREDICTION,
 840                      combined_probability, bb, true);
 841   else
 842     {
 843       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 844                        bb, !first_match);
 845       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 846                        bb, first_match);
 847     }
 848
 849   if (first_match)
 850     combined_probability = best_probability;
 851   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 852
 853   while (*pnote)
 854     {
 855       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 856         {
 857           enum br_predictor predictor = ((enum br_predictor)
 858                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 859           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 860
 861           dump_prediction (dump_file, predictor, probability, bb,
 862                            !first_match || best_predictor == predictor);
 863           *pnote = XEXP (*pnote, 1);
 864         }
 865       else
 866         pnote = &XEXP (*pnote, 1);
 867     }
 868
 869   if (!prob_note)
 870     {
 871       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
 872
 873       /* Save the prediction into CFG in case we are seeing non-degenerated
 874          conditional jump.  */
 875       if (!single_succ_p (bb))
 876         {
 877           BRANCH_EDGE (bb)->probability = combined_probability;
 878           FALLTHRU_EDGE (bb)->probability
 879             = REG_BR_PROB_BASE - combined_probability;
 880         }
 881     }
 882   else if (!single_succ_p (bb))
 883     {
 884       int prob = XINT (prob_note, 0);
 885
 886       BRANCH_EDGE (bb)->probability = prob;
 887       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 888     }
 889   else
 890     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 891 }
 892
 893 /* Combine predictions into single probability and store them into CFG.
 894    Remove now useless prediction entries.  */
 895
 896 static void
 897 combine_predictions_for_bb (basic_block bb)
 898 {
 899   int best_probability = PROB_EVEN;
 900   enum br_predictor best_predictor = END_PREDICTORS;
 901   int combined_probability = REG_BR_PROB_BASE / 2;
 902   int d;
 903   bool first_match = false;
 904   bool found = false;
 905   struct edge_prediction *pred;
 906   int nedges = 0;
 907   edge e, first = NULL, second = NULL;
 908   edge_iterator ei;
 909
 910   FOR_EACH_EDGE (e, ei, bb->succs)
 911     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 912       {
 913         nedges ++;
 914         if (first && !second)
 915           second = e;
 916         if (!first)
 917           first = e;
 918       }
 919
 920   /* When there is no successor or only one choice, prediction is easy.
 921
 922      We are lazy for now and predict only basic blocks with two outgoing
 923      edges.  It is possible to predict generic case too, but we have to
 924      ignore first match heuristics and do more involved combining.  Implement
 925      this later.  */
 926   if (nedges != 2)
 927     {
 928       if (!bb->count)
 929         set_even_probabilities (bb);
 930       clear_bb_predictions (bb);
 931       if (dump_file)
 932         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 933                  nedges, bb->index);
 934       return;
 935     }
 936
 937   if (dump_file)
 938     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 939
 940   edge_prediction **preds = bb_predictions->get (bb);
 941   if (preds)
 942     {
 943       /* We implement "first match" heuristics and use probability guessed
 944          by predictor with smallest index.  */
 945       for (pred = *preds; pred; pred = pred->ep_next)
 946         {
 947           enum br_predictor predictor = pred->ep_predictor;
 948           int probability = pred->ep_probability;
 949
 950           if (pred->ep_edge != first)
 951             probability = REG_BR_PROB_BASE - probability;
 952
 953           found = true;
 954           /* First match heuristics would be widly confused if we predicted
 955              both directions.  */
 956           if (best_predictor > predictor)
 957             {
 958               struct edge_prediction *pred2;
 959               int prob = probability;
 960
 961               for (pred2 = (struct edge_prediction *) *preds;
 962                    pred2; pred2 = pred2->ep_next)
 963                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 964                  {
 965                    int probability2 = pred->ep_probability;
 966
 967                    if (pred2->ep_edge != first)
 968                      probability2 = REG_BR_PROB_BASE - probability2;
 969
 970                    if ((probability < REG_BR_PROB_BASE / 2) !=
 971                        (probability2 < REG_BR_PROB_BASE / 2))
 972                      break;
 973
 974                    /* If the same predictor later gave better result, go for it! */
 975                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 976                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 977                      prob = probability2;
 978                  }
 979               if (!pred2)
 980                 best_probability = prob, best_predictor = predictor;
 981             }
 982
 983           d = (combined_probability * probability
 984                + (REG_BR_PROB_BASE - combined_probability)
 985                * (REG_BR_PROB_BASE - probability));
 986
 987           /* Use FP math to avoid overflows of 32bit integers.  */
 988           if (d == 0)
 989             /* If one probability is 0% and one 100%, avoid division by zero.  */
 990             combined_probability = REG_BR_PROB_BASE / 2;
 991           else
 992             combined_probability = (((double) combined_probability)
 993                                     * probability
 994                                     * REG_BR_PROB_BASE / d + 0.5);
 995         }
 996     }
 997
 998   /* Decide which heuristic to use.  In case we didn't match anything,
 999      use no_prediction heuristic, in case we did match, use either
1000      first match or Dempster-Shaffer theory depending on the flags.  */
1001
1002   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1003     first_match = true;
1004
1005   if (!found)
1006     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1007   else
1008     {
1009       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1010                        !first_match);
1011       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1012                        first_match);
1013     }
1014
1015   if (first_match)
1016     combined_probability = best_probability;
1017   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1018
1019   if (preds)
1020     {
1021       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1022         {
1023           enum br_predictor predictor = pred->ep_predictor;
1024           int probability = pred->ep_probability;
1025
1026           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1027             probability = REG_BR_PROB_BASE - probability;
1028           dump_prediction (dump_file, predictor, probability, bb,
1029                            !first_match || best_predictor == predictor);
1030         }
1031     }
1032   clear_bb_predictions (bb);
1033
1034   if (!bb->count)
1035     {
1036       first->probability = combined_probability;
1037       second->probability = REG_BR_PROB_BASE - combined_probability;
1038     }
1039 }
1040
1041 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1042    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1043
1044    T1 and T2 should be one of the following cases:
1045      1. T1 is SSA_NAME, T2 is NULL
1046      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1047      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1048
1049 static tree
1050 strips_small_constant (tree t1, tree t2)
1051 {
1052   tree ret = NULL;
1053   int value = 0;
1054
1055   if (!t1)
1056     return NULL;
1057   else if (TREE_CODE (t1) == SSA_NAME)
1058     ret = t1;
1059   else if (tree_fits_shwi_p (t1))
1060     value = tree_to_shwi (t1);
1061   else
1062     return NULL;
1063
1064   if (!t2)
1065     return ret;
1066   else if (tree_fits_shwi_p (t2))
1067     value = tree_to_shwi (t2);
1068   else if (TREE_CODE (t2) == SSA_NAME)
1069     {
1070       if (ret)
1071         return NULL;
1072       else
1073         ret = t2;
1074     }
1075
1076   if (value <= 4 && value >= -4)
1077     return ret;
1078   else
1079     return NULL;
1080 }
1081
1082 /* Return the SSA_NAME in T or T's operands.
1083    Return NULL if SSA_NAME cannot be found.  */
1084
1085 static tree
1086 get_base_value (tree t)
1087 {
1088   if (TREE_CODE (t) == SSA_NAME)
1089     return t;
1090
1091   if (!BINARY_CLASS_P (t))
1092     return NULL;
1093
1094   switch (TREE_OPERAND_LENGTH (t))
1095     {
1096     case 1:
1097       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1098     case 2:
1099       return strips_small_constant (TREE_OPERAND (t, 0),
1100                                     TREE_OPERAND (t, 1));
1101     default:
1102       return NULL;
1103     }
1104 }
1105
1106 /* Check the compare STMT in LOOP. If it compares an induction
1107    variable to a loop invariant, return true, and save
1108    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1109    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1110
1111 static bool
1112 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1113                                      tree *loop_invariant,
1114                                      enum tree_code *compare_code,
1115                                      tree *loop_step,
1116                                      tree *loop_iv_base)
1117 {
1118   tree op0, op1, bound, base;
1119   affine_iv iv0, iv1;
1120   enum tree_code code;
1121   tree step;
1122
1123   code = gimple_cond_code (stmt);
1124   *loop_invariant = NULL;
1125
1126   switch (code)
1127     {
1128     case GT_EXPR:
1129     case GE_EXPR:
1130     case NE_EXPR:
1131     case LT_EXPR:
1132     case LE_EXPR:
1133     case EQ_EXPR:
1134       break;
1135
1136     default:
1137       return false;
1138     }
1139
1140   op0 = gimple_cond_lhs (stmt);
1141   op1 = gimple_cond_rhs (stmt);
1142
1143   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1144        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1145     return false;
1146   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1147     return false;
1148   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1149     return false;
1150   if (TREE_CODE (iv0.step) != INTEGER_CST
1151       || TREE_CODE (iv1.step) != INTEGER_CST)
1152     return false;
1153   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1154       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1155     return false;
1156
1157   if (integer_zerop (iv0.step))
1158     {
1159       if (code != NE_EXPR && code != EQ_EXPR)
1160         code = invert_tree_comparison (code, false);
1161       bound = iv0.base;
1162       base = iv1.base;
1163       if (tree_fits_shwi_p (iv1.step))
1164         step = iv1.step;
1165       else
1166         return false;
1167     }
1168   else
1169     {
1170       bound = iv1.base;
1171       base = iv0.base;
1172       if (tree_fits_shwi_p (iv0.step))
1173         step = iv0.step;
1174       else
1175         return false;
1176     }
1177
1178   if (TREE_CODE (bound) != INTEGER_CST)
1179     bound = get_base_value (bound);
1180   if (!bound)
1181     return false;
1182   if (TREE_CODE (base) != INTEGER_CST)
1183     base = get_base_value (base);
1184   if (!base)
1185     return false;
1186
1187   *loop_invariant = bound;
1188   *compare_code = code;
1189   *loop_step = step;
1190   *loop_iv_base = base;
1191   return true;
1192 }
1193
1194 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1195
1196 static bool
1197 expr_coherent_p (tree t1, tree t2)
1198 {
1199   gimple stmt;
1200   tree ssa_name_1 = NULL;
1201   tree ssa_name_2 = NULL;
1202
1203   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1204   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1205
1206   if (t1 == t2)
1207     return true;
1208
1209   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1210     return true;
1211   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1212     return false;
1213
1214   /* Check to see if t1 is expressed/defined with t2.  */
1215   stmt = SSA_NAME_DEF_STMT (t1);
1216   gcc_assert (stmt != NULL);
1217   if (is_gimple_assign (stmt))
1218     {
1219       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1220       if (ssa_name_1 && ssa_name_1 == t2)
1221         return true;
1222     }
1223
1224   /* Check to see if t2 is expressed/defined with t1.  */
1225   stmt = SSA_NAME_DEF_STMT (t2);
1226   gcc_assert (stmt != NULL);
1227   if (is_gimple_assign (stmt))
1228     {
1229       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1230       if (ssa_name_2 && ssa_name_2 == t1)
1231         return true;
1232     }
1233
1234   /* Compare if t1 and t2's def_stmts are identical.  */
1235   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1236     return true;
1237   else
1238     return false;
1239 }
1240
1241 /* Predict branch probability of BB when BB contains a branch that compares
1242    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1243    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1244
1245    E.g.
1246      for (int i = 0; i < bound; i++) {
1247        if (i < bound - 2)
1248          computation_1();
1249        else
1250          computation_2();
1251      }
1252
1253   In this loop, we will predict the branch inside the loop to be taken.  */
1254
1255 static void
1256 predict_iv_comparison (struct loop *loop, basic_block bb,
1257                        tree loop_bound_var,
1258                        tree loop_iv_base_var,
1259                        enum tree_code loop_bound_code,
1260                        int loop_bound_step)
1261 {
1262   gimple stmt;
1263   tree compare_var, compare_base;
1264   enum tree_code compare_code;
1265   tree compare_step_var;
1266   edge then_edge;
1267   edge_iterator ei;
1268
1269   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1270       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1271       || predicted_by_p (bb, PRED_LOOP_EXIT))
1272     return;
1273
1274   stmt = last_stmt (bb);
1275   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1276     return;
1277   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1278                                             &compare_code,
1279                                             &compare_step_var,
1280                                             &compare_base))
1281     return;
1282
1283   /* Find the taken edge.  */
1284   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1285     if (then_edge->flags & EDGE_TRUE_VALUE)
1286       break;
1287
1288   /* When comparing an IV to a loop invariant, NE is more likely to be
1289      taken while EQ is more likely to be not-taken.  */
1290   if (compare_code == NE_EXPR)
1291     {
1292       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1293       return;
1294     }
1295   else if (compare_code == EQ_EXPR)
1296     {
1297       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1298       return;
1299     }
1300
1301   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1302     return;
1303
1304   /* If loop bound, base and compare bound are all constants, we can
1305      calculate the probability directly.  */
1306   if (tree_fits_shwi_p (loop_bound_var)
1307       && tree_fits_shwi_p (compare_var)
1308       && tree_fits_shwi_p (compare_base))
1309     {
1310       int probability;
1311       bool overflow, overall_overflow = false;
1312       widest_int compare_count, tem;
1313
1314       /* (loop_bound - base) / compare_step */
1315       tem = wi::sub (wi::to_widest (loop_bound_var),
1316                      wi::to_widest (compare_base), SIGNED, &overflow);
1317       overall_overflow |= overflow;
1318       widest_int loop_count = wi::div_trunc (tem,
1319                                              wi::to_widest (compare_step_var),
1320                                              SIGNED, &overflow);
1321       overall_overflow |= overflow;
1322
1323       if (!wi::neg_p (wi::to_widest (compare_step_var))
1324           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1325         {
1326           /* (loop_bound - compare_bound) / compare_step */
1327           tem = wi::sub (wi::to_widest (loop_bound_var),
1328                          wi::to_widest (compare_var), SIGNED, &overflow);
1329           overall_overflow |= overflow;
1330           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1331                                          SIGNED, &overflow);
1332           overall_overflow |= overflow;
1333         }
1334       else
1335         {
1336           /* (compare_bound - base) / compare_step */
1337           tem = wi::sub (wi::to_widest (compare_var),
1338                          wi::to_widest (compare_base), SIGNED, &overflow);
1339           overall_overflow |= overflow;
1340           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1341                                          SIGNED, &overflow);
1342           overall_overflow |= overflow;
1343         }
1344       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1345         ++compare_count;
1346       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1347         ++loop_count;
1348       if (wi::neg_p (compare_count))
1349         compare_count = 0;
1350       if (wi::neg_p (loop_count))
1351         loop_count = 0;
1352       if (loop_count == 0)
1353         probability = 0;
1354       else if (wi::cmps (compare_count, loop_count) == 1)
1355         probability = REG_BR_PROB_BASE;
1356       else
1357         {
1358           tem = compare_count * REG_BR_PROB_BASE;
1359           tem = wi::udiv_trunc (tem, loop_count);
1360           probability = tem.to_uhwi ();
1361         }
1362
1363       if (!overall_overflow)
1364         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1365
1366       return;
1367     }
1368
1369   if (expr_coherent_p (loop_bound_var, compare_var))
1370     {
1371       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1372           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1373         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1374       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1375                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1376         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1377       else if (loop_bound_code == NE_EXPR)
1378         {
1379           /* If the loop backedge condition is "(i != bound)", we do
1380              the comparison based on the step of IV:
1381              * step < 0 : backedge condition is like (i > bound)
1382              * step > 0 : backedge condition is like (i < bound)  */
1383           gcc_assert (loop_bound_step != 0);
1384           if (loop_bound_step > 0
1385               && (compare_code == LT_EXPR
1386                   || compare_code == LE_EXPR))
1387             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1388           else if (loop_bound_step < 0
1389                    && (compare_code == GT_EXPR
1390                        || compare_code == GE_EXPR))
1391             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1392           else
1393             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1394         }
1395       else
1396         /* The branch is predicted not-taken if loop_bound_code is
1397            opposite with compare_code.  */
1398         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1399     }
1400   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1401     {
1402       /* For cases like:
1403            for (i = s; i < h; i++)
1404              if (i > s + 2) ....
1405          The branch should be predicted taken.  */
1406       if (loop_bound_step > 0
1407           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1408         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1409       else if (loop_bound_step < 0
1410                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1411         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1412       else
1413         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1414     }
1415 }
1416
1417 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1418    exits are resulted from short-circuit conditions that will generate an
1419    if_tmp. E.g.:
1420
1421    if (foo() || global > 10)
1422      break;
1423
1424    This will be translated into:
1425
1426    BB3:
1427      loop header...
1428    BB4:
1429      if foo() goto BB6 else goto BB5
1430    BB5:
1431      if global > 10 goto BB6 else goto BB7
1432    BB6:
1433      goto BB7
1434    BB7:
1435      iftmp = (PHI 0(BB5), 1(BB6))
1436      if iftmp == 1 goto BB8 else goto BB3
1437    BB8:
1438      outside of the loop...
1439
1440    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1441    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1442    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1443    exits to predict them using PRED_LOOP_EXIT.  */
1444
1445 static void
1446 predict_extra_loop_exits (edge exit_edge)
1447 {
1448   unsigned i;
1449   bool check_value_one;
1450   gimple phi_stmt;
1451   tree cmp_rhs, cmp_lhs;
1452   gimple cmp_stmt = last_stmt (exit_edge->src);
1453
1454   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1455     return;
1456   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1457   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1458   if (!TREE_CONSTANT (cmp_rhs)
1459       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1460     return;
1461   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1462     return;
1463
1464   /* If check_value_one is true, only the phi_args with value '1' will lead
1465      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1466      loop exit.  */
1467   check_value_one = (((integer_onep (cmp_rhs))
1468                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1469                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1470
1471   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1472   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1473     return;
1474
1475   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1476     {
1477       edge e1;
1478       edge_iterator ei;
1479       tree val = gimple_phi_arg_def (phi_stmt, i);
1480       edge e = gimple_phi_arg_edge (phi_stmt, i);
1481
1482       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1483         continue;
1484       if ((check_value_one ^ integer_onep (val)) == 1)
1485         continue;
1486       if (EDGE_COUNT (e->src->succs) != 1)
1487         {
1488           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1489           continue;
1490         }
1491
1492       FOR_EACH_EDGE (e1, ei, e->src->preds)
1493         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1494     }
1495 }
1496
1497 /* Predict edge probabilities by exploiting loop structure.  */
1498
1499 static void
1500 predict_loops (void)
1501 {
1502   struct loop *loop;
1503
1504   /* Try to predict out blocks in a loop that are not part of a
1505      natural loop.  */
1506   FOR_EACH_LOOP (loop, 0)
1507     {
1508       basic_block bb, *bbs;
1509       unsigned j, n_exits;
1510       vec<edge> exits;
1511       struct tree_niter_desc niter_desc;
1512       edge ex;
1513       struct nb_iter_bound *nb_iter;
1514       enum tree_code loop_bound_code = ERROR_MARK;
1515       tree loop_bound_step = NULL;
1516       tree loop_bound_var = NULL;
1517       tree loop_iv_base = NULL;
1518       gimple stmt = NULL;
1519
1520       exits = get_loop_exit_edges (loop);
1521       n_exits = exits.length ();
1522       if (!n_exits)
1523         {
1524           exits.release ();
1525           continue;
1526         }
1527
1528       FOR_EACH_VEC_ELT (exits, j, ex)
1529         {
1530           tree niter = NULL;
1531           HOST_WIDE_INT nitercst;
1532           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1533           int probability;
1534           enum br_predictor predictor;
1535
1536           predict_extra_loop_exits (ex);
1537
1538           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1539             niter = niter_desc.niter;
1540           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1541             niter = loop_niter_by_eval (loop, ex);
1542
1543           if (TREE_CODE (niter) == INTEGER_CST)
1544             {
1545               if (tree_fits_uhwi_p (niter)
1546                   && max
1547                   && compare_tree_int (niter, max - 1) == -1)
1548                 nitercst = tree_to_uhwi (niter) + 1;
1549               else
1550                 nitercst = max;
1551               predictor = PRED_LOOP_ITERATIONS;
1552             }
1553           /* If we have just one exit and we can derive some information about
1554              the number of iterations of the loop from the statements inside
1555              the loop, use it to predict this exit.  */
1556           else if (n_exits == 1)
1557             {
1558               nitercst = estimated_stmt_executions_int (loop);
1559               if (nitercst < 0)
1560                 continue;
1561               if (nitercst > max)
1562                 nitercst = max;
1563
1564               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1565             }
1566           else
1567             continue;
1568
1569           /* If the prediction for number of iterations is zero, do not
1570              predict the exit edges.  */
1571           if (nitercst == 0)
1572             continue;
1573
1574           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1575           predict_edge (ex, predictor, probability);
1576         }
1577       exits.release ();
1578
1579       /* Find information about loop bound variables.  */
1580       for (nb_iter = loop->bounds; nb_iter;
1581            nb_iter = nb_iter->next)
1582         if (nb_iter->stmt
1583             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1584           {
1585             stmt = nb_iter->stmt;
1586             break;
1587           }
1588       if (!stmt && last_stmt (loop->header)
1589           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1590         stmt = last_stmt (loop->header);
1591       if (stmt)
1592         is_comparison_with_loop_invariant_p (stmt, loop,
1593                                              &loop_bound_var,
1594                                              &loop_bound_code,
1595                                              &loop_bound_step,
1596                                              &loop_iv_base);
1597
1598       bbs = get_loop_body (loop);
1599
1600       for (j = 0; j < loop->num_nodes; j++)
1601         {
1602           int header_found = 0;
1603           edge e;
1604           edge_iterator ei;
1605
1606           bb = bbs[j];
1607
1608           /* Bypass loop heuristics on continue statement.  These
1609              statements construct loops via "non-loop" constructs
1610              in the source language and are better to be handled
1611              separately.  */
1612           if (predicted_by_p (bb, PRED_CONTINUE))
1613             continue;
1614
1615           /* Loop branch heuristics - predict an edge back to a
1616              loop's head as taken.  */
1617           if (bb == loop->latch)
1618             {
1619               e = find_edge (loop->latch, loop->header);
1620               if (e)
1621                 {
1622                   header_found = 1;
1623                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1624                 }
1625             }
1626
1627           /* Loop exit heuristics - predict an edge exiting the loop if the
1628              conditional has no loop header successors as not taken.  */
1629           if (!header_found
1630               /* If we already used more reliable loop exit predictors, do not
1631                  bother with PRED_LOOP_EXIT.  */
1632               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1633               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1634             {
1635               /* For loop with many exits we don't want to predict all exits
1636                  with the pretty large probability, because if all exits are
1637                  considered in row, the loop would be predicted to iterate
1638                  almost never.  The code to divide probability by number of
1639                  exits is very rough.  It should compute the number of exits
1640                  taken in each patch through function (not the overall number
1641                  of exits that might be a lot higher for loops with wide switch
1642                  statements in them) and compute n-th square root.
1643
1644                  We limit the minimal probability by 2% to avoid
1645                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1646                  as this was causing regression in perl benchmark containing such
1647                  a wide loop.  */
1648
1649               int probability = ((REG_BR_PROB_BASE
1650                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1651                                  / n_exits);
1652               if (probability < HITRATE (2))
1653                 probability = HITRATE (2);
1654               FOR_EACH_EDGE (e, ei, bb->succs)
1655                 if (e->dest->index < NUM_FIXED_BLOCKS
1656                     || !flow_bb_inside_loop_p (loop, e->dest))
1657                   predict_edge (e, PRED_LOOP_EXIT, probability);
1658             }
1659           if (loop_bound_var)
1660             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1661                                    loop_bound_code,
1662                                    tree_to_shwi (loop_bound_step));
1663         }
1664
1665       /* Free basic blocks from get_loop_body.  */
1666       free (bbs);
1667     }
1668 }
1669
1670 /* Attempt to predict probabilities of BB outgoing edges using local
1671    properties.  */
1672 static void
1673 bb_estimate_probability_locally (basic_block bb)
1674 {
1675   rtx_insn *last_insn = BB_END (bb);
1676   rtx cond;
1677
1678   if (! can_predict_insn_p (last_insn))
1679     return;
1680   cond = get_condition (last_insn, NULL, false, false);
1681   if (! cond)
1682     return;
1683
1684   /* Try "pointer heuristic."
1685      A comparison ptr == 0 is predicted as false.
1686      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1687   if (COMPARISON_P (cond)
1688       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1689           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1690     {
1691       if (GET_CODE (cond) == EQ)
1692         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1693       else if (GET_CODE (cond) == NE)
1694         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1695     }
1696   else
1697
1698   /* Try "opcode heuristic."
1699      EQ tests are usually false and NE tests are usually true. Also,
1700      most quantities are positive, so we can make the appropriate guesses
1701      about signed comparisons against zero.  */
1702     switch (GET_CODE (cond))
1703       {
1704       case CONST_INT:
1705         /* Unconditional branch.  */
1706         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1707                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1708         break;
1709
1710       case EQ:
1711       case UNEQ:
1712         /* Floating point comparisons appears to behave in a very
1713            unpredictable way because of special role of = tests in
1714            FP code.  */
1715         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1716           ;
1717         /* Comparisons with 0 are often used for booleans and there is
1718            nothing useful to predict about them.  */
1719         else if (XEXP (cond, 1) == const0_rtx
1720                  || XEXP (cond, 0) == const0_rtx)
1721           ;
1722         else
1723           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1724         break;
1725
1726       case NE:
1727       case LTGT:
1728         /* Floating point comparisons appears to behave in a very
1729            unpredictable way because of special role of = tests in
1730            FP code.  */
1731         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1732           ;
1733         /* Comparisons with 0 are often used for booleans and there is
1734            nothing useful to predict about them.  */
1735         else if (XEXP (cond, 1) == const0_rtx
1736                  || XEXP (cond, 0) == const0_rtx)
1737           ;
1738         else
1739           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1740         break;
1741
1742       case ORDERED:
1743         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1744         break;
1745
1746       case UNORDERED:
1747         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1748         break;
1749
1750       case LE:
1751       case LT:
1752         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1753             || XEXP (cond, 1) == constm1_rtx)
1754           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1755         break;
1756
1757       case GE:
1758       case GT:
1759         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1760             || XEXP (cond, 1) == constm1_rtx)
1761           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1762         break;
1763
1764       default:
1765         break;
1766       }
1767 }
1768
1769 /* Set edge->probability for each successor edge of BB.  */
1770 void
1771 guess_outgoing_edge_probabilities (basic_block bb)
1772 {
1773   bb_estimate_probability_locally (bb);
1774   combine_predictions_for_insn (BB_END (bb), bb);
1775 }
1776 \f
1777 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1778
1779 /* Helper function for expr_expected_value.  */
1780
1781 static tree
1782 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1783                        tree op1, bitmap visited, enum br_predictor *predictor)
1784 {
1785   gimple def;
1786
1787   if (predictor)
1788     *predictor = PRED_UNCONDITIONAL;
1789
1790   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1791     {
1792       if (TREE_CONSTANT (op0))
1793         return op0;
1794
1795       if (code != SSA_NAME)
1796         return NULL_TREE;
1797
1798       def = SSA_NAME_DEF_STMT (op0);
1799
1800       /* If we were already here, break the infinite cycle.  */
1801       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1802         return NULL;
1803
1804       if (gimple_code (def) == GIMPLE_PHI)
1805         {
1806           /* All the arguments of the PHI node must have the same constant
1807              length.  */
1808           int i, n = gimple_phi_num_args (def);
1809           tree val = NULL, new_val;
1810
1811           for (i = 0; i < n; i++)
1812             {
1813               tree arg = PHI_ARG_DEF (def, i);
1814               enum br_predictor predictor2;
1815
1816               /* If this PHI has itself as an argument, we cannot
1817                  determine the string length of this argument.  However,
1818                  if we can find an expected constant value for the other
1819                  PHI args then we can still be sure that this is
1820                  likely a constant.  So be optimistic and just
1821                  continue with the next argument.  */
1822               if (arg == PHI_RESULT (def))
1823                 continue;
1824
1825               new_val = expr_expected_value (arg, visited, &predictor2);
1826
1827               /* It is difficult to combine value predictors.  Simply assume
1828                  that later predictor is weaker and take its prediction.  */
1829               if (predictor && *predictor < predictor2)
1830                 *predictor = predictor2;
1831               if (!new_val)
1832                 return NULL;
1833               if (!val)
1834                 val = new_val;
1835               else if (!operand_equal_p (val, new_val, false))
1836                 return NULL;
1837             }
1838           return val;
1839         }
1840       if (is_gimple_assign (def))
1841         {
1842           if (gimple_assign_lhs (def) != op0)
1843             return NULL;
1844
1845           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1846                                         gimple_assign_rhs1 (def),
1847                                         gimple_assign_rhs_code (def),
1848                                         gimple_assign_rhs2 (def),
1849                                         visited, predictor);
1850         }
1851
1852       if (is_gimple_call (def))
1853         {
1854           tree decl = gimple_call_fndecl (def);
1855           if (!decl)
1856             {
1857               if (gimple_call_internal_p (def)
1858                   && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1859                 {
1860                   gcc_assert (gimple_call_num_args (def) == 3);
1861                   tree val = gimple_call_arg (def, 0);
1862                   if (TREE_CONSTANT (val))
1863                     return val;
1864                   if (predictor)
1865                     {
1866                       tree val2 = gimple_call_arg (def, 2);
1867                       gcc_assert (TREE_CODE (val2) == INTEGER_CST
1868                                   && tree_fits_uhwi_p (val2)
1869                                   && tree_to_uhwi (val2) < END_PREDICTORS);
1870                       *predictor = (enum br_predictor) tree_to_uhwi (val2);
1871                     }
1872                   return gimple_call_arg (def, 1);
1873                 }
1874               return NULL;
1875             }
1876           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1877             switch (DECL_FUNCTION_CODE (decl))
1878               {
1879               case BUILT_IN_EXPECT:
1880                 {
1881                   tree val;
1882                   if (gimple_call_num_args (def) != 2)
1883                     return NULL;
1884                   val = gimple_call_arg (def, 0);
1885                   if (TREE_CONSTANT (val))
1886                     return val;
1887                   if (predictor)
1888                     *predictor = PRED_BUILTIN_EXPECT;
1889                   return gimple_call_arg (def, 1);
1890                 }
1891
1892               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1893               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1894               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1895               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1896               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1897               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1898               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1899               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1900               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1901               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1902               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1903               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1904               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1905                 /* Assume that any given atomic operation has low contention,
1906                    and thus the compare-and-swap operation succeeds.  */
1907                 if (predictor)
1908                   *predictor = PRED_COMPARE_AND_SWAP;
1909                 return boolean_true_node;
1910               default:
1911                 break;
1912             }
1913         }
1914
1915       return NULL;
1916     }
1917
1918   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1919     {
1920       tree res;
1921       enum br_predictor predictor2;
1922       op0 = expr_expected_value (op0, visited, predictor);
1923       if (!op0)
1924         return NULL;
1925       op1 = expr_expected_value (op1, visited, &predictor2);
1926       if (predictor && *predictor < predictor2)
1927         *predictor = predictor2;
1928       if (!op1)
1929         return NULL;
1930       res = fold_build2 (code, type, op0, op1);
1931       if (TREE_CONSTANT (res))
1932         return res;
1933       return NULL;
1934     }
1935   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1936     {
1937       tree res;
1938       op0 = expr_expected_value (op0, visited, predictor);
1939       if (!op0)
1940         return NULL;
1941       res = fold_build1 (code, type, op0);
1942       if (TREE_CONSTANT (res))
1943         return res;
1944       return NULL;
1945     }
1946   return NULL;
1947 }
1948
1949 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1950    The function is used by builtin_expect branch predictor so the evidence
1951    must come from this construct and additional possible constant folding.
1952
1953    We may want to implement more involved value guess (such as value range
1954    propagation based prediction), but such tricks shall go to new
1955    implementation.  */
1956
1957 static tree
1958 expr_expected_value (tree expr, bitmap visited,
1959                      enum br_predictor *predictor)
1960 {
1961   enum tree_code code;
1962   tree op0, op1;
1963
1964   if (TREE_CONSTANT (expr))
1965     {
1966       if (predictor)
1967         *predictor = PRED_UNCONDITIONAL;
1968       return expr;
1969     }
1970
1971   extract_ops_from_tree (expr, &code, &op0, &op1);
1972   return expr_expected_value_1 (TREE_TYPE (expr),
1973                                 op0, code, op1, visited, predictor);
1974 }
1975 \f
1976 /* Predict using opcode of the last statement in basic block.  */
1977 static void
1978 tree_predict_by_opcode (basic_block bb)
1979 {
1980   gimple stmt = last_stmt (bb);
1981   edge then_edge;
1982   tree op0, op1;
1983   tree type;
1984   tree val;
1985   enum tree_code cmp;
1986   bitmap visited;
1987   edge_iterator ei;
1988   enum br_predictor predictor;
1989
1990   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1991     return;
1992   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1993     if (then_edge->flags & EDGE_TRUE_VALUE)
1994       break;
1995   op0 = gimple_cond_lhs (stmt);
1996   op1 = gimple_cond_rhs (stmt);
1997   cmp = gimple_cond_code (stmt);
1998   type = TREE_TYPE (op0);
1999   visited = BITMAP_ALLOC (NULL);
2000   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
2001                                &predictor);
2002   BITMAP_FREE (visited);
2003   if (val && TREE_CODE (val) == INTEGER_CST)
2004     {
2005       if (predictor == PRED_BUILTIN_EXPECT)
2006         {
2007           int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2008
2009           gcc_assert (percent >= 0 && percent <= 100);
2010           if (integer_zerop (val))
2011             percent = 100 - percent;
2012           predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2013         }
2014       else
2015         predict_edge (then_edge, predictor,
2016                       integer_zerop (val) ? NOT_TAKEN : TAKEN);
2017     }
2018   /* Try "pointer heuristic."
2019      A comparison ptr == 0 is predicted as false.
2020      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2021   if (POINTER_TYPE_P (type))
2022     {
2023       if (cmp == EQ_EXPR)
2024         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2025       else if (cmp == NE_EXPR)
2026         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2027     }
2028   else
2029
2030   /* Try "opcode heuristic."
2031      EQ tests are usually false and NE tests are usually true. Also,
2032      most quantities are positive, so we can make the appropriate guesses
2033      about signed comparisons against zero.  */
2034     switch (cmp)
2035       {
2036       case EQ_EXPR:
2037       case UNEQ_EXPR:
2038         /* Floating point comparisons appears to behave in a very
2039            unpredictable way because of special role of = tests in
2040            FP code.  */
2041         if (FLOAT_TYPE_P (type))
2042           ;
2043         /* Comparisons with 0 are often used for booleans and there is
2044            nothing useful to predict about them.  */
2045         else if (integer_zerop (op0) || integer_zerop (op1))
2046           ;
2047         else
2048           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2049         break;
2050
2051       case NE_EXPR:
2052       case LTGT_EXPR:
2053         /* Floating point comparisons appears to behave in a very
2054            unpredictable way because of special role of = tests in
2055            FP code.  */
2056         if (FLOAT_TYPE_P (type))
2057           ;
2058         /* Comparisons with 0 are often used for booleans and there is
2059            nothing useful to predict about them.  */
2060         else if (integer_zerop (op0)
2061                  || integer_zerop (op1))
2062           ;
2063         else
2064           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2065         break;
2066
2067       case ORDERED_EXPR:
2068         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2069         break;
2070
2071       case UNORDERED_EXPR:
2072         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2073         break;
2074
2075       case LE_EXPR:
2076       case LT_EXPR:
2077         if (integer_zerop (op1)
2078             || integer_onep (op1)
2079             || integer_all_onesp (op1)
2080             || real_zerop (op1)
2081             || real_onep (op1)
2082             || real_minus_onep (op1))
2083           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2084         break;
2085
2086       case GE_EXPR:
2087       case GT_EXPR:
2088         if (integer_zerop (op1)
2089             || integer_onep (op1)
2090             || integer_all_onesp (op1)
2091             || real_zerop (op1)
2092             || real_onep (op1)
2093             || real_minus_onep (op1))
2094           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2095         break;
2096
2097       default:
2098         break;
2099       }
2100 }
2101
2102 /* Try to guess whether the value of return means error code.  */
2103
2104 static enum br_predictor
2105 return_prediction (tree val, enum prediction *prediction)
2106 {
2107   /* VOID.  */
2108   if (!val)
2109     return PRED_NO_PREDICTION;
2110   /* Different heuristics for pointers and scalars.  */
2111   if (POINTER_TYPE_P (TREE_TYPE (val)))
2112     {
2113       /* NULL is usually not returned.  */
2114       if (integer_zerop (val))
2115         {
2116           *prediction = NOT_TAKEN;
2117           return PRED_NULL_RETURN;
2118         }
2119     }
2120   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2121     {
2122       /* Negative return values are often used to indicate
2123          errors.  */
2124       if (TREE_CODE (val) == INTEGER_CST
2125           && tree_int_cst_sgn (val) < 0)
2126         {
2127           *prediction = NOT_TAKEN;
2128           return PRED_NEGATIVE_RETURN;
2129         }
2130       /* Constant return values seems to be commonly taken.
2131          Zero/one often represent booleans so exclude them from the
2132          heuristics.  */
2133       if (TREE_CONSTANT (val)
2134           && (!integer_zerop (val) && !integer_onep (val)))
2135         {
2136           *prediction = TAKEN;
2137           return PRED_CONST_RETURN;
2138         }
2139     }
2140   return PRED_NO_PREDICTION;
2141 }
2142
2143 /* Find the basic block with return expression and look up for possible
2144    return value trying to apply RETURN_PREDICTION heuristics.  */
2145 static void
2146 apply_return_prediction (void)
2147 {
2148   gimple return_stmt = NULL;
2149   tree return_val;
2150   edge e;
2151   gimple phi;
2152   int phi_num_args, i;
2153   enum br_predictor pred;
2154   enum prediction direction;
2155   edge_iterator ei;
2156
2157   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2158     {
2159       return_stmt = last_stmt (e->src);
2160       if (return_stmt
2161           && gimple_code (return_stmt) == GIMPLE_RETURN)
2162         break;
2163     }
2164   if (!e)
2165     return;
2166   return_val = gimple_return_retval (return_stmt);
2167   if (!return_val)
2168     return;
2169   if (TREE_CODE (return_val) != SSA_NAME
2170       || !SSA_NAME_DEF_STMT (return_val)
2171       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2172     return;
2173   phi = SSA_NAME_DEF_STMT (return_val);
2174   phi_num_args = gimple_phi_num_args (phi);
2175   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2176
2177   /* Avoid the degenerate case where all return values form the function
2178      belongs to same category (ie they are all positive constants)
2179      so we can hardly say something about them.  */
2180   for (i = 1; i < phi_num_args; i++)
2181     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2182       break;
2183   if (i != phi_num_args)
2184     for (i = 0; i < phi_num_args; i++)
2185       {
2186         pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2187         if (pred != PRED_NO_PREDICTION)
2188           predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2189                                          direction);
2190       }
2191 }
2192
2193 /* Look for basic block that contains unlikely to happen events
2194    (such as noreturn calls) and mark all paths leading to execution
2195    of this basic blocks as unlikely.  */
2196
2197 static void
2198 tree_bb_level_predictions (void)
2199 {
2200   basic_block bb;
2201   bool has_return_edges = false;
2202   edge e;
2203   edge_iterator ei;
2204
2205   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2206     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2207       {
2208         has_return_edges = true;
2209         break;
2210       }
2211
2212   apply_return_prediction ();
2213
2214   FOR_EACH_BB_FN (bb, cfun)
2215     {
2216       gimple_stmt_iterator gsi;
2217
2218       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2219         {
2220           gimple stmt = gsi_stmt (gsi);
2221           tree decl;
2222
2223           if (is_gimple_call (stmt))
2224             {
2225               if ((gimple_call_flags (stmt) & ECF_NORETURN)
2226                   && has_return_edges)
2227                 predict_paths_leading_to (bb, PRED_NORETURN,
2228                                           NOT_TAKEN);
2229               decl = gimple_call_fndecl (stmt);
2230               if (decl
2231                   && lookup_attribute ("cold",
2232                                        DECL_ATTRIBUTES (decl)))
2233                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2234                                           NOT_TAKEN);
2235             }
2236           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2237             {
2238               predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2239                                         gimple_predict_outcome (stmt));
2240               /* Keep GIMPLE_PREDICT around so early inlining will propagate
2241                  hints to callers.  */
2242             }
2243         }
2244     }
2245 }
2246
2247 #ifdef ENABLE_CHECKING
2248
2249 /* Callback for hash_map::traverse, asserts that the pointer map is
2250    empty.  */
2251
2252 bool
2253 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2254                  void *)
2255 {
2256   gcc_assert (!value);
2257   return false;
2258 }
2259 #endif
2260
2261 /* Predict branch probabilities and estimate profile for basic block BB.  */
2262
2263 static void
2264 tree_estimate_probability_bb (basic_block bb)
2265 {
2266   edge e;
2267   edge_iterator ei;
2268   gimple last;
2269
2270   FOR_EACH_EDGE (e, ei, bb->succs)
2271     {
2272       /* Predict edges to user labels with attributes.  */
2273       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2274         {
2275           gimple_stmt_iterator gi;
2276           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2277             {
2278               gimple stmt = gsi_stmt (gi);
2279               tree decl;
2280
2281               if (gimple_code (stmt) != GIMPLE_LABEL)
2282                 break;
2283               decl = gimple_label_label (stmt);
2284               if (DECL_ARTIFICIAL (decl))
2285                 continue;
2286
2287               /* Finally, we have a user-defined label.  */
2288               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2289                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2290               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2291                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2292             }
2293         }
2294
2295       /* Predict early returns to be probable, as we've already taken
2296          care for error returns and other cases are often used for
2297          fast paths through function.
2298
2299          Since we've already removed the return statements, we are
2300          looking for CFG like:
2301
2302          if (conditional)
2303          {
2304          ..
2305          goto return_block
2306          }
2307          some other blocks
2308          return_block:
2309          return_stmt.  */
2310       if (e->dest != bb->next_bb
2311           && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2312           && single_succ_p (e->dest)
2313           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2314           && (last = last_stmt (e->dest)) != NULL
2315           && gimple_code (last) == GIMPLE_RETURN)
2316         {
2317           edge e1;
2318           edge_iterator ei1;
2319
2320           if (single_succ_p (bb))
2321             {
2322               FOR_EACH_EDGE (e1, ei1, bb->preds)
2323                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2324                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2325                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2326                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2327             }
2328           else
2329             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2330                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2331                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2332               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2333         }
2334
2335       /* Look for block we are guarding (ie we dominate it,
2336          but it doesn't postdominate us).  */
2337       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2338           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2339           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2340         {
2341           gimple_stmt_iterator bi;
2342
2343           /* The call heuristic claims that a guarded function call
2344              is improbable.  This is because such calls are often used
2345              to signal exceptional situations such as printing error
2346              messages.  */
2347           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2348                gsi_next (&bi))
2349             {
2350               gimple stmt = gsi_stmt (bi);
2351               if (is_gimple_call (stmt)
2352                   /* Constant and pure calls are hardly used to signalize
2353                      something exceptional.  */
2354                   && gimple_has_side_effects (stmt))
2355                 {
2356                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2357                   break;
2358                 }
2359             }
2360         }
2361     }
2362   tree_predict_by_opcode (bb);
2363 }
2364
2365 /* Predict branch probabilities and estimate profile of the tree CFG.
2366    This function can be called from the loop optimizers to recompute
2367    the profile information.  */
2368
2369 void
2370 tree_estimate_probability (void)
2371 {
2372   basic_block bb;
2373
2374   add_noreturn_fake_exit_edges ();
2375   connect_infinite_loops_to_exit ();
2376   /* We use loop_niter_by_eval, which requires that the loops have
2377      preheaders.  */
2378   create_preheaders (CP_SIMPLE_PREHEADERS);
2379   calculate_dominance_info (CDI_POST_DOMINATORS);
2380
2381   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2382   tree_bb_level_predictions ();
2383   record_loop_exits ();
2384
2385   if (number_of_loops (cfun) > 1)
2386     predict_loops ();
2387
2388   FOR_EACH_BB_FN (bb, cfun)
2389     tree_estimate_probability_bb (bb);
2390
2391   FOR_EACH_BB_FN (bb, cfun)
2392     combine_predictions_for_bb (bb);
2393
2394 #ifdef ENABLE_CHECKING
2395   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2396 #endif
2397   delete bb_predictions;
2398   bb_predictions = NULL;
2399
2400   estimate_bb_frequencies (false);
2401   free_dominance_info (CDI_POST_DOMINATORS);
2402   remove_fake_exit_edges ();
2403 }
2404 \f
2405 /* Predict edges to successors of CUR whose sources are not postdominated by
2406    BB by PRED and recurse to all postdominators.  */
2407
2408 static void
2409 predict_paths_for_bb (basic_block cur, basic_block bb,
2410                       enum br_predictor pred,
2411                       enum prediction taken,
2412                       bitmap visited)
2413 {
2414   edge e;
2415   edge_iterator ei;
2416   basic_block son;
2417
2418   /* We are looking for all edges forming edge cut induced by
2419      set of all blocks postdominated by BB.  */
2420   FOR_EACH_EDGE (e, ei, cur->preds)
2421     if (e->src->index >= NUM_FIXED_BLOCKS
2422         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2423     {
2424       edge e2;
2425       edge_iterator ei2;
2426       bool found = false;
2427
2428       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2429       if (e->flags & (EDGE_EH | EDGE_FAKE))
2430         continue;
2431       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2432
2433       /* See if there is an edge from e->src that is not abnormal
2434          and does not lead to BB.  */
2435       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2436         if (e2 != e
2437             && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2438             && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2439           {
2440             found = true;
2441             break;
2442           }
2443
2444       /* If there is non-abnormal path leaving e->src, predict edge
2445          using predictor.  Otherwise we need to look for paths
2446          leading to e->src.
2447
2448          The second may lead to infinite loop in the case we are predicitng
2449          regions that are only reachable by abnormal edges.  We simply
2450          prevent visiting given BB twice.  */
2451       if (found)
2452         predict_edge_def (e, pred, taken);
2453       else if (bitmap_set_bit (visited, e->src->index))
2454         predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2455     }
2456   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2457        son;
2458        son = next_dom_son (CDI_POST_DOMINATORS, son))
2459     predict_paths_for_bb (son, bb, pred, taken, visited);
2460 }
2461
2462 /* Sets branch probabilities according to PREDiction and
2463    FLAGS.  */
2464
2465 static void
2466 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2467                           enum prediction taken)
2468 {
2469   bitmap visited = BITMAP_ALLOC (NULL);
2470   predict_paths_for_bb (bb, bb, pred, taken, visited);
2471   BITMAP_FREE (visited);
2472 }
2473
2474 /* Like predict_paths_leading_to but take edge instead of basic block.  */
2475
2476 static void
2477 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2478                                enum prediction taken)
2479 {
2480   bool has_nonloop_edge = false;
2481   edge_iterator ei;
2482   edge e2;
2483
2484   basic_block bb = e->src;
2485   FOR_EACH_EDGE (e2, ei, bb->succs)
2486     if (e2->dest != e->src && e2->dest != e->dest
2487         && !(e->flags & (EDGE_EH | EDGE_FAKE))
2488         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2489       {
2490         has_nonloop_edge = true;
2491         break;
2492       }
2493   if (!has_nonloop_edge)
2494     {
2495       bitmap visited = BITMAP_ALLOC (NULL);
2496       predict_paths_for_bb (bb, bb, pred, taken, visited);
2497       BITMAP_FREE (visited);
2498     }
2499   else
2500     predict_edge_def (e, pred, taken);
2501 }
2502 \f
2503 /* This is used to carry information about basic blocks.  It is
2504    attached to the AUX field of the standard CFG block.  */
2505
2506 struct block_info
2507 {
2508   /* Estimated frequency of execution of basic_block.  */
2509   sreal frequency;
2510
2511   /* To keep queue of basic blocks to process.  */
2512   basic_block next;
2513
2514   /* Number of predecessors we need to visit first.  */
2515   int npredecessors;
2516 };
2517
2518 /* Similar information for edges.  */
2519 struct edge_prob_info
2520 {
2521   /* In case edge is a loopback edge, the probability edge will be reached
2522      in case header is.  Estimated number of iterations of the loop can be
2523      then computed as 1 / (1 - back_edge_prob).  */
2524   sreal back_edge_prob;
2525   /* True if the edge is a loopback edge in the natural loop.  */
2526   unsigned int back_edge:1;
2527 };
2528
2529 #define BLOCK_INFO(B)   ((block_info *) (B)->aux)
2530 #define EDGE_INFO(E)    ((edge_prob_info *) (E)->aux)
2531
2532 /* Helper function for estimate_bb_frequencies.
2533    Propagate the frequencies in blocks marked in
2534    TOVISIT, starting in HEAD.  */
2535
2536 static void
2537 propagate_freq (basic_block head, bitmap tovisit)
2538 {
2539   basic_block bb;
2540   basic_block last;
2541   unsigned i;
2542   edge e;
2543   basic_block nextbb;
2544   bitmap_iterator bi;
2545
2546   /* For each basic block we need to visit count number of his predecessors
2547      we need to visit first.  */
2548   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2549     {
2550       edge_iterator ei;
2551       int count = 0;
2552
2553       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2554
2555       FOR_EACH_EDGE (e, ei, bb->preds)
2556         {
2557           bool visit = bitmap_bit_p (tovisit, e->src->index);
2558
2559           if (visit && !(e->flags & EDGE_DFS_BACK))
2560             count++;
2561           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2562             fprintf (dump_file,
2563                      "Irreducible region hit, ignoring edge to %i->%i\n",
2564                      e->src->index, bb->index);
2565         }
2566       BLOCK_INFO (bb)->npredecessors = count;
2567       /* When function never returns, we will never process exit block.  */
2568       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2569         bb->count = bb->frequency = 0;
2570     }
2571
2572   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2573   last = head;
2574   for (bb = head; bb; bb = nextbb)
2575     {
2576       edge_iterator ei;
2577       sreal cyclic_probability, frequency;
2578
2579       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2580       memcpy (&frequency, &real_zero, sizeof (real_zero));
2581
2582       nextbb = BLOCK_INFO (bb)->next;
2583       BLOCK_INFO (bb)->next = NULL;
2584
2585       /* Compute frequency of basic block.  */
2586       if (bb != head)
2587         {
2588 #ifdef ENABLE_CHECKING
2589           FOR_EACH_EDGE (e, ei, bb->preds)
2590             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2591                         || (e->flags & EDGE_DFS_BACK));
2592 #endif
2593
2594           FOR_EACH_EDGE (e, ei, bb->preds)
2595             if (EDGE_INFO (e)->back_edge)
2596               {
2597                 sreal_add (&cyclic_probability, &cyclic_probability,
2598                            &EDGE_INFO (e)->back_edge_prob);
2599               }
2600             else if (!(e->flags & EDGE_DFS_BACK))
2601               {
2602                 sreal tmp;
2603
2604                 /*  frequency += (e->probability
2605                                   * BLOCK_INFO (e->src)->frequency /
2606                                   REG_BR_PROB_BASE);  */
2607
2608                 sreal_init (&tmp, e->probability, 0);
2609                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2610                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2611                 sreal_add (&frequency, &frequency, &tmp);
2612               }
2613
2614           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2615             {
2616               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2617                       sizeof (frequency));
2618             }
2619           else
2620             {
2621               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2622                 {
2623                   memcpy (&cyclic_probability, &real_almost_one,
2624                           sizeof (real_almost_one));
2625                 }
2626
2627               /* BLOCK_INFO (bb)->frequency = frequency
2628                                               / (1 - cyclic_probability) */
2629
2630               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2631               sreal_div (&BLOCK_INFO (bb)->frequency,
2632                          &frequency, &cyclic_probability);
2633             }
2634         }
2635
2636       bitmap_clear_bit (tovisit, bb->index);
2637
2638       e = find_edge (bb, head);
2639       if (e)
2640         {
2641           sreal tmp;
2642
2643           /* EDGE_INFO (e)->back_edge_prob
2644              = ((e->probability * BLOCK_INFO (bb)->frequency)
2645              / REG_BR_PROB_BASE); */
2646
2647           sreal_init (&tmp, e->probability, 0);
2648           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2649           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2650                      &tmp, &real_inv_br_prob_base);
2651         }
2652
2653       /* Propagate to successor blocks.  */
2654       FOR_EACH_EDGE (e, ei, bb->succs)
2655         if (!(e->flags & EDGE_DFS_BACK)
2656             && BLOCK_INFO (e->dest)->npredecessors)
2657           {
2658             BLOCK_INFO (e->dest)->npredecessors--;
2659             if (!BLOCK_INFO (e->dest)->npredecessors)
2660               {
2661                 if (!nextbb)
2662                   nextbb = e->dest;
2663                 else
2664                   BLOCK_INFO (last)->next = e->dest;
2665
2666                 last = e->dest;
2667               }
2668           }
2669     }
2670 }
2671
2672 /* Estimate frequencies in loops at same nest level.  */
2673
2674 static void
2675 estimate_loops_at_level (struct loop *first_loop)
2676 {
2677   struct loop *loop;
2678
2679   for (loop = first_loop; loop; loop = loop->next)
2680     {
2681       edge e;
2682       basic_block *bbs;
2683       unsigned i;
2684       bitmap tovisit = BITMAP_ALLOC (NULL);
2685
2686       estimate_loops_at_level (loop->inner);
2687
2688       /* Find current loop back edge and mark it.  */
2689       e = loop_latch_edge (loop);
2690       EDGE_INFO (e)->back_edge = 1;
2691
2692       bbs = get_loop_body (loop);
2693       for (i = 0; i < loop->num_nodes; i++)
2694         bitmap_set_bit (tovisit, bbs[i]->index);
2695       free (bbs);
2696       propagate_freq (loop->header, tovisit);
2697       BITMAP_FREE (tovisit);
2698     }
2699 }
2700
2701 /* Propagates frequencies through structure of loops.  */
2702
2703 static void
2704 estimate_loops (void)
2705 {
2706   bitmap tovisit = BITMAP_ALLOC (NULL);
2707   basic_block bb;
2708
2709   /* Start by estimating the frequencies in the loops.  */
2710   if (number_of_loops (cfun) > 1)
2711     estimate_loops_at_level (current_loops->tree_root->inner);
2712
2713   /* Now propagate the frequencies through all the blocks.  */
2714   FOR_ALL_BB_FN (bb, cfun)
2715     {
2716       bitmap_set_bit (tovisit, bb->index);
2717     }
2718   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2719   BITMAP_FREE (tovisit);
2720 }
2721
2722 /* Drop the profile for NODE to guessed, and update its frequency based on
2723    whether it is expected to be hot given the CALL_COUNT.  */
2724
2725 static void
2726 drop_profile (struct cgraph_node *node, gcov_type call_count)
2727 {
2728   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2729   /* In the case where this was called by another function with a
2730      dropped profile, call_count will be 0. Since there are no
2731      non-zero call counts to this function, we don't know for sure
2732      whether it is hot, and therefore it will be marked normal below.  */
2733   bool hot = maybe_hot_count_p (NULL, call_count);
2734
2735   if (dump_file)
2736     fprintf (dump_file,
2737              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2738              node->name (), node->order,
2739              hot ? "Function is hot" : "Function is normal");
2740   /* We only expect to miss profiles for functions that are reached
2741      via non-zero call edges in cases where the function may have
2742      been linked from another module or library (COMDATs and extern
2743      templates). See the comments below for handle_missing_profiles.
2744      Also, only warn in cases where the missing counts exceed the
2745      number of training runs. In certain cases with an execv followed
2746      by a no-return call the profile for the no-return call is not
2747      dumped and there can be a mismatch.  */
2748   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2749       && call_count > profile_info->runs)
2750     {
2751       if (flag_profile_correction)
2752         {
2753           if (dump_file)
2754             fprintf (dump_file,
2755                      "Missing counts for called function %s/%i\n",
2756                      node->name (), node->order);
2757         }
2758       else
2759         warning (0, "Missing counts for called function %s/%i",
2760                  node->name (), node->order);
2761     }
2762
2763   profile_status_for_fn (fn)
2764       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2765   node->frequency
2766       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2767 }
2768
2769 /* In the case of COMDAT routines, multiple object files will contain the same
2770    function and the linker will select one for the binary. In that case
2771    all the other copies from the profile instrument binary will be missing
2772    profile counts. Look for cases where this happened, due to non-zero
2773    call counts going to 0-count functions, and drop the profile to guessed
2774    so that we can use the estimated probabilities and avoid optimizing only
2775    for size.
2776
2777    The other case where the profile may be missing is when the routine
2778    is not going to be emitted to the object file, e.g. for "extern template"
2779    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2780    all other cases of non-zero calls to 0-count functions.  */
2781
2782 void
2783 handle_missing_profiles (void)
2784 {
2785   struct cgraph_node *node;
2786   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2787   vec<struct cgraph_node *> worklist;
2788   worklist.create (64);
2789
2790   /* See if 0 count function has non-0 count callers.  In this case we
2791      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2792   FOR_EACH_DEFINED_FUNCTION (node)
2793     {
2794       struct cgraph_edge *e;
2795       gcov_type call_count = 0;
2796       gcov_type max_tp_first_run = 0;
2797       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2798
2799       if (node->count)
2800         continue;
2801       for (e = node->callers; e; e = e->next_caller)
2802       {
2803         call_count += e->count;
2804
2805         if (e->caller->tp_first_run > max_tp_first_run)
2806           max_tp_first_run = e->caller->tp_first_run;
2807       }
2808
2809       /* If time profile is missing, let assign the maximum that comes from
2810          caller functions.  */
2811       if (!node->tp_first_run && max_tp_first_run)
2812         node->tp_first_run = max_tp_first_run + 1;
2813
2814       if (call_count
2815           && fn && fn->cfg
2816           && (call_count * unlikely_count_fraction >= profile_info->runs))
2817         {
2818           drop_profile (node, call_count);
2819           worklist.safe_push (node);
2820         }
2821     }
2822
2823   /* Propagate the profile dropping to other 0-count COMDATs that are
2824      potentially called by COMDATs we already dropped the profile on.  */
2825   while (worklist.length () > 0)
2826     {
2827       struct cgraph_edge *e;
2828
2829       node = worklist.pop ();
2830       for (e = node->callees; e; e = e->next_caller)
2831         {
2832           struct cgraph_node *callee = e->callee;
2833           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2834
2835           if (callee->count > 0)
2836             continue;
2837           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2838               && profile_status_for_fn (fn) == PROFILE_READ)
2839             {
2840               drop_profile (node, 0);
2841               worklist.safe_push (callee);
2842             }
2843         }
2844     }
2845   worklist.release ();
2846 }
2847
2848 /* Convert counts measured by profile driven feedback to frequencies.
2849    Return nonzero iff there was any nonzero execution count.  */
2850
2851 int
2852 counts_to_freqs (void)
2853 {
2854   gcov_type count_max, true_count_max = 0;
2855   basic_block bb;
2856
2857   /* Don't overwrite the estimated frequencies when the profile for
2858      the function is missing.  We may drop this function PROFILE_GUESSED
2859      later in drop_profile ().  */
2860   if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2861     return 0;
2862
2863   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2864     true_count_max = MAX (bb->count, true_count_max);
2865
2866   count_max = MAX (true_count_max, 1);
2867   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2868     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2869
2870   return true_count_max;
2871 }
2872
2873 /* Return true if function is likely to be expensive, so there is no point to
2874    optimize performance of prologue, epilogue or do inlining at the expense
2875    of code size growth.  THRESHOLD is the limit of number of instructions
2876    function can execute at average to be still considered not expensive.  */
2877
2878 bool
2879 expensive_function_p (int threshold)
2880 {
2881   unsigned int sum = 0;
2882   basic_block bb;
2883   unsigned int limit;
2884
2885   /* We can not compute accurately for large thresholds due to scaled
2886      frequencies.  */
2887   gcc_assert (threshold <= BB_FREQ_MAX);
2888
2889   /* Frequencies are out of range.  This either means that function contains
2890      internal loop executing more than BB_FREQ_MAX times or profile feedback
2891      is available and function has not been executed at all.  */
2892   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2893     return true;
2894
2895   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2896   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2897   FOR_EACH_BB_FN (bb, cfun)
2898     {
2899       rtx_insn *insn;
2900
2901       FOR_BB_INSNS (bb, insn)
2902         if (active_insn_p (insn))
2903           {
2904             sum += bb->frequency;
2905             if (sum > limit)
2906               return true;
2907         }
2908     }
2909
2910   return false;
2911 }
2912
2913 /* Estimate and propagate basic block frequencies using the given branch
2914    probabilities.  If FORCE is true, the frequencies are used to estimate
2915    the counts even when there are already non-zero profile counts.  */
2916
2917 void
2918 estimate_bb_frequencies (bool force)
2919 {
2920   basic_block bb;
2921   sreal freq_max;
2922
2923   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2924     {
2925       static int real_values_initialized = 0;
2926
2927       if (!real_values_initialized)
2928         {
2929           real_values_initialized = 1;
2930           sreal_init (&real_zero, 0, 0);
2931           sreal_init (&real_one, 1, 0);
2932           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2933           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2934           sreal_init (&real_one_half, 1, -1);
2935           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2936           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2937         }
2938
2939       mark_dfs_back_edges ();
2940
2941       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2942          REG_BR_PROB_BASE;
2943
2944       /* Set up block info for each basic block.  */
2945       alloc_aux_for_blocks (sizeof (block_info));
2946       alloc_aux_for_edges (sizeof (edge_prob_info));
2947       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2948         {
2949           edge e;
2950           edge_iterator ei;
2951
2952           FOR_EACH_EDGE (e, ei, bb->succs)
2953             {
2954               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2955               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2956                          &EDGE_INFO (e)->back_edge_prob,
2957                          &real_inv_br_prob_base);
2958             }
2959         }
2960
2961       /* First compute frequencies locally for each loop from innermost
2962          to outermost to examine frequencies for back edges.  */
2963       estimate_loops ();
2964
2965       memcpy (&freq_max, &real_zero, sizeof (real_zero));
2966       FOR_EACH_BB_FN (bb, cfun)
2967         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2968           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2969
2970       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2971       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2972         {
2973           sreal tmp;
2974
2975           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2976           sreal_add (&tmp, &tmp, &real_one_half);
2977           bb->frequency = sreal_to_int (&tmp);
2978         }
2979
2980       free_aux_for_blocks ();
2981       free_aux_for_edges ();
2982     }
2983   compute_function_frequency ();
2984 }
2985
2986 /* Decide whether function is hot, cold or unlikely executed.  */
2987 void
2988 compute_function_frequency (void)
2989 {
2990   basic_block bb;
2991   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2992
2993   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2994       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2995     node->only_called_at_startup = true;
2996   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2997     node->only_called_at_exit = true;
2998
2999   if (profile_status_for_fn (cfun) != PROFILE_READ)
3000     {
3001       int flags = flags_from_decl_or_type (current_function_decl);
3002       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3003           != NULL)
3004         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3005       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3006                != NULL)
3007         node->frequency = NODE_FREQUENCY_HOT;
3008       else if (flags & ECF_NORETURN)
3009         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3010       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3011         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3012       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3013                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3014         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3015       return;
3016     }
3017
3018   /* Only first time try to drop function into unlikely executed.
3019      After inlining the roundoff errors may confuse us.
3020      Ipa-profile pass will drop functions only called from unlikely
3021      functions to unlikely and that is most of what we care about.  */
3022   if (!cfun->after_inlining)
3023     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3024   FOR_EACH_BB_FN (bb, cfun)
3025     {
3026       if (maybe_hot_bb_p (cfun, bb))
3027         {
3028           node->frequency = NODE_FREQUENCY_HOT;
3029           return;
3030         }
3031       if (!probably_never_executed_bb_p (cfun, bb))
3032         node->frequency = NODE_FREQUENCY_NORMAL;
3033     }
3034 }
3035
3036 /* Build PREDICT_EXPR.  */
3037 tree
3038 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3039 {
3040   tree t = build1 (PREDICT_EXPR, void_type_node,
3041                    build_int_cst (integer_type_node, predictor));
3042   SET_PREDICT_EXPR_OUTCOME (t, taken);
3043   return t;
3044 }
3045
3046 const char *
3047 predictor_name (enum br_predictor predictor)
3048 {
3049   return predictor_info[predictor].name;
3050 }
3051
3052 /* Predict branch probabilities and estimate profile of the tree CFG. */
3053
3054 namespace {
3055
3056 const pass_data pass_data_profile =
3057 {
3058   GIMPLE_PASS, /* type */
3059   "profile_estimate", /* name */
3060   OPTGROUP_NONE, /* optinfo_flags */
3061   TV_BRANCH_PROB, /* tv_id */
3062   PROP_cfg, /* properties_required */
3063   0, /* properties_provided */
3064   0, /* properties_destroyed */
3065   0, /* todo_flags_start */
3066   0, /* todo_flags_finish */
3067 };
3068
3069 class pass_profile : public gimple_opt_pass
3070 {
3071 public:
3072   pass_profile (gcc::context *ctxt)
3073     : gimple_opt_pass (pass_data_profile, ctxt)
3074   {}
3075
3076   /* opt_pass methods: */
3077   virtual bool gate (function *) { return flag_guess_branch_prob; }
3078   virtual unsigned int execute (function *);
3079
3080 }; // class pass_profile
3081
3082 unsigned int
3083 pass_profile::execute (function *fun)
3084 {
3085   unsigned nb_loops;
3086
3087   loop_optimizer_init (LOOPS_NORMAL);
3088   if (dump_file && (dump_flags & TDF_DETAILS))
3089     flow_loops_dump (dump_file, NULL, 0);
3090
3091   mark_irreducible_loops ();
3092
3093   nb_loops = number_of_loops (fun);
3094   if (nb_loops > 1)
3095     scev_initialize ();
3096
3097   tree_estimate_probability ();
3098
3099   if (nb_loops > 1)
3100     scev_finalize ();
3101
3102   loop_optimizer_finalize ();
3103   if (dump_file && (dump_flags & TDF_DETAILS))
3104     gimple_dump_cfg (dump_file, dump_flags);
3105  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3106     profile_status_for_fn (fun) = PROFILE_GUESSED;
3107   return 0;
3108 }
3109
3110 } // anon namespace
3111
3112 gimple_opt_pass *
3113 make_pass_profile (gcc::context *ctxt)
3114 {
3115   return new pass_profile (ctxt);
3116 }
3117
3118 namespace {
3119
3120 const pass_data pass_data_strip_predict_hints =
3121 {
3122   GIMPLE_PASS, /* type */
3123   "*strip_predict_hints", /* name */
3124   OPTGROUP_NONE, /* optinfo_flags */
3125   TV_BRANCH_PROB, /* tv_id */
3126   PROP_cfg, /* properties_required */
3127   0, /* properties_provided */
3128   0, /* properties_destroyed */
3129   0, /* todo_flags_start */
3130   0, /* todo_flags_finish */
3131 };
3132
3133 class pass_strip_predict_hints : public gimple_opt_pass
3134 {
3135 public:
3136   pass_strip_predict_hints (gcc::context *ctxt)
3137     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3138   {}
3139
3140   /* opt_pass methods: */
3141   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3142   virtual unsigned int execute (function *);
3143
3144 }; // class pass_strip_predict_hints
3145
3146 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3147    we no longer need.  */
3148 unsigned int
3149 pass_strip_predict_hints::execute (function *fun)
3150 {
3151   basic_block bb;
3152   gimple ass_stmt;
3153   tree var;
3154
3155   FOR_EACH_BB_FN (bb, fun)
3156     {
3157       gimple_stmt_iterator bi;
3158       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3159         {
3160           gimple stmt = gsi_stmt (bi);
3161
3162           if (gimple_code (stmt) == GIMPLE_PREDICT)
3163             {
3164               gsi_remove (&bi, true);
3165               continue;
3166             }
3167           else if (is_gimple_call (stmt))
3168             {
3169               tree fndecl = gimple_call_fndecl (stmt);
3170
3171               if ((fndecl
3172                    && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3173                    && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3174                    && gimple_call_num_args (stmt) == 2)
3175                   || (gimple_call_internal_p (stmt)
3176                       && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3177                 {
3178                   var = gimple_call_lhs (stmt);
3179                   if (var)
3180                     {
3181                       ass_stmt
3182                         = gimple_build_assign (var, gimple_call_arg (stmt, 0));
3183                       gsi_replace (&bi, ass_stmt, true);
3184                     }
3185                   else
3186                     {
3187                       gsi_remove (&bi, true);
3188                       continue;
3189                     }
3190                 }
3191             }
3192           gsi_next (&bi);
3193         }
3194     }
3195   return 0;
3196 }
3197
3198 } // anon namespace
3199
3200 gimple_opt_pass *
3201 make_pass_strip_predict_hints (gcc::context *ctxt)
3202 {
3203   return new pass_strip_predict_hints (ctxt);
3204 }
3205
3206 /* Rebuild function frequencies.  Passes are in general expected to
3207    maintain profile by hand, however in some cases this is not possible:
3208    for example when inlining several functions with loops freuqencies might run
3209    out of scale and thus needs to be recomputed.  */
3210
3211 void
3212 rebuild_frequencies (void)
3213 {
3214   timevar_push (TV_REBUILD_FREQUENCIES);
3215
3216   /* When the max bb count in the function is small, there is a higher
3217      chance that there were truncation errors in the integer scaling
3218      of counts by inlining and other optimizations. This could lead
3219      to incorrect classification of code as being cold when it isn't.
3220      In that case, force the estimation of bb counts/frequencies from the
3221      branch probabilities, rather than computing frequencies from counts,
3222      which may also lead to frequencies incorrectly reduced to 0. There
3223      is less precision in the probabilities, so we only do this for small
3224      max counts.  */
3225   gcov_type count_max = 0;
3226   basic_block bb;
3227   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3228     count_max = MAX (bb->count, count_max);
3229
3230   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3231       || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3232     {
3233       loop_optimizer_init (0);
3234       add_noreturn_fake_exit_edges ();
3235       mark_irreducible_loops ();
3236       connect_infinite_loops_to_exit ();
3237       estimate_bb_frequencies (true);
3238       remove_fake_exit_edges ();
3239       loop_optimizer_finalize ();
3240     }
3241   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3242     counts_to_freqs ();
3243   else
3244     gcc_unreachable ();
3245   timevar_pop (TV_REBUILD_FREQUENCIES);
3246 }