gcc-4_8/gcc/predict.c

   1 /* Branch prediction routines for the GNU compiler.
   2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* References:
  21
  22    [1] "Branch Prediction for Free"
  23        Ball and Larus; PLDI '93.
  24    [2] "Static Branch Frequency and Program Profile Analysis"
  25        Wu and Larus; MICRO-27.
  26    [3] "Corpus-based Static Branch Prediction"
  27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
  28
  29
  30 #include "config.h"
  31 #include "system.h"
  32 #include "coretypes.h"
  33 #include "tm.h"
  34 #include "tree.h"
  35 #include "rtl.h"
  36 #include "tm_p.h"
  37 #include "hard-reg-set.h"
  38 #include "basic-block.h"
  39 #include "insn-config.h"
  40 #include "regs.h"
  41 #include "flags.h"
  42 #include "function.h"
  43 #include "except.h"
  44 #include "diagnostic-core.h"
  45 #include "recog.h"
  46 #include "expr.h"
  47 #include "predict.h"
  48 #include "coverage.h"
  49 #include "sreal.h"
  50 #include "params.h"
  51 #include "target.h"
  52 #include "cfgloop.h"
  53 #include "tree-flow.h"
  54 #include "ggc.h"
  55 #include "tree-pass.h"
  56 #include "tree-scalar-evolution.h"
  57 #include "cfgloop.h"
  58 #include "pointer-set.h"
  59
  60 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
  61                    1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
  62 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
  63              real_inv_br_prob_base, real_one_half, real_bb_freq_max;
  64
  65 /* Named probability guesses.
  66    PROB_VERY_UNLIKELY should be small enough that a basic block predicted
  67    by it falls below HOT_BB_FREQUENCY_FRACTION.  */
  68 #define PROB_VERY_UNLIKELY      (REG_BR_PROB_BASE / 2000 - 1)
  69 #define PROB_EVEN               (REG_BR_PROB_BASE / 2)
  70 #define PROB_VERY_LIKELY        (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
  71 #define PROB_ALWAYS             (REG_BR_PROB_BASE)
  72
  73 static void combine_predictions_for_insn (rtx, basic_block);
  74 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
  75 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
  76 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
  77 static bool can_predict_insn_p (const_rtx);
  78
  79 /* Information we hold about each branch predictor.
  80    Filled using information from predict.def.  */
  81
  82 struct predictor_info
  83 {
  84   const char *const name;       /* Name used in the debugging dumps.  */
  85   const int hitrate;            /* Expected hitrate used by
  86                                    predict_insn_def call.  */
  87   const int flags;
  88 };
  89
  90 /* Use given predictor without Dempster-Shafer theory if it matches
  91    using first_match heuristics.  */
  92 #define PRED_FLAG_FIRST_MATCH 1
  93
  94 /* Recompute hitrate in percent to our representation.  */
  95
  96 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
  97
  98 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
  99 static const struct predictor_info predictor_info[]= {
 100 #include "predict.def"
 101
 102   /* Upper bound on predictors.  */
 103   {NULL, 0, 0}
 104 };
 105 #undef DEF_PREDICTOR
 106
 107 /* Return TRUE if frequency FREQ is considered to be hot.  */
 108
 109 static inline bool
 110 maybe_hot_frequency_p (struct function *fun, int freq)
 111 {
 112   struct cgraph_node *node = cgraph_get_node (fun->decl);
 113   if (!profile_info || !flag_branch_probabilities)
 114     {
 115       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
 116         return false;
 117       if (node->frequency == NODE_FREQUENCY_HOT)
 118         return true;
 119     }
 120   if (profile_status_for_function (fun) == PROFILE_ABSENT)
 121     return true;
 122   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 123       && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
 124     return false;
 125   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
 126     return false;
 127   if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
 128               / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 129     return false;
 130   return true;
 131 }
 132
 133 /* Return TRUE if frequency FREQ is considered to be hot.  */
 134
 135 bool
 136 maybe_hot_count_p (struct function *fun, gcov_type count)
 137 {
 138   gcov_working_set_t *ws;
 139   static gcov_type min_count = -1;
 140   if (fun && profile_status_for_function (fun) != PROFILE_READ)
 141     return true;
 142   if (!profile_info)
 143     return false;
 144   /* Code executed at most once is not hot.  */
 145   if (profile_info->runs >= count)
 146     return false;
 147   if (min_count == -1)
 148     {
 149       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
 150       gcc_assert (ws);
 151       min_count = ws->min_counter;
 152     }
 153   return (count >= min_count);
 154 }
 155
 156 /* Return true in case BB can be CPU intensive and should be optimized
 157    for maximal performance.  */
 158
 159 bool
 160 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 161 {
 162   gcc_checking_assert (fun);
 163   if (profile_status_for_function (fun) == PROFILE_READ)
 164     return maybe_hot_count_p (fun, bb->count);
 165   return maybe_hot_frequency_p (fun, bb->frequency);
 166 }
 167
 168 /* Return true if the call can be hot.  */
 169
 170 bool
 171 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 172 {
 173   if (profile_info && flag_branch_probabilities
 174       && !maybe_hot_count_p (NULL,
 175                              edge->count))
 176     return false;
 177   if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
 178       || (edge->callee
 179           && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 180     return false;
 181   if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
 182       && (edge->callee
 183           && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
 184     return false;
 185   if (optimize_size)
 186     return false;
 187   if (edge->caller->frequency == NODE_FREQUENCY_HOT)
 188     return true;
 189   if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
 190       && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
 191     return false;
 192   if (flag_guess_branch_prob)
 193     {
 194       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
 195           || edge->frequency <= (CGRAPH_FREQ_BASE
 196                                  / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
 197         return false;
 198     }
 199   return true;
 200 }
 201
 202 /* Return true in case BB can be CPU intensive and should be optimized
 203    for maximal performance.  */
 204
 205 bool
 206 maybe_hot_edge_p (edge e)
 207 {
 208   if (profile_status == PROFILE_READ)
 209     return maybe_hot_count_p (cfun, e->count);
 210   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 211 }
 212
 213
 214
 215 /* Return true if profile COUNT and FREQUENCY, or function FUN static
 216    node frequency reflects never being executed.  */
 217
 218 static bool
 219 probably_never_executed (struct function *fun,
 220                          gcov_type count, int frequency)
 221 {
 222   gcc_checking_assert (fun);
 223   if (profile_status_for_function (fun) == PROFILE_READ)
 224     {
 225       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
 226       if (count * unlikely_count_fraction >= profile_info->runs)
 227         return false;
 228       if (!frequency)
 229         return true;
 230       if (!ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->frequency)
 231         return false;
 232       if (ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->count)
 233         {
 234           gcov_type computed_count;
 235           /* Check for possibility of overflow, in which case entry bb count
 236              is large enough to do the division first without losing much
 237              precision.  */
 238           if (ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->count < REG_BR_PROB_BASE *
 239               REG_BR_PROB_BASE)
 240             {
 241               gcov_type scaled_count
 242                   = frequency * ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->count
 243                   * unlikely_count_fraction;
 244               computed_count
 245                   = RDIV (scaled_count,
 246                           ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->frequency);
 247             }
 248           else
 249             {
 250               computed_count
 251                   = RDIV (ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->count,
 252                           ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)->frequency);
 253               computed_count *= frequency * unlikely_count_fraction;
 254             }
 255           if (computed_count >= profile_info->runs)
 256             return false;
 257         }
 258       return true;
 259     }
 260   if ((!profile_info || !flag_branch_probabilities)
 261       && (cgraph_get_node (fun->decl)->frequency
 262           == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 263     return true;
 264   return false;
 265 }
 266
 267
 268 /* Return true in case BB is probably never executed.  */
 269
 270 bool
 271 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
 272 {
 273   return probably_never_executed (fun, bb->count, bb->frequency);
 274 }
 275
 276
 277 /* Return true in case edge E is probably never executed.  */
 278
 279 bool
 280 probably_never_executed_edge_p (struct function *fun, edge e)
 281 {
 282   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
 283 }
 284
 285 /* Return true if NODE should be optimized for size.  */
 286
 287 bool
 288 cgraph_optimize_for_size_p (struct cgraph_node *node)
 289 {
 290   if (optimize_size)
 291     return true;
 292   if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
 293     return true;
 294   else
 295     return false;
 296 }
 297
 298 /* Return true when current function should always be optimized for size.  */
 299
 300 bool
 301 optimize_function_for_size_p (struct function *fun)
 302 {
 303   if (optimize_size)
 304     return true;
 305   if (!fun || !fun->decl)
 306     return false;
 307   return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
 308 }
 309
 310 /* Return true when current function should always be optimized for speed.  */
 311
 312 bool
 313 optimize_function_for_speed_p (struct function *fun)
 314 {
 315   return !optimize_function_for_size_p (fun);
 316 }
 317
 318 /* Return TRUE when BB should be optimized for size.  */
 319
 320 bool
 321 optimize_bb_for_size_p (const_basic_block bb)
 322 {
 323   return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
 324 }
 325
 326 /* Return TRUE when BB should be optimized for speed.  */
 327
 328 bool
 329 optimize_bb_for_speed_p (const_basic_block bb)
 330 {
 331   return !optimize_bb_for_size_p (bb);
 332 }
 333
 334 /* Return TRUE when BB should be optimized for size.  */
 335
 336 bool
 337 optimize_edge_for_size_p (edge e)
 338 {
 339   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
 340 }
 341
 342 /* Return TRUE when BB should be optimized for speed.  */
 343
 344 bool
 345 optimize_edge_for_speed_p (edge e)
 346 {
 347   return !optimize_edge_for_size_p (e);
 348 }
 349
 350 /* Return TRUE when BB should be optimized for size.  */
 351
 352 bool
 353 optimize_insn_for_size_p (void)
 354 {
 355   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
 356 }
 357
 358 /* Return TRUE when BB should be optimized for speed.  */
 359
 360 bool
 361 optimize_insn_for_speed_p (void)
 362 {
 363   return !optimize_insn_for_size_p ();
 364 }
 365
 366 /* Return TRUE when LOOP should be optimized for size.  */
 367
 368 bool
 369 optimize_loop_for_size_p (struct loop *loop)
 370 {
 371   return optimize_bb_for_size_p (loop->header);
 372 }
 373
 374 /* Return TRUE when LOOP should be optimized for speed.  */
 375
 376 bool
 377 optimize_loop_for_speed_p (struct loop *loop)
 378 {
 379   return optimize_bb_for_speed_p (loop->header);
 380 }
 381
 382 /* Return TRUE when LOOP nest should be optimized for speed.  */
 383
 384 bool
 385 optimize_loop_nest_for_speed_p (struct loop *loop)
 386 {
 387   struct loop *l = loop;
 388   if (optimize_loop_for_speed_p (loop))
 389     return true;
 390   l = loop->inner;
 391   while (l && l != loop)
 392     {
 393       if (optimize_loop_for_speed_p (l))
 394         return true;
 395       if (l->inner)
 396         l = l->inner;
 397       else if (l->next)
 398         l = l->next;
 399       else
 400         {
 401           while (l != loop && !l->next)
 402             l = loop_outer (l);
 403           if (l != loop)
 404             l = l->next;
 405         }
 406     }
 407   return false;
 408 }
 409
 410 /* Return TRUE when LOOP nest should be optimized for size.  */
 411
 412 bool
 413 optimize_loop_nest_for_size_p (struct loop *loop)
 414 {
 415   return !optimize_loop_nest_for_speed_p (loop);
 416 }
 417
 418 /* Return true when edge E is likely to be well predictable by branch
 419    predictor.  */
 420
 421 bool
 422 predictable_edge_p (edge e)
 423 {
 424   if (profile_status == PROFILE_ABSENT)
 425     return false;
 426   if ((e->probability
 427        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
 428       || (REG_BR_PROB_BASE - e->probability
 429           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
 430     return true;
 431   return false;
 432 }
 433
 434
 435 /* Set RTL expansion for BB profile.  */
 436
 437 void
 438 rtl_profile_for_bb (basic_block bb)
 439 {
 440   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
 441 }
 442
 443 /* Set RTL expansion for edge profile.  */
 444
 445 void
 446 rtl_profile_for_edge (edge e)
 447 {
 448   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
 449 }
 450
 451 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
 452 void
 453 default_rtl_profile (void)
 454 {
 455   crtl->maybe_hot_insn_p = true;
 456 }
 457
 458 /* Return true if the one of outgoing edges is already predicted by
 459    PREDICTOR.  */
 460
 461 bool
 462 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 463 {
 464   rtx note;
 465   if (!INSN_P (BB_END (bb)))
 466     return false;
 467   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
 468     if (REG_NOTE_KIND (note) == REG_BR_PRED
 469         && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
 470       return true;
 471   return false;
 472 }
 473
 474 /* This map contains for a basic block the list of predictions for the
 475    outgoing edges.  */
 476
 477 static struct pointer_map_t *bb_predictions;
 478
 479 /*  Structure representing predictions in tree level. */
 480
 481 struct edge_prediction {
 482     struct edge_prediction *ep_next;
 483     edge ep_edge;
 484     enum br_predictor ep_predictor;
 485     int ep_probability;
 486 };
 487
 488 /* Return true if the one of outgoing edges is already predicted by
 489    PREDICTOR.  */
 490
 491 bool
 492 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
 493 {
 494   struct edge_prediction *i;
 495   void **preds = pointer_map_contains (bb_predictions, bb);
 496
 497   if (!preds)
 498     return false;
 499
 500   for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
 501     if (i->ep_predictor == predictor)
 502       return true;
 503   return false;
 504 }
 505
 506 /* Return true when the probability of edge is reliable.
 507
 508    The profile guessing code is good at predicting branch outcome (ie.
 509    taken/not taken), that is predicted right slightly over 75% of time.
 510    It is however notoriously poor on predicting the probability itself.
 511    In general the profile appear a lot flatter (with probabilities closer
 512    to 50%) than the reality so it is bad idea to use it to drive optimization
 513    such as those disabling dynamic branch prediction for well predictable
 514    branches.
 515
 516    There are two exceptions - edges leading to noreturn edges and edges
 517    predicted by number of iterations heuristics are predicted well.  This macro
 518    should be able to distinguish those, but at the moment it simply check for
 519    noreturn heuristic that is only one giving probability over 99% or bellow
 520    1%.  In future we might want to propagate reliability information across the
 521    CFG if we find this information useful on multiple places.   */
 522 static bool
 523 probability_reliable_p (int prob)
 524 {
 525   return (profile_status == PROFILE_READ
 526           || (profile_status == PROFILE_GUESSED
 527               && (prob <= HITRATE (1) || prob >= HITRATE (99))));
 528 }
 529
 530 /* Same predicate as above, working on edges.  */
 531 bool
 532 edge_probability_reliable_p (const_edge e)
 533 {
 534   return probability_reliable_p (e->probability);
 535 }
 536
 537 /* Same predicate as edge_probability_reliable_p, working on notes.  */
 538 bool
 539 br_prob_note_reliable_p (const_rtx note)
 540 {
 541   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
 542   return probability_reliable_p (INTVAL (XEXP (note, 0)));
 543 }
 544
 545 static void
 546 predict_insn (rtx insn, enum br_predictor predictor, int probability)
 547 {
 548   gcc_assert (any_condjump_p (insn));
 549   if (!flag_guess_branch_prob)
 550     return;
 551
 552   add_reg_note (insn, REG_BR_PRED,
 553                 gen_rtx_CONCAT (VOIDmode,
 554                                 GEN_INT ((int) predictor),
 555                                 GEN_INT ((int) probability)));
 556 }
 557
 558 /* Predict insn by given predictor.  */
 559
 560 void
 561 predict_insn_def (rtx insn, enum br_predictor predictor,
 562                   enum prediction taken)
 563 {
 564    int probability = predictor_info[(int) predictor].hitrate;
 565
 566    if (taken != TAKEN)
 567      probability = REG_BR_PROB_BASE - probability;
 568
 569    predict_insn (insn, predictor, probability);
 570 }
 571
 572 /* Predict edge E with given probability if possible.  */
 573
 574 void
 575 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
 576 {
 577   rtx last_insn;
 578   last_insn = BB_END (e->src);
 579
 580   /* We can store the branch prediction information only about
 581      conditional jumps.  */
 582   if (!any_condjump_p (last_insn))
 583     return;
 584
 585   /* We always store probability of branching.  */
 586   if (e->flags & EDGE_FALLTHRU)
 587     probability = REG_BR_PROB_BASE - probability;
 588
 589   predict_insn (last_insn, predictor, probability);
 590 }
 591
 592 /* Predict edge E with the given PROBABILITY.  */
 593 void
 594 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
 595 {
 596   gcc_assert (profile_status != PROFILE_GUESSED);
 597   if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
 598       && flag_guess_branch_prob && optimize)
 599     {
 600       struct edge_prediction *i = XNEW (struct edge_prediction);
 601       void **preds = pointer_map_insert (bb_predictions, e->src);
 602
 603       i->ep_next = (struct edge_prediction *) *preds;
 604       *preds = i;
 605       i->ep_probability = probability;
 606       i->ep_predictor = predictor;
 607       i->ep_edge = e;
 608     }
 609 }
 610
 611 /* Remove all predictions on given basic block that are attached
 612    to edge E.  */
 613 void
 614 remove_predictions_associated_with_edge (edge e)
 615 {
 616   void **preds;
 617
 618   if (!bb_predictions)
 619     return;
 620
 621   preds = pointer_map_contains (bb_predictions, e->src);
 622
 623   if (preds)
 624     {
 625       struct edge_prediction **prediction = (struct edge_prediction **) preds;
 626       struct edge_prediction *next;
 627
 628       while (*prediction)
 629         {
 630           if ((*prediction)->ep_edge == e)
 631             {
 632               next = (*prediction)->ep_next;
 633               free (*prediction);
 634               *prediction = next;
 635             }
 636           else
 637             prediction = &((*prediction)->ep_next);
 638         }
 639     }
 640 }
 641
 642 /* Clears the list of predictions stored for BB.  */
 643
 644 static void
 645 clear_bb_predictions (basic_block bb)
 646 {
 647   void **preds = pointer_map_contains (bb_predictions, bb);
 648   struct edge_prediction *pred, *next;
 649
 650   if (!preds)
 651     return;
 652
 653   for (pred = (struct edge_prediction *) *preds; pred; pred = next)
 654     {
 655       next = pred->ep_next;
 656       free (pred);
 657     }
 658   *preds = NULL;
 659 }
 660
 661 /* Return true when we can store prediction on insn INSN.
 662    At the moment we represent predictions only on conditional
 663    jumps, not at computed jump or other complicated cases.  */
 664 static bool
 665 can_predict_insn_p (const_rtx insn)
 666 {
 667   return (JUMP_P (insn)
 668           && any_condjump_p (insn)
 669           && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
 670 }
 671
 672 /* Predict edge E by given predictor if possible.  */
 673
 674 void
 675 predict_edge_def (edge e, enum br_predictor predictor,
 676                   enum prediction taken)
 677 {
 678    int probability = predictor_info[(int) predictor].hitrate;
 679
 680    if (taken != TAKEN)
 681      probability = REG_BR_PROB_BASE - probability;
 682
 683    predict_edge (e, predictor, probability);
 684 }
 685
 686 /* Invert all branch predictions or probability notes in the INSN.  This needs
 687    to be done each time we invert the condition used by the jump.  */
 688
 689 void
 690 invert_br_probabilities (rtx insn)
 691 {
 692   rtx note;
 693
 694   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 695     if (REG_NOTE_KIND (note) == REG_BR_PROB)
 696       XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
 697     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
 698       XEXP (XEXP (note, 0), 1)
 699         = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
 700 }
 701
 702 /* Dump information about the branch prediction to the output file.  */
 703
 704 static void
 705 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
 706                  basic_block bb, int used)
 707 {
 708   edge e;
 709   edge_iterator ei;
 710
 711   if (!file)
 712     return;
 713
 714   FOR_EACH_EDGE (e, ei, bb->succs)
 715     if (! (e->flags & EDGE_FALLTHRU))
 716       break;
 717
 718   fprintf (file, "  %s heuristics%s: %.1f%%",
 719            predictor_info[predictor].name,
 720            used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
 721
 722   if (bb->count)
 723     {
 724       fprintf (file, "  exec ");
 725       fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
 726       if (e)
 727         {
 728           fprintf (file, " hit ");
 729           fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
 730           fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
 731         }
 732     }
 733
 734   fprintf (file, "\n");
 735 }
 736
 737 /* We can not predict the probabilities of outgoing edges of bb.  Set them
 738    evenly and hope for the best.  */
 739 static void
 740 set_even_probabilities (basic_block bb)
 741 {
 742   int nedges = 0;
 743   edge e;
 744   edge_iterator ei;
 745
 746   FOR_EACH_EDGE (e, ei, bb->succs)
 747     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 748       nedges ++;
 749   FOR_EACH_EDGE (e, ei, bb->succs)
 750     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 751       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 752     else
 753       e->probability = 0;
 754 }
 755
 756 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
 757    note if not already present.  Remove now useless REG_BR_PRED notes.  */
 758
 759 static void
 760 combine_predictions_for_insn (rtx insn, basic_block bb)
 761 {
 762   rtx prob_note;
 763   rtx *pnote;
 764   rtx note;
 765   int best_probability = PROB_EVEN;
 766   enum br_predictor best_predictor = END_PREDICTORS;
 767   int combined_probability = REG_BR_PROB_BASE / 2;
 768   int d;
 769   bool first_match = false;
 770   bool found = false;
 771
 772   if (!can_predict_insn_p (insn))
 773     {
 774       set_even_probabilities (bb);
 775       return;
 776     }
 777
 778   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
 779   pnote = &REG_NOTES (insn);
 780   if (dump_file)
 781     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
 782              bb->index);
 783
 784   /* We implement "first match" heuristics and use probability guessed
 785      by predictor with smallest index.  */
 786   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
 787     if (REG_NOTE_KIND (note) == REG_BR_PRED)
 788       {
 789         enum br_predictor predictor = ((enum br_predictor)
 790                                        INTVAL (XEXP (XEXP (note, 0), 0)));
 791         int probability = INTVAL (XEXP (XEXP (note, 0), 1));
 792
 793         found = true;
 794         if (best_predictor > predictor)
 795           best_probability = probability, best_predictor = predictor;
 796
 797         d = (combined_probability * probability
 798              + (REG_BR_PROB_BASE - combined_probability)
 799              * (REG_BR_PROB_BASE - probability));
 800
 801         /* Use FP math to avoid overflows of 32bit integers.  */
 802         if (d == 0)
 803           /* If one probability is 0% and one 100%, avoid division by zero.  */
 804           combined_probability = REG_BR_PROB_BASE / 2;
 805         else
 806           combined_probability = (((double) combined_probability) * probability
 807                                   * REG_BR_PROB_BASE / d + 0.5);
 808       }
 809
 810   /* Decide which heuristic to use.  In case we didn't match anything,
 811      use no_prediction heuristic, in case we did match, use either
 812      first match or Dempster-Shaffer theory depending on the flags.  */
 813
 814   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 815     first_match = true;
 816
 817   if (!found)
 818     dump_prediction (dump_file, PRED_NO_PREDICTION,
 819                      combined_probability, bb, true);
 820   else
 821     {
 822       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
 823                        bb, !first_match);
 824       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
 825                        bb, first_match);
 826     }
 827
 828   if (first_match)
 829     combined_probability = best_probability;
 830   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 831
 832   while (*pnote)
 833     {
 834       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
 835         {
 836           enum br_predictor predictor = ((enum br_predictor)
 837                                          INTVAL (XEXP (XEXP (*pnote, 0), 0)));
 838           int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
 839
 840           dump_prediction (dump_file, predictor, probability, bb,
 841                            !first_match || best_predictor == predictor);
 842           *pnote = XEXP (*pnote, 1);
 843         }
 844       else
 845         pnote = &XEXP (*pnote, 1);
 846     }
 847
 848   if (!prob_note)
 849     {
 850       add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
 851
 852       /* Save the prediction into CFG in case we are seeing non-degenerated
 853          conditional jump.  */
 854       if (!single_succ_p (bb))
 855         {
 856           BRANCH_EDGE (bb)->probability = combined_probability;
 857           FALLTHRU_EDGE (bb)->probability
 858             = REG_BR_PROB_BASE - combined_probability;
 859         }
 860     }
 861   else if (!single_succ_p (bb))
 862     {
 863       int prob = INTVAL (XEXP (prob_note, 0));
 864
 865       BRANCH_EDGE (bb)->probability = prob;
 866       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
 867     }
 868   else
 869     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
 870 }
 871
 872 /* Combine predictions into single probability and store them into CFG.
 873    Remove now useless prediction entries.  */
 874
 875 static void
 876 combine_predictions_for_bb (basic_block bb)
 877 {
 878   int best_probability = PROB_EVEN;
 879   enum br_predictor best_predictor = END_PREDICTORS;
 880   int combined_probability = REG_BR_PROB_BASE / 2;
 881   int d;
 882   bool first_match = false;
 883   bool found = false;
 884   struct edge_prediction *pred;
 885   int nedges = 0;
 886   edge e, first = NULL, second = NULL;
 887   edge_iterator ei;
 888   void **preds;
 889
 890   FOR_EACH_EDGE (e, ei, bb->succs)
 891     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
 892       {
 893         nedges ++;
 894         if (first && !second)
 895           second = e;
 896         if (!first)
 897           first = e;
 898       }
 899
 900   /* When there is no successor or only one choice, prediction is easy.
 901
 902      We are lazy for now and predict only basic blocks with two outgoing
 903      edges.  It is possible to predict generic case too, but we have to
 904      ignore first match heuristics and do more involved combining.  Implement
 905      this later.  */
 906   if (nedges != 2)
 907     {
 908       if (!bb->count)
 909         set_even_probabilities (bb);
 910       clear_bb_predictions (bb);
 911       if (dump_file)
 912         fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
 913                  nedges, bb->index);
 914       return;
 915     }
 916
 917   if (dump_file)
 918     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
 919
 920   preds = pointer_map_contains (bb_predictions, bb);
 921   if (preds)
 922     {
 923       /* We implement "first match" heuristics and use probability guessed
 924          by predictor with smallest index.  */
 925       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
 926         {
 927           enum br_predictor predictor = pred->ep_predictor;
 928           int probability = pred->ep_probability;
 929
 930           if (pred->ep_edge != first)
 931             probability = REG_BR_PROB_BASE - probability;
 932
 933           found = true;
 934           /* First match heuristics would be widly confused if we predicted
 935              both directions.  */
 936           if (best_predictor > predictor)
 937             {
 938               struct edge_prediction *pred2;
 939               int prob = probability;
 940
 941               for (pred2 = (struct edge_prediction *) *preds; pred2; pred2 = pred2->ep_next)
 942                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
 943                  {
 944                    int probability2 = pred->ep_probability;
 945
 946                    if (pred2->ep_edge != first)
 947                      probability2 = REG_BR_PROB_BASE - probability2;
 948
 949                    if ((probability < REG_BR_PROB_BASE / 2) !=
 950                        (probability2 < REG_BR_PROB_BASE / 2))
 951                      break;
 952
 953                    /* If the same predictor later gave better result, go for it! */
 954                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
 955                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
 956                      prob = probability2;
 957                  }
 958               if (!pred2)
 959                 best_probability = prob, best_predictor = predictor;
 960             }
 961
 962           d = (combined_probability * probability
 963                + (REG_BR_PROB_BASE - combined_probability)
 964                * (REG_BR_PROB_BASE - probability));
 965
 966           /* Use FP math to avoid overflows of 32bit integers.  */
 967           if (d == 0)
 968             /* If one probability is 0% and one 100%, avoid division by zero.  */
 969             combined_probability = REG_BR_PROB_BASE / 2;
 970           else
 971             combined_probability = (((double) combined_probability)
 972                                     * probability
 973                                     * REG_BR_PROB_BASE / d + 0.5);
 974         }
 975     }
 976
 977   /* Decide which heuristic to use.  In case we didn't match anything,
 978      use no_prediction heuristic, in case we did match, use either
 979      first match or Dempster-Shaffer theory depending on the flags.  */
 980
 981   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
 982     first_match = true;
 983
 984   if (!found)
 985     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
 986   else
 987     {
 988       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
 989                        !first_match);
 990       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
 991                        first_match);
 992     }
 993
 994   if (first_match)
 995     combined_probability = best_probability;
 996   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
 997
 998   if (preds)
 999     {
1000       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1001         {
1002           enum br_predictor predictor = pred->ep_predictor;
1003           int probability = pred->ep_probability;
1004
1005           if (pred->ep_edge != EDGE_SUCC (bb, 0))
1006             probability = REG_BR_PROB_BASE - probability;
1007           dump_prediction (dump_file, predictor, probability, bb,
1008                            !first_match || best_predictor == predictor);
1009         }
1010     }
1011   clear_bb_predictions (bb);
1012
1013   if (!bb->count)
1014     {
1015       first->probability = combined_probability;
1016       second->probability = REG_BR_PROB_BASE - combined_probability;
1017     }
1018 }
1019
1020 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1021    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1022
1023    T1 and T2 should be one of the following cases:
1024      1. T1 is SSA_NAME, T2 is NULL
1025      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1026      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1027
1028 static tree
1029 strips_small_constant (tree t1, tree t2)
1030 {
1031   tree ret = NULL;
1032   int value = 0;
1033
1034   if (!t1)
1035     return NULL;
1036   else if (TREE_CODE (t1) == SSA_NAME)
1037     ret = t1;
1038   else if (host_integerp (t1, 0))
1039     value = tree_low_cst (t1, 0);
1040   else
1041     return NULL;
1042
1043   if (!t2)
1044     return ret;
1045   else if (host_integerp (t2, 0))
1046     value = tree_low_cst (t2, 0);
1047   else if (TREE_CODE (t2) == SSA_NAME)
1048     {
1049       if (ret)
1050         return NULL;
1051       else
1052         ret = t2;
1053     }
1054
1055   if (value <= 4 && value >= -4)
1056     return ret;
1057   else
1058     return NULL;
1059 }
1060
1061 /* Return the SSA_NAME in T or T's operands.
1062    Return NULL if SSA_NAME cannot be found.  */
1063
1064 static tree
1065 get_base_value (tree t)
1066 {
1067   if (TREE_CODE (t) == SSA_NAME)
1068     return t;
1069
1070   if (!BINARY_CLASS_P (t))
1071     return NULL;
1072
1073   switch (TREE_OPERAND_LENGTH (t))
1074     {
1075     case 1:
1076       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1077     case 2:
1078       return strips_small_constant (TREE_OPERAND (t, 0),
1079                                     TREE_OPERAND (t, 1));
1080     default:
1081       return NULL;
1082     }
1083 }
1084
1085 /* Check the compare STMT in LOOP. If it compares an induction
1086    variable to a loop invariant, return true, and save
1087    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1088    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1089
1090 static bool
1091 is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1092                                      tree *loop_invariant,
1093                                      enum tree_code *compare_code,
1094                                      tree *loop_step,
1095                                      tree *loop_iv_base)
1096 {
1097   tree op0, op1, bound, base;
1098   affine_iv iv0, iv1;
1099   enum tree_code code;
1100   tree step;
1101
1102   code = gimple_cond_code (stmt);
1103   *loop_invariant = NULL;
1104
1105   switch (code)
1106     {
1107     case GT_EXPR:
1108     case GE_EXPR:
1109     case NE_EXPR:
1110     case LT_EXPR:
1111     case LE_EXPR:
1112     case EQ_EXPR:
1113       break;
1114
1115     default:
1116       return false;
1117     }
1118
1119   op0 = gimple_cond_lhs (stmt);
1120   op1 = gimple_cond_rhs (stmt);
1121
1122   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1123        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1124     return false;
1125   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1126     return false;
1127   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1128     return false;
1129   if (TREE_CODE (iv0.step) != INTEGER_CST
1130       || TREE_CODE (iv1.step) != INTEGER_CST)
1131     return false;
1132   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1133       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1134     return false;
1135
1136   if (integer_zerop (iv0.step))
1137     {
1138       if (code != NE_EXPR && code != EQ_EXPR)
1139         code = invert_tree_comparison (code, false);
1140       bound = iv0.base;
1141       base = iv1.base;
1142       if (host_integerp (iv1.step, 0))
1143         step = iv1.step;
1144       else
1145         return false;
1146     }
1147   else
1148     {
1149       bound = iv1.base;
1150       base = iv0.base;
1151       if (host_integerp (iv0.step, 0))
1152         step = iv0.step;
1153       else
1154         return false;
1155     }
1156
1157   if (TREE_CODE (bound) != INTEGER_CST)
1158     bound = get_base_value (bound);
1159   if (!bound)
1160     return false;
1161   if (TREE_CODE (base) != INTEGER_CST)
1162     base = get_base_value (base);
1163   if (!base)
1164     return false;
1165
1166   *loop_invariant = bound;
1167   *compare_code = code;
1168   *loop_step = step;
1169   *loop_iv_base = base;
1170   return true;
1171 }
1172
1173 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1174
1175 static bool
1176 expr_coherent_p (tree t1, tree t2)
1177 {
1178   gimple stmt;
1179   tree ssa_name_1 = NULL;
1180   tree ssa_name_2 = NULL;
1181
1182   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1183   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1184
1185   if (t1 == t2)
1186     return true;
1187
1188   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1189     return true;
1190   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1191     return false;
1192
1193   /* Check to see if t1 is expressed/defined with t2.  */
1194   stmt = SSA_NAME_DEF_STMT (t1);
1195   gcc_assert (stmt != NULL);
1196   if (is_gimple_assign (stmt))
1197     {
1198       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1199       if (ssa_name_1 && ssa_name_1 == t2)
1200         return true;
1201     }
1202
1203   /* Check to see if t2 is expressed/defined with t1.  */
1204   stmt = SSA_NAME_DEF_STMT (t2);
1205   gcc_assert (stmt != NULL);
1206   if (is_gimple_assign (stmt))
1207     {
1208       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1209       if (ssa_name_2 && ssa_name_2 == t1)
1210         return true;
1211     }
1212
1213   /* Compare if t1 and t2's def_stmts are identical.  */
1214   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1215     return true;
1216   else
1217     return false;
1218 }
1219
1220 /* Predict branch probability of BB when BB contains a branch that compares
1221    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1222    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1223
1224    E.g.
1225      for (int i = 0; i < bound; i++) {
1226        if (i < bound - 2)
1227          computation_1();
1228        else
1229          computation_2();
1230      }
1231
1232   In this loop, we will predict the branch inside the loop to be taken.  */
1233
1234 static void
1235 predict_iv_comparison (struct loop *loop, basic_block bb,
1236                        tree loop_bound_var,
1237                        tree loop_iv_base_var,
1238                        enum tree_code loop_bound_code,
1239                        int loop_bound_step)
1240 {
1241   gimple stmt;
1242   tree compare_var, compare_base;
1243   enum tree_code compare_code;
1244   tree compare_step_var;
1245   edge then_edge;
1246   edge_iterator ei;
1247
1248   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1249       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1250       || predicted_by_p (bb, PRED_LOOP_EXIT))
1251     return;
1252
1253   stmt = last_stmt (bb);
1254   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1255     return;
1256   if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1257                                             &compare_code,
1258                                             &compare_step_var,
1259                                             &compare_base))
1260     return;
1261
1262   /* Find the taken edge.  */
1263   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1264     if (then_edge->flags & EDGE_TRUE_VALUE)
1265       break;
1266
1267   /* When comparing an IV to a loop invariant, NE is more likely to be
1268      taken while EQ is more likely to be not-taken.  */
1269   if (compare_code == NE_EXPR)
1270     {
1271       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1272       return;
1273     }
1274   else if (compare_code == EQ_EXPR)
1275     {
1276       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1277       return;
1278     }
1279
1280   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1281     return;
1282
1283   /* If loop bound, base and compare bound are all constants, we can
1284      calculate the probability directly.  */
1285   if (host_integerp (loop_bound_var, 0)
1286       && host_integerp (compare_var, 0)
1287       && host_integerp (compare_base, 0))
1288     {
1289       int probability;
1290       bool of, overflow = false;
1291       double_int mod, compare_count, tem, loop_count;
1292
1293       double_int loop_bound = tree_to_double_int (loop_bound_var);
1294       double_int compare_bound = tree_to_double_int (compare_var);
1295       double_int base = tree_to_double_int (compare_base);
1296       double_int compare_step = tree_to_double_int (compare_step_var);
1297
1298       /* (loop_bound - base) / compare_step */
1299       tem = loop_bound.sub_with_overflow (base, &of);
1300       overflow |= of;
1301       loop_count = tem.divmod_with_overflow (compare_step,
1302                                               0, TRUNC_DIV_EXPR,
1303                                               &mod, &of);
1304       overflow |= of;
1305
1306       if ((!compare_step.is_negative ())
1307           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1308         {
1309           /* (loop_bound - compare_bound) / compare_step */
1310           tem = loop_bound.sub_with_overflow (compare_bound, &of);
1311           overflow |= of;
1312           compare_count = tem.divmod_with_overflow (compare_step,
1313                                                      0, TRUNC_DIV_EXPR,
1314                                                      &mod, &of);
1315           overflow |= of;
1316         }
1317       else
1318         {
1319           /* (compare_bound - base) / compare_step */
1320           tem = compare_bound.sub_with_overflow (base, &of);
1321           overflow |= of;
1322           compare_count = tem.divmod_with_overflow (compare_step,
1323                                                      0, TRUNC_DIV_EXPR,
1324                                                      &mod, &of);
1325           overflow |= of;
1326         }
1327       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1328         ++compare_count;
1329       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1330         ++loop_count;
1331       if (compare_count.is_negative ())
1332         compare_count = double_int_zero;
1333       if (loop_count.is_negative ())
1334         loop_count = double_int_zero;
1335       if (loop_count.is_zero ())
1336         probability = 0;
1337       else if (compare_count.scmp (loop_count) == 1)
1338         probability = REG_BR_PROB_BASE;
1339       else
1340         {
1341           /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1342              could overflow, shift both loop_count and compare_count right
1343              a bit so that it doesn't overflow.  Note both counts are known not
1344              to be negative at this point.  */
1345           int clz_bits = clz_hwi (loop_count.high);
1346           gcc_assert (REG_BR_PROB_BASE < 32768);
1347           if (clz_bits < 16)
1348             {
1349               loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1350               compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1351             }
1352           tem = compare_count.mul_with_sign (double_int::from_shwi
1353                                             (REG_BR_PROB_BASE), true, &of);
1354           gcc_assert (!of);
1355           tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1356           probability = tem.to_uhwi ();
1357         }
1358
1359       if (!overflow)
1360         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1361
1362       return;
1363     }
1364
1365   if (expr_coherent_p (loop_bound_var, compare_var))
1366     {
1367       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1368           && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1369         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1370       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1371                && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1372         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1373       else if (loop_bound_code == NE_EXPR)
1374         {
1375           /* If the loop backedge condition is "(i != bound)", we do
1376              the comparison based on the step of IV:
1377              * step < 0 : backedge condition is like (i > bound)
1378              * step > 0 : backedge condition is like (i < bound)  */
1379           gcc_assert (loop_bound_step != 0);
1380           if (loop_bound_step > 0
1381               && (compare_code == LT_EXPR
1382                   || compare_code == LE_EXPR))
1383             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1384           else if (loop_bound_step < 0
1385                    && (compare_code == GT_EXPR
1386                        || compare_code == GE_EXPR))
1387             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1388           else
1389             predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1390         }
1391       else
1392         /* The branch is predicted not-taken if loop_bound_code is
1393            opposite with compare_code.  */
1394         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1395     }
1396   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1397     {
1398       /* For cases like:
1399            for (i = s; i < h; i++)
1400              if (i > s + 2) ....
1401          The branch should be predicted taken.  */
1402       if (loop_bound_step > 0
1403           && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1404         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1405       else if (loop_bound_step < 0
1406                && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1407         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1408       else
1409         predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1410     }
1411 }
1412
1413 /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1414    exits are resulted from short-circuit conditions that will generate an
1415    if_tmp. E.g.:
1416
1417    if (foo() || global > 10)
1418      break;
1419
1420    This will be translated into:
1421
1422    BB3:
1423      loop header...
1424    BB4:
1425      if foo() goto BB6 else goto BB5
1426    BB5:
1427      if global > 10 goto BB6 else goto BB7
1428    BB6:
1429      goto BB7
1430    BB7:
1431      iftmp = (PHI 0(BB5), 1(BB6))
1432      if iftmp == 1 goto BB8 else goto BB3
1433    BB8:
1434      outside of the loop...
1435
1436    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1437    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1438    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1439    exits to predict them using PRED_LOOP_EXIT.  */
1440
1441 static void
1442 predict_extra_loop_exits (edge exit_edge)
1443 {
1444   unsigned i;
1445   bool check_value_one;
1446   gimple phi_stmt;
1447   tree cmp_rhs, cmp_lhs;
1448   gimple cmp_stmt = last_stmt (exit_edge->src);
1449
1450   if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1451     return;
1452   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1453   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1454   if (!TREE_CONSTANT (cmp_rhs)
1455       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1456     return;
1457   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1458     return;
1459
1460   /* If check_value_one is true, only the phi_args with value '1' will lead
1461      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1462      loop exit.  */
1463   check_value_one = (((integer_onep (cmp_rhs))
1464                     ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1465                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1466
1467   phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1468   if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1469     return;
1470
1471   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1472     {
1473       edge e1;
1474       edge_iterator ei;
1475       tree val = gimple_phi_arg_def (phi_stmt, i);
1476       edge e = gimple_phi_arg_edge (phi_stmt, i);
1477
1478       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1479         continue;
1480       if ((check_value_one ^ integer_onep (val)) == 1)
1481         continue;
1482       if (EDGE_COUNT (e->src->succs) != 1)
1483         {
1484           predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1485           continue;
1486         }
1487
1488       FOR_EACH_EDGE (e1, ei, e->src->preds)
1489         predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1490     }
1491 }
1492
1493 /* Predict edge probabilities by exploiting loop structure.  */
1494
1495 static void
1496 predict_loops (void)
1497 {
1498   loop_iterator li;
1499   struct loop *loop;
1500
1501   /* Try to predict out blocks in a loop that are not part of a
1502      natural loop.  */
1503   FOR_EACH_LOOP (li, loop, 0)
1504     {
1505       basic_block bb, *bbs;
1506       unsigned j, n_exits;
1507       vec<edge> exits;
1508       struct tree_niter_desc niter_desc;
1509       edge ex;
1510       struct nb_iter_bound *nb_iter;
1511       enum tree_code loop_bound_code = ERROR_MARK;
1512       tree loop_bound_step = NULL;
1513       tree loop_bound_var = NULL;
1514       tree loop_iv_base = NULL;
1515       gimple stmt = NULL;
1516
1517       exits = get_loop_exit_edges (loop);
1518       n_exits = exits.length ();
1519       if (!n_exits)
1520         {
1521           exits.release ();
1522           continue;
1523         }
1524
1525       FOR_EACH_VEC_ELT (exits, j, ex)
1526         {
1527           tree niter = NULL;
1528           HOST_WIDE_INT nitercst;
1529           int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1530           int probability;
1531           enum br_predictor predictor;
1532
1533           predict_extra_loop_exits (ex);
1534
1535           if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1536             niter = niter_desc.niter;
1537           if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1538             niter = loop_niter_by_eval (loop, ex);
1539
1540           if (TREE_CODE (niter) == INTEGER_CST)
1541             {
1542               if (host_integerp (niter, 1)
1543                   && max
1544                   && compare_tree_int (niter, max - 1) == -1)
1545                 nitercst = tree_low_cst (niter, 1) + 1;
1546               else
1547                 nitercst = max;
1548               predictor = PRED_LOOP_ITERATIONS;
1549             }
1550           /* If we have just one exit and we can derive some information about
1551              the number of iterations of the loop from the statements inside
1552              the loop, use it to predict this exit.  */
1553           else if (n_exits == 1)
1554             {
1555               nitercst = estimated_stmt_executions_int (loop);
1556               if (nitercst < 0)
1557                 continue;
1558               if (nitercst > max)
1559                 nitercst = max;
1560
1561               predictor = PRED_LOOP_ITERATIONS_GUESSED;
1562             }
1563           else
1564             continue;
1565
1566           /* If the prediction for number of iterations is zero, do not
1567              predict the exit edges.  */
1568           if (nitercst == 0)
1569             continue;
1570
1571           probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1572           predict_edge (ex, predictor, probability);
1573         }
1574       exits.release ();
1575
1576       /* Find information about loop bound variables.  */
1577       for (nb_iter = loop->bounds; nb_iter;
1578            nb_iter = nb_iter->next)
1579         if (nb_iter->stmt
1580             && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1581           {
1582             stmt = nb_iter->stmt;
1583             break;
1584           }
1585       if (!stmt && last_stmt (loop->header)
1586           && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1587         stmt = last_stmt (loop->header);
1588       if (stmt)
1589         is_comparison_with_loop_invariant_p (stmt, loop,
1590                                              &loop_bound_var,
1591                                              &loop_bound_code,
1592                                              &loop_bound_step,
1593                                              &loop_iv_base);
1594
1595       bbs = get_loop_body (loop);
1596
1597       for (j = 0; j < loop->num_nodes; j++)
1598         {
1599           int header_found = 0;
1600           edge e;
1601           edge_iterator ei;
1602
1603           bb = bbs[j];
1604
1605           /* Bypass loop heuristics on continue statement.  These
1606              statements construct loops via "non-loop" constructs
1607              in the source language and are better to be handled
1608              separately.  */
1609           if (predicted_by_p (bb, PRED_CONTINUE))
1610             continue;
1611
1612           /* Loop branch heuristics - predict an edge back to a
1613              loop's head as taken.  */
1614           if (bb == loop->latch)
1615             {
1616               e = find_edge (loop->latch, loop->header);
1617               if (e)
1618                 {
1619                   header_found = 1;
1620                   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1621                 }
1622             }
1623
1624           /* Loop exit heuristics - predict an edge exiting the loop if the
1625              conditional has no loop header successors as not taken.  */
1626           if (!header_found
1627               /* If we already used more reliable loop exit predictors, do not
1628                  bother with PRED_LOOP_EXIT.  */
1629               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1630               && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1631             {
1632               /* For loop with many exits we don't want to predict all exits
1633                  with the pretty large probability, because if all exits are
1634                  considered in row, the loop would be predicted to iterate
1635                  almost never.  The code to divide probability by number of
1636                  exits is very rough.  It should compute the number of exits
1637                  taken in each patch through function (not the overall number
1638                  of exits that might be a lot higher for loops with wide switch
1639                  statements in them) and compute n-th square root.
1640
1641                  We limit the minimal probability by 2% to avoid
1642                  EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1643                  as this was causing regression in perl benchmark containing such
1644                  a wide loop.  */
1645
1646               int probability = ((REG_BR_PROB_BASE
1647                                   - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1648                                  / n_exits);
1649               if (probability < HITRATE (2))
1650                 probability = HITRATE (2);
1651               FOR_EACH_EDGE (e, ei, bb->succs)
1652                 if (e->dest->index < NUM_FIXED_BLOCKS
1653                     || !flow_bb_inside_loop_p (loop, e->dest))
1654                   predict_edge (e, PRED_LOOP_EXIT, probability);
1655             }
1656           if (loop_bound_var)
1657             predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1658                                    loop_bound_code,
1659                                    tree_low_cst (loop_bound_step, 0));
1660         }
1661
1662       /* Free basic blocks from get_loop_body.  */
1663       free (bbs);
1664     }
1665 }
1666
1667 /* Attempt to predict probabilities of BB outgoing edges using local
1668    properties.  */
1669 static void
1670 bb_estimate_probability_locally (basic_block bb)
1671 {
1672   rtx last_insn = BB_END (bb);
1673   rtx cond;
1674
1675   if (! can_predict_insn_p (last_insn))
1676     return;
1677   cond = get_condition (last_insn, NULL, false, false);
1678   if (! cond)
1679     return;
1680
1681   /* Try "pointer heuristic."
1682      A comparison ptr == 0 is predicted as false.
1683      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1684   if (COMPARISON_P (cond)
1685       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1686           || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1687     {
1688       if (GET_CODE (cond) == EQ)
1689         predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1690       else if (GET_CODE (cond) == NE)
1691         predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1692     }
1693   else
1694
1695   /* Try "opcode heuristic."
1696      EQ tests are usually false and NE tests are usually true. Also,
1697      most quantities are positive, so we can make the appropriate guesses
1698      about signed comparisons against zero.  */
1699     switch (GET_CODE (cond))
1700       {
1701       case CONST_INT:
1702         /* Unconditional branch.  */
1703         predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1704                           cond == const0_rtx ? NOT_TAKEN : TAKEN);
1705         break;
1706
1707       case EQ:
1708       case UNEQ:
1709         /* Floating point comparisons appears to behave in a very
1710            unpredictable way because of special role of = tests in
1711            FP code.  */
1712         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1713           ;
1714         /* Comparisons with 0 are often used for booleans and there is
1715            nothing useful to predict about them.  */
1716         else if (XEXP (cond, 1) == const0_rtx
1717                  || XEXP (cond, 0) == const0_rtx)
1718           ;
1719         else
1720           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1721         break;
1722
1723       case NE:
1724       case LTGT:
1725         /* Floating point comparisons appears to behave in a very
1726            unpredictable way because of special role of = tests in
1727            FP code.  */
1728         if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1729           ;
1730         /* Comparisons with 0 are often used for booleans and there is
1731            nothing useful to predict about them.  */
1732         else if (XEXP (cond, 1) == const0_rtx
1733                  || XEXP (cond, 0) == const0_rtx)
1734           ;
1735         else
1736           predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1737         break;
1738
1739       case ORDERED:
1740         predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1741         break;
1742
1743       case UNORDERED:
1744         predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1745         break;
1746
1747       case LE:
1748       case LT:
1749         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1750             || XEXP (cond, 1) == constm1_rtx)
1751           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1752         break;
1753
1754       case GE:
1755       case GT:
1756         if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1757             || XEXP (cond, 1) == constm1_rtx)
1758           predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1759         break;
1760
1761       default:
1762         break;
1763       }
1764 }
1765
1766 /* Set edge->probability for each successor edge of BB.  */
1767 void
1768 guess_outgoing_edge_probabilities (basic_block bb)
1769 {
1770   bb_estimate_probability_locally (bb);
1771   combine_predictions_for_insn (BB_END (bb), bb);
1772 }
1773 \f
1774 static tree expr_expected_value (tree, bitmap);
1775
1776 /* Helper function for expr_expected_value; the prototype above is needed
        because the two functions call each other.

        Try to determine the constant that the expression described by CODE,
        OP0 and OP1 is expected to evaluate to.  TYPE is the type handed to
        fold_build1/fold_build2 when operands are folded.  VISITED is a bitmap
        of the SSA name versions examined so far; it is what stops the walk
        when definitions form a cycle.  Returns the expected constant, or
        NULL when none can be found.  */
1777
1778 static tree
1779 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1780                        tree op1, bitmap visited)
1781 {
1782   gimple def;
1783
       /* A single operand: a constant is returned as is, an SSA name is
          traced through its defining statement, anything else is given up
          on.  */
1784   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1785     {
1786       if (TREE_CONSTANT (op0))
1787         return op0;
1788
1789       if (code != SSA_NAME)
1790         return NULL_TREE;
1791
1792       def = SSA_NAME_DEF_STMT (op0);
1793
1794       /* If we were already here, break the infinite cycle.  */
1795       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1796         return NULL;
1797
1798       if (gimple_code (def) == GIMPLE_PHI)
1799         {
1800           /* All the arguments of the PHI node must have the same expected
1801              constant value.  */
1802           int i, n = gimple_phi_num_args (def);
1803           tree val = NULL, new_val;
1804
1805           for (i = 0; i < n; i++)
1806             {
1807               tree arg = PHI_ARG_DEF (def, i);
1808
1809               /* If this PHI has itself as an argument, we cannot
1810                  determine the expected value of this argument.  However,
1811                  if we can find an expected constant value for the other
1812                  PHI args then we can still be sure that this is
1813                  likely a constant.  So be optimistic and just
1814                  continue with the next argument.  */
1815               if (arg == PHI_RESULT (def))
1816                 continue;
1817
1818               new_val = expr_expected_value (arg, visited);
1819               if (!new_val)
1820                 return NULL;
1821               if (!val)
1822                 val = new_val;
1823               else if (!operand_equal_p (val, new_val, false))
1824                 return NULL;
1825             }
1826           return val;
1827         }
           /* Look through an assignment to OP0 by analyzing its right-hand
              side with the type of the assigned variable.  */
1828       if (is_gimple_assign (def))
1829         {
1830           if (gimple_assign_lhs (def) != op0)
1831             return NULL;
1832
1833           return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1834                                         gimple_assign_rhs1 (def),
1835                                         gimple_assign_rhs_code (def),
1836                                         gimple_assign_rhs2 (def),
1837                                         visited);
1838         }
1839
           /* For calls, only the normal builtins listed below provide an
              expected value; every other call ends in the NULL return.  */
1840       if (is_gimple_call (def))
1841         {
1842           tree decl = gimple_call_fndecl (def);
1843           if (!decl)
1844             return NULL;
1845           if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1846             switch (DECL_FUNCTION_CODE (decl))
1847               {
1848               case BUILT_IN_EXPECT:
1849                 {
1850                   tree val;
                       /* A constant first argument is the result itself;
                          otherwise the second argument is returned as the
                          expected value.  */
1851                   if (gimple_call_num_args (def) != 2)
1852                     return NULL;
1853                   val = gimple_call_arg (def, 0);
1854                   if (TREE_CONSTANT (val))
1855                     return val;
1856                   return gimple_call_arg (def, 1);
1857                 }
1858
1859               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1860               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1861               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1862               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1863               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1864               case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1865               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1866               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1867               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1868               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1869               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1870               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1871               case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1872                 /* Assume that any given atomic operation has low contention,
1873                    and thus the compare-and-swap operation succeeds.  */
1874                 return boolean_true_node;
1875             }
1876         }
1877
1878       return NULL;
1879     }
1880
       /* Binary operation: both operands need an expected value; the folded
          result is only used when it turns out to be constant.  */
1881   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1882     {
1883       tree res;
1884       op0 = expr_expected_value (op0, visited);
1885       if (!op0)
1886         return NULL;
1887       op1 = expr_expected_value (op1, visited);
1888       if (!op1)
1889         return NULL;
1890       res = fold_build2 (code, type, op0, op1);
1891       if (TREE_CONSTANT (res))
1892         return res;
1893       return NULL;
1894     }
       /* Unary operation: same scheme with a single operand.  */
1895   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1896     {
1897       tree res;
1898       op0 = expr_expected_value (op0, visited);
1899       if (!op0)
1900         return NULL;
1901       res = fold_build1 (code, type, op0);
1902       if (TREE_CONSTANT (res))
1903         return res;
1904       return NULL;
1905     }
1906   return NULL;
1907 }
1908
1909 /* Estimate the constant value EXPR is expected to have at run time;
1910    yield NULL when nothing is known.  This serves the builtin_expect
1911    branch predictor, so the only evidence used is that construct plus
1912    whatever constant folding allows.  Smarter guessing (for instance
1913    predictions based on value range propagation) belongs in a new
1914    implementation.  VISITED is handed on to expr_expected_value_1.  */
1915
1916 static tree
1917 expr_expected_value (tree expr, bitmap visited)
1918 {
1919   tree lhs_operand, rhs_operand;
1920   enum tree_code expr_code;
1921   /* Anything non-constant is decomposed and examined by the helper.  */
1922   if (!TREE_CONSTANT (expr))
1923     {
1924       extract_ops_from_tree (expr, &expr_code, &lhs_operand, &rhs_operand);
1925       return expr_expected_value_1 (TREE_TYPE (expr), lhs_operand,
1926                                     expr_code, rhs_operand, visited);
1927     }
1928   return expr;
1929 }
1930
1931 \f
1932 /* Get rid of the branch prediction hints that are no longer needed.
1933    In every basic block, GIMPLE_PREDICT statements are deleted and each
1934    two-argument call to the BUILT_IN_EXPECT builtin either becomes a plain
1935    copy of its first argument (when it has a LHS) or is deleted.  */
1936 static unsigned int
1937 strip_predict_hints (void)
1938 {
1939   basic_block bb;
1940
1941   FOR_EACH_BB (bb)
1942     {
1943       gimple_stmt_iterator gsi = gsi_start_bb (bb);
1944
1945       while (!gsi_end_p (gsi))
1946         {
1947           gimple cur = gsi_stmt (gsi);
1948           bool expect_call = false;
1949           tree lhs = NULL_TREE;
1950
1951           if (gimple_code (cur) == GIMPLE_CALL)
1952             {
1953               tree callee = gimple_call_fndecl (cur);
1954
1955               expect_call = (callee
1956                              && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL
1957                              && DECL_FUNCTION_CODE (callee) == BUILT_IN_EXPECT
1958                              && gimple_call_num_args (cur) == 2);
1959               if (expect_call)
1960                 lhs = gimple_call_lhs (cur);
1961             }
1962
1963           /* Hints and builtin_expect calls without LHS are deleted; the
1964              iterator is not advanced by hand after gsi_remove.  */
1965           if (gimple_code (cur) == GIMPLE_PREDICT || (expect_call && !lhs))
1966             {
1967               gsi_remove (&gsi, true);
1968               continue;
1969             }
1970
1971           /* A builtin_expect call with LHS becomes a copy of argument 0.  */
1972           if (expect_call)
1973             gsi_replace (&gsi,
1974                          gimple_build_assign (lhs, gimple_call_arg (cur, 0)),
1975                          true);
1976           gsi_next (&gsi);
1977         }
1978     }
1979   return 0;
1980 }
1981 \f
1982 /* Predict using opcode of the last statement in basic block.
        Nothing is done unless BB ends in a GIMPLE_COND.  All predictions are
        attached to the successor edge flagged EDGE_TRUE_VALUE: first the one
        derived from builtin_expect (when expr_expected_value_1 finds a value
        for the condition), then either the pointer heuristic or the opcode
        heuristic, depending on the type of the first operand.  */
1983 static void
1984 tree_predict_by_opcode (basic_block bb)
1985 {
1986   gimple stmt = last_stmt (bb);
1987   edge then_edge;
1988   tree op0, op1;
1989   tree type;
1990   tree val;
1991   enum tree_code cmp;
1992   bitmap visited;
1993   edge_iterator ei;
1994
1995   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1996     return;
       /* Locate the successor edge taken when the condition holds.  */
1997   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1998     if (then_edge->flags & EDGE_TRUE_VALUE)
1999       break;
2000   op0 = gimple_cond_lhs (stmt);
2001   op1 = gimple_cond_rhs (stmt);
2002   cmp = gimple_cond_code (stmt);
2003   type = TREE_TYPE (op0);
       /* Ask for the value the whole comparison is expected to have; the
          bitmap is only needed for the duration of that query.  */
2004   visited = BITMAP_ALLOC (NULL);
2005   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
2006   BITMAP_FREE (visited);
2007   if (val)
2008     {
2009       int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2010       void **preds;
2011       int hitrate;
2012
2013       gcc_assert (percent >= 0 && percent <= 100);
2014       /* This handles the cases like
2015            while (__builtin_expect (exp, 1)) { ... }
2016          W/o builtin_expect, the default HITRATE is 91%.
2017          It does not make sense to estimate a lower probability of 90%
2018          (current default for builtin_expect) with the annotation.
2019          So here, we bump the probability by a small amount.  */
2020       preds = pointer_map_contains (bb_predictions, bb);
2021       hitrate = HITRATE (percent);
2022       if (preds)
2023         {
2024           struct edge_prediction *pred;
2025           int exit_hitrate = predictor_info [(int) PRED_LOOP_EXIT].hitrate;
2026
               /* Scan the predictions already recorded for BB.  If one of
                  them is PRED_LOOP_EXIT and its hit rate exceeds ours, use
                  that rate plus HITRATE (4), capped at REG_BR_PROB_BASE.  */
2027           for (pred = (struct edge_prediction *) *preds; pred;
2028                pred = pred->ep_next)
2029             {
2030               if (pred->ep_predictor == PRED_LOOP_EXIT
2031                   && exit_hitrate > hitrate)
2032                 {
2033                   hitrate = exit_hitrate + HITRATE (4);
2034                   if (hitrate > REG_BR_PROB_BASE)
2035                     hitrate = REG_BR_PROB_BASE;
2036                   break;
2037                 }
2038             }
2039         }
           /* An expected value of zero means the condition is expected to be
              false, so the probability of the true edge is inverted.  */
2040       if (integer_zerop (val))
2041         hitrate = REG_BR_PROB_BASE - hitrate;
2042       predict_edge (then_edge, PRED_BUILTIN_EXPECT, hitrate);
2043     }
2044   /* Try "pointer heuristic."
2045      A comparison ptr == 0 is predicted as false.
2046      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2047   if (POINTER_TYPE_P (type))
2048     {
2049       if (cmp == EQ_EXPR)
2050         predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2051       else if (cmp == NE_EXPR)
2052         predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2053     }
2054   else
2055
2056   /* Try "opcode heuristic."
2057      EQ tests are usually false and NE tests are usually true. Also,
2058      most quantities are positive, so we can make the appropriate guesses
2059      about signed comparisons against zero.  */
2060     switch (cmp)
2061       {
2062       case EQ_EXPR:
2063       case UNEQ_EXPR:
2064         /* Floating point comparisons appears to behave in a very
2065            unpredictable way because of special role of = tests in
2066            FP code.  */
2067         if (FLOAT_TYPE_P (type))
2068           ;
2069         /* Comparisons with 0 are often used for booleans and there is
2070            nothing useful to predict about them.  */
2071         else if (integer_zerop (op0) || integer_zerop (op1))
2072           ;
2073         else
2074           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2075         break;
2076
2077       case NE_EXPR:
2078       case LTGT_EXPR:
2079         /* Floating point comparisons appears to behave in a very
2080            unpredictable way because of special role of = tests in
2081            FP code.  */
2082         if (FLOAT_TYPE_P (type))
2083           ;
2084         /* Comparisons with 0 are often used for booleans and there is
2085            nothing useful to predict about them.  */
2086         else if (integer_zerop (op0)
2087                  || integer_zerop (op1))
2088           ;
2089         else
2090           predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2091         break;
2092
2093       case ORDERED_EXPR:
2094         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2095         break;
2096
2097       case UNORDERED_EXPR:
2098         predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2099         break;
2100
           /* Comparisons of the form x < C or x <= C with C being 0, 1 or
              -1 (integer or real) are predicted as false ...  */
2101       case LE_EXPR:
2102       case LT_EXPR:
2103         if (integer_zerop (op1)
2104             || integer_onep (op1)
2105             || integer_all_onesp (op1)
2106             || real_zerop (op1)
2107             || real_onep (op1)
2108             || real_minus_onep (op1))
2109           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2110         break;
2111
           /* ... and the mirrored forms x > C or x >= C as true.  */
2112       case GE_EXPR:
2113       case GT_EXPR:
2114         if (integer_zerop (op1)
2115             || integer_onep (op1)
2116             || integer_all_onesp (op1)
2117             || real_zerop (op1)
2118             || real_onep (op1)
2119             || real_minus_onep (op1))
2120           predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2121         break;
2122
2123       default:
2124         break;
2125       }
2126 }
2127
2128 /* Classify the returned value VAL (null for a void return), guessing
2129    whether it signals an error.  Returns the matching predictor and stores
2130    its direction in *PREDICTION; PRED_NO_PREDICTION leaves it untouched.  */
2131
2132 static enum br_predictor
2133 return_prediction (tree val, enum prediction *prediction)
2134 {
2135   enum br_predictor kind = PRED_NO_PREDICTION;
2136   tree val_type;
2137
2138   if (val == NULL)
2139     return kind;
2140   val_type = TREE_TYPE (val);
2141   if (POINTER_TYPE_P (val_type))
2142     {
2143       /* Returning a null pointer is the unusual outcome.  */
2144       if (integer_zerop (val))
2145         {
2146           kind = PRED_NULL_RETURN;
2147           *prediction = NOT_TAKEN;
2148         }
2149     }
2150   else if (INTEGRAL_TYPE_P (val_type))
2151     {
2152       if (TREE_CODE (val) == INTEGER_CST && tree_int_cst_sgn (val) < 0)
2153         {
2154           /* Negative constants are often used to report an error.  */
2155           kind = PRED_NEGATIVE_RETURN;
2156           *prediction = NOT_TAKEN;
2157         }
2158       else if (TREE_CONSTANT (val)
2159                && !(integer_zerop (val) || integer_onep (val)))
2160         {
2161           /* Other constants are common; 0/1 often stand for booleans.  */
2162           kind = PRED_CONST_RETURN;
2163           *prediction = TAKEN;
2164         }
2165     }
2166   return kind;
2167 }
2168
2169 /* Find a GIMPLE_RETURN at the end of a predecessor of the exit block; if it
2170    returns an SSA name defined by a PHI node, apply return_prediction to the
2171    PHI arguments and predict the paths leading to their incoming edges.  */
2172 static void
2173 apply_return_prediction (void)
2174 {
2175   gimple ret = NULL, def_phi;
2176   tree retval;
2177   edge exit_pred;
2178   edge_iterator ei;
2179   enum prediction dir;
2180   enum br_predictor first_kind, kind;
2181   int nargs, idx;
2182   bool kinds_differ = false;
2183
2184   FOR_EACH_EDGE (exit_pred, ei, EXIT_BLOCK_PTR->preds)
2185     {
2186       ret = last_stmt (exit_pred->src);
2187       if (ret != NULL && gimple_code (ret) == GIMPLE_RETURN)
2188         break;
2189     }
2190   if (exit_pred == NULL)
2191     return;
2192
2193   retval = gimple_return_retval (ret);
2194   if (retval == NULL || TREE_CODE (retval) != SSA_NAME)
2195     return;
2196   def_phi = SSA_NAME_DEF_STMT (retval);
2197   if (def_phi == NULL || gimple_code (def_phi) != GIMPLE_PHI)
2198     return;
2199
2200   /* When every PHI argument falls into the same category (say, all are
2201      positive constants) the values tell us nothing, so do not predict.  */
2202   nargs = gimple_phi_num_args (def_phi);
2203   first_kind = return_prediction (PHI_ARG_DEF (def_phi, 0), &dir);
2204   for (idx = 1; idx < nargs && !kinds_differ; idx++)
2205     kinds_differ = (return_prediction (PHI_ARG_DEF (def_phi, idx), &dir)
2206                     != first_kind);
2207   if (!kinds_differ)
2208     return;
2209
2210   for (idx = 0; idx < nargs; idx++)
2211     {
2212       kind = return_prediction (PHI_ARG_DEF (def_phi, idx), &dir);
2213       if (kind != PRED_NO_PREDICTION)
2214         predict_paths_leading_to_edge (gimple_phi_arg_edge (def_phi, idx),
2215                                        kind, dir);
2216     }
2217 }
2218
2219 /* Scan all basic blocks for events that are unlikely to happen: calls
2220    flagged ECF_NORETURN (only counted when the exit block has a regular
2221    incoming edge), calls to functions carrying the "cold" attribute, and
2222    explicit GIMPLE_PREDICT hints.  All paths leading to such a block are
2223    predicted accordingly.  apply_return_prediction runs before the scan.  */
2224
2225 static void
2226 tree_bb_level_predictions (void)
2227 {
2228   const int irregular = EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH;
2229   bool returns_normally = false;
2230   basic_block bb;
2231   edge exit_pred;
2232   edge_iterator ei;
2233
2234   FOR_EACH_EDGE (exit_pred, ei, EXIT_BLOCK_PTR->preds)
2235     {
2236       returns_normally = (exit_pred->flags & irregular) == 0;
2237       if (returns_normally)
2238         break;
2239     }
2240
2241   apply_return_prediction ();
2242
2243   FOR_EACH_BB (bb)
2244     {
2245       gimple_stmt_iterator gsi = gsi_start_bb (bb);
2246
2247       for (; !gsi_end_p (gsi); gsi_next (&gsi))
2248         {
2249           gimple stmt = gsi_stmt (gsi);
2250
2251           if (is_gimple_call (stmt))
2252             {
2253               tree callee;
2254
2255               if ((gimple_call_flags (stmt) & ECF_NORETURN) != 0
2256                   && returns_normally)
2257                 predict_paths_leading_to (bb, PRED_NORETURN, NOT_TAKEN);
2258
2259               callee = gimple_call_fndecl (stmt);
2260               if (callee != NULL_TREE
2261                   && lookup_attribute ("cold", DECL_ATTRIBUTES (callee)))
2262                 predict_paths_leading_to (bb, PRED_COLD_FUNCTION, NOT_TAKEN);
2263             }
2264           else if (gimple_code (stmt) == GIMPLE_PREDICT)
2265             /* The statement itself stays in place so that early inlining
2266                can propagate the hint to callers.  */
2267             predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2268                                       gimple_predict_outcome (stmt));
2269         }
2270     }
2271 }
2272
2273 #ifdef ENABLE_CHECKING
2274 /* Callback handed to pointer_map_traverse: assert that the slot VALUE
2275    holds a null pointer.  KEY and DATA are not used.  */
2276
2277 static bool
2278 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2279                  void *data ATTRIBUTE_UNUSED)
2280 {
2281   void *leftover = *value;
2282   gcc_assert (leftover == NULL);
2283   return false;
2284 }
2285 #endif
2286
2287 /* Predict branch probabilities and estimate profile for basic block BB.
        Every successor edge of BB is checked against three heuristics (user
        labels carrying a "cold" or "hot" attribute, early returns, and
        guarded calls); finally tree_predict_by_opcode is run on BB.  */
2288
2289 static void
2290 tree_estimate_probability_bb (basic_block bb)
2291 {
2292   edge e;
2293   edge_iterator ei;
2294   gimple last;
2295
2296   FOR_EACH_EDGE (e, ei, bb->succs)
2297     {
2298       /* Predict edges to user labels with attributes.  */
2299       if (e->dest != EXIT_BLOCK_PTR)
2300         {
2301           gimple_stmt_iterator gi;
               /* Only the labels at the start of the destination block are
                  looked at; the scan stops at the first other statement.  */
2302           for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2303             {
2304               gimple stmt = gsi_stmt (gi);
2305               tree decl;
2306
2307               if (gimple_code (stmt) != GIMPLE_LABEL)
2308                 break;
2309               decl = gimple_label_label (stmt);
2310               if (DECL_ARTIFICIAL (decl))
2311                 continue;
2312
2313               /* Finally, we have a user-defined label.  */
2314               if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2315                 predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2316               else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2317                 predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2318             }
2319         }
2320
2321       /* Predict early returns to be probable, as we've already taken
2322          care for error returns and other cases are often used for
2323          fast paths through function.
2324
2325          Since we've already removed the return statements, we are
2326          looking for CFG like:
2327
2328          if (conditional)
2329          {
2330          ..
2331          goto return_block
2332          }
2333          some other blocks
2334          return_block:
2335          return_stmt.  */
           /* NOTE(review): the comment above speaks of "probable", yet the
              edges below are recorded with NOT_TAKEN -- confirm which one is
              intended.  */
2336       if (e->dest != bb->next_bb
2337           && e->dest != EXIT_BLOCK_PTR
2338           && single_succ_p (e->dest)
2339           && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
2340           && (last = last_stmt (e->dest)) != NULL
2341           && gimple_code (last) == GIMPLE_RETURN)
2342         {
2343           edge e1;
2344           edge_iterator ei1;
2345
               /* When BB has a single successor the prediction is put on the
                  edges entering BB rather than on E.  In both cases sources
                  already predicted by one of the return value heuristics
                  are left alone.  */
2346           if (single_succ_p (bb))
2347             {
2348               FOR_EACH_EDGE (e1, ei1, bb->preds)
2349                 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2350                     && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2351                     && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2352                   predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2353             }
2354           else
2355             if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2356                 && !predicted_by_p (e->src, PRED_CONST_RETURN)
2357                 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2358               predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2359         }
2360
2361       /* Look for block we are guarding (ie we dominate it,
2362          but it doesn't postdominate us).  */
2363       if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
2364           && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2365           && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2366         {
2367           gimple_stmt_iterator bi;
2368
2369           /* The call heuristic claims that a guarded function call
2370              is improbable.  This is because such calls are often used
2371              to signal exceptional situations such as printing error
2372              messages.  */
               /* One prediction per edge is enough, hence the break after
                  the first qualifying call.  */
2373           for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2374                gsi_next (&bi))
2375             {
2376               gimple stmt = gsi_stmt (bi);
2377               if (is_gimple_call (stmt)
2378                   /* Constant and pure calls are hardly used to signalize
2379                      something exceptional.  */
2380                   && gimple_has_side_effects (stmt))
2381                 {
2382                   predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2383                   break;
2384                 }
2385             }
2386         }
2387     }
2388   tree_predict_by_opcode (bb);
2389 }
2390
2391 /* Predict branch probabilities and estimate profile of the tree CFG.
2392    This function can be called from the loop optimizers to recompute
2393    the profile information.
        Post-dominance information and the bb_predictions map are created
        here and released again before returning; dominance information
        (CDI_DOMINATORS) is used by the per-block heuristics but is not
        computed in this function.  */
2394
2395 void
2396 tree_estimate_probability (void)
2397 {
2398   basic_block bb;
2399
       /* Prepare the CFG; remove_fake_exit_edges at the end of the function
          is the counterpart of this setup.  */
2400   add_noreturn_fake_exit_edges ();
2401   connect_infinite_loops_to_exit ();
2402   /* We use loop_niter_by_eval, which requires that the loops have
2403      preheaders.  */
2404   create_preheaders (CP_SIMPLE_PREHEADERS);
2405   calculate_dominance_info (CDI_POST_DOMINATORS);
2406
       /* Collect predictions: function-wide ones first, then loop based
          ones (only when number_of_loops reports more than one loop), then
          the heuristics working on individual blocks.  */
2407   bb_predictions = pointer_map_create ();
2408   tree_bb_level_predictions ();
2409   record_loop_exits ();
2410
2411   if (number_of_loops () > 1)
2412     predict_loops ();
2413
2414   FOR_EACH_BB (bb)
2415     tree_estimate_probability_bb (bb);
2416
       /* Turn the collected predictions into probabilities, one block at a
          time.  */
2417   FOR_EACH_BB (bb)
2418     combine_predictions_for_bb (bb);
2419
       /* With checking enabled, verify that every slot left in the map holds
          a null pointer before the map is destroyed.  */
2420 #ifdef ENABLE_CHECKING
2421   pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2422 #endif
2423   pointer_map_destroy (bb_predictions);
2424   bb_predictions = NULL;
2425
2426   estimate_bb_frequencies (false);
2427   free_dominance_info (CDI_POST_DOMINATORS);
2428   remove_fake_exit_edges ();
2429 }
2430
2431 /* Driver function for PASS_PROFILE.  Loop information is set up (and
2432    scalar evolution too when number_of_loops reports more than one loop),
2433    tree_estimate_probability is run, and everything is torn down again.
2434    A profile status of PROFILE_ABSENT becomes PROFILE_GUESSED.  */
2435
2436 static unsigned int
2437 tree_estimate_probability_driver (void)
2438 {
2439   bool need_scev;
2440
2441   loop_optimizer_init (LOOPS_NORMAL);
2442   if (dump_file != NULL && (dump_flags & TDF_DETAILS) != 0)
2443     flow_loops_dump (dump_file, NULL, 0);
2444   mark_irreducible_loops ();
2445
2446   /* Sampled once so that initialization and finalization always pair up.  */
2447   need_scev = number_of_loops () > 1;
2448   if (need_scev)
2449     scev_initialize ();
2450   tree_estimate_probability ();
2451   if (need_scev)
2452     scev_finalize ();
2453   loop_optimizer_finalize ();
2454
2455   if (dump_file != NULL && (dump_flags & TDF_DETAILS) != 0)
2456     gimple_dump_cfg (dump_file, dump_flags);
2457   if (profile_status == PROFILE_ABSENT)
2458     profile_status = PROFILE_GUESSED;
2459   return 0;
2460 }
2461 \f
2462 /* Walk the predecessor edges of CUR and apply predictor PRED with outcome
2463    TAKEN to those whose source is not postdominated by BB; afterwards
2464    recurse into the sons of CUR in the post-dominator tree.  VISITED holds
2465    the indices of blocks the search was already restarted from.  */
2466
2467 static void
2468 predict_paths_for_bb (basic_block cur, basic_block bb,
2469                       enum br_predictor pred,
2470                       enum prediction taken,
2471                       bitmap visited)
2472 {
2473   const int skipped = EDGE_EH | EDGE_FAKE;
2474   basic_block child;
2475   edge in_edge;
2476   edge_iterator ei;
2477
2478   /* The edges of interest form the cut induced by the set of all blocks
2479      postdominated by BB.  */
2480   FOR_EACH_EDGE (in_edge, ei, cur->preds)
2481     {
2482       basic_block from = in_edge->src;
2483       bool has_way_out = false;
2484       edge other;
2485       edge_iterator ei2;
2486
2487       if (from->index < NUM_FIXED_BLOCKS
2488           || dominated_by_p (CDI_POST_DOMINATORS, from, bb))
2489         continue;
2490       /* Fake and EH edges are predicted as not taken anyway.  */
2491       if (in_edge->flags & skipped)
2492         continue;
2493       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2494
2495       /* Does FROM have another usable edge that does not lead to BB?  */
2496       FOR_EACH_EDGE (other, ei2, from->succs)
2497         {
2498           has_way_out = (other != in_edge
2499                          && !(other->flags & skipped)
2500                          && !dominated_by_p (CDI_POST_DOMINATORS,
2501                                              other->dest, bb));
2502           if (has_way_out)
2503             break;
2504         }
2505
2506       /* With such an edge IN_EDGE itself gets the prediction.  Otherwise
2507          the paths leading to FROM are examined; VISITED guards against
2508          endless recursion in regions only reachable by abnormal edges.  */
2509       if (has_way_out)
2510         predict_edge_def (in_edge, pred, taken);
2511       else if (bitmap_set_bit (visited, from->index))
2512         predict_paths_for_bb (from, from, pred, taken, visited);
2513     }
2514   for (child = first_dom_son (CDI_POST_DOMINATORS, cur); child != NULL;
2515        child = next_dom_son (CDI_POST_DOMINATORS, child))
2516     predict_paths_for_bb (child, bb, pred, taken, visited);
2517 }
2518
2519 /* Predict the paths leading to BB using predictor PRED with outcome
2520    TAKEN.  This is a wrapper around predict_paths_for_bb, started at BB
        itself, that supplies a fresh bitmap for the blocks visited and frees
        it afterwards.  */
2521
2522 static void
2523 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2524                           enum prediction taken)
2525 {
2526   bitmap visited = BITMAP_ALLOC (NULL);
2527   predict_paths_for_bb (bb, bb, pred, taken, visited);
2528   BITMAP_FREE (visited);
2529 }
2530
2531 /* Like predict_paths_leading_to but take edge instead of basic block.
2532    E alone is predicted when its source has another successor (neither
2533    EH nor fake, not a self loop, not E's destination) whose destination
2534    does not postdominate the source; otherwise all paths to the source.  */
2535
2536 static void
2537 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2538                                enum prediction taken)
2539 {
2540   bool has_nonloop_edge = false;
2541   edge_iterator ei;
2542   edge e2;
2543   basic_block bb = e->src;
2544
2545   /* The EH/fake test must look at the candidate edge E2, not at E.  */
2546   FOR_EACH_EDGE (e2, ei, bb->succs)
2547     if (e2->dest != e->src && e2->dest != e->dest
2548         && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2549         && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2550       {
2551         has_nonloop_edge = true;
2552         break;
2553       }
2554   if (has_nonloop_edge)
2555     predict_edge_def (e, pred, taken);
2556   else
2557     predict_paths_leading_to (bb, pred, taken);
2558 }
2559 \f
2560 /* This is used to carry information about basic blocks.  It is
2561    attached to the AUX field of the standard CFG block.
        The BLOCK_INFO macro casts that field back to this type.  */
2562
2563 typedef struct block_info_def
2564 {
2565   /* Estimated frequency of execution of basic_block.  */
2566   sreal frequency;
2567
2568   /* To keep queue of basic blocks to process.  */
       /* propagate_freq threads its work list through this field and resets
          it to NULL when a block is taken off the list.  */
2569   basic_block next;
2570
2571   /* Number of predecessors we need to visit first.  */
       /* propagate_freq decrements it for each processed predecessor and
          queues the block once it reaches zero.  */
2572   int npredecessors;
2573 } *block_info;
2574
2575 /* Similar information for edges.  */
2576 typedef struct edge_info_def
2577 {
2578   /* In case edge is a loopback edge, the probability edge will be reached
2579      in case header is.  Estimated number of iterations of the loop can be
2580      then computed as 1 / (1 - back_edge_prob).  */
2581   sreal back_edge_prob;
2582   /* True if the edge is a loopback edge in the natural loop.  */
2583   unsigned int back_edge:1;
2584 } *edge_info;
2585
     /* Accessors for the per-block and per-edge records above; both simply
        cast the aux field of the given block or edge.  */
2586 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
2587 #define EDGE_INFO(E)    ((edge_info) (E)->aux)
2588
2589 /* Helper function for estimate_bb_frequencies.
2590    Propagate the frequencies in blocks marked in
2591    TOVISIT, starting in HEAD.
        HEAD is given the frequency real_one.  Blocks are taken from a work
        list threaded through BLOCK_INFO (bb)->next; a block is appended to
        it once all the predecessors counted for it have been processed.
        Every processed block is cleared from TOVISIT.  */
2592
2593 static void
2594 propagate_freq (basic_block head, bitmap tovisit)
2595 {
2596   basic_block bb;
2597   basic_block last;
2598   unsigned i;
2599   edge e;
2600   basic_block nextbb;
2601   bitmap_iterator bi;
2602
2603   /* For each basic block we need to visit count number of his predecessors
2604      we need to visit first.  */
2605   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2606     {
2607       edge_iterator ei;
2608       int count = 0;
2609
2610       bb = BASIC_BLOCK (i);
2611
           /* Only predecessors inside TOVISIT that are reached through an
              edge not flagged EDGE_DFS_BACK are counted.  */
2612       FOR_EACH_EDGE (e, ei, bb->preds)
2613         {
2614           bool visit = bitmap_bit_p (tovisit, e->src->index);
2615
2616           if (visit && !(e->flags & EDGE_DFS_BACK))
2617             count++;
2618           else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2619             fprintf (dump_file,
2620                      "Irreducible region hit, ignoring edge to %i->%i\n",
2621                      e->src->index, bb->index);
2622         }
2623       BLOCK_INFO (bb)->npredecessors = count;
2624       /* When function never returns, we will never process exit block.  */
2625       if (!count && bb == EXIT_BLOCK_PTR)
2626         bb->count = bb->frequency = 0;
2627     }
2628
       /* Seed the propagation: HEAD starts with frequency real_one and is
          the only element of the work list.  */
2629   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2630   last = head;
2631   for (bb = head; bb; bb = nextbb)
2632     {
2633       edge_iterator ei;
2634       sreal cyclic_probability, frequency;
2635
2636       memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2637       memcpy (&frequency, &real_zero, sizeof (real_zero));
2638
           /* Take BB off the work list.  */
2639       nextbb = BLOCK_INFO (bb)->next;
2640       BLOCK_INFO (bb)->next = NULL;
2641
2642       /* Compute frequency of basic block.  */
2643       if (bb != head)
2644         {
2645 #ifdef ENABLE_CHECKING
2646           FOR_EACH_EDGE (e, ei, bb->preds)
2647             gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2648                         || (e->flags & EDGE_DFS_BACK));
2649 #endif
2650
               /* Probabilities of recorded back edges are summed in
                  CYCLIC_PROBABILITY; what flows in over the remaining
                  non-DFS-back edges is summed in FREQUENCY.  */
2651           FOR_EACH_EDGE (e, ei, bb->preds)
2652             if (EDGE_INFO (e)->back_edge)
2653               {
2654                 sreal_add (&cyclic_probability, &cyclic_probability,
2655                            &EDGE_INFO (e)->back_edge_prob);
2656               }
2657             else if (!(e->flags & EDGE_DFS_BACK))
2658               {
2659                 sreal tmp;
2660
2661                 /*  frequency += (e->probability
2662                                   * BLOCK_INFO (e->src)->frequency /
2663                                   REG_BR_PROB_BASE);  */
2664
2665                 sreal_init (&tmp, e->probability, 0);
2666                 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2667                 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2668                 sreal_add (&frequency, &frequency, &tmp);
2669               }
2670
2671           if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2672             {
2673               memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2674                       sizeof (frequency));
2675             }
2676           else
2677             {
                   /* Cap the cyclic probability at real_almost_one --
                      presumably to keep the divisor computed below away from
                      zero.  */
2678               if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2679                 {
2680                   memcpy (&cyclic_probability, &real_almost_one,
2681                           sizeof (real_almost_one));
2682                 }
2683
2684               /* BLOCK_INFO (bb)->frequency = frequency
2685                                               / (1 - cyclic_probability) */
2686
2687               sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2688               sreal_div (&BLOCK_INFO (bb)->frequency,
2689                          &frequency, &cyclic_probability);
2690             }
2691         }
2692
2693       bitmap_clear_bit (tovisit, bb->index);
2694
           /* An edge from BB back to HEAD gets its back_edge_prob recorded
              now that the frequency of BB is known.  */
2695       e = find_edge (bb, head);
2696       if (e)
2697         {
2698           sreal tmp;
2699
2700           /* EDGE_INFO (e)->back_edge_prob
2701              = ((e->probability * BLOCK_INFO (bb)->frequency)
2702              / REG_BR_PROB_BASE); */
2703
2704           sreal_init (&tmp, e->probability, 0);
2705           sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2706           sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2707                      &tmp, &real_inv_br_prob_base);
2708         }
2709
2710       /* Propagate to successor blocks.  */
           /* A successor whose last counted predecessor was just processed
              is appended to the work list (or becomes the next block when
              the list is empty).  */
2711       FOR_EACH_EDGE (e, ei, bb->succs)
2712         if (!(e->flags & EDGE_DFS_BACK)
2713             && BLOCK_INFO (e->dest)->npredecessors)
2714           {
2715             BLOCK_INFO (e->dest)->npredecessors--;
2716             if (!BLOCK_INFO (e->dest)->npredecessors)
2717               {
2718                 if (!nextbb)
2719                   nextbb = e->dest;
2720                 else
2721                   BLOCK_INFO (last)->next = e->dest;
2722
2723                 last = e->dest;
2724               }
2725           }
2726     }
2727 }
2728
2729 /* Estimate frequencies in loops at same nest level.  */
2730
2731 static void
2732 estimate_loops_at_level (struct loop *first_loop)
2733 {
2734   struct loop *loop;
2735
2736   for (loop = first_loop; loop; loop = loop->next)
2737     {
2738       edge e;
2739       basic_block *bbs;
2740       unsigned i;
2741       bitmap tovisit = BITMAP_ALLOC (NULL);
2742
2743       estimate_loops_at_level (loop->inner);
2744
2745       /* Find current loop back edge and mark it.  */
2746       e = loop_latch_edge (loop);
2747       EDGE_INFO (e)->back_edge = 1;
2748
2749       bbs = get_loop_body (loop);
2750       for (i = 0; i < loop->num_nodes; i++)
2751         bitmap_set_bit (tovisit, bbs[i]->index);
2752       free (bbs);
2753       propagate_freq (loop->header, tovisit);
2754       BITMAP_FREE (tovisit);
2755     }
2756 }
2757
2758 /* Propagates frequencies through structure of loops.  */
2759
2760 static void
2761 estimate_loops (void)
2762 {
2763   bitmap tovisit = BITMAP_ALLOC (NULL);
2764   basic_block bb;
2765
2766   /* Start by estimating the frequencies in the loops.  */
2767   if (number_of_loops () > 1)
2768     estimate_loops_at_level (current_loops->tree_root->inner);
2769
2770   /* Now propagate the frequencies through all the blocks.  */
2771   FOR_ALL_BB (bb)
2772     {
2773       bitmap_set_bit (tovisit, bb->index);
2774     }
2775   propagate_freq (ENTRY_BLOCK_PTR, tovisit);
2776   BITMAP_FREE (tovisit);
2777 }
2778
2779 /* Drop the profile for NODE to guessed, and update its frequency based on
2780    whether it is expected to be hot given the CALL_COUNT.  */
2781
2782 static void
2783 drop_profile (struct cgraph_node *node, gcov_type call_count)
2784 {
2785   struct function *fn = DECL_STRUCT_FUNCTION (node->symbol.decl);
2786   /* In the case where this was called by another function with a
2787      dropped profile, call_count will be 0. Since there are no
2788      non-zero call counts to this function, we don't know for sure
2789      whether it is hot, and therefore it will be marked normal below.  */
2790   bool hot = maybe_hot_count_p (NULL, call_count);
2791
2792   if (dump_file)
2793     fprintf (dump_file,
2794              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2795              cgraph_node_name (node), node->symbol.order,
2796              hot ? "Function is hot" : "Function is normal");
2797   /* We only expect to miss profiles for functions that are reached
2798      via non-zero call edges in cases where the function may have
2799      been linked from another module or library (COMDATs and extern
2800      templates). See the comments below for handle_missing_profiles.
2801      Also, only warn in cases where the missing counts exceed the
2802      number of training runs. In certain cases with an execv followed
2803      by a no-return call the profile for the no-return call is not
2804      dumped and there can be a mismatch.  */
2805   if (!DECL_COMDAT (node->symbol.decl) && !DECL_EXTERNAL (node->symbol.decl)
2806       && call_count > profile_info->runs)
2807     {
2808       if (flag_profile_correction)
2809         {
2810           if (dump_file)
2811             fprintf (dump_file,
2812                      "Missing counts for called function %s/%i\n",
2813                      cgraph_node_name (node), node->symbol.order);
2814         }
2815       else
2816         warning (0, "Missing counts for called function %s/%i",
2817                  cgraph_node_name (node), node->symbol.order);
2818     }
2819
2820   profile_status_for_function (fn)
2821       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2822   node->frequency
2823       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2824 }
2825
2826 /* In the case of COMDAT routines, multiple object files will contain the same
2827    function and the linker will select one for the binary. In that case
2828    all the other copies from the profile instrument binary will be missing
2829    profile counts. Look for cases where this happened, due to non-zero
2830    call counts going to 0-count functions, and drop the profile to guessed
2831    so that we can use the estimated probabilities and avoid optimizing only
2832    for size.
2833
2834    The other case where the profile may be missing is when the routine
2835    is not going to be emitted to the object file, e.g. for "extern template"
2836    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2837    all other cases of non-zero calls to 0-count functions.  */
2838
2839 void
2840 handle_missing_profiles (void)
2841 {
2842   struct cgraph_node *node;
2843   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2844   vec<struct cgraph_node *> worklist;
2845   worklist.create (64);
2846
2847   /* See if 0 count function has non-0 count callers.  In this case we
2848      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2849   FOR_EACH_DEFINED_FUNCTION (node)
2850     {
2851       struct cgraph_edge *e;
2852       gcov_type call_count = 0;
2853       struct function *fn = DECL_STRUCT_FUNCTION (node->symbol.decl);
2854
2855       if (node->count)
2856         continue;
2857       for (e = node->callers; e; e = e->next_caller)
2858         call_count += e->count;
2859       if (call_count
2860           && fn && fn->cfg
2861           && (call_count * unlikely_count_fraction >= profile_info->runs))
2862         {
2863           drop_profile (node, call_count);
2864           worklist.safe_push (node);
2865         }
2866     }
2867
2868   /* Propagate the profile dropping to other 0-count COMDATs that are
2869      potentially called by COMDATs we already dropped the profile on.  */
2870   while (worklist.length () > 0)
2871     {
2872       struct cgraph_edge *e;
2873
2874       node = worklist.pop ();
2875       for (e = node->callees; e; e = e->next_caller)
2876         {
2877           struct cgraph_node *callee = e->callee;
2878           struct function *fn = DECL_STRUCT_FUNCTION (callee->symbol.decl);
2879
2880           if (callee->count > 0)
2881             continue;
2882           if (DECL_COMDAT (callee->symbol.decl) && fn && fn->cfg
2883               && profile_status_for_function (fn) == PROFILE_READ)
2884             {
2885               drop_profile (node, 0);
2886               worklist.safe_push (callee);
2887             }
2888         }
2889     }
2890   worklist.release ();
2891 }
2892
2893 /* Convert counts measured by profile driven feedback to frequencies.
2894    Return nonzero iff there was any nonzero execution count.  */
2895
2896 int
2897 counts_to_freqs (void)
2898 {
2899   gcov_type count_max, true_count_max = 0;
2900   basic_block bb;
2901
2902   /* Don't overwrite the estimated frequencies when the profile for
2903      the function is missing.  We may drop this function PROFILE_GUESSED
2904      later in drop_profile ().  */
2905   if (!ENTRY_BLOCK_PTR->count)
2906     return 0;
2907
2908   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2909     true_count_max = MAX (bb->count, true_count_max);
2910
2911   count_max = MAX (true_count_max, 1);
2912   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2913     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2914
2915   return true_count_max;
2916 }
2917
2918 /* Return true if function is likely to be expensive, so there is no point to
2919    optimize performance of prologue, epilogue or do inlining at the expense
2920    of code size growth.  THRESHOLD is the limit of number of instructions
2921    function can execute at average to be still considered not expensive.  */
2922
2923 bool
2924 expensive_function_p (int threshold)
2925 {
2926   unsigned int sum = 0;
2927   basic_block bb;
2928   unsigned int limit;
2929
2930   /* We can not compute accurately for large thresholds due to scaled
2931      frequencies.  */
2932   gcc_assert (threshold <= BB_FREQ_MAX);
2933
2934   /* Frequencies are out of range.  This either means that function contains
2935      internal loop executing more than BB_FREQ_MAX times or profile feedback
2936      is available and function has not been executed at all.  */
2937   if (ENTRY_BLOCK_PTR->frequency == 0)
2938     return true;
2939
2940   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2941   limit = ENTRY_BLOCK_PTR->frequency * threshold;
2942   FOR_EACH_BB (bb)
2943     {
2944       rtx insn;
2945
2946       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2947            insn = NEXT_INSN (insn))
2948         if (active_insn_p (insn))
2949           {
2950             sum += bb->frequency;
2951             if (sum > limit)
2952               return true;
2953         }
2954     }
2955
2956   return false;
2957 }
2958
2959 /* Estimate and propagate basic block frequencies using the given branch
2960    probabilities.  If FORCE is true, the frequencies are used to estimate
2961    the counts even when there are already non-zero profile counts.  */
2962
2963 void
2964 estimate_bb_frequencies (bool force)
2965 {
2966   basic_block bb;
2967   sreal freq_max;
2968
2969   if (force || profile_status != PROFILE_READ || !counts_to_freqs ())
2970     {
2971       static int real_values_initialized = 0;
2972
2973       if (!real_values_initialized)
2974         {
2975           real_values_initialized = 1;
2976           sreal_init (&real_zero, 0, 0);
2977           sreal_init (&real_one, 1, 0);
2978           sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2979           sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2980           sreal_init (&real_one_half, 1, -1);
2981           sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2982           sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2983         }
2984
2985       mark_dfs_back_edges ();
2986
2987       single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2988
2989       /* Set up block info for each basic block.  */
2990       alloc_aux_for_blocks (sizeof (struct block_info_def));
2991       alloc_aux_for_edges (sizeof (struct edge_info_def));
2992       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2993         {
2994           edge e;
2995           edge_iterator ei;
2996
2997           FOR_EACH_EDGE (e, ei, bb->succs)
2998             {
2999               sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
3000               sreal_mul (&EDGE_INFO (e)->back_edge_prob,
3001                          &EDGE_INFO (e)->back_edge_prob,
3002                          &real_inv_br_prob_base);
3003             }
3004         }
3005
3006       /* First compute frequencies locally for each loop from innermost
3007          to outermost to examine frequencies for back edges.  */
3008       estimate_loops ();
3009
3010       memcpy (&freq_max, &real_zero, sizeof (real_zero));
3011       FOR_EACH_BB (bb)
3012         if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
3013           memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
3014
3015       sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
3016       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3017         {
3018           sreal tmp;
3019
3020           sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
3021           sreal_add (&tmp, &tmp, &real_one_half);
3022           bb->frequency = sreal_to_int (&tmp);
3023         }
3024
3025       free_aux_for_blocks ();
3026       free_aux_for_edges ();
3027     }
3028   compute_function_frequency ();
3029 }
3030
3031 /* Decide whether function is hot, cold or unlikely executed.  */
3032 void
3033 compute_function_frequency (void)
3034 {
3035   basic_block bb;
3036   struct cgraph_node *node = cgraph_get_node (current_function_decl);
3037   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3038       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3039     node->only_called_at_startup = true;
3040   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3041     node->only_called_at_exit = true;
3042
3043   if (!profile_info || !flag_branch_probabilities
3044       || (flag_auto_profile && profile_status == PROFILE_GUESSED))
3045     {
3046       int flags = flags_from_decl_or_type (current_function_decl);
3047       if (profile_info && flag_auto_profile_accurate)
3048         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3049       else if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3050           != NULL)
3051         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3052       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3053                != NULL)
3054         node->frequency = NODE_FREQUENCY_HOT;
3055       else if (flags & ECF_NORETURN)
3056         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3057       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3058         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3059       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3060                || DECL_STATIC_DESTRUCTOR (current_function_decl))
3061         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3062       return;
3063     }
3064   node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3065   FOR_EACH_BB (bb)
3066     {
3067       if (maybe_hot_bb_p (cfun, bb))
3068         {
3069           node->frequency = NODE_FREQUENCY_HOT;
3070           return;
3071         }
3072       if (!probably_never_executed_bb_p (cfun, bb))
3073         node->frequency = NODE_FREQUENCY_NORMAL;
3074     }
3075 }
3076
3077 static bool
3078 gate_estimate_probability (void)
3079 {
3080   return flag_guess_branch_prob;
3081 }
3082
3083 /* Build PREDICT_EXPR.  */
3084 tree
3085 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3086 {
3087   tree t = build1 (PREDICT_EXPR, void_type_node,
3088                    build_int_cst (integer_type_node, predictor));
3089   SET_PREDICT_EXPR_OUTCOME (t, taken);
3090   return t;
3091 }
3092
3093 const char *
3094 predictor_name (enum br_predictor predictor)
3095 {
3096   return predictor_info[predictor].name;
3097 }
3098
3099 struct gimple_opt_pass pass_profile =
3100 {
3101  {
3102   GIMPLE_PASS,
3103   "profile_estimate",                   /* name */
3104   OPTGROUP_NONE,                        /* optinfo_flags */
3105   gate_estimate_probability,            /* gate */
3106   tree_estimate_probability_driver,     /* execute */
3107   NULL,                                 /* sub */
3108   NULL,                                 /* next */
3109   0,                                    /* static_pass_number */
3110   TV_BRANCH_PROB,                       /* tv_id */
3111   PROP_cfg,                             /* properties_required */
3112   0,                                    /* properties_provided */
3113   0,                                    /* properties_destroyed */
3114   0,                                    /* todo_flags_start */
3115   TODO_ggc_collect | TODO_verify_ssa                    /* todo_flags_finish */
3116  }
3117 };
3118
3119 struct gimple_opt_pass pass_strip_predict_hints =
3120 {
3121  {
3122   GIMPLE_PASS,
3123   "*strip_predict_hints",               /* name */
3124   OPTGROUP_NONE,                        /* optinfo_flags */
3125   NULL,                                 /* gate */
3126   strip_predict_hints,                  /* execute */
3127   NULL,                                 /* sub */
3128   NULL,                                 /* next */
3129   0,                                    /* static_pass_number */
3130   TV_BRANCH_PROB,                       /* tv_id */
3131   PROP_cfg,                             /* properties_required */
3132   0,                                    /* properties_provided */
3133   0,                                    /* properties_destroyed */
3134   0,                                    /* todo_flags_start */
3135   TODO_ggc_collect | TODO_verify_ssa                    /* todo_flags_finish */
3136  }
3137 };
3138
3139 /* Rebuild function frequencies.  Passes are in general expected to
3140    maintain profile by hand, however in some cases this is not possible:
3141    for example when inlining several functions with loops freuqencies might run
3142    out of scale and thus needs to be recomputed.  */
3143
3144 void
3145 rebuild_frequencies (void)
3146 {
3147   timevar_push (TV_REBUILD_FREQUENCIES);
3148
3149   /* When the max bb count in the function is small, there is a higher
3150      chance that there were truncation errors in the integer scaling
3151      of counts by inlining and other optimizations. This could lead
3152      to incorrect classification of code as being cold when it isn't.
3153      In that case, force the estimation of bb counts/frequencies from the
3154      branch probabilities, rather than computing frequencies from counts,
3155      which may also lead to frequencies incorrectly reduced to 0. There
3156      is less precision in the probabilities, so we only do this for small
3157      max counts.  */
3158   gcov_type count_max = 0;
3159   basic_block bb;
3160   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
3161     count_max = MAX (bb->count, count_max);
3162
3163   if (profile_status == PROFILE_GUESSED
3164       || (profile_status == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3165     {
3166       loop_optimizer_init (0);
3167       add_noreturn_fake_exit_edges ();
3168       mark_irreducible_loops ();
3169       connect_infinite_loops_to_exit ();
3170       estimate_bb_frequencies (true);
3171       remove_fake_exit_edges ();
3172       loop_optimizer_finalize ();
3173     }
3174   else if (profile_status == PROFILE_READ)
3175     counts_to_freqs ();
3176   else
3177     gcc_unreachable ();
3178   timevar_pop (TV_REBUILD_FREQUENCIES);
3179 }