gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2017 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This file contains the "reorder blocks" pass, which changes the control
  21    flow of a function to encounter fewer branches; the "partition blocks"
  22    pass, which divides the basic blocks into "hot" and "cold" partitions,
  23    which are kept separate; and the "duplicate computed gotos" pass, which
  24    duplicates blocks ending in an indirect jump.
  25
  26    There are two algorithms for "reorder blocks": the "simple" algorithm,
  27    which just rearranges blocks, trying to minimize the number of executed
  28    unconditional branches; and the "software trace cache" algorithm, which
  29    also copies code, and in general tries a lot harder to have long linear
  30    pieces of machine code executed.  This algorithm is described next.  */
  31
  32 /* This (greedy) algorithm constructs traces in several rounds.
  33    The construction starts from "seeds".  The seed for the first round
  34    is the entry point of the function.  When there is more than one seed,
  35    the one with the lowest key in the heap is selected first (see bb_to_key).
  36    Then the algorithm repeatedly adds the most probable successor to the end
  37    of a trace.  Finally it connects the traces.
  38
  39    There are two parameters: Branch Threshold and Exec Threshold.
  40    If the probability of an edge to a successor of the current basic block is
  41    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  42    then the successor will be the seed in one of the next rounds.
  43    Each round has these parameters lower than the previous one.
  44    The last round has to have these parameters set to zero so that the
  45    remaining blocks are picked up.
  46
  47    The algorithm selects the most probable successor from all unvisited
  48    successors and successors that have been added to this trace.
  49    The other successors (that have not been "sent" to the next round) will be
  50    other seeds for this round and the secondary traces will start from them.
  51    If the successor has not been visited in this trace, it is added to the
  52    trace (however, there is some heuristic for simple branches).
  53    If the successor has been visited in this trace, a loop has been found.
  54    If the loop has many iterations, the loop is rotated so that the source
  55    block of the most probable edge going out of the loop is the last block
  56    of the trace.
  57    If the loop has few iterations and there is no edge from the last block of
  58    the loop going out of the loop, the loop header is duplicated.
  59
  60    When connecting traces, the algorithm first checks whether there is an edge
  61    from the last block of a trace to the first block of another trace.
  62    When there are still some unconnected traces it checks whether there exists
  63    a basic block BB such that BB is a successor of the last block of a trace
  64    and BB is a predecessor of the first block of another trace.  In this case,
  65    BB is duplicated, added at the end of the first trace and the traces are
  66    connected through it.
  67    The rest of traces are simply connected so there will be a jump to the
  68    beginning of the rest of traces.
  69
  70    The above description is for the full algorithm, which is used when the
  71    function is optimized for speed.  When the function is optimized for size,
  72    in order to reduce long jumps and connect more fallthru edges, the
  73    algorithm is modified as follows:
  74    (1) Break long traces to short ones.  A trace is broken at a block that has
  75    multiple predecessors/successors during trace discovery.  When connecting
  76    traces, only connect Trace n with Trace n + 1.  This change reduces most
  77    long jumps compared with the above algorithm.
  78    (2) Ignore the edge probability and frequency for fallthru edges.
  79    (3) Keep the original order of blocks when there is no chance to fall
  80    through.  We rely on the results of cfg_cleanup.
  81
  82    To implement the change for code size optimization, block's index is
  83    selected as the key and all traces are found in one round.
  84
  85    References:
  86
  87    "Software Trace Cache"
  88    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  89    http://citeseer.nj.nec.com/15361.html
  90
  91 */
  92
  93 #include "config.h"
  94 #define INCLUDE_ALGORITHM /* stable_sort */
  95 #include "system.h"
  96 #include "coretypes.h"
  97 #include "backend.h"
  98 #include "target.h"
  99 #include "rtl.h"
 100 #include "tree.h"
 101 #include "cfghooks.h"
 102 #include "df.h"
 103 #include "memmodel.h"
 104 #include "optabs.h"
 105 #include "regs.h"
 106 #include "emit-rtl.h"
 107 #include "output.h"
 108 #include "expr.h"
 109 #include "params.h"
 110 #include "tree-pass.h"
 111 #include "cfgrtl.h"
 112 #include "cfganal.h"
 113 #include "cfgbuild.h"
 114 #include "cfgcleanup.h"
 115 #include "bb-reorder.h"
 116 #include "except.h"
 117 #include "fibonacci_heap.h"
 118 #include "stringpool.h"
 119 #include "attribs.h"
 120
 121 /* The number of rounds, i.e. the size of the threshold tables below.  The extra
 122    round was meant for hot/cold partitioning.  NOTE(review): find_traces
 123    always runs N_ROUNDS - 1 rounds, so confirm the last entries are still used.  */
 124 #define N_ROUNDS 5
 125
 126 struct target_bb_reorder default_target_bb_reorder;
 127 #if SWITCHABLE_TARGET
 128 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 129 #endif
 130 /* Shorthand for the x_uncond_jump_length field of the current target state.  */
 131 #define uncond_jump_length \
 132   (this_target_bb_reorder->x_uncond_jump_length)
 133
 134 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 135 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 136
 137 /* Exec thresholds in per mille of the maximum entry frequency and count.  */
 138 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 139
 140 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 141    block the edge destination is not duplicated while connecting traces.  */
 142 #define DUPLICATION_THRESHOLD 100
 143 /* Fibonacci heap of basic blocks keyed by a long, and its node type.  */
 144 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
 145 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 146
 147 /* Per-basic-block bookkeeping for this pass; BBD holds one per block index.  */
 148 struct bbro_basic_block_data
 149 {
 150   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 151   int start_of_trace;
 152
 153   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 154   int end_of_trace;
 155
 156   /* Which trace is the bb in?  This is a 0-based index into the trace array.  */
 157   int in_trace;
 158
 159   /* 1-based number of the trace that visited this bb; 0 is treated as unvisited.  */
 160   int visited;
 161
 162   /* Cached maximum frequency of interesting incoming edges.
 163      Minus one means not yet computed.  */
 164   int priority;
 165
 166   /* Heap the bb is currently queued in, or NULL; reset by mark_bb_visited.  */
 167   bb_heap_t *heap;
 168
 169   /* The bb's node inside HEAP, or NULL when the bb is not in any heap.  */
 170   bb_heap_node_t *node;
 171 };
 172
 173 /* The current number of elements in BBD; bb_visited_trace asserts against it.  */
 174 static int array_size;
 175
 176 /* The array of per-block data for this pass, indexed by basic block index.  */
 177 static bbro_basic_block_data *bbd;
 178
 179 /* To avoid frequent reallocation the size of arrays is greater than needed,
 180    the number of elements is (not less than) 1.25 * size_wanted.  */
 181 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 182
 183 /* Free the memory and set the pointer to NULL.  */
 184 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 185
 186 /* Description of one trace: a chain of basic blocks linked through bb->aux.  */
 187 struct trace
 188 {
 189   /* First and last basic block of the trace (the chain runs FIRST to LAST).  */
 190   basic_block first, last;
 191
 192   /* The 0-based round of the STC creation which this trace was found in.  */
 193   int round;
 194
 195   /* The length (i.e. the number of basic blocks) of the trace.  */
 196   int length;
 197 };
 198
 199 /* Maximum frequency and count over the successors of the entry block.  */
 200 static int max_entry_frequency;
 201 static profile_count max_entry_count;
 202
 203 /* Prototypes of this file's static functions, so they can be used before defined.  */
 204 static void find_traces (int *, struct trace *);
 205 static basic_block rotate_loop (edge, struct trace *, int);
 206 static void mark_bb_visited (basic_block, int);
 207 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 208                                  int, bb_heap_t **, int);
 209 static basic_block copy_bb (basic_block, edge, basic_block, int);
 210 static long bb_to_key (basic_block);
 211 static bool better_edge_p (const_basic_block, const_edge, profile_probability,
 212                            int, profile_probability, int, const_edge);
 213 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 214                                    struct trace *);
 215 static void connect_traces (int, struct trace *);
 216 static bool copy_bb_p (const_basic_block, int);
 217 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 218 \f
 219 /* Return the trace number in which BB was visited.  */
 220
 221 static int
 222 bb_visited_trace (const_basic_block bb)
 223 {
 224   gcc_assert (bb->index < array_size);
 225   return bbd[bb->index].visited;
 226 }
 227
 228 /* This function marks BB that it was visited in trace number TRACE.  */
 229
 230 static void
 231 mark_bb_visited (basic_block bb, int trace)
 232 {
 233   bbd[bb->index].visited = trace;
 234   if (bbd[bb->index].heap)
 235     {
 236       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 237       bbd[bb->index].heap = NULL;
 238       bbd[bb->index].node = NULL;
 239     }
 240 }
 241
 242 /* Check to see if bb should be pushed into the next round of trace
 243    collections or not.  Reasons for pushing the block forward are 1).
 244    If the block is cold, we are doing partitioning, and there will be
 245    another round (cold partition blocks are not supposed to be
 246    collected into traces until the very last round); or 2). There will
 247    be another round, and the basic block is not "hot enough" for the
 248    current round of trace collection.  */
 249
 250 static bool
 251 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 252                       int exec_th, gcov_type count_th)
 253 {
 254   bool there_exists_another_round;
 255   bool block_not_hot_enough;
 256
 257   there_exists_another_round = round < number_of_rounds - 1;
 258
 259   block_not_hot_enough = (bb->count.to_frequency (cfun) < exec_th
 260                           || bb->count.ipa () < count_th
 261                           || probably_never_executed_bb_p (cfun, bb));
 262
 263   if (there_exists_another_round
 264       && block_not_hot_enough)
 265     return true;
 266   else
 267     return false;
 268 }
 269
 270 /* Find the traces for Software Trace Cache.  Chain each trace through
 271    RBI()->next.  Store the number of traces to N_TRACES and description of
 272    traces to TRACES.  */
 273
 274 static void
 275 find_traces (int *n_traces, struct trace *traces)
 276 {
 277   int i;
 278   int number_of_rounds;
 279   edge e;
 280   edge_iterator ei;
 281   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
 282
 283   /* Add one extra round of trace collection when partitioning hot/cold
 284      basic blocks into separate sections.  The last round is for all the
 285      cold blocks (and ONLY the cold blocks).  */
 286
 287   number_of_rounds = N_ROUNDS - 1;
 288
 289   /* Insert entry points of function into heap.  */
 290   max_entry_frequency = 0;
 291   max_entry_count = profile_count::zero ();
 292   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 293     {
 294       bbd[e->dest->index].heap = heap;
 295       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
 296       if (e->dest->count.to_frequency (cfun) > max_entry_frequency)
 297         max_entry_frequency = e->dest->count.to_frequency (cfun);
 298       if (e->dest->count.ipa_p () && e->dest->count > max_entry_count)
 299         max_entry_count = e->dest->count;
 300     }
 301
 302   /* Find the traces.  */
 303   for (i = 0; i < number_of_rounds; i++)
 304     {
 305       gcov_type count_threshold;
 306
 307       if (dump_file)
 308         fprintf (dump_file, "STC - round %d\n", i + 1);
 309
 310       if (max_entry_count < INT_MAX / 1000)
 311         count_threshold = max_entry_count.to_gcov_type () * exec_threshold[i] / 1000;
 312       else
 313         count_threshold = max_entry_count.to_gcov_type () / 1000 * exec_threshold[i];
 314
 315       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 316                            max_entry_frequency * exec_threshold[i] / 1000,
 317                            count_threshold, traces, n_traces, i, &heap,
 318                            number_of_rounds);
 319     }
 320   delete heap;
 321
 322   if (dump_file)
 323     {
 324       for (i = 0; i < *n_traces; i++)
 325         {
 326           basic_block bb;
 327           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 328                    traces[i].round + 1);
 329           for (bb = traces[i].first;
 330                bb != traces[i].last;
 331                bb = (basic_block) bb->aux)
 332             fprintf (dump_file, "%d [%d] ", bb->index,
 333                      bb->count.to_frequency (cfun));
 334           fprintf (dump_file, "%d [%d]\n", bb->index,
 335                    bb->count.to_frequency (cfun));
 336         }
 337       fflush (dump_file);
 338     }
 339 }
 340
 341 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 342    (with sequential number TRACE_N).  */
 343
 344 static basic_block
 345 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 346 {
 347   basic_block bb;
 348
 349   /* Information about the best end (end after rotation) of the loop.  */
 350   basic_block best_bb = NULL;
 351   edge best_edge = NULL;
 352   int best_freq = -1;
 353   profile_count best_count = profile_count::uninitialized ();
 354   /* The best edge is preferred when its destination is not visited yet
 355      or is a start block of some trace.  */
 356   bool is_preferred = false;
 357
 358   /* Find the most frequent edge that goes out from current trace.  */
 359   bb = back_edge->dest;
 360   do
 361     {
 362       edge e;
 363       edge_iterator ei;
 364
 365       FOR_EACH_EDGE (e, ei, bb->succs)
 366         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 367             && bb_visited_trace (e->dest) != trace_n
 368             && (e->flags & EDGE_CAN_FALLTHRU)
 369             && !(e->flags & EDGE_COMPLEX))
 370         {
 371           if (is_preferred)
 372             {
 373               /* The best edge is preferred.  */
 374               if (!bb_visited_trace (e->dest)
 375                   || bbd[e->dest->index].start_of_trace >= 0)
 376                 {
 377                   /* The current edge E is also preferred.  */
 378                   int freq = EDGE_FREQUENCY (e);
 379                   if (freq > best_freq || e->count () > best_count)
 380                     {
 381                       best_freq = freq;
 382                       if (e->count ().initialized_p ())
 383                         best_count = e->count ();
 384                       best_edge = e;
 385                       best_bb = bb;
 386                     }
 387                 }
 388             }
 389           else
 390             {
 391               if (!bb_visited_trace (e->dest)
 392                   || bbd[e->dest->index].start_of_trace >= 0)
 393                 {
 394                   /* The current edge E is preferred.  */
 395                   is_preferred = true;
 396                   best_freq = EDGE_FREQUENCY (e);
 397                   best_count = e->count ();
 398                   best_edge = e;
 399                   best_bb = bb;
 400                 }
 401               else
 402                 {
 403                   int freq = EDGE_FREQUENCY (e);
 404                   if (!best_edge || freq > best_freq || e->count () > best_count)
 405                     {
 406                       best_freq = freq;
 407                       best_count = e->count ();
 408                       best_edge = e;
 409                       best_bb = bb;
 410                     }
 411                 }
 412             }
 413         }
 414       bb = (basic_block) bb->aux;
 415     }
 416   while (bb != back_edge->dest);
 417
 418   if (best_bb)
 419     {
 420       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 421          the trace.  */
 422       if (back_edge->dest == trace->first)
 423         {
 424           trace->first = (basic_block) best_bb->aux;
 425         }
 426       else
 427         {
 428           basic_block prev_bb;
 429
 430           for (prev_bb = trace->first;
 431                prev_bb->aux != back_edge->dest;
 432                prev_bb = (basic_block) prev_bb->aux)
 433             ;
 434           prev_bb->aux = best_bb->aux;
 435
 436           /* Try to get rid of uncond jump to cond jump.  */
 437           if (single_succ_p (prev_bb))
 438             {
 439               basic_block header = single_succ (prev_bb);
 440
 441               /* Duplicate HEADER if it is a small block containing cond jump
 442                  in the end.  */
 443               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 444                   && !CROSSING_JUMP_P (BB_END (header)))
 445                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 446             }
 447         }
 448     }
 449   else
 450     {
 451       /* We have not found suitable loop tail so do no rotation.  */
 452       best_bb = back_edge->src;
 453     }
 454   best_bb->aux = NULL;
 455   return best_bb;
 456 }
 457
 458 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 459    not include basic blocks whose probability is lower than BRANCH_TH or whose
 460    frequency is lower than EXEC_TH into traces (or whose count is lower than
 461    COUNT_TH).  Store the new traces into TRACES and modify the number of
 462    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 463    The function expects starting basic blocks to be in *HEAP and will delete
 464    *HEAP and store starting points for the next round into new *HEAP.  */
 465
 466 static void
 467 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 468                      struct trace *traces, int *n_traces, int round,
 469                      bb_heap_t **heap, int number_of_rounds)
 470 {
 471   /* Heap for discarded basic blocks which are possible starting points for
 472      the next round.  */
 473   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 474   bool for_size = optimize_function_for_size_p (cfun);
 475
 476   while (!(*heap)->empty ())
 477     {
 478       basic_block bb;
 479       struct trace *trace;
 480       edge best_edge, e;
 481       long key;
 482       edge_iterator ei;
 483
 484       bb = (*heap)->extract_min ();
 485       bbd[bb->index].heap = NULL;
 486       bbd[bb->index].node = NULL;
 487
 488       if (dump_file)
 489         fprintf (dump_file, "Getting bb %d\n", bb->index);
 490
 491       /* If the BB's frequency is too low, send BB to the next round.  When
 492          partitioning hot/cold blocks into separate sections, make sure all
 493          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 494          round.  When optimizing for size, do not push to next round.  */
 495
 496       if (!for_size
 497           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 498                                    count_th))
 499         {
 500           int key = bb_to_key (bb);
 501           bbd[bb->index].heap = new_heap;
 502           bbd[bb->index].node = new_heap->insert (key, bb);
 503
 504           if (dump_file)
 505             fprintf (dump_file,
 506                      "  Possible start point of next round: %d (key: %d)\n",
 507                      bb->index, key);
 508           continue;
 509         }
 510
 511       trace = traces + *n_traces;
 512       trace->first = bb;
 513       trace->round = round;
 514       trace->length = 0;
 515       bbd[bb->index].in_trace = *n_traces;
 516       (*n_traces)++;
 517
 518       do
 519         {
 520           profile_probability prob;
 521           int freq;
 522           bool ends_in_call;
 523
 524           /* The probability and frequency of the best edge.  */
 525           profile_probability best_prob = profile_probability::uninitialized ();
 526           int best_freq = INT_MIN / 2;
 527
 528           best_edge = NULL;
 529           mark_bb_visited (bb, *n_traces);
 530           trace->length++;
 531
 532           if (dump_file)
 533             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 534                      bb->index, *n_traces);
 535
 536           ends_in_call = block_ends_with_call_p (bb);
 537
 538           /* Select the successor that will be placed after BB.  */
 539           FOR_EACH_EDGE (e, ei, bb->succs)
 540             {
 541               gcc_assert (!(e->flags & EDGE_FAKE));
 542
 543               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 544                 continue;
 545
 546               if (bb_visited_trace (e->dest)
 547                   && bb_visited_trace (e->dest) != *n_traces)
 548                 continue;
 549
 550               /* If partitioning hot/cold basic blocks, don't consider edges
 551                  that cross section boundaries.  */
 552               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 553                 continue;
 554
 555               prob = e->probability;
 556               freq = e->dest->count.to_frequency (cfun);
 557
 558               /* The only sensible preference for a call instruction is the
 559                  fallthru edge.  Don't bother selecting anything else.  */
 560               if (ends_in_call)
 561                 {
 562                   if (e->flags & EDGE_CAN_FALLTHRU)
 563                     {
 564                       best_edge = e;
 565                       best_prob = prob;
 566                       best_freq = freq;
 567                     }
 568                   continue;
 569                 }
 570
 571               /* Edge that cannot be fallthru or improbable or infrequent
 572                  successor (i.e. it is unsuitable successor).  When optimizing
 573                  for size, ignore the probability and frequency.  */
 574               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 575                   || !prob.initialized_p ()
 576                   || ((prob.to_reg_br_prob_base () < branch_th
 577                        || EDGE_FREQUENCY (e) < exec_th
 578                       || e->count ().ipa () < count_th) && (!for_size)))
 579                 continue;
 580
 581               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 582                                  best_edge))
 583                 {
 584                   best_edge = e;
 585                   best_prob = prob;
 586                   best_freq = freq;
 587                 }
 588             }
 589
 590           /* If the best destination has multiple predecessors and can be
 591              duplicated cheaper than a jump, don't allow it to be added to
 592              a trace; we'll duplicate it when connecting the traces later.
 593              However, we need to check that this duplication wouldn't leave
 594              the best destination with only crossing predecessors, because
 595              this would change its effective partition from hot to cold.  */
 596           if (best_edge
 597               && EDGE_COUNT (best_edge->dest->preds) >= 2
 598               && copy_bb_p (best_edge->dest, 0))
 599             {
 600               bool only_crossing_preds = true;
 601               edge e;
 602               edge_iterator ei;
 603               FOR_EACH_EDGE (e, ei, best_edge->dest->preds)
 604                 if (e != best_edge && !(e->flags & EDGE_CROSSING))
 605                   {
 606                     only_crossing_preds = false;
 607                     break;
 608                   }
 609               if (!only_crossing_preds)
 610                 best_edge = NULL;
 611             }
 612
 613           /* If the best destination has multiple successors or predecessors,
 614              don't allow it to be added when optimizing for size.  This makes
 615              sure predecessors with smaller index are handled before the best
 616              destination.  It breaks long traces and reduces long jumps.
 617
 618              Take if-then-else as an example.
 619                 A
 620                / \
 621               B   C
 622                \ /
 623                 D
 624              If we do not remove the best edge B->D/C->D, the final order might
 625              be A B D ... C.  C is at the end of the program.  If D's successors
 626              and D are complicated, might need long jumps for A->C and C->D.
 627              Similar issue for order: A C D ... B.
 628
 629              After removing the best edge, the final result will be ABCD/ ACBD.
 630              It does not add jump compared with the previous order.  But it
 631              reduces the possibility of long jumps.  */
 632           if (best_edge && for_size
 633               && (EDGE_COUNT (best_edge->dest->succs) > 1
 634                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 635             best_edge = NULL;
 636
 637           /* Add all non-selected successors to the heaps.  */
 638           FOR_EACH_EDGE (e, ei, bb->succs)
 639             {
 640               if (e == best_edge
 641                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 642                   || bb_visited_trace (e->dest))
 643                 continue;
 644
 645               key = bb_to_key (e->dest);
 646
 647               if (bbd[e->dest->index].heap)
 648                 {
 649                   /* E->DEST is already in some heap.  */
 650                   if (key != bbd[e->dest->index].node->get_key ())
 651                     {
 652                       if (dump_file)
 653                         {
 654                           fprintf (dump_file,
 655                                    "Changing key for bb %d from %ld to %ld.\n",
 656                                    e->dest->index,
 657                                    (long) bbd[e->dest->index].node->get_key (),
 658                                    key);
 659                         }
 660                       bbd[e->dest->index].heap->replace_key
 661                         (bbd[e->dest->index].node, key);
 662                     }
 663                 }
 664               else
 665                 {
 666                   bb_heap_t *which_heap = *heap;
 667
 668                   prob = e->probability;
 669                   freq = EDGE_FREQUENCY (e);
 670
 671                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 672                       || (e->flags & EDGE_COMPLEX)
 673                       || !prob.initialized_p ()
 674                       || prob.to_reg_br_prob_base () < branch_th
 675                       || freq < exec_th
 676                       || e->count ().ipa () < count_th)
 677                     {
 678                       /* When partitioning hot/cold basic blocks, make sure
 679                          the cold blocks (and only the cold blocks) all get
 680                          pushed to the last round of trace collection.  When
 681                          optimizing for size, do not push to next round.  */
 682
 683                       if (!for_size && push_to_next_round_p (e->dest, round,
 684                                                              number_of_rounds,
 685                                                              exec_th, count_th))
 686                         which_heap = new_heap;
 687                     }
 688
 689                   bbd[e->dest->index].heap = which_heap;
 690                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 691
 692                   if (dump_file)
 693                     {
 694                       fprintf (dump_file,
 695                                "  Possible start of %s round: %d (key: %ld)\n",
 696                                (which_heap == new_heap) ? "next" : "this",
 697                                e->dest->index, (long) key);
 698                     }
 699
 700                 }
 701             }
 702
 703           if (best_edge) /* Suitable successor was found.  */
 704             {
 705               if (bb_visited_trace (best_edge->dest) == *n_traces)
 706                 {
 707                   /* We do nothing with one basic block loops.  */
 708                   if (best_edge->dest != bb)
 709                     {
 710                       if (EDGE_FREQUENCY (best_edge)
 711                           > 4 * best_edge->dest->count.to_frequency (cfun) / 5)
 712                         {
 713                           /* The loop has at least 4 iterations.  If the loop
 714                              header is not the first block of the function
 715                              we can rotate the loop.  */
 716
 717                           if (best_edge->dest
 718                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 719                             {
 720                               if (dump_file)
 721                                 {
 722                                   fprintf (dump_file,
 723                                            "Rotating loop %d - %d\n",
 724                                            best_edge->dest->index, bb->index);
 725                                 }
 726                               bb->aux = best_edge->dest;
 727                               bbd[best_edge->dest->index].in_trace =
 728                                                              (*n_traces) - 1;
 729                               bb = rotate_loop (best_edge, trace, *n_traces);
 730                             }
 731                         }
 732                       else
 733                         {
 734                           /* The loop has less than 4 iterations.  */
 735
 736                           if (single_succ_p (bb)
 737                               && copy_bb_p (best_edge->dest,
 738                                             optimize_edge_for_speed_p
 739                                             (best_edge)))
 740                             {
 741                               bb = copy_bb (best_edge->dest, best_edge, bb,
 742                                             *n_traces);
 743                               trace->length++;
 744                             }
 745                         }
 746                     }
 747
 748                   /* Terminate the trace.  */
 749                   break;
 750                 }
 751               else
 752                 {
 753                   /* Check for a situation
 754
 755                     A
 756                    /|
 757                   B |
 758                    \|
 759                     C
 760
 761                   where
 762                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 763                     >= EDGE_FREQUENCY (AC).
 764                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 765                   Best ordering is then A B C.
 766
 767                   When optimizing for size, A B C is always the best order.
 768
 769                   This situation is created for example by:
 770
 771                   if (A) B;
 772                   C;
 773
 774                   */
 775
 776                   FOR_EACH_EDGE (e, ei, bb->succs)
 777                     if (e != best_edge
 778                         && (e->flags & EDGE_CAN_FALLTHRU)
 779                         && !(e->flags & EDGE_COMPLEX)
 780                         && !bb_visited_trace (e->dest)
 781                         && single_pred_p (e->dest)
 782                         && !(e->flags & EDGE_CROSSING)
 783                         && single_succ_p (e->dest)
 784                         && (single_succ_edge (e->dest)->flags
 785                             & EDGE_CAN_FALLTHRU)
 786                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 787                         && single_succ (e->dest) == best_edge->dest
 788                         && (2 * e->dest->count.to_frequency (cfun)
 789                             >= EDGE_FREQUENCY (best_edge) || for_size))
 790                       {
 791                         best_edge = e;
 792                         if (dump_file)
 793                           fprintf (dump_file, "Selecting BB %d\n",
 794                                    best_edge->dest->index);
 795                         break;
 796                       }
 797
 798                   bb->aux = best_edge->dest;
 799                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 800                   bb = best_edge->dest;
 801                 }
 802             }
 803         }
 804       while (best_edge);
 805       trace->last = bb;
 806       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 807       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
 808         {
 809           bbd[trace->last->index].end_of_trace = *n_traces - 1;
 810           /* Update the cached maximum frequency for interesting predecessor
 811              edges for successors of the new trace end.  */
 812           FOR_EACH_EDGE (e, ei, trace->last->succs)
 813             if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
 814               bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
 815         }
 816
 817       /* The trace is terminated so we have to recount the keys in heap
 818          (some block can have a lower key because now one of its predecessors
 819          is an end of the trace).  */
 820       FOR_EACH_EDGE (e, ei, bb->succs)
 821         {
 822           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 823               || bb_visited_trace (e->dest))
 824             continue;
 825
 826           if (bbd[e->dest->index].heap)
 827             {
 828               key = bb_to_key (e->dest);
 829               if (key != bbd[e->dest->index].node->get_key ())
 830                 {
 831                   if (dump_file)
 832                     {
 833                       fprintf (dump_file,
 834                                "Changing key for bb %d from %ld to %ld.\n",
 835                                e->dest->index,
 836                                (long) bbd[e->dest->index].node->get_key (), key);
 837                     }
 838                   bbd[e->dest->index].heap->replace_key
 839                     (bbd[e->dest->index].node, key);
 840                 }
 841             }
 842         }
 843     }
 844
 845   delete (*heap);
 846
 847   /* "Return" the new heap.  */
 848   *heap = new_heap;
 849 }
 850
 851 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 852    it to trace after BB, mark OLD_BB visited and update pass' data structures
 853    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 854
 855 static basic_block
 856 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 857 {
 858   basic_block new_bb;
 859
 860   new_bb = duplicate_block (old_bb, e, bb);
 861   BB_COPY_PARTITION (new_bb, old_bb);
 862
 863   gcc_assert (e->dest == new_bb);
 864
 865   if (dump_file)
 866     fprintf (dump_file,
 867              "Duplicated bb %d (created bb %d)\n",
 868              old_bb->index, new_bb->index);
 869
 870   if (new_bb->index >= array_size
 871       || last_basic_block_for_fn (cfun) > array_size)
 872     {
 873       int i;
 874       int new_size;
 875
 876       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 877       new_size = GET_ARRAY_SIZE (new_size);
 878       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 879       for (i = array_size; i < new_size; i++)
 880         {
 881           bbd[i].start_of_trace = -1;
 882           bbd[i].end_of_trace = -1;
 883           bbd[i].in_trace = -1;
 884           bbd[i].visited = 0;
 885           bbd[i].priority = -1;
 886           bbd[i].heap = NULL;
 887           bbd[i].node = NULL;
 888         }
 889       array_size = new_size;
 890
 891       if (dump_file)
 892         {
 893           fprintf (dump_file,
 894                    "Growing the dynamic array to %d elements.\n",
 895                    array_size);
 896         }
 897     }
 898
 899   gcc_assert (!bb_visited_trace (e->dest));
 900   mark_bb_visited (new_bb, trace);
 901   new_bb->aux = bb->aux;
 902   bb->aux = new_bb;
 903
 904   bbd[new_bb->index].in_trace = trace;
 905
 906   return new_bb;
 907 }
 908
 909 /* Compute and return the key (for the heap) of the basic block BB.  */
 910
 911 static long
 912 bb_to_key (basic_block bb)
 913 {
 914   edge e;
 915   edge_iterator ei;
 916
 917   /* Use index as key to align with its original order.  */
 918   if (optimize_function_for_size_p (cfun))
 919     return bb->index;
 920
 921   /* Do not start in probably never executed blocks.  */
 922
 923   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 924       || probably_never_executed_bb_p (cfun, bb))
 925     return BB_FREQ_MAX;
 926
 927   /* Prefer blocks whose predecessor is an end of some trace
 928      or whose predecessor edge is EDGE_DFS_BACK.  */
 929   int priority = bbd[bb->index].priority;
 930   if (priority == -1)
 931     {
 932       priority = 0;
 933       FOR_EACH_EDGE (e, ei, bb->preds)
 934         {
 935           if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 936                && bbd[e->src->index].end_of_trace >= 0)
 937               || (e->flags & EDGE_DFS_BACK))
 938             {
 939               int edge_freq = EDGE_FREQUENCY (e);
 940
 941               if (edge_freq > priority)
 942                 priority = edge_freq;
 943             }
 944         }
 945       bbd[bb->index].priority = priority;
 946     }
 947
 948   if (priority)
 949     /* The block with priority should have significantly lower key.  */
 950     return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun));
 951
 952   return -bb->count.to_frequency (cfun);
 953 }
 954
 955 /* Return true when the edge E from basic block BB is better than the temporary
 956    best edge (details are in function).  The probability of edge E is PROB. The
 957    frequency of the successor is FREQ.  The current best probability is
 958    BEST_PROB, the best frequency is BEST_FREQ.
 959    The edge is considered to be equivalent when PROB does not differ much from
 960    BEST_PROB; similarly for frequency.  */
 961
 962 static bool
 963 better_edge_p (const_basic_block bb, const_edge e, profile_probability prob,
 964                int freq, profile_probability best_prob, int best_freq,
 965                const_edge cur_best_edge)
 966 {
 967   bool is_better_edge;
 968
 969   /* The BEST_* values do not have to be best, but can be a bit smaller than
 970      maximum values.  */
 971   profile_probability diff_prob = best_prob.apply_scale (1, 10);
 972   int diff_freq = best_freq / 10;
 973
 974   /* The smaller one is better to keep the original order.  */
 975   if (optimize_function_for_size_p (cfun))
 976     return !cur_best_edge
 977            || cur_best_edge->dest->index > e->dest->index;
 978
 979   /* Those edges are so expensive that continuing a trace is not useful
 980      performance wise.  */
 981   if (e->flags & (EDGE_ABNORMAL | EDGE_EH))
 982     return false;
 983
 984   if (prob > best_prob + diff_prob
 985       || (!best_prob.initialized_p ()
 986           && prob > profile_probability::guessed_never ()))
 987     /* The edge has higher probability than the temporary best edge.  */
 988     is_better_edge = true;
 989   else if (prob < best_prob - diff_prob)
 990     /* The edge has lower probability than the temporary best edge.  */
 991     is_better_edge = false;
 992   else if (freq < best_freq - diff_freq)
 993     /* The edge and the temporary best edge  have almost equivalent
 994        probabilities.  The higher frequency of a successor now means
 995        that there is another edge going into that successor.
 996        This successor has lower frequency so it is better.  */
 997     is_better_edge = true;
 998   else if (freq > best_freq + diff_freq)
 999     /* This successor has higher frequency so it is worse.  */
1000     is_better_edge = false;
1001   else if (e->dest->prev_bb == bb)
1002     /* The edges have equivalent probabilities and the successors
1003        have equivalent frequencies.  Select the previous successor.  */
1004     is_better_edge = true;
1005   else
1006     is_better_edge = false;
1007
1008   return is_better_edge;
1009 }
1010
1011 /* Return true when the edge E is better than the temporary best edge
1012    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
1013    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1014    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1015    TRACES record the information about traces.
1016    When optimizing for size, the edge with smaller index is better.
1017    When optimizing for speed, the edge with bigger probability or longer trace
1018    is better.  */
1019
1020 static bool
1021 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1022                        const_edge cur_best_edge, struct trace *traces)
1023 {
1024   int e_index;
1025   int b_index;
1026   bool is_better_edge;
1027
1028   if (!cur_best_edge)
1029     return true;
1030
1031   if (optimize_function_for_size_p (cfun))
1032     {
1033       e_index = src_index_p ? e->src->index : e->dest->index;
1034       b_index = src_index_p ? cur_best_edge->src->index
1035                               : cur_best_edge->dest->index;
1036       /* The smaller one is better to keep the original order.  */
1037       return b_index > e_index;
1038     }
1039
1040   if (src_index_p)
1041     {
1042       e_index = e->src->index;
1043
1044       if (e->probability > cur_best_edge->probability)
1045         /* The edge has higher probability than the temporary best edge.  */
1046         is_better_edge = true;
1047       else if (e->probability < cur_best_edge->probability)
1048         /* The edge has lower probability than the temporary best edge.  */
1049         is_better_edge = false;
1050       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1051         /* The edge and the temporary best edge have equivalent probabilities.
1052            The edge with longer trace is better.  */
1053         is_better_edge = true;
1054       else
1055         is_better_edge = false;
1056     }
1057   else
1058     {
1059       e_index = e->dest->index;
1060
1061       if (e->probability > cur_best_edge->probability)
1062         /* The edge has higher probability than the temporary best edge.  */
1063         is_better_edge = true;
1064       else if (e->probability < cur_best_edge->probability)
1065         /* The edge has lower probability than the temporary best edge.  */
1066         is_better_edge = false;
1067       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1068         /* The edge and the temporary best edge have equivalent probabilities.
1069            The edge with longer trace is better.  */
1070         is_better_edge = true;
1071       else
1072         is_better_edge = false;
1073     }
1074
1075   return is_better_edge;
1076 }
1077
1078 /* Connect traces in array TRACES, N_TRACES is the count of traces.
        The final block order is recorded by linking blocks through their
        AUX pointers.  For each trace that is not connected yet, the
        function first walks backwards over predecessor traces, then
        forwards over successor traces, and, when not optimizing for size
        and no successor trace can be attached directly, tries to bridge to
        another trace by duplicating one block (see copy_bb).  If the
        function has block partitions and the traces do not all start in
        the partition of TRACES[0], the traces are processed in two passes,
        one partition per pass, beginning with the partition of
        TRACES[0].  */
1079
1080 static void
1081 connect_traces (int n_traces, struct trace *traces)
1082 {
1083   int i;
1084   bool *connected;
1085   bool two_passes;
1086   int last_trace;
1087   int current_pass;
1088   int current_partition;
1089   int freq_threshold;
1090   gcov_type count_threshold;
1091   bool for_size = optimize_function_for_size_p (cfun);
1092
       /* Thresholds used below when deciding whether an edge is executed
          often enough to justify duplicating a block.  The count threshold
          multiplies first for small entry counts and divides first for
          large ones (presumably to avoid overflow -- confirm).  */
1093   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1094   if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
1095     count_threshold = max_entry_count.to_gcov_type () * DUPLICATION_THRESHOLD / 1000;
1096   else
1097     count_threshold = max_entry_count.to_gcov_type () / 1000 * DUPLICATION_THRESHOLD;
1098
       /* CONNECTED[T] becomes true once trace T has been placed in the
          chain.  LAST_TRACE is the trace currently at the end of the chain,
          or -1 while the chain is still empty.  */
1099   connected = XCNEWVEC (bool, n_traces);
1100   last_trace = -1;
1101   current_pass = 1;
1102   current_partition = BB_PARTITION (traces[0].first);
1103   two_passes = false;
1104
       /* Two passes are needed as soon as some trace starts in a different
          partition than the first trace.  */
1105   if (crtl->has_bb_partition)
1106     for (i = 0; i < n_traces && !two_passes; i++)
1107       if (BB_PARTITION (traces[0].first)
1108           != BB_PARTITION (traces[i].first))
1109         two_passes = true;
1110
1111   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1112     {
1113       int t = i;
1114       int t2;
1115       edge e, best;
1116       int best_len;
1117
           /* The first pass has looked at every trace: restart at trace 0
              and switch to the other partition for the second pass.  */
1118       if (i >= n_traces)
1119         {
1120           gcc_assert (two_passes && current_pass == 1);
1121           i = 0;
1122           t = i;
1123           current_pass = 2;
1124           if (current_partition == BB_HOT_PARTITION)
1125             current_partition = BB_COLD_PARTITION;
1126           else
1127             current_partition = BB_HOT_PARTITION;
1128         }
1129
1130       if (connected[t])
1131         continue;
1132
           /* Leave traces of the other partition to the other pass.  */
1133       if (two_passes
1134           && BB_PARTITION (traces[t].first) != current_partition)
1135         continue;
1136
1137       connected[t] = true;
1138
1139       /* Find the predecessor traces.  */
           /* The search stops at trace 0, which is never extended
              backwards.  */
1140       for (t2 = t; t2 > 0;)
1141         {
1142           edge_iterator ei;
1143           best = NULL;
1144           best_len = 0;
1145           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1146             {
1147               int si = e->src->index;
1148
1149               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1150                   && (e->flags & EDGE_CAN_FALLTHRU)
1151                   && !(e->flags & EDGE_COMPLEX)
1152                   && bbd[si].end_of_trace >= 0
1153                   && !connected[bbd[si].end_of_trace]
1154                   && (BB_PARTITION (e->src) == current_partition)
1155                   && connect_better_edge_p (e, true, best_len, best, traces))
1156                 {
1157                   best = e;
1158                   best_len = traces[bbd[si].end_of_trace].length;
1159                 }
1160             }
               /* Link the end of the chosen predecessor trace to the start
                  of trace T2 and continue the search from that predecessor
                  trace.  */
1161           if (best)
1162             {
1163               best->src->aux = best->dest;
1164               t2 = bbd[best->src->index].end_of_trace;
1165               connected[t2] = true;
1166
1167               if (dump_file)
1168                 {
1169                   fprintf (dump_file, "Connection: %d %d\n",
1170                            best->src->index, best->dest->index);
1171                 }
1172             }
1173           else
1174             break;
1175         }
1176
           /* T2 is now the first trace of the group that ends in trace T;
              append that group after the trace placed last.  */
1177       if (last_trace >= 0)
1178         traces[last_trace].last->aux = traces[t2].first;
1179       last_trace = t;
1180
1181       /* Find the successor traces.  */
1182       while (1)
1183         {
1184           /* Find the continuation of the chain.  */
1185           edge_iterator ei;
1186           best = NULL;
1187           best_len = 0;
1188           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1189             {
1190               int di = e->dest->index;
1191
1192               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1193                   && (e->flags & EDGE_CAN_FALLTHRU)
1194                   && !(e->flags & EDGE_COMPLEX)
1195                   && bbd[di].start_of_trace >= 0
1196                   && !connected[bbd[di].start_of_trace]
1197                   && (BB_PARTITION (e->dest) == current_partition)
1198                   && connect_better_edge_p (e, false, best_len, best, traces))
1199                 {
1200                   best = e;
1201                   best_len = traces[bbd[di].start_of_trace].length;
1202                 }
1203             }
1204
1205           if (for_size)
1206             {
1207               if (!best)
1208                 /* Stop finding the successor traces.  */
1209                 break;
1210
1211               /* It is OK to connect block n with block n + 1 or a block
1212                  before n.  For others, only connect to the loop header.  */
1213               if (best->dest->index > (traces[t].last->index + 1))
1214                 {
                       /* Count the predecessors of the destination that
                          do not reach it over a DFS back edge.  */
1215                   int count = EDGE_COUNT (best->dest->preds);
1216
1217                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1218                     if (e->flags & EDGE_DFS_BACK)
1219                       count--;
1220
1221                   /* If dest has multiple predecessors, skip it.  We expect
1222                      that one predecessor with smaller index connects with it
1223                      later.  */
1224                   if (count != 1)
1225                     break;
1226                 }
1227
1228               /* Only connect Trace n with Trace n + 1.  It is conservative
1229                  to keep the order as close as possible to the original order.
1230                  It also helps to reduce long jumps.  */
1231               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1232                 break;
1233
1234               if (dump_file)
1235                 fprintf (dump_file, "Connection: %d %d\n",
1236                          best->src->index, best->dest->index);
1237
1238               t = bbd[best->dest->index].start_of_trace;
1239               traces[last_trace].last->aux = traces[t].first;
1240               connected[t] = true;
1241               last_trace = t;
1242             }
1243           else if (best)
1244             {
                   /* Append the trace that starts at BEST->dest and go on
                      searching from its end.  */
1245               if (dump_file)
1246                 {
1247                   fprintf (dump_file, "Connection: %d %d\n",
1248                            best->src->index, best->dest->index);
1249                 }
1250               t = bbd[best->dest->index].start_of_trace;
1251               traces[last_trace].last->aux = traces[t].first;
1252               connected[t] = true;
1253               last_trace = t;
1254             }
1255           else
1256             {
1257               /* Try to connect the traces by duplication of 1 block.  */
                   /* BEST is NULL whenever this branch is entered.  Below it
                      is set to the edge whose destination is the candidate
                      for duplication, and NEXT_BB to the block expected to
                      follow the copy, if any.  */
1258               edge e2;
1259               basic_block next_bb = NULL;
1260               bool try_copy = false;
1261
1262               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1263                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1264                     && (e->flags & EDGE_CAN_FALLTHRU)
1265                     && !(e->flags & EDGE_COMPLEX)
1266                     && (!best || e->probability > best->probability))
1267                   {
1268                     edge_iterator ei;
1269                     edge best2 = NULL;
1270                     int best2_len = 0;
1271
1272                     /* If the destination is a start of a trace which is only
1273                        one block long, then no need to search the successor
1274                        blocks of the trace.  Accept it.  */
1275                     if (bbd[e->dest->index].start_of_trace >= 0
1276                         && traces[bbd[e->dest->index].start_of_trace].length
1277                            == 1)
1278                       {
1279                         best = e;
1280                         try_copy = true;
1281                         continue;
1282                       }
1283
                         /* Otherwise look for a successor edge E2 of E->dest
                            that leads either to the exit block or to the
                            start of a not yet connected trace in the current
                            partition, passing the frequency and count
                            thresholds.  Among several such edges the later
                            one replaces the earlier one when it has a higher
                            probability, or an equal probability and a longer
                            trace.  */
1284                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1285                       {
1286                         int di = e2->dest->index;
1287
1288                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1289                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1290                                 && !(e2->flags & EDGE_COMPLEX)
1291                                 && bbd[di].start_of_trace >= 0
1292                                 && !connected[bbd[di].start_of_trace]
1293                                 && BB_PARTITION (e2->dest) == current_partition
1294                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1295                                 && e2->count ().ipa () >= count_threshold
1296                                 && (!best2
1297                                     || e2->probability > best2->probability
1298                                     || (e2->probability == best2->probability
1299                                         && traces[bbd[di].start_of_trace].length
1300                                            > best2_len))))
1301                           {
1302                             best = e;
1303                             best2 = e2;
1304                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1305                               best2_len = traces[bbd[di].start_of_trace].length;
1306                             else
1307                               best2_len = INT_MAX;
1308                             next_bb = e2->dest;
1309                             try_copy = true;
1310                           }
1311                       }
1312                   }
1313
1314               /* Copy tiny blocks always; copy larger blocks only when the
1315                  edge is traversed frequently enough.  */
1316               if (try_copy
1317                   && BB_PARTITION (best->src) == BB_PARTITION (best->dest)
1318                   && copy_bb_p (best->dest,
1319                                 optimize_edge_for_speed_p (best)
1320                                 && EDGE_FREQUENCY (best) >= freq_threshold
1321                                 && (!best->count ().initialized_p ()
1322                                     || best->count ().ipa () >= count_threshold)))
1323                 {
1324                   basic_block new_bb;
1325
1326                   if (dump_file)
1327                     {
1328                       fprintf (dump_file, "Connection: %d %d ",
1329                                traces[t].last->index, best->dest->index);
1330                       if (!next_bb)
1331                         fputc ('\n', dump_file);
1332                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1333                         fprintf (dump_file, "exit\n");
1334                       else
1335                         fprintf (dump_file, "%d\n", next_bb->index);
1336                     }
1337
                       /* Put a copy of BEST->dest at the end of trace T.  If
                          NEXT_BB is a real block, append the trace recorded
                          as starting at it and continue from that trace;
                          otherwise the chain ends here.  */
1338                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1339                   traces[t].last = new_bb;
1340                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1341                     {
1342                       t = bbd[next_bb->index].start_of_trace;
1343                       traces[last_trace].last->aux = traces[t].first;
1344                       connected[t] = true;
1345                       last_trace = t;
1346                     }
1347                   else
1348                     break;      /* Stop finding the successor traces.  */
1349                 }
1350               else
1351                 break;  /* Stop finding the successor traces.  */
1352             }
1353         }
1354     }
1355
       /* Dump the resulting order by following the AUX chain from the first
          block of trace 0.  */
1356   if (dump_file)
1357     {
1358       basic_block bb;
1359
1360       fprintf (dump_file, "Final order:\n");
1361       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1362         fprintf (dump_file, "%d ", bb->index);
1363       fprintf (dump_file, "\n");
1364       fflush (dump_file);
1365     }
1366
1367   FREE (connected);
1368 }
1369
1370 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1371    when code size is allowed to grow by duplication.  */
1372
1373 static bool
1374 copy_bb_p (const_basic_block bb, int code_may_grow)
1375 {
1376   int size = 0;
1377   int max_size = uncond_jump_length;
1378   rtx_insn *insn;
1379
1380   if (!bb->count.to_frequency (cfun))
1381     return false;
1382   if (EDGE_COUNT (bb->preds) < 2)
1383     return false;
1384   if (!can_duplicate_block_p (bb))
1385     return false;
1386
1387   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1388   if (EDGE_COUNT (bb->succs) > 8)
1389     return false;
1390
1391   if (code_may_grow && optimize_bb_for_speed_p (bb))
1392     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1393
1394   FOR_BB_INSNS (bb, insn)
1395     {
1396       if (INSN_P (insn))
1397         size += get_attr_min_length (insn);
1398     }
1399
1400   if (size <= max_size)
1401     return true;
1402
1403   if (dump_file)
1404     {
1405       fprintf (dump_file,
1406                "Block %d can't be copied because its size = %d.\n",
1407                bb->index, size);
1408     }
1409
1410   return false;
1411 }
1412
1413 /* Return the length of unconditional jump instruction.  */
1414
1415 int
1416 get_uncond_jump_length (void)
1417 {
1418   int length;
1419
1420   start_sequence ();
1421   rtx_code_label *label = emit_label (gen_label_rtx ());
1422   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1423   length = get_attr_min_length (jump);
1424   end_sequence ();
1425
1426   return length;
1427 }
1428
1429 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1430    Duplicate the landing pad and split the edges so that no EH edge
1431    crosses partitions.  */
1432
1433 static void
1434 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1435 {
1436   eh_landing_pad new_lp;
1437   basic_block new_bb, last_bb, post_bb;
1438   rtx_insn *jump;
1439   unsigned new_partition;
1440   edge_iterator ei;
1441   edge e;
1442
1443   /* Generate the new landing-pad structure.  */
1444   new_lp = gen_eh_landing_pad (old_lp->region);
1445   new_lp->post_landing_pad = old_lp->post_landing_pad;
1446   new_lp->landing_pad = gen_label_rtx ();
1447   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1448
1449   /* Put appropriate instructions in new bb.  */
1450   rtx_code_label *new_label = emit_label (new_lp->landing_pad);
1451
1452   expand_dw2_landing_pad_for_region (old_lp->region);
1453
1454   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1455   post_bb = single_succ (post_bb);
1456   rtx_code_label *post_label = block_label (post_bb);
1457   jump = emit_jump_insn (targetm.gen_jump (post_label));
1458   JUMP_LABEL (jump) = post_label;
1459
1460   /* Create new basic block to be dest for lp.  */
1461   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1462   new_bb = create_basic_block (new_label, jump, last_bb);
1463   new_bb->aux = last_bb->aux;
1464   new_bb->count = post_bb->count;
1465   last_bb->aux = new_bb;
1466
1467   emit_barrier_after_bb (new_bb);
1468
1469   make_single_succ_edge (new_bb, post_bb, 0);
1470
1471   /* Make sure new bb is in the other partition.  */
1472   new_partition = BB_PARTITION (old_bb);
1473   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1474   BB_SET_PARTITION (new_bb, new_partition);
1475
1476   /* Fix up the edges.  */
1477   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1478     if (BB_PARTITION (e->src) == new_partition)
1479       {
1480         rtx_insn *insn = BB_END (e->src);
1481         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1482
1483         gcc_assert (note != NULL);
1484         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1485         XEXP (note, 0) = GEN_INT (new_lp->index);
1486
1487         /* Adjust the edge to the new destination.  */
1488         redirect_edge_succ (e, new_bb);
1489       }
1490     else
1491       ei_next (&ei);
1492 }
1493
1494
1495 /* Ensure that all hot bbs are included in a hot path through the
1496    procedure. This is done by calling this function twice, once
1497    with WALK_UP true (to look for paths from the entry to hot bbs) and
1498    once with WALK_UP false (to look for paths from hot bbs to the exit).
1499    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1500    to BBS_IN_HOT_PARTITION.  */
1501
1502 static unsigned int
1503 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1504                     vec<basic_block> *bbs_in_hot_partition)
1505 {
1506   /* Callers check this.  */
1507   gcc_checking_assert (cold_bb_count);
1508
1509   /* Keep examining hot bbs while we still have some left to check
1510      and there are remaining cold bbs.  */
1511   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1512   while (! hot_bbs_to_check.is_empty ()
1513          && cold_bb_count)
1514     {
1515       basic_block bb = hot_bbs_to_check.pop ();
1516       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1517       edge e;
1518       edge_iterator ei;
1519       profile_probability highest_probability
1520                                  = profile_probability::uninitialized ();
1521       profile_count highest_count = profile_count::uninitialized ();
1522       bool found = false;
1523
1524       /* Walk the preds/succs and check if there is at least one already
1525          marked hot. Keep track of the most frequent pred/succ so that we
1526          can mark it hot if we don't find one.  */
1527       FOR_EACH_EDGE (e, ei, edges)
1528         {
1529           basic_block reach_bb = walk_up ? e->src : e->dest;
1530
1531           if (e->flags & EDGE_DFS_BACK)
1532             continue;
1533
1534           /* Do not expect profile insanities when profile was not adjusted.  */
1535           if (e->probability == profile_probability::never ()
1536               || e->count () == profile_count::zero ())
1537             continue;
1538
1539           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1540           {
1541             found = true;
1542             break;
1543           }
1544           /* The following loop will look for the hottest edge via
1545              the edge count, if it is non-zero, then fallback to the edge
1546              frequency and finally the edge probability.  */
1547           if (!(e->count () > highest_count))
1548             highest_count = e->count ();
1549           if (!highest_probability.initialized_p ()
1550               || e->probability > highest_probability)
1551             highest_probability = e->probability;
1552         }
1553
1554       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1555          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1556          then the most frequent pred (or succ) needs to be adjusted.  In the
1557          case where multiple preds/succs have the same frequency (e.g. a
1558          50-50 branch), then both will be adjusted.  */
1559       if (found)
1560         continue;
1561
1562       FOR_EACH_EDGE (e, ei, edges)
1563         {
1564           if (e->flags & EDGE_DFS_BACK)
1565             continue;
1566           /* Do not expect profile insanities when profile was not adjusted.  */
1567           if (e->probability == profile_probability::never ()
1568               || e->count () == profile_count::zero ())
1569             continue;
1570           /* Select the hottest edge using the edge count, if it is non-zero,
1571              then fallback to the edge frequency and finally the edge
1572              probability.  */
1573           if (highest_count.initialized_p ())
1574             {
1575               if (!(e->count () >= highest_count))
1576                 continue;
1577             }
1578           else if (!(e->probability >= highest_probability))
1579             continue;
1580
1581           basic_block reach_bb = walk_up ? e->src : e->dest;
1582
1583           /* We have a hot bb with an immediate dominator that is cold.
1584              The dominator needs to be re-marked hot.  */
1585           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1586           if (dump_file)
1587             fprintf (dump_file, "Promoting bb %i to hot partition to sanitize "
1588                      "profile of bb %i in %s walk\n", reach_bb->index,
1589                      bb->index, walk_up ? "backward" : "forward");
1590           cold_bb_count--;
1591
1592           /* Now we need to examine newly-hot reach_bb to see if it is also
1593              dominated by a cold bb.  */
1594           bbs_in_hot_partition->safe_push (reach_bb);
1595           hot_bbs_to_check.safe_push (reach_bb);
1596         }
1597     }
1598
1599   return cold_bb_count;
1600 }
1601
1602
1603 /* Find the basic blocks that are rarely executed and need to be moved to
1604    a separate section of the .o file (to cut down on paging and improve
1605    cache locality).  Return a vector of all edges that cross.  */
1606
1607 static vec<edge>
1608 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1609 {
1610   vec<edge> crossing_edges = vNULL;
1611   basic_block bb;
1612   edge e;
1613   edge_iterator ei;
1614   unsigned int cold_bb_count = 0;
1615   auto_vec<basic_block> bbs_in_hot_partition;
1616
1617   propagate_unlikely_bbs_forward ();
1618
1619   /* Mark which partition (hot/cold) each basic block belongs in.  */
1620   FOR_EACH_BB_FN (bb, cfun)
1621     {
1622       bool cold_bb = false;
1623
1624       if (probably_never_executed_bb_p (cfun, bb))
1625         {
1626           /* Handle profile insanities created by upstream optimizations
1627              by also checking the incoming edge weights. If there is a non-cold
1628              incoming edge, conservatively prevent this block from being split
1629              into the cold section.  */
1630           cold_bb = true;
1631           FOR_EACH_EDGE (e, ei, bb->preds)
1632             if (!probably_never_executed_edge_p (cfun, e))
1633               {
1634                 cold_bb = false;
1635                 break;
1636               }
1637         }
1638       if (cold_bb)
1639         {
1640           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1641           cold_bb_count++;
1642         }
1643       else
1644         {
1645           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1646           bbs_in_hot_partition.safe_push (bb);
1647         }
1648     }
1649
1650   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1651      Several different possibilities may include cold bbs along all paths
1652      to/from a hot bb. One is that there are edge weight insanities
1653      due to optimization phases that do not properly update basic block profile
1654      counts. The second is that the entry of the function may not be hot, because
1655      it is entered fewer times than the number of profile training runs, but there
1656      is a loop inside the function that causes blocks within the function to be
1657      above the threshold for hotness. This is fixed by walking up from hot bbs
1658      to the entry block, and then down from hot bbs to the exit, performing
1659      partitioning fixups as necessary.  */
1660   if (cold_bb_count)
1661     {
1662       mark_dfs_back_edges ();
1663       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1664                                           &bbs_in_hot_partition);
1665       if (cold_bb_count)
1666         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1667
1668       hash_set <basic_block> set;
1669       find_bbs_reachable_by_hot_paths (&set);
1670       FOR_EACH_BB_FN (bb, cfun)
1671         if (!set.contains (bb))
1672           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1673     }
1674
1675   /* The format of .gcc_except_table does not allow landing pads to
1676      be in a different partition as the throw.  Fix this by either
1677      moving or duplicating the landing pads.  */
1678   if (cfun->eh->lp_array)
1679     {
1680       unsigned i;
1681       eh_landing_pad lp;
1682
1683       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1684         {
1685           bool all_same, all_diff;
1686
1687           if (lp == NULL
1688               || lp->landing_pad == NULL_RTX
1689               || !LABEL_P (lp->landing_pad))
1690             continue;
1691
1692           all_same = all_diff = true;
1693           bb = BLOCK_FOR_INSN (lp->landing_pad);
1694           FOR_EACH_EDGE (e, ei, bb->preds)
1695             {
1696               gcc_assert (e->flags & EDGE_EH);
1697               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1698                 all_diff = false;
1699               else
1700                 all_same = false;
1701             }
1702
1703           if (all_same)
1704             ;
1705           else if (all_diff)
1706             {
1707               int which = BB_PARTITION (bb);
1708               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1709               BB_SET_PARTITION (bb, which);
1710             }
1711           else
1712             fix_up_crossing_landing_pad (lp, bb);
1713         }
1714     }
1715
1716   /* Mark every edge that crosses between sections.  */
1717
1718   FOR_EACH_BB_FN (bb, cfun)
1719     FOR_EACH_EDGE (e, ei, bb->succs)
1720       {
1721         unsigned int flags = e->flags;
1722
1723         /* We should never have EDGE_CROSSING set yet.  */
1724         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1725
1726         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1727             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1728             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1729           {
1730             crossing_edges.safe_push (e);
1731             flags |= EDGE_CROSSING;
1732           }
1733
1734         /* Now that we've split eh edges as appropriate, allow landing pads
1735            to be merged with the post-landing pads.  */
1736         flags &= ~EDGE_PRESERVE;
1737
1738         e->flags = flags;
1739       }
1740
1741   return crossing_edges;
1742 }
1743
1744 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1745
1746 static void
1747 set_edge_can_fallthru_flag (void)
1748 {
1749   basic_block bb;
1750
1751   FOR_EACH_BB_FN (bb, cfun)
1752     {
1753       edge e;
1754       edge_iterator ei;
1755
1756       FOR_EACH_EDGE (e, ei, bb->succs)
1757         {
1758           e->flags &= ~EDGE_CAN_FALLTHRU;
1759
1760           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1761           if (e->flags & EDGE_FALLTHRU)
1762             e->flags |= EDGE_CAN_FALLTHRU;
1763         }
1764
1765       /* If the BB ends with an invertible condjump all (2) edges are
1766          CAN_FALLTHRU edges.  */
1767       if (EDGE_COUNT (bb->succs) != 2)
1768         continue;
1769       if (!any_condjump_p (BB_END (bb)))
1770         continue;
1771
1772       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1773       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1774         continue;
1775       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1776       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1777       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1778     }
1779 }
1780
1781 /* If any destination of a crossing edge does not have a label, add label;
1782    Convert any easy fall-through crossing edges to unconditional jumps.  */
1783
1784 static void
1785 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1786 {
1787   size_t i;
1788   edge e;
1789
1790   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1791     {
1792       basic_block src = e->src;
1793       basic_block dest = e->dest;
1794       rtx_jump_insn *new_jump;
1795
1796       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1797         continue;
1798
1799       /* Make sure dest has a label.  */
1800       rtx_code_label *label = block_label (dest);
1801
1802       /* Nothing to do for non-fallthru edges.  */
1803       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1804         continue;
1805       if ((e->flags & EDGE_FALLTHRU) == 0)
1806         continue;
1807
1808       /* If the block does not end with a control flow insn, then we
1809          can trivially add a jump to the end to fixup the crossing.
1810          Otherwise the jump will have to go in a new bb, which will
1811          be handled by fix_up_fall_thru_edges function.  */
1812       if (control_flow_insn_p (BB_END (src)))
1813         continue;
1814
1815       /* Make sure there's only one successor.  */
1816       gcc_assert (single_succ_p (src));
1817
1818       new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1819       BB_END (src) = new_jump;
1820       JUMP_LABEL (new_jump) = label;
1821       LABEL_NUSES (label) += 1;
1822
1823       emit_barrier_after_bb (src);
1824
1825       /* Mark edge as non-fallthru.  */
1826       e->flags &= ~EDGE_FALLTHRU;
1827     }
1828 }
1829
1830 /* Find any bb's where the fall-through edge is a crossing edge (note that
1831    these bb's must also contain a conditional jump or end with a call
1832    instruction; we've already dealt with fall-through edges for blocks
1833    that didn't have a conditional jump or didn't end with call instruction
1834    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1835    edge to non-crossing edge by inserting a new bb to fall-through into.
1836    The new bb will contain an unconditional jump (crossing edge) to the
1837    original fall through destination.  */
1838
1839 static void
1840 fix_up_fall_thru_edges (void)
1841 {
1842   basic_block cur_bb;
1843
1844   FOR_EACH_BB_FN (cur_bb, cfun)
1845     {
1846       edge succ1;
1847       edge succ2;
1848       edge fall_thru = NULL;
1849       edge cond_jump = NULL;
1850
1851       fall_thru = NULL;
1852       if (EDGE_COUNT (cur_bb->succs) > 0)
1853         succ1 = EDGE_SUCC (cur_bb, 0);
1854       else
1855         succ1 = NULL;
1856
1857       if (EDGE_COUNT (cur_bb->succs) > 1)
1858         succ2 = EDGE_SUCC (cur_bb, 1);
1859       else
1860         succ2 = NULL;
1861
1862       /* Find the fall-through edge.  */
1863
1864       if (succ1
1865           && (succ1->flags & EDGE_FALLTHRU))
1866         {
1867           fall_thru = succ1;
1868           cond_jump = succ2;
1869         }
1870       else if (succ2
1871                && (succ2->flags & EDGE_FALLTHRU))
1872         {
1873           fall_thru = succ2;
1874           cond_jump = succ1;
1875         }
1876       else if (succ2 && EDGE_COUNT (cur_bb->succs) > 2)
1877         fall_thru = find_fallthru_edge (cur_bb->succs);
1878
1879       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1880         {
1881           /* Check to see if the fall-thru edge is a crossing edge.  */
1882
1883           if (fall_thru->flags & EDGE_CROSSING)
1884             {
1885               /* The fall_thru edge crosses; now check the cond jump edge, if
1886                  it exists.  */
1887
1888               bool cond_jump_crosses = true;
1889               int invert_worked = 0;
1890               rtx_insn *old_jump = BB_END (cur_bb);
1891
1892               /* Find the jump instruction, if there is one.  */
1893
1894               if (cond_jump)
1895                 {
1896                   if (!(cond_jump->flags & EDGE_CROSSING))
1897                     cond_jump_crosses = false;
1898
1899                   /* We know the fall-thru edge crosses; if the cond
1900                      jump edge does NOT cross, and its destination is the
1901                      next block in the bb order, invert the jump
1902                      (i.e. fix it so the fall through does not cross and
1903                      the cond jump does).  */
1904
1905                   if (!cond_jump_crosses)
1906                     {
1907                       /* Find label in fall_thru block. We've already added
1908                          any missing labels, so there must be one.  */
1909
1910                       rtx_code_label *fall_thru_label
1911                         = block_label (fall_thru->dest);
1912
1913                       if (old_jump && fall_thru_label)
1914                         {
1915                           rtx_jump_insn *old_jump_insn
1916                             = dyn_cast <rtx_jump_insn *> (old_jump);
1917                           if (old_jump_insn)
1918                             invert_worked = invert_jump (old_jump_insn,
1919                                                          fall_thru_label, 0);
1920                         }
1921
1922                       if (invert_worked)
1923                         {
1924                           fall_thru->flags &= ~EDGE_FALLTHRU;
1925                           cond_jump->flags |= EDGE_FALLTHRU;
1926                           update_br_prob_note (cur_bb);
1927                           std::swap (fall_thru, cond_jump);
1928                           cond_jump->flags |= EDGE_CROSSING;
1929                           fall_thru->flags &= ~EDGE_CROSSING;
1930                         }
1931                     }
1932                 }
1933
1934               if (cond_jump_crosses || !invert_worked)
1935                 {
1936                   /* This is the case where both edges out of the basic
1937                      block are crossing edges. Here we will fix up the
1938                      fall through edge. The jump edge will be taken care
1939                      of later.  The EDGE_CROSSING flag of fall_thru edge
1940                      is unset before the call to force_nonfallthru
1941                      function because if a new basic-block is created
1942                      this edge remains in the current section boundary
1943                      while the edge between new_bb and the fall_thru->dest
1944                      becomes EDGE_CROSSING.  */
1945
1946                   fall_thru->flags &= ~EDGE_CROSSING;
1947                   basic_block new_bb = force_nonfallthru (fall_thru);
1948
1949                   if (new_bb)
1950                     {
1951                       new_bb->aux = cur_bb->aux;
1952                       cur_bb->aux = new_bb;
1953
1954                       /* This is done by force_nonfallthru_and_redirect.  */
1955                       gcc_assert (BB_PARTITION (new_bb)
1956                                   == BB_PARTITION (cur_bb));
1957
1958                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1959                     }
1960                   else
1961                     {
1962                       /* If a new basic-block was not created; restore
1963                          the EDGE_CROSSING flag.  */
1964                       fall_thru->flags |= EDGE_CROSSING;
1965                     }
1966
1967                   /* Add barrier after new jump */
1968                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1969                 }
1970             }
1971         }
1972     }
1973 }
1974
1975 /* This function checks the destination block of a "crossing jump" to
1976    see if it has any crossing predecessors that begin with a code label
1977    and end with an unconditional jump.  If so, it returns that predecessor
1978    block.  (This is to avoid creating lots of new basic blocks that all
1979    contain unconditional jumps to the same destination).  */
1980
1981 static basic_block
1982 find_jump_block (basic_block jump_dest)
1983 {
1984   basic_block source_bb = NULL;
1985   edge e;
1986   rtx_insn *insn;
1987   edge_iterator ei;
1988
1989   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1990     if (e->flags & EDGE_CROSSING)
1991       {
1992         basic_block src = e->src;
1993
1994         /* Check each predecessor to see if it has a label, and contains
1995            only one executable instruction, which is an unconditional jump.
1996            If so, we can use it.  */
1997
1998         if (LABEL_P (BB_HEAD (src)))
1999           for (insn = BB_HEAD (src);
2000                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2001                insn = NEXT_INSN (insn))
2002             {
2003               if (INSN_P (insn)
2004                   && insn == BB_END (src)
2005                   && JUMP_P (insn)
2006                   && !any_condjump_p (insn))
2007                 {
2008                   source_bb = src;
2009                   break;
2010                 }
2011             }
2012
2013         if (source_bb)
2014           break;
2015       }
2016
2017   return source_bb;
2018 }
2019
2020 /* Find all BB's with conditional jumps that are crossing edges;
2021    insert a new bb and make the conditional jump branch to the new
2022    bb instead (make the new bb same color so conditional branch won't
2023    be a 'crossing' edge).  Insert an unconditional jump from the
2024    new bb to the original destination of the conditional jump.  */
2025
2026 static void
2027 fix_crossing_conditional_branches (void)
2028 {
2029   basic_block cur_bb;
2030   basic_block new_bb;
2031   basic_block dest;
2032   edge succ1;
2033   edge succ2;
2034   edge crossing_edge;
2035   edge new_edge;
2036   rtx set_src;
2037   rtx old_label = NULL_RTX;
2038   rtx_code_label *new_label;
2039
2040   FOR_EACH_BB_FN (cur_bb, cfun)
2041     {
2042       crossing_edge = NULL;
2043       if (EDGE_COUNT (cur_bb->succs) > 0)
2044         succ1 = EDGE_SUCC (cur_bb, 0);
2045       else
2046         succ1 = NULL;
2047
2048       if (EDGE_COUNT (cur_bb->succs) > 1)
2049         succ2 = EDGE_SUCC (cur_bb, 1);
2050       else
2051         succ2 = NULL;
2052
2053       /* We already took care of fall-through edges, so only one successor
2054          can be a crossing edge.  */
2055
2056       if (succ1 && (succ1->flags & EDGE_CROSSING))
2057         crossing_edge = succ1;
2058       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2059         crossing_edge = succ2;
2060
2061       if (crossing_edge)
2062         {
2063           rtx_insn *old_jump = BB_END (cur_bb);
2064
2065           /* Check to make sure the jump instruction is a
2066              conditional jump.  */
2067
2068           set_src = NULL_RTX;
2069
2070           if (any_condjump_p (old_jump))
2071             {
2072               if (GET_CODE (PATTERN (old_jump)) == SET)
2073                 set_src = SET_SRC (PATTERN (old_jump));
2074               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2075                 {
2076                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2077                   if (GET_CODE (set_src) == SET)
2078                     set_src = SET_SRC (set_src);
2079                   else
2080                     set_src = NULL_RTX;
2081                 }
2082             }
2083
2084           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2085             {
2086               rtx_jump_insn *old_jump_insn =
2087                         as_a <rtx_jump_insn *> (old_jump);
2088
2089               if (GET_CODE (XEXP (set_src, 1)) == PC)
2090                 old_label = XEXP (set_src, 2);
2091               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2092                 old_label = XEXP (set_src, 1);
2093
2094               /* Check to see if new bb for jumping to that dest has
2095                  already been created; if so, use it; if not, create
2096                  a new one.  */
2097
2098               new_bb = find_jump_block (crossing_edge->dest);
2099
2100               if (new_bb)
2101                 new_label = block_label (new_bb);
2102               else
2103                 {
2104                   basic_block last_bb;
2105                   rtx_code_label *old_jump_target;
2106                   rtx_jump_insn *new_jump;
2107
2108                   /* Create new basic block to be dest for
2109                      conditional jump.  */
2110
2111                   /* Put appropriate instructions in new bb.  */
2112
2113                   new_label = gen_label_rtx ();
2114                   emit_label (new_label);
2115
2116                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2117                   old_jump_target = old_jump_insn->jump_target ();
2118                   new_jump = as_a <rtx_jump_insn *>
2119                     (emit_jump_insn (targetm.gen_jump (old_jump_target)));
2120                   new_jump->set_jump_target (old_jump_target);
2121
2122                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2123                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2124                   new_bb->aux = last_bb->aux;
2125                   last_bb->aux = new_bb;
2126
2127                   emit_barrier_after_bb (new_bb);
2128
2129                   /* Make sure new bb is in same partition as source
2130                      of conditional branch.  */
2131                   BB_COPY_PARTITION (new_bb, cur_bb);
2132                 }
2133
2134               /* Make old jump branch to new bb.  */
2135
2136               redirect_jump (old_jump_insn, new_label, 0);
2137
2138               /* Remove crossing_edge as predecessor of 'dest'.  */
2139
2140               dest = crossing_edge->dest;
2141
2142               redirect_edge_succ (crossing_edge, new_bb);
2143
2144               /* Make a new edge from new_bb to old dest; new edge
2145                  will be a successor for new_bb and a predecessor
2146                  for 'dest'.  */
2147
2148               if (EDGE_COUNT (new_bb->succs) == 0)
2149                 new_edge = make_single_succ_edge (new_bb, dest, 0);
2150               else
2151                 new_edge = EDGE_SUCC (new_bb, 0);
2152
2153               crossing_edge->flags &= ~EDGE_CROSSING;
2154               new_edge->flags |= EDGE_CROSSING;
2155             }
2156         }
2157     }
2158 }
2159
2160 /* Find any unconditional branches that cross between hot and cold
2161    sections.  Convert them into indirect jumps instead.  */
2162
2163 static void
2164 fix_crossing_unconditional_branches (void)
2165 {
2166   basic_block cur_bb;
2167   rtx_insn *last_insn;
2168   rtx label;
2169   rtx label_addr;
2170   rtx_insn *indirect_jump_sequence;
2171   rtx_insn *jump_insn = NULL;
2172   rtx new_reg;
2173   rtx_insn *cur_insn;
2174   edge succ;
2175
2176   FOR_EACH_BB_FN (cur_bb, cfun)
2177     {
2178       last_insn = BB_END (cur_bb);
2179
2180       if (EDGE_COUNT (cur_bb->succs) < 1)
2181         continue;
2182
2183       succ = EDGE_SUCC (cur_bb, 0);
2184
2185       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2186          this point, no crossing jumps should be conditional.  */
2187
2188       if (JUMP_P (last_insn)
2189           && (succ->flags & EDGE_CROSSING))
2190         {
2191           gcc_assert (!any_condjump_p (last_insn));
2192
2193           /* Make sure the jump is not already an indirect or table jump.  */
2194
2195           if (!computed_jump_p (last_insn)
2196               && !tablejump_p (last_insn, NULL, NULL))
2197             {
2198               /* We have found a "crossing" unconditional branch.  Now
2199                  we must convert it to an indirect jump.  First create
2200                  reference of label, as target for jump.  */
2201
2202               label = JUMP_LABEL (last_insn);
2203               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2204               LABEL_NUSES (label) += 1;
2205
2206               /* Get a register to use for the indirect jump.  */
2207
2208               new_reg = gen_reg_rtx (Pmode);
2209
2210               /* Generate indirect the jump sequence.  */
2211
2212               start_sequence ();
2213               emit_move_insn (new_reg, label_addr);
2214               emit_indirect_jump (new_reg);
2215               indirect_jump_sequence = get_insns ();
2216               end_sequence ();
2217
2218               /* Make sure every instruction in the new jump sequence has
2219                  its basic block set to be cur_bb.  */
2220
2221               for (cur_insn = indirect_jump_sequence; cur_insn;
2222                    cur_insn = NEXT_INSN (cur_insn))
2223                 {
2224                   if (!BARRIER_P (cur_insn))
2225                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2226                   if (JUMP_P (cur_insn))
2227                     jump_insn = cur_insn;
2228                 }
2229
2230               /* Insert the new (indirect) jump sequence immediately before
2231                  the unconditional jump, then delete the unconditional jump.  */
2232
2233               emit_insn_before (indirect_jump_sequence, last_insn);
2234               delete_insn (last_insn);
2235
2236               JUMP_LABEL (jump_insn) = label;
2237               LABEL_NUSES (label)++;
2238
2239               /* Make BB_END for cur_bb be the jump instruction (NOT the
2240                  barrier instruction at the end of the sequence...).  */
2241
2242               BB_END (cur_bb) = jump_insn;
2243             }
2244         }
2245     }
2246 }
2247
2248 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2249
2250 static void
2251 update_crossing_jump_flags (void)
2252 {
2253   basic_block bb;
2254   edge e;
2255   edge_iterator ei;
2256
2257   FOR_EACH_BB_FN (bb, cfun)
2258     FOR_EACH_EDGE (e, ei, bb->succs)
2259       if (e->flags & EDGE_CROSSING)
2260         {
2261           if (JUMP_P (BB_END (bb))
2262               /* Some flags were added during fix_up_fall_thru_edges, via
2263                  force_nonfallthru_and_redirect.  */
2264               && !CROSSING_JUMP_P (BB_END (bb)))
2265             CROSSING_JUMP_P (BB_END (bb)) = 1;
2266           break;
2267         }
2268 }
2269
2270 /* Reorder basic blocks using the software trace cache (STC) algorithm.  */
2271
2272 static void
2273 reorder_basic_blocks_software_trace_cache (void)
2274 {
2275   if (dump_file)
2276     fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2277
2278   int n_traces;
2279   int i;
2280   struct trace *traces;
2281
2282   /* We are estimating the length of uncond jump insn only once since the code
2283      for getting the insn length always returns the minimal length now.  */
2284   if (uncond_jump_length == 0)
2285     uncond_jump_length = get_uncond_jump_length ();
2286
2287   /* We need to know some information for each basic block.  */
2288   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2289   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2290   for (i = 0; i < array_size; i++)
2291     {
2292       bbd[i].start_of_trace = -1;
2293       bbd[i].end_of_trace = -1;
2294       bbd[i].in_trace = -1;
2295       bbd[i].visited = 0;
2296       bbd[i].priority = -1;
2297       bbd[i].heap = NULL;
2298       bbd[i].node = NULL;
2299     }
2300
2301   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2302   n_traces = 0;
2303   find_traces (&n_traces, traces);
2304   connect_traces (n_traces, traces);
2305   FREE (traces);
2306   FREE (bbd);
2307 }
2308
2309 /* Return true if edge E1 is more desirable as a fallthrough edge than
2310    edge E2 is.  */
2311
2312 static bool
2313 edge_order (edge e1, edge e2)
2314 {
2315   return EDGE_FREQUENCY (e1) > EDGE_FREQUENCY (e2);
2316 }
2317
2318 /* Reorder basic blocks using the "simple" algorithm.  This tries to
2319    maximize the dynamic number of branches that are fallthrough, without
2320    copying instructions.  The algorithm is greedy, looking at the most
2321    frequently executed branch first.  */
2322
2323 static void
2324 reorder_basic_blocks_simple (void)
2325 {
2326   if (dump_file)
2327     fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
2328
2329   edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
2330
2331   /* First, collect all edges that can be optimized by reordering blocks:
2332      simple jumps and conditional jumps, as well as the function entry edge.  */
2333
2334   int n = 0;
2335   edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2336
2337   basic_block bb;
2338   FOR_EACH_BB_FN (bb, cfun)
2339     {
2340       rtx_insn *end = BB_END (bb);
2341
2342       if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
2343         continue;
2344
2345       /* We cannot optimize asm goto.  */
2346       if (JUMP_P (end) && extract_asm_operands (end))
2347         continue;
2348
2349       if (single_succ_p (bb))
2350         edges[n++] = EDGE_SUCC (bb, 0);
2351       else if (any_condjump_p (end))
2352         {
2353           edge e0 = EDGE_SUCC (bb, 0);
2354           edge e1 = EDGE_SUCC (bb, 1);
2355           /* When optimizing for size it is best to keep the original
2356              fallthrough edges.  */
2357           if (e1->flags & EDGE_FALLTHRU)
2358             std::swap (e0, e1);
2359           edges[n++] = e0;
2360           edges[n++] = e1;
2361         }
2362     }
2363
2364   /* Sort the edges, the most desirable first.  When optimizing for size
2365      all edges are equally desirable.  */
2366
2367   if (optimize_function_for_speed_p (cfun))
2368     std::stable_sort (edges, edges + n, edge_order);
2369
2370   /* Now decide which of those edges to make fallthrough edges.  We set
2371      BB_VISITED if a block already has a fallthrough successor assigned
2372      to it.  We make ->AUX of an endpoint point to the opposite endpoint
2373      of a sequence of blocks that fall through, and ->AUX will be NULL
2374      for a block that is in such a sequence but not an endpoint anymore.
2375
2376      To start with, everything points to itself, nothing is assigned yet.  */
2377
2378   FOR_ALL_BB_FN (bb, cfun)
2379     {
2380       bb->aux = bb;
2381       bb->flags &= ~BB_VISITED;
2382     }
2383
2384   EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
2385
2386   /* Now for all edges, the most desirable first, see if that edge can
2387      connect two sequences.  If it can, update AUX and BB_VISITED; if it
2388      cannot, zero out the edge in the table.  */
2389
2390   for (int j = 0; j < n; j++)
2391     {
2392       edge e = edges[j];
2393
2394       basic_block tail_a = e->src;
2395       basic_block head_b = e->dest;
2396       basic_block head_a = (basic_block) tail_a->aux;
2397       basic_block tail_b = (basic_block) head_b->aux;
2398
2399       /* An edge cannot connect two sequences if:
2400          - it crosses partitions;
2401          - its src is not a current endpoint;
2402          - its dest is not a current endpoint;
2403          - or, it would create a loop.  */
2404
2405       if (e->flags & EDGE_CROSSING
2406           || tail_a->flags & BB_VISITED
2407           || !tail_b
2408           || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
2409           || tail_a == tail_b)
2410         {
2411           edges[j] = 0;
2412           continue;
2413         }
2414
2415       tail_a->aux = 0;
2416       head_b->aux = 0;
2417       head_a->aux = tail_b;
2418       tail_b->aux = head_a;
2419       tail_a->flags |= BB_VISITED;
2420     }
2421
2422   /* Put the pieces together, in the same order that the start blocks of
2423      the sequences already had.  The hot/cold partitioning gives a little
2424      complication: as a first pass only do this for blocks in the same
2425      partition as the start block, and (if there is anything left to do)
2426      in a second pass handle the other partition.  */
2427
2428   basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2429
2430   int current_partition = BB_PARTITION (last_tail);
2431   bool need_another_pass = true;
2432
2433   for (int pass = 0; pass < 2 && need_another_pass; pass++)
2434     {
2435       need_another_pass = false;
2436
2437       FOR_EACH_BB_FN (bb, cfun)
2438         if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
2439           {
2440             if (BB_PARTITION (bb) != current_partition)
2441               {
2442                 need_another_pass = true;
2443                 continue;
2444               }
2445
2446             last_tail->aux = bb;
2447             last_tail = (basic_block) bb->aux;
2448           }
2449
2450       current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
2451     }
2452
2453   last_tail->aux = 0;
2454
2455   /* Finally, link all the chosen fallthrough edges.  */
2456
2457   for (int j = 0; j < n; j++)
2458     if (edges[j])
2459       edges[j]->src->aux = edges[j]->dest;
2460
2461   delete[] edges;
2462
2463   /* If the entry edge no longer falls through we have to make a new
2464      block so it can do so again.  */
2465
2466   edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2467   if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
2468     {
2469       force_nonfallthru (e);
2470       e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2471       BB_COPY_PARTITION (e->src, e->dest);
2472     }
2473 }
2474
2475 /* Reorder basic blocks.  The main entry point to this file.  */
2476
2477 static void
2478 reorder_basic_blocks (void)
2479 {
2480   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2481
2482   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2483     return;
2484
2485   set_edge_can_fallthru_flag ();
2486   mark_dfs_back_edges ();
2487
2488   switch (flag_reorder_blocks_algorithm)
2489     {
2490     case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2491       reorder_basic_blocks_simple ();
2492       break;
2493
2494     case REORDER_BLOCKS_ALGORITHM_STC:
2495       reorder_basic_blocks_software_trace_cache ();
2496       break;
2497
2498     default:
2499       gcc_unreachable ();
2500     }
2501
2502   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2503
2504   if (dump_file)
2505     {
2506       if (dump_flags & TDF_DETAILS)
2507         dump_reg_info (dump_file);
2508       dump_flow_info (dump_file, dump_flags);
2509     }
2510
2511   /* Signal that rtl_verify_flow_info_1 can now verify that there
2512      is at most one switch between hot/cold sections.  */
2513   crtl->bb_reorder_complete = true;
2514 }
2515
2516 /* Determine which partition the first basic block in the function
2517    belongs to, then find the first basic block in the current function
2518    that belongs to a different section, and insert a
2519    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2520    instruction stream.  When writing out the assembly code,
2521    encountering this note will make the compiler switch between the
2522    hot and cold text sections.  */
2523
2524 void
2525 insert_section_boundary_note (void)
2526 {
2527   basic_block bb;
2528   bool switched_sections = false;
2529   int current_partition = 0;
2530
2531   if (!crtl->has_bb_partition)
2532     return;
2533
2534   FOR_EACH_BB_FN (bb, cfun)
2535     {
2536       if (!current_partition)
2537         current_partition = BB_PARTITION (bb);
2538       if (BB_PARTITION (bb) != current_partition)
2539         {
2540           gcc_assert (!switched_sections);
2541           switched_sections = true;
2542           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2543           current_partition = BB_PARTITION (bb);
2544         }
2545     }
2546 }
2547
2548 namespace {
2549
2550 const pass_data pass_data_reorder_blocks =
2551 {
2552   RTL_PASS, /* type */
2553   "bbro", /* name */
2554   OPTGROUP_NONE, /* optinfo_flags */
2555   TV_REORDER_BLOCKS, /* tv_id */
2556   0, /* properties_required */
2557   0, /* properties_provided */
2558   0, /* properties_destroyed */
2559   0, /* todo_flags_start */
2560   0, /* todo_flags_finish */
2561 };
2562
2563 class pass_reorder_blocks : public rtl_opt_pass
2564 {
2565 public:
2566   pass_reorder_blocks (gcc::context *ctxt)
2567     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2568   {}
2569
2570   /* opt_pass methods: */
2571   virtual bool gate (function *)
2572     {
2573       if (targetm.cannot_modify_jumps_p ())
2574         return false;
2575       return (optimize > 0
2576               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2577     }
2578
2579   virtual unsigned int execute (function *);
2580
2581 }; // class pass_reorder_blocks
2582
2583 unsigned int
2584 pass_reorder_blocks::execute (function *fun)
2585 {
2586   basic_block bb;
2587
2588   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2589      splitting possibly introduced more crossjumping opportunities.  */
2590   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2591
2592   reorder_basic_blocks ();
2593   cleanup_cfg (CLEANUP_EXPENSIVE);
2594
2595   FOR_EACH_BB_FN (bb, fun)
2596     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2597       bb->aux = bb->next_bb;
2598   cfg_layout_finalize ();
2599
2600   return 0;
2601 }
2602
2603 } // anon namespace
2604
2605 rtl_opt_pass *
2606 make_pass_reorder_blocks (gcc::context *ctxt)
2607 {
2608   return new pass_reorder_blocks (ctxt);
2609 }
2610
2611 /* Duplicate a block (that we already know ends in a computed jump) into its
2612    predecessors, where possible.  Return whether anything is changed.  */
2613 static bool
2614 maybe_duplicate_computed_goto (basic_block bb, int max_size)
2615 {
2616   if (single_pred_p (bb))
2617     return false;
2618
2619   /* Make sure that the block is small enough.  */
2620   rtx_insn *insn;
2621   FOR_BB_INSNS (bb, insn)
2622     if (INSN_P (insn))
2623       {
2624         max_size -= get_attr_min_length (insn);
2625         if (max_size < 0)
2626            return false;
2627       }
2628
2629   bool changed = false;
2630   edge e;
2631   edge_iterator ei;
2632   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2633     {
2634       basic_block pred = e->src;
2635
2636       /* Do not duplicate BB into PRED if that is the last predecessor, or if
2637          we cannot merge a copy of BB with PRED.  */
2638       if (single_pred_p (bb)
2639           || !single_succ_p (pred)
2640           || e->flags & EDGE_COMPLEX
2641           || pred->index < NUM_FIXED_BLOCKS
2642           || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
2643           || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
2644         {
2645           ei_next (&ei);
2646           continue;
2647         }
2648
2649       if (dump_file)
2650         fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
2651                  bb->index, e->src->index);
2652
2653       /* Remember if PRED can be duplicated; if so, the copy of BB merged
2654          with PRED can be duplicated as well.  */
2655       bool can_dup_more = can_duplicate_block_p (pred);
2656
2657       /* Make a copy of BB, merge it into PRED.  */
2658       basic_block copy = duplicate_block (bb, e, NULL);
2659       emit_barrier_after_bb (copy);
2660       reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
2661       merge_blocks (pred, copy);
2662
2663       changed = true;
2664
2665       /* Try to merge the resulting merged PRED into further predecessors.  */
2666       if (can_dup_more)
2667         maybe_duplicate_computed_goto (pred, max_size);
2668     }
2669
2670   return changed;
2671 }
2672
2673 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2674    computed gotos that were factored early on in the compilation process to
2675    speed up edge based data flow.  We used to not unfactor them again, which
2676    can seriously pessimize code with many computed jumps in the source code,
2677    such as interpreters.  See e.g. PR15242.  */
2678 static void
2679 duplicate_computed_gotos (function *fun)
2680 {
2681   /* We are estimating the length of uncond jump insn only once
2682      since the code for getting the insn length always returns
2683      the minimal length now.  */
2684   if (uncond_jump_length == 0)
2685     uncond_jump_length = get_uncond_jump_length ();
2686
2687   /* Never copy a block larger than this.  */
2688   int max_size
2689     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2690
2691   bool changed = false;
2692
2693   /* Try to duplicate all blocks that end in a computed jump and that
2694      can be duplicated at all.  */
2695   basic_block bb;
2696   FOR_EACH_BB_FN (bb, fun)
2697     if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
2698       changed |= maybe_duplicate_computed_goto (bb, max_size);
2699
2700   /* Duplicating blocks will redirect edges and may cause hot blocks
2701     previously reached by both hot and cold blocks to become dominated
2702     only by cold blocks.  */
2703   if (changed)
2704     fixup_partitions ();
2705 }
2706
2707 namespace {
2708
2709 const pass_data pass_data_duplicate_computed_gotos =
2710 {
2711   RTL_PASS, /* type */
2712   "compgotos", /* name */
2713   OPTGROUP_NONE, /* optinfo_flags */
2714   TV_REORDER_BLOCKS, /* tv_id */
2715   0, /* properties_required */
2716   0, /* properties_provided */
2717   0, /* properties_destroyed */
2718   0, /* todo_flags_start */
2719   0, /* todo_flags_finish */
2720 };
2721
2722 class pass_duplicate_computed_gotos : public rtl_opt_pass
2723 {
2724 public:
2725   pass_duplicate_computed_gotos (gcc::context *ctxt)
2726     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2727   {}
2728
2729   /* opt_pass methods: */
2730   virtual bool gate (function *);
2731   virtual unsigned int execute (function *);
2732
2733 }; // class pass_duplicate_computed_gotos
2734
2735 bool
2736 pass_duplicate_computed_gotos::gate (function *fun)
2737 {
2738   if (targetm.cannot_modify_jumps_p ())
2739     return false;
2740   return (optimize > 0
2741           && flag_expensive_optimizations
2742           && ! optimize_function_for_size_p (fun));
2743 }
2744
2745 unsigned int
2746 pass_duplicate_computed_gotos::execute (function *fun)
2747 {
2748   duplicate_computed_gotos (fun);
2749
2750   return 0;
2751 }
2752
2753 } // anon namespace
2754
2755 rtl_opt_pass *
2756 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2757 {
2758   return new pass_duplicate_computed_gotos (ctxt);
2759 }
2760
2761 /* This function is the main 'entrance' for the optimization that
2762    partitions hot and cold basic blocks into separate sections of the
2763    .o file (to improve performance and cache locality).  Ideally it
2764    would be called after all optimizations that rearrange the CFG have
2765    been called.  However part of this optimization may introduce new
2766    register usage, so it must be called before register allocation has
2767    occurred.  This means that this optimization is actually called
2768    well before the optimization that reorders basic blocks (see
2769    function above).
2770
2771    This optimization checks the feedback information to determine
2772    which basic blocks are hot/cold, updates flags on the basic blocks
2773    to indicate which section they belong in.  This information is
2774    later used for writing out sections in the .o file.  Because hot
2775    and cold sections can be arbitrarily large (within the bounds of
2776    memory), far beyond the size of a single function, it is necessary
2777    to fix up all edges that cross section boundaries, to make sure the
2778    instructions used can actually span the required distance.  The
2779    fixes are described below.
2780
2781    Fall-through edges must be changed into jumps; it is not safe or
2782    legal to fall through across a section boundary.  Whenever a
2783    fall-through edge crossing a section boundary is encountered, a new
2784    basic block is inserted (in the same section as the fall-through
2785    source), and the fall through edge is redirected to the new basic
2786    block.  The new basic block contains an unconditional jump to the
2787    original fall-through target.  (If the unconditional jump is
2788    insufficient to cross section boundaries, that is dealt with a
2789    little later, see below).
2790
2791    In order to deal with architectures that have short conditional
2792    branches (which cannot span all of memory) we take any conditional
2793    jump that attempts to cross a section boundary and add a level of
2794    indirection: it becomes a conditional jump to a new basic block, in
2795    the same section.  The new basic block contains an unconditional
2796    jump to the original target, in the other section.
2797
2798    For those architectures whose unconditional branch is also
2799    incapable of reaching all of memory, those unconditional jumps are
2800    converted into indirect jumps, through a register.
2801
2802    IMPORTANT NOTE: This optimization causes some messy interactions
2803    with the cfg cleanup optimizations; those optimizations want to
2804    merge blocks wherever possible, and to collapse indirect jump
2805    sequences (change "A jumps to B jumps to C" directly into "A jumps
2806    to C").  Those optimizations can undo the jump fixes that
2807    partitioning is required to make (see above), in order to ensure
2808    that jumps attempting to cross section boundaries are really able
2809    to cover whatever distance the jump requires (on many architectures
2810    conditional or unconditional jumps are not able to reach all of
2811    memory).  Therefore tests have to be inserted into each such
2812    optimization to make sure that it does not undo stuff necessary to
2813    cross partition boundaries.  This would be much less of a problem
2814    if we could perform this optimization later in the compilation, but
2815    unfortunately the fact that we may need to create indirect jumps
2816    (through registers) requires that this optimization be performed
2817    before register allocation.
2818
2819    Hot and cold basic blocks are partitioned and put in separate
2820    sections of the .o file, to reduce paging and improve cache
2821    performance (hopefully).  This can result in bits of code from the
2822    same function being widely separated in the .o file.  However this
2823    is not obvious to the current bb structure.  Therefore we must take
2824    care to ensure that: 1). There are no fall_thru edges that cross
2825    between sections; 2). For those architectures which have "short"
2826    conditional branches, all conditional branches that attempt to
2827    cross between sections are converted to unconditional branches;
2828    and, 3). For those architectures which have "short" unconditional
2829    branches, all unconditional branches that attempt to cross between
2830    sections are converted to indirect jumps.
2831
2832    The code for fixing up fall_thru edges that cross between hot and
2833    cold basic blocks does so by creating new basic blocks containing
2834    unconditional branches to the appropriate label in the "other"
2835    section.  The new basic block is then put in the same (hot or cold)
2836    section as the original conditional branch, and the fall_thru edge
2837    is modified to fall into the new basic block instead.  By adding
2838    this level of indirection we end up with only unconditional branches
2839    crossing between hot and cold sections.
2840
2841    Conditional branches are dealt with by adding a level of indirection.
2842    A new basic block is added in the same (hot/cold) section as the
2843    conditional branch, and the conditional branch is retargeted to the
2844    new basic block.  The new basic block contains an unconditional branch
2845    to the original target of the conditional branch (in the other section).
2846
2847    Unconditional branches are dealt with by converting them into
2848    indirect jumps.  */
2849
2850 namespace {
2851
2852 const pass_data pass_data_partition_blocks =
2853 {
2854   RTL_PASS, /* type */
2855   "bbpart", /* name */
2856   OPTGROUP_NONE, /* optinfo_flags */
2857   TV_REORDER_BLOCKS, /* tv_id */
2858   PROP_cfglayout, /* properties_required */
2859   0, /* properties_provided */
2860   0, /* properties_destroyed */
2861   0, /* todo_flags_start */
2862   0, /* todo_flags_finish */
2863 };
2864
2865 class pass_partition_blocks : public rtl_opt_pass
2866 {
2867 public:
2868   pass_partition_blocks (gcc::context *ctxt)
2869     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2870   {}
2871
2872   /* opt_pass methods: */
2873   virtual bool gate (function *);
2874   virtual unsigned int execute (function *);
2875
2876 }; // class pass_partition_blocks
2877
2878 bool
2879 pass_partition_blocks::gate (function *fun)
2880 {
2881   /* The optimization to partition hot/cold basic blocks into separate
2882      sections of the .o file does not work well with linkonce or with
2883      user defined section attributes.  Don't call it if either case
2884      arises.  */
2885   return (flag_reorder_blocks_and_partition
2886           && optimize
2887           /* See pass_reorder_blocks::gate.  We should not partition if
2888              we are going to omit the reordering.  */
2889           && optimize_function_for_speed_p (fun)
2890           && !DECL_COMDAT_GROUP (current_function_decl)
2891           && !lookup_attribute ("section", DECL_ATTRIBUTES (fun->decl)));
2892 }
2893
2894 unsigned
2895 pass_partition_blocks::execute (function *fun)
2896 {
2897   vec<edge> crossing_edges;
2898
2899   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2900     return 0;
2901
2902   df_set_flags (DF_DEFER_INSN_RESCAN);
2903
2904   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2905   if (!crossing_edges.exists ())
2906     /* Make sure to process deferred rescans and clear changeable df flags.  */
2907     return TODO_df_finish;
2908
2909   crtl->has_bb_partition = true;
2910
2911   /* Make sure the source of any crossing edge ends in a jump and the
2912      destination of any crossing edge has a label.  */
2913   add_labels_and_missing_jumps (crossing_edges);
2914
2915   /* Convert all crossing fall_thru edges to non-crossing fall
2916      thrus to unconditional jumps (that jump to the original fall
2917      through dest).  */
2918   fix_up_fall_thru_edges ();
2919
2920   /* If the architecture does not have conditional branches that can
2921      span all of memory, convert crossing conditional branches into
2922      crossing unconditional branches.  */
2923   if (!HAS_LONG_COND_BRANCH)
2924     fix_crossing_conditional_branches ();
2925
2926   /* If the architecture does not have unconditional branches that
2927      can span all of memory, convert crossing unconditional branches
2928      into indirect jumps.  Since adding an indirect jump also adds
2929      a new register usage, update the register usage information as
2930      well.  */
2931   if (!HAS_LONG_UNCOND_BRANCH)
2932     fix_crossing_unconditional_branches ();
2933
2934   update_crossing_jump_flags ();
2935
2936   /* Clear bb->aux fields that the above routines were using.  */
2937   clear_aux_for_blocks ();
2938
2939   crossing_edges.release ();
2940
2941   /* ??? FIXME: DF generates the bb info for a block immediately.
2942      And by immediately, I mean *during* creation of the block.
2943
2944         #0  df_bb_refs_collect
2945         #1  in df_bb_refs_record
2946         #2  in create_basic_block_structure
2947
2948      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2949      will *always* fail, because no edges can have been added to the
2950      block yet.  Which of course means we don't add the right
2951      artificial refs, which means we fail df_verify (much) later.
2952
2953      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2954      that we also shouldn't grab data from the new blocks those new
2955      insns are in either.  In this way one can create the block, link
2956      it up properly, and have everything Just Work later, when deferred
2957      insns are processed.
2958
2959      In the meantime, we have no other option but to throw away all
2960      of the DF data and recompute it all.  */
2961   if (fun->eh->lp_array)
2962     {
2963       df_finish_pass (true);
2964       df_scan_alloc (NULL);
2965       df_scan_blocks ();
2966       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2967          data.  We blindly generated all of them when creating the new
2968          landing pad.  Delete those assignments we don't use.  */
2969       df_set_flags (DF_LR_RUN_DCE);
2970       df_analyze ();
2971     }
2972
2973   /* Make sure to process deferred rescans and clear changeable df flags.  */
2974   return TODO_df_finish;
2975 }
2976
2977 } // anon namespace
2978
2979 rtl_opt_pass *
2980 make_pass_partition_blocks (gcc::context *ctxt)
2981 {
2982   return new pass_partition_blocks (ctxt);
2983 }