gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2016 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This file contains the "reorder blocks" pass, which changes the control
  21    flow of a function to encounter fewer branches; the "partition blocks"
  22    pass, which divides the basic blocks into "hot" and "cold" partitions,
  23    which are kept separate; and the "duplicate computed gotos" pass, which
  24    duplicates blocks ending in an indirect jump.
  25
  26    There are two algorithms for "reorder blocks": the "simple" algorithm,
  27    which just rearranges blocks, trying to minimize the number of executed
  28    unconditional branches; and the "software trace cache" algorithm, which
  29    also copies code, and in general tries a lot harder to have long linear
  30    pieces of machine code executed.  This algorithm is described next.  */
  31
  32 /* This (greedy) algorithm constructs traces in several rounds.
  33    The construction starts from "seeds".  The seed for the first round
  34    is the entry point of the function.  When there are more than one seed,
  35    the one with the lowest key in the heap is selected first (see bb_to_key).
  36    Then the algorithm repeatedly adds the most probable successor to the end
  37    of a trace.  Finally it connects the traces.
  38
  39    There are two parameters: Branch Threshold and Exec Threshold.
  40    If the probability of an edge to a successor of the current basic block is
  41    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  42    then the successor will be the seed in one of the next rounds.
  43    Each round has these parameters lower than the previous one.
  44    The last round has to have these parameters set to zero so that the
  45    remaining blocks are picked up.
  46
  47    The algorithm selects the most probable successor from all unvisited
  48    successors and successors that have been added to this trace.
  49    The other successors (that has not been "sent" to the next round) will be
  50    other seeds for this round and the secondary traces will start from them.
  51    If the successor has not been visited in this trace, it is added to the
  52    trace (however, there is some heuristic for simple branches).
  53    If the successor has been visited in this trace, a loop has been found.
  54    If the loop has many iterations, the loop is rotated so that the source
  55    block of the most probable edge going out of the loop is the last block
  56    of the trace.
  57    If the loop has few iterations and there is no edge from the last block of
  58    the loop going out of the loop, the loop header is duplicated.
  59
  60    When connecting traces, the algorithm first checks whether there is an edge
  61    from the last block of a trace to the first block of another trace.
  62    When there are still some unconnected traces it checks whether there exists
  63    a basic block BB such that BB is a successor of the last block of a trace
  64    and BB is a predecessor of the first block of another trace.  In this case,
  65    BB is duplicated, added at the end of the first trace and the traces are
  66    connected through it.
  67    The rest of traces are simply connected so there will be a jump to the
  68    beginning of the rest of traces.
  69
  70    The above description is for the full algorithm, which is used when the
  71    function is optimized for speed.  When the function is optimized for size,
  72    in order to reduce long jumps and connect more fallthru edges, the
  73    algorithm is modified as follows:
  74    (1) Break long traces to short ones.  A trace is broken at a block that has
  75    multiple predecessors/ successors during trace discovery.  When connecting
  76    traces, only connect Trace n with Trace n + 1.  This change reduces most
  77    long jumps compared with the above algorithm.
  78    (2) Ignore the edge probability and frequency for fallthru edges.
  79    (3) Keep the original order of blocks when there is no chance to fall
  80    through.  We rely on the results of cfg_cleanup.
  81
  82    To implement the change for code size optimization, block's index is
  83    selected as the key and all traces are found in one round.
  84
  85    References:
  86
  87    "Software Trace Cache"
  88    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  89    http://citeseer.nj.nec.com/15361.html
  90
  91 */
  92
  93 #include "config.h"
  94 #define INCLUDE_ALGORITHM /* stable_sort */
  95 #include "system.h"
  96 #include "coretypes.h"
  97 #include "backend.h"
  98 #include "target.h"
  99 #include "rtl.h"
 100 #include "tree.h"
 101 #include "cfghooks.h"
 102 #include "df.h"
 103 #include "optabs.h"
 104 #include "regs.h"
 105 #include "emit-rtl.h"
 106 #include "output.h"
 107 #include "expr.h"
 108 #include "params.h"
 109 #include "toplev.h" /* user_defined_section_attribute */
 110 #include "tree-pass.h"
 111 #include "cfgrtl.h"
 112 #include "cfganal.h"
 113 #include "cfgbuild.h"
 114 #include "cfgcleanup.h"
 115 #include "bb-reorder.h"
 116 #include "except.h"
 117 #include "fibonacci_heap.h"
 118
 119 /* The number of rounds.  In most cases there will only be 4 rounds, but
 120    when partitioning hot and cold basic blocks into separate sections of
 121    the object file there will be an extra round.  */
 122 #define N_ROUNDS 5
 123
 124 struct target_bb_reorder default_target_bb_reorder;
 125 #if SWITCHABLE_TARGET
 126 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 127 #endif
 128
 129 #define uncond_jump_length \
 130   (this_target_bb_reorder->x_uncond_jump_length)
 131
 132 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 133 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 134
 135 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
 136 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 137
 138 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 139    block the edge destination is not duplicated while connecting traces.  */
 140 #define DUPLICATION_THRESHOLD 100
 141
 142 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
 143 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 144
 145 /* Structure to hold needed information for each basic block.  */
 146 struct bbro_basic_block_data
 147 {
 148   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 149   int start_of_trace;
 150
 151   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 152   int end_of_trace;
 153
 154   /* Which trace is the bb in?  */
 155   int in_trace;
 156
 157   /* Which trace was this bb visited in?  */
 158   int visited;
 159
 160   /* Cached maximum frequency of interesting incoming edges.
 161      Minus one means not yet computed.  */
 162   int priority;
 163
 164   /* Which heap is BB in (if any)?  */
 165   bb_heap_t *heap;
 166
 167   /* Which heap node is BB in (if any)?  */
 168   bb_heap_node_t *node;
 169 };
 170
 171 /* The current size of the following dynamic array.  */
 172 static int array_size;
 173
 174 /* The array which holds needed information for basic blocks.  */
 175 static bbro_basic_block_data *bbd;
 176
 177 /* To avoid frequent reallocation the size of arrays is greater than needed,
 178    the number of elements is (not less than) 1.25 * size_wanted.  */
 179 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 180
 181 /* Free the memory and set the pointer to NULL.  */
 182 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 183
 184 /* Structure for holding information about a trace.  */
 185 struct trace
 186 {
 187   /* First and last basic block of the trace.  */
 188   basic_block first, last;
 189
 190   /* The round of the STC creation which this trace was found in.  */
 191   int round;
 192
 193   /* The length (i.e. the number of basic blocks) of the trace.  */
 194   int length;
 195 };
 196
 197 /* Maximum frequency and count of one of the entry blocks.  */
 198 static int max_entry_frequency;
 199 static gcov_type max_entry_count;
 200
 201 /* Local function prototypes.  */
 202 static void find_traces (int *, struct trace *);
 203 static basic_block rotate_loop (edge, struct trace *, int);
 204 static void mark_bb_visited (basic_block, int);
 205 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 206                                  int, bb_heap_t **, int);
 207 static basic_block copy_bb (basic_block, edge, basic_block, int);
 208 static long bb_to_key (basic_block);
 209 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
 210                            const_edge);
 211 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 212                                    struct trace *);
 213 static void connect_traces (int, struct trace *);
 214 static bool copy_bb_p (const_basic_block, int);
 215 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 216 \f
 217 /* Return the trace number in which BB was visited.  */
 218
 219 static int
 220 bb_visited_trace (const_basic_block bb)
 221 {
 222   gcc_assert (bb->index < array_size);
 223   return bbd[bb->index].visited;
 224 }
 225
 226 /* This function marks BB that it was visited in trace number TRACE.  */
 227
 228 static void
 229 mark_bb_visited (basic_block bb, int trace)
 230 {
 231   bbd[bb->index].visited = trace;
 232   if (bbd[bb->index].heap)
 233     {
 234       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 235       bbd[bb->index].heap = NULL;
 236       bbd[bb->index].node = NULL;
 237     }
 238 }
 239
 240 /* Check to see if bb should be pushed into the next round of trace
 241    collections or not.  Reasons for pushing the block forward are 1).
 242    If the block is cold, we are doing partitioning, and there will be
 243    another round (cold partition blocks are not supposed to be
 244    collected into traces until the very last round); or 2). There will
 245    be another round, and the basic block is not "hot enough" for the
 246    current round of trace collection.  */
 247
 248 static bool
 249 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 250                       int exec_th, gcov_type count_th)
 251 {
 252   bool there_exists_another_round;
 253   bool block_not_hot_enough;
 254
 255   there_exists_another_round = round < number_of_rounds - 1;
 256
 257   block_not_hot_enough = (bb->frequency < exec_th
 258                           || bb->count < count_th
 259                           || probably_never_executed_bb_p (cfun, bb));
 260
 261   if (there_exists_another_round
 262       && block_not_hot_enough)
 263     return true;
 264   else
 265     return false;
 266 }
 267
 268 /* Find the traces for Software Trace Cache.  Chain each trace through
 269    RBI()->next.  Store the number of traces to N_TRACES and description of
 270    traces to TRACES.  */
 271
 272 static void
 273 find_traces (int *n_traces, struct trace *traces)
 274 {
 275   int i;
 276   int number_of_rounds;
 277   edge e;
 278   edge_iterator ei;
 279   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
 280
 281   /* Add one extra round of trace collection when partitioning hot/cold
 282      basic blocks into separate sections.  The last round is for all the
 283      cold blocks (and ONLY the cold blocks).  */
 284
 285   number_of_rounds = N_ROUNDS - 1;
 286
 287   /* Insert entry points of function into heap.  */
 288   max_entry_frequency = 0;
 289   max_entry_count = 0;
 290   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 291     {
 292       bbd[e->dest->index].heap = heap;
 293       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
 294       if (e->dest->frequency > max_entry_frequency)
 295         max_entry_frequency = e->dest->frequency;
 296       if (e->dest->count > max_entry_count)
 297         max_entry_count = e->dest->count;
 298     }
 299
 300   /* Find the traces.  */
 301   for (i = 0; i < number_of_rounds; i++)
 302     {
 303       gcov_type count_threshold;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "STC - round %d\n", i + 1);
 307
 308       if (max_entry_count < INT_MAX / 1000)
 309         count_threshold = max_entry_count * exec_threshold[i] / 1000;
 310       else
 311         count_threshold = max_entry_count / 1000 * exec_threshold[i];
 312
 313       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 314                            max_entry_frequency * exec_threshold[i] / 1000,
 315                            count_threshold, traces, n_traces, i, &heap,
 316                            number_of_rounds);
 317     }
 318   delete heap;
 319
 320   if (dump_file)
 321     {
 322       for (i = 0; i < *n_traces; i++)
 323         {
 324           basic_block bb;
 325           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 326                    traces[i].round + 1);
 327           for (bb = traces[i].first;
 328                bb != traces[i].last;
 329                bb = (basic_block) bb->aux)
 330             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
 331           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
 332         }
 333       fflush (dump_file);
 334     }
 335 }
 336
 337 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 338    (with sequential number TRACE_N).  */
 339
 340 static basic_block
 341 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 342 {
 343   basic_block bb;
 344
 345   /* Information about the best end (end after rotation) of the loop.  */
 346   basic_block best_bb = NULL;
 347   edge best_edge = NULL;
 348   int best_freq = -1;
 349   gcov_type best_count = -1;
 350   /* The best edge is preferred when its destination is not visited yet
 351      or is a start block of some trace.  */
 352   bool is_preferred = false;
 353
 354   /* Find the most frequent edge that goes out from current trace.  */
 355   bb = back_edge->dest;
 356   do
 357     {
 358       edge e;
 359       edge_iterator ei;
 360
 361       FOR_EACH_EDGE (e, ei, bb->succs)
 362         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 363             && bb_visited_trace (e->dest) != trace_n
 364             && (e->flags & EDGE_CAN_FALLTHRU)
 365             && !(e->flags & EDGE_COMPLEX))
 366         {
 367           if (is_preferred)
 368             {
 369               /* The best edge is preferred.  */
 370               if (!bb_visited_trace (e->dest)
 371                   || bbd[e->dest->index].start_of_trace >= 0)
 372                 {
 373                   /* The current edge E is also preferred.  */
 374                   int freq = EDGE_FREQUENCY (e);
 375                   if (freq > best_freq || e->count > best_count)
 376                     {
 377                       best_freq = freq;
 378                       best_count = e->count;
 379                       best_edge = e;
 380                       best_bb = bb;
 381                     }
 382                 }
 383             }
 384           else
 385             {
 386               if (!bb_visited_trace (e->dest)
 387                   || bbd[e->dest->index].start_of_trace >= 0)
 388                 {
 389                   /* The current edge E is preferred.  */
 390                   is_preferred = true;
 391                   best_freq = EDGE_FREQUENCY (e);
 392                   best_count = e->count;
 393                   best_edge = e;
 394                   best_bb = bb;
 395                 }
 396               else
 397                 {
 398                   int freq = EDGE_FREQUENCY (e);
 399                   if (!best_edge || freq > best_freq || e->count > best_count)
 400                     {
 401                       best_freq = freq;
 402                       best_count = e->count;
 403                       best_edge = e;
 404                       best_bb = bb;
 405                     }
 406                 }
 407             }
 408         }
 409       bb = (basic_block) bb->aux;
 410     }
 411   while (bb != back_edge->dest);
 412
 413   if (best_bb)
 414     {
 415       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 416          the trace.  */
 417       if (back_edge->dest == trace->first)
 418         {
 419           trace->first = (basic_block) best_bb->aux;
 420         }
 421       else
 422         {
 423           basic_block prev_bb;
 424
 425           for (prev_bb = trace->first;
 426                prev_bb->aux != back_edge->dest;
 427                prev_bb = (basic_block) prev_bb->aux)
 428             ;
 429           prev_bb->aux = best_bb->aux;
 430
 431           /* Try to get rid of uncond jump to cond jump.  */
 432           if (single_succ_p (prev_bb))
 433             {
 434               basic_block header = single_succ (prev_bb);
 435
 436               /* Duplicate HEADER if it is a small block containing cond jump
 437                  in the end.  */
 438               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 439                   && !CROSSING_JUMP_P (BB_END (header)))
 440                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 441             }
 442         }
 443     }
 444   else
 445     {
 446       /* We have not found suitable loop tail so do no rotation.  */
 447       best_bb = back_edge->src;
 448     }
 449   best_bb->aux = NULL;
 450   return best_bb;
 451 }
 452
 453 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 454    not include basic blocks whose probability is lower than BRANCH_TH or whose
 455    frequency is lower than EXEC_TH into traces (or whose count is lower than
 456    COUNT_TH).  Store the new traces into TRACES and modify the number of
 457    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 458    The function expects starting basic blocks to be in *HEAP and will delete
 459    *HEAP and store starting points for the next round into new *HEAP.  */
 460
 461 static void
 462 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 463                      struct trace *traces, int *n_traces, int round,
 464                      bb_heap_t **heap, int number_of_rounds)
 465 {
 466   /* Heap for discarded basic blocks which are possible starting points for
 467      the next round.  */
 468   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 469   bool for_size = optimize_function_for_size_p (cfun);
 470
 471   while (!(*heap)->empty ())
 472     {
 473       basic_block bb;
 474       struct trace *trace;
 475       edge best_edge, e;
 476       long key;
 477       edge_iterator ei;
 478
 479       bb = (*heap)->extract_min ();
 480       bbd[bb->index].heap = NULL;
 481       bbd[bb->index].node = NULL;
 482
 483       if (dump_file)
 484         fprintf (dump_file, "Getting bb %d\n", bb->index);
 485
 486       /* If the BB's frequency is too low, send BB to the next round.  When
 487          partitioning hot/cold blocks into separate sections, make sure all
 488          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 489          round.  When optimizing for size, do not push to next round.  */
 490
 491       if (!for_size
 492           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 493                                    count_th))
 494         {
 495           int key = bb_to_key (bb);
 496           bbd[bb->index].heap = new_heap;
 497           bbd[bb->index].node = new_heap->insert (key, bb);
 498
 499           if (dump_file)
 500             fprintf (dump_file,
 501                      "  Possible start point of next round: %d (key: %d)\n",
 502                      bb->index, key);
 503           continue;
 504         }
 505
 506       trace = traces + *n_traces;
 507       trace->first = bb;
 508       trace->round = round;
 509       trace->length = 0;
 510       bbd[bb->index].in_trace = *n_traces;
 511       (*n_traces)++;
 512
 513       do
 514         {
 515           int prob, freq;
 516           bool ends_in_call;
 517
 518           /* The probability and frequency of the best edge.  */
 519           int best_prob = INT_MIN / 2;
 520           int best_freq = INT_MIN / 2;
 521
 522           best_edge = NULL;
 523           mark_bb_visited (bb, *n_traces);
 524           trace->length++;
 525
 526           if (dump_file)
 527             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 528                      bb->index, *n_traces - 1);
 529
 530           ends_in_call = block_ends_with_call_p (bb);
 531
 532           /* Select the successor that will be placed after BB.  */
 533           FOR_EACH_EDGE (e, ei, bb->succs)
 534             {
 535               gcc_assert (!(e->flags & EDGE_FAKE));
 536
 537               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 538                 continue;
 539
 540               if (bb_visited_trace (e->dest)
 541                   && bb_visited_trace (e->dest) != *n_traces)
 542                 continue;
 543
 544               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 545                 continue;
 546
 547               prob = e->probability;
 548               freq = e->dest->frequency;
 549
 550               /* The only sensible preference for a call instruction is the
 551                  fallthru edge.  Don't bother selecting anything else.  */
 552               if (ends_in_call)
 553                 {
 554                   if (e->flags & EDGE_CAN_FALLTHRU)
 555                     {
 556                       best_edge = e;
 557                       best_prob = prob;
 558                       best_freq = freq;
 559                     }
 560                   continue;
 561                 }
 562
 563               /* Edge that cannot be fallthru or improbable or infrequent
 564                  successor (i.e. it is unsuitable successor).  When optimizing
 565                  for size, ignore the probability and frequency.  */
 566               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 567                   || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
 568                       || e->count < count_th) && (!for_size)))
 569                 continue;
 570
 571               /* If partitioning hot/cold basic blocks, don't consider edges
 572                  that cross section boundaries.  */
 573
 574               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 575                                  best_edge))
 576                 {
 577                   best_edge = e;
 578                   best_prob = prob;
 579                   best_freq = freq;
 580                 }
 581             }
 582
 583           /* If the best destination has multiple predecessors, and can be
 584              duplicated cheaper than a jump, don't allow it to be added
 585              to a trace.  We'll duplicate it when connecting traces.  */
 586           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 587               && copy_bb_p (best_edge->dest, 0))
 588             best_edge = NULL;
 589
 590           /* If the best destination has multiple successors or predecessors,
 591              don't allow it to be added when optimizing for size.  This makes
 592              sure predecessors with smaller index are handled before the best
 593              destinarion.  It breaks long trace and reduces long jumps.
 594
 595              Take if-then-else as an example.
 596                 A
 597                / \
 598               B   C
 599                \ /
 600                 D
 601              If we do not remove the best edge B->D/C->D, the final order might
 602              be A B D ... C.  C is at the end of the program.  If D's successors
 603              and D are complicated, might need long jumps for A->C and C->D.
 604              Similar issue for order: A C D ... B.
 605
 606              After removing the best edge, the final result will be ABCD/ ACBD.
 607              It does not add jump compared with the previous order.  But it
 608              reduces the possibility of long jumps.  */
 609           if (best_edge && for_size
 610               && (EDGE_COUNT (best_edge->dest->succs) > 1
 611                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 612             best_edge = NULL;
 613
 614           /* Add all non-selected successors to the heaps.  */
 615           FOR_EACH_EDGE (e, ei, bb->succs)
 616             {
 617               if (e == best_edge
 618                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 619                   || bb_visited_trace (e->dest))
 620                 continue;
 621
 622               key = bb_to_key (e->dest);
 623
 624               if (bbd[e->dest->index].heap)
 625                 {
 626                   /* E->DEST is already in some heap.  */
 627                   if (key != bbd[e->dest->index].node->get_key ())
 628                     {
 629                       if (dump_file)
 630                         {
 631                           fprintf (dump_file,
 632                                    "Changing key for bb %d from %ld to %ld.\n",
 633                                    e->dest->index,
 634                                    (long) bbd[e->dest->index].node->get_key (),
 635                                    key);
 636                         }
 637                       bbd[e->dest->index].heap->replace_key
 638                         (bbd[e->dest->index].node, key);
 639                     }
 640                 }
 641               else
 642                 {
 643                   bb_heap_t *which_heap = *heap;
 644
 645                   prob = e->probability;
 646                   freq = EDGE_FREQUENCY (e);
 647
 648                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 649                       || (e->flags & EDGE_COMPLEX)
 650                       || prob < branch_th || freq < exec_th
 651                       || e->count < count_th)
 652                     {
 653                       /* When partitioning hot/cold basic blocks, make sure
 654                          the cold blocks (and only the cold blocks) all get
 655                          pushed to the last round of trace collection.  When
 656                          optimizing for size, do not push to next round.  */
 657
 658                       if (!for_size && push_to_next_round_p (e->dest, round,
 659                                                              number_of_rounds,
 660                                                              exec_th, count_th))
 661                         which_heap = new_heap;
 662                     }
 663
 664                   bbd[e->dest->index].heap = which_heap;
 665                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 666
 667                   if (dump_file)
 668                     {
 669                       fprintf (dump_file,
 670                                "  Possible start of %s round: %d (key: %ld)\n",
 671                                (which_heap == new_heap) ? "next" : "this",
 672                                e->dest->index, (long) key);
 673                     }
 674
 675                 }
 676             }
 677
 678           if (best_edge) /* Suitable successor was found.  */
 679             {
 680               if (bb_visited_trace (best_edge->dest) == *n_traces)
 681                 {
 682                   /* We do nothing with one basic block loops.  */
 683                   if (best_edge->dest != bb)
 684                     {
 685                       if (EDGE_FREQUENCY (best_edge)
 686                           > 4 * best_edge->dest->frequency / 5)
 687                         {
 688                           /* The loop has at least 4 iterations.  If the loop
 689                              header is not the first block of the function
 690                              we can rotate the loop.  */
 691
 692                           if (best_edge->dest
 693                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 694                             {
 695                               if (dump_file)
 696                                 {
 697                                   fprintf (dump_file,
 698                                            "Rotating loop %d - %d\n",
 699                                            best_edge->dest->index, bb->index);
 700                                 }
 701                               bb->aux = best_edge->dest;
 702                               bbd[best_edge->dest->index].in_trace =
 703                                                              (*n_traces) - 1;
 704                               bb = rotate_loop (best_edge, trace, *n_traces);
 705                             }
 706                         }
 707                       else
 708                         {
 709                           /* The loop has less than 4 iterations.  */
 710
 711                           if (single_succ_p (bb)
 712                               && copy_bb_p (best_edge->dest,
 713                                             optimize_edge_for_speed_p
 714                                             (best_edge)))
 715                             {
 716                               bb = copy_bb (best_edge->dest, best_edge, bb,
 717                                             *n_traces);
 718                               trace->length++;
 719                             }
 720                         }
 721                     }
 722
 723                   /* Terminate the trace.  */
 724                   break;
 725                 }
 726               else
 727                 {
 728                   /* Check for a situation
 729
 730                     A
 731                    /|
 732                   B |
 733                    \|
 734                     C
 735
 736                   where
 737                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 738                     >= EDGE_FREQUENCY (AC).
 739                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 740                   Best ordering is then A B C.
 741
 742                   When optimizing for size, A B C is always the best order.
 743
 744                   This situation is created for example by:
 745
 746                   if (A) B;
 747                   C;
 748
 749                   */
 750
 751                   FOR_EACH_EDGE (e, ei, bb->succs)
 752                     if (e != best_edge
 753                         && (e->flags & EDGE_CAN_FALLTHRU)
 754                         && !(e->flags & EDGE_COMPLEX)
 755                         && !bb_visited_trace (e->dest)
 756                         && single_pred_p (e->dest)
 757                         && !(e->flags & EDGE_CROSSING)
 758                         && single_succ_p (e->dest)
 759                         && (single_succ_edge (e->dest)->flags
 760                             & EDGE_CAN_FALLTHRU)
 761                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 762                         && single_succ (e->dest) == best_edge->dest
 763                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 764                             || for_size))
 765                       {
 766                         best_edge = e;
 767                         if (dump_file)
 768                           fprintf (dump_file, "Selecting BB %d\n",
 769                                    best_edge->dest->index);
 770                         break;
 771                       }
 772
 773                   bb->aux = best_edge->dest;
 774                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 775                   bb = best_edge->dest;
 776                 }
 777             }
 778         }
 779       while (best_edge);
 780       trace->last = bb;
 781       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 782       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
 783         {
 784           bbd[trace->last->index].end_of_trace = *n_traces - 1;
 785           /* Update the cached maximum frequency for interesting predecessor
 786              edges for successors of the new trace end.  */
 787           FOR_EACH_EDGE (e, ei, trace->last->succs)
 788             if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
 789               bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
 790         }
 791
 792       /* The trace is terminated so we have to recount the keys in heap
 793          (some block can have a lower key because now one of its predecessors
 794          is an end of the trace).  */
 795       FOR_EACH_EDGE (e, ei, bb->succs)
 796         {
 797           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 798               || bb_visited_trace (e->dest))
 799             continue;
 800
 801           if (bbd[e->dest->index].heap)
 802             {
 803               key = bb_to_key (e->dest);
 804               if (key != bbd[e->dest->index].node->get_key ())
 805                 {
 806                   if (dump_file)
 807                     {
 808                       fprintf (dump_file,
 809                                "Changing key for bb %d from %ld to %ld.\n",
 810                                e->dest->index,
 811                                (long) bbd[e->dest->index].node->get_key (), key);
 812                     }
 813                   bbd[e->dest->index].heap->replace_key
 814                     (bbd[e->dest->index].node, key);
 815                 }
 816             }
 817         }
 818     }
 819
 820   delete (*heap);
 821
 822   /* "Return" the new heap.  */
 823   *heap = new_heap;
 824 }
 825
 826 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 827    it to trace after BB, mark OLD_BB visited and update pass' data structures
 828    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 829
 830 static basic_block
 831 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 832 {
 833   basic_block new_bb;
 834
 835   new_bb = duplicate_block (old_bb, e, bb);
 836   BB_COPY_PARTITION (new_bb, old_bb);
 837
 838   gcc_assert (e->dest == new_bb);
 839
 840   if (dump_file)
 841     fprintf (dump_file,
 842              "Duplicated bb %d (created bb %d)\n",
 843              old_bb->index, new_bb->index);
 844
 845   if (new_bb->index >= array_size
 846       || last_basic_block_for_fn (cfun) > array_size)
 847     {
 848       int i;
 849       int new_size;
 850
 851       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 852       new_size = GET_ARRAY_SIZE (new_size);
 853       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 854       for (i = array_size; i < new_size; i++)
 855         {
 856           bbd[i].start_of_trace = -1;
 857           bbd[i].end_of_trace = -1;
 858           bbd[i].in_trace = -1;
 859           bbd[i].visited = 0;
 860           bbd[i].priority = -1;
 861           bbd[i].heap = NULL;
 862           bbd[i].node = NULL;
 863         }
 864       array_size = new_size;
 865
 866       if (dump_file)
 867         {
 868           fprintf (dump_file,
 869                    "Growing the dynamic array to %d elements.\n",
 870                    array_size);
 871         }
 872     }
 873
 874   gcc_assert (!bb_visited_trace (e->dest));
 875   mark_bb_visited (new_bb, trace);
 876   new_bb->aux = bb->aux;
 877   bb->aux = new_bb;
 878
 879   bbd[new_bb->index].in_trace = trace;
 880
 881   return new_bb;
 882 }
 883
 884 /* Compute and return the key (for the heap) of the basic block BB.  */
 885
 886 static long
 887 bb_to_key (basic_block bb)
 888 {
 889   edge e;
 890   edge_iterator ei;
 891
 892   /* Use index as key to align with its original order.  */
 893   if (optimize_function_for_size_p (cfun))
 894     return bb->index;
 895
 896   /* Do not start in probably never executed blocks.  */
 897
 898   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 899       || probably_never_executed_bb_p (cfun, bb))
 900     return BB_FREQ_MAX;
 901
 902   /* Prefer blocks whose predecessor is an end of some trace
 903      or whose predecessor edge is EDGE_DFS_BACK.  */
 904   int priority = bbd[bb->index].priority;
 905   if (priority == -1)
 906     {
 907       priority = 0;
 908       FOR_EACH_EDGE (e, ei, bb->preds)
 909         {
 910           if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 911                && bbd[e->src->index].end_of_trace >= 0)
 912               || (e->flags & EDGE_DFS_BACK))
 913             {
 914               int edge_freq = EDGE_FREQUENCY (e);
 915
 916               if (edge_freq > priority)
 917                 priority = edge_freq;
 918             }
 919         }
 920       bbd[bb->index].priority = priority;
 921     }
 922
 923   if (priority)
 924     /* The block with priority should have significantly lower key.  */
 925     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
 926
 927   return -bb->frequency;
 928 }
 929
 930 /* Return true when the edge E from basic block BB is better than the temporary
 931    best edge (details are in function).  The probability of edge E is PROB. The
 932    frequency of the successor is FREQ.  The current best probability is
 933    BEST_PROB, the best frequency is BEST_FREQ.
 934    The edge is considered to be equivalent when PROB does not differ much from
 935    BEST_PROB; similarly for frequency.  */
 936
 937 static bool
 938 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
 939                int best_prob, int best_freq, const_edge cur_best_edge)
 940 {
 941   bool is_better_edge;
 942
 943   /* The BEST_* values do not have to be best, but can be a bit smaller than
 944      maximum values.  */
 945   int diff_prob = best_prob / 10;
 946   int diff_freq = best_freq / 10;
 947
 948   /* The smaller one is better to keep the original order.  */
 949   if (optimize_function_for_size_p (cfun))
 950     return !cur_best_edge
 951            || cur_best_edge->dest->index > e->dest->index;
 952
 953   if (prob > best_prob + diff_prob)
 954     /* The edge has higher probability than the temporary best edge.  */
 955     is_better_edge = true;
 956   else if (prob < best_prob - diff_prob)
 957     /* The edge has lower probability than the temporary best edge.  */
 958     is_better_edge = false;
 959   else if (freq < best_freq - diff_freq)
 960     /* The edge and the temporary best edge  have almost equivalent
 961        probabilities.  The higher frequency of a successor now means
 962        that there is another edge going into that successor.
 963        This successor has lower frequency so it is better.  */
 964     is_better_edge = true;
 965   else if (freq > best_freq + diff_freq)
 966     /* This successor has higher frequency so it is worse.  */
 967     is_better_edge = false;
 968   else if (e->dest->prev_bb == bb)
 969     /* The edges have equivalent probabilities and the successors
 970        have equivalent frequencies.  Select the previous successor.  */
 971     is_better_edge = true;
 972   else
 973     is_better_edge = false;
 974
 975   /* If we are doing hot/cold partitioning, make sure that we always favor
 976      non-crossing edges over crossing edges.  */
 977
 978   if (!is_better_edge
 979       && flag_reorder_blocks_and_partition
 980       && cur_best_edge
 981       && (cur_best_edge->flags & EDGE_CROSSING)
 982       && !(e->flags & EDGE_CROSSING))
 983     is_better_edge = true;
 984
 985   return is_better_edge;
 986 }
 987
 988 /* Return true when the edge E is better than the temporary best edge
 989    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
 990    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
 991    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
 992    TRACES record the information about traces.
 993    When optimizing for size, the edge with smaller index is better.
 994    When optimizing for speed, the edge with bigger probability or longer trace
 995    is better.  */
 996
 997 static bool
 998 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
 999                        const_edge cur_best_edge, struct trace *traces)
1000 {
1001   int e_index;
1002   int b_index;
1003   bool is_better_edge;
1004
1005   if (!cur_best_edge)
1006     return true;
1007
1008   if (optimize_function_for_size_p (cfun))
1009     {
1010       e_index = src_index_p ? e->src->index : e->dest->index;
1011       b_index = src_index_p ? cur_best_edge->src->index
1012                               : cur_best_edge->dest->index;
1013       /* The smaller one is better to keep the original order.  */
1014       return b_index > e_index;
1015     }
1016
1017   if (src_index_p)
1018     {
1019       e_index = e->src->index;
1020
1021       if (e->probability > cur_best_edge->probability)
1022         /* The edge has higher probability than the temporary best edge.  */
1023         is_better_edge = true;
1024       else if (e->probability < cur_best_edge->probability)
1025         /* The edge has lower probability than the temporary best edge.  */
1026         is_better_edge = false;
1027       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1028         /* The edge and the temporary best edge have equivalent probabilities.
1029            The edge with longer trace is better.  */
1030         is_better_edge = true;
1031       else
1032         is_better_edge = false;
1033     }
1034   else
1035     {
1036       e_index = e->dest->index;
1037
1038       if (e->probability > cur_best_edge->probability)
1039         /* The edge has higher probability than the temporary best edge.  */
1040         is_better_edge = true;
1041       else if (e->probability < cur_best_edge->probability)
1042         /* The edge has lower probability than the temporary best edge.  */
1043         is_better_edge = false;
1044       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1045         /* The edge and the temporary best edge have equivalent probabilities.
1046            The edge with longer trace is better.  */
1047         is_better_edge = true;
1048       else
1049         is_better_edge = false;
1050     }
1051
1052   return is_better_edge;
1053 }
1054
1055 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1056
1057 static void
1058 connect_traces (int n_traces, struct trace *traces)
1059 {
1060   int i;
1061   bool *connected;
1062   bool two_passes;
1063   int last_trace;
1064   int current_pass;
1065   int current_partition;
1066   int freq_threshold;
1067   gcov_type count_threshold;
1068   bool for_size = optimize_function_for_size_p (cfun);
1069
1070   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1071   if (max_entry_count < INT_MAX / 1000)
1072     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1073   else
1074     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1075
1076   connected = XCNEWVEC (bool, n_traces);
1077   last_trace = -1;
1078   current_pass = 1;
1079   current_partition = BB_PARTITION (traces[0].first);
1080   two_passes = false;
1081
1082   if (crtl->has_bb_partition)
1083     for (i = 0; i < n_traces && !two_passes; i++)
1084       if (BB_PARTITION (traces[0].first)
1085           != BB_PARTITION (traces[i].first))
1086         two_passes = true;
1087
1088   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1089     {
1090       int t = i;
1091       int t2;
1092       edge e, best;
1093       int best_len;
1094
1095       if (i >= n_traces)
1096         {
1097           gcc_assert (two_passes && current_pass == 1);
1098           i = 0;
1099           t = i;
1100           current_pass = 2;
1101           if (current_partition == BB_HOT_PARTITION)
1102             current_partition = BB_COLD_PARTITION;
1103           else
1104             current_partition = BB_HOT_PARTITION;
1105         }
1106
1107       if (connected[t])
1108         continue;
1109
1110       if (two_passes
1111           && BB_PARTITION (traces[t].first) != current_partition)
1112         continue;
1113
1114       connected[t] = true;
1115
1116       /* Find the predecessor traces.  */
1117       for (t2 = t; t2 > 0;)
1118         {
1119           edge_iterator ei;
1120           best = NULL;
1121           best_len = 0;
1122           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1123             {
1124               int si = e->src->index;
1125
1126               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1127                   && (e->flags & EDGE_CAN_FALLTHRU)
1128                   && !(e->flags & EDGE_COMPLEX)
1129                   && bbd[si].end_of_trace >= 0
1130                   && !connected[bbd[si].end_of_trace]
1131                   && (BB_PARTITION (e->src) == current_partition)
1132                   && connect_better_edge_p (e, true, best_len, best, traces))
1133                 {
1134                   best = e;
1135                   best_len = traces[bbd[si].end_of_trace].length;
1136                 }
1137             }
1138           if (best)
1139             {
1140               best->src->aux = best->dest;
1141               t2 = bbd[best->src->index].end_of_trace;
1142               connected[t2] = true;
1143
1144               if (dump_file)
1145                 {
1146                   fprintf (dump_file, "Connection: %d %d\n",
1147                            best->src->index, best->dest->index);
1148                 }
1149             }
1150           else
1151             break;
1152         }
1153
1154       if (last_trace >= 0)
1155         traces[last_trace].last->aux = traces[t2].first;
1156       last_trace = t;
1157
1158       /* Find the successor traces.  */
1159       while (1)
1160         {
1161           /* Find the continuation of the chain.  */
1162           edge_iterator ei;
1163           best = NULL;
1164           best_len = 0;
1165           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1166             {
1167               int di = e->dest->index;
1168
1169               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1170                   && (e->flags & EDGE_CAN_FALLTHRU)
1171                   && !(e->flags & EDGE_COMPLEX)
1172                   && bbd[di].start_of_trace >= 0
1173                   && !connected[bbd[di].start_of_trace]
1174                   && (BB_PARTITION (e->dest) == current_partition)
1175                   && connect_better_edge_p (e, false, best_len, best, traces))
1176                 {
1177                   best = e;
1178                   best_len = traces[bbd[di].start_of_trace].length;
1179                 }
1180             }
1181
1182           if (for_size)
1183             {
1184               if (!best)
1185                 /* Stop finding the successor traces.  */
1186                 break;
1187
1188               /* It is OK to connect block n with block n + 1 or a block
1189                  before n.  For others, only connect to the loop header.  */
1190               if (best->dest->index > (traces[t].last->index + 1))
1191                 {
1192                   int count = EDGE_COUNT (best->dest->preds);
1193
1194                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1195                     if (e->flags & EDGE_DFS_BACK)
1196                       count--;
1197
1198                   /* If dest has multiple predecessors, skip it.  We expect
1199                      that one predecessor with smaller index connects with it
1200                      later.  */
1201                   if (count != 1)
1202                     break;
1203                 }
1204
1205               /* Only connect Trace n with Trace n + 1.  It is conservative
1206                  to keep the order as close as possible to the original order.
1207                  It also helps to reduce long jumps.  */
1208               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1209                 break;
1210
1211               if (dump_file)
1212                 fprintf (dump_file, "Connection: %d %d\n",
1213                          best->src->index, best->dest->index);
1214
1215               t = bbd[best->dest->index].start_of_trace;
1216               traces[last_trace].last->aux = traces[t].first;
1217               connected[t] = true;
1218               last_trace = t;
1219             }
1220           else if (best)
1221             {
1222               if (dump_file)
1223                 {
1224                   fprintf (dump_file, "Connection: %d %d\n",
1225                            best->src->index, best->dest->index);
1226                 }
1227               t = bbd[best->dest->index].start_of_trace;
1228               traces[last_trace].last->aux = traces[t].first;
1229               connected[t] = true;
1230               last_trace = t;
1231             }
1232           else
1233             {
1234               /* Try to connect the traces by duplication of 1 block.  */
1235               edge e2;
1236               basic_block next_bb = NULL;
1237               bool try_copy = false;
1238
1239               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1240                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1241                     && (e->flags & EDGE_CAN_FALLTHRU)
1242                     && !(e->flags & EDGE_COMPLEX)
1243                     && (!best || e->probability > best->probability))
1244                   {
1245                     edge_iterator ei;
1246                     edge best2 = NULL;
1247                     int best2_len = 0;
1248
1249                     /* If the destination is a start of a trace which is only
1250                        one block long, then no need to search the successor
1251                        blocks of the trace.  Accept it.  */
1252                     if (bbd[e->dest->index].start_of_trace >= 0
1253                         && traces[bbd[e->dest->index].start_of_trace].length
1254                            == 1)
1255                       {
1256                         best = e;
1257                         try_copy = true;
1258                         continue;
1259                       }
1260
1261                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1262                       {
1263                         int di = e2->dest->index;
1264
1265                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1266                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1267                                 && !(e2->flags & EDGE_COMPLEX)
1268                                 && bbd[di].start_of_trace >= 0
1269                                 && !connected[bbd[di].start_of_trace]
1270                                 && BB_PARTITION (e2->dest) == current_partition
1271                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1272                                 && e2->count >= count_threshold
1273                                 && (!best2
1274                                     || e2->probability > best2->probability
1275                                     || (e2->probability == best2->probability
1276                                         && traces[bbd[di].start_of_trace].length
1277                                            > best2_len))))
1278                           {
1279                             best = e;
1280                             best2 = e2;
1281                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1282                               best2_len = traces[bbd[di].start_of_trace].length;
1283                             else
1284                               best2_len = INT_MAX;
1285                             next_bb = e2->dest;
1286                             try_copy = true;
1287                           }
1288                       }
1289                   }
1290
1291               if (crtl->has_bb_partition)
1292                 try_copy = false;
1293
1294               /* Copy tiny blocks always; copy larger blocks only when the
1295                  edge is traversed frequently enough.  */
1296               if (try_copy
1297                   && copy_bb_p (best->dest,
1298                                 optimize_edge_for_speed_p (best)
1299                                 && EDGE_FREQUENCY (best) >= freq_threshold
1300                                 && best->count >= count_threshold))
1301                 {
1302                   basic_block new_bb;
1303
1304                   if (dump_file)
1305                     {
1306                       fprintf (dump_file, "Connection: %d %d ",
1307                                traces[t].last->index, best->dest->index);
1308                       if (!next_bb)
1309                         fputc ('\n', dump_file);
1310                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1311                         fprintf (dump_file, "exit\n");
1312                       else
1313                         fprintf (dump_file, "%d\n", next_bb->index);
1314                     }
1315
1316                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1317                   traces[t].last = new_bb;
1318                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1319                     {
1320                       t = bbd[next_bb->index].start_of_trace;
1321                       traces[last_trace].last->aux = traces[t].first;
1322                       connected[t] = true;
1323                       last_trace = t;
1324                     }
1325                   else
1326                     break;      /* Stop finding the successor traces.  */
1327                 }
1328               else
1329                 break;  /* Stop finding the successor traces.  */
1330             }
1331         }
1332     }
1333
1334   if (dump_file)
1335     {
1336       basic_block bb;
1337
1338       fprintf (dump_file, "Final order:\n");
1339       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1340         fprintf (dump_file, "%d ", bb->index);
1341       fprintf (dump_file, "\n");
1342       fflush (dump_file);
1343     }
1344
1345   FREE (connected);
1346 }
1347
1348 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1349    when code size is allowed to grow by duplication.  */
1350
1351 static bool
1352 copy_bb_p (const_basic_block bb, int code_may_grow)
1353 {
1354   int size = 0;
1355   int max_size = uncond_jump_length;
1356   rtx_insn *insn;
1357
1358   if (!bb->frequency)
1359     return false;
1360   if (EDGE_COUNT (bb->preds) < 2)
1361     return false;
1362   if (!can_duplicate_block_p (bb))
1363     return false;
1364
1365   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1366   if (EDGE_COUNT (bb->succs) > 8)
1367     return false;
1368
1369   if (code_may_grow && optimize_bb_for_speed_p (bb))
1370     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1371
1372   FOR_BB_INSNS (bb, insn)
1373     {
1374       if (INSN_P (insn))
1375         size += get_attr_min_length (insn);
1376     }
1377
1378   if (size <= max_size)
1379     return true;
1380
1381   if (dump_file)
1382     {
1383       fprintf (dump_file,
1384                "Block %d can't be copied because its size = %d.\n",
1385                bb->index, size);
1386     }
1387
1388   return false;
1389 }
1390
1391 /* Return the length of unconditional jump instruction.  */
1392
1393 int
1394 get_uncond_jump_length (void)
1395 {
1396   int length;
1397
1398   start_sequence ();
1399   rtx_code_label *label = emit_label (gen_label_rtx ());
1400   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1401   length = get_attr_min_length (jump);
1402   end_sequence ();
1403
1404   return length;
1405 }
1406
1407 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1408    Duplicate the landing pad and split the edges so that no EH edge
1409    crosses partitions.  */
1410
1411 static void
1412 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1413 {
1414   eh_landing_pad new_lp;
1415   basic_block new_bb, last_bb, post_bb;
1416   rtx_insn *jump;
1417   unsigned new_partition;
1418   edge_iterator ei;
1419   edge e;
1420
1421   /* Generate the new landing-pad structure.  */
1422   new_lp = gen_eh_landing_pad (old_lp->region);
1423   new_lp->post_landing_pad = old_lp->post_landing_pad;
1424   new_lp->landing_pad = gen_label_rtx ();
1425   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1426
1427   /* Put appropriate instructions in new bb.  */
1428   rtx_code_label *new_label = emit_label (new_lp->landing_pad);
1429
1430   expand_dw2_landing_pad_for_region (old_lp->region);
1431
1432   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1433   post_bb = single_succ (post_bb);
1434   rtx_code_label *post_label = block_label (post_bb);
1435   jump = emit_jump_insn (targetm.gen_jump (post_label));
1436   JUMP_LABEL (jump) = post_label;
1437
1438   /* Create new basic block to be dest for lp.  */
1439   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1440   new_bb = create_basic_block (new_label, jump, last_bb);
1441   new_bb->aux = last_bb->aux;
1442   last_bb->aux = new_bb;
1443
1444   emit_barrier_after_bb (new_bb);
1445
1446   make_edge (new_bb, post_bb, 0);
1447
1448   /* Make sure new bb is in the other partition.  */
1449   new_partition = BB_PARTITION (old_bb);
1450   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1451   BB_SET_PARTITION (new_bb, new_partition);
1452
1453   /* Fix up the edges.  */
1454   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1455     if (BB_PARTITION (e->src) == new_partition)
1456       {
1457         rtx_insn *insn = BB_END (e->src);
1458         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1459
1460         gcc_assert (note != NULL);
1461         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1462         XEXP (note, 0) = GEN_INT (new_lp->index);
1463
1464         /* Adjust the edge to the new destination.  */
1465         redirect_edge_succ (e, new_bb);
1466       }
1467     else
1468       ei_next (&ei);
1469 }
1470
1471
1472 /* Ensure that all hot bbs are included in a hot path through the
1473    procedure. This is done by calling this function twice, once
1474    with WALK_UP true (to look for paths from the entry to hot bbs) and
1475    once with WALK_UP false (to look for paths from hot bbs to the exit).
1476    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1477    to BBS_IN_HOT_PARTITION.  */
1478
1479 static unsigned int
1480 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1481                     vec<basic_block> *bbs_in_hot_partition)
1482 {
1483   /* Callers check this.  */
1484   gcc_checking_assert (cold_bb_count);
1485
1486   /* Keep examining hot bbs while we still have some left to check
1487      and there are remaining cold bbs.  */
1488   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1489   while (! hot_bbs_to_check.is_empty ()
1490          && cold_bb_count)
1491     {
1492       basic_block bb = hot_bbs_to_check.pop ();
1493       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1494       edge e;
1495       edge_iterator ei;
1496       int highest_probability = 0;
1497       int highest_freq = 0;
1498       gcov_type highest_count = 0;
1499       bool found = false;
1500
1501       /* Walk the preds/succs and check if there is at least one already
1502          marked hot. Keep track of the most frequent pred/succ so that we
1503          can mark it hot if we don't find one.  */
1504       FOR_EACH_EDGE (e, ei, edges)
1505         {
1506           basic_block reach_bb = walk_up ? e->src : e->dest;
1507
1508           if (e->flags & EDGE_DFS_BACK)
1509             continue;
1510
1511           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1512           {
1513             found = true;
1514             break;
1515           }
1516           /* The following loop will look for the hottest edge via
1517              the edge count, if it is non-zero, then fallback to the edge
1518              frequency and finally the edge probability.  */
1519           if (e->count > highest_count)
1520             highest_count = e->count;
1521           int edge_freq = EDGE_FREQUENCY (e);
1522           if (edge_freq > highest_freq)
1523             highest_freq = edge_freq;
1524           if (e->probability > highest_probability)
1525             highest_probability = e->probability;
1526         }
1527
1528       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1529          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1530          then the most frequent pred (or succ) needs to be adjusted.  In the
1531          case where multiple preds/succs have the same frequency (e.g. a
1532          50-50 branch), then both will be adjusted.  */
1533       if (found)
1534         continue;
1535
1536       FOR_EACH_EDGE (e, ei, edges)
1537         {
1538           if (e->flags & EDGE_DFS_BACK)
1539             continue;
1540           /* Select the hottest edge using the edge count, if it is non-zero,
1541              then fallback to the edge frequency and finally the edge
1542              probability.  */
1543           if (highest_count)
1544             {
1545               if (e->count < highest_count)
1546                 continue;
1547             }
1548           else if (highest_freq)
1549             {
1550               if (EDGE_FREQUENCY (e) < highest_freq)
1551                 continue;
1552             }
1553           else if (e->probability < highest_probability)
1554             continue;
1555
1556           basic_block reach_bb = walk_up ? e->src : e->dest;
1557
1558           /* We have a hot bb with an immediate dominator that is cold.
1559              The dominator needs to be re-marked hot.  */
1560           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1561           cold_bb_count--;
1562
1563           /* Now we need to examine newly-hot reach_bb to see if it is also
1564              dominated by a cold bb.  */
1565           bbs_in_hot_partition->safe_push (reach_bb);
1566           hot_bbs_to_check.safe_push (reach_bb);
1567         }
1568     }
1569
1570   return cold_bb_count;
1571 }
1572
1573
1574 /* Find the basic blocks that are rarely executed and need to be moved to
1575    a separate section of the .o file (to cut down on paging and improve
1576    cache locality).  Return a vector of all edges that cross.  */
1577
1578 static vec<edge>
1579 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1580 {
1581   vec<edge> crossing_edges = vNULL;
1582   basic_block bb;
1583   edge e;
1584   edge_iterator ei;
1585   unsigned int cold_bb_count = 0;
1586   auto_vec<basic_block> bbs_in_hot_partition;
1587
1588   /* Mark which partition (hot/cold) each basic block belongs in.  */
1589   FOR_EACH_BB_FN (bb, cfun)
1590     {
1591       bool cold_bb = false;
1592
1593       if (probably_never_executed_bb_p (cfun, bb))
1594         {
1595           /* Handle profile insanities created by upstream optimizations
1596              by also checking the incoming edge weights. If there is a non-cold
1597              incoming edge, conservatively prevent this block from being split
1598              into the cold section.  */
1599           cold_bb = true;
1600           FOR_EACH_EDGE (e, ei, bb->preds)
1601             if (!probably_never_executed_edge_p (cfun, e))
1602               {
1603                 cold_bb = false;
1604                 break;
1605               }
1606         }
1607       if (cold_bb)
1608         {
1609           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1610           cold_bb_count++;
1611         }
1612       else
1613         {
1614           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1615           bbs_in_hot_partition.safe_push (bb);
1616         }
1617     }
1618
1619   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1620      Several different possibilities may include cold bbs along all paths
1621      to/from a hot bb. One is that there are edge weight insanities
1622      due to optimization phases that do not properly update basic block profile
1623      counts. The second is that the entry of the function may not be hot, because
1624      it is entered fewer times than the number of profile training runs, but there
1625      is a loop inside the function that causes blocks within the function to be
1626      above the threshold for hotness. This is fixed by walking up from hot bbs
1627      to the entry block, and then down from hot bbs to the exit, performing
1628      partitioning fixups as necessary.  */
1629   if (cold_bb_count)
1630     {
1631       mark_dfs_back_edges ();
1632       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1633                                           &bbs_in_hot_partition);
1634       if (cold_bb_count)
1635         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1636     }
1637
1638   /* The format of .gcc_except_table does not allow landing pads to
1639      be in a different partition as the throw.  Fix this by either
1640      moving or duplicating the landing pads.  */
1641   if (cfun->eh->lp_array)
1642     {
1643       unsigned i;
1644       eh_landing_pad lp;
1645
1646       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1647         {
1648           bool all_same, all_diff;
1649
1650           if (lp == NULL
1651               || lp->landing_pad == NULL_RTX
1652               || !LABEL_P (lp->landing_pad))
1653             continue;
1654
1655           all_same = all_diff = true;
1656           bb = BLOCK_FOR_INSN (lp->landing_pad);
1657           FOR_EACH_EDGE (e, ei, bb->preds)
1658             {
1659               gcc_assert (e->flags & EDGE_EH);
1660               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1661                 all_diff = false;
1662               else
1663                 all_same = false;
1664             }
1665
1666           if (all_same)
1667             ;
1668           else if (all_diff)
1669             {
1670               int which = BB_PARTITION (bb);
1671               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1672               BB_SET_PARTITION (bb, which);
1673             }
1674           else
1675             fix_up_crossing_landing_pad (lp, bb);
1676         }
1677     }
1678
1679   /* Mark every edge that crosses between sections.  */
1680
1681   FOR_EACH_BB_FN (bb, cfun)
1682     FOR_EACH_EDGE (e, ei, bb->succs)
1683       {
1684         unsigned int flags = e->flags;
1685
1686         /* We should never have EDGE_CROSSING set yet.  */
1687         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1688
1689         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1690             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1691             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1692           {
1693             crossing_edges.safe_push (e);
1694             flags |= EDGE_CROSSING;
1695           }
1696
1697         /* Now that we've split eh edges as appropriate, allow landing pads
1698            to be merged with the post-landing pads.  */
1699         flags &= ~EDGE_PRESERVE;
1700
1701         e->flags = flags;
1702       }
1703
1704   return crossing_edges;
1705 }
1706
1707 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1708
1709 static void
1710 set_edge_can_fallthru_flag (void)
1711 {
1712   basic_block bb;
1713
1714   FOR_EACH_BB_FN (bb, cfun)
1715     {
1716       edge e;
1717       edge_iterator ei;
1718
1719       FOR_EACH_EDGE (e, ei, bb->succs)
1720         {
1721           e->flags &= ~EDGE_CAN_FALLTHRU;
1722
1723           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1724           if (e->flags & EDGE_FALLTHRU)
1725             e->flags |= EDGE_CAN_FALLTHRU;
1726         }
1727
1728       /* If the BB ends with an invertible condjump all (2) edges are
1729          CAN_FALLTHRU edges.  */
1730       if (EDGE_COUNT (bb->succs) != 2)
1731         continue;
1732       if (!any_condjump_p (BB_END (bb)))
1733         continue;
1734
1735       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1736       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1737         continue;
1738       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1739       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1740       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1741     }
1742 }
1743
1744 /* If any destination of a crossing edge does not have a label, add label;
1745    Convert any easy fall-through crossing edges to unconditional jumps.  */
1746
1747 static void
1748 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1749 {
1750   size_t i;
1751   edge e;
1752
1753   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1754     {
1755       basic_block src = e->src;
1756       basic_block dest = e->dest;
1757       rtx_jump_insn *new_jump;
1758
1759       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1760         continue;
1761
1762       /* Make sure dest has a label.  */
1763       rtx_code_label *label = block_label (dest);
1764
1765       /* Nothing to do for non-fallthru edges.  */
1766       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1767         continue;
1768       if ((e->flags & EDGE_FALLTHRU) == 0)
1769         continue;
1770
1771       /* If the block does not end with a control flow insn, then we
1772          can trivially add a jump to the end to fixup the crossing.
1773          Otherwise the jump will have to go in a new bb, which will
1774          be handled by fix_up_fall_thru_edges function.  */
1775       if (control_flow_insn_p (BB_END (src)))
1776         continue;
1777
1778       /* Make sure there's only one successor.  */
1779       gcc_assert (single_succ_p (src));
1780
1781       new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1782       BB_END (src) = new_jump;
1783       JUMP_LABEL (new_jump) = label;
1784       LABEL_NUSES (label) += 1;
1785
1786       emit_barrier_after_bb (src);
1787
1788       /* Mark edge as non-fallthru.  */
1789       e->flags &= ~EDGE_FALLTHRU;
1790     }
1791 }
1792
1793 /* Find any bb's where the fall-through edge is a crossing edge (note that
1794    these bb's must also contain a conditional jump or end with a call
1795    instruction; we've already dealt with fall-through edges for blocks
1796    that didn't have a conditional jump or didn't end with call instruction
1797    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1798    edge to non-crossing edge by inserting a new bb to fall-through into.
1799    The new bb will contain an unconditional jump (crossing edge) to the
1800    original fall through destination.  */
1801
1802 static void
1803 fix_up_fall_thru_edges (void)
1804 {
1805   basic_block cur_bb;
1806   basic_block new_bb;
1807   edge succ1;
1808   edge succ2;
1809   edge fall_thru;
1810   edge cond_jump = NULL;
1811   bool cond_jump_crosses;
1812   int invert_worked;
1813   rtx_insn *old_jump;
1814   rtx_code_label *fall_thru_label;
1815
1816   FOR_EACH_BB_FN (cur_bb, cfun)
1817     {
1818       fall_thru = NULL;
1819       if (EDGE_COUNT (cur_bb->succs) > 0)
1820         succ1 = EDGE_SUCC (cur_bb, 0);
1821       else
1822         succ1 = NULL;
1823
1824       if (EDGE_COUNT (cur_bb->succs) > 1)
1825         succ2 = EDGE_SUCC (cur_bb, 1);
1826       else
1827         succ2 = NULL;
1828
1829       /* Find the fall-through edge.  */
1830
1831       if (succ1
1832           && (succ1->flags & EDGE_FALLTHRU))
1833         {
1834           fall_thru = succ1;
1835           cond_jump = succ2;
1836         }
1837       else if (succ2
1838                && (succ2->flags & EDGE_FALLTHRU))
1839         {
1840           fall_thru = succ2;
1841           cond_jump = succ1;
1842         }
1843       else if (succ1
1844                && (block_ends_with_call_p (cur_bb)
1845                    || can_throw_internal (BB_END (cur_bb))))
1846         {
1847           edge e;
1848           edge_iterator ei;
1849
1850           FOR_EACH_EDGE (e, ei, cur_bb->succs)
1851             if (e->flags & EDGE_FALLTHRU)
1852               {
1853                 fall_thru = e;
1854                 break;
1855               }
1856         }
1857
1858       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1859         {
1860           /* Check to see if the fall-thru edge is a crossing edge.  */
1861
1862           if (fall_thru->flags & EDGE_CROSSING)
1863             {
1864               /* The fall_thru edge crosses; now check the cond jump edge, if
1865                  it exists.  */
1866
1867               cond_jump_crosses = true;
1868               invert_worked  = 0;
1869               old_jump = BB_END (cur_bb);
1870
1871               /* Find the jump instruction, if there is one.  */
1872
1873               if (cond_jump)
1874                 {
1875                   if (!(cond_jump->flags & EDGE_CROSSING))
1876                     cond_jump_crosses = false;
1877
1878                   /* We know the fall-thru edge crosses; if the cond
1879                      jump edge does NOT cross, and its destination is the
1880                      next block in the bb order, invert the jump
1881                      (i.e. fix it so the fall through does not cross and
1882                      the cond jump does).  */
1883
1884                   if (!cond_jump_crosses)
1885                     {
1886                       /* Find label in fall_thru block. We've already added
1887                          any missing labels, so there must be one.  */
1888
1889                       fall_thru_label = block_label (fall_thru->dest);
1890
1891                       if (old_jump && fall_thru_label)
1892                         {
1893                           rtx_jump_insn *old_jump_insn =
1894                                 dyn_cast <rtx_jump_insn *> (old_jump);
1895                           if (old_jump_insn)
1896                             invert_worked = invert_jump (old_jump_insn,
1897                                                          fall_thru_label, 0);
1898                         }
1899
1900                       if (invert_worked)
1901                         {
1902                           fall_thru->flags &= ~EDGE_FALLTHRU;
1903                           cond_jump->flags |= EDGE_FALLTHRU;
1904                           update_br_prob_note (cur_bb);
1905                           std::swap (fall_thru, cond_jump);
1906                           cond_jump->flags |= EDGE_CROSSING;
1907                           fall_thru->flags &= ~EDGE_CROSSING;
1908                         }
1909                     }
1910                 }
1911
1912               if (cond_jump_crosses || !invert_worked)
1913                 {
1914                   /* This is the case where both edges out of the basic
1915                      block are crossing edges. Here we will fix up the
1916                      fall through edge. The jump edge will be taken care
1917                      of later.  The EDGE_CROSSING flag of fall_thru edge
1918                      is unset before the call to force_nonfallthru
1919                      function because if a new basic-block is created
1920                      this edge remains in the current section boundary
1921                      while the edge between new_bb and the fall_thru->dest
1922                      becomes EDGE_CROSSING.  */
1923
1924                   fall_thru->flags &= ~EDGE_CROSSING;
1925                   new_bb = force_nonfallthru (fall_thru);
1926
1927                   if (new_bb)
1928                     {
1929                       new_bb->aux = cur_bb->aux;
1930                       cur_bb->aux = new_bb;
1931
1932                       /* This is done by force_nonfallthru_and_redirect.  */
1933                       gcc_assert (BB_PARTITION (new_bb)
1934                                   == BB_PARTITION (cur_bb));
1935
1936                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1937                     }
1938                   else
1939                     {
1940                       /* If a new basic-block was not created; restore
1941                          the EDGE_CROSSING flag.  */
1942                       fall_thru->flags |= EDGE_CROSSING;
1943                     }
1944
1945                   /* Add barrier after new jump */
1946                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1947                 }
1948             }
1949         }
1950     }
1951 }
1952
1953 /* This function checks the destination block of a "crossing jump" to
1954    see if it has any crossing predecessors that begin with a code label
1955    and end with an unconditional jump.  If so, it returns that predecessor
1956    block.  (This is to avoid creating lots of new basic blocks that all
1957    contain unconditional jumps to the same destination).  */
1958
1959 static basic_block
1960 find_jump_block (basic_block jump_dest)
1961 {
1962   basic_block source_bb = NULL;
1963   edge e;
1964   rtx_insn *insn;
1965   edge_iterator ei;
1966
1967   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1968     if (e->flags & EDGE_CROSSING)
1969       {
1970         basic_block src = e->src;
1971
1972         /* Check each predecessor to see if it has a label, and contains
1973            only one executable instruction, which is an unconditional jump.
1974            If so, we can use it.  */
1975
1976         if (LABEL_P (BB_HEAD (src)))
1977           for (insn = BB_HEAD (src);
1978                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1979                insn = NEXT_INSN (insn))
1980             {
1981               if (INSN_P (insn)
1982                   && insn == BB_END (src)
1983                   && JUMP_P (insn)
1984                   && !any_condjump_p (insn))
1985                 {
1986                   source_bb = src;
1987                   break;
1988                 }
1989             }
1990
1991         if (source_bb)
1992           break;
1993       }
1994
1995   return source_bb;
1996 }
1997
1998 /* Find all BB's with conditional jumps that are crossing edges;
1999    insert a new bb and make the conditional jump branch to the new
2000    bb instead (make the new bb same color so conditional branch won't
2001    be a 'crossing' edge).  Insert an unconditional jump from the
2002    new bb to the original destination of the conditional jump.  */
2003
2004 static void
2005 fix_crossing_conditional_branches (void)
2006 {
2007   basic_block cur_bb;
2008   basic_block new_bb;
2009   basic_block dest;
2010   edge succ1;
2011   edge succ2;
2012   edge crossing_edge;
2013   edge new_edge;
2014   rtx set_src;
2015   rtx old_label = NULL_RTX;
2016   rtx_code_label *new_label;
2017
2018   FOR_EACH_BB_FN (cur_bb, cfun)
2019     {
2020       crossing_edge = NULL;
2021       if (EDGE_COUNT (cur_bb->succs) > 0)
2022         succ1 = EDGE_SUCC (cur_bb, 0);
2023       else
2024         succ1 = NULL;
2025
2026       if (EDGE_COUNT (cur_bb->succs) > 1)
2027         succ2 = EDGE_SUCC (cur_bb, 1);
2028       else
2029         succ2 = NULL;
2030
2031       /* We already took care of fall-through edges, so only one successor
2032          can be a crossing edge.  */
2033
2034       if (succ1 && (succ1->flags & EDGE_CROSSING))
2035         crossing_edge = succ1;
2036       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2037         crossing_edge = succ2;
2038
2039       if (crossing_edge)
2040         {
2041           rtx_insn *old_jump = BB_END (cur_bb);
2042
2043           /* Check to make sure the jump instruction is a
2044              conditional jump.  */
2045
2046           set_src = NULL_RTX;
2047
2048           if (any_condjump_p (old_jump))
2049             {
2050               if (GET_CODE (PATTERN (old_jump)) == SET)
2051                 set_src = SET_SRC (PATTERN (old_jump));
2052               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2053                 {
2054                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2055                   if (GET_CODE (set_src) == SET)
2056                     set_src = SET_SRC (set_src);
2057                   else
2058                     set_src = NULL_RTX;
2059                 }
2060             }
2061
2062           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2063             {
2064               rtx_jump_insn *old_jump_insn =
2065                         as_a <rtx_jump_insn *> (old_jump);
2066
2067               if (GET_CODE (XEXP (set_src, 1)) == PC)
2068                 old_label = XEXP (set_src, 2);
2069               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2070                 old_label = XEXP (set_src, 1);
2071
2072               /* Check to see if new bb for jumping to that dest has
2073                  already been created; if so, use it; if not, create
2074                  a new one.  */
2075
2076               new_bb = find_jump_block (crossing_edge->dest);
2077
2078               if (new_bb)
2079                 new_label = block_label (new_bb);
2080               else
2081                 {
2082                   basic_block last_bb;
2083                   rtx_code_label *old_jump_target;
2084                   rtx_jump_insn *new_jump;
2085
2086                   /* Create new basic block to be dest for
2087                      conditional jump.  */
2088
2089                   /* Put appropriate instructions in new bb.  */
2090
2091                   new_label = gen_label_rtx ();
2092                   emit_label (new_label);
2093
2094                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2095                   old_jump_target = old_jump_insn->jump_target ();
2096                   new_jump = as_a <rtx_jump_insn *>
2097                     (emit_jump_insn (targetm.gen_jump (old_jump_target)));
2098                   new_jump->set_jump_target (old_jump_target);
2099
2100                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2101                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2102                   new_bb->aux = last_bb->aux;
2103                   last_bb->aux = new_bb;
2104
2105                   emit_barrier_after_bb (new_bb);
2106
2107                   /* Make sure new bb is in same partition as source
2108                      of conditional branch.  */
2109                   BB_COPY_PARTITION (new_bb, cur_bb);
2110                 }
2111
2112               /* Make old jump branch to new bb.  */
2113
2114               redirect_jump (old_jump_insn, new_label, 0);
2115
2116               /* Remove crossing_edge as predecessor of 'dest'.  */
2117
2118               dest = crossing_edge->dest;
2119
2120               redirect_edge_succ (crossing_edge, new_bb);
2121
2122               /* Make a new edge from new_bb to old dest; new edge
2123                  will be a successor for new_bb and a predecessor
2124                  for 'dest'.  */
2125
2126               if (EDGE_COUNT (new_bb->succs) == 0)
2127                 new_edge = make_edge (new_bb, dest, 0);
2128               else
2129                 new_edge = EDGE_SUCC (new_bb, 0);
2130
2131               crossing_edge->flags &= ~EDGE_CROSSING;
2132               new_edge->flags |= EDGE_CROSSING;
2133             }
2134         }
2135     }
2136 }
2137
2138 /* Find any unconditional branches that cross between hot and cold
2139    sections.  Convert them into indirect jumps instead.  */
2140
2141 static void
2142 fix_crossing_unconditional_branches (void)
2143 {
2144   basic_block cur_bb;
2145   rtx_insn *last_insn;
2146   rtx label;
2147   rtx label_addr;
2148   rtx_insn *indirect_jump_sequence;
2149   rtx_insn *jump_insn = NULL;
2150   rtx new_reg;
2151   rtx_insn *cur_insn;
2152   edge succ;
2153
2154   FOR_EACH_BB_FN (cur_bb, cfun)
2155     {
2156       last_insn = BB_END (cur_bb);
2157
2158       if (EDGE_COUNT (cur_bb->succs) < 1)
2159         continue;
2160
2161       succ = EDGE_SUCC (cur_bb, 0);
2162
2163       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2164          this point, no crossing jumps should be conditional.  */
2165
2166       if (JUMP_P (last_insn)
2167           && (succ->flags & EDGE_CROSSING))
2168         {
2169           gcc_assert (!any_condjump_p (last_insn));
2170
2171           /* Make sure the jump is not already an indirect or table jump.  */
2172
2173           if (!computed_jump_p (last_insn)
2174               && !tablejump_p (last_insn, NULL, NULL))
2175             {
2176               /* We have found a "crossing" unconditional branch.  Now
2177                  we must convert it to an indirect jump.  First create
2178                  reference of label, as target for jump.  */
2179
2180               label = JUMP_LABEL (last_insn);
2181               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2182               LABEL_NUSES (label) += 1;
2183
2184               /* Get a register to use for the indirect jump.  */
2185
2186               new_reg = gen_reg_rtx (Pmode);
2187
2188               /* Generate indirect the jump sequence.  */
2189
2190               start_sequence ();
2191               emit_move_insn (new_reg, label_addr);
2192               emit_indirect_jump (new_reg);
2193               indirect_jump_sequence = get_insns ();
2194               end_sequence ();
2195
2196               /* Make sure every instruction in the new jump sequence has
2197                  its basic block set to be cur_bb.  */
2198
2199               for (cur_insn = indirect_jump_sequence; cur_insn;
2200                    cur_insn = NEXT_INSN (cur_insn))
2201                 {
2202                   if (!BARRIER_P (cur_insn))
2203                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2204                   if (JUMP_P (cur_insn))
2205                     jump_insn = cur_insn;
2206                 }
2207
2208               /* Insert the new (indirect) jump sequence immediately before
2209                  the unconditional jump, then delete the unconditional jump.  */
2210
2211               emit_insn_before (indirect_jump_sequence, last_insn);
2212               delete_insn (last_insn);
2213
2214               JUMP_LABEL (jump_insn) = label;
2215               LABEL_NUSES (label)++;
2216
2217               /* Make BB_END for cur_bb be the jump instruction (NOT the
2218                  barrier instruction at the end of the sequence...).  */
2219
2220               BB_END (cur_bb) = jump_insn;
2221             }
2222         }
2223     }
2224 }
2225
2226 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2227
2228 static void
2229 update_crossing_jump_flags (void)
2230 {
2231   basic_block bb;
2232   edge e;
2233   edge_iterator ei;
2234
2235   FOR_EACH_BB_FN (bb, cfun)
2236     FOR_EACH_EDGE (e, ei, bb->succs)
2237       if (e->flags & EDGE_CROSSING)
2238         {
2239           if (JUMP_P (BB_END (bb))
2240               /* Some flags were added during fix_up_fall_thru_edges, via
2241                  force_nonfallthru_and_redirect.  */
2242               && !CROSSING_JUMP_P (BB_END (bb)))
2243             CROSSING_JUMP_P (BB_END (bb)) = 1;
2244           break;
2245         }
2246 }
2247
2248 /* Reorder basic blocks using the software trace cache (STC) algorithm.  */
2249
2250 static void
2251 reorder_basic_blocks_software_trace_cache (void)
2252 {
2253   if (dump_file)
2254     fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2255
2256   int n_traces;
2257   int i;
2258   struct trace *traces;
2259
2260   /* We are estimating the length of uncond jump insn only once since the code
2261      for getting the insn length always returns the minimal length now.  */
2262   if (uncond_jump_length == 0)
2263     uncond_jump_length = get_uncond_jump_length ();
2264
2265   /* We need to know some information for each basic block.  */
2266   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2267   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2268   for (i = 0; i < array_size; i++)
2269     {
2270       bbd[i].start_of_trace = -1;
2271       bbd[i].end_of_trace = -1;
2272       bbd[i].in_trace = -1;
2273       bbd[i].visited = 0;
2274       bbd[i].priority = -1;
2275       bbd[i].heap = NULL;
2276       bbd[i].node = NULL;
2277     }
2278
2279   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2280   n_traces = 0;
2281   find_traces (&n_traces, traces);
2282   connect_traces (n_traces, traces);
2283   FREE (traces);
2284   FREE (bbd);
2285 }
2286
2287 /* Return true if edge E1 is more desirable as a fallthrough edge than
2288    edge E2 is.  */
2289
2290 static bool
2291 edge_order (edge e1, edge e2)
2292 {
2293   return EDGE_FREQUENCY (e1) > EDGE_FREQUENCY (e2);
2294 }
2295
2296 /* Reorder basic blocks using the "simple" algorithm.  This tries to
2297    maximize the dynamic number of branches that are fallthrough, without
2298    copying instructions.  The algorithm is greedy, looking at the most
2299    frequently executed branch first.  */
2300
2301 static void
2302 reorder_basic_blocks_simple (void)
2303 {
2304   if (dump_file)
2305     fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
2306
2307   edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
2308
2309   /* First, collect all edges that can be optimized by reordering blocks:
2310      simple jumps and conditional jumps, as well as the function entry edge.  */
2311
2312   int n = 0;
2313   edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2314
2315   basic_block bb;
2316   FOR_EACH_BB_FN (bb, cfun)
2317     {
2318       rtx_insn *end = BB_END (bb);
2319
2320       if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
2321         continue;
2322
2323       /* We cannot optimize asm goto.  */
2324       if (JUMP_P (end) && extract_asm_operands (end))
2325         continue;
2326
2327       if (single_succ_p (bb))
2328         edges[n++] = EDGE_SUCC (bb, 0);
2329       else if (any_condjump_p (end))
2330         {
2331           edge e0 = EDGE_SUCC (bb, 0);
2332           edge e1 = EDGE_SUCC (bb, 1);
2333           /* When optimizing for size it is best to keep the original
2334              fallthrough edges.  */
2335           if (e1->flags & EDGE_FALLTHRU)
2336             std::swap (e0, e1);
2337           edges[n++] = e0;
2338           edges[n++] = e1;
2339         }
2340     }
2341
2342   /* Sort the edges, the most desirable first.  When optimizing for size
2343      all edges are equally desirable.  */
2344
2345   if (optimize_function_for_speed_p (cfun))
2346     std::stable_sort (edges, edges + n, edge_order);
2347
2348   /* Now decide which of those edges to make fallthrough edges.  We set
2349      BB_VISITED if a block already has a fallthrough successor assigned
2350      to it.  We make ->AUX of an endpoint point to the opposite endpoint
2351      of a sequence of blocks that fall through, and ->AUX will be NULL
2352      for a block that is in such a sequence but not an endpoint anymore.
2353
2354      To start with, everything points to itself, nothing is assigned yet.  */
2355
2356   FOR_ALL_BB_FN (bb, cfun)
2357     bb->aux = bb;
2358
2359   EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
2360
2361   /* Now for all edges, the most desirable first, see if that edge can
2362      connect two sequences.  If it can, update AUX and BB_VISITED; if it
2363      cannot, zero out the edge in the table.  */
2364
2365   for (int j = 0; j < n; j++)
2366     {
2367       edge e = edges[j];
2368
2369       basic_block tail_a = e->src;
2370       basic_block head_b = e->dest;
2371       basic_block head_a = (basic_block) tail_a->aux;
2372       basic_block tail_b = (basic_block) head_b->aux;
2373
2374       /* An edge cannot connect two sequences if:
2375          - it crosses partitions;
2376          - its src is not a current endpoint;
2377          - its dest is not a current endpoint;
2378          - or, it would create a loop.  */
2379
2380       if (e->flags & EDGE_CROSSING
2381           || tail_a->flags & BB_VISITED
2382           || !tail_b
2383           || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
2384           || tail_a == tail_b)
2385         {
2386           edges[j] = 0;
2387           continue;
2388         }
2389
2390       tail_a->aux = 0;
2391       head_b->aux = 0;
2392       head_a->aux = tail_b;
2393       tail_b->aux = head_a;
2394       tail_a->flags |= BB_VISITED;
2395     }
2396
2397   /* Put the pieces together, in the same order that the start blocks of
2398      the sequences already had.  The hot/cold partitioning gives a little
2399      complication: as a first pass only do this for blocks in the same
2400      partition as the start block, and (if there is anything left to do)
2401      in a second pass handle the other partition.  */
2402
2403   basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2404
2405   int current_partition = BB_PARTITION (last_tail);
2406   bool need_another_pass = true;
2407
2408   for (int pass = 0; pass < 2 && need_another_pass; pass++)
2409     {
2410       need_another_pass = false;
2411
2412       FOR_EACH_BB_FN (bb, cfun)
2413         if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
2414           {
2415             if (BB_PARTITION (bb) != current_partition)
2416               {
2417                 need_another_pass = true;
2418                 continue;
2419               }
2420
2421             last_tail->aux = bb;
2422             last_tail = (basic_block) bb->aux;
2423           }
2424
2425       current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
2426     }
2427
2428   last_tail->aux = 0;
2429
2430   /* Finally, link all the chosen fallthrough edges.  */
2431
2432   for (int j = 0; j < n; j++)
2433     if (edges[j])
2434       edges[j]->src->aux = edges[j]->dest;
2435
2436   delete[] edges;
2437
2438   /* If the entry edge no longer falls through we have to make a new
2439      block so it can do so again.  */
2440
2441   edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2442   if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
2443     {
2444       force_nonfallthru (e);
2445       e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2446       BB_COPY_PARTITION (e->src, e->dest);
2447     }
2448 }
2449
2450 /* Reorder basic blocks.  The main entry point to this file.  */
2451
2452 static void
2453 reorder_basic_blocks (void)
2454 {
2455   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2456
2457   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2458     return;
2459
2460   set_edge_can_fallthru_flag ();
2461   mark_dfs_back_edges ();
2462
2463   switch (flag_reorder_blocks_algorithm)
2464     {
2465     case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2466       reorder_basic_blocks_simple ();
2467       break;
2468
2469     case REORDER_BLOCKS_ALGORITHM_STC:
2470       reorder_basic_blocks_software_trace_cache ();
2471       break;
2472
2473     default:
2474       gcc_unreachable ();
2475     }
2476
2477   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2478
2479   if (dump_file)
2480     {
2481       if (dump_flags & TDF_DETAILS)
2482         dump_reg_info (dump_file);
2483       dump_flow_info (dump_file, dump_flags);
2484     }
2485
2486   /* Signal that rtl_verify_flow_info_1 can now verify that there
2487      is at most one switch between hot/cold sections.  */
2488   crtl->bb_reorder_complete = true;
2489 }
2490
2491 /* Determine which partition the first basic block in the function
2492    belongs to, then find the first basic block in the current function
2493    that belongs to a different section, and insert a
2494    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2495    instruction stream.  When writing out the assembly code,
2496    encountering this note will make the compiler switch between the
2497    hot and cold text sections.  */
2498
2499 void
2500 insert_section_boundary_note (void)
2501 {
2502   basic_block bb;
2503   bool switched_sections = false;
2504   int current_partition = 0;
2505
2506   if (!crtl->has_bb_partition)
2507     return;
2508
2509   FOR_EACH_BB_FN (bb, cfun)
2510     {
2511       if (!current_partition)
2512         current_partition = BB_PARTITION (bb);
2513       if (BB_PARTITION (bb) != current_partition)
2514         {
2515           gcc_assert (!switched_sections);
2516           switched_sections = true;
2517           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2518           current_partition = BB_PARTITION (bb);
2519         }
2520     }
2521 }
2522
2523 namespace {
2524
2525 const pass_data pass_data_reorder_blocks =
2526 {
2527   RTL_PASS, /* type */
2528   "bbro", /* name */
2529   OPTGROUP_NONE, /* optinfo_flags */
2530   TV_REORDER_BLOCKS, /* tv_id */
2531   0, /* properties_required */
2532   0, /* properties_provided */
2533   0, /* properties_destroyed */
2534   0, /* todo_flags_start */
2535   0, /* todo_flags_finish */
2536 };
2537
2538 class pass_reorder_blocks : public rtl_opt_pass
2539 {
2540 public:
2541   pass_reorder_blocks (gcc::context *ctxt)
2542     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2543   {}
2544
2545   /* opt_pass methods: */
2546   virtual bool gate (function *)
2547     {
2548       if (targetm.cannot_modify_jumps_p ())
2549         return false;
2550       return (optimize > 0
2551               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2552     }
2553
2554   virtual unsigned int execute (function *);
2555
2556 }; // class pass_reorder_blocks
2557
2558 unsigned int
2559 pass_reorder_blocks::execute (function *fun)
2560 {
2561   basic_block bb;
2562
2563   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2564      splitting possibly introduced more crossjumping opportunities.  */
2565   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2566
2567   reorder_basic_blocks ();
2568   cleanup_cfg (CLEANUP_EXPENSIVE);
2569
2570   FOR_EACH_BB_FN (bb, fun)
2571     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2572       bb->aux = bb->next_bb;
2573   cfg_layout_finalize ();
2574
2575   return 0;
2576 }
2577
2578 } // anon namespace
2579
2580 rtl_opt_pass *
2581 make_pass_reorder_blocks (gcc::context *ctxt)
2582 {
2583   return new pass_reorder_blocks (ctxt);
2584 }
2585
2586 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2587    computed gotos that were factored early on in the compilation process to
2588    speed up edge based data flow.  We used to not unfactoring them again,
2589    which can seriously pessimize code with many computed jumps in the source
2590    code, such as interpreters.  See e.g. PR15242.  */
2591
2592 namespace {
2593
2594 const pass_data pass_data_duplicate_computed_gotos =
2595 {
2596   RTL_PASS, /* type */
2597   "compgotos", /* name */
2598   OPTGROUP_NONE, /* optinfo_flags */
2599   TV_REORDER_BLOCKS, /* tv_id */
2600   0, /* properties_required */
2601   0, /* properties_provided */
2602   0, /* properties_destroyed */
2603   0, /* todo_flags_start */
2604   0, /* todo_flags_finish */
2605 };
2606
2607 class pass_duplicate_computed_gotos : public rtl_opt_pass
2608 {
2609 public:
2610   pass_duplicate_computed_gotos (gcc::context *ctxt)
2611     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2612   {}
2613
2614   /* opt_pass methods: */
2615   virtual bool gate (function *);
2616   virtual unsigned int execute (function *);
2617
2618 }; // class pass_duplicate_computed_gotos
2619
2620 bool
2621 pass_duplicate_computed_gotos::gate (function *fun)
2622 {
2623   if (targetm.cannot_modify_jumps_p ())
2624     return false;
2625   return (optimize > 0
2626           && flag_expensive_optimizations
2627           && ! optimize_function_for_size_p (fun));
2628 }
2629
2630 unsigned int
2631 pass_duplicate_computed_gotos::execute (function *fun)
2632 {
2633   basic_block bb, new_bb;
2634   bitmap candidates;
2635   int max_size;
2636   bool changed = false;
2637
2638   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2639     return 0;
2640
2641   clear_bb_flags ();
2642   cfg_layout_initialize (0);
2643
2644   /* We are estimating the length of uncond jump insn only once
2645      since the code for getting the insn length always returns
2646      the minimal length now.  */
2647   if (uncond_jump_length == 0)
2648     uncond_jump_length = get_uncond_jump_length ();
2649
2650   max_size
2651     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2652   candidates = BITMAP_ALLOC (NULL);
2653
2654   /* Look for blocks that end in a computed jump, and see if such blocks
2655      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2656      mark it in the candidates.  */
2657   FOR_EACH_BB_FN (bb, fun)
2658     {
2659       rtx_insn *insn;
2660       edge e;
2661       edge_iterator ei;
2662       int size, all_flags;
2663
2664       /* Build the reorder chain for the original order of blocks.  */
2665       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2666         bb->aux = bb->next_bb;
2667
2668       /* Obviously the block has to end in a computed jump.  */
2669       if (!computed_jump_p (BB_END (bb)))
2670         continue;
2671
2672       /* Only consider blocks that can be duplicated.  */
2673       if (CROSSING_JUMP_P (BB_END (bb))
2674           || !can_duplicate_block_p (bb))
2675         continue;
2676
2677       /* Make sure that the block is small enough.  */
2678       size = 0;
2679       FOR_BB_INSNS (bb, insn)
2680         if (INSN_P (insn))
2681           {
2682             size += get_attr_min_length (insn);
2683             if (size > max_size)
2684                break;
2685           }
2686       if (size > max_size)
2687         continue;
2688
2689       /* Final check: there must not be any incoming abnormal edges.  */
2690       all_flags = 0;
2691       FOR_EACH_EDGE (e, ei, bb->preds)
2692         all_flags |= e->flags;
2693       if (all_flags & EDGE_COMPLEX)
2694         continue;
2695
2696       bitmap_set_bit (candidates, bb->index);
2697     }
2698
2699   /* Nothing to do if there is no computed jump here.  */
2700   if (bitmap_empty_p (candidates))
2701     goto done;
2702
2703   /* Duplicate computed gotos.  */
2704   FOR_EACH_BB_FN (bb, fun)
2705     {
2706       if (bb->flags & BB_VISITED)
2707         continue;
2708
2709       bb->flags |= BB_VISITED;
2710
2711       /* BB must have one outgoing edge.  That edge must not lead to
2712          the exit block or the next block.
2713          The destination must have more than one predecessor.  */
2714       if (!single_succ_p (bb)
2715           || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2716           || single_succ (bb) == bb->next_bb
2717           || single_pred_p (single_succ (bb)))
2718         continue;
2719
2720       /* The successor block has to be a duplication candidate.  */
2721       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2722         continue;
2723
2724       /* Don't duplicate a partition crossing edge, which requires difficult
2725          fixup.  */
2726       if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2727         continue;
2728
2729       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2730       new_bb->aux = bb->aux;
2731       bb->aux = new_bb;
2732       new_bb->flags |= BB_VISITED;
2733       changed = true;
2734     }
2735
2736  done:
2737   if (changed)
2738     {
2739       /* Duplicating blocks above will redirect edges and may cause hot
2740          blocks previously reached by both hot and cold blocks to become
2741          dominated only by cold blocks.  */
2742       fixup_partitions ();
2743
2744       /* Merge the duplicated blocks into predecessors, when possible.  */
2745       cfg_layout_finalize ();
2746       cleanup_cfg (0);
2747     }
2748   else
2749     cfg_layout_finalize ();
2750
2751   BITMAP_FREE (candidates);
2752   return 0;
2753 }
2754
2755 } // anon namespace
2756
2757 rtl_opt_pass *
2758 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2759 {
2760   return new pass_duplicate_computed_gotos (ctxt);
2761 }
2762
2763 /* This function is the main 'entrance' for the optimization that
2764    partitions hot and cold basic blocks into separate sections of the
2765    .o file (to improve performance and cache locality).  Ideally it
2766    would be called after all optimizations that rearrange the CFG have
2767    been called.  However part of this optimization may introduce new
2768    register usage, so it must be called before register allocation has
2769    occurred.  This means that this optimization is actually called
2770    well before the optimization that reorders basic blocks (see
2771    function above).
2772
2773    This optimization checks the feedback information to determine
2774    which basic blocks are hot/cold, updates flags on the basic blocks
2775    to indicate which section they belong in.  This information is
2776    later used for writing out sections in the .o file.  Because hot
2777    and cold sections can be arbitrarily large (within the bounds of
2778    memory), far beyond the size of a single function, it is necessary
2779    to fix up all edges that cross section boundaries, to make sure the
2780    instructions used can actually span the required distance.  The
2781    fixes are described below.
2782
2783    Fall-through edges must be changed into jumps; it is not safe or
2784    legal to fall through across a section boundary.  Whenever a
2785    fall-through edge crossing a section boundary is encountered, a new
2786    basic block is inserted (in the same section as the fall-through
2787    source), and the fall through edge is redirected to the new basic
2788    block.  The new basic block contains an unconditional jump to the
2789    original fall-through target.  (If the unconditional jump is
2790    insufficient to cross section boundaries, that is dealt with a
2791    little later, see below).
2792
2793    In order to deal with architectures that have short conditional
2794    branches (which cannot span all of memory) we take any conditional
2795    jump that attempts to cross a section boundary and add a level of
2796    indirection: it becomes a conditional jump to a new basic block, in
2797    the same section.  The new basic block contains an unconditional
2798    jump to the original target, in the other section.
2799
2800    For those architectures whose unconditional branch is also
2801    incapable of reaching all of memory, those unconditional jumps are
2802    converted into indirect jumps, through a register.
2803
2804    IMPORTANT NOTE: This optimization causes some messy interactions
2805    with the cfg cleanup optimizations; those optimizations want to
2806    merge blocks wherever possible, and to collapse indirect jump
2807    sequences (change "A jumps to B jumps to C" directly into "A jumps
2808    to C").  Those optimizations can undo the jump fixes that
2809    partitioning is required to make (see above), in order to ensure
2810    that jumps attempting to cross section boundaries are really able
2811    to cover whatever distance the jump requires (on many architectures
2812    conditional or unconditional jumps are not able to reach all of
2813    memory).  Therefore tests have to be inserted into each such
2814    optimization to make sure that it does not undo stuff necessary to
2815    cross partition boundaries.  This would be much less of a problem
2816    if we could perform this optimization later in the compilation, but
2817    unfortunately the fact that we may need to create indirect jumps
2818    (through registers) requires that this optimization be performed
2819    before register allocation.
2820
2821    Hot and cold basic blocks are partitioned and put in separate
2822    sections of the .o file, to reduce paging and improve cache
2823    performance (hopefully).  This can result in bits of code from the
2824    same function being widely separated in the .o file.  However this
2825    is not obvious to the current bb structure.  Therefore we must take
2826    care to ensure that: 1). There are no fall_thru edges that cross
2827    between sections; 2). For those architectures which have "short"
2828    conditional branches, all conditional branches that attempt to
2829    cross between sections are converted to unconditional branches;
2830    and, 3). For those architectures which have "short" unconditional
2831    branches, all unconditional branches that attempt to cross between
2832    sections are converted to indirect jumps.
2833
2834    The code for fixing up fall_thru edges that cross between hot and
2835    cold basic blocks does so by creating new basic blocks containing
2836    unconditional branches to the appropriate label in the "other"
2837    section.  The new basic block is then put in the same (hot or cold)
2838    section as the original conditional branch, and the fall_thru edge
2839    is modified to fall into the new basic block instead.  By adding
2840    this level of indirection we end up with only unconditional branches
2841    crossing between hot and cold sections.
2842
2843    Conditional branches are dealt with by adding a level of indirection.
2844    A new basic block is added in the same (hot/cold) section as the
2845    conditional branch, and the conditional branch is retargeted to the
2846    new basic block.  The new basic block contains an unconditional branch
2847    to the original target of the conditional branch (in the other section).
2848
2849    Unconditional branches are dealt with by converting them into
2850    indirect jumps.  */
2851
2852 namespace {
2853
2854 const pass_data pass_data_partition_blocks =
2855 {
2856   RTL_PASS, /* type */
2857   "bbpart", /* name */
2858   OPTGROUP_NONE, /* optinfo_flags */
2859   TV_REORDER_BLOCKS, /* tv_id */
2860   PROP_cfglayout, /* properties_required */
2861   0, /* properties_provided */
2862   0, /* properties_destroyed */
2863   0, /* todo_flags_start */
2864   0, /* todo_flags_finish */
2865 };
2866
2867 class pass_partition_blocks : public rtl_opt_pass
2868 {
2869 public:
2870   pass_partition_blocks (gcc::context *ctxt)
2871     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2872   {}
2873
2874   /* opt_pass methods: */
2875   virtual bool gate (function *);
2876   virtual unsigned int execute (function *);
2877
2878 }; // class pass_partition_blocks
2879
2880 bool
2881 pass_partition_blocks::gate (function *fun)
2882 {
2883   /* The optimization to partition hot/cold basic blocks into separate
2884      sections of the .o file does not work well with linkonce or with
2885      user defined section attributes.  Don't call it if either case
2886      arises.  */
2887   return (flag_reorder_blocks_and_partition
2888           && optimize
2889           /* See gate_handle_reorder_blocks.  We should not partition if
2890              we are going to omit the reordering.  */
2891           && optimize_function_for_speed_p (fun)
2892           && !DECL_COMDAT_GROUP (current_function_decl)
2893           && !user_defined_section_attribute);
2894 }
2895
2896 unsigned
2897 pass_partition_blocks::execute (function *fun)
2898 {
2899   vec<edge> crossing_edges;
2900
2901   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2902     return 0;
2903
2904   df_set_flags (DF_DEFER_INSN_RESCAN);
2905
2906   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2907   if (!crossing_edges.exists ())
2908     return 0;
2909
2910   crtl->has_bb_partition = true;
2911
2912   /* Make sure the source of any crossing edge ends in a jump and the
2913      destination of any crossing edge has a label.  */
2914   add_labels_and_missing_jumps (crossing_edges);
2915
2916   /* Convert all crossing fall_thru edges to non-crossing fall
2917      thrus to unconditional jumps (that jump to the original fall
2918      through dest).  */
2919   fix_up_fall_thru_edges ();
2920
2921   /* If the architecture does not have conditional branches that can
2922      span all of memory, convert crossing conditional branches into
2923      crossing unconditional branches.  */
2924   if (!HAS_LONG_COND_BRANCH)
2925     fix_crossing_conditional_branches ();
2926
2927   /* If the architecture does not have unconditional branches that
2928      can span all of memory, convert crossing unconditional branches
2929      into indirect jumps.  Since adding an indirect jump also adds
2930      a new register usage, update the register usage information as
2931      well.  */
2932   if (!HAS_LONG_UNCOND_BRANCH)
2933     fix_crossing_unconditional_branches ();
2934
2935   update_crossing_jump_flags ();
2936
2937   /* Clear bb->aux fields that the above routines were using.  */
2938   clear_aux_for_blocks ();
2939
2940   crossing_edges.release ();
2941
2942   /* ??? FIXME: DF generates the bb info for a block immediately.
2943      And by immediately, I mean *during* creation of the block.
2944
2945         #0  df_bb_refs_collect
2946         #1  in df_bb_refs_record
2947         #2  in create_basic_block_structure
2948
2949      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2950      will *always* fail, because no edges can have been added to the
2951      block yet.  Which of course means we don't add the right
2952      artificial refs, which means we fail df_verify (much) later.
2953
2954      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2955      that we also shouldn't grab data from the new blocks those new
2956      insns are in either.  In this way one can create the block, link
2957      it up properly, and have everything Just Work later, when deferred
2958      insns are processed.
2959
2960      In the meantime, we have no other option but to throw away all
2961      of the DF data and recompute it all.  */
2962   if (fun->eh->lp_array)
2963     {
2964       df_finish_pass (true);
2965       df_scan_alloc (NULL);
2966       df_scan_blocks ();
2967       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2968          data.  We blindly generated all of them when creating the new
2969          landing pad.  Delete those assignments we don't use.  */
2970       df_set_flags (DF_LR_RUN_DCE);
2971       df_analyze ();
2972     }
2973
2974   return 0;
2975 }
2976
2977 } // anon namespace
2978
2979 rtl_opt_pass *
2980 make_pass_partition_blocks (gcc::context *ctxt)
2981 {
2982   return new pass_partition_blocks (ctxt);
2983 }