gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2017 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This file contains the "reorder blocks" pass, which changes the control
  21    flow of a function to encounter fewer branches; the "partition blocks"
  22    pass, which divides the basic blocks into "hot" and "cold" partitions,
  23    which are kept separate; and the "duplicate computed gotos" pass, which
  24    duplicates blocks ending in an indirect jump.
  25
  26    There are two algorithms for "reorder blocks": the "simple" algorithm,
  27    which just rearranges blocks, trying to minimize the number of executed
  28    unconditional branches; and the "software trace cache" algorithm, which
  29    also copies code, and in general tries a lot harder to have long linear
  30    pieces of machine code executed.  This algorithm is described next.  */
  31
  32 /* This (greedy) algorithm constructs traces in several rounds.
  33    The construction starts from "seeds".  The seed for the first round
  34    is the entry point of the function.  When there are more than one seed,
  35    the one with the lowest key in the heap is selected first (see bb_to_key).
  36    Then the algorithm repeatedly adds the most probable successor to the end
  37    of a trace.  Finally it connects the traces.
  38
  39    There are two parameters: Branch Threshold and Exec Threshold.
  40    If the probability of an edge to a successor of the current basic block is
  41    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  42    then the successor will be the seed in one of the next rounds.
  43    Each round has these parameters lower than the previous one.
  44    The last round has to have these parameters set to zero so that the
  45    remaining blocks are picked up.
  46
  47    The algorithm selects the most probable successor from all unvisited
  48    successors and successors that have been added to this trace.
  49    The other successors (that has not been "sent" to the next round) will be
  50    other seeds for this round and the secondary traces will start from them.
  51    If the successor has not been visited in this trace, it is added to the
  52    trace (however, there is some heuristic for simple branches).
  53    If the successor has been visited in this trace, a loop has been found.
  54    If the loop has many iterations, the loop is rotated so that the source
  55    block of the most probable edge going out of the loop is the last block
  56    of the trace.
  57    If the loop has few iterations and there is no edge from the last block of
  58    the loop going out of the loop, the loop header is duplicated.
  59
  60    When connecting traces, the algorithm first checks whether there is an edge
  61    from the last block of a trace to the first block of another trace.
  62    When there are still some unconnected traces it checks whether there exists
  63    a basic block BB such that BB is a successor of the last block of a trace
  64    and BB is a predecessor of the first block of another trace.  In this case,
  65    BB is duplicated, added at the end of the first trace and the traces are
  66    connected through it.
  67    The rest of traces are simply connected so there will be a jump to the
  68    beginning of the rest of traces.
  69
  70    The above description is for the full algorithm, which is used when the
  71    function is optimized for speed.  When the function is optimized for size,
  72    in order to reduce long jumps and connect more fallthru edges, the
  73    algorithm is modified as follows:
  74    (1) Break long traces to short ones.  A trace is broken at a block that has
  75    multiple predecessors/ successors during trace discovery.  When connecting
  76    traces, only connect Trace n with Trace n + 1.  This change reduces most
  77    long jumps compared with the above algorithm.
  78    (2) Ignore the edge probability and frequency for fallthru edges.
  79    (3) Keep the original order of blocks when there is no chance to fall
  80    through.  We rely on the results of cfg_cleanup.
  81
  82    To implement the change for code size optimization, block's index is
  83    selected as the key and all traces are found in one round.
  84
  85    References:
  86
  87    "Software Trace Cache"
  88    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  89    http://citeseer.nj.nec.com/15361.html
  90
  91 */
  92
  93 #include "config.h"
  94 #define INCLUDE_ALGORITHM /* stable_sort */
  95 #include "system.h"
  96 #include "coretypes.h"
  97 #include "backend.h"
  98 #include "target.h"
  99 #include "rtl.h"
 100 #include "tree.h"
 101 #include "cfghooks.h"
 102 #include "df.h"
 103 #include "memmodel.h"
 104 #include "optabs.h"
 105 #include "regs.h"
 106 #include "emit-rtl.h"
 107 #include "output.h"
 108 #include "expr.h"
 109 #include "params.h"
 110 #include "tree-pass.h"
 111 #include "cfgrtl.h"
 112 #include "cfganal.h"
 113 #include "cfgbuild.h"
 114 #include "cfgcleanup.h"
 115 #include "bb-reorder.h"
 116 #include "except.h"
 117 #include "fibonacci_heap.h"
 118 #include "stringpool.h"
 119 #include "attribs.h"
 120
 121 /* The number of rounds.  In most cases there will only be 4 rounds, but
 122    when partitioning hot and cold basic blocks into separate sections of
 123    the object file there will be an extra round.  */
 124 #define N_ROUNDS 5
 125
 126 struct target_bb_reorder default_target_bb_reorder;
 127 #if SWITCHABLE_TARGET
 128 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 129 #endif
 130
 131 #define uncond_jump_length \
 132   (this_target_bb_reorder->x_uncond_jump_length)
 133
 134 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
 135 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 136
 137 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
 138 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 139
 140 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 141    block the edge destination is not duplicated while connecting traces.  */
 142 #define DUPLICATION_THRESHOLD 100
 143
 144 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
 145 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 146
 147 /* Structure to hold needed information for each basic block.  */
 148 struct bbro_basic_block_data
 149 {
 150   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 151   int start_of_trace;
 152
 153   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 154   int end_of_trace;
 155
 156   /* Which trace is the bb in?  */
 157   int in_trace;
 158
 159   /* Which trace was this bb visited in?  */
 160   int visited;
 161
 162   /* Cached maximum frequency of interesting incoming edges.
 163      Minus one means not yet computed.  */
 164   int priority;
 165
 166   /* Which heap is BB in (if any)?  */
 167   bb_heap_t *heap;
 168
 169   /* Which heap node is BB in (if any)?  */
 170   bb_heap_node_t *node;
 171 };
 172
 173 /* The current size of the following dynamic array.  */
 174 static int array_size;
 175
 176 /* The array which holds needed information for basic blocks.  */
 177 static bbro_basic_block_data *bbd;
 178
 179 /* To avoid frequent reallocation the size of arrays is greater than needed,
 180    the number of elements is (not less than) 1.25 * size_wanted.  */
 181 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 182
 183 /* Free the memory and set the pointer to NULL.  */
 184 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 185
 186 /* Structure for holding information about a trace.  */
 187 struct trace
 188 {
 189   /* First and last basic block of the trace.  */
 190   basic_block first, last;
 191
 192   /* The round of the STC creation which this trace was found in.  */
 193   int round;
 194
 195   /* The length (i.e. the number of basic blocks) of the trace.  */
 196   int length;
 197 };
 198
 199 /* Maximum frequency and count of one of the entry blocks.  */
 200 static profile_count max_entry_count;
 201
 202 /* Local function prototypes.  */
 203 static void find_traces_1_round (int, profile_count, struct trace *, int *,
 204                                  int, bb_heap_t **, int);
 205 static basic_block copy_bb (basic_block, edge, basic_block, int);
 206 static long bb_to_key (basic_block);
 207 static bool better_edge_p (const_basic_block, const_edge, profile_probability,
 208                            int, profile_probability, int, const_edge);
 209 static bool copy_bb_p (const_basic_block, int);
 210 \f
 211 /* Return the trace number in which BB was visited.  */
 212
 213 static int
 214 bb_visited_trace (const_basic_block bb)
 215 {
 216   gcc_assert (bb->index < array_size);
 217   return bbd[bb->index].visited;
 218 }
 219
 220 /* This function marks BB that it was visited in trace number TRACE.  */
 221
 222 static void
 223 mark_bb_visited (basic_block bb, int trace)
 224 {
 225   bbd[bb->index].visited = trace;
 226   if (bbd[bb->index].heap)
 227     {
 228       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 229       bbd[bb->index].heap = NULL;
 230       bbd[bb->index].node = NULL;
 231     }
 232 }
 233
 234 /* Check to see if bb should be pushed into the next round of trace
 235    collections or not.  Reasons for pushing the block forward are 1).
 236    If the block is cold, we are doing partitioning, and there will be
 237    another round (cold partition blocks are not supposed to be
 238    collected into traces until the very last round); or 2). There will
 239    be another round, and the basic block is not "hot enough" for the
 240    current round of trace collection.  */
 241
 242 static bool
 243 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 244                       profile_count count_th)
 245 {
 246   bool there_exists_another_round;
 247   bool block_not_hot_enough;
 248
 249   there_exists_another_round = round < number_of_rounds - 1;
 250
 251   block_not_hot_enough = (bb->count < count_th
 252                           || probably_never_executed_bb_p (cfun, bb));
 253
 254   if (there_exists_another_round
 255       && block_not_hot_enough)
 256     return true;
 257   else
 258     return false;
 259 }
 260
 261 /* Find the traces for Software Trace Cache.  Chain each trace through
 262    RBI()->next.  Store the number of traces to N_TRACES and description of
 263    traces to TRACES.  */
 264
 265 static void
 266 find_traces (int *n_traces, struct trace *traces)
 267 {
 268   int i;
 269   int number_of_rounds;
 270   edge e;
 271   edge_iterator ei;
 272   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
 273
 274   /* Add one extra round of trace collection when partitioning hot/cold
 275      basic blocks into separate sections.  The last round is for all the
 276      cold blocks (and ONLY the cold blocks).  */
 277
 278   number_of_rounds = N_ROUNDS - 1;
 279
 280   /* Insert entry points of function into heap.  */
 281   max_entry_count = profile_count::zero ();
 282   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 283     {
 284       bbd[e->dest->index].heap = heap;
 285       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
 286       if (e->dest->count > max_entry_count)
 287         max_entry_count = e->dest->count;
 288     }
 289
 290   /* Find the traces.  */
 291   for (i = 0; i < number_of_rounds; i++)
 292     {
 293       profile_count count_threshold;
 294
 295       if (dump_file)
 296         fprintf (dump_file, "STC - round %d\n", i + 1);
 297
 298       count_threshold = max_entry_count.apply_scale (exec_threshold[i], 1000);
 299
 300       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 301                            count_threshold, traces, n_traces, i, &heap,
 302                            number_of_rounds);
 303     }
 304   delete heap;
 305
 306   if (dump_file)
 307     {
 308       for (i = 0; i < *n_traces; i++)
 309         {
 310           basic_block bb;
 311           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 312                    traces[i].round + 1);
 313           for (bb = traces[i].first;
 314                bb != traces[i].last;
 315                bb = (basic_block) bb->aux)
 316             fprintf (dump_file, "%d [%d] ", bb->index,
 317                      bb->count.to_frequency (cfun));
 318           fprintf (dump_file, "%d [%d]\n", bb->index,
 319                    bb->count.to_frequency (cfun));
 320         }
 321       fflush (dump_file);
 322     }
 323 }
 324
 325 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 326    (with sequential number TRACE_N).  */
 327
 328 static basic_block
 329 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 330 {
 331   basic_block bb;
 332
 333   /* Information about the best end (end after rotation) of the loop.  */
 334   basic_block best_bb = NULL;
 335   edge best_edge = NULL;
 336   profile_count best_count = profile_count::uninitialized ();
 337   /* The best edge is preferred when its destination is not visited yet
 338      or is a start block of some trace.  */
 339   bool is_preferred = false;
 340
 341   /* Find the most frequent edge that goes out from current trace.  */
 342   bb = back_edge->dest;
 343   do
 344     {
 345       edge e;
 346       edge_iterator ei;
 347
 348       FOR_EACH_EDGE (e, ei, bb->succs)
 349         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 350             && bb_visited_trace (e->dest) != trace_n
 351             && (e->flags & EDGE_CAN_FALLTHRU)
 352             && !(e->flags & EDGE_COMPLEX))
 353         {
 354           if (is_preferred)
 355             {
 356               /* The best edge is preferred.  */
 357               if (!bb_visited_trace (e->dest)
 358                   || bbd[e->dest->index].start_of_trace >= 0)
 359                 {
 360                   /* The current edge E is also preferred.  */
 361                   if (e->count () > best_count)
 362                     {
 363                       best_count = e->count ();
 364                       best_edge = e;
 365                       best_bb = bb;
 366                     }
 367                 }
 368             }
 369           else
 370             {
 371               if (!bb_visited_trace (e->dest)
 372                   || bbd[e->dest->index].start_of_trace >= 0)
 373                 {
 374                   /* The current edge E is preferred.  */
 375                   is_preferred = true;
 376                   best_count = e->count ();
 377                   best_edge = e;
 378                   best_bb = bb;
 379                 }
 380               else
 381                 {
 382                   if (!best_edge || e->count () > best_count)
 383                     {
 384                       best_count = e->count ();
 385                       best_edge = e;
 386                       best_bb = bb;
 387                     }
 388                 }
 389             }
 390         }
 391       bb = (basic_block) bb->aux;
 392     }
 393   while (bb != back_edge->dest);
 394
 395   if (best_bb)
 396     {
 397       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 398          the trace.  */
 399       if (back_edge->dest == trace->first)
 400         {
 401           trace->first = (basic_block) best_bb->aux;
 402         }
 403       else
 404         {
 405           basic_block prev_bb;
 406
 407           for (prev_bb = trace->first;
 408                prev_bb->aux != back_edge->dest;
 409                prev_bb = (basic_block) prev_bb->aux)
 410             ;
 411           prev_bb->aux = best_bb->aux;
 412
 413           /* Try to get rid of uncond jump to cond jump.  */
 414           if (single_succ_p (prev_bb))
 415             {
 416               basic_block header = single_succ (prev_bb);
 417
 418               /* Duplicate HEADER if it is a small block containing cond jump
 419                  in the end.  */
 420               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 421                   && !CROSSING_JUMP_P (BB_END (header)))
 422                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 423             }
 424         }
 425     }
 426   else
 427     {
 428       /* We have not found suitable loop tail so do no rotation.  */
 429       best_bb = back_edge->src;
 430     }
 431   best_bb->aux = NULL;
 432   return best_bb;
 433 }
 434
 435 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 436    not include basic blocks whose probability is lower than BRANCH_TH or whose
 437    frequency is lower than EXEC_TH into traces (or whose count is lower than
 438    COUNT_TH).  Store the new traces into TRACES and modify the number of
 439    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 440    The function expects starting basic blocks to be in *HEAP and will delete
 441    *HEAP and store starting points for the next round into new *HEAP.  */
 442
 443 static void
 444 find_traces_1_round (int branch_th, profile_count count_th,
 445                      struct trace *traces, int *n_traces, int round,
 446                      bb_heap_t **heap, int number_of_rounds)
 447 {
 448   /* Heap for discarded basic blocks which are possible starting points for
 449      the next round.  */
 450   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 451   bool for_size = optimize_function_for_size_p (cfun);
 452
 453   while (!(*heap)->empty ())
 454     {
 455       basic_block bb;
 456       struct trace *trace;
 457       edge best_edge, e;
 458       long key;
 459       edge_iterator ei;
 460
 461       bb = (*heap)->extract_min ();
 462       bbd[bb->index].heap = NULL;
 463       bbd[bb->index].node = NULL;
 464
 465       if (dump_file)
 466         fprintf (dump_file, "Getting bb %d\n", bb->index);
 467
 468       /* If the BB's frequency is too low, send BB to the next round.  When
 469          partitioning hot/cold blocks into separate sections, make sure all
 470          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 471          round.  When optimizing for size, do not push to next round.  */
 472
 473       if (!for_size
 474           && push_to_next_round_p (bb, round, number_of_rounds,
 475                                    count_th))
 476         {
 477           int key = bb_to_key (bb);
 478           bbd[bb->index].heap = new_heap;
 479           bbd[bb->index].node = new_heap->insert (key, bb);
 480
 481           if (dump_file)
 482             fprintf (dump_file,
 483                      "  Possible start point of next round: %d (key: %d)\n",
 484                      bb->index, key);
 485           continue;
 486         }
 487
 488       trace = traces + *n_traces;
 489       trace->first = bb;
 490       trace->round = round;
 491       trace->length = 0;
 492       bbd[bb->index].in_trace = *n_traces;
 493       (*n_traces)++;
 494
 495       do
 496         {
 497           profile_probability prob;
 498           int freq;
 499           bool ends_in_call;
 500
 501           /* The probability and frequency of the best edge.  */
 502           profile_probability best_prob = profile_probability::uninitialized ();
 503           int best_freq = INT_MIN / 2;
 504
 505           best_edge = NULL;
 506           mark_bb_visited (bb, *n_traces);
 507           trace->length++;
 508
 509           if (dump_file)
 510             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 511                      bb->index, *n_traces);
 512
 513           ends_in_call = block_ends_with_call_p (bb);
 514
 515           /* Select the successor that will be placed after BB.  */
 516           FOR_EACH_EDGE (e, ei, bb->succs)
 517             {
 518               gcc_assert (!(e->flags & EDGE_FAKE));
 519
 520               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 521                 continue;
 522
 523               if (bb_visited_trace (e->dest)
 524                   && bb_visited_trace (e->dest) != *n_traces)
 525                 continue;
 526
 527               /* If partitioning hot/cold basic blocks, don't consider edges
 528                  that cross section boundaries.  */
 529               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 530                 continue;
 531
 532               prob = e->probability;
 533               freq = e->dest->count.to_frequency (cfun);
 534
 535               /* The only sensible preference for a call instruction is the
 536                  fallthru edge.  Don't bother selecting anything else.  */
 537               if (ends_in_call)
 538                 {
 539                   if (e->flags & EDGE_CAN_FALLTHRU)
 540                     {
 541                       best_edge = e;
 542                       best_prob = prob;
 543                       best_freq = freq;
 544                     }
 545                   continue;
 546                 }
 547
 548               /* Edge that cannot be fallthru or improbable or infrequent
 549                  successor (i.e. it is unsuitable successor).  When optimizing
 550                  for size, ignore the probability and frequency.  */
 551               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 552                   || !prob.initialized_p ()
 553                   || ((prob.to_reg_br_prob_base () < branch_th
 554                       || e->count () < count_th) && (!for_size)))
 555                 continue;
 556
 557               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 558                                  best_edge))
 559                 {
 560                   best_edge = e;
 561                   best_prob = prob;
 562                   best_freq = freq;
 563                 }
 564             }
 565
 566           /* If the best destination has multiple predecessors and can be
 567              duplicated cheaper than a jump, don't allow it to be added to
 568              a trace; we'll duplicate it when connecting the traces later.
 569              However, we need to check that this duplication wouldn't leave
 570              the best destination with only crossing predecessors, because
 571              this would change its effective partition from hot to cold.  */
 572           if (best_edge
 573               && EDGE_COUNT (best_edge->dest->preds) >= 2
 574               && copy_bb_p (best_edge->dest, 0))
 575             {
 576               bool only_crossing_preds = true;
 577               edge e;
 578               edge_iterator ei;
 579               FOR_EACH_EDGE (e, ei, best_edge->dest->preds)
 580                 if (e != best_edge && !(e->flags & EDGE_CROSSING))
 581                   {
 582                     only_crossing_preds = false;
 583                     break;
 584                   }
 585               if (!only_crossing_preds)
 586                 best_edge = NULL;
 587             }
 588
 589           /* If the best destination has multiple successors or predecessors,
 590              don't allow it to be added when optimizing for size.  This makes
 591              sure predecessors with smaller index are handled before the best
 592              destinarion.  It breaks long trace and reduces long jumps.
 593
 594              Take if-then-else as an example.
 595                 A
 596                / \
 597               B   C
 598                \ /
 599                 D
 600              If we do not remove the best edge B->D/C->D, the final order might
 601              be A B D ... C.  C is at the end of the program.  If D's successors
 602              and D are complicated, might need long jumps for A->C and C->D.
 603              Similar issue for order: A C D ... B.
 604
 605              After removing the best edge, the final result will be ABCD/ ACBD.
 606              It does not add jump compared with the previous order.  But it
 607              reduces the possibility of long jumps.  */
 608           if (best_edge && for_size
 609               && (EDGE_COUNT (best_edge->dest->succs) > 1
 610                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 611             best_edge = NULL;
 612
 613           /* Add all non-selected successors to the heaps.  */
 614           FOR_EACH_EDGE (e, ei, bb->succs)
 615             {
 616               if (e == best_edge
 617                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 618                   || bb_visited_trace (e->dest))
 619                 continue;
 620
 621               key = bb_to_key (e->dest);
 622
 623               if (bbd[e->dest->index].heap)
 624                 {
 625                   /* E->DEST is already in some heap.  */
 626                   if (key != bbd[e->dest->index].node->get_key ())
 627                     {
 628                       if (dump_file)
 629                         {
 630                           fprintf (dump_file,
 631                                    "Changing key for bb %d from %ld to %ld.\n",
 632                                    e->dest->index,
 633                                    (long) bbd[e->dest->index].node->get_key (),
 634                                    key);
 635                         }
 636                       bbd[e->dest->index].heap->replace_key
 637                         (bbd[e->dest->index].node, key);
 638                     }
 639                 }
 640               else
 641                 {
 642                   bb_heap_t *which_heap = *heap;
 643
 644                   prob = e->probability;
 645
 646                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 647                       || (e->flags & EDGE_COMPLEX)
 648                       || !prob.initialized_p ()
 649                       || prob.to_reg_br_prob_base () < branch_th
 650                       || e->count () < count_th)
 651                     {
 652                       /* When partitioning hot/cold basic blocks, make sure
 653                          the cold blocks (and only the cold blocks) all get
 654                          pushed to the last round of trace collection.  When
 655                          optimizing for size, do not push to next round.  */
 656
 657                       if (!for_size && push_to_next_round_p (e->dest, round,
 658                                                              number_of_rounds,
 659                                                              count_th))
 660                         which_heap = new_heap;
 661                     }
 662
 663                   bbd[e->dest->index].heap = which_heap;
 664                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 665
 666                   if (dump_file)
 667                     {
 668                       fprintf (dump_file,
 669                                "  Possible start of %s round: %d (key: %ld)\n",
 670                                (which_heap == new_heap) ? "next" : "this",
 671                                e->dest->index, (long) key);
 672                     }
 673
 674                 }
 675             }
 676
 677           if (best_edge) /* Suitable successor was found.  */
 678             {
 679               if (bb_visited_trace (best_edge->dest) == *n_traces)
 680                 {
 681                   /* We do nothing with one basic block loops.  */
 682                   if (best_edge->dest != bb)
 683                     {
 684                       if (best_edge->count ()
 685                           > best_edge->dest->count.apply_scale (4, 5))
 686                         {
 687                           /* The loop has at least 4 iterations.  If the loop
 688                              header is not the first block of the function
 689                              we can rotate the loop.  */
 690
 691                           if (best_edge->dest
 692                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 693                             {
 694                               if (dump_file)
 695                                 {
 696                                   fprintf (dump_file,
 697                                            "Rotating loop %d - %d\n",
 698                                            best_edge->dest->index, bb->index);
 699                                 }
 700                               bb->aux = best_edge->dest;
 701                               bbd[best_edge->dest->index].in_trace =
 702                                                              (*n_traces) - 1;
 703                               bb = rotate_loop (best_edge, trace, *n_traces);
 704                             }
 705                         }
 706                       else
 707                         {
 708                           /* The loop has less than 4 iterations.  */
 709
 710                           if (single_succ_p (bb)
 711                               && copy_bb_p (best_edge->dest,
 712                                             optimize_edge_for_speed_p
 713                                             (best_edge)))
 714                             {
 715                               bb = copy_bb (best_edge->dest, best_edge, bb,
 716                                             *n_traces);
 717                               trace->length++;
 718                             }
 719                         }
 720                     }
 721
 722                   /* Terminate the trace.  */
 723                   break;
 724                 }
 725               else
 726                 {
 727                   /* Check for a situation
 728
 729                     A
 730                    /|
 731                   B |
 732                    \|
 733                     C
 734
 735                   where
 736                   AB->count () + BC->count () >= AC->count ().
 737                   (i.e. 2 * B->count >= AC->count )
 738                   Best ordering is then A B C.
 739
 740                   When optimizing for size, A B C is always the best order.
 741
 742                   This situation is created for example by:
 743
 744                   if (A) B;
 745                   C;
 746
 747                   */
 748
 749                   FOR_EACH_EDGE (e, ei, bb->succs)
 750                     if (e != best_edge
 751                         && (e->flags & EDGE_CAN_FALLTHRU)
 752                         && !(e->flags & EDGE_COMPLEX)
 753                         && !bb_visited_trace (e->dest)
 754                         && single_pred_p (e->dest)
 755                         && !(e->flags & EDGE_CROSSING)
 756                         && single_succ_p (e->dest)
 757                         && (single_succ_edge (e->dest)->flags
 758                             & EDGE_CAN_FALLTHRU)
 759                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 760                         && single_succ (e->dest) == best_edge->dest
 761                         && (e->dest->count.apply_scale (2, 1)
 762                             >= best_edge->count () || for_size))
 763                       {
 764                         best_edge = e;
 765                         if (dump_file)
 766                           fprintf (dump_file, "Selecting BB %d\n",
 767                                    best_edge->dest->index);
 768                         break;
 769                       }
 770
 771                   bb->aux = best_edge->dest;
 772                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 773                   bb = best_edge->dest;
 774                 }
 775             }
 776         }
 777       while (best_edge);
 778       trace->last = bb;
 779       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 780       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
 781         {
 782           bbd[trace->last->index].end_of_trace = *n_traces - 1;
 783           /* Update the cached maximum frequency for interesting predecessor
 784              edges for successors of the new trace end.  */
 785           FOR_EACH_EDGE (e, ei, trace->last->succs)
 786             if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
 787               bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
 788         }
 789
 790       /* The trace is terminated so we have to recount the keys in heap
 791          (some block can have a lower key because now one of its predecessors
 792          is an end of the trace).  */
 793       FOR_EACH_EDGE (e, ei, bb->succs)
 794         {
 795           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 796               || bb_visited_trace (e->dest))
 797             continue;
 798
 799           if (bbd[e->dest->index].heap)
 800             {
 801               key = bb_to_key (e->dest);
 802               if (key != bbd[e->dest->index].node->get_key ())
 803                 {
 804                   if (dump_file)
 805                     {
 806                       fprintf (dump_file,
 807                                "Changing key for bb %d from %ld to %ld.\n",
 808                                e->dest->index,
 809                                (long) bbd[e->dest->index].node->get_key (), key);
 810                     }
 811                   bbd[e->dest->index].heap->replace_key
 812                     (bbd[e->dest->index].node, key);
 813                 }
 814             }
 815         }
 816     }
 817
 818   delete (*heap);
 819
 820   /* "Return" the new heap.  */
 821   *heap = new_heap;
 822 }
 823
 824 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 825    it to trace after BB, mark OLD_BB visited and update pass' data structures
 826    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 827
 828 static basic_block
 829 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 830 {
 831   basic_block new_bb;
 832
 833   new_bb = duplicate_block (old_bb, e, bb);
 834   BB_COPY_PARTITION (new_bb, old_bb);
 835
 836   gcc_assert (e->dest == new_bb);
 837
 838   if (dump_file)
 839     fprintf (dump_file,
 840              "Duplicated bb %d (created bb %d)\n",
 841              old_bb->index, new_bb->index);
 842
 843   if (new_bb->index >= array_size
 844       || last_basic_block_for_fn (cfun) > array_size)
 845     {
 846       int i;
 847       int new_size;
 848
 849       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 850       new_size = GET_ARRAY_SIZE (new_size);
 851       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 852       for (i = array_size; i < new_size; i++)
 853         {
 854           bbd[i].start_of_trace = -1;
 855           bbd[i].end_of_trace = -1;
 856           bbd[i].in_trace = -1;
 857           bbd[i].visited = 0;
 858           bbd[i].priority = -1;
 859           bbd[i].heap = NULL;
 860           bbd[i].node = NULL;
 861         }
 862       array_size = new_size;
 863
 864       if (dump_file)
 865         {
 866           fprintf (dump_file,
 867                    "Growing the dynamic array to %d elements.\n",
 868                    array_size);
 869         }
 870     }
 871
 872   gcc_assert (!bb_visited_trace (e->dest));
 873   mark_bb_visited (new_bb, trace);
 874   new_bb->aux = bb->aux;
 875   bb->aux = new_bb;
 876
 877   bbd[new_bb->index].in_trace = trace;
 878
 879   return new_bb;
 880 }
 881
 882 /* Compute and return the key (for the heap) of the basic block BB.  */
 883
 884 static long
 885 bb_to_key (basic_block bb)
 886 {
 887   edge e;
 888   edge_iterator ei;
 889
 890   /* Use index as key to align with its original order.  */
 891   if (optimize_function_for_size_p (cfun))
 892     return bb->index;
 893
 894   /* Do not start in probably never executed blocks.  */
 895
 896   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 897       || probably_never_executed_bb_p (cfun, bb))
 898     return BB_FREQ_MAX;
 899
 900   /* Prefer blocks whose predecessor is an end of some trace
 901      or whose predecessor edge is EDGE_DFS_BACK.  */
 902   int priority = bbd[bb->index].priority;
 903   if (priority == -1)
 904     {
 905       priority = 0;
 906       FOR_EACH_EDGE (e, ei, bb->preds)
 907         {
 908           if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 909                && bbd[e->src->index].end_of_trace >= 0)
 910               || (e->flags & EDGE_DFS_BACK))
 911             {
 912               int edge_freq = EDGE_FREQUENCY (e);
 913
 914               if (edge_freq > priority)
 915                 priority = edge_freq;
 916             }
 917         }
 918       bbd[bb->index].priority = priority;
 919     }
 920
 921   if (priority)
 922     /* The block with priority should have significantly lower key.  */
 923     return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun));
 924
 925   return -bb->count.to_frequency (cfun);
 926 }
 927
 928 /* Return true when the edge E from basic block BB is better than the temporary
 929    best edge (details are in function).  The probability of edge E is PROB. The
 930    frequency of the successor is FREQ.  The current best probability is
 931    BEST_PROB, the best frequency is BEST_FREQ.
 932    The edge is considered to be equivalent when PROB does not differ much from
 933    BEST_PROB; similarly for frequency.  */
 934
 935 static bool
 936 better_edge_p (const_basic_block bb, const_edge e, profile_probability prob,
 937                int freq, profile_probability best_prob, int best_freq,
 938                const_edge cur_best_edge)
 939 {
 940   bool is_better_edge;
 941
 942   /* The BEST_* values do not have to be best, but can be a bit smaller than
 943      maximum values.  */
 944   profile_probability diff_prob = best_prob.apply_scale (1, 10);
 945   int diff_freq = best_freq / 10;
 946
 947   /* The smaller one is better to keep the original order.  */
 948   if (optimize_function_for_size_p (cfun))
 949     return !cur_best_edge
 950            || cur_best_edge->dest->index > e->dest->index;
 951
 952   /* Those edges are so expensive that continuing a trace is not useful
 953      performance wise.  */
 954   if (e->flags & (EDGE_ABNORMAL | EDGE_EH))
 955     return false;
 956
 957   if (prob > best_prob + diff_prob
 958       || (!best_prob.initialized_p ()
 959           && prob > profile_probability::guessed_never ()))
 960     /* The edge has higher probability than the temporary best edge.  */
 961     is_better_edge = true;
 962   else if (prob < best_prob - diff_prob)
 963     /* The edge has lower probability than the temporary best edge.  */
 964     is_better_edge = false;
 965   else if (freq < best_freq - diff_freq)
 966     /* The edge and the temporary best edge  have almost equivalent
 967        probabilities.  The higher frequency of a successor now means
 968        that there is another edge going into that successor.
 969        This successor has lower frequency so it is better.  */
 970     is_better_edge = true;
 971   else if (freq > best_freq + diff_freq)
 972     /* This successor has higher frequency so it is worse.  */
 973     is_better_edge = false;
 974   else if (e->dest->prev_bb == bb)
 975     /* The edges have equivalent probabilities and the successors
 976        have equivalent frequencies.  Select the previous successor.  */
 977     is_better_edge = true;
 978   else
 979     is_better_edge = false;
 980
 981   return is_better_edge;
 982 }
 983
 984 /* Return true when the edge E is better than the temporary best edge
 985    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
 986    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
 987    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
 988    TRACES record the information about traces.
 989    When optimizing for size, the edge with smaller index is better.
 990    When optimizing for speed, the edge with bigger probability or longer trace
 991    is better.  */
 992
 993 static bool
 994 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
 995                        const_edge cur_best_edge, struct trace *traces)
 996 {
 997   int e_index;
 998   int b_index;
 999   bool is_better_edge;
1000
1001   if (!cur_best_edge)
1002     return true;
1003
1004   if (optimize_function_for_size_p (cfun))
1005     {
1006       e_index = src_index_p ? e->src->index : e->dest->index;
1007       b_index = src_index_p ? cur_best_edge->src->index
1008                               : cur_best_edge->dest->index;
1009       /* The smaller one is better to keep the original order.  */
1010       return b_index > e_index;
1011     }
1012
1013   if (src_index_p)
1014     {
1015       e_index = e->src->index;
1016
1017       if (e->probability > cur_best_edge->probability)
1018         /* The edge has higher probability than the temporary best edge.  */
1019         is_better_edge = true;
1020       else if (e->probability < cur_best_edge->probability)
1021         /* The edge has lower probability than the temporary best edge.  */
1022         is_better_edge = false;
1023       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1024         /* The edge and the temporary best edge have equivalent probabilities.
1025            The edge with longer trace is better.  */
1026         is_better_edge = true;
1027       else
1028         is_better_edge = false;
1029     }
1030   else
1031     {
1032       e_index = e->dest->index;
1033
1034       if (e->probability > cur_best_edge->probability)
1035         /* The edge has higher probability than the temporary best edge.  */
1036         is_better_edge = true;
1037       else if (e->probability < cur_best_edge->probability)
1038         /* The edge has lower probability than the temporary best edge.  */
1039         is_better_edge = false;
1040       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1041         /* The edge and the temporary best edge have equivalent probabilities.
1042            The edge with longer trace is better.  */
1043         is_better_edge = true;
1044       else
1045         is_better_edge = false;
1046     }
1047
1048   return is_better_edge;
1049 }
1050
1051 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1052
1053 static void
1054 connect_traces (int n_traces, struct trace *traces)
1055 {
1056   int i;
1057   bool *connected;
1058   bool two_passes;
1059   int last_trace;
1060   int current_pass;
1061   int current_partition;
1062   profile_count count_threshold;
1063   bool for_size = optimize_function_for_size_p (cfun);
1064
1065   count_threshold = max_entry_count.apply_scale (DUPLICATION_THRESHOLD, 1000);
1066
1067   connected = XCNEWVEC (bool, n_traces);
1068   last_trace = -1;
1069   current_pass = 1;
1070   current_partition = BB_PARTITION (traces[0].first);
1071   two_passes = false;
1072
1073   if (crtl->has_bb_partition)
1074     for (i = 0; i < n_traces && !two_passes; i++)
1075       if (BB_PARTITION (traces[0].first)
1076           != BB_PARTITION (traces[i].first))
1077         two_passes = true;
1078
1079   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1080     {
1081       int t = i;
1082       int t2;
1083       edge e, best;
1084       int best_len;
1085
1086       if (i >= n_traces)
1087         {
1088           gcc_assert (two_passes && current_pass == 1);
1089           i = 0;
1090           t = i;
1091           current_pass = 2;
1092           if (current_partition == BB_HOT_PARTITION)
1093             current_partition = BB_COLD_PARTITION;
1094           else
1095             current_partition = BB_HOT_PARTITION;
1096         }
1097
1098       if (connected[t])
1099         continue;
1100
1101       if (two_passes
1102           && BB_PARTITION (traces[t].first) != current_partition)
1103         continue;
1104
1105       connected[t] = true;
1106
1107       /* Find the predecessor traces.  */
1108       for (t2 = t; t2 > 0;)
1109         {
1110           edge_iterator ei;
1111           best = NULL;
1112           best_len = 0;
1113           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1114             {
1115               int si = e->src->index;
1116
1117               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1118                   && (e->flags & EDGE_CAN_FALLTHRU)
1119                   && !(e->flags & EDGE_COMPLEX)
1120                   && bbd[si].end_of_trace >= 0
1121                   && !connected[bbd[si].end_of_trace]
1122                   && (BB_PARTITION (e->src) == current_partition)
1123                   && connect_better_edge_p (e, true, best_len, best, traces))
1124                 {
1125                   best = e;
1126                   best_len = traces[bbd[si].end_of_trace].length;
1127                 }
1128             }
1129           if (best)
1130             {
1131               best->src->aux = best->dest;
1132               t2 = bbd[best->src->index].end_of_trace;
1133               connected[t2] = true;
1134
1135               if (dump_file)
1136                 {
1137                   fprintf (dump_file, "Connection: %d %d\n",
1138                            best->src->index, best->dest->index);
1139                 }
1140             }
1141           else
1142             break;
1143         }
1144
1145       if (last_trace >= 0)
1146         traces[last_trace].last->aux = traces[t2].first;
1147       last_trace = t;
1148
1149       /* Find the successor traces.  */
1150       while (1)
1151         {
1152           /* Find the continuation of the chain.  */
1153           edge_iterator ei;
1154           best = NULL;
1155           best_len = 0;
1156           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1157             {
1158               int di = e->dest->index;
1159
1160               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1161                   && (e->flags & EDGE_CAN_FALLTHRU)
1162                   && !(e->flags & EDGE_COMPLEX)
1163                   && bbd[di].start_of_trace >= 0
1164                   && !connected[bbd[di].start_of_trace]
1165                   && (BB_PARTITION (e->dest) == current_partition)
1166                   && connect_better_edge_p (e, false, best_len, best, traces))
1167                 {
1168                   best = e;
1169                   best_len = traces[bbd[di].start_of_trace].length;
1170                 }
1171             }
1172
1173           if (for_size)
1174             {
1175               if (!best)
1176                 /* Stop finding the successor traces.  */
1177                 break;
1178
1179               /* It is OK to connect block n with block n + 1 or a block
1180                  before n.  For others, only connect to the loop header.  */
1181               if (best->dest->index > (traces[t].last->index + 1))
1182                 {
1183                   int count = EDGE_COUNT (best->dest->preds);
1184
1185                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1186                     if (e->flags & EDGE_DFS_BACK)
1187                       count--;
1188
1189                   /* If dest has multiple predecessors, skip it.  We expect
1190                      that one predecessor with smaller index connects with it
1191                      later.  */
1192                   if (count != 1)
1193                     break;
1194                 }
1195
1196               /* Only connect Trace n with Trace n + 1.  It is conservative
1197                  to keep the order as close as possible to the original order.
1198                  It also helps to reduce long jumps.  */
1199               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1200                 break;
1201
1202               if (dump_file)
1203                 fprintf (dump_file, "Connection: %d %d\n",
1204                          best->src->index, best->dest->index);
1205
1206               t = bbd[best->dest->index].start_of_trace;
1207               traces[last_trace].last->aux = traces[t].first;
1208               connected[t] = true;
1209               last_trace = t;
1210             }
1211           else if (best)
1212             {
1213               if (dump_file)
1214                 {
1215                   fprintf (dump_file, "Connection: %d %d\n",
1216                            best->src->index, best->dest->index);
1217                 }
1218               t = bbd[best->dest->index].start_of_trace;
1219               traces[last_trace].last->aux = traces[t].first;
1220               connected[t] = true;
1221               last_trace = t;
1222             }
1223           else
1224             {
1225               /* Try to connect the traces by duplication of 1 block.  */
1226               edge e2;
1227               basic_block next_bb = NULL;
1228               bool try_copy = false;
1229
1230               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1231                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1232                     && (e->flags & EDGE_CAN_FALLTHRU)
1233                     && !(e->flags & EDGE_COMPLEX)
1234                     && (!best || e->probability > best->probability))
1235                   {
1236                     edge_iterator ei;
1237                     edge best2 = NULL;
1238                     int best2_len = 0;
1239
1240                     /* If the destination is a start of a trace which is only
1241                        one block long, then no need to search the successor
1242                        blocks of the trace.  Accept it.  */
1243                     if (bbd[e->dest->index].start_of_trace >= 0
1244                         && traces[bbd[e->dest->index].start_of_trace].length
1245                            == 1)
1246                       {
1247                         best = e;
1248                         try_copy = true;
1249                         continue;
1250                       }
1251
1252                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1253                       {
1254                         int di = e2->dest->index;
1255
1256                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1257                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1258                                 && !(e2->flags & EDGE_COMPLEX)
1259                                 && bbd[di].start_of_trace >= 0
1260                                 && !connected[bbd[di].start_of_trace]
1261                                 && BB_PARTITION (e2->dest) == current_partition
1262                                 && e2->count () >= count_threshold
1263                                 && (!best2
1264                                     || e2->probability > best2->probability
1265                                     || (e2->probability == best2->probability
1266                                         && traces[bbd[di].start_of_trace].length
1267                                            > best2_len))))
1268                           {
1269                             best = e;
1270                             best2 = e2;
1271                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1272                               best2_len = traces[bbd[di].start_of_trace].length;
1273                             else
1274                               best2_len = INT_MAX;
1275                             next_bb = e2->dest;
1276                             try_copy = true;
1277                           }
1278                       }
1279                   }
1280
1281               /* Copy tiny blocks always; copy larger blocks only when the
1282                  edge is traversed frequently enough.  */
1283               if (try_copy
1284                   && BB_PARTITION (best->src) == BB_PARTITION (best->dest)
1285                   && copy_bb_p (best->dest,
1286                                 optimize_edge_for_speed_p (best)
1287                                 && (!best->count ().initialized_p ()
1288                                     || best->count () >= count_threshold)))
1289                 {
1290                   basic_block new_bb;
1291
1292                   if (dump_file)
1293                     {
1294                       fprintf (dump_file, "Connection: %d %d ",
1295                                traces[t].last->index, best->dest->index);
1296                       if (!next_bb)
1297                         fputc ('\n', dump_file);
1298                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1299                         fprintf (dump_file, "exit\n");
1300                       else
1301                         fprintf (dump_file, "%d\n", next_bb->index);
1302                     }
1303
1304                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1305                   traces[t].last = new_bb;
1306                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1307                     {
1308                       t = bbd[next_bb->index].start_of_trace;
1309                       traces[last_trace].last->aux = traces[t].first;
1310                       connected[t] = true;
1311                       last_trace = t;
1312                     }
1313                   else
1314                     break;      /* Stop finding the successor traces.  */
1315                 }
1316               else
1317                 break;  /* Stop finding the successor traces.  */
1318             }
1319         }
1320     }
1321
1322   if (dump_file)
1323     {
1324       basic_block bb;
1325
1326       fprintf (dump_file, "Final order:\n");
1327       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1328         fprintf (dump_file, "%d ", bb->index);
1329       fprintf (dump_file, "\n");
1330       fflush (dump_file);
1331     }
1332
1333   FREE (connected);
1334 }
1335
1336 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1337    when code size is allowed to grow by duplication.  */
1338
1339 static bool
1340 copy_bb_p (const_basic_block bb, int code_may_grow)
1341 {
1342   int size = 0;
1343   int max_size = uncond_jump_length;
1344   rtx_insn *insn;
1345
1346   if (!bb->count.to_frequency (cfun))
1347     return false;
1348   if (EDGE_COUNT (bb->preds) < 2)
1349     return false;
1350   if (!can_duplicate_block_p (bb))
1351     return false;
1352
1353   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1354   if (EDGE_COUNT (bb->succs) > 8)
1355     return false;
1356
1357   if (code_may_grow && optimize_bb_for_speed_p (bb))
1358     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1359
1360   FOR_BB_INSNS (bb, insn)
1361     {
1362       if (INSN_P (insn))
1363         size += get_attr_min_length (insn);
1364     }
1365
1366   if (size <= max_size)
1367     return true;
1368
1369   if (dump_file)
1370     {
1371       fprintf (dump_file,
1372                "Block %d can't be copied because its size = %d.\n",
1373                bb->index, size);
1374     }
1375
1376   return false;
1377 }
1378
1379 /* Return the length of unconditional jump instruction.  */
1380
1381 int
1382 get_uncond_jump_length (void)
1383 {
1384   int length;
1385
1386   start_sequence ();
1387   rtx_code_label *label = emit_label (gen_label_rtx ());
1388   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1389   length = get_attr_min_length (jump);
1390   end_sequence ();
1391
1392   return length;
1393 }
1394
1395 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1396    Duplicate the landing pad and split the edges so that no EH edge
1397    crosses partitions.  */
1398
1399 static void
1400 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1401 {
1402   eh_landing_pad new_lp;
1403   basic_block new_bb, last_bb, post_bb;
1404   rtx_insn *jump;
1405   unsigned new_partition;
1406   edge_iterator ei;
1407   edge e;
1408
1409   /* Generate the new landing-pad structure.  */
1410   new_lp = gen_eh_landing_pad (old_lp->region);
1411   new_lp->post_landing_pad = old_lp->post_landing_pad;
1412   new_lp->landing_pad = gen_label_rtx ();
1413   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1414
1415   /* Put appropriate instructions in new bb.  */
1416   rtx_code_label *new_label = emit_label (new_lp->landing_pad);
1417
1418   expand_dw2_landing_pad_for_region (old_lp->region);
1419
1420   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1421   post_bb = single_succ (post_bb);
1422   rtx_code_label *post_label = block_label (post_bb);
1423   jump = emit_jump_insn (targetm.gen_jump (post_label));
1424   JUMP_LABEL (jump) = post_label;
1425
1426   /* Create new basic block to be dest for lp.  */
1427   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1428   new_bb = create_basic_block (new_label, jump, last_bb);
1429   new_bb->aux = last_bb->aux;
1430   new_bb->count = post_bb->count;
1431   last_bb->aux = new_bb;
1432
1433   emit_barrier_after_bb (new_bb);
1434
1435   make_single_succ_edge (new_bb, post_bb, 0);
1436
1437   /* Make sure new bb is in the other partition.  */
1438   new_partition = BB_PARTITION (old_bb);
1439   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1440   BB_SET_PARTITION (new_bb, new_partition);
1441
1442   /* Fix up the edges.  */
1443   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1444     if (BB_PARTITION (e->src) == new_partition)
1445       {
1446         rtx_insn *insn = BB_END (e->src);
1447         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1448
1449         gcc_assert (note != NULL);
1450         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1451         XEXP (note, 0) = GEN_INT (new_lp->index);
1452
1453         /* Adjust the edge to the new destination.  */
1454         redirect_edge_succ (e, new_bb);
1455       }
1456     else
1457       ei_next (&ei);
1458 }
1459
1460
1461 /* Ensure that all hot bbs are included in a hot path through the
1462    procedure. This is done by calling this function twice, once
1463    with WALK_UP true (to look for paths from the entry to hot bbs) and
1464    once with WALK_UP false (to look for paths from hot bbs to the exit).
1465    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1466    to BBS_IN_HOT_PARTITION.  */
1467
1468 static unsigned int
1469 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1470                     vec<basic_block> *bbs_in_hot_partition)
1471 {
1472   /* Callers check this.  */
1473   gcc_checking_assert (cold_bb_count);
1474
1475   /* Keep examining hot bbs while we still have some left to check
1476      and there are remaining cold bbs.  */
1477   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1478   while (! hot_bbs_to_check.is_empty ()
1479          && cold_bb_count)
1480     {
1481       basic_block bb = hot_bbs_to_check.pop ();
1482       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1483       edge e;
1484       edge_iterator ei;
1485       profile_probability highest_probability
1486                                  = profile_probability::uninitialized ();
1487       profile_count highest_count = profile_count::uninitialized ();
1488       bool found = false;
1489
1490       /* Walk the preds/succs and check if there is at least one already
1491          marked hot. Keep track of the most frequent pred/succ so that we
1492          can mark it hot if we don't find one.  */
1493       FOR_EACH_EDGE (e, ei, edges)
1494         {
1495           basic_block reach_bb = walk_up ? e->src : e->dest;
1496
1497           if (e->flags & EDGE_DFS_BACK)
1498             continue;
1499
1500           /* Do not expect profile insanities when profile was not adjusted.  */
1501           if (e->probability == profile_probability::never ()
1502               || e->count () == profile_count::zero ())
1503             continue;
1504
1505           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1506           {
1507             found = true;
1508             break;
1509           }
1510           /* The following loop will look for the hottest edge via
1511              the edge count, if it is non-zero, then fallback to the edge
1512              frequency and finally the edge probability.  */
1513           if (!(e->count () > highest_count))
1514             highest_count = e->count ();
1515           if (!highest_probability.initialized_p ()
1516               || e->probability > highest_probability)
1517             highest_probability = e->probability;
1518         }
1519
1520       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1521          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1522          then the most frequent pred (or succ) needs to be adjusted.  In the
1523          case where multiple preds/succs have the same frequency (e.g. a
1524          50-50 branch), then both will be adjusted.  */
1525       if (found)
1526         continue;
1527
1528       FOR_EACH_EDGE (e, ei, edges)
1529         {
1530           if (e->flags & EDGE_DFS_BACK)
1531             continue;
1532           /* Do not expect profile insanities when profile was not adjusted.  */
1533           if (e->probability == profile_probability::never ()
1534               || e->count () == profile_count::zero ())
1535             continue;
1536           /* Select the hottest edge using the edge count, if it is non-zero,
1537              then fallback to the edge frequency and finally the edge
1538              probability.  */
1539           if (highest_count.initialized_p ())
1540             {
1541               if (!(e->count () >= highest_count))
1542                 continue;
1543             }
1544           else if (!(e->probability >= highest_probability))
1545             continue;
1546
1547           basic_block reach_bb = walk_up ? e->src : e->dest;
1548
1549           /* We have a hot bb with an immediate dominator that is cold.
1550              The dominator needs to be re-marked hot.  */
1551           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1552           if (dump_file)
1553             fprintf (dump_file, "Promoting bb %i to hot partition to sanitize "
1554                      "profile of bb %i in %s walk\n", reach_bb->index,
1555                      bb->index, walk_up ? "backward" : "forward");
1556           cold_bb_count--;
1557
1558           /* Now we need to examine newly-hot reach_bb to see if it is also
1559              dominated by a cold bb.  */
1560           bbs_in_hot_partition->safe_push (reach_bb);
1561           hot_bbs_to_check.safe_push (reach_bb);
1562         }
1563     }
1564
1565   return cold_bb_count;
1566 }
1567
1568
1569 /* Find the basic blocks that are rarely executed and need to be moved to
1570    a separate section of the .o file (to cut down on paging and improve
1571    cache locality).  Return a vector of all edges that cross.  */
1572
1573 static vec<edge>
1574 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1575 {
1576   vec<edge> crossing_edges = vNULL;
1577   basic_block bb;
1578   edge e;
1579   edge_iterator ei;
1580   unsigned int cold_bb_count = 0;
1581   auto_vec<basic_block> bbs_in_hot_partition;
1582
1583   propagate_unlikely_bbs_forward ();
1584
1585   /* Mark which partition (hot/cold) each basic block belongs in.  */
1586   FOR_EACH_BB_FN (bb, cfun)
1587     {
1588       bool cold_bb = false;
1589
1590       if (probably_never_executed_bb_p (cfun, bb))
1591         {
1592           /* Handle profile insanities created by upstream optimizations
1593              by also checking the incoming edge weights. If there is a non-cold
1594              incoming edge, conservatively prevent this block from being split
1595              into the cold section.  */
1596           cold_bb = true;
1597           FOR_EACH_EDGE (e, ei, bb->preds)
1598             if (!probably_never_executed_edge_p (cfun, e))
1599               {
1600                 cold_bb = false;
1601                 break;
1602               }
1603         }
1604       if (cold_bb)
1605         {
1606           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1607           cold_bb_count++;
1608         }
1609       else
1610         {
1611           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1612           bbs_in_hot_partition.safe_push (bb);
1613         }
1614     }
1615
1616   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1617      Several different possibilities may include cold bbs along all paths
1618      to/from a hot bb. One is that there are edge weight insanities
1619      due to optimization phases that do not properly update basic block profile
1620      counts. The second is that the entry of the function may not be hot, because
1621      it is entered fewer times than the number of profile training runs, but there
1622      is a loop inside the function that causes blocks within the function to be
1623      above the threshold for hotness. This is fixed by walking up from hot bbs
1624      to the entry block, and then down from hot bbs to the exit, performing
1625      partitioning fixups as necessary.  */
1626   if (cold_bb_count)
1627     {
1628       mark_dfs_back_edges ();
1629       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1630                                           &bbs_in_hot_partition);
1631       if (cold_bb_count)
1632         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1633
1634       hash_set <basic_block> set;
1635       find_bbs_reachable_by_hot_paths (&set);
1636       FOR_EACH_BB_FN (bb, cfun)
1637         if (!set.contains (bb))
1638           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1639     }
1640
1641   /* The format of .gcc_except_table does not allow landing pads to
1642      be in a different partition as the throw.  Fix this by either
1643      moving or duplicating the landing pads.  */
1644   if (cfun->eh->lp_array)
1645     {
1646       unsigned i;
1647       eh_landing_pad lp;
1648
1649       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1650         {
1651           bool all_same, all_diff;
1652
1653           if (lp == NULL
1654               || lp->landing_pad == NULL_RTX
1655               || !LABEL_P (lp->landing_pad))
1656             continue;
1657
1658           all_same = all_diff = true;
1659           bb = BLOCK_FOR_INSN (lp->landing_pad);
1660           FOR_EACH_EDGE (e, ei, bb->preds)
1661             {
1662               gcc_assert (e->flags & EDGE_EH);
1663               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1664                 all_diff = false;
1665               else
1666                 all_same = false;
1667             }
1668
1669           if (all_same)
1670             ;
1671           else if (all_diff)
1672             {
1673               int which = BB_PARTITION (bb);
1674               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1675               BB_SET_PARTITION (bb, which);
1676             }
1677           else
1678             fix_up_crossing_landing_pad (lp, bb);
1679         }
1680     }
1681
1682   /* Mark every edge that crosses between sections.  */
1683
1684   FOR_EACH_BB_FN (bb, cfun)
1685     FOR_EACH_EDGE (e, ei, bb->succs)
1686       {
1687         unsigned int flags = e->flags;
1688
1689         /* We should never have EDGE_CROSSING set yet.  */
1690         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1691
1692         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1693             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1694             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1695           {
1696             crossing_edges.safe_push (e);
1697             flags |= EDGE_CROSSING;
1698           }
1699
1700         /* Now that we've split eh edges as appropriate, allow landing pads
1701            to be merged with the post-landing pads.  */
1702         flags &= ~EDGE_PRESERVE;
1703
1704         e->flags = flags;
1705       }
1706
1707   return crossing_edges;
1708 }
1709
1710 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1711
1712 static void
1713 set_edge_can_fallthru_flag (void)
1714 {
1715   basic_block bb;
1716
1717   FOR_EACH_BB_FN (bb, cfun)
1718     {
1719       edge e;
1720       edge_iterator ei;
1721
1722       FOR_EACH_EDGE (e, ei, bb->succs)
1723         {
1724           e->flags &= ~EDGE_CAN_FALLTHRU;
1725
1726           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1727           if (e->flags & EDGE_FALLTHRU)
1728             e->flags |= EDGE_CAN_FALLTHRU;
1729         }
1730
1731       /* If the BB ends with an invertible condjump all (2) edges are
1732          CAN_FALLTHRU edges.  */
1733       if (EDGE_COUNT (bb->succs) != 2)
1734         continue;
1735       if (!any_condjump_p (BB_END (bb)))
1736         continue;
1737
1738       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1739       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1740         continue;
1741       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1742       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1743       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1744     }
1745 }
1746
1747 /* If any destination of a crossing edge does not have a label, add label;
1748    Convert any easy fall-through crossing edges to unconditional jumps.  */
1749
1750 static void
1751 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1752 {
1753   size_t i;
1754   edge e;
1755
1756   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1757     {
1758       basic_block src = e->src;
1759       basic_block dest = e->dest;
1760       rtx_jump_insn *new_jump;
1761
1762       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1763         continue;
1764
1765       /* Make sure dest has a label.  */
1766       rtx_code_label *label = block_label (dest);
1767
1768       /* Nothing to do for non-fallthru edges.  */
1769       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1770         continue;
1771       if ((e->flags & EDGE_FALLTHRU) == 0)
1772         continue;
1773
1774       /* If the block does not end with a control flow insn, then we
1775          can trivially add a jump to the end to fixup the crossing.
1776          Otherwise the jump will have to go in a new bb, which will
1777          be handled by fix_up_fall_thru_edges function.  */
1778       if (control_flow_insn_p (BB_END (src)))
1779         continue;
1780
1781       /* Make sure there's only one successor.  */
1782       gcc_assert (single_succ_p (src));
1783
1784       new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1785       BB_END (src) = new_jump;
1786       JUMP_LABEL (new_jump) = label;
1787       LABEL_NUSES (label) += 1;
1788
1789       emit_barrier_after_bb (src);
1790
1791       /* Mark edge as non-fallthru.  */
1792       e->flags &= ~EDGE_FALLTHRU;
1793     }
1794 }
1795
1796 /* Find any bb's where the fall-through edge is a crossing edge (note that
1797    these bb's must also contain a conditional jump or end with a call
1798    instruction; we've already dealt with fall-through edges for blocks
1799    that didn't have a conditional jump or didn't end with call instruction
1800    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1801    edge to non-crossing edge by inserting a new bb to fall-through into.
1802    The new bb will contain an unconditional jump (crossing edge) to the
1803    original fall through destination.  */
1804
1805 static void
1806 fix_up_fall_thru_edges (void)
1807 {
1808   basic_block cur_bb;
1809
1810   FOR_EACH_BB_FN (cur_bb, cfun)
1811     {
1812       edge succ1;
1813       edge succ2;
1814       edge fall_thru = NULL;
1815       edge cond_jump = NULL;
1816
1817       fall_thru = NULL;
1818       if (EDGE_COUNT (cur_bb->succs) > 0)
1819         succ1 = EDGE_SUCC (cur_bb, 0);
1820       else
1821         succ1 = NULL;
1822
1823       if (EDGE_COUNT (cur_bb->succs) > 1)
1824         succ2 = EDGE_SUCC (cur_bb, 1);
1825       else
1826         succ2 = NULL;
1827
1828       /* Find the fall-through edge.  */
1829
1830       if (succ1
1831           && (succ1->flags & EDGE_FALLTHRU))
1832         {
1833           fall_thru = succ1;
1834           cond_jump = succ2;
1835         }
1836       else if (succ2
1837                && (succ2->flags & EDGE_FALLTHRU))
1838         {
1839           fall_thru = succ2;
1840           cond_jump = succ1;
1841         }
1842       else if (succ2 && EDGE_COUNT (cur_bb->succs) > 2)
1843         fall_thru = find_fallthru_edge (cur_bb->succs);
1844
1845       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1846         {
1847           /* Check to see if the fall-thru edge is a crossing edge.  */
1848
1849           if (fall_thru->flags & EDGE_CROSSING)
1850             {
1851               /* The fall_thru edge crosses; now check the cond jump edge, if
1852                  it exists.  */
1853
1854               bool cond_jump_crosses = true;
1855               int invert_worked = 0;
1856               rtx_insn *old_jump = BB_END (cur_bb);
1857
1858               /* Find the jump instruction, if there is one.  */
1859
1860               if (cond_jump)
1861                 {
1862                   if (!(cond_jump->flags & EDGE_CROSSING))
1863                     cond_jump_crosses = false;
1864
1865                   /* We know the fall-thru edge crosses; if the cond
1866                      jump edge does NOT cross, and its destination is the
1867                      next block in the bb order, invert the jump
1868                      (i.e. fix it so the fall through does not cross and
1869                      the cond jump does).  */
1870
1871                   if (!cond_jump_crosses)
1872                     {
1873                       /* Find label in fall_thru block. We've already added
1874                          any missing labels, so there must be one.  */
1875
1876                       rtx_code_label *fall_thru_label
1877                         = block_label (fall_thru->dest);
1878
1879                       if (old_jump && fall_thru_label)
1880                         {
1881                           rtx_jump_insn *old_jump_insn
1882                             = dyn_cast <rtx_jump_insn *> (old_jump);
1883                           if (old_jump_insn)
1884                             invert_worked = invert_jump (old_jump_insn,
1885                                                          fall_thru_label, 0);
1886                         }
1887
1888                       if (invert_worked)
1889                         {
1890                           fall_thru->flags &= ~EDGE_FALLTHRU;
1891                           cond_jump->flags |= EDGE_FALLTHRU;
1892                           update_br_prob_note (cur_bb);
1893                           std::swap (fall_thru, cond_jump);
1894                           cond_jump->flags |= EDGE_CROSSING;
1895                           fall_thru->flags &= ~EDGE_CROSSING;
1896                         }
1897                     }
1898                 }
1899
1900               if (cond_jump_crosses || !invert_worked)
1901                 {
1902                   /* This is the case where both edges out of the basic
1903                      block are crossing edges. Here we will fix up the
1904                      fall through edge. The jump edge will be taken care
1905                      of later.  The EDGE_CROSSING flag of fall_thru edge
1906                      is unset before the call to force_nonfallthru
1907                      function because if a new basic-block is created
1908                      this edge remains in the current section boundary
1909                      while the edge between new_bb and the fall_thru->dest
1910                      becomes EDGE_CROSSING.  */
1911
1912                   fall_thru->flags &= ~EDGE_CROSSING;
1913                   basic_block new_bb = force_nonfallthru (fall_thru);
1914
1915                   if (new_bb)
1916                     {
1917                       new_bb->aux = cur_bb->aux;
1918                       cur_bb->aux = new_bb;
1919
1920                       /* This is done by force_nonfallthru_and_redirect.  */
1921                       gcc_assert (BB_PARTITION (new_bb)
1922                                   == BB_PARTITION (cur_bb));
1923
1924                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1925                     }
1926                   else
1927                     {
1928                       /* If a new basic-block was not created; restore
1929                          the EDGE_CROSSING flag.  */
1930                       fall_thru->flags |= EDGE_CROSSING;
1931                     }
1932
1933                   /* Add barrier after new jump */
1934                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1935                 }
1936             }
1937         }
1938     }
1939 }
1940
1941 /* This function checks the destination block of a "crossing jump" to
1942    see if it has any crossing predecessors that begin with a code label
1943    and end with an unconditional jump.  If so, it returns that predecessor
1944    block.  (This is to avoid creating lots of new basic blocks that all
1945    contain unconditional jumps to the same destination).  */
1946
1947 static basic_block
1948 find_jump_block (basic_block jump_dest)
1949 {
1950   basic_block source_bb = NULL;
1951   edge e;
1952   rtx_insn *insn;
1953   edge_iterator ei;
1954
1955   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1956     if (e->flags & EDGE_CROSSING)
1957       {
1958         basic_block src = e->src;
1959
1960         /* Check each predecessor to see if it has a label, and contains
1961            only one executable instruction, which is an unconditional jump.
1962            If so, we can use it.  */
1963
1964         if (LABEL_P (BB_HEAD (src)))
1965           for (insn = BB_HEAD (src);
1966                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1967                insn = NEXT_INSN (insn))
1968             {
1969               if (INSN_P (insn)
1970                   && insn == BB_END (src)
1971                   && JUMP_P (insn)
1972                   && !any_condjump_p (insn))
1973                 {
1974                   source_bb = src;
1975                   break;
1976                 }
1977             }
1978
1979         if (source_bb)
1980           break;
1981       }
1982
1983   return source_bb;
1984 }
1985
1986 /* Find all BB's with conditional jumps that are crossing edges;
1987    insert a new bb and make the conditional jump branch to the new
1988    bb instead (make the new bb same color so conditional branch won't
1989    be a 'crossing' edge).  Insert an unconditional jump from the
1990    new bb to the original destination of the conditional jump.  */
1991
1992 static void
1993 fix_crossing_conditional_branches (void)
1994 {
1995   basic_block cur_bb;
1996   basic_block new_bb;
1997   basic_block dest;
1998   edge succ1;
1999   edge succ2;
2000   edge crossing_edge;
2001   edge new_edge;
2002   rtx set_src;
2003   rtx old_label = NULL_RTX;
2004   rtx_code_label *new_label;
2005
2006   FOR_EACH_BB_FN (cur_bb, cfun)
2007     {
2008       crossing_edge = NULL;
2009       if (EDGE_COUNT (cur_bb->succs) > 0)
2010         succ1 = EDGE_SUCC (cur_bb, 0);
2011       else
2012         succ1 = NULL;
2013
2014       if (EDGE_COUNT (cur_bb->succs) > 1)
2015         succ2 = EDGE_SUCC (cur_bb, 1);
2016       else
2017         succ2 = NULL;
2018
2019       /* We already took care of fall-through edges, so only one successor
2020          can be a crossing edge.  */
2021
2022       if (succ1 && (succ1->flags & EDGE_CROSSING))
2023         crossing_edge = succ1;
2024       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2025         crossing_edge = succ2;
2026
2027       if (crossing_edge)
2028         {
2029           rtx_insn *old_jump = BB_END (cur_bb);
2030
2031           /* Check to make sure the jump instruction is a
2032              conditional jump.  */
2033
2034           set_src = NULL_RTX;
2035
2036           if (any_condjump_p (old_jump))
2037             {
2038               if (GET_CODE (PATTERN (old_jump)) == SET)
2039                 set_src = SET_SRC (PATTERN (old_jump));
2040               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2041                 {
2042                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2043                   if (GET_CODE (set_src) == SET)
2044                     set_src = SET_SRC (set_src);
2045                   else
2046                     set_src = NULL_RTX;
2047                 }
2048             }
2049
2050           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2051             {
2052               rtx_jump_insn *old_jump_insn =
2053                         as_a <rtx_jump_insn *> (old_jump);
2054
2055               if (GET_CODE (XEXP (set_src, 1)) == PC)
2056                 old_label = XEXP (set_src, 2);
2057               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2058                 old_label = XEXP (set_src, 1);
2059
2060               /* Check to see if new bb for jumping to that dest has
2061                  already been created; if so, use it; if not, create
2062                  a new one.  */
2063
2064               new_bb = find_jump_block (crossing_edge->dest);
2065
2066               if (new_bb)
2067                 new_label = block_label (new_bb);
2068               else
2069                 {
2070                   basic_block last_bb;
2071                   rtx_code_label *old_jump_target;
2072                   rtx_jump_insn *new_jump;
2073
2074                   /* Create new basic block to be dest for
2075                      conditional jump.  */
2076
2077                   /* Put appropriate instructions in new bb.  */
2078
2079                   new_label = gen_label_rtx ();
2080                   emit_label (new_label);
2081
2082                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2083                   old_jump_target = old_jump_insn->jump_target ();
2084                   new_jump = as_a <rtx_jump_insn *>
2085                     (emit_jump_insn (targetm.gen_jump (old_jump_target)));
2086                   new_jump->set_jump_target (old_jump_target);
2087
2088                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2089                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2090                   new_bb->aux = last_bb->aux;
2091                   last_bb->aux = new_bb;
2092
2093                   emit_barrier_after_bb (new_bb);
2094
2095                   /* Make sure new bb is in same partition as source
2096                      of conditional branch.  */
2097                   BB_COPY_PARTITION (new_bb, cur_bb);
2098                 }
2099
2100               /* Make old jump branch to new bb.  */
2101
2102               redirect_jump (old_jump_insn, new_label, 0);
2103
2104               /* Remove crossing_edge as predecessor of 'dest'.  */
2105
2106               dest = crossing_edge->dest;
2107
2108               redirect_edge_succ (crossing_edge, new_bb);
2109
2110               /* Make a new edge from new_bb to old dest; new edge
2111                  will be a successor for new_bb and a predecessor
2112                  for 'dest'.  */
2113
2114               if (EDGE_COUNT (new_bb->succs) == 0)
2115                 new_edge = make_single_succ_edge (new_bb, dest, 0);
2116               else
2117                 new_edge = EDGE_SUCC (new_bb, 0);
2118
2119               crossing_edge->flags &= ~EDGE_CROSSING;
2120               new_edge->flags |= EDGE_CROSSING;
2121             }
2122         }
2123     }
2124 }
2125
2126 /* Find any unconditional branches that cross between hot and cold
2127    sections.  Convert them into indirect jumps instead.  */
2128
2129 static void
2130 fix_crossing_unconditional_branches (void)
2131 {
2132   basic_block cur_bb;
2133   rtx_insn *last_insn;
2134   rtx label;
2135   rtx label_addr;
2136   rtx_insn *indirect_jump_sequence;
2137   rtx_insn *jump_insn = NULL;
2138   rtx new_reg;
2139   rtx_insn *cur_insn;
2140   edge succ;
2141
2142   FOR_EACH_BB_FN (cur_bb, cfun)
2143     {
2144       last_insn = BB_END (cur_bb);
2145
2146       if (EDGE_COUNT (cur_bb->succs) < 1)
2147         continue;
2148
2149       succ = EDGE_SUCC (cur_bb, 0);
2150
2151       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2152          this point, no crossing jumps should be conditional.  */
2153
2154       if (JUMP_P (last_insn)
2155           && (succ->flags & EDGE_CROSSING))
2156         {
2157           gcc_assert (!any_condjump_p (last_insn));
2158
2159           /* Make sure the jump is not already an indirect or table jump.  */
2160
2161           if (!computed_jump_p (last_insn)
2162               && !tablejump_p (last_insn, NULL, NULL))
2163             {
2164               /* We have found a "crossing" unconditional branch.  Now
2165                  we must convert it to an indirect jump.  First create
2166                  reference of label, as target for jump.  */
2167
2168               label = JUMP_LABEL (last_insn);
2169               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2170               LABEL_NUSES (label) += 1;
2171
2172               /* Get a register to use for the indirect jump.  */
2173
2174               new_reg = gen_reg_rtx (Pmode);
2175
2176               /* Generate indirect the jump sequence.  */
2177
2178               start_sequence ();
2179               emit_move_insn (new_reg, label_addr);
2180               emit_indirect_jump (new_reg);
2181               indirect_jump_sequence = get_insns ();
2182               end_sequence ();
2183
2184               /* Make sure every instruction in the new jump sequence has
2185                  its basic block set to be cur_bb.  */
2186
2187               for (cur_insn = indirect_jump_sequence; cur_insn;
2188                    cur_insn = NEXT_INSN (cur_insn))
2189                 {
2190                   if (!BARRIER_P (cur_insn))
2191                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2192                   if (JUMP_P (cur_insn))
2193                     jump_insn = cur_insn;
2194                 }
2195
2196               /* Insert the new (indirect) jump sequence immediately before
2197                  the unconditional jump, then delete the unconditional jump.  */
2198
2199               emit_insn_before (indirect_jump_sequence, last_insn);
2200               delete_insn (last_insn);
2201
2202               JUMP_LABEL (jump_insn) = label;
2203               LABEL_NUSES (label)++;
2204
2205               /* Make BB_END for cur_bb be the jump instruction (NOT the
2206                  barrier instruction at the end of the sequence...).  */
2207
2208               BB_END (cur_bb) = jump_insn;
2209             }
2210         }
2211     }
2212 }
2213
2214 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2215
2216 static void
2217 update_crossing_jump_flags (void)
2218 {
2219   basic_block bb;
2220   edge e;
2221   edge_iterator ei;
2222
2223   FOR_EACH_BB_FN (bb, cfun)
2224     FOR_EACH_EDGE (e, ei, bb->succs)
2225       if (e->flags & EDGE_CROSSING)
2226         {
2227           if (JUMP_P (BB_END (bb))
2228               /* Some flags were added during fix_up_fall_thru_edges, via
2229                  force_nonfallthru_and_redirect.  */
2230               && !CROSSING_JUMP_P (BB_END (bb)))
2231             CROSSING_JUMP_P (BB_END (bb)) = 1;
2232           break;
2233         }
2234 }
2235
2236 /* Reorder basic blocks using the software trace cache (STC) algorithm.  */
2237
2238 static void
2239 reorder_basic_blocks_software_trace_cache (void)
2240 {
2241   if (dump_file)
2242     fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2243
2244   int n_traces;
2245   int i;
2246   struct trace *traces;
2247
2248   /* We are estimating the length of uncond jump insn only once since the code
2249      for getting the insn length always returns the minimal length now.  */
2250   if (uncond_jump_length == 0)
2251     uncond_jump_length = get_uncond_jump_length ();
2252
2253   /* We need to know some information for each basic block.  */
2254   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2255   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2256   for (i = 0; i < array_size; i++)
2257     {
2258       bbd[i].start_of_trace = -1;
2259       bbd[i].end_of_trace = -1;
2260       bbd[i].in_trace = -1;
2261       bbd[i].visited = 0;
2262       bbd[i].priority = -1;
2263       bbd[i].heap = NULL;
2264       bbd[i].node = NULL;
2265     }
2266
2267   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2268   n_traces = 0;
2269   find_traces (&n_traces, traces);
2270   connect_traces (n_traces, traces);
2271   FREE (traces);
2272   FREE (bbd);
2273 }
2274
2275 /* Return true if edge E1 is more desirable as a fallthrough edge than
2276    edge E2 is.  */
2277
2278 static bool
2279 edge_order (edge e1, edge e2)
2280 {
2281   return e1->count () > e2->count ();
2282 }
2283
2284 /* Reorder basic blocks using the "simple" algorithm.  This tries to
2285    maximize the dynamic number of branches that are fallthrough, without
2286    copying instructions.  The algorithm is greedy, looking at the most
2287    frequently executed branch first.  */
2288
2289 static void
2290 reorder_basic_blocks_simple (void)
2291 {
2292   if (dump_file)
2293     fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
2294
2295   edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
2296
2297   /* First, collect all edges that can be optimized by reordering blocks:
2298      simple jumps and conditional jumps, as well as the function entry edge.  */
2299
2300   int n = 0;
2301   edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2302
2303   basic_block bb;
2304   FOR_EACH_BB_FN (bb, cfun)
2305     {
2306       rtx_insn *end = BB_END (bb);
2307
2308       if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
2309         continue;
2310
2311       /* We cannot optimize asm goto.  */
2312       if (JUMP_P (end) && extract_asm_operands (end))
2313         continue;
2314
2315       if (single_succ_p (bb))
2316         edges[n++] = EDGE_SUCC (bb, 0);
2317       else if (any_condjump_p (end))
2318         {
2319           edge e0 = EDGE_SUCC (bb, 0);
2320           edge e1 = EDGE_SUCC (bb, 1);
2321           /* When optimizing for size it is best to keep the original
2322              fallthrough edges.  */
2323           if (e1->flags & EDGE_FALLTHRU)
2324             std::swap (e0, e1);
2325           edges[n++] = e0;
2326           edges[n++] = e1;
2327         }
2328     }
2329
2330   /* Sort the edges, the most desirable first.  When optimizing for size
2331      all edges are equally desirable.  */
2332
2333   if (optimize_function_for_speed_p (cfun))
2334     std::stable_sort (edges, edges + n, edge_order);
2335
2336   /* Now decide which of those edges to make fallthrough edges.  We set
2337      BB_VISITED if a block already has a fallthrough successor assigned
2338      to it.  We make ->AUX of an endpoint point to the opposite endpoint
2339      of a sequence of blocks that fall through, and ->AUX will be NULL
2340      for a block that is in such a sequence but not an endpoint anymore.
2341
2342      To start with, everything points to itself, nothing is assigned yet.  */
2343
2344   FOR_ALL_BB_FN (bb, cfun)
2345     {
2346       bb->aux = bb;
2347       bb->flags &= ~BB_VISITED;
2348     }
2349
2350   EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
2351
2352   /* Now for all edges, the most desirable first, see if that edge can
2353      connect two sequences.  If it can, update AUX and BB_VISITED; if it
2354      cannot, zero out the edge in the table.  */
2355
2356   for (int j = 0; j < n; j++)
2357     {
2358       edge e = edges[j];
2359
2360       basic_block tail_a = e->src;
2361       basic_block head_b = e->dest;
2362       basic_block head_a = (basic_block) tail_a->aux;
2363       basic_block tail_b = (basic_block) head_b->aux;
2364
2365       /* An edge cannot connect two sequences if:
2366          - it crosses partitions;
2367          - its src is not a current endpoint;
2368          - its dest is not a current endpoint;
2369          - or, it would create a loop.  */
2370
2371       if (e->flags & EDGE_CROSSING
2372           || tail_a->flags & BB_VISITED
2373           || !tail_b
2374           || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
2375           || tail_a == tail_b)
2376         {
2377           edges[j] = 0;
2378           continue;
2379         }
2380
2381       tail_a->aux = 0;
2382       head_b->aux = 0;
2383       head_a->aux = tail_b;
2384       tail_b->aux = head_a;
2385       tail_a->flags |= BB_VISITED;
2386     }
2387
2388   /* Put the pieces together, in the same order that the start blocks of
2389      the sequences already had.  The hot/cold partitioning gives a little
2390      complication: as a first pass only do this for blocks in the same
2391      partition as the start block, and (if there is anything left to do)
2392      in a second pass handle the other partition.  */
2393
2394   basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2395
2396   int current_partition = BB_PARTITION (last_tail);
2397   bool need_another_pass = true;
2398
2399   for (int pass = 0; pass < 2 && need_another_pass; pass++)
2400     {
2401       need_another_pass = false;
2402
2403       FOR_EACH_BB_FN (bb, cfun)
2404         if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
2405           {
2406             if (BB_PARTITION (bb) != current_partition)
2407               {
2408                 need_another_pass = true;
2409                 continue;
2410               }
2411
2412             last_tail->aux = bb;
2413             last_tail = (basic_block) bb->aux;
2414           }
2415
2416       current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
2417     }
2418
2419   last_tail->aux = 0;
2420
2421   /* Finally, link all the chosen fallthrough edges.  */
2422
2423   for (int j = 0; j < n; j++)
2424     if (edges[j])
2425       edges[j]->src->aux = edges[j]->dest;
2426
2427   delete[] edges;
2428
2429   /* If the entry edge no longer falls through we have to make a new
2430      block so it can do so again.  */
2431
2432   edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2433   if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
2434     {
2435       force_nonfallthru (e);
2436       e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2437       BB_COPY_PARTITION (e->src, e->dest);
2438     }
2439 }
2440
2441 /* Reorder basic blocks.  The main entry point to this file.  */
2442
2443 static void
2444 reorder_basic_blocks (void)
2445 {
2446   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2447
2448   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2449     return;
2450
2451   set_edge_can_fallthru_flag ();
2452   mark_dfs_back_edges ();
2453
2454   switch (flag_reorder_blocks_algorithm)
2455     {
2456     case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2457       reorder_basic_blocks_simple ();
2458       break;
2459
2460     case REORDER_BLOCKS_ALGORITHM_STC:
2461       reorder_basic_blocks_software_trace_cache ();
2462       break;
2463
2464     default:
2465       gcc_unreachable ();
2466     }
2467
2468   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2469
2470   if (dump_file)
2471     {
2472       if (dump_flags & TDF_DETAILS)
2473         dump_reg_info (dump_file);
2474       dump_flow_info (dump_file, dump_flags);
2475     }
2476
2477   /* Signal that rtl_verify_flow_info_1 can now verify that there
2478      is at most one switch between hot/cold sections.  */
2479   crtl->bb_reorder_complete = true;
2480 }
2481
2482 /* Determine which partition the first basic block in the function
2483    belongs to, then find the first basic block in the current function
2484    that belongs to a different section, and insert a
2485    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2486    instruction stream.  When writing out the assembly code,
2487    encountering this note will make the compiler switch between the
2488    hot and cold text sections.  */
2489
2490 void
2491 insert_section_boundary_note (void)
2492 {
2493   basic_block bb;
2494   bool switched_sections = false;
2495   int current_partition = 0;
2496
2497   if (!crtl->has_bb_partition)
2498     return;
2499
2500   FOR_EACH_BB_FN (bb, cfun)
2501     {
2502       if (!current_partition)
2503         current_partition = BB_PARTITION (bb);
2504       if (BB_PARTITION (bb) != current_partition)
2505         {
2506           gcc_assert (!switched_sections);
2507           switched_sections = true;
2508           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2509           current_partition = BB_PARTITION (bb);
2510         }
2511     }
2512 }
2513
2514 namespace {
2515
2516 const pass_data pass_data_reorder_blocks =
2517 {
2518   RTL_PASS, /* type */
2519   "bbro", /* name */
2520   OPTGROUP_NONE, /* optinfo_flags */
2521   TV_REORDER_BLOCKS, /* tv_id */
2522   0, /* properties_required */
2523   0, /* properties_provided */
2524   0, /* properties_destroyed */
2525   0, /* todo_flags_start */
2526   0, /* todo_flags_finish */
2527 };
2528
2529 class pass_reorder_blocks : public rtl_opt_pass
2530 {
2531 public:
2532   pass_reorder_blocks (gcc::context *ctxt)
2533     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2534   {}
2535
2536   /* opt_pass methods: */
2537   virtual bool gate (function *)
2538     {
2539       if (targetm.cannot_modify_jumps_p ())
2540         return false;
2541       return (optimize > 0
2542               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2543     }
2544
2545   virtual unsigned int execute (function *);
2546
2547 }; // class pass_reorder_blocks
2548
2549 unsigned int
2550 pass_reorder_blocks::execute (function *fun)
2551 {
2552   basic_block bb;
2553
2554   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2555      splitting possibly introduced more crossjumping opportunities.  */
2556   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2557
2558   reorder_basic_blocks ();
2559   cleanup_cfg (CLEANUP_EXPENSIVE);
2560
2561   FOR_EACH_BB_FN (bb, fun)
2562     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2563       bb->aux = bb->next_bb;
2564   cfg_layout_finalize ();
2565
2566   return 0;
2567 }
2568
2569 } // anon namespace
2570
2571 rtl_opt_pass *
2572 make_pass_reorder_blocks (gcc::context *ctxt)
2573 {
2574   return new pass_reorder_blocks (ctxt);
2575 }
2576
2577 /* Duplicate a block (that we already know ends in a computed jump) into its
2578    predecessors, where possible.  Return whether anything is changed.  */
2579 static bool
2580 maybe_duplicate_computed_goto (basic_block bb, int max_size)
2581 {
2582   if (single_pred_p (bb))
2583     return false;
2584
2585   /* Make sure that the block is small enough.  */
2586   rtx_insn *insn;
2587   FOR_BB_INSNS (bb, insn)
2588     if (INSN_P (insn))
2589       {
2590         max_size -= get_attr_min_length (insn);
2591         if (max_size < 0)
2592            return false;
2593       }
2594
2595   bool changed = false;
2596   edge e;
2597   edge_iterator ei;
2598   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2599     {
2600       basic_block pred = e->src;
2601
2602       /* Do not duplicate BB into PRED if that is the last predecessor, or if
2603          we cannot merge a copy of BB with PRED.  */
2604       if (single_pred_p (bb)
2605           || !single_succ_p (pred)
2606           || e->flags & EDGE_COMPLEX
2607           || pred->index < NUM_FIXED_BLOCKS
2608           || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
2609           || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
2610         {
2611           ei_next (&ei);
2612           continue;
2613         }
2614
2615       if (dump_file)
2616         fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
2617                  bb->index, e->src->index);
2618
2619       /* Remember if PRED can be duplicated; if so, the copy of BB merged
2620          with PRED can be duplicated as well.  */
2621       bool can_dup_more = can_duplicate_block_p (pred);
2622
2623       /* Make a copy of BB, merge it into PRED.  */
2624       basic_block copy = duplicate_block (bb, e, NULL);
2625       emit_barrier_after_bb (copy);
2626       reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
2627       merge_blocks (pred, copy);
2628
2629       changed = true;
2630
2631       /* Try to merge the resulting merged PRED into further predecessors.  */
2632       if (can_dup_more)
2633         maybe_duplicate_computed_goto (pred, max_size);
2634     }
2635
2636   return changed;
2637 }
2638
2639 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2640    computed gotos that were factored early on in the compilation process to
2641    speed up edge based data flow.  We used to not unfactor them again, which
2642    can seriously pessimize code with many computed jumps in the source code,
2643    such as interpreters.  See e.g. PR15242.  */
2644 static void
2645 duplicate_computed_gotos (function *fun)
2646 {
2647   /* We are estimating the length of uncond jump insn only once
2648      since the code for getting the insn length always returns
2649      the minimal length now.  */
2650   if (uncond_jump_length == 0)
2651     uncond_jump_length = get_uncond_jump_length ();
2652
2653   /* Never copy a block larger than this.  */
2654   int max_size
2655     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2656
2657   bool changed = false;
2658
2659   /* Try to duplicate all blocks that end in a computed jump and that
2660      can be duplicated at all.  */
2661   basic_block bb;
2662   FOR_EACH_BB_FN (bb, fun)
2663     if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
2664       changed |= maybe_duplicate_computed_goto (bb, max_size);
2665
2666   /* Duplicating blocks will redirect edges and may cause hot blocks
2667     previously reached by both hot and cold blocks to become dominated
2668     only by cold blocks.  */
2669   if (changed)
2670     fixup_partitions ();
2671 }
2672
2673 namespace {
2674
2675 const pass_data pass_data_duplicate_computed_gotos =
2676 {
2677   RTL_PASS, /* type */
2678   "compgotos", /* name */
2679   OPTGROUP_NONE, /* optinfo_flags */
2680   TV_REORDER_BLOCKS, /* tv_id */
2681   0, /* properties_required */
2682   0, /* properties_provided */
2683   0, /* properties_destroyed */
2684   0, /* todo_flags_start */
2685   0, /* todo_flags_finish */
2686 };
2687
2688 class pass_duplicate_computed_gotos : public rtl_opt_pass
2689 {
2690 public:
2691   pass_duplicate_computed_gotos (gcc::context *ctxt)
2692     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2693   {}
2694
2695   /* opt_pass methods: */
2696   virtual bool gate (function *);
2697   virtual unsigned int execute (function *);
2698
2699 }; // class pass_duplicate_computed_gotos
2700
2701 bool
2702 pass_duplicate_computed_gotos::gate (function *fun)
2703 {
2704   if (targetm.cannot_modify_jumps_p ())
2705     return false;
2706   return (optimize > 0
2707           && flag_expensive_optimizations
2708           && ! optimize_function_for_size_p (fun));
2709 }
2710
2711 unsigned int
2712 pass_duplicate_computed_gotos::execute (function *fun)
2713 {
2714   duplicate_computed_gotos (fun);
2715
2716   return 0;
2717 }
2718
2719 } // anon namespace
2720
2721 rtl_opt_pass *
2722 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2723 {
2724   return new pass_duplicate_computed_gotos (ctxt);
2725 }
2726
2727 /* This function is the main 'entrance' for the optimization that
2728    partitions hot and cold basic blocks into separate sections of the
2729    .o file (to improve performance and cache locality).  Ideally it
2730    would be called after all optimizations that rearrange the CFG have
2731    been called.  However part of this optimization may introduce new
2732    register usage, so it must be called before register allocation has
2733    occurred.  This means that this optimization is actually called
2734    well before the optimization that reorders basic blocks (see
2735    function above).
2736
2737    This optimization checks the feedback information to determine
2738    which basic blocks are hot/cold, updates flags on the basic blocks
2739    to indicate which section they belong in.  This information is
2740    later used for writing out sections in the .o file.  Because hot
2741    and cold sections can be arbitrarily large (within the bounds of
2742    memory), far beyond the size of a single function, it is necessary
2743    to fix up all edges that cross section boundaries, to make sure the
2744    instructions used can actually span the required distance.  The
2745    fixes are described below.
2746
2747    Fall-through edges must be changed into jumps; it is not safe or
2748    legal to fall through across a section boundary.  Whenever a
2749    fall-through edge crossing a section boundary is encountered, a new
2750    basic block is inserted (in the same section as the fall-through
2751    source), and the fall through edge is redirected to the new basic
2752    block.  The new basic block contains an unconditional jump to the
2753    original fall-through target.  (If the unconditional jump is
2754    insufficient to cross section boundaries, that is dealt with a
2755    little later, see below).
2756
2757    In order to deal with architectures that have short conditional
2758    branches (which cannot span all of memory) we take any conditional
2759    jump that attempts to cross a section boundary and add a level of
2760    indirection: it becomes a conditional jump to a new basic block, in
2761    the same section.  The new basic block contains an unconditional
2762    jump to the original target, in the other section.
2763
2764    For those architectures whose unconditional branch is also
2765    incapable of reaching all of memory, those unconditional jumps are
2766    converted into indirect jumps, through a register.
2767
2768    IMPORTANT NOTE: This optimization causes some messy interactions
2769    with the cfg cleanup optimizations; those optimizations want to
2770    merge blocks wherever possible, and to collapse indirect jump
2771    sequences (change "A jumps to B jumps to C" directly into "A jumps
2772    to C").  Those optimizations can undo the jump fixes that
2773    partitioning is required to make (see above), in order to ensure
2774    that jumps attempting to cross section boundaries are really able
2775    to cover whatever distance the jump requires (on many architectures
2776    conditional or unconditional jumps are not able to reach all of
2777    memory).  Therefore tests have to be inserted into each such
2778    optimization to make sure that it does not undo stuff necessary to
2779    cross partition boundaries.  This would be much less of a problem
2780    if we could perform this optimization later in the compilation, but
2781    unfortunately the fact that we may need to create indirect jumps
2782    (through registers) requires that this optimization be performed
2783    before register allocation.
2784
2785    Hot and cold basic blocks are partitioned and put in separate
2786    sections of the .o file, to reduce paging and improve cache
2787    performance (hopefully).  This can result in bits of code from the
2788    same function being widely separated in the .o file.  However this
2789    is not obvious to the current bb structure.  Therefore we must take
2790    care to ensure that: 1). There are no fall_thru edges that cross
2791    between sections; 2). For those architectures which have "short"
2792    conditional branches, all conditional branches that attempt to
2793    cross between sections are converted to unconditional branches;
2794    and, 3). For those architectures which have "short" unconditional
2795    branches, all unconditional branches that attempt to cross between
2796    sections are converted to indirect jumps.
2797
2798    The code for fixing up fall_thru edges that cross between hot and
2799    cold basic blocks does so by creating new basic blocks containing
2800    unconditional branches to the appropriate label in the "other"
2801    section.  The new basic block is then put in the same (hot or cold)
2802    section as the original conditional branch, and the fall_thru edge
2803    is modified to fall into the new basic block instead.  By adding
2804    this level of indirection we end up with only unconditional branches
2805    crossing between hot and cold sections.
2806
2807    Conditional branches are dealt with by adding a level of indirection.
2808    A new basic block is added in the same (hot/cold) section as the
2809    conditional branch, and the conditional branch is retargeted to the
2810    new basic block.  The new basic block contains an unconditional branch
2811    to the original target of the conditional branch (in the other section).
2812
2813    Unconditional branches are dealt with by converting them into
2814    indirect jumps.  */
2815
2816 namespace {
2817
2818 const pass_data pass_data_partition_blocks =
2819 {
2820   RTL_PASS, /* type */
2821   "bbpart", /* name */
2822   OPTGROUP_NONE, /* optinfo_flags */
2823   TV_REORDER_BLOCKS, /* tv_id */
2824   PROP_cfglayout, /* properties_required */
2825   0, /* properties_provided */
2826   0, /* properties_destroyed */
2827   0, /* todo_flags_start */
2828   0, /* todo_flags_finish */
2829 };
2830
2831 class pass_partition_blocks : public rtl_opt_pass
2832 {
2833 public:
2834   pass_partition_blocks (gcc::context *ctxt)
2835     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2836   {}
2837
2838   /* opt_pass methods: */
2839   virtual bool gate (function *);
2840   virtual unsigned int execute (function *);
2841
2842 }; // class pass_partition_blocks
2843
2844 bool
2845 pass_partition_blocks::gate (function *fun)
2846 {
2847   /* The optimization to partition hot/cold basic blocks into separate
2848      sections of the .o file does not work well with linkonce or with
2849      user defined section attributes.  Don't call it if either case
2850      arises.  */
2851   return (flag_reorder_blocks_and_partition
2852           && optimize
2853           /* See pass_reorder_blocks::gate.  We should not partition if
2854              we are going to omit the reordering.  */
2855           && optimize_function_for_speed_p (fun)
2856           && !DECL_COMDAT_GROUP (current_function_decl)
2857           && !lookup_attribute ("section", DECL_ATTRIBUTES (fun->decl)));
2858 }
2859
2860 unsigned
2861 pass_partition_blocks::execute (function *fun)
2862 {
2863   vec<edge> crossing_edges;
2864
2865   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2866     return 0;
2867
2868   df_set_flags (DF_DEFER_INSN_RESCAN);
2869
2870   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2871   if (!crossing_edges.exists ())
2872     /* Make sure to process deferred rescans and clear changeable df flags.  */
2873     return TODO_df_finish;
2874
2875   crtl->has_bb_partition = true;
2876
2877   /* Make sure the source of any crossing edge ends in a jump and the
2878      destination of any crossing edge has a label.  */
2879   add_labels_and_missing_jumps (crossing_edges);
2880
2881   /* Convert all crossing fall_thru edges to non-crossing fall
2882      thrus to unconditional jumps (that jump to the original fall
2883      through dest).  */
2884   fix_up_fall_thru_edges ();
2885
2886   /* If the architecture does not have conditional branches that can
2887      span all of memory, convert crossing conditional branches into
2888      crossing unconditional branches.  */
2889   if (!HAS_LONG_COND_BRANCH)
2890     fix_crossing_conditional_branches ();
2891
2892   /* If the architecture does not have unconditional branches that
2893      can span all of memory, convert crossing unconditional branches
2894      into indirect jumps.  Since adding an indirect jump also adds
2895      a new register usage, update the register usage information as
2896      well.  */
2897   if (!HAS_LONG_UNCOND_BRANCH)
2898     fix_crossing_unconditional_branches ();
2899
2900   update_crossing_jump_flags ();
2901
2902   /* Clear bb->aux fields that the above routines were using.  */
2903   clear_aux_for_blocks ();
2904
2905   crossing_edges.release ();
2906
2907   /* ??? FIXME: DF generates the bb info for a block immediately.
2908      And by immediately, I mean *during* creation of the block.
2909
2910         #0  df_bb_refs_collect
2911         #1  in df_bb_refs_record
2912         #2  in create_basic_block_structure
2913
2914      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2915      will *always* fail, because no edges can have been added to the
2916      block yet.  Which of course means we don't add the right
2917      artificial refs, which means we fail df_verify (much) later.
2918
2919      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2920      that we also shouldn't grab data from the new blocks those new
2921      insns are in either.  In this way one can create the block, link
2922      it up properly, and have everything Just Work later, when deferred
2923      insns are processed.
2924
2925      In the meantime, we have no other option but to throw away all
2926      of the DF data and recompute it all.  */
2927   if (fun->eh->lp_array)
2928     {
2929       df_finish_pass (true);
2930       df_scan_alloc (NULL);
2931       df_scan_blocks ();
2932       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2933          data.  We blindly generated all of them when creating the new
2934          landing pad.  Delete those assignments we don't use.  */
2935       df_set_flags (DF_LR_RUN_DCE);
2936       df_analyze ();
2937     }
2938
2939   /* Make sure to process deferred rescans and clear changeable df flags.  */
2940   return TODO_df_finish;
2941 }
2942
2943 } // anon namespace
2944
2945 rtl_opt_pass *
2946 make_pass_partition_blocks (gcc::context *ctxt)
2947 {
2948   return new pass_partition_blocks (ctxt);
2949 }