gcc/bb-reorder.c

   1 /* Basic block reordering routines for the GNU compiler.
   2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING3.  If not see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 /* This (greedy) algorithm constructs traces in several rounds.
  21    The construction starts from "seeds".  The seed for the first round
  22    is the entry point of the function.  When there is more than one seed,
  23    the one with the lowest key in the heap is selected first (see bb_to_key).
  24    Then the algorithm repeatedly adds the most probable successor to the end
  25    of a trace.  Finally it connects the traces.
  26
  27    There are two parameters: Branch Threshold and Exec Threshold.
  28    If the probability of an edge to a successor of the current basic block is
  29    lower than Branch Threshold or its frequency is lower than Exec Threshold,
  30    then the successor will be the seed in one of the next rounds.
  31    Each round has these parameters lower than the previous one.
  32    The last round has to have these parameters set to zero so that the
  33    remaining blocks are picked up.
  34
  35    The algorithm selects the most probable successor from all unvisited
  36    successors and successors that have been added to this trace.
  37    The other successors (that have not been "sent" to the next round) will be
  38    other seeds for this round and the secondary traces will start from them.
  39    If the successor has not been visited in this trace, it is added to the
  40    trace (however, there is some heuristic for simple branches).
  41    If the successor has been visited in this trace, a loop has been found.
  42    If the loop has many iterations, the loop is rotated so that the source
  43    block of the most probable edge going out of the loop is the last block
  44    of the trace.
  45    If the loop has few iterations and there is no edge from the last block of
  46    the loop going out of the loop, the loop header is duplicated.
  47
  48    When connecting traces, the algorithm first checks whether there is an edge
  49    from the last block of a trace to the first block of another trace.
  50    When there are still some unconnected traces it checks whether there exists
  51    a basic block BB such that BB is a successor of the last block of a trace
  52    and BB is a predecessor of the first block of another trace.  In this case,
  53    BB is duplicated, added at the end of the first trace and the traces are
  54    connected through it.
  55    The rest of traces are simply connected so there will be a jump to the
  56    beginning of the rest of traces.
  57
  58    The above description is for the full algorithm, which is used when the
  59    function is optimized for speed.  When the function is optimized for size,
  60    in order to reduce long jumps and connect more fallthru edges, the
  61    algorithm is modified as follows:
  62    (1) Break long traces to short ones.  A trace is broken at a block that has
  63    multiple predecessors/ successors during trace discovery.  When connecting
  64    traces, only connect Trace n with Trace n + 1.  This change reduces most
  65    long jumps compared with the above algorithm.
  66    (2) Ignore the edge probability and frequency for fallthru edges.
  67    (3) Keep the original order of blocks when there is no chance to fall
  68    through.  We rely on the results of cfg_cleanup.
  69
  70    To implement the change for code size optimization, block's index is
  71    selected as the key and all traces are found in one round.
  72
  73    References:
  74
  75    "Software Trace Cache"
  76    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
  77    http://citeseer.nj.nec.com/15361.html
  78
  79 */
  80
  81 #include "config.h"
  82 #include "system.h"
  83 #include "coretypes.h"
  84 #include "tm.h"
  85 #include "hash-set.h"
  86 #include "vec.h"
  87 #include "input.h"
  88 #include "alias.h"
  89 #include "symtab.h"
  90 #include "inchash.h"
  91 #include "tree.h"
  92 #include "rtl.h"
  93 #include "regs.h"
  94 #include "flags.h"
  95 #include "output.h"
  96 #include "target.h"
  97 #include "hashtab.h"
  98 #include "hard-reg-set.h"
  99 #include "function.h"
 100 #include "tm_p.h"
 101 #include "obstack.h"
 102 #include "statistics.h"
 103 #include "insn-config.h"
 104 #include "expmed.h"
 105 #include "dojump.h"
 106 #include "explow.h"
 107 #include "calls.h"
 108 #include "emit-rtl.h"
 109 #include "varasm.h"
 110 #include "stmt.h"
 111 #include "expr.h"
 112 #include "optabs.h"
 113 #include "params.h"
 114 #include "diagnostic-core.h"
 115 #include "toplev.h" /* user_defined_section_attribute */
 116 #include "tree-pass.h"
 117 #include "dominance.h"
 118 #include "cfg.h"
 119 #include "cfgrtl.h"
 120 #include "cfganal.h"
 121 #include "cfgbuild.h"
 122 #include "cfgcleanup.h"
 123 #include "predict.h"
 124 #include "basic-block.h"
 125 #include "df.h"
 126 #include "bb-reorder.h"
 127 #include "hash-map.h"
 128 #include "is-a.h"
 129 #include "plugin-api.h"
 130 #include "ipa-ref.h"
 131 #include "cgraph.h"
 132 #include "except.h"
 133 #include "fibonacci_heap.h"
 134
 135 /* The number of rounds.  In most cases there will only be 4 rounds, but
 136    when partitioning hot and cold basic blocks into separate sections of
 137    the object file there will be an extra round.  */
     /* N_ROUNDS also sizes the branch_threshold and exec_threshold tables
        below, which hold one entry per round.  */
 138 #define N_ROUNDS 5
 139
     /* Default instance of the bb-reorder target state.  When
        SWITCHABLE_TARGET is set, this_target_bb_reorder is defined here and
        initially points at this default instance.  */
 140 struct target_bb_reorder default_target_bb_reorder;
 141 #if SWITCHABLE_TARGET
 142 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
 143 #endif
 144
     /* Shorthand for the x_uncond_jump_length field of the state that
        this_target_bb_reorder points to.  */
 145 #define uncond_jump_length \
 146   (this_target_bb_reorder->x_uncond_jump_length)
 147
 148 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
     /* find_traces turns entry I into REG_BR_PROB_BASE * value / 1000 for
        round I.  */
 149 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 150
 151 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
     /* find_traces scales entry I by both max_entry_frequency and
        max_entry_count to obtain the frequency and count thresholds of
        round I.  */
 152 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 153
 154 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
 155    block the edge destination is not duplicated while connecting traces.  */
 156 #define DUPLICATION_THRESHOLD 100
 157
     /* Heap type used to order candidate seed blocks, and its node type.
        Keys are of type long (they come from bb_to_key) and the stored data
        are basic blocks.  */
 158 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
 159 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
 160
 161 /* Structure to hold needed information for each basic block.  */
     /* Instances live in the BBD array and are indexed by the block's
        index field.  */
 162 typedef struct bbro_basic_block_data_def
 163 {
 164   /* Which trace is the bb start of (-1 means it is not a start of any).  */
 165   int start_of_trace;
 166
 167   /* Which trace is the bb end of (-1 means it is not an end of any).  */
 168   int end_of_trace;
 169
 170   /* Which trace is the bb in?  */
 171   int in_trace;
 172
 173   /* Which trace was this bb visited in?  */
     /* Callers test this value for zero to mean "not visited yet".  */
 174   int visited;
 175
 176   /* Which heap is BB in (if any)?  */
     /* NULL when the block is not queued in any heap; mark_bb_visited
        resets HEAP and NODE together.  */
 177   bb_heap_t *heap;
 178
 179   /* Which heap node is BB in (if any)?  */
 180   bb_heap_node_t *node;
 181 } bbro_basic_block_data;
 182
 183 /* The current size of the following dynamic array.  */
 184 static int array_size;
 185
 186 /* The array which holds needed information for basic blocks.  */
     /* Indexed by basic block index; bb_visited_trace asserts that the index
        is below ARRAY_SIZE before reading it.  */
 187 static bbro_basic_block_data *bbd;
 188
 189 /* To avoid frequent reallocation the size of arrays is greater than needed,
 190    the number of elements is (not less than) 1.25 * size_wanted.  */
     /* Uses integer division, e.g. GET_ARRAY_SIZE (8) is (8 / 4 + 1) * 5,
        i.e. 15.  */
 191 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 192
 193 /* Free the memory and set the pointer to NULL.  */
     /* Asserts that P is non-null first.  P is expanded three times and is
        assigned to, so it must be a side-effect-free lvalue.  */
 194 #define FREE(P) (gcc_assert (P), free (P), P = 0)
 195
 196 /* Structure for holding information about a trace.  */
     /* The blocks of a trace are linked from FIRST to LAST through their aux
        pointers; the dump code in find_traces walks a trace that way.  */
 197 struct trace
 198 {
 199   /* First and last basic block of the trace.  */
 200   basic_block first, last;
 201
 202   /* The round of the STC creation which this trace was found in.  */
     /* Zero-based; the dump output prints it as ROUND + 1.  */
 203   int round;
 204
 205   /* The length (i.e. the number of basic blocks) of the trace.  */
 206   int length;
 207 };
 208
 209 /* Maximum frequency and count of one of the entry blocks.  */
     /* Both are reset to zero and recomputed by find_traces from the
        destinations of the edges leaving the function's entry block.  */
 210 static int max_entry_frequency;
 211 static gcov_type max_entry_count;
 212
 213 /* Local function prototypes.  */
 214 static void find_traces (int *, struct trace *);
 215 static basic_block rotate_loop (edge, struct trace *, int);
 216 static void mark_bb_visited (basic_block, int);
 217 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
 218                                  int, bb_heap_t **, int);
 219 static basic_block copy_bb (basic_block, edge, basic_block, int);
 220 static long bb_to_key (basic_block);
 221 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
 222                            const_edge);
 223 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
 224                                    struct trace *);
 225 static void connect_traces (int, struct trace *);
 226 static bool copy_bb_p (const_basic_block, int);
 227 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 228 \f
 229 /* Return the trace number in which BB was visited.  */
 230
 231 static int
 232 bb_visited_trace (const_basic_block bb)
 233 {
 234   gcc_assert (bb->index < array_size);
 235   return bbd[bb->index].visited;
 236 }
 237
 238 /* This function marks BB that it was visited in trace number TRACE.  */
 239
 240 static void
 241 mark_bb_visited (basic_block bb, int trace)
 242 {
 243   bbd[bb->index].visited = trace;
 244   if (bbd[bb->index].heap)
 245     {
 246       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
 247       bbd[bb->index].heap = NULL;
 248       bbd[bb->index].node = NULL;
 249     }
 250 }
 251
 252 /* Check to see if bb should be pushed into the next round of trace
 253    collections or not.  Reasons for pushing the block forward are 1).
 254    If the block is cold, we are doing partitioning, and there will be
 255    another round (cold partition blocks are not supposed to be
 256    collected into traces until the very last round); or 2). There will
 257    be another round, and the basic block is not "hot enough" for the
 258    current round of trace collection.  */
 259
 260 static bool
 261 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
 262                       int exec_th, gcov_type count_th)
 263 {
 264   bool there_exists_another_round;
 265   bool block_not_hot_enough;
 266
 267   there_exists_another_round = round < number_of_rounds - 1;
 268
 269   block_not_hot_enough = (bb->frequency < exec_th
 270                           || bb->count < count_th
 271                           || probably_never_executed_bb_p (cfun, bb));
 272
 273   if (there_exists_another_round
 274       && block_not_hot_enough)
 275     return true;
 276   else
 277     return false;
 278 }
 279
 280 /* Find the traces for Software Trace Cache.  Chain each trace through
 281    RBI()->next.  Store the number of traces to N_TRACES and description of
 282    traces to TRACES.  */
 283
 284 static void
 285 find_traces (int *n_traces, struct trace *traces)
 286 {
 287   int i;
 288   int number_of_rounds;
 289   edge e;
 290   edge_iterator ei;
 291   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
 292
 293   /* Add one extra round of trace collection when partitioning hot/cold
 294      basic blocks into separate sections.  The last round is for all the
 295      cold blocks (and ONLY the cold blocks).  */
 296
 297   number_of_rounds = N_ROUNDS - 1;
 298
 299   /* Insert entry points of function into heap.  */
 300   max_entry_frequency = 0;
 301   max_entry_count = 0;
 302   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
 303     {
 304       bbd[e->dest->index].heap = heap;
 305       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
 306       if (e->dest->frequency > max_entry_frequency)
 307         max_entry_frequency = e->dest->frequency;
 308       if (e->dest->count > max_entry_count)
 309         max_entry_count = e->dest->count;
 310     }
 311
 312   /* Find the traces.  */
 313   for (i = 0; i < number_of_rounds; i++)
 314     {
 315       gcov_type count_threshold;
 316
 317       if (dump_file)
 318         fprintf (dump_file, "STC - round %d\n", i + 1);
 319
 320       if (max_entry_count < INT_MAX / 1000)
 321         count_threshold = max_entry_count * exec_threshold[i] / 1000;
 322       else
 323         count_threshold = max_entry_count / 1000 * exec_threshold[i];
 324
 325       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
 326                            max_entry_frequency * exec_threshold[i] / 1000,
 327                            count_threshold, traces, n_traces, i, &heap,
 328                            number_of_rounds);
 329     }
 330   delete heap;
 331
 332   if (dump_file)
 333     {
 334       for (i = 0; i < *n_traces; i++)
 335         {
 336           basic_block bb;
 337           fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
 338                    traces[i].round + 1);
 339           for (bb = traces[i].first;
 340                bb != traces[i].last;
 341                bb = (basic_block) bb->aux)
 342             fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
 343           fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
 344         }
 345       fflush (dump_file);
 346     }
 347 }
 348
 349 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
 350    (with sequential number TRACE_N).  */
 351
 352 static basic_block
 353 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 354 {
 355   basic_block bb;
 356
 357   /* Information about the best end (end after rotation) of the loop.  */
 358   basic_block best_bb = NULL;
 359   edge best_edge = NULL;
 360   int best_freq = -1;
 361   gcov_type best_count = -1;
 362   /* The best edge is preferred when its destination is not visited yet
 363      or is a start block of some trace.  */
 364   bool is_preferred = false;
 365
 366   /* Find the most frequent edge that goes out from current trace.  */
 367   bb = back_edge->dest;
 368   do
 369     {
 370       edge e;
 371       edge_iterator ei;
 372
 373       FOR_EACH_EDGE (e, ei, bb->succs)
 374         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
 375             && bb_visited_trace (e->dest) != trace_n
 376             && (e->flags & EDGE_CAN_FALLTHRU)
 377             && !(e->flags & EDGE_COMPLEX))
 378         {
 379           if (is_preferred)
 380             {
 381               /* The best edge is preferred.  */
 382               if (!bb_visited_trace (e->dest)
 383                   || bbd[e->dest->index].start_of_trace >= 0)
 384                 {
 385                   /* The current edge E is also preferred.  */
 386                   int freq = EDGE_FREQUENCY (e);
 387                   if (freq > best_freq || e->count > best_count)
 388                     {
 389                       best_freq = freq;
 390                       best_count = e->count;
 391                       best_edge = e;
 392                       best_bb = bb;
 393                     }
 394                 }
 395             }
 396           else
 397             {
 398               if (!bb_visited_trace (e->dest)
 399                   || bbd[e->dest->index].start_of_trace >= 0)
 400                 {
 401                   /* The current edge E is preferred.  */
 402                   is_preferred = true;
 403                   best_freq = EDGE_FREQUENCY (e);
 404                   best_count = e->count;
 405                   best_edge = e;
 406                   best_bb = bb;
 407                 }
 408               else
 409                 {
 410                   int freq = EDGE_FREQUENCY (e);
 411                   if (!best_edge || freq > best_freq || e->count > best_count)
 412                     {
 413                       best_freq = freq;
 414                       best_count = e->count;
 415                       best_edge = e;
 416                       best_bb = bb;
 417                     }
 418                 }
 419             }
 420         }
 421       bb = (basic_block) bb->aux;
 422     }
 423   while (bb != back_edge->dest);
 424
 425   if (best_bb)
 426     {
 427       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
 428          the trace.  */
 429       if (back_edge->dest == trace->first)
 430         {
 431           trace->first = (basic_block) best_bb->aux;
 432         }
 433       else
 434         {
 435           basic_block prev_bb;
 436
 437           for (prev_bb = trace->first;
 438                prev_bb->aux != back_edge->dest;
 439                prev_bb = (basic_block) prev_bb->aux)
 440             ;
 441           prev_bb->aux = best_bb->aux;
 442
 443           /* Try to get rid of uncond jump to cond jump.  */
 444           if (single_succ_p (prev_bb))
 445             {
 446               basic_block header = single_succ (prev_bb);
 447
 448               /* Duplicate HEADER if it is a small block containing cond jump
 449                  in the end.  */
 450               if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
 451                   && !CROSSING_JUMP_P (BB_END (header)))
 452                 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
 453             }
 454         }
 455     }
 456   else
 457     {
 458       /* We have not found suitable loop tail so do no rotation.  */
 459       best_bb = back_edge->src;
 460     }
 461   best_bb->aux = NULL;
 462   return best_bb;
 463 }
 464
 465 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
 466    not include basic blocks whose probability is lower than BRANCH_TH or whose
 467    frequency is lower than EXEC_TH into traces (or whose count is lower than
 468    COUNT_TH).  Store the new traces into TRACES and modify the number of
 469    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
 470    The function expects starting basic blocks to be in *HEAP and will delete
 471    *HEAP and store starting points for the next round into new *HEAP.  */
 472
 473 static void
 474 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 475                      struct trace *traces, int *n_traces, int round,
 476                      bb_heap_t **heap, int number_of_rounds)
 477 {
 478   /* Heap for discarded basic blocks which are possible starting points for
 479      the next round.  */
 480   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
 481   bool for_size = optimize_function_for_size_p (cfun);
 482
 483   while (!(*heap)->empty ())
 484     {
 485       basic_block bb;
 486       struct trace *trace;
 487       edge best_edge, e;
 488       long key;
 489       edge_iterator ei;
 490
 491       bb = (*heap)->extract_min ();
 492       bbd[bb->index].heap = NULL;
 493       bbd[bb->index].node = NULL;
 494
 495       if (dump_file)
 496         fprintf (dump_file, "Getting bb %d\n", bb->index);
 497
 498       /* If the BB's frequency is too low, send BB to the next round.  When
 499          partitioning hot/cold blocks into separate sections, make sure all
 500          the cold blocks (and ONLY the cold blocks) go into the (extra) final
 501          round.  When optimizing for size, do not push to next round.  */
 502
 503       if (!for_size
 504           && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
 505                                    count_th))
 506         {
 507           int key = bb_to_key (bb);
 508           bbd[bb->index].heap = new_heap;
 509           bbd[bb->index].node = new_heap->insert (key, bb);
 510
 511           if (dump_file)
 512             fprintf (dump_file,
 513                      "  Possible start point of next round: %d (key: %d)\n",
 514                      bb->index, key);
 515           continue;
 516         }
 517
 518       trace = traces + *n_traces;
 519       trace->first = bb;
 520       trace->round = round;
 521       trace->length = 0;
 522       bbd[bb->index].in_trace = *n_traces;
 523       (*n_traces)++;
 524
 525       do
 526         {
 527           int prob, freq;
 528           bool ends_in_call;
 529
 530           /* The probability and frequency of the best edge.  */
 531           int best_prob = INT_MIN / 2;
 532           int best_freq = INT_MIN / 2;
 533
 534           best_edge = NULL;
 535           mark_bb_visited (bb, *n_traces);
 536           trace->length++;
 537
 538           if (dump_file)
 539             fprintf (dump_file, "Basic block %d was visited in trace %d\n",
 540                      bb->index, *n_traces - 1);
 541
 542           ends_in_call = block_ends_with_call_p (bb);
 543
 544           /* Select the successor that will be placed after BB.  */
 545           FOR_EACH_EDGE (e, ei, bb->succs)
 546             {
 547               gcc_assert (!(e->flags & EDGE_FAKE));
 548
 549               if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 550                 continue;
 551
 552               if (bb_visited_trace (e->dest)
 553                   && bb_visited_trace (e->dest) != *n_traces)
 554                 continue;
 555
 556               if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
 557                 continue;
 558
 559               prob = e->probability;
 560               freq = e->dest->frequency;
 561
 562               /* The only sensible preference for a call instruction is the
 563                  fallthru edge.  Don't bother selecting anything else.  */
 564               if (ends_in_call)
 565                 {
 566                   if (e->flags & EDGE_CAN_FALLTHRU)
 567                     {
 568                       best_edge = e;
 569                       best_prob = prob;
 570                       best_freq = freq;
 571                     }
 572                   continue;
 573                 }
 574
 575               /* Edge that cannot be fallthru or improbable or infrequent
 576                  successor (i.e. it is unsuitable successor).  When optimizing
 577                  for size, ignore the probability and frequency.  */
 578               if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
 579                   || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
 580                       || e->count < count_th) && (!for_size)))
 581                 continue;
 582
 583               /* If partitioning hot/cold basic blocks, don't consider edges
 584                  that cross section boundaries.  */
 585
 586               if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
 587                                  best_edge))
 588                 {
 589                   best_edge = e;
 590                   best_prob = prob;
 591                   best_freq = freq;
 592                 }
 593             }
 594
 595           /* If the best destination has multiple predecessors, and can be
 596              duplicated cheaper than a jump, don't allow it to be added
 597              to a trace.  We'll duplicate it when connecting traces.  */
 598           if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
 599               && copy_bb_p (best_edge->dest, 0))
 600             best_edge = NULL;
 601
 602           /* If the best destination has multiple successors or predecessors,
 603              don't allow it to be added when optimizing for size.  This makes
 604              sure predecessors with smaller index are handled before the best
 605              destinarion.  It breaks long trace and reduces long jumps.
 606
 607              Take if-then-else as an example.
 608                 A
 609                / \
 610               B   C
 611                \ /
 612                 D
 613              If we do not remove the best edge B->D/C->D, the final order might
 614              be A B D ... C.  C is at the end of the program.  If D's successors
 615              and D are complicated, might need long jumps for A->C and C->D.
 616              Similar issue for order: A C D ... B.
 617
 618              After removing the best edge, the final result will be ABCD/ ACBD.
 619              It does not add jump compared with the previous order.  But it
 620              reduces the possibility of long jumps.  */
 621           if (best_edge && for_size
 622               && (EDGE_COUNT (best_edge->dest->succs) > 1
 623                  || EDGE_COUNT (best_edge->dest->preds) > 1))
 624             best_edge = NULL;
 625
 626           /* Add all non-selected successors to the heaps.  */
 627           FOR_EACH_EDGE (e, ei, bb->succs)
 628             {
 629               if (e == best_edge
 630                   || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 631                   || bb_visited_trace (e->dest))
 632                 continue;
 633
 634               key = bb_to_key (e->dest);
 635
 636               if (bbd[e->dest->index].heap)
 637                 {
 638                   /* E->DEST is already in some heap.  */
 639                   if (key != bbd[e->dest->index].node->get_key ())
 640                     {
 641                       if (dump_file)
 642                         {
 643                           fprintf (dump_file,
 644                                    "Changing key for bb %d from %ld to %ld.\n",
 645                                    e->dest->index,
 646                                    (long) bbd[e->dest->index].node->get_key (),
 647                                    key);
 648                         }
 649                       bbd[e->dest->index].heap->replace_key
 650                         (bbd[e->dest->index].node, key);
 651                     }
 652                 }
 653               else
 654                 {
 655                   bb_heap_t *which_heap = *heap;
 656
 657                   prob = e->probability;
 658                   freq = EDGE_FREQUENCY (e);
 659
 660                   if (!(e->flags & EDGE_CAN_FALLTHRU)
 661                       || (e->flags & EDGE_COMPLEX)
 662                       || prob < branch_th || freq < exec_th
 663                       || e->count < count_th)
 664                     {
 665                       /* When partitioning hot/cold basic blocks, make sure
 666                          the cold blocks (and only the cold blocks) all get
 667                          pushed to the last round of trace collection.  When
 668                          optimizing for size, do not push to next round.  */
 669
 670                       if (!for_size && push_to_next_round_p (e->dest, round,
 671                                                              number_of_rounds,
 672                                                              exec_th, count_th))
 673                         which_heap = new_heap;
 674                     }
 675
 676                   bbd[e->dest->index].heap = which_heap;
 677                   bbd[e->dest->index].node = which_heap->insert (key, e->dest);
 678
 679                   if (dump_file)
 680                     {
 681                       fprintf (dump_file,
 682                                "  Possible start of %s round: %d (key: %ld)\n",
 683                                (which_heap == new_heap) ? "next" : "this",
 684                                e->dest->index, (long) key);
 685                     }
 686
 687                 }
 688             }
 689
 690           if (best_edge) /* Suitable successor was found.  */
 691             {
 692               if (bb_visited_trace (best_edge->dest) == *n_traces)
 693                 {
 694                   /* We do nothing with one basic block loops.  */
 695                   if (best_edge->dest != bb)
 696                     {
 697                       if (EDGE_FREQUENCY (best_edge)
 698                           > 4 * best_edge->dest->frequency / 5)
 699                         {
 700                           /* The loop has at least 4 iterations.  If the loop
 701                              header is not the first block of the function
 702                              we can rotate the loop.  */
 703
 704                           if (best_edge->dest
 705                               != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
 706                             {
 707                               if (dump_file)
 708                                 {
 709                                   fprintf (dump_file,
 710                                            "Rotating loop %d - %d\n",
 711                                            best_edge->dest->index, bb->index);
 712                                 }
 713                               bb->aux = best_edge->dest;
 714                               bbd[best_edge->dest->index].in_trace =
 715                                                              (*n_traces) - 1;
 716                               bb = rotate_loop (best_edge, trace, *n_traces);
 717                             }
 718                         }
 719                       else
 720                         {
 721                           /* The loop has less than 4 iterations.  */
 722
 723                           if (single_succ_p (bb)
 724                               && copy_bb_p (best_edge->dest,
 725                                             optimize_edge_for_speed_p
 726                                             (best_edge)))
 727                             {
 728                               bb = copy_bb (best_edge->dest, best_edge, bb,
 729                                             *n_traces);
 730                               trace->length++;
 731                             }
 732                         }
 733                     }
 734
 735                   /* Terminate the trace.  */
 736                   break;
 737                 }
 738               else
 739                 {
 740                   /* Check for a situation
 741
 742                     A
 743                    /|
 744                   B |
 745                    \|
 746                     C
 747
 748                   where
 749                   EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 750                     >= EDGE_FREQUENCY (AC).
 751                   (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 752                   Best ordering is then A B C.
 753
 754                   When optimizing for size, A B C is always the best order.
 755
 756                   This situation is created for example by:
 757
 758                   if (A) B;
 759                   C;
 760
 761                   */
 762
 763                   FOR_EACH_EDGE (e, ei, bb->succs)
 764                     if (e != best_edge
 765                         && (e->flags & EDGE_CAN_FALLTHRU)
 766                         && !(e->flags & EDGE_COMPLEX)
 767                         && !bb_visited_trace (e->dest)
 768                         && single_pred_p (e->dest)
 769                         && !(e->flags & EDGE_CROSSING)
 770                         && single_succ_p (e->dest)
 771                         && (single_succ_edge (e->dest)->flags
 772                             & EDGE_CAN_FALLTHRU)
 773                         && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
 774                         && single_succ (e->dest) == best_edge->dest
 775                         && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
 776                             || for_size))
 777                       {
 778                         best_edge = e;
 779                         if (dump_file)
 780                           fprintf (dump_file, "Selecting BB %d\n",
 781                                    best_edge->dest->index);
 782                         break;
 783                       }
 784
 785                   bb->aux = best_edge->dest;
 786                   bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
 787                   bb = best_edge->dest;
 788                 }
 789             }
 790         }
 791       while (best_edge);
 792       trace->last = bb;
 793       bbd[trace->first->index].start_of_trace = *n_traces - 1;
 794       bbd[trace->last->index].end_of_trace = *n_traces - 1;
 795
 796       /* The trace is terminated so we have to recount the keys in heap
 797          (some block can have a lower key because now one of its predecessors
 798          is an end of the trace).  */
 799       FOR_EACH_EDGE (e, ei, bb->succs)
 800         {
 801           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
 802               || bb_visited_trace (e->dest))
 803             continue;
 804
 805           if (bbd[e->dest->index].heap)
 806             {
 807               key = bb_to_key (e->dest);
 808               if (key != bbd[e->dest->index].node->get_key ())
 809                 {
 810                   if (dump_file)
 811                     {
 812                       fprintf (dump_file,
 813                                "Changing key for bb %d from %ld to %ld.\n",
 814                                e->dest->index,
 815                                (long) bbd[e->dest->index].node->get_key (), key);
 816                     }
 817                   bbd[e->dest->index].heap->replace_key
 818                     (bbd[e->dest->index].node, key);
 819                 }
 820             }
 821         }
 822     }
 823
 824   delete (*heap);
 825
 826   /* "Return" the new heap.  */
 827   *heap = new_heap;
 828 }
 829
 830 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 831    it to trace after BB, mark OLD_BB visited and update pass' data structures
 832    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 833
 834 static basic_block
 835 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 836 {
 837   basic_block new_bb;
 838
 839   new_bb = duplicate_block (old_bb, e, bb);
 840   BB_COPY_PARTITION (new_bb, old_bb);
 841
 842   gcc_assert (e->dest == new_bb);
 843
 844   if (dump_file)
 845     fprintf (dump_file,
 846              "Duplicated bb %d (created bb %d)\n",
 847              old_bb->index, new_bb->index);
 848
 849   if (new_bb->index >= array_size
 850       || last_basic_block_for_fn (cfun) > array_size)
 851     {
 852       int i;
 853       int new_size;
 854
 855       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
 856       new_size = GET_ARRAY_SIZE (new_size);
 857       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
 858       for (i = array_size; i < new_size; i++)
 859         {
 860           bbd[i].start_of_trace = -1;
 861           bbd[i].end_of_trace = -1;
 862           bbd[i].in_trace = -1;
 863           bbd[i].visited = 0;
 864           bbd[i].heap = NULL;
 865           bbd[i].node = NULL;
 866         }
 867       array_size = new_size;
 868
 869       if (dump_file)
 870         {
 871           fprintf (dump_file,
 872                    "Growing the dynamic array to %d elements.\n",
 873                    array_size);
 874         }
 875     }
 876
 877   gcc_assert (!bb_visited_trace (e->dest));
 878   mark_bb_visited (new_bb, trace);
 879   new_bb->aux = bb->aux;
 880   bb->aux = new_bb;
 881
 882   bbd[new_bb->index].in_trace = trace;
 883
 884   return new_bb;
 885 }
 886
 887 /* Compute and return the key (for the heap) of the basic block BB.  */
 888
 889 static long
 890 bb_to_key (basic_block bb)
 891 {
 892   edge e;
 893   edge_iterator ei;
 894   int priority = 0;
 895
 896   /* Use index as key to align with its original order.  */
 897   if (optimize_function_for_size_p (cfun))
 898     return bb->index;
 899
 900   /* Do not start in probably never executed blocks.  */
 901
 902   if (BB_PARTITION (bb) == BB_COLD_PARTITION
 903       || probably_never_executed_bb_p (cfun, bb))
 904     return BB_FREQ_MAX;
 905
 906   /* Prefer blocks whose predecessor is an end of some trace
 907      or whose predecessor edge is EDGE_DFS_BACK.  */
 908   FOR_EACH_EDGE (e, ei, bb->preds)
 909     {
 910       if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
 911            && bbd[e->src->index].end_of_trace >= 0)
 912           || (e->flags & EDGE_DFS_BACK))
 913         {
 914           int edge_freq = EDGE_FREQUENCY (e);
 915
 916           if (edge_freq > priority)
 917             priority = edge_freq;
 918         }
 919     }
 920
 921   if (priority)
 922     /* The block with priority should have significantly lower key.  */
 923     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
 924
 925   return -bb->frequency;
 926 }
 927
 928 /* Return true when the edge E from basic block BB is better than the temporary
 929    best edge (details are in function).  The probability of edge E is PROB. The
 930    frequency of the successor is FREQ.  The current best probability is
 931    BEST_PROB, the best frequency is BEST_FREQ.
 932    The edge is considered to be equivalent when PROB does not differ much from
 933    BEST_PROB; similarly for frequency.  */
 934
 935 static bool
 936 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
 937                int best_prob, int best_freq, const_edge cur_best_edge)
 938 {
 939   bool is_better_edge;
 940
 941   /* The BEST_* values do not have to be best, but can be a bit smaller than
 942      maximum values.  */
 943   int diff_prob = best_prob / 10;
 944   int diff_freq = best_freq / 10;
 945
 946   /* The smaller one is better to keep the original order.  */
 947   if (optimize_function_for_size_p (cfun))
 948     return !cur_best_edge
 949            || cur_best_edge->dest->index > e->dest->index;
 950
 951   if (prob > best_prob + diff_prob)
 952     /* The edge has higher probability than the temporary best edge.  */
 953     is_better_edge = true;
 954   else if (prob < best_prob - diff_prob)
 955     /* The edge has lower probability than the temporary best edge.  */
 956     is_better_edge = false;
 957   else if (freq < best_freq - diff_freq)
 958     /* The edge and the temporary best edge  have almost equivalent
 959        probabilities.  The higher frequency of a successor now means
 960        that there is another edge going into that successor.
 961        This successor has lower frequency so it is better.  */
 962     is_better_edge = true;
 963   else if (freq > best_freq + diff_freq)
 964     /* This successor has higher frequency so it is worse.  */
 965     is_better_edge = false;
 966   else if (e->dest->prev_bb == bb)
 967     /* The edges have equivalent probabilities and the successors
 968        have equivalent frequencies.  Select the previous successor.  */
 969     is_better_edge = true;
 970   else
 971     is_better_edge = false;
 972
 973   /* If we are doing hot/cold partitioning, make sure that we always favor
 974      non-crossing edges over crossing edges.  */
 975
 976   if (!is_better_edge
 977       && flag_reorder_blocks_and_partition
 978       && cur_best_edge
 979       && (cur_best_edge->flags & EDGE_CROSSING)
 980       && !(e->flags & EDGE_CROSSING))
 981     is_better_edge = true;
 982
 983   return is_better_edge;
 984 }
 985
 986 /* Return true when the edge E is better than the temporary best edge
 987    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
 988    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
 989    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
 990    TRACES record the information about traces.
 991    When optimizing for size, the edge with smaller index is better.
 992    When optimizing for speed, the edge with bigger probability or longer trace
 993    is better.  */
 994
 995 static bool
 996 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
 997                        const_edge cur_best_edge, struct trace *traces)
 998 {
 999   int e_index;
1000   int b_index;
1001   bool is_better_edge;
1002
1003   if (!cur_best_edge)
1004     return true;
1005
1006   if (optimize_function_for_size_p (cfun))
1007     {
1008       e_index = src_index_p ? e->src->index : e->dest->index;
1009       b_index = src_index_p ? cur_best_edge->src->index
1010                               : cur_best_edge->dest->index;
1011       /* The smaller one is better to keep the original order.  */
1012       return b_index > e_index;
1013     }
1014
1015   if (src_index_p)
1016     {
1017       e_index = e->src->index;
1018
1019       if (e->probability > cur_best_edge->probability)
1020         /* The edge has higher probability than the temporary best edge.  */
1021         is_better_edge = true;
1022       else if (e->probability < cur_best_edge->probability)
1023         /* The edge has lower probability than the temporary best edge.  */
1024         is_better_edge = false;
1025       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1026         /* The edge and the temporary best edge have equivalent probabilities.
1027            The edge with longer trace is better.  */
1028         is_better_edge = true;
1029       else
1030         is_better_edge = false;
1031     }
1032   else
1033     {
1034       e_index = e->dest->index;
1035
1036       if (e->probability > cur_best_edge->probability)
1037         /* The edge has higher probability than the temporary best edge.  */
1038         is_better_edge = true;
1039       else if (e->probability < cur_best_edge->probability)
1040         /* The edge has lower probability than the temporary best edge.  */
1041         is_better_edge = false;
1042       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1043         /* The edge and the temporary best edge have equivalent probabilities.
1044            The edge with longer trace is better.  */
1045         is_better_edge = true;
1046       else
1047         is_better_edge = false;
1048     }
1049
1050   return is_better_edge;
1051 }
1052
1053 /* Connect traces in array TRACES, N_TRACES is the count of traces.

        The resulting block order is recorded by linking blocks through their
        AUX fields, starting at TRACES[0].first.  For every trace that is not
        connected yet, the function first walks backwards over predecessor
        edges of the first block of the trace to put unconnected traces in
        front of it, and then walks forwards over successor edges of the last
        block to append further traces.  When not optimizing for size and no
        successor trace can be appended directly, one block may be duplicated
        (subject to copy_bb_p) to reach another trace or the exit block; this
        is never done when the function has hot/cold partitions.

        If the function is partitioned and the traces start in more than one
        partition, the traces are processed in two passes, one per partition,
        beginning with the partition of TRACES[0].first.  */
1054
1055 static void
1056 connect_traces (int n_traces, struct trace *traces)
1057 {
1058   int i;
1059   bool *connected;
1060   bool two_passes;
1061   int last_trace;
1062   int current_pass;
1063   int current_partition;
1064   int freq_threshold;
1065   gcov_type count_threshold;
1066   bool for_size = optimize_function_for_size_p (cfun);
1067
       /* Thresholds that an edge has to reach in the block-duplication code
          below.  The two forms of the count computation differ only in the
          order of multiplication and division; presumably the second one is
          used to avoid overflow for large counts -- TODO confirm.  */
1068   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1069   if (max_entry_count < INT_MAX / 1000)
1070     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1071   else
1072     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1073
1074   connected = XCNEWVEC (bool, n_traces);
1075   last_trace = -1;
1076   current_pass = 1;
1077   current_partition = BB_PARTITION (traces[0].first);
1078   two_passes = false;
1079
       /* Two passes are needed only when some trace starts in a partition
          different from the one TRACES[0] starts in.  */
1080   if (crtl->has_bb_partition)
1081     for (i = 0; i < n_traces && !two_passes; i++)
1082       if (BB_PARTITION (traces[0].first)
1083           != BB_PARTITION (traces[i].first))
1084         two_passes = true;
1085
1086   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1087     {
1088       int t = i;
1089       int t2;
1090       edge e, best;
1091       int best_len;
1092
           /* The first pass ran off the end of TRACES: restart at trace 0
              and switch to the other partition for the second pass.  */
1093       if (i >= n_traces)
1094         {
1095           gcc_assert (two_passes && current_pass == 1);
1096           i = 0;
1097           t = i;
1098           current_pass = 2;
1099           if (current_partition == BB_HOT_PARTITION)
1100             current_partition = BB_COLD_PARTITION;
1101           else
1102             current_partition = BB_HOT_PARTITION;
1103         }
1104
1105       if (connected[t])
1106         continue;
1107
1108       if (two_passes
1109           && BB_PARTITION (traces[t].first) != current_partition)
1110         continue;
1111
1112       connected[t] = true;
1113
1114       /* Find the predecessor traces.  */
           /* T2 ends up as the number of the earliest trace of the chain that
              leads into trace T.  */
1115       for (t2 = t; t2 > 0;)
1116         {
1117           edge_iterator ei;
1118           best = NULL;
1119           best_len = 0;
1120           FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1121             {
1122               int si = e->src->index;
1123
1124               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1125                   && (e->flags & EDGE_CAN_FALLTHRU)
1126                   && !(e->flags & EDGE_COMPLEX)
1127                   && bbd[si].end_of_trace >= 0
1128                   && !connected[bbd[si].end_of_trace]
1129                   && (BB_PARTITION (e->src) == current_partition)
1130                   && connect_better_edge_p (e, true, best_len, best, traces))
1131                 {
1132                   best = e;
1133                   best_len = traces[bbd[si].end_of_trace].length;
1134                 }
1135             }
1136           if (best)
1137             {
1138               best->src->aux = best->dest;
1139               t2 = bbd[best->src->index].end_of_trace;
1140               connected[t2] = true;
1141
1142               if (dump_file)
1143                 {
1144                   fprintf (dump_file, "Connection: %d %d\n",
1145                            best->src->index, best->dest->index);
1146                 }
1147             }
1148           else
1149             break;
1150         }
1151
           /* Chain the head of the newly built chain after the last trace
              that was emitted so far.  */
1152       if (last_trace >= 0)
1153         traces[last_trace].last->aux = traces[t2].first;
1154       last_trace = t;
1155
1156       /* Find the successor traces.  */
1157       while (1)
1158         {
1159           /* Find the continuation of the chain.  */
1160           edge_iterator ei;
1161           best = NULL;
1162           best_len = 0;
1163           FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1164             {
1165               int di = e->dest->index;
1166
1167               if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1168                   && (e->flags & EDGE_CAN_FALLTHRU)
1169                   && !(e->flags & EDGE_COMPLEX)
1170                   && bbd[di].start_of_trace >= 0
1171                   && !connected[bbd[di].start_of_trace]
1172                   && (BB_PARTITION (e->dest) == current_partition)
1173                   && connect_better_edge_p (e, false, best_len, best, traces))
1174                 {
1175                   best = e;
1176                   best_len = traces[bbd[di].start_of_trace].length;
1177                 }
1178             }
1179
1180           if (for_size)
1181             {
1182               if (!best)
1183                 /* Stop finding the successor traces.  */
1184                 break;
1185
1186               /* It is OK to connect block n with block n + 1 or a block
1187                  before n.  For others, only connect to the loop header.  */
1188               if (best->dest->index > (traces[t].last->index + 1))
1189                 {
1190                   int count = EDGE_COUNT (best->dest->preds);
1191
1192                   FOR_EACH_EDGE (e, ei, best->dest->preds)
1193                     if (e->flags & EDGE_DFS_BACK)
1194                       count--;
1195
1196                   /* If dest has multiple predecessors, skip it.  We expect
1197                      that one predecessor with smaller index connects with it
1198                      later.  */
1199                   if (count != 1)
1200                     break;
1201                 }
1202
1203               /* Only connect Trace n with Trace n + 1.  It is conservative
1204                  to keep the order as close as possible to the original order.
1205                  It also helps to reduce long jumps.  */
1206               if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1207                 break;
1208
1209               if (dump_file)
1210                 fprintf (dump_file, "Connection: %d %d\n",
1211                          best->src->index, best->dest->index);
1212
1213               t = bbd[best->dest->index].start_of_trace;
1214               traces[last_trace].last->aux = traces[t].first;
1215               connected[t] = true;
1216               last_trace = t;
1217             }
1218           else if (best)
1219             {
1220               if (dump_file)
1221                 {
1222                   fprintf (dump_file, "Connection: %d %d\n",
1223                            best->src->index, best->dest->index);
1224                 }
1225               t = bbd[best->dest->index].start_of_trace;
1226               traces[last_trace].last->aux = traces[t].first;
1227               connected[t] = true;
1228               last_trace = t;
1229             }
1230           else
1231             {
1232               /* Try to connect the traces by duplication of 1 block.  */
1233               edge e2;
1234               basic_block next_bb = NULL;
1235               bool try_copy = false;
1236
1237               FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1238                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1239                     && (e->flags & EDGE_CAN_FALLTHRU)
1240                     && !(e->flags & EDGE_COMPLEX)
1241                     && (!best || e->probability > best->probability))
1242                   {
                         /* This EI shadows the iterator of the enclosing
                            loop and is used only for the walk over E2.  */
1243                     edge_iterator ei;
1244                     edge best2 = NULL;
1245                     int best2_len = 0;
1246
1247                     /* If the destination is a start of a trace which is only
1248                        one block long, then no need to search the successor
1249                        blocks of the trace.  Accept it.  */
1250                     if (bbd[e->dest->index].start_of_trace >= 0
1251                         && traces[bbd[e->dest->index].start_of_trace].length
1252                            == 1)
1253                       {
1254                         best = e;
1255                         try_copy = true;
1256                         continue;
1257                       }
1258
1259                     FOR_EACH_EDGE (e2, ei, e->dest->succs)
1260                       {
1261                         int di = e2->dest->index;
1262
1263                         if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1264                             || ((e2->flags & EDGE_CAN_FALLTHRU)
1265                                 && !(e2->flags & EDGE_COMPLEX)
1266                                 && bbd[di].start_of_trace >= 0
1267                                 && !connected[bbd[di].start_of_trace]
1268                                 && BB_PARTITION (e2->dest) == current_partition
1269                                 && EDGE_FREQUENCY (e2) >= freq_threshold
1270                                 && e2->count >= count_threshold
1271                                 && (!best2
1272                                     || e2->probability > best2->probability
1273                                     || (e2->probability == best2->probability
1274                                         && traces[bbd[di].start_of_trace].length
1275                                            > best2_len))))
1276                           {
1277                             best = e;
1278                             best2 = e2;
1279                             if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1280                               best2_len = traces[bbd[di].start_of_trace].length;
1281                             else
1282                               best2_len = INT_MAX;
1283                             next_bb = e2->dest;
1284                             try_copy = true;
1285                           }
1286                       }
1287                   }
1288
                   /* No block is duplicated here when the function has been
                      split into partitions.  */
1289               if (crtl->has_bb_partition)
1290                 try_copy = false;
1291
1292               /* Copy tiny blocks always; copy larger blocks only when the
1293                  edge is traversed frequently enough.  */
1294               if (try_copy
1295                   && copy_bb_p (best->dest,
1296                                 optimize_edge_for_speed_p (best)
1297                                 && EDGE_FREQUENCY (best) >= freq_threshold
1298                                 && best->count >= count_threshold))
1299                 {
1300                   basic_block new_bb;
1301
1302                   if (dump_file)
1303                     {
1304                       fprintf (dump_file, "Connection: %d %d ",
1305                                traces[t].last->index, best->dest->index);
1306                       if (!next_bb)
1307                         fputc ('\n', dump_file);
1308                       else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1309                         fprintf (dump_file, "exit\n");
1310                       else
1311                         fprintf (dump_file, "%d\n", next_bb->index);
1312                     }
1313
1314                   new_bb = copy_bb (best->dest, best, traces[t].last, t);
1315                   traces[t].last = new_bb;
1316                   if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1317                     {
1318                       t = bbd[next_bb->index].start_of_trace;
1319                       traces[last_trace].last->aux = traces[t].first;
1320                       connected[t] = true;
1321                       last_trace = t;
1322                     }
1323                   else
1324                     break;      /* Stop finding the successor traces.  */
1325                 }
1326               else
1327                 break;  /* Stop finding the successor traces.  */
1328             }
1329         }
1330     }
1331
       /* Dump the final order by following the AUX chain from the first block
          of trace 0.  */
1332   if (dump_file)
1333     {
1334       basic_block bb;
1335
1336       fprintf (dump_file, "Final order:\n");
1337       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1338         fprintf (dump_file, "%d ", bb->index);
1339       fprintf (dump_file, "\n");
1340       fflush (dump_file);
1341     }
1342
1343   FREE (connected);
1344 }
1345
1346 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1347    when code size is allowed to grow by duplication.  */
1348
1349 static bool
1350 copy_bb_p (const_basic_block bb, int code_may_grow)
1351 {
1352   int size = 0;
1353   int max_size = uncond_jump_length;
1354   rtx_insn *insn;
1355
1356   if (!bb->frequency)
1357     return false;
1358   if (EDGE_COUNT (bb->preds) < 2)
1359     return false;
1360   if (!can_duplicate_block_p (bb))
1361     return false;
1362
1363   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1364   if (EDGE_COUNT (bb->succs) > 8)
1365     return false;
1366
1367   if (code_may_grow && optimize_bb_for_speed_p (bb))
1368     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1369
1370   FOR_BB_INSNS (bb, insn)
1371     {
1372       if (INSN_P (insn))
1373         size += get_attr_min_length (insn);
1374     }
1375
1376   if (size <= max_size)
1377     return true;
1378
1379   if (dump_file)
1380     {
1381       fprintf (dump_file,
1382                "Block %d can't be copied because its size = %d.\n",
1383                bb->index, size);
1384     }
1385
1386   return false;
1387 }
1388
1389 /* Return the length of unconditional jump instruction.  */
1390
1391 int
1392 get_uncond_jump_length (void)
1393 {
1394   int length;
1395
1396   start_sequence ();
1397   rtx_code_label *label = emit_label (gen_label_rtx ());
1398   rtx_insn *jump = emit_jump_insn (gen_jump (label));
1399   length = get_attr_min_length (jump);
1400   end_sequence ();
1401
1402   return length;
1403 }
1404
1405 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1406    Duplicate the landing pad and split the edges so that no EH edge
1407    crosses partitions.  */
1408
1409 static void
1410 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1411 {
1412   eh_landing_pad new_lp;
1413   basic_block new_bb, last_bb, post_bb;
1414   rtx_insn *jump;
1415   unsigned new_partition;
1416   edge_iterator ei;
1417   edge e;
1418
1419   /* Generate the new landing-pad structure.  */
1420   new_lp = gen_eh_landing_pad (old_lp->region);
1421   new_lp->post_landing_pad = old_lp->post_landing_pad;
1422   new_lp->landing_pad = gen_label_rtx ();
1423   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1424
1425   /* Put appropriate instructions in new bb.  */
1426   rtx_code_label *new_label = emit_label (new_lp->landing_pad);
1427
1428   expand_dw2_landing_pad_for_region (old_lp->region);
1429
1430   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1431   post_bb = single_succ (post_bb);
1432   rtx_code_label *post_label = block_label (post_bb);
1433   jump = emit_jump_insn (gen_jump (post_label));
1434   JUMP_LABEL (jump) = post_label;
1435
1436   /* Create new basic block to be dest for lp.  */
1437   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1438   new_bb = create_basic_block (new_label, jump, last_bb);
1439   new_bb->aux = last_bb->aux;
1440   last_bb->aux = new_bb;
1441
1442   emit_barrier_after_bb (new_bb);
1443
1444   make_edge (new_bb, post_bb, 0);
1445
1446   /* Make sure new bb is in the other partition.  */
1447   new_partition = BB_PARTITION (old_bb);
1448   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1449   BB_SET_PARTITION (new_bb, new_partition);
1450
1451   /* Fix up the edges.  */
1452   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1453     if (BB_PARTITION (e->src) == new_partition)
1454       {
1455         rtx_insn *insn = BB_END (e->src);
1456         rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1457
1458         gcc_assert (note != NULL);
1459         gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1460         XEXP (note, 0) = GEN_INT (new_lp->index);
1461
1462         /* Adjust the edge to the new destination.  */
1463         redirect_edge_succ (e, new_bb);
1464       }
1465     else
1466       ei_next (&ei);
1467 }
1468
1469
1470 /* Ensure that all hot bbs are included in a hot path through the
1471    procedure. This is done by calling this function twice, once
1472    with WALK_UP true (to look for paths from the entry to hot bbs) and
1473    once with WALK_UP false (to look for paths from hot bbs to the exit).
1474    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1475    to BBS_IN_HOT_PARTITION.  */
1476
1477 static unsigned int
1478 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1479                     vec<basic_block> *bbs_in_hot_partition)
1480 {
1481   /* Callers check this.  */
1482   gcc_checking_assert (cold_bb_count);
1483
1484   /* Keep examining hot bbs while we still have some left to check
1485      and there are remaining cold bbs.  */
1486   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1487   while (! hot_bbs_to_check.is_empty ()
1488          && cold_bb_count)
1489     {
1490       basic_block bb = hot_bbs_to_check.pop ();
1491       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1492       edge e;
1493       edge_iterator ei;
1494       int highest_probability = 0;
1495       int highest_freq = 0;
1496       gcov_type highest_count = 0;
1497       bool found = false;
1498
1499       /* Walk the preds/succs and check if there is at least one already
1500          marked hot. Keep track of the most frequent pred/succ so that we
1501          can mark it hot if we don't find one.  */
1502       FOR_EACH_EDGE (e, ei, edges)
1503         {
1504           basic_block reach_bb = walk_up ? e->src : e->dest;
1505
1506           if (e->flags & EDGE_DFS_BACK)
1507             continue;
1508
1509           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1510           {
1511             found = true;
1512             break;
1513           }
1514           /* The following loop will look for the hottest edge via
1515              the edge count, if it is non-zero, then fallback to the edge
1516              frequency and finally the edge probability.  */
1517           if (e->count > highest_count)
1518             highest_count = e->count;
1519           int edge_freq = EDGE_FREQUENCY (e);
1520           if (edge_freq > highest_freq)
1521             highest_freq = edge_freq;
1522           if (e->probability > highest_probability)
1523             highest_probability = e->probability;
1524         }
1525
1526       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1527          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1528          then the most frequent pred (or succ) needs to be adjusted.  In the
1529          case where multiple preds/succs have the same frequency (e.g. a
1530          50-50 branch), then both will be adjusted.  */
1531       if (found)
1532         continue;
1533
1534       FOR_EACH_EDGE (e, ei, edges)
1535         {
1536           if (e->flags & EDGE_DFS_BACK)
1537             continue;
1538           /* Select the hottest edge using the edge count, if it is non-zero,
1539              then fallback to the edge frequency and finally the edge
1540              probability.  */
1541           if (highest_count)
1542             {
1543               if (e->count < highest_count)
1544                 continue;
1545             }
1546           else if (highest_freq)
1547             {
1548               if (EDGE_FREQUENCY (e) < highest_freq)
1549                 continue;
1550             }
1551           else if (e->probability < highest_probability)
1552             continue;
1553
1554           basic_block reach_bb = walk_up ? e->src : e->dest;
1555
1556           /* We have a hot bb with an immediate dominator that is cold.
1557              The dominator needs to be re-marked hot.  */
1558           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1559           cold_bb_count--;
1560
1561           /* Now we need to examine newly-hot reach_bb to see if it is also
1562              dominated by a cold bb.  */
1563           bbs_in_hot_partition->safe_push (reach_bb);
1564           hot_bbs_to_check.safe_push (reach_bb);
1565         }
1566     }
1567
1568   return cold_bb_count;
1569 }
1570
1571
1572 /* Find the basic blocks that are rarely executed and need to be moved to
1573    a separate section of the .o file (to cut down on paging and improve
1574    cache locality).  Return a vector of all edges that cross.  */
1575
1576 static vec<edge>
1577 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1578 {
1579   vec<edge> crossing_edges = vNULL;
1580   basic_block bb;
1581   edge e;
1582   edge_iterator ei;
1583   unsigned int cold_bb_count = 0;
1584   auto_vec<basic_block> bbs_in_hot_partition;
1585
1586   /* Mark which partition (hot/cold) each basic block belongs in.  */
1587   FOR_EACH_BB_FN (bb, cfun)
1588     {
1589       bool cold_bb = false;
1590
1591       if (probably_never_executed_bb_p (cfun, bb))
1592         {
1593           /* Handle profile insanities created by upstream optimizations
1594              by also checking the incoming edge weights. If there is a non-cold
1595              incoming edge, conservatively prevent this block from being split
1596              into the cold section.  */
1597           cold_bb = true;
1598           FOR_EACH_EDGE (e, ei, bb->preds)
1599             if (!probably_never_executed_edge_p (cfun, e))
1600               {
1601                 cold_bb = false;
1602                 break;
1603               }
1604         }
1605       if (cold_bb)
1606         {
1607           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1608           cold_bb_count++;
1609         }
1610       else
1611         {
1612           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1613           bbs_in_hot_partition.safe_push (bb);
1614         }
1615     }
1616
1617   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1618      Several different possibilities may include cold bbs along all paths
1619      to/from a hot bb. One is that there are edge weight insanities
1620      due to optimization phases that do not properly update basic block profile
1621      counts. The second is that the entry of the function may not be hot, because
1622      it is entered fewer times than the number of profile training runs, but there
1623      is a loop inside the function that causes blocks within the function to be
1624      above the threshold for hotness. This is fixed by walking up from hot bbs
1625      to the entry block, and then down from hot bbs to the exit, performing
1626      partitioning fixups as necessary.  */
1627   if (cold_bb_count)
1628     {
1629       mark_dfs_back_edges ();
1630       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1631                                           &bbs_in_hot_partition);
1632       if (cold_bb_count)
1633         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1634     }
1635
1636   /* The format of .gcc_except_table does not allow landing pads to
1637      be in a different partition as the throw.  Fix this by either
1638      moving or duplicating the landing pads.  */
1639   if (cfun->eh->lp_array)
1640     {
1641       unsigned i;
1642       eh_landing_pad lp;
1643
1644       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1645         {
1646           bool all_same, all_diff;
1647
1648           if (lp == NULL
1649               || lp->landing_pad == NULL_RTX
1650               || !LABEL_P (lp->landing_pad))
1651             continue;
1652
1653           all_same = all_diff = true;
1654           bb = BLOCK_FOR_INSN (lp->landing_pad);
1655           FOR_EACH_EDGE (e, ei, bb->preds)
1656             {
1657               gcc_assert (e->flags & EDGE_EH);
1658               if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1659                 all_diff = false;
1660               else
1661                 all_same = false;
1662             }
1663
1664           if (all_same)
1665             ;
1666           else if (all_diff)
1667             {
1668               int which = BB_PARTITION (bb);
1669               which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1670               BB_SET_PARTITION (bb, which);
1671             }
1672           else
1673             fix_up_crossing_landing_pad (lp, bb);
1674         }
1675     }
1676
1677   /* Mark every edge that crosses between sections.  */
1678
1679   FOR_EACH_BB_FN (bb, cfun)
1680     FOR_EACH_EDGE (e, ei, bb->succs)
1681       {
1682         unsigned int flags = e->flags;
1683
1684         /* We should never have EDGE_CROSSING set yet.  */
1685         gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1686
1687         if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1688             && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1689             && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1690           {
1691             crossing_edges.safe_push (e);
1692             flags |= EDGE_CROSSING;
1693           }
1694
1695         /* Now that we've split eh edges as appropriate, allow landing pads
1696            to be merged with the post-landing pads.  */
1697         flags &= ~EDGE_PRESERVE;
1698
1699         e->flags = flags;
1700       }
1701
1702   return crossing_edges;
1703 }
1704
1705 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1706
1707 static void
1708 set_edge_can_fallthru_flag (void)
1709 {
1710   basic_block bb;
1711
1712   FOR_EACH_BB_FN (bb, cfun)
1713     {
1714       edge e;
1715       edge_iterator ei;
1716
1717       FOR_EACH_EDGE (e, ei, bb->succs)
1718         {
1719           e->flags &= ~EDGE_CAN_FALLTHRU;
1720
1721           /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1722           if (e->flags & EDGE_FALLTHRU)
1723             e->flags |= EDGE_CAN_FALLTHRU;
1724         }
1725
1726       /* If the BB ends with an invertible condjump all (2) edges are
1727          CAN_FALLTHRU edges.  */
1728       if (EDGE_COUNT (bb->succs) != 2)
1729         continue;
1730       if (!any_condjump_p (BB_END (bb)))
1731         continue;
1732
1733       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1734       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1735         continue;
1736       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1737       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1738       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1739     }
1740 }
1741
1742 /* If any destination of a crossing edge does not have a label, add label;
1743    Convert any easy fall-through crossing edges to unconditional jumps.  */
1744
1745 static void
1746 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1747 {
1748   size_t i;
1749   edge e;
1750
1751   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1752     {
1753       basic_block src = e->src;
1754       basic_block dest = e->dest;
1755       rtx_jump_insn *new_jump;
1756
1757       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1758         continue;
1759
1760       /* Make sure dest has a label.  */
1761       rtx_code_label *label = block_label (dest);
1762
1763       /* Nothing to do for non-fallthru edges.  */
1764       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1765         continue;
1766       if ((e->flags & EDGE_FALLTHRU) == 0)
1767         continue;
1768
1769       /* If the block does not end with a control flow insn, then we
1770          can trivially add a jump to the end to fixup the crossing.
1771          Otherwise the jump will have to go in a new bb, which will
1772          be handled by fix_up_fall_thru_edges function.  */
1773       if (control_flow_insn_p (BB_END (src)))
1774         continue;
1775
1776       /* Make sure there's only one successor.  */
1777       gcc_assert (single_succ_p (src));
1778
1779       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1780       BB_END (src) = new_jump;
1781       JUMP_LABEL (new_jump) = label;
1782       LABEL_NUSES (label) += 1;
1783
1784       emit_barrier_after_bb (src);
1785
1786       /* Mark edge as non-fallthru.  */
1787       e->flags &= ~EDGE_FALLTHRU;
1788     }
1789 }
1790
1791 /* Find any bb's where the fall-through edge is a crossing edge (note that
1792    these bb's must also contain a conditional jump or end with a call
1793    instruction; we've already dealt with fall-through edges for blocks
1794    that didn't have a conditional jump or didn't end with call instruction
1795    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1796    edge to non-crossing edge by inserting a new bb to fall-through into.
1797    The new bb will contain an unconditional jump (crossing edge) to the
1798    original fall through destination.  */
1799
1800 static void
1801 fix_up_fall_thru_edges (void)
1802 {
1803   basic_block cur_bb;
1804   basic_block new_bb;
1805   edge succ1;
1806   edge succ2;
1807   edge fall_thru;
1808   edge cond_jump = NULL;
1809   bool cond_jump_crosses;
1810   int invert_worked;
1811   rtx_insn *old_jump;
1812   rtx_code_label *fall_thru_label;
1813
1814   FOR_EACH_BB_FN (cur_bb, cfun)
1815     {
1816       fall_thru = NULL;
1817       if (EDGE_COUNT (cur_bb->succs) > 0)
1818         succ1 = EDGE_SUCC (cur_bb, 0);
1819       else
1820         succ1 = NULL;
1821
1822       if (EDGE_COUNT (cur_bb->succs) > 1)
1823         succ2 = EDGE_SUCC (cur_bb, 1);
1824       else
1825         succ2 = NULL;
1826
1827       /* Find the fall-through edge.  */
1828
1829       if (succ1
1830           && (succ1->flags & EDGE_FALLTHRU))
1831         {
1832           fall_thru = succ1;
1833           cond_jump = succ2;
1834         }
1835       else if (succ2
1836                && (succ2->flags & EDGE_FALLTHRU))
1837         {
1838           fall_thru = succ2;
1839           cond_jump = succ1;
1840         }
1841       else if (succ1
1842                && (block_ends_with_call_p (cur_bb)
1843                    || can_throw_internal (BB_END (cur_bb))))
1844         {
1845           edge e;
1846           edge_iterator ei;
1847
1848           FOR_EACH_EDGE (e, ei, cur_bb->succs)
1849             if (e->flags & EDGE_FALLTHRU)
1850               {
1851                 fall_thru = e;
1852                 break;
1853               }
1854         }
1855
1856       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1857         {
1858           /* Check to see if the fall-thru edge is a crossing edge.  */
1859
1860           if (fall_thru->flags & EDGE_CROSSING)
1861             {
1862               /* The fall_thru edge crosses; now check the cond jump edge, if
1863                  it exists.  */
1864
1865               cond_jump_crosses = true;
1866               invert_worked  = 0;
1867               old_jump = BB_END (cur_bb);
1868
1869               /* Find the jump instruction, if there is one.  */
1870
1871               if (cond_jump)
1872                 {
1873                   if (!(cond_jump->flags & EDGE_CROSSING))
1874                     cond_jump_crosses = false;
1875
1876                   /* We know the fall-thru edge crosses; if the cond
1877                      jump edge does NOT cross, and its destination is the
1878                      next block in the bb order, invert the jump
1879                      (i.e. fix it so the fall through does not cross and
1880                      the cond jump does).  */
1881
1882                   if (!cond_jump_crosses)
1883                     {
1884                       /* Find label in fall_thru block. We've already added
1885                          any missing labels, so there must be one.  */
1886
1887                       fall_thru_label = block_label (fall_thru->dest);
1888
1889                       if (old_jump && fall_thru_label)
1890                         {
1891                           rtx_jump_insn *old_jump_insn =
1892                                 dyn_cast <rtx_jump_insn *> (old_jump);
1893                           if (old_jump_insn)
1894                             invert_worked = invert_jump (old_jump_insn,
1895                                                          fall_thru_label, 0);
1896                         }
1897
1898                       if (invert_worked)
1899                         {
1900                           fall_thru->flags &= ~EDGE_FALLTHRU;
1901                           cond_jump->flags |= EDGE_FALLTHRU;
1902                           update_br_prob_note (cur_bb);
1903                           std::swap (fall_thru, cond_jump);
1904                           cond_jump->flags |= EDGE_CROSSING;
1905                           fall_thru->flags &= ~EDGE_CROSSING;
1906                         }
1907                     }
1908                 }
1909
1910               if (cond_jump_crosses || !invert_worked)
1911                 {
1912                   /* This is the case where both edges out of the basic
1913                      block are crossing edges. Here we will fix up the
1914                      fall through edge. The jump edge will be taken care
1915                      of later.  The EDGE_CROSSING flag of fall_thru edge
1916                      is unset before the call to force_nonfallthru
1917                      function because if a new basic-block is created
1918                      this edge remains in the current section boundary
1919                      while the edge between new_bb and the fall_thru->dest
1920                      becomes EDGE_CROSSING.  */
1921
1922                   fall_thru->flags &= ~EDGE_CROSSING;
1923                   new_bb = force_nonfallthru (fall_thru);
1924
1925                   if (new_bb)
1926                     {
1927                       new_bb->aux = cur_bb->aux;
1928                       cur_bb->aux = new_bb;
1929
1930                       /* This is done by force_nonfallthru_and_redirect.  */
1931                       gcc_assert (BB_PARTITION (new_bb)
1932                                   == BB_PARTITION (cur_bb));
1933
1934                       single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1935                     }
1936                   else
1937                     {
1938                       /* If a new basic-block was not created; restore
1939                          the EDGE_CROSSING flag.  */
1940                       fall_thru->flags |= EDGE_CROSSING;
1941                     }
1942
1943                   /* Add barrier after new jump */
1944                   emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1945                 }
1946             }
1947         }
1948     }
1949 }
1950
1951 /* This function checks the destination block of a "crossing jump" to
1952    see if it has any crossing predecessors that begin with a code label
1953    and end with an unconditional jump.  If so, it returns that predecessor
1954    block.  (This is to avoid creating lots of new basic blocks that all
1955    contain unconditional jumps to the same destination).  */
1956
1957 static basic_block
1958 find_jump_block (basic_block jump_dest)
1959 {
1960   basic_block source_bb = NULL;
1961   edge e;
1962   rtx_insn *insn;
1963   edge_iterator ei;
1964
1965   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1966     if (e->flags & EDGE_CROSSING)
1967       {
1968         basic_block src = e->src;
1969
1970         /* Check each predecessor to see if it has a label, and contains
1971            only one executable instruction, which is an unconditional jump.
1972            If so, we can use it.  */
1973
1974         if (LABEL_P (BB_HEAD (src)))
1975           for (insn = BB_HEAD (src);
1976                !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1977                insn = NEXT_INSN (insn))
1978             {
1979               if (INSN_P (insn)
1980                   && insn == BB_END (src)
1981                   && JUMP_P (insn)
1982                   && !any_condjump_p (insn))
1983                 {
1984                   source_bb = src;
1985                   break;
1986                 }
1987             }
1988
1989         if (source_bb)
1990           break;
1991       }
1992
1993   return source_bb;
1994 }
1995
1996 /* Find all BB's with conditional jumps that are crossing edges;
1997    insert a new bb and make the conditional jump branch to the new
1998    bb instead (make the new bb same color so conditional branch won't
1999    be a 'crossing' edge).  Insert an unconditional jump from the
2000    new bb to the original destination of the conditional jump.  */
2001
2002 static void
2003 fix_crossing_conditional_branches (void)
2004 {
2005   basic_block cur_bb;
2006   basic_block new_bb;
2007   basic_block dest;
2008   edge succ1;
2009   edge succ2;
2010   edge crossing_edge;
2011   edge new_edge;
2012   rtx set_src;
2013   rtx old_label = NULL_RTX;
2014   rtx_code_label *new_label;
2015
2016   FOR_EACH_BB_FN (cur_bb, cfun)
2017     {
2018       crossing_edge = NULL;
2019       if (EDGE_COUNT (cur_bb->succs) > 0)
2020         succ1 = EDGE_SUCC (cur_bb, 0);
2021       else
2022         succ1 = NULL;
2023
2024       if (EDGE_COUNT (cur_bb->succs) > 1)
2025         succ2 = EDGE_SUCC (cur_bb, 1);
2026       else
2027         succ2 = NULL;
2028
2029       /* We already took care of fall-through edges, so only one successor
2030          can be a crossing edge.  */
2031
2032       if (succ1 && (succ1->flags & EDGE_CROSSING))
2033         crossing_edge = succ1;
2034       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2035         crossing_edge = succ2;
2036
2037       if (crossing_edge)
2038         {
2039           rtx_insn *old_jump = BB_END (cur_bb);
2040
2041           /* Check to make sure the jump instruction is a
2042              conditional jump.  */
2043
2044           set_src = NULL_RTX;
2045
2046           if (any_condjump_p (old_jump))
2047             {
2048               if (GET_CODE (PATTERN (old_jump)) == SET)
2049                 set_src = SET_SRC (PATTERN (old_jump));
2050               else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2051                 {
2052                   set_src = XVECEXP (PATTERN (old_jump), 0,0);
2053                   if (GET_CODE (set_src) == SET)
2054                     set_src = SET_SRC (set_src);
2055                   else
2056                     set_src = NULL_RTX;
2057                 }
2058             }
2059
2060           if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2061             {
2062               rtx_jump_insn *old_jump_insn =
2063                         as_a <rtx_jump_insn *> (old_jump);
2064
2065               if (GET_CODE (XEXP (set_src, 1)) == PC)
2066                 old_label = XEXP (set_src, 2);
2067               else if (GET_CODE (XEXP (set_src, 2)) == PC)
2068                 old_label = XEXP (set_src, 1);
2069
2070               /* Check to see if new bb for jumping to that dest has
2071                  already been created; if so, use it; if not, create
2072                  a new one.  */
2073
2074               new_bb = find_jump_block (crossing_edge->dest);
2075
2076               if (new_bb)
2077                 new_label = block_label (new_bb);
2078               else
2079                 {
2080                   basic_block last_bb;
2081                   rtx_code_label *old_jump_target;
2082                   rtx_jump_insn *new_jump;
2083
2084                   /* Create new basic block to be dest for
2085                      conditional jump.  */
2086
2087                   /* Put appropriate instructions in new bb.  */
2088
2089                   new_label = gen_label_rtx ();
2090                   emit_label (new_label);
2091
2092                   gcc_assert (GET_CODE (old_label) == LABEL_REF);
2093                   old_jump_target = old_jump_insn->jump_target ();
2094                   new_jump = as_a <rtx_jump_insn *>
2095                                 (emit_jump_insn (gen_jump (old_jump_target)));
2096                   new_jump->set_jump_target (old_jump_target);
2097
2098                   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2099                   new_bb = create_basic_block (new_label, new_jump, last_bb);
2100                   new_bb->aux = last_bb->aux;
2101                   last_bb->aux = new_bb;
2102
2103                   emit_barrier_after_bb (new_bb);
2104
2105                   /* Make sure new bb is in same partition as source
2106                      of conditional branch.  */
2107                   BB_COPY_PARTITION (new_bb, cur_bb);
2108                 }
2109
2110               /* Make old jump branch to new bb.  */
2111
2112               redirect_jump (old_jump_insn, new_label, 0);
2113
2114               /* Remove crossing_edge as predecessor of 'dest'.  */
2115
2116               dest = crossing_edge->dest;
2117
2118               redirect_edge_succ (crossing_edge, new_bb);
2119
2120               /* Make a new edge from new_bb to old dest; new edge
2121                  will be a successor for new_bb and a predecessor
2122                  for 'dest'.  */
2123
2124               if (EDGE_COUNT (new_bb->succs) == 0)
2125                 new_edge = make_edge (new_bb, dest, 0);
2126               else
2127                 new_edge = EDGE_SUCC (new_bb, 0);
2128
2129               crossing_edge->flags &= ~EDGE_CROSSING;
2130               new_edge->flags |= EDGE_CROSSING;
2131             }
2132         }
2133     }
2134 }
2135
2136 /* Find any unconditional branches that cross between hot and cold
2137    sections.  Convert them into indirect jumps instead.  */
2138
2139 static void
2140 fix_crossing_unconditional_branches (void)
2141 {
2142   basic_block cur_bb;
2143   rtx_insn *last_insn;
2144   rtx label;
2145   rtx label_addr;
2146   rtx_insn *indirect_jump_sequence;
2147   rtx_insn *jump_insn = NULL;
2148   rtx new_reg;
2149   rtx_insn *cur_insn;
2150   edge succ;
2151
2152   FOR_EACH_BB_FN (cur_bb, cfun)
2153     {
2154       last_insn = BB_END (cur_bb);
2155
2156       if (EDGE_COUNT (cur_bb->succs) < 1)
2157         continue;
2158
2159       succ = EDGE_SUCC (cur_bb, 0);
2160
2161       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2162          this point, no crossing jumps should be conditional.  */
2163
2164       if (JUMP_P (last_insn)
2165           && (succ->flags & EDGE_CROSSING))
2166         {
2167           gcc_assert (!any_condjump_p (last_insn));
2168
2169           /* Make sure the jump is not already an indirect or table jump.  */
2170
2171           if (!computed_jump_p (last_insn)
2172               && !tablejump_p (last_insn, NULL, NULL))
2173             {
2174               /* We have found a "crossing" unconditional branch.  Now
2175                  we must convert it to an indirect jump.  First create
2176                  reference of label, as target for jump.  */
2177
2178               label = JUMP_LABEL (last_insn);
2179               label_addr = gen_rtx_LABEL_REF (Pmode, label);
2180               LABEL_NUSES (label) += 1;
2181
2182               /* Get a register to use for the indirect jump.  */
2183
2184               new_reg = gen_reg_rtx (Pmode);
2185
2186               /* Generate indirect the jump sequence.  */
2187
2188               start_sequence ();
2189               emit_move_insn (new_reg, label_addr);
2190               emit_indirect_jump (new_reg);
2191               indirect_jump_sequence = get_insns ();
2192               end_sequence ();
2193
2194               /* Make sure every instruction in the new jump sequence has
2195                  its basic block set to be cur_bb.  */
2196
2197               for (cur_insn = indirect_jump_sequence; cur_insn;
2198                    cur_insn = NEXT_INSN (cur_insn))
2199                 {
2200                   if (!BARRIER_P (cur_insn))
2201                     BLOCK_FOR_INSN (cur_insn) = cur_bb;
2202                   if (JUMP_P (cur_insn))
2203                     jump_insn = cur_insn;
2204                 }
2205
2206               /* Insert the new (indirect) jump sequence immediately before
2207                  the unconditional jump, then delete the unconditional jump.  */
2208
2209               emit_insn_before (indirect_jump_sequence, last_insn);
2210               delete_insn (last_insn);
2211
2212               JUMP_LABEL (jump_insn) = label;
2213               LABEL_NUSES (label)++;
2214
2215               /* Make BB_END for cur_bb be the jump instruction (NOT the
2216                  barrier instruction at the end of the sequence...).  */
2217
2218               BB_END (cur_bb) = jump_insn;
2219             }
2220         }
2221     }
2222 }
2223
2224 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2225
2226 static void
2227 update_crossing_jump_flags (void)
2228 {
2229   basic_block bb;
2230   edge e;
2231   edge_iterator ei;
2232
2233   FOR_EACH_BB_FN (bb, cfun)
2234     FOR_EACH_EDGE (e, ei, bb->succs)
2235       if (e->flags & EDGE_CROSSING)
2236         {
2237           if (JUMP_P (BB_END (bb))
2238               /* Some flags were added during fix_up_fall_thru_edges, via
2239                  force_nonfallthru_and_redirect.  */
2240               && !CROSSING_JUMP_P (BB_END (bb)))
2241             CROSSING_JUMP_P (BB_END (bb)) = 1;
2242           break;
2243         }
2244 }
2245
2246 /* Reorder basic blocks.  The main entry point to this file.  FLAGS is
2247    the set of flags to pass to cfg_layout_initialize().  */
2248
2249 static void
2250 reorder_basic_blocks (void)
2251 {
2252   int n_traces;
2253   int i;
2254   struct trace *traces;
2255
2256   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2257
2258   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2259     return;
2260
2261   set_edge_can_fallthru_flag ();
2262   mark_dfs_back_edges ();
2263
2264   /* We are estimating the length of uncond jump insn only once since the code
2265      for getting the insn length always returns the minimal length now.  */
2266   if (uncond_jump_length == 0)
2267     uncond_jump_length = get_uncond_jump_length ();
2268
2269   /* We need to know some information for each basic block.  */
2270   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2271   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2272   for (i = 0; i < array_size; i++)
2273     {
2274       bbd[i].start_of_trace = -1;
2275       bbd[i].end_of_trace = -1;
2276       bbd[i].in_trace = -1;
2277       bbd[i].visited = 0;
2278       bbd[i].heap = NULL;
2279       bbd[i].node = NULL;
2280     }
2281
2282   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2283   n_traces = 0;
2284   find_traces (&n_traces, traces);
2285   connect_traces (n_traces, traces);
2286   FREE (traces);
2287   FREE (bbd);
2288
2289   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2290
2291   if (dump_file)
2292     {
2293       if (dump_flags & TDF_DETAILS)
2294         dump_reg_info (dump_file);
2295       dump_flow_info (dump_file, dump_flags);
2296     }
2297
2298   /* Signal that rtl_verify_flow_info_1 can now verify that there
2299      is at most one switch between hot/cold sections.  */
2300   crtl->bb_reorder_complete = true;
2301 }
2302
2303 /* Determine which partition the first basic block in the function
2304    belongs to, then find the first basic block in the current function
2305    that belongs to a different section, and insert a
2306    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2307    instruction stream.  When writing out the assembly code,
2308    encountering this note will make the compiler switch between the
2309    hot and cold text sections.  */
2310
2311 void
2312 insert_section_boundary_note (void)
2313 {
2314   basic_block bb;
2315   bool switched_sections = false;
2316   int current_partition = 0;
2317
2318   if (!crtl->has_bb_partition)
2319     return;
2320
2321   FOR_EACH_BB_FN (bb, cfun)
2322     {
2323       if (!current_partition)
2324         current_partition = BB_PARTITION (bb);
2325       if (BB_PARTITION (bb) != current_partition)
2326         {
2327           gcc_assert (!switched_sections);
2328           switched_sections = true;
2329           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2330           current_partition = BB_PARTITION (bb);
2331         }
2332     }
2333 }
2334
2335 namespace {
2336
2337 const pass_data pass_data_reorder_blocks =
2338 {
2339   RTL_PASS, /* type */
2340   "bbro", /* name */
2341   OPTGROUP_NONE, /* optinfo_flags */
2342   TV_REORDER_BLOCKS, /* tv_id */
2343   0, /* properties_required */
2344   0, /* properties_provided */
2345   0, /* properties_destroyed */
2346   0, /* todo_flags_start */
2347   0, /* todo_flags_finish */
2348 };
2349
2350 class pass_reorder_blocks : public rtl_opt_pass
2351 {
2352 public:
2353   pass_reorder_blocks (gcc::context *ctxt)
2354     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2355   {}
2356
2357   /* opt_pass methods: */
2358   virtual bool gate (function *)
2359     {
2360       if (targetm.cannot_modify_jumps_p ())
2361         return false;
2362       return (optimize > 0
2363               && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2364     }
2365
2366   virtual unsigned int execute (function *);
2367
2368 }; // class pass_reorder_blocks
2369
2370 unsigned int
2371 pass_reorder_blocks::execute (function *fun)
2372 {
2373   basic_block bb;
2374
2375   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2376      splitting possibly introduced more crossjumping opportunities.  */
2377   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2378
2379   reorder_basic_blocks ();
2380   cleanup_cfg (CLEANUP_EXPENSIVE);
2381
2382   FOR_EACH_BB_FN (bb, fun)
2383     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2384       bb->aux = bb->next_bb;
2385   cfg_layout_finalize ();
2386
2387   return 0;
2388 }
2389
2390 } // anon namespace
2391
2392 rtl_opt_pass *
2393 make_pass_reorder_blocks (gcc::context *ctxt)
2394 {
2395   return new pass_reorder_blocks (ctxt);
2396 }
2397
/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used to not unfactor them again,
   which can seriously pessimize code with many computed jumps in the source
   code, such as interpreters.  See e.g. PR15242.  */
2403
2404 namespace {
2405
/* Descriptor of the "compgotos" RTL pass.  It is accounted under the
   TV_REORDER_BLOCKS timer and declares no required, provided or
   destroyed properties and no TODO flags.  */
const pass_data pass_data_duplicate_computed_gotos =
{
  RTL_PASS, /* type */
  "compgotos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_REORDER_BLOCKS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2418
/* The pass object for computed goto duplication.  It is constructed from
   pass_data_duplicate_computed_gotos; gate and execute are only declared
   here and are defined out of line.  */
class pass_duplicate_computed_gotos : public rtl_opt_pass
{
public:
  pass_duplicate_computed_gotos (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_duplicate_computed_gotos
2431
2432 bool
2433 pass_duplicate_computed_gotos::gate (function *fun)
2434 {
2435   if (targetm.cannot_modify_jumps_p ())
2436     return false;
2437   return (optimize > 0
2438           && flag_expensive_optimizations
2439           && ! optimize_function_for_size_p (fun));
2440 }
2441
/* Run the computed-goto duplication pass on FUN.

   The first walk over the blocks links them into a chain through bb->aux
   in their current order and records in CANDIDATES every block that ends
   in a computed jump, can be duplicated, does not exceed MAX_SIZE and has
   no EDGE_COMPLEX incoming edge.  The second walk looks at each block not
   yet marked BB_VISITED; when its single successor is a candidate and the
   other checks in that loop pass, duplicate_block is called on the
   successor with the connecting edge and the block itself.

   CHANGED records whether any copy was made; fixup_partitions and
   cleanup_cfg run only in that case, while cfg_layout_finalize runs on
   both paths.  Always returns 0.  */
2442 unsigned int
2443 pass_duplicate_computed_gotos::execute (function *fun)
2444 {
2445   basic_block bb, new_bb;
2446   bitmap candidates;
2447   int max_size;
2448   bool changed = false;
2449
       /* Bail out before touching any state when FUN has at most one block
          beyond the NUM_FIXED_BLOCKS fixed ones.  */
2450   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2451     return 0;
2452
2453   clear_bb_flags ();
2454   cfg_layout_initialize (0);
2455
2456   /* We are estimating the length of uncond jump insn only once
2457      since the code for getting the insn length always returns
2458      the minimal length now.  */
2459   if (uncond_jump_length == 0)
2460     uncond_jump_length = get_uncond_jump_length ();
2461
       /* Size limit for a duplicable block: the parameter value scaled by
          the unconditional jump length.  */
2462   max_size
2463     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2464   candidates = BITMAP_ALLOC (NULL);
2465
2466   /* Look for blocks that end in a computed jump, and see if such blocks
2467      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2468      mark it in the candidates.  */
2469   FOR_EACH_BB_FN (bb, fun)
2470     {
2471       rtx_insn *insn;
2472       edge e;
2473       edge_iterator ei;
2474       int size, all_flags;
2475
2476       /* Build the reorder chain for the original order of blocks.  */
2477       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2478         bb->aux = bb->next_bb;
2479
2480       /* Obviously the block has to end in a computed jump.  */
2481       if (!computed_jump_p (BB_END (bb)))
2482         continue;
2483
2484       /* Only consider blocks that can be duplicated.  */
2485       if (CROSSING_JUMP_P (BB_END (bb))
2486           || !can_duplicate_block_p (bb))
2487         continue;
2488
2489       /* Make sure that the block is small enough.  SIZE accumulates
                get_attr_min_length over the insns, and the scan stops as soon
                as MAX_SIZE is exceeded.  */
2490       size = 0;
2491       FOR_BB_INSNS (bb, insn)
2492         if (INSN_P (insn))
2493           {
2494             size += get_attr_min_length (insn);
2495             if (size > max_size)
2496                break;
2497           }
2498       if (size > max_size)
2499         continue;
2500
2501       /* Final check: there must not be any incoming abnormal edges.  */
2502       all_flags = 0;
2503       FOR_EACH_EDGE (e, ei, bb->preds)
2504         all_flags |= e->flags;
2505       if (all_flags & EDGE_COMPLEX)
2506         continue;
2507
2508       bitmap_set_bit (candidates, bb->index);
2509     }
2510
2511   /* Nothing to do if there is no computed jump here.  */
2512   if (bitmap_empty_p (candidates))
2513     goto done;
2514
2515   /* Duplicate computed gotos.  */
2516   FOR_EACH_BB_FN (bb, fun)
2517     {
2518       if (bb->flags & BB_VISITED)
2519         continue;
2520
2521       bb->flags |= BB_VISITED;
2522
2523       /* BB must have one outgoing edge.  That edge must not lead to
2524          the exit block or the next block.
2525          The destination must have more than one predecessor.  */
2526       if (!single_succ_p (bb)
2527           || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2528           || single_succ (bb) == bb->next_bb
2529           || single_pred_p (single_succ (bb)))
2530         continue;
2531
2532       /* The successor block has to be a duplication candidate.  */
2533       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2534         continue;
2535
2536       /* Don't duplicate a partition crossing edge, which requires difficult
2537          fixup.  */
2538       if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2539         continue;
2540
2541       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
           /* Link the copy into the bb->aux chain directly after BB, and mark
              it visited so that it is skipped if this walk reaches it.  */
2542       new_bb->aux = bb->aux;
2543       bb->aux = new_bb;
2544       new_bb->flags |= BB_VISITED;
2545       changed = true;
2546     }
2547
       /* Reached after the duplication loop, or directly when CANDIDATES was
          empty (CHANGED is then still false).  */
2548  done:
2549   if (changed)
2550     {
2551       /* Duplicating blocks above will redirect edges and may cause hot
2552          blocks previously reached by both hot and cold blocks to become
2553          dominated only by cold blocks.  */
2554       fixup_partitions ();
2555
2556       /* Merge the duplicated blocks into predecessors, when possible.  */
2557       cfg_layout_finalize ();
2558       cleanup_cfg (0);
2559     }
2560   else
2561     cfg_layout_finalize ();
2562
2563   BITMAP_FREE (candidates);
2564   return 0;
2565 }
2566
2567 } // anon namespace
2568
/* Return a newly allocated pass_duplicate_computed_gotos object
   constructed with context CTXT.  Nothing in this function releases the
   object; presumably the caller takes ownership -- TODO confirm against
   the pass manager.  */
2569 rtl_opt_pass *
2570 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2571 {
2572   return new pass_duplicate_computed_gotos (ctxt);
2573 }
2574
2575 /* This function is the main 'entrance' for the optimization that
2576    partitions hot and cold basic blocks into separate sections of the
2577    .o file (to improve performance and cache locality).  Ideally it
2578    would be called after all optimizations that rearrange the CFG have
2579    been called.  However part of this optimization may introduce new
2580    register usage, so it must be called before register allocation has
2581    occurred.  This means that this optimization is actually called
2582    well before the optimization that reorders basic blocks (see
2583    function above).
2584
2585    This optimization checks the feedback information to determine
2586    which basic blocks are hot/cold, updates flags on the basic blocks
2587    to indicate which section they belong in.  This information is
2588    later used for writing out sections in the .o file.  Because hot
2589    and cold sections can be arbitrarily large (within the bounds of
2590    memory), far beyond the size of a single function, it is necessary
2591    to fix up all edges that cross section boundaries, to make sure the
2592    instructions used can actually span the required distance.  The
2593    fixes are described below.
2594
2595    Fall-through edges must be changed into jumps; it is not safe or
2596    legal to fall through across a section boundary.  Whenever a
2597    fall-through edge crossing a section boundary is encountered, a new
2598    basic block is inserted (in the same section as the fall-through
2599    source), and the fall through edge is redirected to the new basic
2600    block.  The new basic block contains an unconditional jump to the
2601    original fall-through target.  (If the unconditional jump is
2602    insufficient to cross section boundaries, that is dealt with a
2603    little later, see below).
2604
2605    In order to deal with architectures that have short conditional
2606    branches (which cannot span all of memory) we take any conditional
2607    jump that attempts to cross a section boundary and add a level of
2608    indirection: it becomes a conditional jump to a new basic block, in
2609    the same section.  The new basic block contains an unconditional
2610    jump to the original target, in the other section.
2611
2612    For those architectures whose unconditional branch is also
2613    incapable of reaching all of memory, those unconditional jumps are
2614    converted into indirect jumps, through a register.
2615
2616    IMPORTANT NOTE: This optimization causes some messy interactions
2617    with the cfg cleanup optimizations; those optimizations want to
2618    merge blocks wherever possible, and to collapse indirect jump
2619    sequences (change "A jumps to B jumps to C" directly into "A jumps
2620    to C").  Those optimizations can undo the jump fixes that
2621    partitioning is required to make (see above), in order to ensure
2622    that jumps attempting to cross section boundaries are really able
2623    to cover whatever distance the jump requires (on many architectures
2624    conditional or unconditional jumps are not able to reach all of
2625    memory).  Therefore tests have to be inserted into each such
2626    optimization to make sure that it does not undo stuff necessary to
2627    cross partition boundaries.  This would be much less of a problem
2628    if we could perform this optimization later in the compilation, but
2629    unfortunately the fact that we may need to create indirect jumps
2630    (through registers) requires that this optimization be performed
2631    before register allocation.
2632
2633    Hot and cold basic blocks are partitioned and put in separate
2634    sections of the .o file, to reduce paging and improve cache
2635    performance (hopefully).  This can result in bits of code from the
2636    same function being widely separated in the .o file.  However this
2637    is not obvious to the current bb structure.  Therefore we must take
2638    care to ensure that: 1). There are no fall_thru edges that cross
2639    between sections; 2). For those architectures which have "short"
2640    conditional branches, all conditional branches that attempt to
2641    cross between sections are rerouted through unconditional branches;
2642    and, 3). For those architectures which have "short" unconditional
2643    branches, all unconditional branches that attempt to cross between
2644    sections are converted to indirect jumps.
2645
2646    The code for fixing up fall_thru edges that cross between hot and
2647    cold basic blocks does so by creating new basic blocks containing
2648    unconditional branches to the appropriate label in the "other"
2649    section.  The new basic block is then put in the same (hot or cold)
2650    section as the source block of that edge, and the fall_thru edge
2651    is modified to fall into the new basic block instead.  By adding
2652    this level of indirection we end up with only unconditional branches
2653    crossing between hot and cold sections.
2654
2655    Conditional branches are dealt with by adding a level of indirection.
2656    A new basic block is added in the same (hot/cold) section as the
2657    conditional branch, and the conditional branch is retargeted to the
2658    new basic block.  The new basic block contains an unconditional branch
2659    to the original target of the conditional branch (in the other section).
2660
2661    Unconditional branches are dealt with by converting them into
2662    indirect jumps.  */
2663
2664 namespace {
2665
/* Static description of the hot/cold partitioning pass: an RTL pass named
   bbpart, timed under TV_REORDER_BLOCKS, that requires PROP_cfglayout.
   It provides and destroys no properties and sets no TODO flags.  */
2666 const pass_data pass_data_partition_blocks =
2667 {
2668   RTL_PASS, /* type */
2669   "bbpart", /* name */
2670   OPTGROUP_NONE, /* optinfo_flags */
2671   TV_REORDER_BLOCKS, /* tv_id */
2672   PROP_cfglayout, /* properties_required */
2673   0, /* properties_provided */
2674   0, /* properties_destroyed */
2675   0, /* todo_flags_start */
2676   0, /* todo_flags_finish */
2677 };
2678
/* Pass object for hot/cold basic block partitioning.  The constructor
   forwards pass_data_partition_blocks and CTXT to rtl_opt_pass; the class
   declares the gate and execute hooks, which are defined out of line.  */
2679 class pass_partition_blocks : public rtl_opt_pass
2680 {
2681 public:
2682   pass_partition_blocks (gcc::context *ctxt)
2683     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2684   {}
2685
2686   /* opt_pass methods: */
2687   virtual bool gate (function *);
2688   virtual unsigned int execute (function *);
2689
2690 }; // class pass_partition_blocks
2691
/* Return true when the partitioning pass should run on FUN.  All of the
   following must hold: flag_reorder_blocks_and_partition and optimize are
   nonzero, FUN is being optimized for speed, current_function_decl has no
   COMDAT group, and user_defined_section_attribute is not set.  Note that
   the COMDAT test reads current_function_decl, not FUN.  */
2692 bool
2693 pass_partition_blocks::gate (function *fun)
2694 {
2695   /* The optimization to partition hot/cold basic blocks into separate
2696      sections of the .o file does not work well with linkonce or with
2697      user defined section attributes.  Don't call it if either case
2698      arises.  */
2699   return (flag_reorder_blocks_and_partition
2700           && optimize
2701           /* See gate_handle_reorder_blocks.  We should not partition if
2702              we are going to omit the reordering.  */
2703           && optimize_function_for_speed_p (fun)
2704           && !DECL_COMDAT_GROUP (current_function_decl)
2705           && !user_defined_section_attribute);
2706 }
2707
/* Partition the basic blocks of FUN into hot and cold sections and fix up
   the edges that cross between them.

   Does nothing when FUN has at most one block beyond the NUM_FIXED_BLOCKS
   fixed ones, or when find_rarely_executed_basic_blocks_and_crossing_edges
   returns a vector that does not exist.  Otherwise it sets
   crtl->has_bb_partition, runs the fix-up helpers in the order written
   here, clears the bb->aux fields, releases the edge vector and, if FUN
   has an EH landing pad array, rebuilds the DF information from scratch.
   Always returns 0.  */
2708 unsigned
2709 pass_partition_blocks::execute (function *fun)
2710 {
2711   vec<edge> crossing_edges;
2712
2713   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2714     return 0;
2715
2716   df_set_flags (DF_DEFER_INSN_RESCAN);
2717
2718   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
       /* NOTE(review): DF_DEFER_INSN_RESCAN was set above and is not cleared
          on this early return -- presumably harmless, confirm.  */
2719   if (!crossing_edges.exists ())
2720     return 0;
2721
2722   crtl->has_bb_partition = true;
2723
2724   /* Make sure the source of any crossing edge ends in a jump and the
2725      destination of any crossing edge has a label.  */
2726   add_labels_and_missing_jumps (crossing_edges);
2727
2728   /* Convert all crossing fall_thru edges to non-crossing fall
2729      thrus into new blocks holding unconditional jumps (that jump
2730      to the original fall through dest).  */
2731   fix_up_fall_thru_edges ();
2732
2733   /* If the architecture does not have conditional branches that can
2734      span all of memory, convert crossing conditional branches into
2735      crossing unconditional branches.  */
2736   if (!HAS_LONG_COND_BRANCH)
2737     fix_crossing_conditional_branches ();
2738
2739   /* If the architecture does not have unconditional branches that
2740      can span all of memory, convert crossing unconditional branches
2741      into indirect jumps.  Since adding an indirect jump also adds
2742      a new register usage, update the register usage information as
2743      well.  */
2744   if (!HAS_LONG_UNCOND_BRANCH)
2745     fix_crossing_unconditional_branches ();
2746
2747   update_crossing_jump_flags ();
2748
2749   /* Clear bb->aux fields that the above routines were using.  */
2750   clear_aux_for_blocks ();
2751
2752   crossing_edges.release ();
2753
2754   /* ??? FIXME: DF generates the bb info for a block immediately.
2755      And by immediately, I mean *during* creation of the block.
2756
2757         #0  df_bb_refs_collect
2758         #1  in df_bb_refs_record
2759         #2  in create_basic_block_structure
2760
2761      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2762      will *always* fail, because no edges can have been added to the
2763      block yet.  Which of course means we don't add the right
2764      artificial refs, which means we fail df_verify (much) later.
2765
2766      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2767      that we also shouldn't grab data from the new blocks those new
2768      insns are in either.  In this way one can create the block, link
2769      it up properly, and have everything Just Work later, when deferred
2770      insns are processed.
2771
2772      In the meantime, we have no other option but to throw away all
2773      of the DF data and recompute it all.  */
2774   if (fun->eh->lp_array)
2775     {
2776       df_finish_pass (true);
2777       df_scan_alloc (NULL);
2778       df_scan_blocks ();
2779       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2780          data.  We blindly generated all of them when creating the new
2781          landing pad.  Delete those assignments we don't use.  */
2782       df_set_flags (DF_LR_RUN_DCE);
2783       df_analyze ();
2784     }
2785
2786   return 0;
2787 }
2788
2789 } // anon namespace
2790
/* Return a newly allocated pass_partition_blocks object constructed with
   context CTXT.  Nothing in this function releases the object; presumably
   the caller takes ownership -- TODO confirm against the pass manager.  */
2791 rtl_opt_pass *
2792 make_pass_partition_blocks (gcc::context *ctxt)
2793 {
2794   return new pass_partition_blocks (ctxt);
2795 }