kernel - support dummy reallocblks in devfs
[dragonfly.git] / contrib / gcc-5.0 / gcc / bb-reorder.c
blob7d5073b024717a2fcf60a99512342ab30d4a6008
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This (greedy) algorithm constructs traces in several rounds.
21 The construction starts from "seeds". The seed for the first round
22 is the entry point of the function.  When there is more than one seed,
23 the one with the lowest key in the heap is selected first (see bb_to_key).
24 Then the algorithm repeatedly adds the most probable successor to the end
25 of a trace. Finally it connects the traces.
27 There are two parameters: Branch Threshold and Exec Threshold.
28 If the probability of an edge to a successor of the current basic block is
29 lower than Branch Threshold or its frequency is lower than Exec Threshold,
30 then the successor will be the seed in one of the next rounds.
31 Each round has these parameters lower than the previous one.
32 The last round has to have these parameters set to zero so that the
33 remaining blocks are picked up.
35 The algorithm selects the most probable successor from all unvisited
36 successors and successors that have been added to this trace.
37 The other successors (that have not been "sent" to the next round) will be
38 other seeds for this round and the secondary traces will start from them.
39 If the successor has not been visited in this trace, it is added to the
40 trace (however, there is some heuristic for simple branches).
41 If the successor has been visited in this trace, a loop has been found.
42 If the loop has many iterations, the loop is rotated so that the source
43 block of the most probable edge going out of the loop is the last block
44 of the trace.
45 If the loop has few iterations and there is no edge from the last block of
46 the loop going out of the loop, the loop header is duplicated.
48 When connecting traces, the algorithm first checks whether there is an edge
49 from the last block of a trace to the first block of another trace.
50 When there are still some unconnected traces it checks whether there exists
51 a basic block BB such that BB is a successor of the last block of a trace
52 and BB is a predecessor of the first block of another trace. In this case,
53 BB is duplicated, added at the end of the first trace and the traces are
54 connected through it.
55 The rest of traces are simply connected so there will be a jump to the
56 beginning of the rest of traces.
58 The above description is for the full algorithm, which is used when the
59 function is optimized for speed. When the function is optimized for size,
60 in order to reduce long jumps and connect more fallthru edges, the
61 algorithm is modified as follows:
62 (1) Break long traces to short ones. A trace is broken at a block that has
63 multiple predecessors/ successors during trace discovery. When connecting
64 traces, only connect Trace n with Trace n + 1. This change reduces most
65 long jumps compared with the above algorithm.
66 (2) Ignore the edge probability and frequency for fallthru edges.
67 (3) Keep the original order of blocks when there is no chance to fall
68 through. We rely on the results of cfg_cleanup.
70 To implement the change for code size optimization, block's index is
71 selected as the key and all traces are found in one round.
73 References:
75 "Software Trace Cache"
76 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
77 http://citeseer.nj.nec.com/15361.html
81 #include "config.h"
82 #include "system.h"
83 #include "coretypes.h"
84 #include "tm.h"
85 #include "hash-set.h"
86 #include "machmode.h"
87 #include "vec.h"
88 #include "double-int.h"
89 #include "input.h"
90 #include "alias.h"
91 #include "symtab.h"
92 #include "wide-int.h"
93 #include "inchash.h"
94 #include "tree.h"
95 #include "rtl.h"
96 #include "regs.h"
97 #include "flags.h"
98 #include "output.h"
99 #include "target.h"
100 #include "hashtab.h"
101 #include "hard-reg-set.h"
102 #include "function.h"
103 #include "tm_p.h"
104 #include "obstack.h"
105 #include "statistics.h"
106 #include "real.h"
107 #include "fixed-value.h"
108 #include "insn-config.h"
109 #include "expmed.h"
110 #include "dojump.h"
111 #include "explow.h"
112 #include "calls.h"
113 #include "emit-rtl.h"
114 #include "varasm.h"
115 #include "stmt.h"
116 #include "expr.h"
117 #include "optabs.h"
118 #include "params.h"
119 #include "diagnostic-core.h"
120 #include "toplev.h" /* user_defined_section_attribute */
121 #include "tree-pass.h"
122 #include "dominance.h"
123 #include "cfg.h"
124 #include "cfgrtl.h"
125 #include "cfganal.h"
126 #include "cfgbuild.h"
127 #include "cfgcleanup.h"
128 #include "predict.h"
129 #include "basic-block.h"
130 #include "df.h"
131 #include "bb-reorder.h"
132 #include "hash-map.h"
133 #include "is-a.h"
134 #include "plugin-api.h"
135 #include "ipa-ref.h"
136 #include "cgraph.h"
137 #include "except.h"
138 #include "fibonacci_heap.h"
140 /* The number of rounds. In most cases there will only be 4 rounds, but
141 when partitioning hot and cold basic blocks into separate sections of
142 the object file there will be an extra round. */
143 #define N_ROUNDS 5
145 /* Stubs in case we don't have a return insn.
146 We have to check at run time too, not only compile time. */
148 #ifndef HAVE_return
149 #define HAVE_return 0
150 #define gen_return() NULL_RTX
151 #endif
154 struct target_bb_reorder default_target_bb_reorder;
155 #if SWITCHABLE_TARGET
156 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
157 #endif
159 #define uncond_jump_length \
160 (this_target_bb_reorder->x_uncond_jump_length)
162 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
163 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
165 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
166 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
168 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
169 block the edge destination is not duplicated while connecting traces. */
170 #define DUPLICATION_THRESHOLD 100
172 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
173 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
175 /* Structure to hold needed information for each basic block. */
176 typedef struct bbro_basic_block_data_def
178 /* Which trace is the bb start of (-1 means it is not a start of any). */
179 int start_of_trace;
181 /* Which trace is the bb end of (-1 means it is not an end of any). */
182 int end_of_trace;
184 /* Which trace is the bb in? */
185 int in_trace;
187 /* Which trace was this bb visited in? */
188 int visited;
190 /* Cached maximum frequency of interesting incoming edges.
191 Minus one means not yet computed. */
192 int priority;
194 /* Which heap is BB in (if any)? */
195 bb_heap_t *heap;
197 /* Which heap node is BB in (if any)? */
198 bb_heap_node_t *node;
199 } bbro_basic_block_data;
201 /* The current size of the following dynamic array. */
202 static int array_size;
204 /* The array which holds needed information for basic blocks. */
205 static bbro_basic_block_data *bbd;
207 /* To avoid frequent reallocation the size of arrays is greater than needed,
208 the number of elements is (not less than) 1.25 * size_wanted. */
209 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
211 /* Free the memory and set the pointer to NULL. */
212 #define FREE(P) (gcc_assert (P), free (P), P = 0)
214 /* Structure for holding information about a trace. */
215 struct trace
217 /* First and last basic block of the trace. */
218 basic_block first, last;
220 /* The round of the STC creation which this trace was found in. */
221 int round;
223 /* The length (i.e. the number of basic blocks) of the trace. */
224 int length;
227 /* Maximum frequency and count of one of the entry blocks. */
228 static int max_entry_frequency;
229 static gcov_type max_entry_count;
231 /* Local function prototypes. */
232 static void find_traces (int *, struct trace *);
233 static basic_block rotate_loop (edge, struct trace *, int);
234 static void mark_bb_visited (basic_block, int);
235 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
236 int, bb_heap_t **, int);
237 static basic_block copy_bb (basic_block, edge, basic_block, int);
238 static long bb_to_key (basic_block);
239 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
240 const_edge);
241 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
242 struct trace *);
243 static void connect_traces (int, struct trace *);
244 static bool copy_bb_p (const_basic_block, int);
245 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
247 /* Return the trace number in which BB was visited. */
249 static int
250 bb_visited_trace (const_basic_block bb)
252 gcc_assert (bb->index < array_size);
253 return bbd[bb->index].visited;
256 /* This function marks BB that it was visited in trace number TRACE. */
258 static void
259 mark_bb_visited (basic_block bb, int trace)
261 bbd[bb->index].visited = trace;
262 if (bbd[bb->index].heap)
264 bbd[bb->index].heap->delete_node (bbd[bb->index].node);
265 bbd[bb->index].heap = NULL;
266 bbd[bb->index].node = NULL;
270 /* Check to see if bb should be pushed into the next round of trace
271 collections or not. Reasons for pushing the block forward are 1).
272 If the block is cold, we are doing partitioning, and there will be
273 another round (cold partition blocks are not supposed to be
274 collected into traces until the very last round); or 2). There will
275 be another round, and the basic block is not "hot enough" for the
276 current round of trace collection. */
278 static bool
279 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
280 int exec_th, gcov_type count_th)
282 bool there_exists_another_round;
283 bool block_not_hot_enough;
285 there_exists_another_round = round < number_of_rounds - 1;
287 block_not_hot_enough = (bb->frequency < exec_th
288 || bb->count < count_th
289 || probably_never_executed_bb_p (cfun, bb));
291 if (there_exists_another_round
292 && block_not_hot_enough)
293 return true;
294 else
295 return false;
298 /* Find the traces for Software Trace Cache. Chain each trace through
299 RBI()->next. Store the number of traces to N_TRACES and description of
300 traces to TRACES. */
302 static void
303 find_traces (int *n_traces, struct trace *traces)
305 int i;
306 int number_of_rounds;
307 edge e;
308 edge_iterator ei;
309 bb_heap_t *heap = new bb_heap_t (LONG_MIN);
311 /* Add one extra round of trace collection when partitioning hot/cold
312 basic blocks into separate sections. The last round is for all the
313 cold blocks (and ONLY the cold blocks). */
315 number_of_rounds = N_ROUNDS - 1;
317 /* Insert entry points of function into heap. */
318 max_entry_frequency = 0;
319 max_entry_count = 0;
320 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
322 bbd[e->dest->index].heap = heap;
323 bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
324 if (e->dest->frequency > max_entry_frequency)
325 max_entry_frequency = e->dest->frequency;
326 if (e->dest->count > max_entry_count)
327 max_entry_count = e->dest->count;
330 /* Find the traces. */
331 for (i = 0; i < number_of_rounds; i++)
333 gcov_type count_threshold;
335 if (dump_file)
336 fprintf (dump_file, "STC - round %d\n", i + 1);
338 if (max_entry_count < INT_MAX / 1000)
339 count_threshold = max_entry_count * exec_threshold[i] / 1000;
340 else
341 count_threshold = max_entry_count / 1000 * exec_threshold[i];
343 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
344 max_entry_frequency * exec_threshold[i] / 1000,
345 count_threshold, traces, n_traces, i, &heap,
346 number_of_rounds);
348 delete heap;
350 if (dump_file)
352 for (i = 0; i < *n_traces; i++)
354 basic_block bb;
355 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
356 traces[i].round + 1);
357 for (bb = traces[i].first;
358 bb != traces[i].last;
359 bb = (basic_block) bb->aux)
360 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
361 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
363 fflush (dump_file);
367 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
368 (with sequential number TRACE_N). */
370 static basic_block
371 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
373 basic_block bb;
375 /* Information about the best end (end after rotation) of the loop. */
376 basic_block best_bb = NULL;
377 edge best_edge = NULL;
378 int best_freq = -1;
379 gcov_type best_count = -1;
380 /* The best edge is preferred when its destination is not visited yet
381 or is a start block of some trace. */
382 bool is_preferred = false;
384 /* Find the most frequent edge that goes out from current trace. */
385 bb = back_edge->dest;
388 edge e;
389 edge_iterator ei;
391 FOR_EACH_EDGE (e, ei, bb->succs)
392 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
393 && bb_visited_trace (e->dest) != trace_n
394 && (e->flags & EDGE_CAN_FALLTHRU)
395 && !(e->flags & EDGE_COMPLEX))
397 if (is_preferred)
399 /* The best edge is preferred. */
400 if (!bb_visited_trace (e->dest)
401 || bbd[e->dest->index].start_of_trace >= 0)
403 /* The current edge E is also preferred. */
404 int freq = EDGE_FREQUENCY (e);
405 if (freq > best_freq || e->count > best_count)
407 best_freq = freq;
408 best_count = e->count;
409 best_edge = e;
410 best_bb = bb;
414 else
416 if (!bb_visited_trace (e->dest)
417 || bbd[e->dest->index].start_of_trace >= 0)
419 /* The current edge E is preferred. */
420 is_preferred = true;
421 best_freq = EDGE_FREQUENCY (e);
422 best_count = e->count;
423 best_edge = e;
424 best_bb = bb;
426 else
428 int freq = EDGE_FREQUENCY (e);
429 if (!best_edge || freq > best_freq || e->count > best_count)
431 best_freq = freq;
432 best_count = e->count;
433 best_edge = e;
434 best_bb = bb;
439 bb = (basic_block) bb->aux;
441 while (bb != back_edge->dest);
443 if (best_bb)
445 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
446 the trace. */
447 if (back_edge->dest == trace->first)
449 trace->first = (basic_block) best_bb->aux;
451 else
453 basic_block prev_bb;
455 for (prev_bb = trace->first;
456 prev_bb->aux != back_edge->dest;
457 prev_bb = (basic_block) prev_bb->aux)
459 prev_bb->aux = best_bb->aux;
461 /* Try to get rid of uncond jump to cond jump. */
462 if (single_succ_p (prev_bb))
464 basic_block header = single_succ (prev_bb);
466 /* Duplicate HEADER if it is a small block containing cond jump
467 in the end. */
468 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
469 && !CROSSING_JUMP_P (BB_END (header)))
470 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
474 else
476 /* We have not found suitable loop tail so do no rotation. */
477 best_bb = back_edge->src;
479 best_bb->aux = NULL;
480 return best_bb;
483 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
484 not include basic blocks whose probability is lower than BRANCH_TH or whose
485 frequency is lower than EXEC_TH into traces (or whose count is lower than
486 COUNT_TH). Store the new traces into TRACES and modify the number of
487 traces *N_TRACES. Set the round (which the trace belongs to) to ROUND.
488 The function expects starting basic blocks to be in *HEAP and will delete
489 *HEAP and store starting points for the next round into new *HEAP. */
491 static void
492 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
493 struct trace *traces, int *n_traces, int round,
494 bb_heap_t **heap, int number_of_rounds)
496 /* Heap for discarded basic blocks which are possible starting points for
497 the next round. */
498 bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
499 bool for_size = optimize_function_for_size_p (cfun);
501 while (!(*heap)->empty ())
503 basic_block bb;
504 struct trace *trace;
505 edge best_edge, e;
506 long key;
507 edge_iterator ei;
509 bb = (*heap)->extract_min ();
510 bbd[bb->index].heap = NULL;
511 bbd[bb->index].node = NULL;
513 if (dump_file)
514 fprintf (dump_file, "Getting bb %d\n", bb->index);
516 /* If the BB's frequency is too low, send BB to the next round. When
517 partitioning hot/cold blocks into separate sections, make sure all
518 the cold blocks (and ONLY the cold blocks) go into the (extra) final
519 round. When optimizing for size, do not push to next round. */
521 if (!for_size
522 && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
523 count_th))
525 int key = bb_to_key (bb);
526 bbd[bb->index].heap = new_heap;
527 bbd[bb->index].node = new_heap->insert (key, bb);
529 if (dump_file)
530 fprintf (dump_file,
531 " Possible start point of next round: %d (key: %d)\n",
532 bb->index, key);
533 continue;
536 trace = traces + *n_traces;
537 trace->first = bb;
538 trace->round = round;
539 trace->length = 0;
540 bbd[bb->index].in_trace = *n_traces;
541 (*n_traces)++;
545 int prob, freq;
546 bool ends_in_call;
548 /* The probability and frequency of the best edge. */
549 int best_prob = INT_MIN / 2;
550 int best_freq = INT_MIN / 2;
552 best_edge = NULL;
553 mark_bb_visited (bb, *n_traces);
554 trace->length++;
556 if (dump_file)
557 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
558 bb->index, *n_traces - 1);
560 ends_in_call = block_ends_with_call_p (bb);
562 /* Select the successor that will be placed after BB. */
563 FOR_EACH_EDGE (e, ei, bb->succs)
565 gcc_assert (!(e->flags & EDGE_FAKE));
567 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
568 continue;
570 if (bb_visited_trace (e->dest)
571 && bb_visited_trace (e->dest) != *n_traces)
572 continue;
574 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
575 continue;
577 prob = e->probability;
578 freq = e->dest->frequency;
580 /* The only sensible preference for a call instruction is the
581 fallthru edge. Don't bother selecting anything else. */
582 if (ends_in_call)
584 if (e->flags & EDGE_CAN_FALLTHRU)
586 best_edge = e;
587 best_prob = prob;
588 best_freq = freq;
590 continue;
593 /* Edge that cannot be fallthru or improbable or infrequent
594 successor (i.e. it is unsuitable successor). When optimizing
595 for size, ignore the probability and frequency. */
596 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
597 || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
598 || e->count < count_th) && (!for_size)))
599 continue;
601 /* If partitioning hot/cold basic blocks, don't consider edges
602 that cross section boundaries. */
604 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
605 best_edge))
607 best_edge = e;
608 best_prob = prob;
609 best_freq = freq;
613 /* If the best destination has multiple predecessors, and can be
614 duplicated cheaper than a jump, don't allow it to be added
615 to a trace. We'll duplicate it when connecting traces. */
616 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
617 && copy_bb_p (best_edge->dest, 0))
618 best_edge = NULL;
620 /* If the best destination has multiple successors or predecessors,
621 don't allow it to be added when optimizing for size. This makes
622 sure predecessors with smaller index are handled before the best
623 destinarion. It breaks long trace and reduces long jumps.
625 Take if-then-else as an example.
631 If we do not remove the best edge B->D/C->D, the final order might
632 be A B D ... C. C is at the end of the program. If D's successors
633 and D are complicated, might need long jumps for A->C and C->D.
634 Similar issue for order: A C D ... B.
636 After removing the best edge, the final result will be ABCD/ ACBD.
637 It does not add jump compared with the previous order. But it
638 reduces the possibility of long jumps. */
639 if (best_edge && for_size
640 && (EDGE_COUNT (best_edge->dest->succs) > 1
641 || EDGE_COUNT (best_edge->dest->preds) > 1))
642 best_edge = NULL;
644 /* Add all non-selected successors to the heaps. */
645 FOR_EACH_EDGE (e, ei, bb->succs)
647 if (e == best_edge
648 || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
649 || bb_visited_trace (e->dest))
650 continue;
652 key = bb_to_key (e->dest);
654 if (bbd[e->dest->index].heap)
656 /* E->DEST is already in some heap. */
657 if (key != bbd[e->dest->index].node->get_key ())
659 if (dump_file)
661 fprintf (dump_file,
662 "Changing key for bb %d from %ld to %ld.\n",
663 e->dest->index,
664 (long) bbd[e->dest->index].node->get_key (),
665 key);
667 bbd[e->dest->index].heap->replace_key
668 (bbd[e->dest->index].node, key);
671 else
673 bb_heap_t *which_heap = *heap;
675 prob = e->probability;
676 freq = EDGE_FREQUENCY (e);
678 if (!(e->flags & EDGE_CAN_FALLTHRU)
679 || (e->flags & EDGE_COMPLEX)
680 || prob < branch_th || freq < exec_th
681 || e->count < count_th)
683 /* When partitioning hot/cold basic blocks, make sure
684 the cold blocks (and only the cold blocks) all get
685 pushed to the last round of trace collection. When
686 optimizing for size, do not push to next round. */
688 if (!for_size && push_to_next_round_p (e->dest, round,
689 number_of_rounds,
690 exec_th, count_th))
691 which_heap = new_heap;
694 bbd[e->dest->index].heap = which_heap;
695 bbd[e->dest->index].node = which_heap->insert (key, e->dest);
697 if (dump_file)
699 fprintf (dump_file,
700 " Possible start of %s round: %d (key: %ld)\n",
701 (which_heap == new_heap) ? "next" : "this",
702 e->dest->index, (long) key);
708 if (best_edge) /* Suitable successor was found. */
710 if (bb_visited_trace (best_edge->dest) == *n_traces)
712 /* We do nothing with one basic block loops. */
713 if (best_edge->dest != bb)
715 if (EDGE_FREQUENCY (best_edge)
716 > 4 * best_edge->dest->frequency / 5)
718 /* The loop has at least 4 iterations. If the loop
719 header is not the first block of the function
720 we can rotate the loop. */
722 if (best_edge->dest
723 != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
725 if (dump_file)
727 fprintf (dump_file,
728 "Rotating loop %d - %d\n",
729 best_edge->dest->index, bb->index);
731 bb->aux = best_edge->dest;
732 bbd[best_edge->dest->index].in_trace =
733 (*n_traces) - 1;
734 bb = rotate_loop (best_edge, trace, *n_traces);
737 else
739 /* The loop has less than 4 iterations. */
741 if (single_succ_p (bb)
742 && copy_bb_p (best_edge->dest,
743 optimize_edge_for_speed_p
744 (best_edge)))
746 bb = copy_bb (best_edge->dest, best_edge, bb,
747 *n_traces);
748 trace->length++;
753 /* Terminate the trace. */
754 break;
756 else
758 /* Check for a situation
766 where
767 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
768 >= EDGE_FREQUENCY (AC).
769 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
770 Best ordering is then A B C.
772 When optimizing for size, A B C is always the best order.
774 This situation is created for example by:
776 if (A) B;
781 FOR_EACH_EDGE (e, ei, bb->succs)
782 if (e != best_edge
783 && (e->flags & EDGE_CAN_FALLTHRU)
784 && !(e->flags & EDGE_COMPLEX)
785 && !bb_visited_trace (e->dest)
786 && single_pred_p (e->dest)
787 && !(e->flags & EDGE_CROSSING)
788 && single_succ_p (e->dest)
789 && (single_succ_edge (e->dest)->flags
790 & EDGE_CAN_FALLTHRU)
791 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
792 && single_succ (e->dest) == best_edge->dest
793 && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
794 || for_size))
796 best_edge = e;
797 if (dump_file)
798 fprintf (dump_file, "Selecting BB %d\n",
799 best_edge->dest->index);
800 break;
803 bb->aux = best_edge->dest;
804 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
805 bb = best_edge->dest;
809 while (best_edge);
810 trace->last = bb;
811 bbd[trace->first->index].start_of_trace = *n_traces - 1;
812 if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
814 bbd[trace->last->index].end_of_trace = *n_traces - 1;
815 /* Update the cached maximum frequency for interesting predecessor
816 edges for successors of the new trace end. */
817 FOR_EACH_EDGE (e, ei, trace->last->succs)
818 if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
819 bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
822 /* The trace is terminated so we have to recount the keys in heap
823 (some block can have a lower key because now one of its predecessors
824 is an end of the trace). */
825 FOR_EACH_EDGE (e, ei, bb->succs)
827 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
828 || bb_visited_trace (e->dest))
829 continue;
831 if (bbd[e->dest->index].heap)
833 key = bb_to_key (e->dest);
834 if (key != bbd[e->dest->index].node->get_key ())
836 if (dump_file)
838 fprintf (dump_file,
839 "Changing key for bb %d from %ld to %ld.\n",
840 e->dest->index,
841 (long) bbd[e->dest->index].node->get_key (), key);
843 bbd[e->dest->index].heap->replace_key
844 (bbd[e->dest->index].node, key);
850 delete (*heap);
852 /* "Return" the new heap. */
853 *heap = new_heap;
856 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
857 it to trace after BB, mark OLD_BB visited and update pass' data structures
858 (TRACE is a number of trace which OLD_BB is duplicated to). */
860 static basic_block
861 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
863 basic_block new_bb;
865 new_bb = duplicate_block (old_bb, e, bb);
866 BB_COPY_PARTITION (new_bb, old_bb);
868 gcc_assert (e->dest == new_bb);
870 if (dump_file)
871 fprintf (dump_file,
872 "Duplicated bb %d (created bb %d)\n",
873 old_bb->index, new_bb->index);
875 if (new_bb->index >= array_size
876 || last_basic_block_for_fn (cfun) > array_size)
878 int i;
879 int new_size;
881 new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
882 new_size = GET_ARRAY_SIZE (new_size);
883 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
884 for (i = array_size; i < new_size; i++)
886 bbd[i].start_of_trace = -1;
887 bbd[i].end_of_trace = -1;
888 bbd[i].in_trace = -1;
889 bbd[i].visited = 0;
890 bbd[i].priority = -1;
891 bbd[i].heap = NULL;
892 bbd[i].node = NULL;
894 array_size = new_size;
896 if (dump_file)
898 fprintf (dump_file,
899 "Growing the dynamic array to %d elements.\n",
900 array_size);
904 gcc_assert (!bb_visited_trace (e->dest));
905 mark_bb_visited (new_bb, trace);
906 new_bb->aux = bb->aux;
907 bb->aux = new_bb;
909 bbd[new_bb->index].in_trace = trace;
911 return new_bb;
914 /* Compute and return the key (for the heap) of the basic block BB. */
916 static long
917 bb_to_key (basic_block bb)
919 edge e;
920 edge_iterator ei;
922 /* Use index as key to align with its original order. */
923 if (optimize_function_for_size_p (cfun))
924 return bb->index;
926 /* Do not start in probably never executed blocks. */
928 if (BB_PARTITION (bb) == BB_COLD_PARTITION
929 || probably_never_executed_bb_p (cfun, bb))
930 return BB_FREQ_MAX;
932 /* Prefer blocks whose predecessor is an end of some trace
933 or whose predecessor edge is EDGE_DFS_BACK. */
934 int priority = bbd[bb->index].priority;
935 if (priority == -1)
937 priority = 0;
938 FOR_EACH_EDGE (e, ei, bb->preds)
940 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
941 && bbd[e->src->index].end_of_trace >= 0)
942 || (e->flags & EDGE_DFS_BACK))
944 int edge_freq = EDGE_FREQUENCY (e);
946 if (edge_freq > priority)
947 priority = edge_freq;
950 bbd[bb->index].priority = priority;
953 if (priority)
954 /* The block with priority should have significantly lower key. */
955 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
957 return -bb->frequency;
960 /* Return true when the edge E from basic block BB is better than the temporary
961 best edge (details are in function). The probability of edge E is PROB. The
962 frequency of the successor is FREQ. The current best probability is
963 BEST_PROB, the best frequency is BEST_FREQ.
964 The edge is considered to be equivalent when PROB does not differ much from
965 BEST_PROB; similarly for frequency. */
967 static bool
968 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
969 int best_prob, int best_freq, const_edge cur_best_edge)
971 bool is_better_edge;
973 /* The BEST_* values do not have to be best, but can be a bit smaller than
974 maximum values. */
975 int diff_prob = best_prob / 10;
976 int diff_freq = best_freq / 10;
978 /* The smaller one is better to keep the original order. */
979 if (optimize_function_for_size_p (cfun))
980 return !cur_best_edge
981 || cur_best_edge->dest->index > e->dest->index;
983 if (prob > best_prob + diff_prob)
984 /* The edge has higher probability than the temporary best edge. */
985 is_better_edge = true;
986 else if (prob < best_prob - diff_prob)
987 /* The edge has lower probability than the temporary best edge. */
988 is_better_edge = false;
989 else if (freq < best_freq - diff_freq)
990 /* The edge and the temporary best edge have almost equivalent
991 probabilities. The higher frequency of a successor now means
992 that there is another edge going into that successor.
993 This successor has lower frequency so it is better. */
994 is_better_edge = true;
995 else if (freq > best_freq + diff_freq)
996 /* This successor has higher frequency so it is worse. */
997 is_better_edge = false;
998 else if (e->dest->prev_bb == bb)
999 /* The edges have equivalent probabilities and the successors
1000 have equivalent frequencies. Select the previous successor. */
1001 is_better_edge = true;
1002 else
1003 is_better_edge = false;
1005 /* If we are doing hot/cold partitioning, make sure that we always favor
1006 non-crossing edges over crossing edges. */
1008 if (!is_better_edge
1009 && flag_reorder_blocks_and_partition
1010 && cur_best_edge
1011 && (cur_best_edge->flags & EDGE_CROSSING)
1012 && !(e->flags & EDGE_CROSSING))
1013 is_better_edge = true;
1015 return is_better_edge;
1018 /* Return true when the edge E is better than the temporary best edge
1019 CUR_BEST_EDGE. If SRC_INDEX_P is true, the function compares the src bb of
1020 E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1021 BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1022 TRACES record the information about traces.
1023 When optimizing for size, the edge with smaller index is better.
1024 When optimizing for speed, the edge with bigger probability or longer trace
1025 is better. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1027 static bool
1028 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1029 const_edge cur_best_edge, struct trace *traces)
1031 int e_index;
1032 int b_index;
1033 bool is_better_edge;
/* Any edge beats no edge at all.  */
1035 if (!cur_best_edge)
1036 return true;
/* Size path: ignore probabilities entirely; prefer the edge whose relevant
   block has the smaller index so the original block order is preserved.  */
1038 if (optimize_function_for_size_p (cfun))
1040 e_index = src_index_p ? e->src->index : e->dest->index;
1041 b_index = src_index_p ? cur_best_edge->src->index
1042 : cur_best_edge->dest->index;
1043 /* The smaller one is better to keep the original order. */
1044 return b_index > e_index;
/* Speed path.  When comparing src blocks we are looking for a predecessor
   trace, hence the tiebreak consults end_of_trace; the dest branch below
   mirrors this with start_of_trace for successor traces.  */
1047 if (src_index_p)
1049 e_index = e->src->index;
1051 if (e->probability > cur_best_edge->probability)
1052 /* The edge has higher probability than the temporary best edge. */
1053 is_better_edge = true;
1054 else if (e->probability < cur_best_edge->probability)
1055 /* The edge has lower probability than the temporary best edge. */
1056 is_better_edge = false;
1057 else if (traces[bbd[e_index].end_of_trace].length > best_len)
1058 /* The edge and the temporary best edge have equivalent probabilities.
1059 The edge with longer trace is better. */
1060 is_better_edge = true;
1061 else
1062 is_better_edge = false;
1064 else
1066 e_index = e->dest->index;
1068 if (e->probability > cur_best_edge->probability)
1069 /* The edge has higher probability than the temporary best edge. */
1070 is_better_edge = true;
1071 else if (e->probability < cur_best_edge->probability)
1072 /* The edge has lower probability than the temporary best edge. */
1073 is_better_edge = false;
1074 else if (traces[bbd[e_index].start_of_trace].length > best_len)
1075 /* The edge and the temporary best edge have equivalent probabilities.
1076 The edge with longer trace is better. */
1077 is_better_edge = true;
1078 else
1079 is_better_edge = false;
1082 return is_better_edge;
1085 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
/* Chains traces together via the basic blocks' AUX pointers so the final
   linear order can be read off starting at traces[0].first.  When hot/cold
   partitioning is active, connection runs in two passes so each partition is
   laid out contiguously.  May duplicate a single block (copy_bb) to join two
   traces when no direct connection exists.
   NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1087 static void
1088 connect_traces (int n_traces, struct trace *traces)
1090 int i;
1091 bool *connected;
1092 bool two_passes;
1093 int last_trace;
1094 int current_pass;
1095 int current_partition;
1096 int freq_threshold;
1097 gcov_type count_threshold;
1098 bool for_size = optimize_function_for_size_p (cfun);
/* Thresholds controlling when block duplication is profitable; the count
   computation is ordered to avoid gcov_type overflow for huge counts.  */
1100 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1101 if (max_entry_count < INT_MAX / 1000)
1102 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1103 else
1104 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1106 connected = XCNEWVEC (bool, n_traces);
1107 last_trace = -1;
1108 current_pass = 1;
1109 current_partition = BB_PARTITION (traces[0].first);
1110 two_passes = false;
/* A second pass is needed only if traces actually live in two different
   partitions.  */
1112 if (crtl->has_bb_partition)
1113 for (i = 0; i < n_traces && !two_passes; i++)
1114 if (BB_PARTITION (traces[0].first)
1115 != BB_PARTITION (traces[i].first))
1116 two_passes = true;
1118 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1120 int t = i;
1121 int t2;
1122 edge e, best;
1123 int best_len;
/* End of pass 1: restart the scan for the other partition.  */
1125 if (i >= n_traces)
1127 gcc_assert (two_passes && current_pass == 1);
1128 i = 0;
1129 t = i;
1130 current_pass = 2;
1131 if (current_partition == BB_HOT_PARTITION)
1132 current_partition = BB_COLD_PARTITION;
1133 else
1134 current_partition = BB_HOT_PARTITION;
1137 if (connected[t])
1138 continue;
1140 if (two_passes
1141 && BB_PARTITION (traces[t].first) != current_partition)
1142 continue;
1144 connected[t] = true;
1146 /* Find the predecessor traces. */
/* Walk backwards from trace T, repeatedly picking the best unconnected
   predecessor trace whose end can fall through into the current head.  */
1147 for (t2 = t; t2 > 0;)
1149 edge_iterator ei;
1150 best = NULL;
1151 best_len = 0;
1152 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1154 int si = e->src->index;
1156 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1157 && (e->flags & EDGE_CAN_FALLTHRU)
1158 && !(e->flags & EDGE_COMPLEX)
1159 && bbd[si].end_of_trace >= 0
1160 && !connected[bbd[si].end_of_trace]
1161 && (BB_PARTITION (e->src) == current_partition)
1162 && connect_better_edge_p (e, true, best_len, best, traces))
1164 best = e;
1165 best_len = traces[bbd[si].end_of_trace].length;
1168 if (best)
1170 best->src->aux = best->dest;
1171 t2 = bbd[best->src->index].end_of_trace;
1172 connected[t2] = true;
1174 if (dump_file)
1176 fprintf (dump_file, "Connection: %d %d\n",
1177 best->src->index, best->dest->index);
1180 else
1181 break;
/* Append the chain just built (starting at trace T2) after the previously
   emitted trace.  */
1184 if (last_trace >= 0)
1185 traces[last_trace].last->aux = traces[t2].first;
1186 last_trace = t;
1188 /* Find the successor traces. */
1189 while (1)
1191 /* Find the continuation of the chain. */
1192 edge_iterator ei;
1193 best = NULL;
1194 best_len = 0;
1195 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1197 int di = e->dest->index;
1199 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1200 && (e->flags & EDGE_CAN_FALLTHRU)
1201 && !(e->flags & EDGE_COMPLEX)
1202 && bbd[di].start_of_trace >= 0
1203 && !connected[bbd[di].start_of_trace]
1204 && (BB_PARTITION (e->dest) == current_partition)
1205 && connect_better_edge_p (e, false, best_len, best, traces))
1207 best = e;
1208 best_len = traces[bbd[di].start_of_trace].length;
/* Size path: be conservative — only connect in near-original order to keep
   jumps short; never duplicate blocks.  */
1212 if (for_size)
1214 if (!best)
1215 /* Stop finding the successor traces. */
1216 break;
1218 /* It is OK to connect block n with block n + 1 or a block
1219 before n. For others, only connect to the loop header. */
1220 if (best->dest->index > (traces[t].last->index + 1))
1222 int count = EDGE_COUNT (best->dest->preds);
1224 FOR_EACH_EDGE (e, ei, best->dest->preds)
1225 if (e->flags & EDGE_DFS_BACK)
1226 count--;
1228 /* If dest has multiple predecessors, skip it. We expect
1229 that one predecessor with smaller index connects with it
1230 later. */
1231 if (count != 1)
1232 break;
1235 /* Only connect Trace n with Trace n + 1. It is conservative
1236 to keep the order as close as possible to the original order.
1237 It also helps to reduce long jumps. */
1238 if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1239 break;
1241 if (dump_file)
1242 fprintf (dump_file, "Connection: %d %d\n",
1243 best->src->index, best->dest->index);
1245 t = bbd[best->dest->index].start_of_trace;
1246 traces[last_trace].last->aux = traces[t].first;
1247 connected[t] = true;
1248 last_trace = t;
/* Speed path with a direct best successor: just chain it on.  */
1250 else if (best)
1252 if (dump_file)
1254 fprintf (dump_file, "Connection: %d %d\n",
1255 best->src->index, best->dest->index);
1257 t = bbd[best->dest->index].start_of_trace;
1258 traces[last_trace].last->aux = traces[t].first;
1259 connected[t] = true;
1260 last_trace = t;
1262 else
1264 /* Try to connect the traces by duplication of 1 block. */
/* No direct successor trace; look one block ahead (via E then E2) for a
   trace reachable by copying the intermediate block E->dest.  */
1265 edge e2;
1266 basic_block next_bb = NULL;
1267 bool try_copy = false;
1269 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1270 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1271 && (e->flags & EDGE_CAN_FALLTHRU)
1272 && !(e->flags & EDGE_COMPLEX)
1273 && (!best || e->probability > best->probability))
1275 edge_iterator ei;
1276 edge best2 = NULL;
1277 int best2_len = 0;
1279 /* If the destination is a start of a trace which is only
1280 one block long, then no need to search the successor
1281 blocks of the trace. Accept it. */
1282 if (bbd[e->dest->index].start_of_trace >= 0
1283 && traces[bbd[e->dest->index].start_of_trace].length
1284 == 1)
1286 best = e;
1287 try_copy = true;
1288 continue;
1291 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1293 int di = e2->dest->index;
1295 if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1296 || ((e2->flags & EDGE_CAN_FALLTHRU)
1297 && !(e2->flags & EDGE_COMPLEX)
1298 && bbd[di].start_of_trace >= 0
1299 && !connected[bbd[di].start_of_trace]
1300 && BB_PARTITION (e2->dest) == current_partition
1301 && EDGE_FREQUENCY (e2) >= freq_threshold
1302 && e2->count >= count_threshold
1303 && (!best2
1304 || e2->probability > best2->probability
1305 || (e2->probability == best2->probability
1306 && traces[bbd[di].start_of_trace].length
1307 > best2_len))))
1309 best = e;
1310 best2 = e2;
1311 if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1312 best2_len = traces[bbd[di].start_of_trace].length;
1313 else
1314 best2_len = INT_MAX;
1315 next_bb = e2->dest;
1316 try_copy = true;
/* Duplication is disabled entirely under hot/cold partitioning.  */
1321 if (crtl->has_bb_partition)
1322 try_copy = false;
1324 /* Copy tiny blocks always; copy larger blocks only when the
1325 edge is traversed frequently enough. */
1326 if (try_copy
1327 && copy_bb_p (best->dest,
1328 optimize_edge_for_speed_p (best)
1329 && EDGE_FREQUENCY (best) >= freq_threshold
1330 && best->count >= count_threshold))
1332 basic_block new_bb;
1334 if (dump_file)
1336 fprintf (dump_file, "Connection: %d %d ",
1337 traces[t].last->index, best->dest->index);
1338 if (!next_bb)
1339 fputc ('\n', dump_file);
1340 else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1341 fprintf (dump_file, "exit\n");
1342 else
1343 fprintf (dump_file, "%d\n", next_bb->index);
1346 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1347 traces[t].last = new_bb;
1348 if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1350 t = bbd[next_bb->index].start_of_trace;
1351 traces[last_trace].last->aux = traces[t].first;
1352 connected[t] = true;
1353 last_trace = t;
1355 else
1356 break; /* Stop finding the successor traces. */
1358 else
1359 break; /* Stop finding the successor traces. */
/* Dump the final block order reached by following the AUX chain.  */
1364 if (dump_file)
1366 basic_block bb;
1368 fprintf (dump_file, "Final order:\n");
1369 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1370 fprintf (dump_file, "%d ", bb->index);
1371 fprintf (dump_file, "\n");
1372 fflush (dump_file);
1375 FREE (connected);
1378 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1379 when code size is allowed to grow by duplication. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1381 static bool
1382 copy_bb_p (const_basic_block bb, int code_may_grow)
1384 int size = 0;
/* Baseline budget: a copy must be cheaper than the unconditional jump it
   replaces, unless CODE_MAY_GROW relaxes this below.  */
1385 int max_size = uncond_jump_length;
1386 rtx_insn *insn;
/* Never copy a never-executed block, a block with fewer than two
   predecessors (no jump would be saved), or one the CFG layer cannot
   duplicate.  */
1388 if (!bb->frequency)
1389 return false;
1390 if (EDGE_COUNT (bb->preds) < 2)
1391 return false;
1392 if (!can_duplicate_block_p (bb))
1393 return false;
1395 /* Avoid duplicating blocks which have many successors (PR/13430). */
1396 if (EDGE_COUNT (bb->succs) > 8)
1397 return false;
1399 if (code_may_grow && optimize_bb_for_speed_p (bb))
1400 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
/* Estimate the block's size as the sum of minimal insn lengths.  */
1402 FOR_BB_INSNS (bb, insn)
1404 if (INSN_P (insn))
1405 size += get_attr_min_length (insn);
1408 if (size <= max_size)
1409 return true;
1411 if (dump_file)
1413 fprintf (dump_file,
1414 "Block %d can't be copied because its size = %d.\n",
1415 bb->index, size);
1418 return false;
1421 /* Return the length of unconditional jump instruction. */
/* Emits a throwaway label+jump into a temporary sequence purely to measure
   the target's minimal jump length; the sequence is discarded.
   NOTE(review): the return-type line (presumably "int") appears to have been
   dropped by the extraction along with brace-only lines — verify against
   upstream gcc-5 bb-reorder.c.  */
1424 get_uncond_jump_length (void)
1426 rtx_insn *label, *jump;
1427 int length;
1429 start_sequence ();
1430 label = emit_label (gen_label_rtx ());
1431 jump = emit_jump_insn (gen_jump (label));
1432 length = get_attr_min_length (jump);
1433 end_sequence ();
1435 return length;
1438 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1439 Duplicate the landing pad and split the edges so that no EH edge
1440 crosses partitions. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1442 static void
1443 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1445 eh_landing_pad new_lp;
1446 basic_block new_bb, last_bb, post_bb;
1447 rtx_insn *new_label, *jump;
1448 rtx post_label;
1449 unsigned new_partition;
1450 edge_iterator ei;
1451 edge e;
1453 /* Generate the new landing-pad structure. */
1454 new_lp = gen_eh_landing_pad (old_lp->region);
1455 new_lp->post_landing_pad = old_lp->post_landing_pad;
1456 new_lp->landing_pad = gen_label_rtx ();
1457 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1459 /* Put appropriate instructions in new bb. */
1460 new_label = emit_label (new_lp->landing_pad);
1462 expand_dw2_landing_pad_for_region (old_lp->region);
/* The new pad ends with a jump to the (shared) post-landing-pad block.  */
1464 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1465 post_bb = single_succ (post_bb);
1466 post_label = block_label (post_bb);
1467 jump = emit_jump_insn (gen_jump (post_label));
1468 JUMP_LABEL (jump) = post_label;
1470 /* Create new basic block to be dest for lp. */
1471 last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1472 new_bb = create_basic_block (new_label, jump, last_bb);
/* Splice new_bb into the AUX layout chain right after last_bb.  */
1473 new_bb->aux = last_bb->aux;
1474 last_bb->aux = new_bb;
1476 emit_barrier_after_bb (new_bb);
1478 make_edge (new_bb, post_bb, 0);
1480 /* Make sure new bb is in the other partition. */
/* XOR with both partition bits flips HOT <-> COLD.  */
1481 new_partition = BB_PARTITION (old_bb);
1482 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1483 BB_SET_PARTITION (new_bb, new_partition);
1485 /* Fix up the edges. */
/* Redirect every throwing predecessor that lives in the other partition to
   the duplicated pad, rewriting its REG_EH_REGION note to the new lp index.
   ei is only advanced on the non-redirect path because redirect_edge_succ
   removes E from old_bb->preds.  */
1486 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1487 if (BB_PARTITION (e->src) == new_partition)
1489 rtx_insn *insn = BB_END (e->src);
1490 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1492 gcc_assert (note != NULL);
1493 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1494 XEXP (note, 0) = GEN_INT (new_lp->index);
1496 /* Adjust the edge to the new destination. */
1497 redirect_edge_succ (e, new_bb);
1499 else
1500 ei_next (&ei);
1504 /* Ensure that all hot bbs are included in a hot path through the
1505 procedure. This is done by calling this function twice, once
1506 with WALK_UP true (to look for paths from the entry to hot bbs) and
1507 once with WALK_UP false (to look for paths from hot bbs to the exit).
1508 Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1509 to BBS_IN_HOT_PARTITION. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1511 static unsigned int
1512 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1513 vec<basic_block> *bbs_in_hot_partition)
1515 /* Callers check this. */
1516 gcc_checking_assert (cold_bb_count);
1518 /* Keep examining hot bbs while we still have some left to check
1519 and there are remaining cold bbs. */
/* Worklist of hot blocks; copy so pushes here don't disturb the caller's
   vector ordering.  */
1520 vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1521 while (! hot_bbs_to_check.is_empty ()
1522 && cold_bb_count)
1524 basic_block bb = hot_bbs_to_check.pop ();
1525 vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1526 edge e;
1527 edge_iterator ei;
1528 int highest_probability = 0;
1529 int highest_freq = 0;
1530 gcov_type highest_count = 0;
1531 bool found = false;
1533 /* Walk the preds/succs and check if there is at least one already
1534 marked hot. Keep track of the most frequent pred/succ so that we
1535 can mark it hot if we don't find one. */
1536 FOR_EACH_EDGE (e, ei, edges)
1538 basic_block reach_bb = walk_up ? e->src : e->dest;
/* Back edges don't count as connecting paths.  */
1540 if (e->flags & EDGE_DFS_BACK)
1541 continue;
1543 if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1545 found = true;
1546 break;
1548 /* The following loop will look for the hottest edge via
1549 the edge count, if it is non-zero, then fallback to the edge
1550 frequency and finally the edge probability. */
1551 if (e->count > highest_count)
1552 highest_count = e->count;
1553 int edge_freq = EDGE_FREQUENCY (e);
1554 if (edge_freq > highest_freq)
1555 highest_freq = edge_freq;
1556 if (e->probability > highest_probability)
1557 highest_probability = e->probability;
1560 /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1561 block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1562 then the most frequent pred (or succ) needs to be adjusted. In the
1563 case where multiple preds/succs have the same frequency (e.g. a
1564 50-50 branch), then both will be adjusted. */
1565 if (found)
1566 continue;
1568 FOR_EACH_EDGE (e, ei, edges)
1570 if (e->flags & EDGE_DFS_BACK)
1571 continue;
1572 /* Select the hottest edge using the edge count, if it is non-zero,
1573 then fallback to the edge frequency and finally the edge
1574 probability. */
1575 if (highest_count)
1577 if (e->count < highest_count)
1578 continue;
1580 else if (highest_freq)
1582 if (EDGE_FREQUENCY (e) < highest_freq)
1583 continue;
1585 else if (e->probability < highest_probability)
1586 continue;
1588 basic_block reach_bb = walk_up ? e->src : e->dest;
1590 /* We have a hot bb with an immediate dominator that is cold.
1591 The dominator needs to be re-marked hot. */
1592 BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1593 cold_bb_count--;
1595 /* Now we need to examine newly-hot reach_bb to see if it is also
1596 dominated by a cold bb. */
1597 bbs_in_hot_partition->safe_push (reach_bb);
1598 hot_bbs_to_check.safe_push (reach_bb);
1602 return cold_bb_count;
1606 /* Find the basic blocks that are rarely executed and need to be moved to
1607 a separate section of the .o file (to cut down on paging and improve
1608 cache locality). Return a vector of all edges that cross. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1610 static vec<edge>
1611 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1613 vec<edge> crossing_edges = vNULL;
1614 basic_block bb;
1615 edge e;
1616 edge_iterator ei;
1617 unsigned int cold_bb_count = 0;
1618 auto_vec<basic_block> bbs_in_hot_partition;
1620 /* Mark which partition (hot/cold) each basic block belongs in. */
1621 FOR_EACH_BB_FN (bb, cfun)
1623 bool cold_bb = false;
1625 if (probably_never_executed_bb_p (cfun, bb))
1627 /* Handle profile insanities created by upstream optimizations
1628 by also checking the incoming edge weights. If there is a non-cold
1629 incoming edge, conservatively prevent this block from being split
1630 into the cold section. */
1631 cold_bb = true;
1632 FOR_EACH_EDGE (e, ei, bb->preds)
1633 if (!probably_never_executed_edge_p (cfun, e))
1635 cold_bb = false;
1636 break;
1639 if (cold_bb)
1641 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1642 cold_bb_count++;
1644 else
1646 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1647 bbs_in_hot_partition.safe_push (bb);
1651 /* Ensure that hot bbs are included along a hot path from the entry to exit.
1652 Several different possibilities may include cold bbs along all paths
1653 to/from a hot bb. One is that there are edge weight insanities
1654 due to optimization phases that do not properly update basic block profile
1655 counts. The second is that the entry of the function may not be hot, because
1656 it is entered fewer times than the number of profile training runs, but there
1657 is a loop inside the function that causes blocks within the function to be
1658 above the threshold for hotness. This is fixed by walking up from hot bbs
1659 to the entry block, and then down from hot bbs to the exit, performing
1660 partitioning fixups as necessary. */
1661 if (cold_bb_count)
/* sanitize_hot_paths relies on EDGE_DFS_BACK being up to date.  */
1663 mark_dfs_back_edges ();
1664 cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1665 &bbs_in_hot_partition);
1666 if (cold_bb_count)
1667 sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1670 /* The format of .gcc_except_table does not allow landing pads to
1671 be in a different partition as the throw. Fix this by either
1672 moving or duplicating the landing pads. */
1673 if (cfun->eh->lp_array)
1675 unsigned i;
1676 eh_landing_pad lp;
1678 FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1680 bool all_same, all_diff;
1682 if (lp == NULL
1683 || lp->landing_pad == NULL_RTX
1684 || !LABEL_P (lp->landing_pad))
1685 continue;
/* Classify the pad's EH predecessors: all in the same partition as the
   pad, all in the other partition, or mixed.  */
1687 all_same = all_diff = true;
1688 bb = BLOCK_FOR_INSN (lp->landing_pad);
1689 FOR_EACH_EDGE (e, ei, bb->preds)
1691 gcc_assert (e->flags & EDGE_EH);
1692 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1693 all_diff = false;
1694 else
1695 all_same = false;
/* all_same: nothing to do.  all_diff: just move the pad to the other
   partition.  mixed: duplicate the pad.  */
1698 if (all_same)
1700 else if (all_diff)
1702 int which = BB_PARTITION (bb);
1703 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1704 BB_SET_PARTITION (bb, which);
1706 else
1707 fix_up_crossing_landing_pad (lp, bb);
1711 /* Mark every edge that crosses between sections. */
1713 FOR_EACH_BB_FN (bb, cfun)
1714 FOR_EACH_EDGE (e, ei, bb->succs)
1716 unsigned int flags = e->flags;
1718 /* We should never have EDGE_CROSSING set yet. */
1719 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1721 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1722 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1723 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1725 crossing_edges.safe_push (e);
1726 flags |= EDGE_CROSSING;
1729 /* Now that we've split eh edges as appropriate, allow landing pads
1730 to be merged with the post-landing pads. */
1731 flags &= ~EDGE_PRESERVE;
1733 e->flags = flags;
1736 return crossing_edges;
1739 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1741 static void
1742 set_edge_can_fallthru_flag (void)
1744 basic_block bb;
1746 FOR_EACH_BB_FN (bb, cfun)
1748 edge e;
1749 edge_iterator ei;
1751 FOR_EACH_EDGE (e, ei, bb->succs)
1753 e->flags &= ~EDGE_CAN_FALLTHRU;
1755 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1756 if (e->flags & EDGE_FALLTHRU)
1757 e->flags |= EDGE_CAN_FALLTHRU;
1760 /* If the BB ends with an invertible condjump all (2) edges are
1761 CAN_FALLTHRU edges. */
1762 if (EDGE_COUNT (bb->succs) != 2)
1763 continue;
1764 if (!any_condjump_p (BB_END (bb)))
1765 continue;
1766 if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1767 continue;
1768 invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1769 EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1770 EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1774 /* If any destination of a crossing edge does not have a label, add label;
1775 Convert any easy fall-through crossing edges to unconditional jumps. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1777 static void
1778 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1780 size_t i;
1781 edge e;
1783 FOR_EACH_VEC_ELT (crossing_edges, i, e)
1785 basic_block src = e->src;
1786 basic_block dest = e->dest;
1787 rtx label;
1788 rtx_insn *new_jump;
1790 if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1791 continue;
1793 /* Make sure dest has a label. */
/* block_label creates the label as a side effect if it is missing.  */
1794 label = block_label (dest);
1796 /* Nothing to do for non-fallthru edges. */
1797 if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1798 continue;
1799 if ((e->flags & EDGE_FALLTHRU) == 0)
1800 continue;
1802 /* If the block does not end with a control flow insn, then we
1803 can trivially add a jump to the end to fixup the crossing.
1804 Otherwise the jump will have to go in a new bb, which will
1805 be handled by fix_up_fall_thru_edges function. */
1806 if (control_flow_insn_p (BB_END (src)))
1807 continue;
1809 /* Make sure there's only one successor. */
1810 gcc_assert (single_succ_p (src));
1812 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1813 BB_END (src) = new_jump;
1814 JUMP_LABEL (new_jump) = label;
1815 LABEL_NUSES (label) += 1;
1817 emit_barrier_after_bb (src);
1819 /* Mark edge as non-fallthru. */
1820 e->flags &= ~EDGE_FALLTHRU;
1824 /* Find any bb's where the fall-through edge is a crossing edge (note that
1825 these bb's must also contain a conditional jump or end with a call
1826 instruction; we've already dealt with fall-through edges for blocks
1827 that didn't have a conditional jump or didn't end with call instruction
1828 in the call to add_labels_and_missing_jumps). Convert the fall-through
1829 edge to non-crossing edge by inserting a new bb to fall-through into.
1830 The new bb will contain an unconditional jump (crossing edge) to the
1831 original fall through destination. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1833 static void
1834 fix_up_fall_thru_edges (void)
1836 basic_block cur_bb;
1837 basic_block new_bb;
1838 edge succ1;
1839 edge succ2;
1840 edge fall_thru;
1841 edge cond_jump = NULL;
1842 edge e;
1843 bool cond_jump_crosses;
1844 int invert_worked;
1845 rtx_insn *old_jump;
1846 rtx fall_thru_label;
1848 FOR_EACH_BB_FN (cur_bb, cfun)
1850 fall_thru = NULL;
1851 if (EDGE_COUNT (cur_bb->succs) > 0)
1852 succ1 = EDGE_SUCC (cur_bb, 0);
1853 else
1854 succ1 = NULL;
1856 if (EDGE_COUNT (cur_bb->succs) > 1)
1857 succ2 = EDGE_SUCC (cur_bb, 1);
1858 else
1859 succ2 = NULL;
1861 /* Find the fall-through edge. */
1863 if (succ1
1864 && (succ1->flags & EDGE_FALLTHRU))
1866 fall_thru = succ1;
1867 cond_jump = succ2;
1869 else if (succ2
1870 && (succ2->flags & EDGE_FALLTHRU))
1872 fall_thru = succ2;
1873 cond_jump = succ1;
/* Calls and throwing insns may have more than two successors; scan for the
   fallthru edge among them.  */
1875 else if (succ1
1876 && (block_ends_with_call_p (cur_bb)
1877 || can_throw_internal (BB_END (cur_bb))))
1879 edge e;
1880 edge_iterator ei;
1882 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1883 if (e->flags & EDGE_FALLTHRU)
1885 fall_thru = e;
1886 break;
1890 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1892 /* Check to see if the fall-thru edge is a crossing edge. */
1894 if (fall_thru->flags & EDGE_CROSSING)
1896 /* The fall_thru edge crosses; now check the cond jump edge, if
1897 it exists. */
1899 cond_jump_crosses = true;
1900 invert_worked = 0;
1901 old_jump = BB_END (cur_bb);
1903 /* Find the jump instruction, if there is one. */
1905 if (cond_jump)
1907 if (!(cond_jump->flags & EDGE_CROSSING))
1908 cond_jump_crosses = false;
1910 /* We know the fall-thru edge crosses; if the cond
1911 jump edge does NOT cross, and its destination is the
1912 next block in the bb order, invert the jump
1913 (i.e. fix it so the fall through does not cross and
1914 the cond jump does). */
1916 if (!cond_jump_crosses)
1918 /* Find label in fall_thru block. We've already added
1919 any missing labels, so there must be one. */
1921 fall_thru_label = block_label (fall_thru->dest);
1923 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1924 invert_worked = invert_jump (old_jump,
1925 fall_thru_label,0);
1926 if (invert_worked)
/* Swap the roles of the two edges and their crossing flags to match
   the inverted jump.  */
1928 fall_thru->flags &= ~EDGE_FALLTHRU;
1929 cond_jump->flags |= EDGE_FALLTHRU;
1930 update_br_prob_note (cur_bb);
1931 e = fall_thru;
1932 fall_thru = cond_jump;
1933 cond_jump = e;
1934 cond_jump->flags |= EDGE_CROSSING;
1935 fall_thru->flags &= ~EDGE_CROSSING;
1940 if (cond_jump_crosses || !invert_worked)
1942 /* This is the case where both edges out of the basic
1943 block are crossing edges. Here we will fix up the
1944 fall through edge. The jump edge will be taken care
1945 of later. The EDGE_CROSSING flag of fall_thru edge
1946 is unset before the call to force_nonfallthru
1947 function because if a new basic-block is created
1948 this edge remains in the current section boundary
1949 while the edge between new_bb and the fall_thru->dest
1950 becomes EDGE_CROSSING. */
1952 fall_thru->flags &= ~EDGE_CROSSING;
1953 new_bb = force_nonfallthru (fall_thru);
1955 if (new_bb)
1957 new_bb->aux = cur_bb->aux;
1958 cur_bb->aux = new_bb;
1960 /* This is done by force_nonfallthru_and_redirect. */
1961 gcc_assert (BB_PARTITION (new_bb)
1962 == BB_PARTITION (cur_bb));
1964 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1966 else
1968 /* If a new basic-block was not created; restore
1969 the EDGE_CROSSING flag. */
1970 fall_thru->flags |= EDGE_CROSSING;
1973 /* Add barrier after new jump */
1974 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1981 /* This function checks the destination block of a "crossing jump" to
1982 see if it has any crossing predecessors that begin with a code label
1983 and end with an unconditional jump. If so, it returns that predecessor
1984 block. (This is to avoid creating lots of new basic blocks that all
1985 contain unconditional jumps to the same destination). */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1987 static basic_block
1988 find_jump_block (basic_block jump_dest)
1990 basic_block source_bb = NULL;
1991 edge e;
1992 rtx_insn *insn;
1993 edge_iterator ei;
1995 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1996 if (e->flags & EDGE_CROSSING)
1998 basic_block src = e->src;
2000 /* Check each predecessor to see if it has a label, and contains
2001 only one executable instruction, which is an unconditional jump.
2002 If so, we can use it. */
/* The scan skips non-INSN lines (notes etc.); it accepts src only when the
   first real insn is also the block's last insn and is an unconditional
   jump.  */
2004 if (LABEL_P (BB_HEAD (src)))
2005 for (insn = BB_HEAD (src);
2006 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2007 insn = NEXT_INSN (insn))
2009 if (INSN_P (insn)
2010 && insn == BB_END (src)
2011 && JUMP_P (insn)
2012 && !any_condjump_p (insn))
2014 source_bb = src;
2015 break;
2019 if (source_bb)
2020 break;
2023 return source_bb;
2026 /* Find all BB's with conditional jumps that are crossing edges;
2027 insert a new bb and make the conditional jump branch to the new
2028 bb instead (make the new bb same color so conditional branch won't
2029 be a 'crossing' edge). Insert an unconditional jump from the
2030 new bb to the original destination of the conditional jump. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
2032 static void
2033 fix_crossing_conditional_branches (void)
2035 basic_block cur_bb;
2036 basic_block new_bb;
2037 basic_block dest;
2038 edge succ1;
2039 edge succ2;
2040 edge crossing_edge;
2041 edge new_edge;
2042 rtx_insn *old_jump;
2043 rtx set_src;
2044 rtx old_label = NULL_RTX;
2045 rtx new_label;
2047 FOR_EACH_BB_FN (cur_bb, cfun)
2049 crossing_edge = NULL;
2050 if (EDGE_COUNT (cur_bb->succs) > 0)
2051 succ1 = EDGE_SUCC (cur_bb, 0);
2052 else
2053 succ1 = NULL;
2055 if (EDGE_COUNT (cur_bb->succs) > 1)
2056 succ2 = EDGE_SUCC (cur_bb, 1);
2057 else
2058 succ2 = NULL;
2060 /* We already took care of fall-through edges, so only one successor
2061 can be a crossing edge. */
2063 if (succ1 && (succ1->flags & EDGE_CROSSING))
2064 crossing_edge = succ1;
2065 else if (succ2 && (succ2->flags & EDGE_CROSSING))
2066 crossing_edge = succ2;
2068 if (crossing_edge)
2070 old_jump = BB_END (cur_bb);
2072 /* Check to make sure the jump instruction is a
2073 conditional jump. */
2075 set_src = NULL_RTX;
2077 if (any_condjump_p (old_jump))
/* Extract the IF_THEN_ELSE source from either a plain SET pattern or the
   first element of a PARALLEL.  */
2079 if (GET_CODE (PATTERN (old_jump)) == SET)
2080 set_src = SET_SRC (PATTERN (old_jump));
2081 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2083 set_src = XVECEXP (PATTERN (old_jump), 0,0);
2084 if (GET_CODE (set_src) == SET)
2085 set_src = SET_SRC (set_src);
2086 else
2087 set_src = NULL_RTX;
2091 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
/* The taken-branch label is whichever arm of the IF_THEN_ELSE is not PC.  */
2093 if (GET_CODE (XEXP (set_src, 1)) == PC)
2094 old_label = XEXP (set_src, 2);
2095 else if (GET_CODE (XEXP (set_src, 2)) == PC)
2096 old_label = XEXP (set_src, 1);
2098 /* Check to see if new bb for jumping to that dest has
2099 already been created; if so, use it; if not, create
2100 a new one. */
2102 new_bb = find_jump_block (crossing_edge->dest);
2104 if (new_bb)
2105 new_label = block_label (new_bb);
2106 else
2108 basic_block last_bb;
2109 rtx_insn *new_jump;
2111 /* Create new basic block to be dest for
2112 conditional jump. */
2114 /* Put appropriate instructions in new bb. */
2116 new_label = gen_label_rtx ();
2117 emit_label (new_label);
2119 gcc_assert (GET_CODE (old_label) == LABEL_REF);
/* Use the insn's JUMP_LABEL (a CODE_LABEL) rather than the LABEL_REF
   wrapper when emitting the trampoline jump.  */
2120 old_label = JUMP_LABEL (old_jump);
2121 new_jump = emit_jump_insn (gen_jump (old_label));
2122 JUMP_LABEL (new_jump) = old_label;
2124 last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2125 new_bb = create_basic_block (new_label, new_jump, last_bb);
2126 new_bb->aux = last_bb->aux;
2127 last_bb->aux = new_bb;
2129 emit_barrier_after_bb (new_bb);
2131 /* Make sure new bb is in same partition as source
2132 of conditional branch. */
2133 BB_COPY_PARTITION (new_bb, cur_bb);
2136 /* Make old jump branch to new bb. */
2138 redirect_jump (old_jump, new_label, 0);
2140 /* Remove crossing_edge as predecessor of 'dest'. */
2142 dest = crossing_edge->dest;
2144 redirect_edge_succ (crossing_edge, new_bb);
2146 /* Make a new edge from new_bb to old dest; new edge
2147 will be a successor for new_bb and a predecessor
2148 for 'dest'. */
2150 if (EDGE_COUNT (new_bb->succs) == 0)
2151 new_edge = make_edge (new_bb, dest, 0);
2152 else
2153 new_edge = EDGE_SUCC (new_bb, 0);
/* The conditional branch now stays within its partition; the trampoline's
   unconditional jump becomes the crossing edge instead.  */
2155 crossing_edge->flags &= ~EDGE_CROSSING;
2156 new_edge->flags |= EDGE_CROSSING;
2162 /* Find any unconditional branches that cross between hot and cold
2163 sections. Convert them into indirect jumps instead. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
2165 static void
2166 fix_crossing_unconditional_branches (void)
2168 basic_block cur_bb;
2169 rtx_insn *last_insn;
2170 rtx label;
2171 rtx label_addr;
2172 rtx_insn *indirect_jump_sequence;
2173 rtx_insn *jump_insn = NULL;
2174 rtx new_reg;
2175 rtx_insn *cur_insn;
2176 edge succ;
2178 FOR_EACH_BB_FN (cur_bb, cfun)
2180 last_insn = BB_END (cur_bb);
2182 if (EDGE_COUNT (cur_bb->succs) < 1)
2183 continue;
2185 succ = EDGE_SUCC (cur_bb, 0);
2187 /* Check to see if bb ends in a crossing (unconditional) jump. At
2188 this point, no crossing jumps should be conditional. */
2190 if (JUMP_P (last_insn)
2191 && (succ->flags & EDGE_CROSSING))
2193 gcc_assert (!any_condjump_p (last_insn));
2195 /* Make sure the jump is not already an indirect or table jump. */
2197 if (!computed_jump_p (last_insn)
2198 && !tablejump_p (last_insn, NULL, NULL))
2200 /* We have found a "crossing" unconditional branch. Now
2201 we must convert it to an indirect jump. First create
2202 reference of label, as target for jump. */
2204 label = JUMP_LABEL (last_insn);
2205 label_addr = gen_rtx_LABEL_REF (Pmode, label);
2206 LABEL_NUSES (label) += 1;
2208 /* Get a register to use for the indirect jump. */
2210 new_reg = gen_reg_rtx (Pmode);
2212 /* Generate indirect the jump sequence. */
2214 start_sequence ();
2215 emit_move_insn (new_reg, label_addr);
2216 emit_indirect_jump (new_reg);
2217 indirect_jump_sequence = get_insns ();
2218 end_sequence ();
2220 /* Make sure every instruction in the new jump sequence has
2221 its basic block set to be cur_bb. */
2223 for (cur_insn = indirect_jump_sequence; cur_insn;
2224 cur_insn = NEXT_INSN (cur_insn))
2226 if (!BARRIER_P (cur_insn))
2227 BLOCK_FOR_INSN (cur_insn) = cur_bb;
/* Remember the JUMP insn of the sequence; it becomes the new BB_END.  */
2228 if (JUMP_P (cur_insn))
2229 jump_insn = cur_insn;
2232 /* Insert the new (indirect) jump sequence immediately before
2233 the unconditional jump, then delete the unconditional jump. */
2235 emit_insn_before (indirect_jump_sequence, last_insn);
2236 delete_insn (last_insn);
2238 JUMP_LABEL (jump_insn) = label;
2239 LABEL_NUSES (label)++;
2241 /* Make BB_END for cur_bb be the jump instruction (NOT the
2242 barrier instruction at the end of the sequence...). */
2244 BB_END (cur_bb) = jump_insn;
2250 /* Update CROSSING_JUMP_P flags on all jump insns. */
2252 static void
2253 update_crossing_jump_flags (void)
2255 basic_block bb;
2256 edge e;
2257 edge_iterator ei;
/* For each block that has at least one crossing successor edge, mark
   its ending jump (if any) with CROSSING_JUMP_P.  */
2259 FOR_EACH_BB_FN (bb, cfun)
2260 FOR_EACH_EDGE (e, ei, bb->succs)
2261 if (e->flags & EDGE_CROSSING)
2263 if (JUMP_P (BB_END (bb))
2264 /* Some flags were added during fix_up_fall_thru_edges, via
2265 force_nonfallthru_and_redirect. */
2266 && !CROSSING_JUMP_P (BB_END (bb)))
2267 CROSSING_JUMP_P (BB_END (bb)) = 1;
/* One crossing edge is enough to flag the jump; stop scanning this
   block's edges and move to the next block.  */
2268 break;
2272 /* Reorder basic blocks.  The main entry point to this file.  The
2273 function takes no arguments; the caller (pass_reorder_blocks::execute)
is responsible for entering cfglayout mode with the desired flags
before calling it.  */
2275 static void
2276 reorder_basic_blocks (void)
2278 int n_traces;
2279 int i;
2280 struct trace *traces;
/* This algorithm only works in cfglayout mode.  */
2282 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
/* Nothing to reorder when there is at most one real block.  */
2284 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2285 return;
2287 set_edge_can_fallthru_flag ();
2288 mark_dfs_back_edges ();
2290 /* We are estimating the length of uncond jump insn only once since the code
2291 for getting the insn length always returns the minimal length now. */
2292 if (uncond_jump_length == 0)
2293 uncond_jump_length = get_uncond_jump_length ();
2295 /* We need to know some information for each basic block. */
2296 array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2297 bbd = XNEWVEC (bbro_basic_block_data, array_size);
/* Initialize per-block bookkeeping to "not yet in any trace".  */
2298 for (i = 0; i < array_size; i++)
2300 bbd[i].start_of_trace = -1;
2301 bbd[i].end_of_trace = -1;
2302 bbd[i].in_trace = -1;
2303 bbd[i].visited = 0;
2304 bbd[i].priority = -1;
2305 bbd[i].heap = NULL;
2306 bbd[i].node = NULL;
/* Build the traces greedily, then glue them together into the final
   block order.  */
2309 traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2310 n_traces = 0;
2311 find_traces (&n_traces, traces);
2312 connect_traces (n_traces, traces);
2313 FREE (traces);
2314 FREE (bbd);
2316 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2318 if (dump_file)
2320 if (dump_flags & TDF_DETAILS)
2321 dump_reg_info (dump_file);
2322 dump_flow_info (dump_file, dump_flags);
2325 /* Signal that rtl_verify_flow_info_1 can now verify that there
2326 is at most one switch between hot/cold sections. */
2327 crtl->bb_reorder_complete = true;
2330 /* Determine which partition the first basic block in the function
2331 belongs to, then find the first basic block in the current function
2332 that belongs to a different section, and insert a
2333 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2334 instruction stream. When writing out the assembly code,
2335 encountering this note will make the compiler switch between the
2336 hot and cold text sections. */
2338 void
2339 insert_section_boundary_note (void)
2341 basic_block bb;
2342 bool switched_sections = false;
/* 0 means "partition of the first block not yet seen".  */
2343 int current_partition = 0;
/* Only relevant when hot/cold partitioning actually happened.  */
2345 if (!crtl->has_bb_partition)
2346 return;
2348 FOR_EACH_BB_FN (bb, cfun)
2350 if (!current_partition)
2351 current_partition = BB_PARTITION (bb);
2352 if (BB_PARTITION (bb) != current_partition)
/* After reordering there must be exactly one hot/cold transition,
   hence the assert that we have not already switched.  */
2354 gcc_assert (!switched_sections);
2355 switched_sections = true;
2356 emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2357 current_partition = BB_PARTITION (bb);
2362 namespace {
/* Pass metadata for the "bbro" basic-block reordering RTL pass.  */
2364 const pass_data pass_data_reorder_blocks =
2366 RTL_PASS, /* type */
2367 "bbro", /* name */
2368 OPTGROUP_NONE, /* optinfo_flags */
2369 TV_REORDER_BLOCKS, /* tv_id */
2370 0, /* properties_required */
2371 0, /* properties_provided */
2372 0, /* properties_destroyed */
2373 0, /* todo_flags_start */
2374 0, /* todo_flags_finish */
/* Pass wrapper: runs trace-based block reordering late in RTL
   compilation.  */
2377 class pass_reorder_blocks : public rtl_opt_pass
2379 public:
2380 pass_reorder_blocks (gcc::context *ctxt)
2381 : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2384 /* opt_pass methods: */
/* Run only when the target allows jump modification and the user asked
   for reordering (-freorder-blocks or -freorder-blocks-and-partition).  */
2385 virtual bool gate (function *)
2387 if (targetm.cannot_modify_jumps_p ())
2388 return false;
2389 return (optimize > 0
2390 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2393 virtual unsigned int execute (function *);
2395 }; // class pass_reorder_blocks
/* Execute the reordering pass: enter cfglayout mode, reorder, clean up,
   record the new order in bb->aux, and leave cfglayout mode.  Returns 0
   (no TODO flags).  */
2397 unsigned int
2398 pass_reorder_blocks::execute (function *fun)
2400 basic_block bb;
2402 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2403 splitting possibly introduced more crossjumping opportunities. */
2404 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2406 reorder_basic_blocks ();
2407 cleanup_cfg (CLEANUP_EXPENSIVE);
/* Record the chosen linear order in the aux chain, which
   cfg_layout_finalize uses to rebuild the insn stream.  */
2409 FOR_EACH_BB_FN (bb, fun)
2410 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2411 bb->aux = bb->next_bb;
2412 cfg_layout_finalize ();
2414 return 0;
2417 } // anon namespace
/* Factory used by the pass manager to instantiate the bbro pass.  */
2419 rtl_opt_pass *
2420 make_pass_reorder_blocks (gcc::context *ctxt)
2422 return new pass_reorder_blocks (ctxt);
2425 /* Duplicate the blocks containing computed gotos. This basically unfactors
2426 computed gotos that were factored early on in the compilation process to
2427 speed up edge based data flow. We used to not unfactor them again,
2428 which can seriously pessimize code with many computed jumps in the source
2429 code, such as interpreters. See e.g. PR15242. */
2431 namespace {
/* Pass metadata for the "compgotos" computed-goto unfactoring pass.  */
2433 const pass_data pass_data_duplicate_computed_gotos =
2435 RTL_PASS, /* type */
2436 "compgotos", /* name */
2437 OPTGROUP_NONE, /* optinfo_flags */
2438 TV_REORDER_BLOCKS, /* tv_id */
2439 0, /* properties_required */
2440 0, /* properties_provided */
2441 0, /* properties_destroyed */
2442 0, /* todo_flags_start */
2443 0, /* todo_flags_finish */
/* Pass wrapper; gate and execute are defined out of line below.  */
2446 class pass_duplicate_computed_gotos : public rtl_opt_pass
2448 public:
2449 pass_duplicate_computed_gotos (gcc::context *ctxt)
2450 : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2453 /* opt_pass methods: */
2454 virtual bool gate (function *);
2455 virtual unsigned int execute (function *);
2457 }; // class pass_duplicate_computed_gotos
/* Gate: duplication grows code, so only run when optimizing for speed
   with expensive optimizations enabled, and only if the target lets us
   rewrite jumps at all.  */
2459 bool
2460 pass_duplicate_computed_gotos::gate (function *fun)
2462 if (targetm.cannot_modify_jumps_p ())
2463 return false;
2464 return (optimize > 0
2465 && flag_expensive_optimizations
2466 && ! optimize_function_for_size_p (fun));
/* Execute the compgotos pass: find small blocks ending in a computed
   jump, then duplicate each such block into predecessors that reach it
   by an unconditional edge, so every interpreter-style dispatch gets
   its own indirect jump.  Returns 0 (no TODO flags).  */
2469 unsigned int
2470 pass_duplicate_computed_gotos::execute (function *fun)
2472 basic_block bb, new_bb;
/* Bitmap of block indices that are safe and profitable to duplicate.  */
2473 bitmap candidates;
/* Size budget (in minimal insn lengths) for a duplicatable block.  */
2474 int max_size;
2475 bool changed = false;
2477 if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2478 return 0;
2480 clear_bb_flags ();
2481 cfg_layout_initialize (0);
2483 /* We are estimating the length of uncond jump insn only once
2484 since the code for getting the insn length always returns
2485 the minimal length now. */
2486 if (uncond_jump_length == 0)
2487 uncond_jump_length = get_uncond_jump_length ();
2489 max_size
2490 = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2491 candidates = BITMAP_ALLOC (NULL);
2493 /* Look for blocks that end in a computed jump, and see if such blocks
2494 are suitable for unfactoring. If a block is a candidate for unfactoring,
2495 mark it in the candidates. */
2496 FOR_EACH_BB_FN (bb, fun)
2498 rtx_insn *insn;
2499 edge e;
2500 edge_iterator ei;
2501 int size, all_flags;
2503 /* Build the reorder chain for the original order of blocks. */
2504 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2505 bb->aux = bb->next_bb;
2507 /* Obviously the block has to end in a computed jump. */
2508 if (!computed_jump_p (BB_END (bb)))
2509 continue;
2511 /* Only consider blocks that can be duplicated. */
2512 if (CROSSING_JUMP_P (BB_END (bb))
2513 || !can_duplicate_block_p (bb))
2514 continue;
2516 /* Make sure that the block is small enough. */
2517 size = 0;
2518 FOR_BB_INSNS (bb, insn)
2519 if (INSN_P (insn))
2521 size += get_attr_min_length (insn);
2522 if (size > max_size)
2523 break;
2525 if (size > max_size)
2526 continue;
2528 /* Final check: there must not be any incoming abnormal edges. */
2529 all_flags = 0;
2530 FOR_EACH_EDGE (e, ei, bb->preds)
2531 all_flags |= e->flags;
2532 if (all_flags & EDGE_COMPLEX)
2533 continue;
2535 bitmap_set_bit (candidates, bb->index);
2538 /* Nothing to do if there is no computed jump here. */
2539 if (bitmap_empty_p (candidates))
2540 goto done;
2542 /* Duplicate computed gotos. */
2543 FOR_EACH_BB_FN (bb, fun)
/* BB_VISITED prevents re-processing; freshly created copies are also
   marked visited below so they are not considered again.  */
2545 if (bb->flags & BB_VISITED)
2546 continue;
2548 bb->flags |= BB_VISITED;
2550 /* BB must have one outgoing edge. That edge must not lead to
2551 the exit block or the next block.
2552 The destination must have more than one predecessor. */
2553 if (!single_succ_p (bb)
2554 || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2555 || single_succ (bb) == bb->next_bb
2556 || single_pred_p (single_succ (bb)))
2557 continue;
2559 /* The successor block has to be a duplication candidate. */
2560 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2561 continue;
2563 /* Don't duplicate a partition crossing edge, which requires difficult
2564 fixup. */
2565 if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2566 continue;
/* Clone the candidate successor into BB's path and splice the copy
   into the aux (layout) chain right after BB.  */
2568 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2569 new_bb->aux = bb->aux;
2570 bb->aux = new_bb;
2571 new_bb->flags |= BB_VISITED;
2572 changed = true;
2575 done:
2576 if (changed)
2578 /* Duplicating blocks above will redirect edges and may cause hot
2579 blocks previously reached by both hot and cold blocks to become
2580 dominated only by cold blocks. */
2581 fixup_partitions ();
2583 /* Merge the duplicated blocks into predecessors, when possible. */
2584 cfg_layout_finalize ();
2585 cleanup_cfg (0);
2587 else
2588 cfg_layout_finalize ();
2590 BITMAP_FREE (candidates);
2591 return 0;
2594 } // anon namespace
/* Factory used by the pass manager to instantiate the compgotos pass.  */
2596 rtl_opt_pass *
2597 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2599 return new pass_duplicate_computed_gotos (ctxt);
2602 /* This function is the main 'entrance' for the optimization that
2603 partitions hot and cold basic blocks into separate sections of the
2604 .o file (to improve performance and cache locality). Ideally it
2605 would be called after all optimizations that rearrange the CFG have
2606 been called. However part of this optimization may introduce new
2607 register usage, so it must be called before register allocation has
2608 occurred. This means that this optimization is actually called
2609 well before the optimization that reorders basic blocks (see
2610 function above).
2612 This optimization checks the feedback information to determine
2613 which basic blocks are hot/cold, updates flags on the basic blocks
2614 to indicate which section they belong in. This information is
2615 later used for writing out sections in the .o file. Because hot
2616 and cold sections can be arbitrarily large (within the bounds of
2617 memory), far beyond the size of a single function, it is necessary
2618 to fix up all edges that cross section boundaries, to make sure the
2619 instructions used can actually span the required distance. The
2620 fixes are described below.
2622 Fall-through edges must be changed into jumps; it is not safe or
2623 legal to fall through across a section boundary. Whenever a
2624 fall-through edge crossing a section boundary is encountered, a new
2625 basic block is inserted (in the same section as the fall-through
2626 source), and the fall through edge is redirected to the new basic
2627 block. The new basic block contains an unconditional jump to the
2628 original fall-through target. (If the unconditional jump is
2629 insufficient to cross section boundaries, that is dealt with a
2630 little later, see below).
2632 In order to deal with architectures that have short conditional
2633 branches (which cannot span all of memory) we take any conditional
2634 jump that attempts to cross a section boundary and add a level of
2635 indirection: it becomes a conditional jump to a new basic block, in
2636 the same section. The new basic block contains an unconditional
2637 jump to the original target, in the other section.
2639 For those architectures whose unconditional branch is also
2640 incapable of reaching all of memory, those unconditional jumps are
2641 converted into indirect jumps, through a register.
2643 IMPORTANT NOTE: This optimization causes some messy interactions
2644 with the cfg cleanup optimizations; those optimizations want to
2645 merge blocks wherever possible, and to collapse indirect jump
2646 sequences (change "A jumps to B jumps to C" directly into "A jumps
2647 to C"). Those optimizations can undo the jump fixes that
2648 partitioning is required to make (see above), in order to ensure
2649 that jumps attempting to cross section boundaries are really able
2650 to cover whatever distance the jump requires (on many architectures
2651 conditional or unconditional jumps are not able to reach all of
2652 memory). Therefore tests have to be inserted into each such
2653 optimization to make sure that it does not undo stuff necessary to
2654 cross partition boundaries. This would be much less of a problem
2655 if we could perform this optimization later in the compilation, but
2656 unfortunately the fact that we may need to create indirect jumps
2657 (through registers) requires that this optimization be performed
2658 before register allocation.
2660 Hot and cold basic blocks are partitioned and put in separate
2661 sections of the .o file, to reduce paging and improve cache
2662 performance (hopefully). This can result in bits of code from the
2663 same function being widely separated in the .o file. However this
2664 is not obvious to the current bb structure. Therefore we must take
2665 care to ensure that: 1). There are no fall_thru edges that cross
2666 between sections; 2). For those architectures which have "short"
2667 conditional branches, all conditional branches that attempt to
2668 cross between sections are converted to unconditional branches;
2669 and, 3). For those architectures which have "short" unconditional
2670 branches, all unconditional branches that attempt to cross between
2671 sections are converted to indirect jumps.
2673 The code for fixing up fall_thru edges that cross between hot and
2674 cold basic blocks does so by creating new basic blocks containing
2675 unconditional branches to the appropriate label in the "other"
2676 section. The new basic block is then put in the same (hot or cold)
2677 section as the original conditional branch, and the fall_thru edge
2678 is modified to fall into the new basic block instead. By adding
2679 this level of indirection we end up with only unconditional branches
2680 crossing between hot and cold sections.
2682 Conditional branches are dealt with by adding a level of indirection.
2683 A new basic block is added in the same (hot/cold) section as the
2684 conditional branch, and the conditional branch is retargeted to the
2685 new basic block. The new basic block contains an unconditional branch
2686 to the original target of the conditional branch (in the other section).
2688 Unconditional branches are dealt with by converting them into
2689 indirect jumps. */
2691 namespace {
/* Pass metadata for the "bbpart" hot/cold partitioning RTL pass; it
   requires cfglayout mode (PROP_cfglayout).  */
2693 const pass_data pass_data_partition_blocks =
2695 RTL_PASS, /* type */
2696 "bbpart", /* name */
2697 OPTGROUP_NONE, /* optinfo_flags */
2698 TV_REORDER_BLOCKS, /* tv_id */
2699 PROP_cfglayout, /* properties_required */
2700 0, /* properties_provided */
2701 0, /* properties_destroyed */
2702 0, /* todo_flags_start */
2703 0, /* todo_flags_finish */
/* Pass wrapper; gate and execute are defined out of line below.  */
2706 class pass_partition_blocks : public rtl_opt_pass
2708 public:
2709 pass_partition_blocks (gcc::context *ctxt)
2710 : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2713 /* opt_pass methods: */
2714 virtual bool gate (function *);
2715 virtual unsigned int execute (function *);
2717 }; // class pass_partition_blocks
/* Gate: partition only with -freorder-blocks-and-partition, when
   optimizing the function for speed, and never for COMDAT functions or
   functions with user-defined section attributes.  */
2719 bool
2720 pass_partition_blocks::gate (function *fun)
2722 /* The optimization to partition hot/cold basic blocks into separate
2723 sections of the .o file does not work well with linkonce or with
2724 user defined section attributes. Don't call it if either case
2725 arises. */
2726 return (flag_reorder_blocks_and_partition
2727 && optimize
2728 /* See gate_handle_reorder_blocks. We should not partition if
2729 we are going to omit the reordering. */
2730 && optimize_function_for_speed_p (fun)
2731 && !DECL_COMDAT_GROUP (current_function_decl)
2732 && !user_defined_section_attribute);
/* Execute the bbpart pass: mark blocks hot/cold, then fix up every edge
   that crosses the section boundary (add jumps/labels, convert
   fall-throughs, and lengthen short conditional/unconditional branches
   as the target requires).  Returns 0 (no TODO flags).  */
2735 unsigned
2736 pass_partition_blocks::execute (function *fun)
2738 vec<edge> crossing_edges;
2740 if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2741 return 0;
2743 df_set_flags (DF_DEFER_INSN_RESCAN);
/* Classify blocks as hot or cold and collect the crossing edges; an
   empty vector means nothing was partitioned, so there is no work.  */
2745 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2746 if (!crossing_edges.exists ())
2747 return 0;
2749 crtl->has_bb_partition = true;
2751 /* Make sure the source of any crossing edge ends in a jump and the
2752 destination of any crossing edge has a label. */
2753 add_labels_and_missing_jumps (crossing_edges);
2755 /* Convert all crossing fall_thru edges to non-crossing fall
2756 thrus to unconditional jumps (that jump to the original fall
2757 through dest). */
2758 fix_up_fall_thru_edges ();
2760 /* If the architecture does not have conditional branches that can
2761 span all of memory, convert crossing conditional branches into
2762 crossing unconditional branches. */
2763 if (!HAS_LONG_COND_BRANCH)
2764 fix_crossing_conditional_branches ();
2766 /* If the architecture does not have unconditional branches that
2767 can span all of memory, convert crossing unconditional branches
2768 into indirect jumps. Since adding an indirect jump also adds
2769 a new register usage, update the register usage information as
2770 well. */
2771 if (!HAS_LONG_UNCOND_BRANCH)
2772 fix_crossing_unconditional_branches ();
2774 update_crossing_jump_flags ();
2776 /* Clear bb->aux fields that the above routines were using. */
2777 clear_aux_for_blocks ();
2779 crossing_edges.release ();
2781 /* ??? FIXME: DF generates the bb info for a block immediately.
2782 And by immediately, I mean *during* creation of the block.
2784 #0 df_bb_refs_collect
2785 #1 in df_bb_refs_record
2786 #2 in create_basic_block_structure
2788 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2789 will *always* fail, because no edges can have been added to the
2790 block yet. Which of course means we don't add the right
2791 artificial refs, which means we fail df_verify (much) later.
2793 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2794 that we also shouldn't grab data from the new blocks those new
2795 insns are in either. In this way one can create the block, link
2796 it up properly, and have everything Just Work later, when deferred
2797 insns are processed.
2799 In the meantime, we have no other option but to throw away all
2800 of the DF data and recompute it all. */
2801 if (fun->eh->lp_array)
2803 df_finish_pass (true);
2804 df_scan_alloc (NULL);
2805 df_scan_blocks ();
2806 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2807 data. We blindly generated all of them when creating the new
2808 landing pad. Delete those assignments we don't use. */
2809 df_set_flags (DF_LR_RUN_DCE);
2810 df_analyze ();
2813 return 0;
2816 } // anon namespace
/* Factory used by the pass manager to instantiate the bbpart pass.  */
2818 rtl_opt_pass *
2819 make_pass_partition_blocks (gcc::context *ctxt)
2821 return new pass_partition_blocks (ctxt);