1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file contains the "reorder blocks" pass, which changes the control
21 flow of a function to encounter fewer branches; the "partition blocks"
22 pass, which divides the basic blocks into "hot" and "cold" partitions,
23 which are kept separate; and the "duplicate computed gotos" pass, which
24 duplicates blocks ending in an indirect jump.
26 There are two algorithms for "reorder blocks": the "simple" algorithm,
27 which just rearranges blocks, trying to minimize the number of executed
28 unconditional branches; and the "software trace cache" algorithm, which
29 also copies code, and in general tries a lot harder to have long linear
30 pieces of machine code executed. This algorithm is described next. */
/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see bb_to_key).
   Then the algorithm repeatedly adds the most probable successor to the end
   of a trace.  Finally it connects the traces.
39 There are two parameters: Branch Threshold and Exec Threshold.
40 If the probability of an edge to a successor of the current basic block is
41 lower than Branch Threshold or its frequency is lower than Exec Threshold,
42 then the successor will be the seed in one of the next rounds.
43 Each round has these parameters lower than the previous one.
44 The last round has to have these parameters set to zero so that the
45 remaining blocks are picked up.
   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   When connecting traces, the algorithm first checks whether there is an edge
   from the last block of a trace to the first block of another trace.
   When there are still some unconnected traces it checks whether there exists
   a basic block BB such that BB is a successor of the last block of a trace
   and BB is a predecessor of the first block of another trace.  In this case,
   BB is duplicated, added at the end of the first trace and the traces are
   connected through it.
   The rest of traces are simply connected so there will be a jump to the
   beginning of the rest of traces.
70 The above description is for the full algorithm, which is used when the
71 function is optimized for speed. When the function is optimized for size,
72 in order to reduce long jumps and connect more fallthru edges, the
73 algorithm is modified as follows:
74 (1) Break long traces to short ones. A trace is broken at a block that has
75 multiple predecessors/ successors during trace discovery. When connecting
76 traces, only connect Trace n with Trace n + 1. This change reduces most
77 long jumps compared with the above algorithm.
78 (2) Ignore the edge probability and frequency for fallthru edges.
79 (3) Keep the original order of blocks when there is no chance to fall
80 through. We rely on the results of cfg_cleanup.
   To implement the change for code size optimization, block's index is
   selected as the key and all traces are found in one round.

   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html

*/
95 #include "coretypes.h"
107 #include "insn-config.h"
112 #include "emit-rtl.h"
118 #include "diagnostic-core.h"
119 #include "toplev.h" /* user_defined_section_attribute */
120 #include "tree-pass.h"
123 #include "cfgbuild.h"
124 #include "cfgcleanup.h"
125 #include "bb-reorder.h"
128 #include "fibonacci_heap.h"
130 /* The number of rounds. In most cases there will only be 4 rounds, but
131 when partitioning hot and cold basic blocks into separate sections of
132 the object file there will be an extra round. */
135 struct target_bb_reorder default_target_bb_reorder
;
136 #if SWITCHABLE_TARGET
137 struct target_bb_reorder
*this_target_bb_reorder
= &default_target_bb_reorder
;
140 #define uncond_jump_length \
141 (this_target_bb_reorder->x_uncond_jump_length)
143 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
144 static const int branch_threshold
[N_ROUNDS
] = {400, 200, 100, 0, 0};
146 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
147 static const int exec_threshold
[N_ROUNDS
] = {500, 200, 50, 0, 0};
149 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
150 block the edge destination is not duplicated while connecting traces. */
151 #define DUPLICATION_THRESHOLD 100
153 typedef fibonacci_heap
<long, basic_block_def
> bb_heap_t
;
154 typedef fibonacci_node
<long, basic_block_def
> bb_heap_node_t
;
156 /* Structure to hold needed information for each basic block. */
157 struct bbro_basic_block_data
159 /* Which trace is the bb start of (-1 means it is not a start of any). */
162 /* Which trace is the bb end of (-1 means it is not an end of any). */
165 /* Which trace is the bb in? */
168 /* Which trace was this bb visited in? */
171 /* Which heap is BB in (if any)? */
174 /* Which heap node is BB in (if any)? */
175 bb_heap_node_t
*node
;
178 /* The current size of the following dynamic array. */
179 static int array_size
;
181 /* The array which holds needed information for basic blocks. */
182 static bbro_basic_block_data
*bbd
;
184 /* To avoid frequent reallocation the size of arrays is greater than needed,
185 the number of elements is (not less than) 1.25 * size_wanted. */
186 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
188 /* Free the memory and set the pointer to NULL. */
189 #define FREE(P) (gcc_assert (P), free (P), P = 0)
191 /* Structure for holding information about a trace. */
194 /* First and last basic block of the trace. */
195 basic_block first
, last
;
197 /* The round of the STC creation which this trace was found in. */
200 /* The length (i.e. the number of basic blocks) of the trace. */
204 /* Maximum frequency and count of one of the entry blocks. */
205 static int max_entry_frequency
;
206 static gcov_type max_entry_count
;
208 /* Local function prototypes. */
209 static void find_traces (int *, struct trace
*);
210 static basic_block
rotate_loop (edge
, struct trace
*, int);
211 static void mark_bb_visited (basic_block
, int);
212 static void find_traces_1_round (int, int, gcov_type
, struct trace
*, int *,
213 int, bb_heap_t
**, int);
214 static basic_block
copy_bb (basic_block
, edge
, basic_block
, int);
215 static long bb_to_key (basic_block
);
216 static bool better_edge_p (const_basic_block
, const_edge
, int, int, int, int,
218 static bool connect_better_edge_p (const_edge
, bool, int, const_edge
,
220 static void connect_traces (int, struct trace
*);
221 static bool copy_bb_p (const_basic_block
, int);
222 static bool push_to_next_round_p (const_basic_block
, int, int, int, gcov_type
);
224 /* Return the trace number in which BB was visited. */
227 bb_visited_trace (const_basic_block bb
)
229 gcc_assert (bb
->index
< array_size
);
230 return bbd
[bb
->index
].visited
;
233 /* This function marks BB that it was visited in trace number TRACE. */
236 mark_bb_visited (basic_block bb
, int trace
)
238 bbd
[bb
->index
].visited
= trace
;
239 if (bbd
[bb
->index
].heap
)
241 bbd
[bb
->index
].heap
->delete_node (bbd
[bb
->index
].node
);
242 bbd
[bb
->index
].heap
= NULL
;
243 bbd
[bb
->index
].node
= NULL
;
247 /* Check to see if bb should be pushed into the next round of trace
248 collections or not. Reasons for pushing the block forward are 1).
249 If the block is cold, we are doing partitioning, and there will be
250 another round (cold partition blocks are not supposed to be
251 collected into traces until the very last round); or 2). There will
252 be another round, and the basic block is not "hot enough" for the
253 current round of trace collection. */
256 push_to_next_round_p (const_basic_block bb
, int round
, int number_of_rounds
,
257 int exec_th
, gcov_type count_th
)
259 bool there_exists_another_round
;
260 bool block_not_hot_enough
;
262 there_exists_another_round
= round
< number_of_rounds
- 1;
264 block_not_hot_enough
= (bb
->frequency
< exec_th
265 || bb
->count
< count_th
266 || probably_never_executed_bb_p (cfun
, bb
));
268 if (there_exists_another_round
269 && block_not_hot_enough
)
275 /* Find the traces for Software Trace Cache. Chain each trace through
276 RBI()->next. Store the number of traces to N_TRACES and description of
280 find_traces (int *n_traces
, struct trace
*traces
)
283 int number_of_rounds
;
286 bb_heap_t
*heap
= new bb_heap_t (LONG_MIN
);
288 /* Add one extra round of trace collection when partitioning hot/cold
289 basic blocks into separate sections. The last round is for all the
290 cold blocks (and ONLY the cold blocks). */
292 number_of_rounds
= N_ROUNDS
- 1;
294 /* Insert entry points of function into heap. */
295 max_entry_frequency
= 0;
297 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR_FOR_FN (cfun
)->succs
)
299 bbd
[e
->dest
->index
].heap
= heap
;
300 bbd
[e
->dest
->index
].node
= heap
->insert (bb_to_key (e
->dest
), e
->dest
);
301 if (e
->dest
->frequency
> max_entry_frequency
)
302 max_entry_frequency
= e
->dest
->frequency
;
303 if (e
->dest
->count
> max_entry_count
)
304 max_entry_count
= e
->dest
->count
;
307 /* Find the traces. */
308 for (i
= 0; i
< number_of_rounds
; i
++)
310 gcov_type count_threshold
;
313 fprintf (dump_file
, "STC - round %d\n", i
+ 1);
315 if (max_entry_count
< INT_MAX
/ 1000)
316 count_threshold
= max_entry_count
* exec_threshold
[i
] / 1000;
318 count_threshold
= max_entry_count
/ 1000 * exec_threshold
[i
];
320 find_traces_1_round (REG_BR_PROB_BASE
* branch_threshold
[i
] / 1000,
321 max_entry_frequency
* exec_threshold
[i
] / 1000,
322 count_threshold
, traces
, n_traces
, i
, &heap
,
329 for (i
= 0; i
< *n_traces
; i
++)
332 fprintf (dump_file
, "Trace %d (round %d): ", i
+ 1,
333 traces
[i
].round
+ 1);
334 for (bb
= traces
[i
].first
;
335 bb
!= traces
[i
].last
;
336 bb
= (basic_block
) bb
->aux
)
337 fprintf (dump_file
, "%d [%d] ", bb
->index
, bb
->frequency
);
338 fprintf (dump_file
, "%d [%d]\n", bb
->index
, bb
->frequency
);
344 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
345 (with sequential number TRACE_N). */
348 rotate_loop (edge back_edge
, struct trace
*trace
, int trace_n
)
352 /* Information about the best end (end after rotation) of the loop. */
353 basic_block best_bb
= NULL
;
354 edge best_edge
= NULL
;
356 gcov_type best_count
= -1;
357 /* The best edge is preferred when its destination is not visited yet
358 or is a start block of some trace. */
359 bool is_preferred
= false;
361 /* Find the most frequent edge that goes out from current trace. */
362 bb
= back_edge
->dest
;
368 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
369 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
370 && bb_visited_trace (e
->dest
) != trace_n
371 && (e
->flags
& EDGE_CAN_FALLTHRU
)
372 && !(e
->flags
& EDGE_COMPLEX
))
376 /* The best edge is preferred. */
377 if (!bb_visited_trace (e
->dest
)
378 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
380 /* The current edge E is also preferred. */
381 int freq
= EDGE_FREQUENCY (e
);
382 if (freq
> best_freq
|| e
->count
> best_count
)
385 best_count
= e
->count
;
393 if (!bb_visited_trace (e
->dest
)
394 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
396 /* The current edge E is preferred. */
398 best_freq
= EDGE_FREQUENCY (e
);
399 best_count
= e
->count
;
405 int freq
= EDGE_FREQUENCY (e
);
406 if (!best_edge
|| freq
> best_freq
|| e
->count
> best_count
)
409 best_count
= e
->count
;
416 bb
= (basic_block
) bb
->aux
;
418 while (bb
!= back_edge
->dest
);
422 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
424 if (back_edge
->dest
== trace
->first
)
426 trace
->first
= (basic_block
) best_bb
->aux
;
432 for (prev_bb
= trace
->first
;
433 prev_bb
->aux
!= back_edge
->dest
;
434 prev_bb
= (basic_block
) prev_bb
->aux
)
436 prev_bb
->aux
= best_bb
->aux
;
438 /* Try to get rid of uncond jump to cond jump. */
439 if (single_succ_p (prev_bb
))
441 basic_block header
= single_succ (prev_bb
);
443 /* Duplicate HEADER if it is a small block containing cond jump
445 if (any_condjump_p (BB_END (header
)) && copy_bb_p (header
, 0)
446 && !CROSSING_JUMP_P (BB_END (header
)))
447 copy_bb (header
, single_succ_edge (prev_bb
), prev_bb
, trace_n
);
453 /* We have not found suitable loop tail so do no rotation. */
454 best_bb
= back_edge
->src
;
460 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
461 not include basic blocks whose probability is lower than BRANCH_TH or whose
462 frequency is lower than EXEC_TH into traces (or whose count is lower than
463 COUNT_TH). Store the new traces into TRACES and modify the number of
464 traces *N_TRACES. Set the round (which the trace belongs to) to ROUND.
465 The function expects starting basic blocks to be in *HEAP and will delete
466 *HEAP and store starting points for the next round into new *HEAP. */
469 find_traces_1_round (int branch_th
, int exec_th
, gcov_type count_th
,
470 struct trace
*traces
, int *n_traces
, int round
,
471 bb_heap_t
**heap
, int number_of_rounds
)
473 /* Heap for discarded basic blocks which are possible starting points for
475 bb_heap_t
*new_heap
= new bb_heap_t (LONG_MIN
);
476 bool for_size
= optimize_function_for_size_p (cfun
);
478 while (!(*heap
)->empty ())
486 bb
= (*heap
)->extract_min ();
487 bbd
[bb
->index
].heap
= NULL
;
488 bbd
[bb
->index
].node
= NULL
;
491 fprintf (dump_file
, "Getting bb %d\n", bb
->index
);
493 /* If the BB's frequency is too low, send BB to the next round. When
494 partitioning hot/cold blocks into separate sections, make sure all
495 the cold blocks (and ONLY the cold blocks) go into the (extra) final
496 round. When optimizing for size, do not push to next round. */
499 && push_to_next_round_p (bb
, round
, number_of_rounds
, exec_th
,
502 int key
= bb_to_key (bb
);
503 bbd
[bb
->index
].heap
= new_heap
;
504 bbd
[bb
->index
].node
= new_heap
->insert (key
, bb
);
508 " Possible start point of next round: %d (key: %d)\n",
513 trace
= traces
+ *n_traces
;
515 trace
->round
= round
;
517 bbd
[bb
->index
].in_trace
= *n_traces
;
525 /* The probability and frequency of the best edge. */
526 int best_prob
= INT_MIN
/ 2;
527 int best_freq
= INT_MIN
/ 2;
530 mark_bb_visited (bb
, *n_traces
);
534 fprintf (dump_file
, "Basic block %d was visited in trace %d\n",
535 bb
->index
, *n_traces
- 1);
537 ends_in_call
= block_ends_with_call_p (bb
);
539 /* Select the successor that will be placed after BB. */
540 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
542 gcc_assert (!(e
->flags
& EDGE_FAKE
));
544 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
547 if (bb_visited_trace (e
->dest
)
548 && bb_visited_trace (e
->dest
) != *n_traces
)
551 if (BB_PARTITION (e
->dest
) != BB_PARTITION (bb
))
554 prob
= e
->probability
;
555 freq
= e
->dest
->frequency
;
557 /* The only sensible preference for a call instruction is the
558 fallthru edge. Don't bother selecting anything else. */
561 if (e
->flags
& EDGE_CAN_FALLTHRU
)
570 /* Edge that cannot be fallthru or improbable or infrequent
571 successor (i.e. it is unsuitable successor). When optimizing
572 for size, ignore the probability and frequency. */
573 if (!(e
->flags
& EDGE_CAN_FALLTHRU
) || (e
->flags
& EDGE_COMPLEX
)
574 || ((prob
< branch_th
|| EDGE_FREQUENCY (e
) < exec_th
575 || e
->count
< count_th
) && (!for_size
)))
578 /* If partitioning hot/cold basic blocks, don't consider edges
579 that cross section boundaries. */
581 if (better_edge_p (bb
, e
, prob
, freq
, best_prob
, best_freq
,
590 /* If the best destination has multiple predecessors, and can be
591 duplicated cheaper than a jump, don't allow it to be added
592 to a trace. We'll duplicate it when connecting traces. */
593 if (best_edge
&& EDGE_COUNT (best_edge
->dest
->preds
) >= 2
594 && copy_bb_p (best_edge
->dest
, 0))
597 /* If the best destination has multiple successors or predecessors,
598 don't allow it to be added when optimizing for size. This makes
599 sure predecessors with smaller index are handled before the best
600 destinarion. It breaks long trace and reduces long jumps.
602 Take if-then-else as an example.
608 If we do not remove the best edge B->D/C->D, the final order might
609 be A B D ... C. C is at the end of the program. If D's successors
610 and D are complicated, might need long jumps for A->C and C->D.
611 Similar issue for order: A C D ... B.
613 After removing the best edge, the final result will be ABCD/ ACBD.
614 It does not add jump compared with the previous order. But it
615 reduces the possibility of long jumps. */
616 if (best_edge
&& for_size
617 && (EDGE_COUNT (best_edge
->dest
->succs
) > 1
618 || EDGE_COUNT (best_edge
->dest
->preds
) > 1))
621 /* Add all non-selected successors to the heaps. */
622 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
625 || e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
626 || bb_visited_trace (e
->dest
))
629 key
= bb_to_key (e
->dest
);
631 if (bbd
[e
->dest
->index
].heap
)
633 /* E->DEST is already in some heap. */
634 if (key
!= bbd
[e
->dest
->index
].node
->get_key ())
639 "Changing key for bb %d from %ld to %ld.\n",
641 (long) bbd
[e
->dest
->index
].node
->get_key (),
644 bbd
[e
->dest
->index
].heap
->replace_key
645 (bbd
[e
->dest
->index
].node
, key
);
650 bb_heap_t
*which_heap
= *heap
;
652 prob
= e
->probability
;
653 freq
= EDGE_FREQUENCY (e
);
655 if (!(e
->flags
& EDGE_CAN_FALLTHRU
)
656 || (e
->flags
& EDGE_COMPLEX
)
657 || prob
< branch_th
|| freq
< exec_th
658 || e
->count
< count_th
)
660 /* When partitioning hot/cold basic blocks, make sure
661 the cold blocks (and only the cold blocks) all get
662 pushed to the last round of trace collection. When
663 optimizing for size, do not push to next round. */
665 if (!for_size
&& push_to_next_round_p (e
->dest
, round
,
668 which_heap
= new_heap
;
671 bbd
[e
->dest
->index
].heap
= which_heap
;
672 bbd
[e
->dest
->index
].node
= which_heap
->insert (key
, e
->dest
);
677 " Possible start of %s round: %d (key: %ld)\n",
678 (which_heap
== new_heap
) ? "next" : "this",
679 e
->dest
->index
, (long) key
);
685 if (best_edge
) /* Suitable successor was found. */
687 if (bb_visited_trace (best_edge
->dest
) == *n_traces
)
689 /* We do nothing with one basic block loops. */
690 if (best_edge
->dest
!= bb
)
692 if (EDGE_FREQUENCY (best_edge
)
693 > 4 * best_edge
->dest
->frequency
/ 5)
695 /* The loop has at least 4 iterations. If the loop
696 header is not the first block of the function
697 we can rotate the loop. */
700 != ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
)
705 "Rotating loop %d - %d\n",
706 best_edge
->dest
->index
, bb
->index
);
708 bb
->aux
= best_edge
->dest
;
709 bbd
[best_edge
->dest
->index
].in_trace
=
711 bb
= rotate_loop (best_edge
, trace
, *n_traces
);
716 /* The loop has less than 4 iterations. */
718 if (single_succ_p (bb
)
719 && copy_bb_p (best_edge
->dest
,
720 optimize_edge_for_speed_p
723 bb
= copy_bb (best_edge
->dest
, best_edge
, bb
,
730 /* Terminate the trace. */
735 /* Check for a situation
744 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
745 >= EDGE_FREQUENCY (AC).
746 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
747 Best ordering is then A B C.
749 When optimizing for size, A B C is always the best order.
751 This situation is created for example by:
758 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
760 && (e
->flags
& EDGE_CAN_FALLTHRU
)
761 && !(e
->flags
& EDGE_COMPLEX
)
762 && !bb_visited_trace (e
->dest
)
763 && single_pred_p (e
->dest
)
764 && !(e
->flags
& EDGE_CROSSING
)
765 && single_succ_p (e
->dest
)
766 && (single_succ_edge (e
->dest
)->flags
768 && !(single_succ_edge (e
->dest
)->flags
& EDGE_COMPLEX
)
769 && single_succ (e
->dest
) == best_edge
->dest
770 && (2 * e
->dest
->frequency
>= EDGE_FREQUENCY (best_edge
)
775 fprintf (dump_file
, "Selecting BB %d\n",
776 best_edge
->dest
->index
);
780 bb
->aux
= best_edge
->dest
;
781 bbd
[best_edge
->dest
->index
].in_trace
= (*n_traces
) - 1;
782 bb
= best_edge
->dest
;
788 bbd
[trace
->first
->index
].start_of_trace
= *n_traces
- 1;
789 bbd
[trace
->last
->index
].end_of_trace
= *n_traces
- 1;
791 /* The trace is terminated so we have to recount the keys in heap
792 (some block can have a lower key because now one of its predecessors
793 is an end of the trace). */
794 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
796 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
797 || bb_visited_trace (e
->dest
))
800 if (bbd
[e
->dest
->index
].heap
)
802 key
= bb_to_key (e
->dest
);
803 if (key
!= bbd
[e
->dest
->index
].node
->get_key ())
808 "Changing key for bb %d from %ld to %ld.\n",
810 (long) bbd
[e
->dest
->index
].node
->get_key (), key
);
812 bbd
[e
->dest
->index
].heap
->replace_key
813 (bbd
[e
->dest
->index
].node
, key
);
821 /* "Return" the new heap. */
825 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
826 it to trace after BB, mark OLD_BB visited and update pass' data structures
827 (TRACE is a number of trace which OLD_BB is duplicated to). */
830 copy_bb (basic_block old_bb
, edge e
, basic_block bb
, int trace
)
834 new_bb
= duplicate_block (old_bb
, e
, bb
);
835 BB_COPY_PARTITION (new_bb
, old_bb
);
837 gcc_assert (e
->dest
== new_bb
);
841 "Duplicated bb %d (created bb %d)\n",
842 old_bb
->index
, new_bb
->index
);
844 if (new_bb
->index
>= array_size
845 || last_basic_block_for_fn (cfun
) > array_size
)
850 new_size
= MAX (last_basic_block_for_fn (cfun
), new_bb
->index
+ 1);
851 new_size
= GET_ARRAY_SIZE (new_size
);
852 bbd
= XRESIZEVEC (bbro_basic_block_data
, bbd
, new_size
);
853 for (i
= array_size
; i
< new_size
; i
++)
855 bbd
[i
].start_of_trace
= -1;
856 bbd
[i
].end_of_trace
= -1;
857 bbd
[i
].in_trace
= -1;
862 array_size
= new_size
;
867 "Growing the dynamic array to %d elements.\n",
872 gcc_assert (!bb_visited_trace (e
->dest
));
873 mark_bb_visited (new_bb
, trace
);
874 new_bb
->aux
= bb
->aux
;
877 bbd
[new_bb
->index
].in_trace
= trace
;
882 /* Compute and return the key (for the heap) of the basic block BB. */
885 bb_to_key (basic_block bb
)
891 /* Use index as key to align with its original order. */
892 if (optimize_function_for_size_p (cfun
))
895 /* Do not start in probably never executed blocks. */
897 if (BB_PARTITION (bb
) == BB_COLD_PARTITION
898 || probably_never_executed_bb_p (cfun
, bb
))
901 /* Prefer blocks whose predecessor is an end of some trace
902 or whose predecessor edge is EDGE_DFS_BACK. */
903 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
905 if ((e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
906 && bbd
[e
->src
->index
].end_of_trace
>= 0)
907 || (e
->flags
& EDGE_DFS_BACK
))
909 int edge_freq
= EDGE_FREQUENCY (e
);
911 if (edge_freq
> priority
)
912 priority
= edge_freq
;
917 /* The block with priority should have significantly lower key. */
918 return -(100 * BB_FREQ_MAX
+ 100 * priority
+ bb
->frequency
);
920 return -bb
->frequency
;
923 /* Return true when the edge E from basic block BB is better than the temporary
924 best edge (details are in function). The probability of edge E is PROB. The
925 frequency of the successor is FREQ. The current best probability is
926 BEST_PROB, the best frequency is BEST_FREQ.
927 The edge is considered to be equivalent when PROB does not differ much from
928 BEST_PROB; similarly for frequency. */
931 better_edge_p (const_basic_block bb
, const_edge e
, int prob
, int freq
,
932 int best_prob
, int best_freq
, const_edge cur_best_edge
)
936 /* The BEST_* values do not have to be best, but can be a bit smaller than
938 int diff_prob
= best_prob
/ 10;
939 int diff_freq
= best_freq
/ 10;
941 /* The smaller one is better to keep the original order. */
942 if (optimize_function_for_size_p (cfun
))
943 return !cur_best_edge
944 || cur_best_edge
->dest
->index
> e
->dest
->index
;
946 if (prob
> best_prob
+ diff_prob
)
947 /* The edge has higher probability than the temporary best edge. */
948 is_better_edge
= true;
949 else if (prob
< best_prob
- diff_prob
)
950 /* The edge has lower probability than the temporary best edge. */
951 is_better_edge
= false;
952 else if (freq
< best_freq
- diff_freq
)
953 /* The edge and the temporary best edge have almost equivalent
954 probabilities. The higher frequency of a successor now means
955 that there is another edge going into that successor.
956 This successor has lower frequency so it is better. */
957 is_better_edge
= true;
958 else if (freq
> best_freq
+ diff_freq
)
959 /* This successor has higher frequency so it is worse. */
960 is_better_edge
= false;
961 else if (e
->dest
->prev_bb
== bb
)
962 /* The edges have equivalent probabilities and the successors
963 have equivalent frequencies. Select the previous successor. */
964 is_better_edge
= true;
966 is_better_edge
= false;
968 /* If we are doing hot/cold partitioning, make sure that we always favor
969 non-crossing edges over crossing edges. */
972 && flag_reorder_blocks_and_partition
974 && (cur_best_edge
->flags
& EDGE_CROSSING
)
975 && !(e
->flags
& EDGE_CROSSING
))
976 is_better_edge
= true;
978 return is_better_edge
;
981 /* Return true when the edge E is better than the temporary best edge
982 CUR_BEST_EDGE. If SRC_INDEX_P is true, the function compares the src bb of
983 E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
984 BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
985 TRACES record the information about traces.
986 When optimizing for size, the edge with smaller index is better.
987 When optimizing for speed, the edge with bigger probability or longer trace
991 connect_better_edge_p (const_edge e
, bool src_index_p
, int best_len
,
992 const_edge cur_best_edge
, struct trace
*traces
)
1001 if (optimize_function_for_size_p (cfun
))
1003 e_index
= src_index_p
? e
->src
->index
: e
->dest
->index
;
1004 b_index
= src_index_p
? cur_best_edge
->src
->index
1005 : cur_best_edge
->dest
->index
;
1006 /* The smaller one is better to keep the original order. */
1007 return b_index
> e_index
;
1012 e_index
= e
->src
->index
;
1014 if (e
->probability
> cur_best_edge
->probability
)
1015 /* The edge has higher probability than the temporary best edge. */
1016 is_better_edge
= true;
1017 else if (e
->probability
< cur_best_edge
->probability
)
1018 /* The edge has lower probability than the temporary best edge. */
1019 is_better_edge
= false;
1020 else if (traces
[bbd
[e_index
].end_of_trace
].length
> best_len
)
1021 /* The edge and the temporary best edge have equivalent probabilities.
1022 The edge with longer trace is better. */
1023 is_better_edge
= true;
1025 is_better_edge
= false;
1029 e_index
= e
->dest
->index
;
1031 if (e
->probability
> cur_best_edge
->probability
)
1032 /* The edge has higher probability than the temporary best edge. */
1033 is_better_edge
= true;
1034 else if (e
->probability
< cur_best_edge
->probability
)
1035 /* The edge has lower probability than the temporary best edge. */
1036 is_better_edge
= false;
1037 else if (traces
[bbd
[e_index
].start_of_trace
].length
> best_len
)
1038 /* The edge and the temporary best edge have equivalent probabilities.
1039 The edge with longer trace is better. */
1040 is_better_edge
= true;
1042 is_better_edge
= false;
1045 return is_better_edge
;
1048 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
1051 connect_traces (int n_traces
, struct trace
*traces
)
1058 int current_partition
;
1060 gcov_type count_threshold
;
1061 bool for_size
= optimize_function_for_size_p (cfun
);
1063 freq_threshold
= max_entry_frequency
* DUPLICATION_THRESHOLD
/ 1000;
1064 if (max_entry_count
< INT_MAX
/ 1000)
1065 count_threshold
= max_entry_count
* DUPLICATION_THRESHOLD
/ 1000;
1067 count_threshold
= max_entry_count
/ 1000 * DUPLICATION_THRESHOLD
;
1069 connected
= XCNEWVEC (bool, n_traces
);
1072 current_partition
= BB_PARTITION (traces
[0].first
);
1075 if (crtl
->has_bb_partition
)
1076 for (i
= 0; i
< n_traces
&& !two_passes
; i
++)
1077 if (BB_PARTITION (traces
[0].first
)
1078 != BB_PARTITION (traces
[i
].first
))
1081 for (i
= 0; i
< n_traces
|| (two_passes
&& current_pass
== 1) ; i
++)
1090 gcc_assert (two_passes
&& current_pass
== 1);
1094 if (current_partition
== BB_HOT_PARTITION
)
1095 current_partition
= BB_COLD_PARTITION
;
1097 current_partition
= BB_HOT_PARTITION
;
1104 && BB_PARTITION (traces
[t
].first
) != current_partition
)
1107 connected
[t
] = true;
1109 /* Find the predecessor traces. */
1110 for (t2
= t
; t2
> 0;)
1115 FOR_EACH_EDGE (e
, ei
, traces
[t2
].first
->preds
)
1117 int si
= e
->src
->index
;
1119 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1120 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1121 && !(e
->flags
& EDGE_COMPLEX
)
1122 && bbd
[si
].end_of_trace
>= 0
1123 && !connected
[bbd
[si
].end_of_trace
]
1124 && (BB_PARTITION (e
->src
) == current_partition
)
1125 && connect_better_edge_p (e
, true, best_len
, best
, traces
))
1128 best_len
= traces
[bbd
[si
].end_of_trace
].length
;
1133 best
->src
->aux
= best
->dest
;
1134 t2
= bbd
[best
->src
->index
].end_of_trace
;
1135 connected
[t2
] = true;
1139 fprintf (dump_file
, "Connection: %d %d\n",
1140 best
->src
->index
, best
->dest
->index
);
1147 if (last_trace
>= 0)
1148 traces
[last_trace
].last
->aux
= traces
[t2
].first
;
1151 /* Find the successor traces. */
1154 /* Find the continuation of the chain. */
1158 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1160 int di
= e
->dest
->index
;
1162 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1163 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1164 && !(e
->flags
& EDGE_COMPLEX
)
1165 && bbd
[di
].start_of_trace
>= 0
1166 && !connected
[bbd
[di
].start_of_trace
]
1167 && (BB_PARTITION (e
->dest
) == current_partition
)
1168 && connect_better_edge_p (e
, false, best_len
, best
, traces
))
1171 best_len
= traces
[bbd
[di
].start_of_trace
].length
;
1178 /* Stop finding the successor traces. */
1181 /* It is OK to connect block n with block n + 1 or a block
1182 before n. For others, only connect to the loop header. */
1183 if (best
->dest
->index
> (traces
[t
].last
->index
+ 1))
1185 int count
= EDGE_COUNT (best
->dest
->preds
);
1187 FOR_EACH_EDGE (e
, ei
, best
->dest
->preds
)
1188 if (e
->flags
& EDGE_DFS_BACK
)
1191 /* If dest has multiple predecessors, skip it. We expect
1192 that one predecessor with smaller index connects with it
1198 /* Only connect Trace n with Trace n + 1. It is conservative
1199 to keep the order as close as possible to the original order.
1200 It also helps to reduce long jumps. */
1201 if (last_trace
!= bbd
[best
->dest
->index
].start_of_trace
- 1)
1205 fprintf (dump_file
, "Connection: %d %d\n",
1206 best
->src
->index
, best
->dest
->index
);
1208 t
= bbd
[best
->dest
->index
].start_of_trace
;
1209 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1210 connected
[t
] = true;
1217 fprintf (dump_file
, "Connection: %d %d\n",
1218 best
->src
->index
, best
->dest
->index
);
1220 t
= bbd
[best
->dest
->index
].start_of_trace
;
1221 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1222 connected
[t
] = true;
1227 /* Try to connect the traces by duplication of 1 block. */
1229 basic_block next_bb
= NULL
;
1230 bool try_copy
= false;
1232 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1233 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1234 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1235 && !(e
->flags
& EDGE_COMPLEX
)
1236 && (!best
|| e
->probability
> best
->probability
))
1242 /* If the destination is a start of a trace which is only
1243 one block long, then no need to search the successor
1244 blocks of the trace. Accept it. */
1245 if (bbd
[e
->dest
->index
].start_of_trace
>= 0
1246 && traces
[bbd
[e
->dest
->index
].start_of_trace
].length
1254 FOR_EACH_EDGE (e2
, ei
, e
->dest
->succs
)
1256 int di
= e2
->dest
->index
;
1258 if (e2
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
1259 || ((e2
->flags
& EDGE_CAN_FALLTHRU
)
1260 && !(e2
->flags
& EDGE_COMPLEX
)
1261 && bbd
[di
].start_of_trace
>= 0
1262 && !connected
[bbd
[di
].start_of_trace
]
1263 && BB_PARTITION (e2
->dest
) == current_partition
1264 && EDGE_FREQUENCY (e2
) >= freq_threshold
1265 && e2
->count
>= count_threshold
1267 || e2
->probability
> best2
->probability
1268 || (e2
->probability
== best2
->probability
1269 && traces
[bbd
[di
].start_of_trace
].length
1274 if (e2
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1275 best2_len
= traces
[bbd
[di
].start_of_trace
].length
;
1277 best2_len
= INT_MAX
;
1284 if (crtl
->has_bb_partition
)
1287 /* Copy tiny blocks always; copy larger blocks only when the
1288 edge is traversed frequently enough. */
1290 && copy_bb_p (best
->dest
,
1291 optimize_edge_for_speed_p (best
)
1292 && EDGE_FREQUENCY (best
) >= freq_threshold
1293 && best
->count
>= count_threshold
))
1299 fprintf (dump_file
, "Connection: %d %d ",
1300 traces
[t
].last
->index
, best
->dest
->index
);
1302 fputc ('\n', dump_file
);
1303 else if (next_bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1304 fprintf (dump_file
, "exit\n");
1306 fprintf (dump_file
, "%d\n", next_bb
->index
);
1309 new_bb
= copy_bb (best
->dest
, best
, traces
[t
].last
, t
);
1310 traces
[t
].last
= new_bb
;
1311 if (next_bb
&& next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1313 t
= bbd
[next_bb
->index
].start_of_trace
;
1314 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1315 connected
[t
] = true;
1319 break; /* Stop finding the successor traces. */
1322 break; /* Stop finding the successor traces. */
1331 fprintf (dump_file
, "Final order:\n");
1332 for (bb
= traces
[0].first
; bb
; bb
= (basic_block
) bb
->aux
)
1333 fprintf (dump_file
, "%d ", bb
->index
);
1334 fprintf (dump_file
, "\n");
1341 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1342 when code size is allowed to grow by duplication. */
1345 copy_bb_p (const_basic_block bb
, int code_may_grow
)
1348 int max_size
= uncond_jump_length
;
1353 if (EDGE_COUNT (bb
->preds
) < 2)
1355 if (!can_duplicate_block_p (bb
))
1358 /* Avoid duplicating blocks which have many successors (PR/13430). */
1359 if (EDGE_COUNT (bb
->succs
) > 8)
1362 if (code_may_grow
&& optimize_bb_for_speed_p (bb
))
1363 max_size
*= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS
);
1365 FOR_BB_INSNS (bb
, insn
)
1368 size
+= get_attr_min_length (insn
);
1371 if (size
<= max_size
)
1377 "Block %d can't be copied because its size = %d.\n",
1384 /* Return the length of unconditional jump instruction. */
1387 get_uncond_jump_length (void)
1392 rtx_code_label
*label
= emit_label (gen_label_rtx ());
1393 rtx_insn
*jump
= emit_jump_insn (targetm
.gen_jump (label
));
1394 length
= get_attr_min_length (jump
);
1400 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1401 Duplicate the landing pad and split the edges so that no EH edge
1402 crosses partitions. */
1405 fix_up_crossing_landing_pad (eh_landing_pad old_lp
, basic_block old_bb
)
1407 eh_landing_pad new_lp
;
1408 basic_block new_bb
, last_bb
, post_bb
;
1410 unsigned new_partition
;
1414 /* Generate the new landing-pad structure. */
1415 new_lp
= gen_eh_landing_pad (old_lp
->region
);
1416 new_lp
->post_landing_pad
= old_lp
->post_landing_pad
;
1417 new_lp
->landing_pad
= gen_label_rtx ();
1418 LABEL_PRESERVE_P (new_lp
->landing_pad
) = 1;
1420 /* Put appropriate instructions in new bb. */
1421 rtx_code_label
*new_label
= emit_label (new_lp
->landing_pad
);
1423 expand_dw2_landing_pad_for_region (old_lp
->region
);
1425 post_bb
= BLOCK_FOR_INSN (old_lp
->landing_pad
);
1426 post_bb
= single_succ (post_bb
);
1427 rtx_code_label
*post_label
= block_label (post_bb
);
1428 jump
= emit_jump_insn (targetm
.gen_jump (post_label
));
1429 JUMP_LABEL (jump
) = post_label
;
1431 /* Create new basic block to be dest for lp. */
1432 last_bb
= EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
;
1433 new_bb
= create_basic_block (new_label
, jump
, last_bb
);
1434 new_bb
->aux
= last_bb
->aux
;
1435 last_bb
->aux
= new_bb
;
1437 emit_barrier_after_bb (new_bb
);
1439 make_edge (new_bb
, post_bb
, 0);
1441 /* Make sure new bb is in the other partition. */
1442 new_partition
= BB_PARTITION (old_bb
);
1443 new_partition
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1444 BB_SET_PARTITION (new_bb
, new_partition
);
1446 /* Fix up the edges. */
1447 for (ei
= ei_start (old_bb
->preds
); (e
= ei_safe_edge (ei
)) != NULL
; )
1448 if (BB_PARTITION (e
->src
) == new_partition
)
1450 rtx_insn
*insn
= BB_END (e
->src
);
1451 rtx note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
1453 gcc_assert (note
!= NULL
);
1454 gcc_checking_assert (INTVAL (XEXP (note
, 0)) == old_lp
->index
);
1455 XEXP (note
, 0) = GEN_INT (new_lp
->index
);
1457 /* Adjust the edge to the new destination. */
1458 redirect_edge_succ (e
, new_bb
);
1465 /* Ensure that all hot bbs are included in a hot path through the
1466 procedure. This is done by calling this function twice, once
1467 with WALK_UP true (to look for paths from the entry to hot bbs) and
1468 once with WALK_UP false (to look for paths from hot bbs to the exit).
1469 Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1470 to BBS_IN_HOT_PARTITION. */
1473 sanitize_hot_paths (bool walk_up
, unsigned int cold_bb_count
,
1474 vec
<basic_block
> *bbs_in_hot_partition
)
1476 /* Callers check this. */
1477 gcc_checking_assert (cold_bb_count
);
1479 /* Keep examining hot bbs while we still have some left to check
1480 and there are remaining cold bbs. */
1481 vec
<basic_block
> hot_bbs_to_check
= bbs_in_hot_partition
->copy ();
1482 while (! hot_bbs_to_check
.is_empty ()
1485 basic_block bb
= hot_bbs_to_check
.pop ();
1486 vec
<edge
, va_gc
> *edges
= walk_up
? bb
->preds
: bb
->succs
;
1489 int highest_probability
= 0;
1490 int highest_freq
= 0;
1491 gcov_type highest_count
= 0;
1494 /* Walk the preds/succs and check if there is at least one already
1495 marked hot. Keep track of the most frequent pred/succ so that we
1496 can mark it hot if we don't find one. */
1497 FOR_EACH_EDGE (e
, ei
, edges
)
1499 basic_block reach_bb
= walk_up
? e
->src
: e
->dest
;
1501 if (e
->flags
& EDGE_DFS_BACK
)
1504 if (BB_PARTITION (reach_bb
) != BB_COLD_PARTITION
)
1509 /* The following loop will look for the hottest edge via
1510 the edge count, if it is non-zero, then fallback to the edge
1511 frequency and finally the edge probability. */
1512 if (e
->count
> highest_count
)
1513 highest_count
= e
->count
;
1514 int edge_freq
= EDGE_FREQUENCY (e
);
1515 if (edge_freq
> highest_freq
)
1516 highest_freq
= edge_freq
;
1517 if (e
->probability
> highest_probability
)
1518 highest_probability
= e
->probability
;
1521 /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1522 block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1523 then the most frequent pred (or succ) needs to be adjusted. In the
1524 case where multiple preds/succs have the same frequency (e.g. a
1525 50-50 branch), then both will be adjusted. */
1529 FOR_EACH_EDGE (e
, ei
, edges
)
1531 if (e
->flags
& EDGE_DFS_BACK
)
1533 /* Select the hottest edge using the edge count, if it is non-zero,
1534 then fallback to the edge frequency and finally the edge
1538 if (e
->count
< highest_count
)
1541 else if (highest_freq
)
1543 if (EDGE_FREQUENCY (e
) < highest_freq
)
1546 else if (e
->probability
< highest_probability
)
1549 basic_block reach_bb
= walk_up
? e
->src
: e
->dest
;
1551 /* We have a hot bb with an immediate dominator that is cold.
1552 The dominator needs to be re-marked hot. */
1553 BB_SET_PARTITION (reach_bb
, BB_HOT_PARTITION
);
1556 /* Now we need to examine newly-hot reach_bb to see if it is also
1557 dominated by a cold bb. */
1558 bbs_in_hot_partition
->safe_push (reach_bb
);
1559 hot_bbs_to_check
.safe_push (reach_bb
);
1563 return cold_bb_count
;
1567 /* Find the basic blocks that are rarely executed and need to be moved to
1568 a separate section of the .o file (to cut down on paging and improve
1569 cache locality). Return a vector of all edges that cross. */
1572 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1574 vec
<edge
> crossing_edges
= vNULL
;
1578 unsigned int cold_bb_count
= 0;
1579 auto_vec
<basic_block
> bbs_in_hot_partition
;
1581 /* Mark which partition (hot/cold) each basic block belongs in. */
1582 FOR_EACH_BB_FN (bb
, cfun
)
1584 bool cold_bb
= false;
1586 if (probably_never_executed_bb_p (cfun
, bb
))
1588 /* Handle profile insanities created by upstream optimizations
1589 by also checking the incoming edge weights. If there is a non-cold
1590 incoming edge, conservatively prevent this block from being split
1591 into the cold section. */
1593 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1594 if (!probably_never_executed_edge_p (cfun
, e
))
1602 BB_SET_PARTITION (bb
, BB_COLD_PARTITION
);
1607 BB_SET_PARTITION (bb
, BB_HOT_PARTITION
);
1608 bbs_in_hot_partition
.safe_push (bb
);
1612 /* Ensure that hot bbs are included along a hot path from the entry to exit.
1613 Several different possibilities may include cold bbs along all paths
1614 to/from a hot bb. One is that there are edge weight insanities
1615 due to optimization phases that do not properly update basic block profile
1616 counts. The second is that the entry of the function may not be hot, because
1617 it is entered fewer times than the number of profile training runs, but there
1618 is a loop inside the function that causes blocks within the function to be
1619 above the threshold for hotness. This is fixed by walking up from hot bbs
1620 to the entry block, and then down from hot bbs to the exit, performing
1621 partitioning fixups as necessary. */
1624 mark_dfs_back_edges ();
1625 cold_bb_count
= sanitize_hot_paths (true, cold_bb_count
,
1626 &bbs_in_hot_partition
);
1628 sanitize_hot_paths (false, cold_bb_count
, &bbs_in_hot_partition
);
1631 /* The format of .gcc_except_table does not allow landing pads to
1632 be in a different partition as the throw. Fix this by either
1633 moving or duplicating the landing pads. */
1634 if (cfun
->eh
->lp_array
)
1639 FOR_EACH_VEC_ELT (*cfun
->eh
->lp_array
, i
, lp
)
1641 bool all_same
, all_diff
;
1644 || lp
->landing_pad
== NULL_RTX
1645 || !LABEL_P (lp
->landing_pad
))
1648 all_same
= all_diff
= true;
1649 bb
= BLOCK_FOR_INSN (lp
->landing_pad
);
1650 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1652 gcc_assert (e
->flags
& EDGE_EH
);
1653 if (BB_PARTITION (bb
) == BB_PARTITION (e
->src
))
1663 int which
= BB_PARTITION (bb
);
1664 which
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1665 BB_SET_PARTITION (bb
, which
);
1668 fix_up_crossing_landing_pad (lp
, bb
);
1672 /* Mark every edge that crosses between sections. */
1674 FOR_EACH_BB_FN (bb
, cfun
)
1675 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1677 unsigned int flags
= e
->flags
;
1679 /* We should never have EDGE_CROSSING set yet. */
1680 gcc_checking_assert ((flags
& EDGE_CROSSING
) == 0);
1682 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1683 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1684 && BB_PARTITION (e
->src
) != BB_PARTITION (e
->dest
))
1686 crossing_edges
.safe_push (e
);
1687 flags
|= EDGE_CROSSING
;
1690 /* Now that we've split eh edges as appropriate, allow landing pads
1691 to be merged with the post-landing pads. */
1692 flags
&= ~EDGE_PRESERVE
;
1697 return crossing_edges
;
1700 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1703 set_edge_can_fallthru_flag (void)
1707 FOR_EACH_BB_FN (bb
, cfun
)
1712 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1714 e
->flags
&= ~EDGE_CAN_FALLTHRU
;
1716 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1717 if (e
->flags
& EDGE_FALLTHRU
)
1718 e
->flags
|= EDGE_CAN_FALLTHRU
;
1721 /* If the BB ends with an invertible condjump all (2) edges are
1722 CAN_FALLTHRU edges. */
1723 if (EDGE_COUNT (bb
->succs
) != 2)
1725 if (!any_condjump_p (BB_END (bb
)))
1728 rtx_jump_insn
*bb_end_jump
= as_a
<rtx_jump_insn
*> (BB_END (bb
));
1729 if (!invert_jump (bb_end_jump
, JUMP_LABEL (bb_end_jump
), 0))
1731 invert_jump (bb_end_jump
, JUMP_LABEL (bb_end_jump
), 0);
1732 EDGE_SUCC (bb
, 0)->flags
|= EDGE_CAN_FALLTHRU
;
1733 EDGE_SUCC (bb
, 1)->flags
|= EDGE_CAN_FALLTHRU
;
1737 /* If any destination of a crossing edge does not have a label, add label;
1738 Convert any easy fall-through crossing edges to unconditional jumps. */
1741 add_labels_and_missing_jumps (vec
<edge
> crossing_edges
)
1746 FOR_EACH_VEC_ELT (crossing_edges
, i
, e
)
1748 basic_block src
= e
->src
;
1749 basic_block dest
= e
->dest
;
1750 rtx_jump_insn
*new_jump
;
1752 if (dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1755 /* Make sure dest has a label. */
1756 rtx_code_label
*label
= block_label (dest
);
1758 /* Nothing to do for non-fallthru edges. */
1759 if (src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1761 if ((e
->flags
& EDGE_FALLTHRU
) == 0)
1764 /* If the block does not end with a control flow insn, then we
1765 can trivially add a jump to the end to fixup the crossing.
1766 Otherwise the jump will have to go in a new bb, which will
1767 be handled by fix_up_fall_thru_edges function. */
1768 if (control_flow_insn_p (BB_END (src
)))
1771 /* Make sure there's only one successor. */
1772 gcc_assert (single_succ_p (src
));
1774 new_jump
= emit_jump_insn_after (targetm
.gen_jump (label
), BB_END (src
));
1775 BB_END (src
) = new_jump
;
1776 JUMP_LABEL (new_jump
) = label
;
1777 LABEL_NUSES (label
) += 1;
1779 emit_barrier_after_bb (src
);
1781 /* Mark edge as non-fallthru. */
1782 e
->flags
&= ~EDGE_FALLTHRU
;
1786 /* Find any bb's where the fall-through edge is a crossing edge (note that
1787 these bb's must also contain a conditional jump or end with a call
1788 instruction; we've already dealt with fall-through edges for blocks
1789 that didn't have a conditional jump or didn't end with call instruction
1790 in the call to add_labels_and_missing_jumps). Convert the fall-through
1791 edge to non-crossing edge by inserting a new bb to fall-through into.
1792 The new bb will contain an unconditional jump (crossing edge) to the
1793 original fall through destination. */
1796 fix_up_fall_thru_edges (void)
1803 edge cond_jump
= NULL
;
1804 bool cond_jump_crosses
;
1807 rtx_code_label
*fall_thru_label
;
1809 FOR_EACH_BB_FN (cur_bb
, cfun
)
1812 if (EDGE_COUNT (cur_bb
->succs
) > 0)
1813 succ1
= EDGE_SUCC (cur_bb
, 0);
1817 if (EDGE_COUNT (cur_bb
->succs
) > 1)
1818 succ2
= EDGE_SUCC (cur_bb
, 1);
1822 /* Find the fall-through edge. */
1825 && (succ1
->flags
& EDGE_FALLTHRU
))
1831 && (succ2
->flags
& EDGE_FALLTHRU
))
1837 && (block_ends_with_call_p (cur_bb
)
1838 || can_throw_internal (BB_END (cur_bb
))))
1843 FOR_EACH_EDGE (e
, ei
, cur_bb
->succs
)
1844 if (e
->flags
& EDGE_FALLTHRU
)
1851 if (fall_thru
&& (fall_thru
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)))
1853 /* Check to see if the fall-thru edge is a crossing edge. */
1855 if (fall_thru
->flags
& EDGE_CROSSING
)
1857 /* The fall_thru edge crosses; now check the cond jump edge, if
1860 cond_jump_crosses
= true;
1862 old_jump
= BB_END (cur_bb
);
1864 /* Find the jump instruction, if there is one. */
1868 if (!(cond_jump
->flags
& EDGE_CROSSING
))
1869 cond_jump_crosses
= false;
1871 /* We know the fall-thru edge crosses; if the cond
1872 jump edge does NOT cross, and its destination is the
1873 next block in the bb order, invert the jump
1874 (i.e. fix it so the fall through does not cross and
1875 the cond jump does). */
1877 if (!cond_jump_crosses
)
1879 /* Find label in fall_thru block. We've already added
1880 any missing labels, so there must be one. */
1882 fall_thru_label
= block_label (fall_thru
->dest
);
1884 if (old_jump
&& fall_thru_label
)
1886 rtx_jump_insn
*old_jump_insn
=
1887 dyn_cast
<rtx_jump_insn
*> (old_jump
);
1889 invert_worked
= invert_jump (old_jump_insn
,
1890 fall_thru_label
, 0);
1895 fall_thru
->flags
&= ~EDGE_FALLTHRU
;
1896 cond_jump
->flags
|= EDGE_FALLTHRU
;
1897 update_br_prob_note (cur_bb
);
1898 std::swap (fall_thru
, cond_jump
);
1899 cond_jump
->flags
|= EDGE_CROSSING
;
1900 fall_thru
->flags
&= ~EDGE_CROSSING
;
1905 if (cond_jump_crosses
|| !invert_worked
)
1907 /* This is the case where both edges out of the basic
1908 block are crossing edges. Here we will fix up the
1909 fall through edge. The jump edge will be taken care
1910 of later. The EDGE_CROSSING flag of fall_thru edge
1911 is unset before the call to force_nonfallthru
1912 function because if a new basic-block is created
1913 this edge remains in the current section boundary
1914 while the edge between new_bb and the fall_thru->dest
1915 becomes EDGE_CROSSING. */
1917 fall_thru
->flags
&= ~EDGE_CROSSING
;
1918 new_bb
= force_nonfallthru (fall_thru
);
1922 new_bb
->aux
= cur_bb
->aux
;
1923 cur_bb
->aux
= new_bb
;
1925 /* This is done by force_nonfallthru_and_redirect. */
1926 gcc_assert (BB_PARTITION (new_bb
)
1927 == BB_PARTITION (cur_bb
));
1929 single_succ_edge (new_bb
)->flags
|= EDGE_CROSSING
;
1933 /* If a new basic-block was not created; restore
1934 the EDGE_CROSSING flag. */
1935 fall_thru
->flags
|= EDGE_CROSSING
;
1938 /* Add barrier after new jump */
1939 emit_barrier_after_bb (new_bb
? new_bb
: cur_bb
);
1946 /* This function checks the destination block of a "crossing jump" to
1947 see if it has any crossing predecessors that begin with a code label
1948 and end with an unconditional jump. If so, it returns that predecessor
1949 block. (This is to avoid creating lots of new basic blocks that all
1950 contain unconditional jumps to the same destination). */
1953 find_jump_block (basic_block jump_dest
)
1955 basic_block source_bb
= NULL
;
1960 FOR_EACH_EDGE (e
, ei
, jump_dest
->preds
)
1961 if (e
->flags
& EDGE_CROSSING
)
1963 basic_block src
= e
->src
;
1965 /* Check each predecessor to see if it has a label, and contains
1966 only one executable instruction, which is an unconditional jump.
1967 If so, we can use it. */
1969 if (LABEL_P (BB_HEAD (src
)))
1970 for (insn
= BB_HEAD (src
);
1971 !INSN_P (insn
) && insn
!= NEXT_INSN (BB_END (src
));
1972 insn
= NEXT_INSN (insn
))
1975 && insn
== BB_END (src
)
1977 && !any_condjump_p (insn
))
1991 /* Find all BB's with conditional jumps that are crossing edges;
1992 insert a new bb and make the conditional jump branch to the new
1993 bb instead (make the new bb same color so conditional branch won't
1994 be a 'crossing' edge). Insert an unconditional jump from the
1995 new bb to the original destination of the conditional jump. */
1998 fix_crossing_conditional_branches (void)
2008 rtx old_label
= NULL_RTX
;
2009 rtx_code_label
*new_label
;
2011 FOR_EACH_BB_FN (cur_bb
, cfun
)
2013 crossing_edge
= NULL
;
2014 if (EDGE_COUNT (cur_bb
->succs
) > 0)
2015 succ1
= EDGE_SUCC (cur_bb
, 0);
2019 if (EDGE_COUNT (cur_bb
->succs
) > 1)
2020 succ2
= EDGE_SUCC (cur_bb
, 1);
2024 /* We already took care of fall-through edges, so only one successor
2025 can be a crossing edge. */
2027 if (succ1
&& (succ1
->flags
& EDGE_CROSSING
))
2028 crossing_edge
= succ1
;
2029 else if (succ2
&& (succ2
->flags
& EDGE_CROSSING
))
2030 crossing_edge
= succ2
;
2034 rtx_insn
*old_jump
= BB_END (cur_bb
);
2036 /* Check to make sure the jump instruction is a
2037 conditional jump. */
2041 if (any_condjump_p (old_jump
))
2043 if (GET_CODE (PATTERN (old_jump
)) == SET
)
2044 set_src
= SET_SRC (PATTERN (old_jump
));
2045 else if (GET_CODE (PATTERN (old_jump
)) == PARALLEL
)
2047 set_src
= XVECEXP (PATTERN (old_jump
), 0,0);
2048 if (GET_CODE (set_src
) == SET
)
2049 set_src
= SET_SRC (set_src
);
2055 if (set_src
&& (GET_CODE (set_src
) == IF_THEN_ELSE
))
2057 rtx_jump_insn
*old_jump_insn
=
2058 as_a
<rtx_jump_insn
*> (old_jump
);
2060 if (GET_CODE (XEXP (set_src
, 1)) == PC
)
2061 old_label
= XEXP (set_src
, 2);
2062 else if (GET_CODE (XEXP (set_src
, 2)) == PC
)
2063 old_label
= XEXP (set_src
, 1);
2065 /* Check to see if new bb for jumping to that dest has
2066 already been created; if so, use it; if not, create
2069 new_bb
= find_jump_block (crossing_edge
->dest
);
2072 new_label
= block_label (new_bb
);
2075 basic_block last_bb
;
2076 rtx_code_label
*old_jump_target
;
2077 rtx_jump_insn
*new_jump
;
2079 /* Create new basic block to be dest for
2080 conditional jump. */
2082 /* Put appropriate instructions in new bb. */
2084 new_label
= gen_label_rtx ();
2085 emit_label (new_label
);
2087 gcc_assert (GET_CODE (old_label
) == LABEL_REF
);
2088 old_jump_target
= old_jump_insn
->jump_target ();
2089 new_jump
= as_a
<rtx_jump_insn
*>
2090 (emit_jump_insn (targetm
.gen_jump (old_jump_target
)));
2091 new_jump
->set_jump_target (old_jump_target
);
2093 last_bb
= EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
;
2094 new_bb
= create_basic_block (new_label
, new_jump
, last_bb
);
2095 new_bb
->aux
= last_bb
->aux
;
2096 last_bb
->aux
= new_bb
;
2098 emit_barrier_after_bb (new_bb
);
2100 /* Make sure new bb is in same partition as source
2101 of conditional branch. */
2102 BB_COPY_PARTITION (new_bb
, cur_bb
);
2105 /* Make old jump branch to new bb. */
2107 redirect_jump (old_jump_insn
, new_label
, 0);
2109 /* Remove crossing_edge as predecessor of 'dest'. */
2111 dest
= crossing_edge
->dest
;
2113 redirect_edge_succ (crossing_edge
, new_bb
);
2115 /* Make a new edge from new_bb to old dest; new edge
2116 will be a successor for new_bb and a predecessor
2119 if (EDGE_COUNT (new_bb
->succs
) == 0)
2120 new_edge
= make_edge (new_bb
, dest
, 0);
2122 new_edge
= EDGE_SUCC (new_bb
, 0);
2124 crossing_edge
->flags
&= ~EDGE_CROSSING
;
2125 new_edge
->flags
|= EDGE_CROSSING
;
2131 /* Find any unconditional branches that cross between hot and cold
2132 sections. Convert them into indirect jumps instead. */
2135 fix_crossing_unconditional_branches (void)
2138 rtx_insn
*last_insn
;
2141 rtx_insn
*indirect_jump_sequence
;
2142 rtx_insn
*jump_insn
= NULL
;
2147 FOR_EACH_BB_FN (cur_bb
, cfun
)
2149 last_insn
= BB_END (cur_bb
);
2151 if (EDGE_COUNT (cur_bb
->succs
) < 1)
2154 succ
= EDGE_SUCC (cur_bb
, 0);
2156 /* Check to see if bb ends in a crossing (unconditional) jump. At
2157 this point, no crossing jumps should be conditional. */
2159 if (JUMP_P (last_insn
)
2160 && (succ
->flags
& EDGE_CROSSING
))
2162 gcc_assert (!any_condjump_p (last_insn
));
2164 /* Make sure the jump is not already an indirect or table jump. */
2166 if (!computed_jump_p (last_insn
)
2167 && !tablejump_p (last_insn
, NULL
, NULL
))
2169 /* We have found a "crossing" unconditional branch. Now
2170 we must convert it to an indirect jump. First create
2171 reference of label, as target for jump. */
2173 label
= JUMP_LABEL (last_insn
);
2174 label_addr
= gen_rtx_LABEL_REF (Pmode
, label
);
2175 LABEL_NUSES (label
) += 1;
2177 /* Get a register to use for the indirect jump. */
2179 new_reg
= gen_reg_rtx (Pmode
);
2181 /* Generate indirect the jump sequence. */
2184 emit_move_insn (new_reg
, label_addr
);
2185 emit_indirect_jump (new_reg
);
2186 indirect_jump_sequence
= get_insns ();
2189 /* Make sure every instruction in the new jump sequence has
2190 its basic block set to be cur_bb. */
2192 for (cur_insn
= indirect_jump_sequence
; cur_insn
;
2193 cur_insn
= NEXT_INSN (cur_insn
))
2195 if (!BARRIER_P (cur_insn
))
2196 BLOCK_FOR_INSN (cur_insn
) = cur_bb
;
2197 if (JUMP_P (cur_insn
))
2198 jump_insn
= cur_insn
;
2201 /* Insert the new (indirect) jump sequence immediately before
2202 the unconditional jump, then delete the unconditional jump. */
2204 emit_insn_before (indirect_jump_sequence
, last_insn
);
2205 delete_insn (last_insn
);
2207 JUMP_LABEL (jump_insn
) = label
;
2208 LABEL_NUSES (label
)++;
2210 /* Make BB_END for cur_bb be the jump instruction (NOT the
2211 barrier instruction at the end of the sequence...). */
2213 BB_END (cur_bb
) = jump_insn
;
2219 /* Update CROSSING_JUMP_P flags on all jump insns. */
2222 update_crossing_jump_flags (void)
2228 FOR_EACH_BB_FN (bb
, cfun
)
2229 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2230 if (e
->flags
& EDGE_CROSSING
)
2232 if (JUMP_P (BB_END (bb
))
2233 /* Some flags were added during fix_up_fall_thru_edges, via
2234 force_nonfallthru_and_redirect. */
2235 && !CROSSING_JUMP_P (BB_END (bb
)))
2236 CROSSING_JUMP_P (BB_END (bb
)) = 1;
2241 /* Reorder basic blocks using the software trace cache (STC) algorithm. */
2244 reorder_basic_blocks_software_trace_cache (void)
2247 fprintf (dump_file
, "\nReordering with the STC algorithm.\n\n");
2251 struct trace
*traces
;
2253 /* We are estimating the length of uncond jump insn only once since the code
2254 for getting the insn length always returns the minimal length now. */
2255 if (uncond_jump_length
== 0)
2256 uncond_jump_length
= get_uncond_jump_length ();
2258 /* We need to know some information for each basic block. */
2259 array_size
= GET_ARRAY_SIZE (last_basic_block_for_fn (cfun
));
2260 bbd
= XNEWVEC (bbro_basic_block_data
, array_size
);
2261 for (i
= 0; i
< array_size
; i
++)
2263 bbd
[i
].start_of_trace
= -1;
2264 bbd
[i
].end_of_trace
= -1;
2265 bbd
[i
].in_trace
= -1;
2271 traces
= XNEWVEC (struct trace
, n_basic_blocks_for_fn (cfun
));
2273 find_traces (&n_traces
, traces
);
2274 connect_traces (n_traces
, traces
);
2279 /* Return true if edge E1 is more desirable as a fallthrough edge than
2283 edge_order (edge e1
, edge e2
)
2285 return EDGE_FREQUENCY (e1
) > EDGE_FREQUENCY (e2
);
2288 /* Reorder basic blocks using the "simple" algorithm. This tries to
2289 maximize the dynamic number of branches that are fallthrough, without
2290 copying instructions. The algorithm is greedy, looking at the most
2291 frequently executed branch first. */
2294 reorder_basic_blocks_simple (void)
2297 fprintf (dump_file
, "\nReordering with the \"simple\" algorithm.\n\n");
2299 edge
*edges
= new edge
[2 * n_basic_blocks_for_fn (cfun
)];
2301 /* First, collect all edges that can be optimized by reordering blocks:
2302 simple jumps and conditional jumps, as well as the function entry edge. */
2305 edges
[n
++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun
), 0);
2308 FOR_EACH_BB_FN (bb
, cfun
)
2310 rtx_insn
*end
= BB_END (bb
);
2312 if (computed_jump_p (end
) || tablejump_p (end
, NULL
, NULL
))
2315 /* We cannot optimize asm goto. */
2316 if (JUMP_P (end
) && extract_asm_operands (end
))
2319 if (any_condjump_p (end
))
2321 edge e0
= EDGE_SUCC (bb
, 0);
2322 edge e1
= EDGE_SUCC (bb
, 1);
2323 /* When optimizing for size it is best to keep the original
2324 fallthrough edges. */
2325 if (e1
->flags
& EDGE_FALLTHRU
)
2330 else if (single_succ_p (bb
))
2331 edges
[n
++] = EDGE_SUCC (bb
, 0);
2334 /* Sort the edges, the most desirable first. When optimizing for size
2335 all edges are equally desirable. */
2337 if (optimize_function_for_speed_p (cfun
))
2338 std::stable_sort (edges
, edges
+ n
, edge_order
);
2340 /* Now decide which of those edges to make fallthrough edges. We set
2341 BB_VISITED if a block already has a fallthrough successor assigned
2342 to it. We make ->AUX of an endpoint point to the opposite endpoint
2343 of a sequence of blocks that fall through, and ->AUX will be NULL
2344 for a block that is in such a sequence but not an endpoint anymore.
2346 To start with, everything points to itself, nothing is assigned yet. */
2348 FOR_ALL_BB_FN (bb
, cfun
)
2351 EXIT_BLOCK_PTR_FOR_FN (cfun
)->aux
= 0;
2353 /* Now for all edges, the most desirable first, see if that edge can
2354 connect two sequences. If it can, update AUX and BB_VISITED; if it
2355 cannot, zero out the edge in the table. */
2357 for (int j
= 0; j
< n
; j
++)
2361 basic_block tail_a
= e
->src
;
2362 basic_block head_b
= e
->dest
;
2363 basic_block head_a
= (basic_block
) tail_a
->aux
;
2364 basic_block tail_b
= (basic_block
) head_b
->aux
;
2366 /* An edge cannot connect two sequences if:
2367 - it crosses partitions;
2368 - its src is not a current endpoint;
2369 - its dest is not a current endpoint;
2370 - or, it would create a loop. */
2372 if (e
->flags
& EDGE_CROSSING
2373 || tail_a
->flags
& BB_VISITED
2375 || (!(head_b
->flags
& BB_VISITED
) && head_b
!= tail_b
)
2376 || tail_a
== tail_b
)
2384 head_a
->aux
= tail_b
;
2385 tail_b
->aux
= head_a
;
2386 tail_a
->flags
|= BB_VISITED
;
2389 /* Put the pieces together, in the same order that the start blocks of
2390 the sequences already had. The hot/cold partitioning gives a little
2391 complication: as a first pass only do this for blocks in the same
2392 partition as the start block, and (if there is anything left to do)
2393 in a second pass handle the other partition. */
2395 basic_block last_tail
= (basic_block
) ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
;
2397 int current_partition
= BB_PARTITION (last_tail
);
2398 bool need_another_pass
= true;
2400 for (int pass
= 0; pass
< 2 && need_another_pass
; pass
++)
2402 need_another_pass
= false;
2404 FOR_EACH_BB_FN (bb
, cfun
)
2405 if ((bb
->flags
& BB_VISITED
&& bb
->aux
) || bb
->aux
== bb
)
2407 if (BB_PARTITION (bb
) != current_partition
)
2409 need_another_pass
= true;
2413 last_tail
->aux
= bb
;
2414 last_tail
= (basic_block
) bb
->aux
;
2417 current_partition
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
2422 /* Finally, link all the chosen fallthrough edges. */
2424 for (int j
= 0; j
< n
; j
++)
2426 edges
[j
]->src
->aux
= edges
[j
]->dest
;
2430 /* If the entry edge no longer falls through we have to make a new
2431 block so it can do so again. */
2433 edge e
= EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun
), 0);
2434 if (e
->dest
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
)
2436 force_nonfallthru (e
);
2437 e
->src
->aux
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
;
2438 BB_COPY_PARTITION (e
->src
, e
->dest
);
2442 /* Reorder basic blocks. The main entry point to this file. */
2445 reorder_basic_blocks (void)
2447 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT
);
2449 if (n_basic_blocks_for_fn (cfun
) <= NUM_FIXED_BLOCKS
+ 1)
2452 set_edge_can_fallthru_flag ();
2453 mark_dfs_back_edges ();
2455 switch (flag_reorder_blocks_algorithm
)
2457 case REORDER_BLOCKS_ALGORITHM_SIMPLE
:
2458 reorder_basic_blocks_simple ();
2461 case REORDER_BLOCKS_ALGORITHM_STC
:
2462 reorder_basic_blocks_software_trace_cache ();
2469 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2473 if (dump_flags
& TDF_DETAILS
)
2474 dump_reg_info (dump_file
);
2475 dump_flow_info (dump_file
, dump_flags
);
2478 /* Signal that rtl_verify_flow_info_1 can now verify that there
2479 is at most one switch between hot/cold sections. */
2480 crtl
->bb_reorder_complete
= true;
2483 /* Determine which partition the first basic block in the function
2484 belongs to, then find the first basic block in the current function
2485 that belongs to a different section, and insert a
2486 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2487 instruction stream. When writing out the assembly code,
2488 encountering this note will make the compiler switch between the
2489 hot and cold text sections. */
/* NOTE(review): the declaration of `bb', the return type and the braces
   are not visible in this copy (embedded line numbers skip 2493-2494,
   2499-2500, ...) -- confirm against the upstream file.  */
2492 insert_section_boundary_note (void)
/* Set once a note has been emitted; used to assert that there is only
   one partition change.  */
2495 bool switched_sections
= false;
/* 0 means "not yet initialised"; the first block visited supplies the
   starting partition.  */
2496 int current_partition
= 0;
/* Test of crtl->has_bb_partition.  The guarded statement is not visible
   (presumably an early exit when the function is not partitioned --
   TODO confirm).  */
2498 if (!crtl
->has_bb_partition
)
2501 FOR_EACH_BB_FN (bb
, cfun
)
2503 if (!current_partition
)
2504 current_partition
= BB_PARTITION (bb
);
/* Partition changes at BB: emit the section-switch note before its head
   and continue with the new partition.  */
2505 if (BB_PARTITION (bb
) != current_partition
)
/* A second change of partition trips this assert.  */
2507 gcc_assert (!switched_sections
);
2508 switched_sections
= true;
2509 emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS
, BB_HEAD (bb
));
2510 current_partition
= BB_PARTITION (bb
);
/* Static descriptor for the block reordering pass: an RTL pass with no
   optinfo group, timed under TV_REORDER_BLOCKS, with no required,
   provided or destroyed properties and no TODO flags.
   NOTE(review): the pass-name initializer (embedded line 2520) is not
   visible in this copy -- confirm against the upstream file.  */
2517 const pass_data pass_data_reorder_blocks
=
2519 RTL_PASS
, /* type */
2521 OPTGROUP_NONE
, /* optinfo_flags */
2522 TV_REORDER_BLOCKS
, /* tv_id */
2523 0, /* properties_required */
2524 0, /* properties_provided */
2525 0, /* properties_destroyed */
2526 0, /* todo_flags_start */
2527 0, /* todo_flags_finish */
/* Pass object for block reordering.  Derives from rtl_opt_pass and is
   constructed from pass_data_reorder_blocks and the compiler context.
   gate () is defined inline; execute () is defined out of line.  */
2530 class pass_reorder_blocks
: public rtl_opt_pass
/* Forward the pass descriptor and CTXT to the rtl_opt_pass base.  */
2533 pass_reorder_blocks (gcc::context
*ctxt
)
2534 : rtl_opt_pass (pass_data_reorder_blocks
, ctxt
)
2537 /* opt_pass methods: */
/* Gate.  The statement guarded by the cannot_modify_jumps_p test is not
   visible in this copy (presumably the pass is disabled for such
   targets -- TODO confirm).  Otherwise the pass runs when optimizing and
   either -freorder-blocks flag variable is set.  */
2538 virtual bool gate (function
*)
2540 if (targetm
.cannot_modify_jumps_p ())
2542 return (optimize
> 0
2543 && (flag_reorder_blocks
|| flag_reorder_blocks_and_partition
));
2546 virtual unsigned int execute (function
*);
2548 }; // class pass_reorder_blocks
/* Run block reordering on FUN: enter cfglayout mode (with an expensive
   CFG cleanup), reorder the blocks, clean up the CFG again, record the
   resulting block order in the bb->aux chain, and leave cfglayout mode.
   NOTE(review): the return type, the declaration of `bb', the braces and
   the return statement are not visible in this copy -- confirm against
   the upstream file.  */
2551 pass_reorder_blocks::execute (function
*fun
)
2555 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2556 splitting possibly introduced more crossjumping opportunities. */
2557 cfg_layout_initialize (CLEANUP_EXPENSIVE
);
2559 reorder_basic_blocks ();
2560 cleanup_cfg (CLEANUP_EXPENSIVE
);
/* Chain every block to its current successor in layout order through
   ->aux, leaving the last block (the one before the exit block)
   unchained.  The same idiom is described as "build the reorder chain"
   in pass_duplicate_computed_gotos::execute.  */
2562 FOR_EACH_BB_FN (bb
, fun
)
2563 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2564 bb
->aux
= bb
->next_bb
;
2565 cfg_layout_finalize ();
/* Factory: return a newly allocated pass_reorder_blocks bound to CTXT.
   NOTE(review): the return type and braces are not visible in this
   copy.  */
2573 make_pass_reorder_blocks (gcc::context
*ctxt
)
2575 return new pass_reorder_blocks (ctxt
);
2578 /* Duplicate the blocks containing computed gotos. This basically unfactors
2579 computed gotos that were factored early on in the compilation process to
2580 speed up edge based data flow. We used to not unfactor them again,
2581 which can seriously pessimize code with many computed jumps in the source
2582 code, such as interpreters. See e.g. PR15242. */
/* Static descriptor for the "compgotos" pass: an RTL pass with no optinfo
   group, timed under TV_REORDER_BLOCKS, with no required, provided or
   destroyed properties and no TODO flags.  */
2586 const pass_data pass_data_duplicate_computed_gotos
=
2588 RTL_PASS
, /* type */
2589 "compgotos", /* name */
2590 OPTGROUP_NONE
, /* optinfo_flags */
2591 TV_REORDER_BLOCKS
, /* tv_id */
2592 0, /* properties_required */
2593 0, /* properties_provided */
2594 0, /* properties_destroyed */
2595 0, /* todo_flags_start */
2596 0, /* todo_flags_finish */
/* Pass object for duplicating computed gotos.  Derives from rtl_opt_pass
   and is constructed from pass_data_duplicate_computed_gotos and the
   compiler context.  Both gate () and execute () are defined out of
   line.  */
2599 class pass_duplicate_computed_gotos
: public rtl_opt_pass
/* Forward the pass descriptor and CTXT to the rtl_opt_pass base.  */
2602 pass_duplicate_computed_gotos (gcc::context
*ctxt
)
2603 : rtl_opt_pass (pass_data_duplicate_computed_gotos
, ctxt
)
2606 /* opt_pass methods: */
2607 virtual bool gate (function
*);
2608 virtual unsigned int execute (function
*);
2610 }; // class pass_duplicate_computed_gotos
/* Gate for the "compgotos" pass on FUN.  Returns true when optimizing,
   flag_expensive_optimizations is set and FUN is not being optimized
   for size.
   NOTE(review): the statement guarded by the cannot_modify_jumps_p test
   is not visible in this copy (presumably the pass is disabled for such
   targets -- TODO confirm), nor are the return type and braces.  */
2613 pass_duplicate_computed_gotos::gate (function
*fun
)
2615 if (targetm
.cannot_modify_jumps_p ())
2617 return (optimize
> 0
2618 && flag_expensive_optimizations
2619 && ! optimize_function_for_size_p (fun
));
/* Execute the "compgotos" pass on FUN.

   First walk over the blocks: build the bb->aux reorder chain in the
   current block order, and record in the CANDIDATES bitmap the blocks
   that (according to the in-line comments and tests below) end in a
   computed jump, do not end in a crossing jump, pass
   can_duplicate_block_p, have a summed get_attr_min_length within
   max_size, and have no EDGE_COMPLEX flag on any incoming edge.

   Second walk: for each block not yet marked BB_VISITED whose single
   successor is a candidate, is neither the exit block nor the next
   block and has more than one predecessor, and which does not itself end
   in a crossing jump, copy the successor after the block with
   duplicate_block and mark the copy BB_VISITED.

   Afterwards partitions are fixed up, cfglayout mode is left and the
   candidate bitmap is freed.

   NOTE(review): the numbers embedded at the start of the lines below skip
   values (e.g. 2630 -> 2634, 2661 -> 2664, 2724 -> 2731, 2737 -> 2741),
   so lines appear to be absent from this copy: the return type, braces,
   several declarations (candidates, max_size, insn, e, ei), the
   initialisation of size and all_flags, and the statements guarded by
   most of the tests are not visible.  How `changed' selects between the
   two cfg_layout_finalize calls cannot be seen here either -- confirm
   against the upstream file.

   The only change made to this block is to close the comment at embedded
   line 2716.  It had lost its terminator, which commented out the
   crossing-jump test and the whole duplication sequence up to the next
   comment end.  */
2623 pass_duplicate_computed_gotos::execute (function
*fun
)
2625 basic_block bb
, new_bb
;
2628 bool changed
= false;
/* Size test on the number of basic blocks; the guarded statement is not
   visible (presumably an early exit -- TODO confirm).  */
2630 if (n_basic_blocks_for_fn (fun
) <= NUM_FIXED_BLOCKS
+ 1)
2634 cfg_layout_initialize (0);
2636 /* We are estimating the length of uncond jump insn only once
2637 since the code for getting the insn length always returns
2638 the minimal length now. */
2639 if (uncond_jump_length
== 0)
2640 uncond_jump_length
= get_uncond_jump_length ();
/* Size budget for a duplicated block, as a multiple of the unconditional
   jump length.  The left-hand side of this assignment is on a line that
   is absent from this copy (presumably max_size -- TODO confirm).  */
2643 = uncond_jump_length
* PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS
);
2644 candidates
= BITMAP_ALLOC (NULL
);
2646 /* Look for blocks that end in a computed jump, and see if such blocks
2647 are suitable for unfactoring. If a block is a candidate for unfactoring,
2648 mark it in the candidates. */
2649 FOR_EACH_BB_FN (bb
, fun
)
2654 int size
, all_flags
;
2656 /* Build the reorder chain for the original order of blocks. */
2657 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2658 bb
->aux
= bb
->next_bb
;
2660 /* Obviously the block has to end in a computed jump. */
2661 if (!computed_jump_p (BB_END (bb
)))
2664 /* Only consider blocks that can be duplicated. */
2665 if (CROSSING_JUMP_P (BB_END (bb
))
2666 || !can_duplicate_block_p (bb
))
2669 /* Make sure that the block is small enough. */
2671 FOR_BB_INSNS (bb
, insn
)
2674 size
+= get_attr_min_length (insn
);
2675 if (size
> max_size
)
2678 if (size
> max_size
)
2681 /* Final check: there must not be any incoming abnormal edges. */
2683 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2684 all_flags
|= e
->flags
;
2685 if (all_flags
& EDGE_COMPLEX
)
2688 bitmap_set_bit (candidates
, bb
->index
);
2691 /* Nothing to do if there is no computed jump here. */
2692 if (bitmap_empty_p (candidates
))
2695 /* Duplicate computed gotos. */
2696 FOR_EACH_BB_FN (bb
, fun
)
/* BB_VISITED is tested and then set on every block reached, and set on
   each copy made below.  */
2698 if (bb
->flags
& BB_VISITED
)
2701 bb
->flags
|= BB_VISITED
;
2703 /* BB must have one outgoing edge. That edge must not lead to
2704 the exit block or the next block.
2705 The destination must have more than one predecessor. */
2706 if (!single_succ_p (bb
)
2707 || single_succ (bb
) == EXIT_BLOCK_PTR_FOR_FN (fun
)
2708 || single_succ (bb
) == bb
->next_bb
2709 || single_pred_p (single_succ (bb
)))
2712 /* The successor block has to be a duplication candidate. */
2713 if (!bitmap_bit_p (candidates
, single_succ (bb
)->index
))
2716 /* Don't duplicate a partition crossing edge, which requires difficult fixup. */
2718 if (JUMP_P (BB_END (bb
)) && CROSSING_JUMP_P (BB_END (bb
)))
/* Copy the single successor after BB; the copy takes over BB's position
   in the ->aux reorder chain.  */
2721 new_bb
= duplicate_block (single_succ (bb
), single_succ_edge (bb
), bb
);
2722 new_bb
->aux
= bb
->aux
;
2724 new_bb
->flags
|= BB_VISITED
;
2731 /* Duplicating blocks above will redirect edges and may cause hot
2732 blocks previously reached by both hot and cold blocks to become
2733 dominated only by cold blocks. */
2734 fixup_partitions ();
2736 /* Merge the duplicated blocks into predecessors, when possible. */
2737 cfg_layout_finalize ();
2741 cfg_layout_finalize ();
2743 BITMAP_FREE (candidates
);
/* Factory: return a newly allocated pass_duplicate_computed_gotos bound
   to CTXT.
   NOTE(review): the return type and braces are not visible in this
   copy.  */
2750 make_pass_duplicate_computed_gotos (gcc::context
*ctxt
)
2752 return new pass_duplicate_computed_gotos (ctxt
);
2755 /* This function is the main 'entrance' for the optimization that
2756 partitions hot and cold basic blocks into separate sections of the
2757 .o file (to improve performance and cache locality). Ideally it
2758 would be called after all optimizations that rearrange the CFG have
2759 been called. However part of this optimization may introduce new
2760 register usage, so it must be called before register allocation has
2761 occurred. This means that this optimization is actually called
2762 well before the optimization that reorders basic blocks (see reorder_basic_blocks above).
2765 This optimization checks the feedback information to determine
2766 which basic blocks are hot/cold, updates flags on the basic blocks
2767 to indicate which section they belong in. This information is
2768 later used for writing out sections in the .o file. Because hot
2769 and cold sections can be arbitrarily large (within the bounds of
2770 memory), far beyond the size of a single function, it is necessary
2771 to fix up all edges that cross section boundaries, to make sure the
2772 instructions used can actually span the required distance. The
2773 fixes are described below.
2775 Fall-through edges must be changed into jumps; it is not safe or
2776 legal to fall through across a section boundary. Whenever a
2777 fall-through edge crossing a section boundary is encountered, a new
2778 basic block is inserted (in the same section as the fall-through
2779 source), and the fall through edge is redirected to the new basic
2780 block. The new basic block contains an unconditional jump to the
2781 original fall-through target. (If the unconditional jump is
2782 insufficient to cross section boundaries, that is dealt with a
2783 little later, see below).
2785 In order to deal with architectures that have short conditional
2786 branches (which cannot span all of memory) we take any conditional
2787 jump that attempts to cross a section boundary and add a level of
2788 indirection: it becomes a conditional jump to a new basic block, in
2789 the same section. The new basic block contains an unconditional
2790 jump to the original target, in the other section.
2792 For those architectures whose unconditional branch is also
2793 incapable of reaching all of memory, those unconditional jumps are
2794 converted into indirect jumps, through a register.
2796 IMPORTANT NOTE: This optimization causes some messy interactions
2797 with the cfg cleanup optimizations; those optimizations want to
2798 merge blocks wherever possible, and to collapse indirect jump
2799 sequences (change "A jumps to B jumps to C" directly into "A jumps
2800 to C"). Those optimizations can undo the jump fixes that
2801 partitioning is required to make (see above), in order to ensure
2802 that jumps attempting to cross section boundaries are really able
2803 to cover whatever distance the jump requires (on many architectures
2804 conditional or unconditional jumps are not able to reach all of
2805 memory). Therefore tests have to be inserted into each such
2806 optimization to make sure that it does not undo stuff necessary to
2807 cross partition boundaries. This would be much less of a problem
2808 if we could perform this optimization later in the compilation, but
2809 unfortunately the fact that we may need to create indirect jumps
2810 (through registers) requires that this optimization be performed
2811 before register allocation.
2813 Hot and cold basic blocks are partitioned and put in separate
2814 sections of the .o file, to reduce paging and improve cache
2815 performance (hopefully). This can result in bits of code from the
2816 same function being widely separated in the .o file. However this
2817 is not obvious to the current bb structure. Therefore we must take
2818 care to ensure that: 1). There are no fall_thru edges that cross
2819 between sections; 2). For those architectures which have "short"
2820 conditional branches, all conditional branches that attempt to
2821 cross between sections are converted to unconditional branches;
2822 and, 3). For those architectures which have "short" unconditional
2823 branches, all unconditional branches that attempt to cross between
2824 sections are converted to indirect jumps.
2826 The code for fixing up fall_thru edges that cross between hot and
2827 cold basic blocks does so by creating new basic blocks containing
2828 unconditional branches to the appropriate label in the "other"
2829 section. The new basic block is then put in the same (hot or cold)
2830 section as the original conditional branch, and the fall_thru edge
2831 is modified to fall into the new basic block instead. By adding
2832 this level of indirection we end up with only unconditional branches
2833 crossing between hot and cold sections.
2835 Conditional branches are dealt with by adding a level of indirection.
2836 A new basic block is added in the same (hot/cold) section as the
2837 conditional branch, and the conditional branch is retargeted to the
2838 new basic block. The new basic block contains an unconditional branch
2839 to the original target of the conditional branch (in the other section).
2841 Unconditional branches are dealt with by converting them into indirect jumps. */
/* Static descriptor for the "bbpart" pass: an RTL pass with no optinfo
   group, timed under TV_REORDER_BLOCKS.  It requires PROP_cfglayout and
   neither provides nor destroys any property; no TODO flags.  */
2846 const pass_data pass_data_partition_blocks
=
2848 RTL_PASS
, /* type */
2849 "bbpart", /* name */
2850 OPTGROUP_NONE
, /* optinfo_flags */
2851 TV_REORDER_BLOCKS
, /* tv_id */
2852 PROP_cfglayout
, /* properties_required */
2853 0, /* properties_provided */
2854 0, /* properties_destroyed */
2855 0, /* todo_flags_start */
2856 0, /* todo_flags_finish */
/* Pass object for hot/cold block partitioning.  Derives from rtl_opt_pass
   and is constructed from pass_data_partition_blocks and the compiler
   context.  Both gate () and execute () are defined out of line.  */
2859 class pass_partition_blocks
: public rtl_opt_pass
/* Forward the pass descriptor and CTXT to the rtl_opt_pass base.  */
2862 pass_partition_blocks (gcc::context
*ctxt
)
2863 : rtl_opt_pass (pass_data_partition_blocks
, ctxt
)
2866 /* opt_pass methods: */
2867 virtual bool gate (function
*);
2868 virtual unsigned int execute (function
*);
2870 }; // class pass_partition_blocks
/* Gate for the "bbpart" pass on FUN.  The visible conjuncts require
   flag_reorder_blocks_and_partition to be set, FUN to be optimized for
   speed, current_function_decl to have no COMDAT group, and
   user_defined_section_attribute to be false.

   NOTE(review): embedded line 2880 is absent from this copy, so one
   conjunct of the return expression may be missing; the return type and
   braces are not visible either -- confirm against the upstream file.

   The only change made to this block is to close the first comment
   below.  It had lost its terminator, which commented out the start of
   the return expression up to the end of the next comment.  */
2873 pass_partition_blocks::gate (function
*fun
)
2875 /* The optimization to partition hot/cold basic blocks into separate
2876 sections of the .o file does not work well with linkonce or with
2877 user defined section attributes. Don't call it if either case arises. */
2879 return (flag_reorder_blocks_and_partition
2881 /* See gate_handle_reorder_blocks. We should not partition if
2882 we are going to omit the reordering. */
2883 && optimize_function_for_speed_p (fun
)
2884 && !DECL_COMDAT_GROUP (current_function_decl
)
2885 && !user_defined_section_attribute
);
/* Execute the "bbpart" pass on FUN.

   Visible steps, in order: defer DF insn rescans; compute the vector of
   crossing edges with
   find_rarely_executed_basic_blocks_and_crossing_edges; mark the function
   as partitioned (crtl->has_bb_partition); give crossing edges jumps and
   labels; fix crossing fall-through edges; when the target lacks long
   conditional / unconditional branches, fix crossing conditional /
   unconditional branches; update the crossing-jump flags; clear bb->aux;
   release the edge vector; and, when FUN has a landing-pad array, throw
   away and reallocate the DF scan data and request a DCE run with LR.

   NOTE(review): the numbers embedded at the start of the lines below skip
   values (e.g. 2893 -> 2896, 2899 -> 2902, 2962 -> 2972), so lines appear
   to be absent from this copy: the return type, braces, the statements
   guarded by the first two tests (presumably early exits) and whatever
   follows df_set_flags (DF_LR_RUN_DCE) are not visible -- confirm against
   the upstream file.

   The only change made to this block is to close two comments (embedded
   lines 2908-2909 and 2919-2922).  They had lost their terminators, which
   commented out the calls to fix_up_fall_thru_edges,
   fix_crossing_unconditional_branches and update_crossing_jump_flags.  */
2889 pass_partition_blocks::execute (function
*fun
)
2891 vec
<edge
> crossing_edges
;
/* Size test on the number of basic blocks; the guarded statement is not
   visible (presumably an early exit -- TODO confirm).  */
2893 if (n_basic_blocks_for_fn (fun
) <= NUM_FIXED_BLOCKS
+ 1)
2896 df_set_flags (DF_DEFER_INSN_RESCAN
);
2898 crossing_edges
= find_rarely_executed_basic_blocks_and_crossing_edges ();
/* The guarded statement for the "no crossing-edge vector" case is not
   visible in this copy -- TODO confirm.  */
2899 if (!crossing_edges
.exists ())
2902 crtl
->has_bb_partition
= true;
2904 /* Make sure the source of any crossing edge ends in a jump and the
2905 destination of any crossing edge has a label. */
2906 add_labels_and_missing_jumps (crossing_edges
);
2908 /* Convert all crossing fall_thru edges to non-crossing fall
2909 thrus to unconditional jumps (that jump to the original fall through dest). */
2911 fix_up_fall_thru_edges ();
2913 /* If the architecture does not have conditional branches that can
2914 span all of memory, convert crossing conditional branches into
2915 crossing unconditional branches. */
2916 if (!HAS_LONG_COND_BRANCH
)
2917 fix_crossing_conditional_branches ();
2919 /* If the architecture does not have unconditional branches that
2920 can span all of memory, convert crossing unconditional branches
2921 into indirect jumps. Since adding an indirect jump also adds
2922 a new register usage, update the register usage information as well. */
2924 if (!HAS_LONG_UNCOND_BRANCH
)
2925 fix_crossing_unconditional_branches ();
2927 update_crossing_jump_flags ();
2929 /* Clear bb->aux fields that the above routines were using. */
2930 clear_aux_for_blocks ();
2932 crossing_edges
.release ();
2934 /* ??? FIXME: DF generates the bb info for a block immediately.
2935 And by immediately, I mean *during* creation of the block.
2937 #0 df_bb_refs_collect
2938 #1 in df_bb_refs_record
2939 #2 in create_basic_block_structure
2941 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2942 will *always* fail, because no edges can have been added to the
2943 block yet. Which of course means we don't add the right
2944 artificial refs, which means we fail df_verify (much) later.
2946 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2947 that we also shouldn't grab data from the new blocks those new
2948 insns are in either. In this way one can create the block, link
2949 it up properly, and have everything Just Work later, when deferred
2950 insns are processed.
2952 In the meantime, we have no other option but to throw away all
2953 of the DF data and recompute it all. */
2954 if (fun
->eh
->lp_array
)
2956 df_finish_pass (true);
2957 df_scan_alloc (NULL
);
2959 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2960 data. We blindly generated all of them when creating the new
2961 landing pad. Delete those assignments we don't use. */
2962 df_set_flags (DF_LR_RUN_DCE
);
/* Factory: return a newly allocated pass_partition_blocks bound to CTXT.
   NOTE(review): the return type and braces are not visible in this
   copy.  */
2972 make_pass_partition_blocks (gcc::context
*ctxt
)
2974 return new pass_partition_blocks (ctxt
);