1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file contains the "reorder blocks" pass, which changes the control
21 flow of a function to encounter fewer branches; the "partition blocks"
22 pass, which divides the basic blocks into "hot" and "cold" partitions,
23 which are kept separate; and the "duplicate computed gotos" pass, which
24 duplicates blocks ending in an indirect jump.
26 There are two algorithms for "reorder blocks": the "simple" algorithm,
27 which just rearranges blocks, trying to minimize the number of executed
28 unconditional branches; and the "software trace cache" algorithm, which
29 also copies code, and in general tries a lot harder to have long linear
30 pieces of machine code executed. This algorithm is described next. */
32 /* This (greedy) algorithm constructs traces in several rounds.
33 The construction starts from "seeds". The seed for the first round
34 is the entry point of the function. When there are more than one seed,
35 the one with the lowest key in the heap is selected first (see bb_to_key).
36 Then the algorithm repeatedly adds the most probable successor to the end
37 of a trace. Finally it connects the traces.
39 There are two parameters: Branch Threshold and Exec Threshold.
40 If the probability of an edge to a successor of the current basic block is
41 lower than Branch Threshold or its frequency is lower than Exec Threshold,
42 then the successor will be the seed in one of the next rounds.
43 Each round has these parameters lower than the previous one.
44 The last round has to have these parameters set to zero so that the
45 remaining blocks are picked up.
47 The algorithm selects the most probable successor from all unvisited
48 successors and successors that have been added to this trace.
49 The other successors (that has not been "sent" to the next round) will be
50 other seeds for this round and the secondary traces will start from them.
51 If the successor has not been visited in this trace, it is added to the
52 trace (however, there is some heuristic for simple branches).
53 If the successor has been visited in this trace, a loop has been found.
54 If the loop has many iterations, the loop is rotated so that the source
55 block of the most probable edge going out of the loop is the last block
57 If the loop has few iterations and there is no edge from the last block of
58 the loop going out of the loop, the loop header is duplicated.
60 When connecting traces, the algorithm first checks whether there is an edge
61 from the last block of a trace to the first block of another trace.
62 When there are still some unconnected traces it checks whether there exists
63 a basic block BB such that BB is a successor of the last block of a trace
64 and BB is a predecessor of the first block of another trace. In this case,
65 BB is duplicated, added at the end of the first trace and the traces are
67 The rest of traces are simply connected so there will be a jump to the
68 beginning of the rest of traces.
70 The above description is for the full algorithm, which is used when the
71 function is optimized for speed. When the function is optimized for size,
72 in order to reduce long jumps and connect more fallthru edges, the
73 algorithm is modified as follows:
74 (1) Break long traces to short ones. A trace is broken at a block that has
75 multiple predecessors/ successors during trace discovery. When connecting
76 traces, only connect Trace n with Trace n + 1. This change reduces most
77 long jumps compared with the above algorithm.
78 (2) Ignore the edge probability and frequency for fallthru edges.
79 (3) Keep the original order of blocks when there is no chance to fall
80 through. We rely on the results of cfg_cleanup.
82 To implement the change for code size optimization, block's index is
83 selected as the key and all traces are found in one round.
87 "Software Trace Cache"
88 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
89 http://citeseer.nj.nec.com/15361.html
94 #define INCLUDE_ALGORITHM /* stable_sort */
96 #include "coretypes.h"
101 #include "cfghooks.h"
103 #include "memmodel.h"
106 #include "emit-rtl.h"
110 #include "tree-pass.h"
113 #include "cfgbuild.h"
114 #include "cfgcleanup.h"
115 #include "bb-reorder.h"
117 #include "fibonacci_heap.h"
119 /* The number of rounds. In most cases there will only be 4 rounds, but
120 when partitioning hot and cold basic blocks into separate sections of
121 the object file there will be an extra round. */
124 struct target_bb_reorder default_target_bb_reorder
;
125 #if SWITCHABLE_TARGET
126 struct target_bb_reorder
*this_target_bb_reorder
= &default_target_bb_reorder
;
129 #define uncond_jump_length \
130 (this_target_bb_reorder->x_uncond_jump_length)
132 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
133 static const int branch_threshold
[N_ROUNDS
] = {400, 200, 100, 0, 0};
135 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
136 static const int exec_threshold
[N_ROUNDS
] = {500, 200, 50, 0, 0};
138 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
139 block the edge destination is not duplicated while connecting traces. */
140 #define DUPLICATION_THRESHOLD 100
142 typedef fibonacci_heap
<long, basic_block_def
> bb_heap_t
;
143 typedef fibonacci_node
<long, basic_block_def
> bb_heap_node_t
;
145 /* Structure to hold needed information for each basic block. */
146 struct bbro_basic_block_data
148 /* Which trace is the bb start of (-1 means it is not a start of any). */
151 /* Which trace is the bb end of (-1 means it is not an end of any). */
154 /* Which trace is the bb in? */
157 /* Which trace was this bb visited in? */
160 /* Cached maximum frequency of interesting incoming edges.
161 Minus one means not yet computed. */
164 /* Which heap is BB in (if any)? */
167 /* Which heap node is BB in (if any)? */
168 bb_heap_node_t
*node
;
171 /* The current size of the following dynamic array. */
172 static int array_size
;
174 /* The array which holds needed information for basic blocks. */
175 static bbro_basic_block_data
*bbd
;
177 /* To avoid frequent reallocation the size of arrays is greater than needed,
178 the number of elements is (not less than) 1.25 * size_wanted. */
179 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
181 /* Free the memory and set the pointer to NULL. */
182 #define FREE(P) (gcc_assert (P), free (P), P = 0)
184 /* Structure for holding information about a trace. */
187 /* First and last basic block of the trace. */
188 basic_block first
, last
;
190 /* The round of the STC creation which this trace was found in. */
193 /* The length (i.e. the number of basic blocks) of the trace. */
197 /* Maximum frequency and count of one of the entry blocks. */
198 static int max_entry_frequency
;
199 static profile_count max_entry_count
;
201 /* Local function prototypes. */
202 static void find_traces (int *, struct trace
*);
203 static basic_block
rotate_loop (edge
, struct trace
*, int);
204 static void mark_bb_visited (basic_block
, int);
205 static void find_traces_1_round (int, int, gcov_type
, struct trace
*, int *,
206 int, bb_heap_t
**, int);
207 static basic_block
copy_bb (basic_block
, edge
, basic_block
, int);
208 static long bb_to_key (basic_block
);
209 static bool better_edge_p (const_basic_block
, const_edge
, profile_probability
,
210 int, profile_probability
, int, const_edge
);
211 static bool connect_better_edge_p (const_edge
, bool, int, const_edge
,
213 static void connect_traces (int, struct trace
*);
214 static bool copy_bb_p (const_basic_block
, int);
215 static bool push_to_next_round_p (const_basic_block
, int, int, int, gcov_type
);
217 /* Return the trace number in which BB was visited. */
220 bb_visited_trace (const_basic_block bb
)
222 gcc_assert (bb
->index
< array_size
);
223 return bbd
[bb
->index
].visited
;
226 /* This function marks BB that it was visited in trace number TRACE. */
229 mark_bb_visited (basic_block bb
, int trace
)
231 bbd
[bb
->index
].visited
= trace
;
232 if (bbd
[bb
->index
].heap
)
234 bbd
[bb
->index
].heap
->delete_node (bbd
[bb
->index
].node
);
235 bbd
[bb
->index
].heap
= NULL
;
236 bbd
[bb
->index
].node
= NULL
;
240 /* Check to see if bb should be pushed into the next round of trace
241 collections or not. Reasons for pushing the block forward are 1).
242 If the block is cold, we are doing partitioning, and there will be
243 another round (cold partition blocks are not supposed to be
244 collected into traces until the very last round); or 2). There will
245 be another round, and the basic block is not "hot enough" for the
246 current round of trace collection. */
249 push_to_next_round_p (const_basic_block bb
, int round
, int number_of_rounds
,
250 int exec_th
, gcov_type count_th
)
252 bool there_exists_another_round
;
253 bool block_not_hot_enough
;
255 there_exists_another_round
= round
< number_of_rounds
- 1;
257 block_not_hot_enough
= (bb
->frequency
< exec_th
258 || bb
->count
< count_th
259 || probably_never_executed_bb_p (cfun
, bb
));
261 if (there_exists_another_round
262 && block_not_hot_enough
)
268 /* Find the traces for Software Trace Cache. Chain each trace through
269 RBI()->next. Store the number of traces to N_TRACES and description of
273 find_traces (int *n_traces
, struct trace
*traces
)
276 int number_of_rounds
;
279 bb_heap_t
*heap
= new bb_heap_t (LONG_MIN
);
281 /* Add one extra round of trace collection when partitioning hot/cold
282 basic blocks into separate sections. The last round is for all the
283 cold blocks (and ONLY the cold blocks). */
285 number_of_rounds
= N_ROUNDS
- 1;
287 /* Insert entry points of function into heap. */
288 max_entry_frequency
= 0;
289 max_entry_count
= profile_count::zero ();
290 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR_FOR_FN (cfun
)->succs
)
292 bbd
[e
->dest
->index
].heap
= heap
;
293 bbd
[e
->dest
->index
].node
= heap
->insert (bb_to_key (e
->dest
), e
->dest
);
294 if (e
->dest
->frequency
> max_entry_frequency
)
295 max_entry_frequency
= e
->dest
->frequency
;
296 if (e
->dest
->count
.initialized_p () && e
->dest
->count
> max_entry_count
)
297 max_entry_count
= e
->dest
->count
;
300 /* Find the traces. */
301 for (i
= 0; i
< number_of_rounds
; i
++)
303 gcov_type count_threshold
;
306 fprintf (dump_file
, "STC - round %d\n", i
+ 1);
308 if (max_entry_count
< INT_MAX
/ 1000)
309 count_threshold
= max_entry_count
.to_gcov_type () * exec_threshold
[i
] / 1000;
311 count_threshold
= max_entry_count
.to_gcov_type () / 1000 * exec_threshold
[i
];
313 find_traces_1_round (REG_BR_PROB_BASE
* branch_threshold
[i
] / 1000,
314 max_entry_frequency
* exec_threshold
[i
] / 1000,
315 count_threshold
, traces
, n_traces
, i
, &heap
,
322 for (i
= 0; i
< *n_traces
; i
++)
325 fprintf (dump_file
, "Trace %d (round %d): ", i
+ 1,
326 traces
[i
].round
+ 1);
327 for (bb
= traces
[i
].first
;
328 bb
!= traces
[i
].last
;
329 bb
= (basic_block
) bb
->aux
)
330 fprintf (dump_file
, "%d [%d] ", bb
->index
, bb
->frequency
);
331 fprintf (dump_file
, "%d [%d]\n", bb
->index
, bb
->frequency
);
337 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
338 (with sequential number TRACE_N). */
341 rotate_loop (edge back_edge
, struct trace
*trace
, int trace_n
)
345 /* Information about the best end (end after rotation) of the loop. */
346 basic_block best_bb
= NULL
;
347 edge best_edge
= NULL
;
349 profile_count best_count
= profile_count::uninitialized ();
350 /* The best edge is preferred when its destination is not visited yet
351 or is a start block of some trace. */
352 bool is_preferred
= false;
354 /* Find the most frequent edge that goes out from current trace. */
355 bb
= back_edge
->dest
;
361 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
362 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
363 && bb_visited_trace (e
->dest
) != trace_n
364 && (e
->flags
& EDGE_CAN_FALLTHRU
)
365 && !(e
->flags
& EDGE_COMPLEX
))
369 /* The best edge is preferred. */
370 if (!bb_visited_trace (e
->dest
)
371 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
373 /* The current edge E is also preferred. */
374 int freq
= EDGE_FREQUENCY (e
);
375 if (freq
> best_freq
|| e
->count
> best_count
)
378 if (e
->count
.initialized_p ())
379 best_count
= e
->count
;
387 if (!bb_visited_trace (e
->dest
)
388 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
390 /* The current edge E is preferred. */
392 best_freq
= EDGE_FREQUENCY (e
);
393 best_count
= e
->count
;
399 int freq
= EDGE_FREQUENCY (e
);
400 if (!best_edge
|| freq
> best_freq
|| e
->count
> best_count
)
403 best_count
= e
->count
;
410 bb
= (basic_block
) bb
->aux
;
412 while (bb
!= back_edge
->dest
);
416 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
418 if (back_edge
->dest
== trace
->first
)
420 trace
->first
= (basic_block
) best_bb
->aux
;
426 for (prev_bb
= trace
->first
;
427 prev_bb
->aux
!= back_edge
->dest
;
428 prev_bb
= (basic_block
) prev_bb
->aux
)
430 prev_bb
->aux
= best_bb
->aux
;
432 /* Try to get rid of uncond jump to cond jump. */
433 if (single_succ_p (prev_bb
))
435 basic_block header
= single_succ (prev_bb
);
437 /* Duplicate HEADER if it is a small block containing cond jump
439 if (any_condjump_p (BB_END (header
)) && copy_bb_p (header
, 0)
440 && !CROSSING_JUMP_P (BB_END (header
)))
441 copy_bb (header
, single_succ_edge (prev_bb
), prev_bb
, trace_n
);
447 /* We have not found suitable loop tail so do no rotation. */
448 best_bb
= back_edge
->src
;
454 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
455 not include basic blocks whose probability is lower than BRANCH_TH or whose
456 frequency is lower than EXEC_TH into traces (or whose count is lower than
457 COUNT_TH). Store the new traces into TRACES and modify the number of
458 traces *N_TRACES. Set the round (which the trace belongs to) to ROUND.
459 The function expects starting basic blocks to be in *HEAP and will delete
460 *HEAP and store starting points for the next round into new *HEAP. */
463 find_traces_1_round (int branch_th
, int exec_th
, gcov_type count_th
,
464 struct trace
*traces
, int *n_traces
, int round
,
465 bb_heap_t
**heap
, int number_of_rounds
)
467 /* Heap for discarded basic blocks which are possible starting points for
469 bb_heap_t
*new_heap
= new bb_heap_t (LONG_MIN
);
470 bool for_size
= optimize_function_for_size_p (cfun
);
472 while (!(*heap
)->empty ())
480 bb
= (*heap
)->extract_min ();
481 bbd
[bb
->index
].heap
= NULL
;
482 bbd
[bb
->index
].node
= NULL
;
485 fprintf (dump_file
, "Getting bb %d\n", bb
->index
);
487 /* If the BB's frequency is too low, send BB to the next round. When
488 partitioning hot/cold blocks into separate sections, make sure all
489 the cold blocks (and ONLY the cold blocks) go into the (extra) final
490 round. When optimizing for size, do not push to next round. */
493 && push_to_next_round_p (bb
, round
, number_of_rounds
, exec_th
,
496 int key
= bb_to_key (bb
);
497 bbd
[bb
->index
].heap
= new_heap
;
498 bbd
[bb
->index
].node
= new_heap
->insert (key
, bb
);
502 " Possible start point of next round: %d (key: %d)\n",
507 trace
= traces
+ *n_traces
;
509 trace
->round
= round
;
511 bbd
[bb
->index
].in_trace
= *n_traces
;
516 profile_probability prob
;
520 /* The probability and frequency of the best edge. */
521 profile_probability best_prob
= profile_probability::uninitialized ();
522 int best_freq
= INT_MIN
/ 2;
525 mark_bb_visited (bb
, *n_traces
);
529 fprintf (dump_file
, "Basic block %d was visited in trace %d\n",
530 bb
->index
, *n_traces
- 1);
532 ends_in_call
= block_ends_with_call_p (bb
);
534 /* Select the successor that will be placed after BB. */
535 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
537 gcc_assert (!(e
->flags
& EDGE_FAKE
));
539 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
542 if (bb_visited_trace (e
->dest
)
543 && bb_visited_trace (e
->dest
) != *n_traces
)
546 if (BB_PARTITION (e
->dest
) != BB_PARTITION (bb
))
549 prob
= e
->probability
;
550 freq
= e
->dest
->frequency
;
552 /* The only sensible preference for a call instruction is the
553 fallthru edge. Don't bother selecting anything else. */
556 if (e
->flags
& EDGE_CAN_FALLTHRU
)
565 /* Edge that cannot be fallthru or improbable or infrequent
566 successor (i.e. it is unsuitable successor). When optimizing
567 for size, ignore the probability and frequency. */
568 if (!(e
->flags
& EDGE_CAN_FALLTHRU
) || (e
->flags
& EDGE_COMPLEX
)
569 || !prob
.initialized_p ()
570 || ((prob
.to_reg_br_prob_base () < branch_th
571 || EDGE_FREQUENCY (e
) < exec_th
572 || e
->count
< count_th
) && (!for_size
)))
575 /* If partitioning hot/cold basic blocks, don't consider edges
576 that cross section boundaries. */
578 if (better_edge_p (bb
, e
, prob
, freq
, best_prob
, best_freq
,
587 /* If the best destination has multiple predecessors, and can be
588 duplicated cheaper than a jump, don't allow it to be added
589 to a trace. We'll duplicate it when connecting traces. */
590 if (best_edge
&& EDGE_COUNT (best_edge
->dest
->preds
) >= 2
591 && copy_bb_p (best_edge
->dest
, 0))
594 /* If the best destination has multiple successors or predecessors,
595 don't allow it to be added when optimizing for size. This makes
596 sure predecessors with smaller index are handled before the best
597 destinarion. It breaks long trace and reduces long jumps.
599 Take if-then-else as an example.
605 If we do not remove the best edge B->D/C->D, the final order might
606 be A B D ... C. C is at the end of the program. If D's successors
607 and D are complicated, might need long jumps for A->C and C->D.
608 Similar issue for order: A C D ... B.
610 After removing the best edge, the final result will be ABCD/ ACBD.
611 It does not add jump compared with the previous order. But it
612 reduces the possibility of long jumps. */
613 if (best_edge
&& for_size
614 && (EDGE_COUNT (best_edge
->dest
->succs
) > 1
615 || EDGE_COUNT (best_edge
->dest
->preds
) > 1))
618 /* Add all non-selected successors to the heaps. */
619 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
622 || e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
623 || bb_visited_trace (e
->dest
))
626 key
= bb_to_key (e
->dest
);
628 if (bbd
[e
->dest
->index
].heap
)
630 /* E->DEST is already in some heap. */
631 if (key
!= bbd
[e
->dest
->index
].node
->get_key ())
636 "Changing key for bb %d from %ld to %ld.\n",
638 (long) bbd
[e
->dest
->index
].node
->get_key (),
641 bbd
[e
->dest
->index
].heap
->replace_key
642 (bbd
[e
->dest
->index
].node
, key
);
647 bb_heap_t
*which_heap
= *heap
;
649 prob
= e
->probability
;
650 freq
= EDGE_FREQUENCY (e
);
652 if (!(e
->flags
& EDGE_CAN_FALLTHRU
)
653 || (e
->flags
& EDGE_COMPLEX
)
654 || !prob
.initialized_p ()
655 || prob
.to_reg_br_prob_base () < branch_th
657 || e
->count
< count_th
)
659 /* When partitioning hot/cold basic blocks, make sure
660 the cold blocks (and only the cold blocks) all get
661 pushed to the last round of trace collection. When
662 optimizing for size, do not push to next round. */
664 if (!for_size
&& push_to_next_round_p (e
->dest
, round
,
667 which_heap
= new_heap
;
670 bbd
[e
->dest
->index
].heap
= which_heap
;
671 bbd
[e
->dest
->index
].node
= which_heap
->insert (key
, e
->dest
);
676 " Possible start of %s round: %d (key: %ld)\n",
677 (which_heap
== new_heap
) ? "next" : "this",
678 e
->dest
->index
, (long) key
);
684 if (best_edge
) /* Suitable successor was found. */
686 if (bb_visited_trace (best_edge
->dest
) == *n_traces
)
688 /* We do nothing with one basic block loops. */
689 if (best_edge
->dest
!= bb
)
691 if (EDGE_FREQUENCY (best_edge
)
692 > 4 * best_edge
->dest
->frequency
/ 5)
694 /* The loop has at least 4 iterations. If the loop
695 header is not the first block of the function
696 we can rotate the loop. */
699 != ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
)
704 "Rotating loop %d - %d\n",
705 best_edge
->dest
->index
, bb
->index
);
707 bb
->aux
= best_edge
->dest
;
708 bbd
[best_edge
->dest
->index
].in_trace
=
710 bb
= rotate_loop (best_edge
, trace
, *n_traces
);
715 /* The loop has less than 4 iterations. */
717 if (single_succ_p (bb
)
718 && copy_bb_p (best_edge
->dest
,
719 optimize_edge_for_speed_p
722 bb
= copy_bb (best_edge
->dest
, best_edge
, bb
,
729 /* Terminate the trace. */
734 /* Check for a situation
743 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
744 >= EDGE_FREQUENCY (AC).
745 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
746 Best ordering is then A B C.
748 When optimizing for size, A B C is always the best order.
750 This situation is created for example by:
757 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
759 && (e
->flags
& EDGE_CAN_FALLTHRU
)
760 && !(e
->flags
& EDGE_COMPLEX
)
761 && !bb_visited_trace (e
->dest
)
762 && single_pred_p (e
->dest
)
763 && !(e
->flags
& EDGE_CROSSING
)
764 && single_succ_p (e
->dest
)
765 && (single_succ_edge (e
->dest
)->flags
767 && !(single_succ_edge (e
->dest
)->flags
& EDGE_COMPLEX
)
768 && single_succ (e
->dest
) == best_edge
->dest
769 && (2 * e
->dest
->frequency
>= EDGE_FREQUENCY (best_edge
)
774 fprintf (dump_file
, "Selecting BB %d\n",
775 best_edge
->dest
->index
);
779 bb
->aux
= best_edge
->dest
;
780 bbd
[best_edge
->dest
->index
].in_trace
= (*n_traces
) - 1;
781 bb
= best_edge
->dest
;
787 bbd
[trace
->first
->index
].start_of_trace
= *n_traces
- 1;
788 if (bbd
[trace
->last
->index
].end_of_trace
!= *n_traces
- 1)
790 bbd
[trace
->last
->index
].end_of_trace
= *n_traces
- 1;
791 /* Update the cached maximum frequency for interesting predecessor
792 edges for successors of the new trace end. */
793 FOR_EACH_EDGE (e
, ei
, trace
->last
->succs
)
794 if (EDGE_FREQUENCY (e
) > bbd
[e
->dest
->index
].priority
)
795 bbd
[e
->dest
->index
].priority
= EDGE_FREQUENCY (e
);
798 /* The trace is terminated so we have to recount the keys in heap
799 (some block can have a lower key because now one of its predecessors
800 is an end of the trace). */
801 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
803 if (e
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
804 || bb_visited_trace (e
->dest
))
807 if (bbd
[e
->dest
->index
].heap
)
809 key
= bb_to_key (e
->dest
);
810 if (key
!= bbd
[e
->dest
->index
].node
->get_key ())
815 "Changing key for bb %d from %ld to %ld.\n",
817 (long) bbd
[e
->dest
->index
].node
->get_key (), key
);
819 bbd
[e
->dest
->index
].heap
->replace_key
820 (bbd
[e
->dest
->index
].node
, key
);
828 /* "Return" the new heap. */
832 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
833 it to trace after BB, mark OLD_BB visited and update pass' data structures
834 (TRACE is a number of trace which OLD_BB is duplicated to). */
837 copy_bb (basic_block old_bb
, edge e
, basic_block bb
, int trace
)
841 new_bb
= duplicate_block (old_bb
, e
, bb
);
842 BB_COPY_PARTITION (new_bb
, old_bb
);
844 gcc_assert (e
->dest
== new_bb
);
848 "Duplicated bb %d (created bb %d)\n",
849 old_bb
->index
, new_bb
->index
);
851 if (new_bb
->index
>= array_size
852 || last_basic_block_for_fn (cfun
) > array_size
)
857 new_size
= MAX (last_basic_block_for_fn (cfun
), new_bb
->index
+ 1);
858 new_size
= GET_ARRAY_SIZE (new_size
);
859 bbd
= XRESIZEVEC (bbro_basic_block_data
, bbd
, new_size
);
860 for (i
= array_size
; i
< new_size
; i
++)
862 bbd
[i
].start_of_trace
= -1;
863 bbd
[i
].end_of_trace
= -1;
864 bbd
[i
].in_trace
= -1;
866 bbd
[i
].priority
= -1;
870 array_size
= new_size
;
875 "Growing the dynamic array to %d elements.\n",
880 gcc_assert (!bb_visited_trace (e
->dest
));
881 mark_bb_visited (new_bb
, trace
);
882 new_bb
->aux
= bb
->aux
;
885 bbd
[new_bb
->index
].in_trace
= trace
;
890 /* Compute and return the key (for the heap) of the basic block BB. */
893 bb_to_key (basic_block bb
)
898 /* Use index as key to align with its original order. */
899 if (optimize_function_for_size_p (cfun
))
902 /* Do not start in probably never executed blocks. */
904 if (BB_PARTITION (bb
) == BB_COLD_PARTITION
905 || probably_never_executed_bb_p (cfun
, bb
))
908 /* Prefer blocks whose predecessor is an end of some trace
909 or whose predecessor edge is EDGE_DFS_BACK. */
910 int priority
= bbd
[bb
->index
].priority
;
914 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
916 if ((e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
917 && bbd
[e
->src
->index
].end_of_trace
>= 0)
918 || (e
->flags
& EDGE_DFS_BACK
))
920 int edge_freq
= EDGE_FREQUENCY (e
);
922 if (edge_freq
> priority
)
923 priority
= edge_freq
;
926 bbd
[bb
->index
].priority
= priority
;
930 /* The block with priority should have significantly lower key. */
931 return -(100 * BB_FREQ_MAX
+ 100 * priority
+ bb
->frequency
);
933 return -bb
->frequency
;
936 /* Return true when the edge E from basic block BB is better than the temporary
937 best edge (details are in function). The probability of edge E is PROB. The
938 frequency of the successor is FREQ. The current best probability is
939 BEST_PROB, the best frequency is BEST_FREQ.
940 The edge is considered to be equivalent when PROB does not differ much from
941 BEST_PROB; similarly for frequency. */
944 better_edge_p (const_basic_block bb
, const_edge e
, profile_probability prob
,
945 int freq
, profile_probability best_prob
, int best_freq
,
946 const_edge cur_best_edge
)
950 /* The BEST_* values do not have to be best, but can be a bit smaller than
952 profile_probability diff_prob
= best_prob
.apply_scale (1, 10);
953 int diff_freq
= best_freq
/ 10;
955 /* The smaller one is better to keep the original order. */
956 if (optimize_function_for_size_p (cfun
))
957 return !cur_best_edge
958 || cur_best_edge
->dest
->index
> e
->dest
->index
;
960 /* Those edges are so expensive that continuing a trace is not useful
962 if (e
->flags
& (EDGE_ABNORMAL
| EDGE_EH
))
965 if (prob
> best_prob
+ diff_prob
966 || (!best_prob
.initialized_p ()
967 && prob
> profile_probability::guessed_never ()))
968 /* The edge has higher probability than the temporary best edge. */
969 is_better_edge
= true;
970 else if (prob
< best_prob
- diff_prob
)
971 /* The edge has lower probability than the temporary best edge. */
972 is_better_edge
= false;
973 else if (freq
< best_freq
- diff_freq
)
974 /* The edge and the temporary best edge have almost equivalent
975 probabilities. The higher frequency of a successor now means
976 that there is another edge going into that successor.
977 This successor has lower frequency so it is better. */
978 is_better_edge
= true;
979 else if (freq
> best_freq
+ diff_freq
)
980 /* This successor has higher frequency so it is worse. */
981 is_better_edge
= false;
982 else if (e
->dest
->prev_bb
== bb
)
983 /* The edges have equivalent probabilities and the successors
984 have equivalent frequencies. Select the previous successor. */
985 is_better_edge
= true;
987 is_better_edge
= false;
989 /* If we are doing hot/cold partitioning, make sure that we always favor
990 non-crossing edges over crossing edges. */
993 && flag_reorder_blocks_and_partition
995 && (cur_best_edge
->flags
& EDGE_CROSSING
)
996 && !(e
->flags
& EDGE_CROSSING
))
997 is_better_edge
= true;
999 return is_better_edge
;
1002 /* Return true when the edge E is better than the temporary best edge
1003 CUR_BEST_EDGE. If SRC_INDEX_P is true, the function compares the src bb of
1004 E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1005 BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1006 TRACES record the information about traces.
1007 When optimizing for size, the edge with smaller index is better.
1008 When optimizing for speed, the edge with bigger probability or longer trace
1012 connect_better_edge_p (const_edge e
, bool src_index_p
, int best_len
,
1013 const_edge cur_best_edge
, struct trace
*traces
)
1017 bool is_better_edge
;
1022 if (optimize_function_for_size_p (cfun
))
1024 e_index
= src_index_p
? e
->src
->index
: e
->dest
->index
;
1025 b_index
= src_index_p
? cur_best_edge
->src
->index
1026 : cur_best_edge
->dest
->index
;
1027 /* The smaller one is better to keep the original order. */
1028 return b_index
> e_index
;
1033 e_index
= e
->src
->index
;
1035 if (e
->probability
> cur_best_edge
->probability
)
1036 /* The edge has higher probability than the temporary best edge. */
1037 is_better_edge
= true;
1038 else if (e
->probability
< cur_best_edge
->probability
)
1039 /* The edge has lower probability than the temporary best edge. */
1040 is_better_edge
= false;
1041 else if (traces
[bbd
[e_index
].end_of_trace
].length
> best_len
)
1042 /* The edge and the temporary best edge have equivalent probabilities.
1043 The edge with longer trace is better. */
1044 is_better_edge
= true;
1046 is_better_edge
= false;
1050 e_index
= e
->dest
->index
;
1052 if (e
->probability
> cur_best_edge
->probability
)
1053 /* The edge has higher probability than the temporary best edge. */
1054 is_better_edge
= true;
1055 else if (e
->probability
< cur_best_edge
->probability
)
1056 /* The edge has lower probability than the temporary best edge. */
1057 is_better_edge
= false;
1058 else if (traces
[bbd
[e_index
].start_of_trace
].length
> best_len
)
1059 /* The edge and the temporary best edge have equivalent probabilities.
1060 The edge with longer trace is better. */
1061 is_better_edge
= true;
1063 is_better_edge
= false;
1066 return is_better_edge
;
1069 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
1072 connect_traces (int n_traces
, struct trace
*traces
)
1079 int current_partition
;
1081 gcov_type count_threshold
;
1082 bool for_size
= optimize_function_for_size_p (cfun
);
1084 freq_threshold
= max_entry_frequency
* DUPLICATION_THRESHOLD
/ 1000;
1085 if (max_entry_count
.to_gcov_type () < INT_MAX
/ 1000)
1086 count_threshold
= max_entry_count
.to_gcov_type () * DUPLICATION_THRESHOLD
/ 1000;
1088 count_threshold
= max_entry_count
.to_gcov_type () / 1000 * DUPLICATION_THRESHOLD
;
1090 connected
= XCNEWVEC (bool, n_traces
);
1093 current_partition
= BB_PARTITION (traces
[0].first
);
1096 if (crtl
->has_bb_partition
)
1097 for (i
= 0; i
< n_traces
&& !two_passes
; i
++)
1098 if (BB_PARTITION (traces
[0].first
)
1099 != BB_PARTITION (traces
[i
].first
))
1102 for (i
= 0; i
< n_traces
|| (two_passes
&& current_pass
== 1) ; i
++)
1111 gcc_assert (two_passes
&& current_pass
== 1);
1115 if (current_partition
== BB_HOT_PARTITION
)
1116 current_partition
= BB_COLD_PARTITION
;
1118 current_partition
= BB_HOT_PARTITION
;
1125 && BB_PARTITION (traces
[t
].first
) != current_partition
)
1128 connected
[t
] = true;
1130 /* Find the predecessor traces. */
1131 for (t2
= t
; t2
> 0;)
1136 FOR_EACH_EDGE (e
, ei
, traces
[t2
].first
->preds
)
1138 int si
= e
->src
->index
;
1140 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1141 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1142 && !(e
->flags
& EDGE_COMPLEX
)
1143 && bbd
[si
].end_of_trace
>= 0
1144 && !connected
[bbd
[si
].end_of_trace
]
1145 && (BB_PARTITION (e
->src
) == current_partition
)
1146 && connect_better_edge_p (e
, true, best_len
, best
, traces
))
1149 best_len
= traces
[bbd
[si
].end_of_trace
].length
;
1154 best
->src
->aux
= best
->dest
;
1155 t2
= bbd
[best
->src
->index
].end_of_trace
;
1156 connected
[t2
] = true;
1160 fprintf (dump_file
, "Connection: %d %d\n",
1161 best
->src
->index
, best
->dest
->index
);
1168 if (last_trace
>= 0)
1169 traces
[last_trace
].last
->aux
= traces
[t2
].first
;
1172 /* Find the successor traces. */
1175 /* Find the continuation of the chain. */
1179 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1181 int di
= e
->dest
->index
;
1183 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1184 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1185 && !(e
->flags
& EDGE_COMPLEX
)
1186 && bbd
[di
].start_of_trace
>= 0
1187 && !connected
[bbd
[di
].start_of_trace
]
1188 && (BB_PARTITION (e
->dest
) == current_partition
)
1189 && connect_better_edge_p (e
, false, best_len
, best
, traces
))
1192 best_len
= traces
[bbd
[di
].start_of_trace
].length
;
1199 /* Stop finding the successor traces. */
1202 /* It is OK to connect block n with block n + 1 or a block
1203 before n. For others, only connect to the loop header. */
1204 if (best
->dest
->index
> (traces
[t
].last
->index
+ 1))
1206 int count
= EDGE_COUNT (best
->dest
->preds
);
1208 FOR_EACH_EDGE (e
, ei
, best
->dest
->preds
)
1209 if (e
->flags
& EDGE_DFS_BACK
)
1212 /* If dest has multiple predecessors, skip it. We expect
1213 that one predecessor with smaller index connects with it
1219 /* Only connect Trace n with Trace n + 1. It is conservative
1220 to keep the order as close as possible to the original order.
1221 It also helps to reduce long jumps. */
1222 if (last_trace
!= bbd
[best
->dest
->index
].start_of_trace
- 1)
1226 fprintf (dump_file
, "Connection: %d %d\n",
1227 best
->src
->index
, best
->dest
->index
);
1229 t
= bbd
[best
->dest
->index
].start_of_trace
;
1230 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1231 connected
[t
] = true;
1238 fprintf (dump_file
, "Connection: %d %d\n",
1239 best
->src
->index
, best
->dest
->index
);
1241 t
= bbd
[best
->dest
->index
].start_of_trace
;
1242 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1243 connected
[t
] = true;
1248 /* Try to connect the traces by duplication of 1 block. */
1250 basic_block next_bb
= NULL
;
1251 bool try_copy
= false;
1253 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1254 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1255 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1256 && !(e
->flags
& EDGE_COMPLEX
)
1257 && (!best
|| e
->probability
> best
->probability
))
1263 /* If the destination is a start of a trace which is only
1264 one block long, then no need to search the successor
1265 blocks of the trace. Accept it. */
1266 if (bbd
[e
->dest
->index
].start_of_trace
>= 0
1267 && traces
[bbd
[e
->dest
->index
].start_of_trace
].length
1275 FOR_EACH_EDGE (e2
, ei
, e
->dest
->succs
)
1277 int di
= e2
->dest
->index
;
1279 if (e2
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
)
1280 || ((e2
->flags
& EDGE_CAN_FALLTHRU
)
1281 && !(e2
->flags
& EDGE_COMPLEX
)
1282 && bbd
[di
].start_of_trace
>= 0
1283 && !connected
[bbd
[di
].start_of_trace
]
1284 && BB_PARTITION (e2
->dest
) == current_partition
1285 && EDGE_FREQUENCY (e2
) >= freq_threshold
1286 && e2
->count
>= count_threshold
1288 || e2
->probability
> best2
->probability
1289 || (e2
->probability
== best2
->probability
1290 && traces
[bbd
[di
].start_of_trace
].length
1295 if (e2
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1296 best2_len
= traces
[bbd
[di
].start_of_trace
].length
;
1298 best2_len
= INT_MAX
;
1305 if (crtl
->has_bb_partition
)
1308 /* Copy tiny blocks always; copy larger blocks only when the
1309 edge is traversed frequently enough. */
1311 && copy_bb_p (best
->dest
,
1312 optimize_edge_for_speed_p (best
)
1313 && EDGE_FREQUENCY (best
) >= freq_threshold
1314 && best
->count
>= count_threshold
))
1320 fprintf (dump_file
, "Connection: %d %d ",
1321 traces
[t
].last
->index
, best
->dest
->index
);
1323 fputc ('\n', dump_file
);
1324 else if (next_bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1325 fprintf (dump_file
, "exit\n");
1327 fprintf (dump_file
, "%d\n", next_bb
->index
);
1330 new_bb
= copy_bb (best
->dest
, best
, traces
[t
].last
, t
);
1331 traces
[t
].last
= new_bb
;
1332 if (next_bb
&& next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
1334 t
= bbd
[next_bb
->index
].start_of_trace
;
1335 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1336 connected
[t
] = true;
1340 break; /* Stop finding the successor traces. */
1343 break; /* Stop finding the successor traces. */
1352 fprintf (dump_file
, "Final order:\n");
1353 for (bb
= traces
[0].first
; bb
; bb
= (basic_block
) bb
->aux
)
1354 fprintf (dump_file
, "%d ", bb
->index
);
1355 fprintf (dump_file
, "\n");
1362 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1363 when code size is allowed to grow by duplication. */
1366 copy_bb_p (const_basic_block bb
, int code_may_grow
)
1369 int max_size
= uncond_jump_length
;
1374 if (EDGE_COUNT (bb
->preds
) < 2)
1376 if (!can_duplicate_block_p (bb
))
1379 /* Avoid duplicating blocks which have many successors (PR/13430). */
1380 if (EDGE_COUNT (bb
->succs
) > 8)
1383 if (code_may_grow
&& optimize_bb_for_speed_p (bb
))
1384 max_size
*= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS
);
1386 FOR_BB_INSNS (bb
, insn
)
1389 size
+= get_attr_min_length (insn
);
1392 if (size
<= max_size
)
1398 "Block %d can't be copied because its size = %d.\n",
1405 /* Return the length of unconditional jump instruction. */
1408 get_uncond_jump_length (void)
1413 rtx_code_label
*label
= emit_label (gen_label_rtx ());
1414 rtx_insn
*jump
= emit_jump_insn (targetm
.gen_jump (label
));
1415 length
= get_attr_min_length (jump
);
1421 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1422 Duplicate the landing pad and split the edges so that no EH edge
1423 crosses partitions. */
1426 fix_up_crossing_landing_pad (eh_landing_pad old_lp
, basic_block old_bb
)
1428 eh_landing_pad new_lp
;
1429 basic_block new_bb
, last_bb
, post_bb
;
1431 unsigned new_partition
;
1435 /* Generate the new landing-pad structure. */
1436 new_lp
= gen_eh_landing_pad (old_lp
->region
);
1437 new_lp
->post_landing_pad
= old_lp
->post_landing_pad
;
1438 new_lp
->landing_pad
= gen_label_rtx ();
1439 LABEL_PRESERVE_P (new_lp
->landing_pad
) = 1;
1441 /* Put appropriate instructions in new bb. */
1442 rtx_code_label
*new_label
= emit_label (new_lp
->landing_pad
);
1444 expand_dw2_landing_pad_for_region (old_lp
->region
);
1446 post_bb
= BLOCK_FOR_INSN (old_lp
->landing_pad
);
1447 post_bb
= single_succ (post_bb
);
1448 rtx_code_label
*post_label
= block_label (post_bb
);
1449 jump
= emit_jump_insn (targetm
.gen_jump (post_label
));
1450 JUMP_LABEL (jump
) = post_label
;
1452 /* Create new basic block to be dest for lp. */
1453 last_bb
= EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
;
1454 new_bb
= create_basic_block (new_label
, jump
, last_bb
);
1455 new_bb
->aux
= last_bb
->aux
;
1456 new_bb
->frequency
= post_bb
->frequency
;
1457 new_bb
->count
= post_bb
->count
;
1458 last_bb
->aux
= new_bb
;
1460 emit_barrier_after_bb (new_bb
);
1462 make_single_succ_edge (new_bb
, post_bb
, 0);
1464 /* Make sure new bb is in the other partition. */
1465 new_partition
= BB_PARTITION (old_bb
);
1466 new_partition
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1467 BB_SET_PARTITION (new_bb
, new_partition
);
1469 /* Fix up the edges. */
1470 for (ei
= ei_start (old_bb
->preds
); (e
= ei_safe_edge (ei
)) != NULL
; )
1471 if (BB_PARTITION (e
->src
) == new_partition
)
1473 rtx_insn
*insn
= BB_END (e
->src
);
1474 rtx note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
1476 gcc_assert (note
!= NULL
);
1477 gcc_checking_assert (INTVAL (XEXP (note
, 0)) == old_lp
->index
);
1478 XEXP (note
, 0) = GEN_INT (new_lp
->index
);
1480 /* Adjust the edge to the new destination. */
1481 redirect_edge_succ (e
, new_bb
);
1488 /* Ensure that all hot bbs are included in a hot path through the
1489 procedure. This is done by calling this function twice, once
1490 with WALK_UP true (to look for paths from the entry to hot bbs) and
1491 once with WALK_UP false (to look for paths from hot bbs to the exit).
1492 Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1493 to BBS_IN_HOT_PARTITION. */
1496 sanitize_hot_paths (bool walk_up
, unsigned int cold_bb_count
,
1497 vec
<basic_block
> *bbs_in_hot_partition
)
1499 /* Callers check this. */
1500 gcc_checking_assert (cold_bb_count
);
1502 /* Keep examining hot bbs while we still have some left to check
1503 and there are remaining cold bbs. */
1504 vec
<basic_block
> hot_bbs_to_check
= bbs_in_hot_partition
->copy ();
1505 while (! hot_bbs_to_check
.is_empty ()
1508 basic_block bb
= hot_bbs_to_check
.pop ();
1509 vec
<edge
, va_gc
> *edges
= walk_up
? bb
->preds
: bb
->succs
;
1512 profile_probability highest_probability
1513 = profile_probability::uninitialized ();
1514 int highest_freq
= 0;
1515 profile_count highest_count
= profile_count::uninitialized ();
1518 /* Walk the preds/succs and check if there is at least one already
1519 marked hot. Keep track of the most frequent pred/succ so that we
1520 can mark it hot if we don't find one. */
1521 FOR_EACH_EDGE (e
, ei
, edges
)
1523 basic_block reach_bb
= walk_up
? e
->src
: e
->dest
;
1525 if (e
->flags
& EDGE_DFS_BACK
)
1528 if (BB_PARTITION (reach_bb
) != BB_COLD_PARTITION
)
1533 /* The following loop will look for the hottest edge via
1534 the edge count, if it is non-zero, then fallback to the edge
1535 frequency and finally the edge probability. */
1536 if (!highest_count
.initialized_p () || e
->count
> highest_count
)
1537 highest_count
= e
->count
;
1538 int edge_freq
= EDGE_FREQUENCY (e
);
1539 if (edge_freq
> highest_freq
)
1540 highest_freq
= edge_freq
;
1541 if (!highest_probability
.initialized_p ()
1542 || e
->probability
> highest_probability
)
1543 highest_probability
= e
->probability
;
1546 /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1547 block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1548 then the most frequent pred (or succ) needs to be adjusted. In the
1549 case where multiple preds/succs have the same frequency (e.g. a
1550 50-50 branch), then both will be adjusted. */
1554 FOR_EACH_EDGE (e
, ei
, edges
)
1556 if (e
->flags
& EDGE_DFS_BACK
)
1558 /* Select the hottest edge using the edge count, if it is non-zero,
1559 then fallback to the edge frequency and finally the edge
1561 if (highest_count
> 0)
1563 if (e
->count
< highest_count
)
1566 else if (highest_freq
)
1568 if (EDGE_FREQUENCY (e
) < highest_freq
)
1571 else if (e
->probability
< highest_probability
)
1574 basic_block reach_bb
= walk_up
? e
->src
: e
->dest
;
1576 /* We have a hot bb with an immediate dominator that is cold.
1577 The dominator needs to be re-marked hot. */
1578 BB_SET_PARTITION (reach_bb
, BB_HOT_PARTITION
);
1581 /* Now we need to examine newly-hot reach_bb to see if it is also
1582 dominated by a cold bb. */
1583 bbs_in_hot_partition
->safe_push (reach_bb
);
1584 hot_bbs_to_check
.safe_push (reach_bb
);
1588 return cold_bb_count
;
1592 /* Find the basic blocks that are rarely executed and need to be moved to
1593 a separate section of the .o file (to cut down on paging and improve
1594 cache locality). Return a vector of all edges that cross. */
1597 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1599 vec
<edge
> crossing_edges
= vNULL
;
1603 unsigned int cold_bb_count
= 0;
1604 auto_vec
<basic_block
> bbs_in_hot_partition
;
1606 /* Mark which partition (hot/cold) each basic block belongs in. */
1607 FOR_EACH_BB_FN (bb
, cfun
)
1609 bool cold_bb
= false;
1611 if (probably_never_executed_bb_p (cfun
, bb
))
1613 /* Handle profile insanities created by upstream optimizations
1614 by also checking the incoming edge weights. If there is a non-cold
1615 incoming edge, conservatively prevent this block from being split
1616 into the cold section. */
1618 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1619 if (!probably_never_executed_edge_p (cfun
, e
))
1627 BB_SET_PARTITION (bb
, BB_COLD_PARTITION
);
1632 BB_SET_PARTITION (bb
, BB_HOT_PARTITION
);
1633 bbs_in_hot_partition
.safe_push (bb
);
1637 /* Ensure that hot bbs are included along a hot path from the entry to exit.
1638 Several different possibilities may include cold bbs along all paths
1639 to/from a hot bb. One is that there are edge weight insanities
1640 due to optimization phases that do not properly update basic block profile
1641 counts. The second is that the entry of the function may not be hot, because
1642 it is entered fewer times than the number of profile training runs, but there
1643 is a loop inside the function that causes blocks within the function to be
1644 above the threshold for hotness. This is fixed by walking up from hot bbs
1645 to the entry block, and then down from hot bbs to the exit, performing
1646 partitioning fixups as necessary. */
1649 mark_dfs_back_edges ();
1650 cold_bb_count
= sanitize_hot_paths (true, cold_bb_count
,
1651 &bbs_in_hot_partition
);
1653 sanitize_hot_paths (false, cold_bb_count
, &bbs_in_hot_partition
);
1656 /* The format of .gcc_except_table does not allow landing pads to
1657 be in a different partition as the throw. Fix this by either
1658 moving or duplicating the landing pads. */
1659 if (cfun
->eh
->lp_array
)
1664 FOR_EACH_VEC_ELT (*cfun
->eh
->lp_array
, i
, lp
)
1666 bool all_same
, all_diff
;
1669 || lp
->landing_pad
== NULL_RTX
1670 || !LABEL_P (lp
->landing_pad
))
1673 all_same
= all_diff
= true;
1674 bb
= BLOCK_FOR_INSN (lp
->landing_pad
);
1675 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1677 gcc_assert (e
->flags
& EDGE_EH
);
1678 if (BB_PARTITION (bb
) == BB_PARTITION (e
->src
))
1688 int which
= BB_PARTITION (bb
);
1689 which
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1690 BB_SET_PARTITION (bb
, which
);
1693 fix_up_crossing_landing_pad (lp
, bb
);
1697 /* Mark every edge that crosses between sections. */
1699 FOR_EACH_BB_FN (bb
, cfun
)
1700 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1702 unsigned int flags
= e
->flags
;
1704 /* We should never have EDGE_CROSSING set yet. */
1705 gcc_checking_assert ((flags
& EDGE_CROSSING
) == 0);
1707 if (e
->src
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)
1708 && e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
1709 && BB_PARTITION (e
->src
) != BB_PARTITION (e
->dest
))
1711 crossing_edges
.safe_push (e
);
1712 flags
|= EDGE_CROSSING
;
1715 /* Now that we've split eh edges as appropriate, allow landing pads
1716 to be merged with the post-landing pads. */
1717 flags
&= ~EDGE_PRESERVE
;
1722 return crossing_edges
;
1725 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1728 set_edge_can_fallthru_flag (void)
1732 FOR_EACH_BB_FN (bb
, cfun
)
1737 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1739 e
->flags
&= ~EDGE_CAN_FALLTHRU
;
1741 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1742 if (e
->flags
& EDGE_FALLTHRU
)
1743 e
->flags
|= EDGE_CAN_FALLTHRU
;
1746 /* If the BB ends with an invertible condjump all (2) edges are
1747 CAN_FALLTHRU edges. */
1748 if (EDGE_COUNT (bb
->succs
) != 2)
1750 if (!any_condjump_p (BB_END (bb
)))
1753 rtx_jump_insn
*bb_end_jump
= as_a
<rtx_jump_insn
*> (BB_END (bb
));
1754 if (!invert_jump (bb_end_jump
, JUMP_LABEL (bb_end_jump
), 0))
1756 invert_jump (bb_end_jump
, JUMP_LABEL (bb_end_jump
), 0);
1757 EDGE_SUCC (bb
, 0)->flags
|= EDGE_CAN_FALLTHRU
;
1758 EDGE_SUCC (bb
, 1)->flags
|= EDGE_CAN_FALLTHRU
;
1762 /* If any destination of a crossing edge does not have a label, add label;
1763 Convert any easy fall-through crossing edges to unconditional jumps. */
1766 add_labels_and_missing_jumps (vec
<edge
> crossing_edges
)
1771 FOR_EACH_VEC_ELT (crossing_edges
, i
, e
)
1773 basic_block src
= e
->src
;
1774 basic_block dest
= e
->dest
;
1775 rtx_jump_insn
*new_jump
;
1777 if (dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
1780 /* Make sure dest has a label. */
1781 rtx_code_label
*label
= block_label (dest
);
1783 /* Nothing to do for non-fallthru edges. */
1784 if (src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
1786 if ((e
->flags
& EDGE_FALLTHRU
) == 0)
1789 /* If the block does not end with a control flow insn, then we
1790 can trivially add a jump to the end to fixup the crossing.
1791 Otherwise the jump will have to go in a new bb, which will
1792 be handled by fix_up_fall_thru_edges function. */
1793 if (control_flow_insn_p (BB_END (src
)))
1796 /* Make sure there's only one successor. */
1797 gcc_assert (single_succ_p (src
));
1799 new_jump
= emit_jump_insn_after (targetm
.gen_jump (label
), BB_END (src
));
1800 BB_END (src
) = new_jump
;
1801 JUMP_LABEL (new_jump
) = label
;
1802 LABEL_NUSES (label
) += 1;
1804 emit_barrier_after_bb (src
);
1806 /* Mark edge as non-fallthru. */
1807 e
->flags
&= ~EDGE_FALLTHRU
;
1811 /* Find any bb's where the fall-through edge is a crossing edge (note that
1812 these bb's must also contain a conditional jump or end with a call
1813 instruction; we've already dealt with fall-through edges for blocks
1814 that didn't have a conditional jump or didn't end with call instruction
1815 in the call to add_labels_and_missing_jumps). Convert the fall-through
1816 edge to non-crossing edge by inserting a new bb to fall-through into.
1817 The new bb will contain an unconditional jump (crossing edge) to the
1818 original fall through destination. */
1821 fix_up_fall_thru_edges (void)
1825 FOR_EACH_BB_FN (cur_bb
, cfun
)
1829 edge fall_thru
= NULL
;
1830 edge cond_jump
= NULL
;
1833 if (EDGE_COUNT (cur_bb
->succs
) > 0)
1834 succ1
= EDGE_SUCC (cur_bb
, 0);
1838 if (EDGE_COUNT (cur_bb
->succs
) > 1)
1839 succ2
= EDGE_SUCC (cur_bb
, 1);
1843 /* Find the fall-through edge. */
1846 && (succ1
->flags
& EDGE_FALLTHRU
))
1852 && (succ2
->flags
& EDGE_FALLTHRU
))
1857 else if (succ2
&& EDGE_COUNT (cur_bb
->succs
) > 2)
1858 fall_thru
= find_fallthru_edge (cur_bb
->succs
);
1860 if (fall_thru
&& (fall_thru
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)))
1862 /* Check to see if the fall-thru edge is a crossing edge. */
1864 if (fall_thru
->flags
& EDGE_CROSSING
)
1866 /* The fall_thru edge crosses; now check the cond jump edge, if
1869 bool cond_jump_crosses
= true;
1870 int invert_worked
= 0;
1871 rtx_insn
*old_jump
= BB_END (cur_bb
);
1873 /* Find the jump instruction, if there is one. */
1877 if (!(cond_jump
->flags
& EDGE_CROSSING
))
1878 cond_jump_crosses
= false;
1880 /* We know the fall-thru edge crosses; if the cond
1881 jump edge does NOT cross, and its destination is the
1882 next block in the bb order, invert the jump
1883 (i.e. fix it so the fall through does not cross and
1884 the cond jump does). */
1886 if (!cond_jump_crosses
)
1888 /* Find label in fall_thru block. We've already added
1889 any missing labels, so there must be one. */
1891 rtx_code_label
*fall_thru_label
1892 = block_label (fall_thru
->dest
);
1894 if (old_jump
&& fall_thru_label
)
1896 rtx_jump_insn
*old_jump_insn
1897 = dyn_cast
<rtx_jump_insn
*> (old_jump
);
1899 invert_worked
= invert_jump (old_jump_insn
,
1900 fall_thru_label
, 0);
1905 fall_thru
->flags
&= ~EDGE_FALLTHRU
;
1906 cond_jump
->flags
|= EDGE_FALLTHRU
;
1907 update_br_prob_note (cur_bb
);
1908 std::swap (fall_thru
, cond_jump
);
1909 cond_jump
->flags
|= EDGE_CROSSING
;
1910 fall_thru
->flags
&= ~EDGE_CROSSING
;
1915 if (cond_jump_crosses
|| !invert_worked
)
1917 /* This is the case where both edges out of the basic
1918 block are crossing edges. Here we will fix up the
1919 fall through edge. The jump edge will be taken care
1920 of later. The EDGE_CROSSING flag of fall_thru edge
1921 is unset before the call to force_nonfallthru
1922 function because if a new basic-block is created
1923 this edge remains in the current section boundary
1924 while the edge between new_bb and the fall_thru->dest
1925 becomes EDGE_CROSSING. */
1927 fall_thru
->flags
&= ~EDGE_CROSSING
;
1928 basic_block new_bb
= force_nonfallthru (fall_thru
);
1932 new_bb
->aux
= cur_bb
->aux
;
1933 cur_bb
->aux
= new_bb
;
1935 /* This is done by force_nonfallthru_and_redirect. */
1936 gcc_assert (BB_PARTITION (new_bb
)
1937 == BB_PARTITION (cur_bb
));
1939 single_succ_edge (new_bb
)->flags
|= EDGE_CROSSING
;
1943 /* If a new basic-block was not created; restore
1944 the EDGE_CROSSING flag. */
1945 fall_thru
->flags
|= EDGE_CROSSING
;
1948 /* Add barrier after new jump */
1949 emit_barrier_after_bb (new_bb
? new_bb
: cur_bb
);
1956 /* This function checks the destination block of a "crossing jump" to
1957 see if it has any crossing predecessors that begin with a code label
1958 and end with an unconditional jump. If so, it returns that predecessor
1959 block. (This is to avoid creating lots of new basic blocks that all
1960 contain unconditional jumps to the same destination). */
1963 find_jump_block (basic_block jump_dest
)
1965 basic_block source_bb
= NULL
;
1970 FOR_EACH_EDGE (e
, ei
, jump_dest
->preds
)
1971 if (e
->flags
& EDGE_CROSSING
)
1973 basic_block src
= e
->src
;
1975 /* Check each predecessor to see if it has a label, and contains
1976 only one executable instruction, which is an unconditional jump.
1977 If so, we can use it. */
1979 if (LABEL_P (BB_HEAD (src
)))
1980 for (insn
= BB_HEAD (src
);
1981 !INSN_P (insn
) && insn
!= NEXT_INSN (BB_END (src
));
1982 insn
= NEXT_INSN (insn
))
1985 && insn
== BB_END (src
)
1987 && !any_condjump_p (insn
))
2001 /* Find all BB's with conditional jumps that are crossing edges;
2002 insert a new bb and make the conditional jump branch to the new
2003 bb instead (make the new bb same color so conditional branch won't
2004 be a 'crossing' edge). Insert an unconditional jump from the
2005 new bb to the original destination of the conditional jump. */
2008 fix_crossing_conditional_branches (void)
2018 rtx old_label
= NULL_RTX
;
2019 rtx_code_label
*new_label
;
2021 FOR_EACH_BB_FN (cur_bb
, cfun
)
2023 crossing_edge
= NULL
;
2024 if (EDGE_COUNT (cur_bb
->succs
) > 0)
2025 succ1
= EDGE_SUCC (cur_bb
, 0);
2029 if (EDGE_COUNT (cur_bb
->succs
) > 1)
2030 succ2
= EDGE_SUCC (cur_bb
, 1);
2034 /* We already took care of fall-through edges, so only one successor
2035 can be a crossing edge. */
2037 if (succ1
&& (succ1
->flags
& EDGE_CROSSING
))
2038 crossing_edge
= succ1
;
2039 else if (succ2
&& (succ2
->flags
& EDGE_CROSSING
))
2040 crossing_edge
= succ2
;
2044 rtx_insn
*old_jump
= BB_END (cur_bb
);
2046 /* Check to make sure the jump instruction is a
2047 conditional jump. */
2051 if (any_condjump_p (old_jump
))
2053 if (GET_CODE (PATTERN (old_jump
)) == SET
)
2054 set_src
= SET_SRC (PATTERN (old_jump
));
2055 else if (GET_CODE (PATTERN (old_jump
)) == PARALLEL
)
2057 set_src
= XVECEXP (PATTERN (old_jump
), 0,0);
2058 if (GET_CODE (set_src
) == SET
)
2059 set_src
= SET_SRC (set_src
);
2065 if (set_src
&& (GET_CODE (set_src
) == IF_THEN_ELSE
))
2067 rtx_jump_insn
*old_jump_insn
=
2068 as_a
<rtx_jump_insn
*> (old_jump
);
2070 if (GET_CODE (XEXP (set_src
, 1)) == PC
)
2071 old_label
= XEXP (set_src
, 2);
2072 else if (GET_CODE (XEXP (set_src
, 2)) == PC
)
2073 old_label
= XEXP (set_src
, 1);
2075 /* Check to see if new bb for jumping to that dest has
2076 already been created; if so, use it; if not, create
2079 new_bb
= find_jump_block (crossing_edge
->dest
);
2082 new_label
= block_label (new_bb
);
2085 basic_block last_bb
;
2086 rtx_code_label
*old_jump_target
;
2087 rtx_jump_insn
*new_jump
;
2089 /* Create new basic block to be dest for
2090 conditional jump. */
2092 /* Put appropriate instructions in new bb. */
2094 new_label
= gen_label_rtx ();
2095 emit_label (new_label
);
2097 gcc_assert (GET_CODE (old_label
) == LABEL_REF
);
2098 old_jump_target
= old_jump_insn
->jump_target ();
2099 new_jump
= as_a
<rtx_jump_insn
*>
2100 (emit_jump_insn (targetm
.gen_jump (old_jump_target
)));
2101 new_jump
->set_jump_target (old_jump_target
);
2103 last_bb
= EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
;
2104 new_bb
= create_basic_block (new_label
, new_jump
, last_bb
);
2105 new_bb
->aux
= last_bb
->aux
;
2106 last_bb
->aux
= new_bb
;
2108 emit_barrier_after_bb (new_bb
);
2110 /* Make sure new bb is in same partition as source
2111 of conditional branch. */
2112 BB_COPY_PARTITION (new_bb
, cur_bb
);
2115 /* Make old jump branch to new bb. */
2117 redirect_jump (old_jump_insn
, new_label
, 0);
2119 /* Remove crossing_edge as predecessor of 'dest'. */
2121 dest
= crossing_edge
->dest
;
2123 redirect_edge_succ (crossing_edge
, new_bb
);
2125 /* Make a new edge from new_bb to old dest; new edge
2126 will be a successor for new_bb and a predecessor
2129 if (EDGE_COUNT (new_bb
->succs
) == 0)
2130 new_edge
= make_single_succ_edge (new_bb
, dest
, 0);
2132 new_edge
= EDGE_SUCC (new_bb
, 0);
2134 crossing_edge
->flags
&= ~EDGE_CROSSING
;
2135 new_edge
->flags
|= EDGE_CROSSING
;
2141 /* Find any unconditional branches that cross between hot and cold
2142 sections. Convert them into indirect jumps instead. */
2145 fix_crossing_unconditional_branches (void)
2148 rtx_insn
*last_insn
;
2151 rtx_insn
*indirect_jump_sequence
;
2152 rtx_insn
*jump_insn
= NULL
;
2157 FOR_EACH_BB_FN (cur_bb
, cfun
)
2159 last_insn
= BB_END (cur_bb
);
2161 if (EDGE_COUNT (cur_bb
->succs
) < 1)
2164 succ
= EDGE_SUCC (cur_bb
, 0);
2166 /* Check to see if bb ends in a crossing (unconditional) jump. At
2167 this point, no crossing jumps should be conditional. */
2169 if (JUMP_P (last_insn
)
2170 && (succ
->flags
& EDGE_CROSSING
))
2172 gcc_assert (!any_condjump_p (last_insn
));
2174 /* Make sure the jump is not already an indirect or table jump. */
2176 if (!computed_jump_p (last_insn
)
2177 && !tablejump_p (last_insn
, NULL
, NULL
))
2179 /* We have found a "crossing" unconditional branch. Now
2180 we must convert it to an indirect jump. First create
2181 reference of label, as target for jump. */
2183 label
= JUMP_LABEL (last_insn
);
2184 label_addr
= gen_rtx_LABEL_REF (Pmode
, label
);
2185 LABEL_NUSES (label
) += 1;
2187 /* Get a register to use for the indirect jump. */
2189 new_reg
= gen_reg_rtx (Pmode
);
2191 /* Generate indirect the jump sequence. */
2194 emit_move_insn (new_reg
, label_addr
);
2195 emit_indirect_jump (new_reg
);
2196 indirect_jump_sequence
= get_insns ();
2199 /* Make sure every instruction in the new jump sequence has
2200 its basic block set to be cur_bb. */
2202 for (cur_insn
= indirect_jump_sequence
; cur_insn
;
2203 cur_insn
= NEXT_INSN (cur_insn
))
2205 if (!BARRIER_P (cur_insn
))
2206 BLOCK_FOR_INSN (cur_insn
) = cur_bb
;
2207 if (JUMP_P (cur_insn
))
2208 jump_insn
= cur_insn
;
2211 /* Insert the new (indirect) jump sequence immediately before
2212 the unconditional jump, then delete the unconditional jump. */
2214 emit_insn_before (indirect_jump_sequence
, last_insn
);
2215 delete_insn (last_insn
);
2217 JUMP_LABEL (jump_insn
) = label
;
2218 LABEL_NUSES (label
)++;
2220 /* Make BB_END for cur_bb be the jump instruction (NOT the
2221 barrier instruction at the end of the sequence...). */
2223 BB_END (cur_bb
) = jump_insn
;
2229 /* Update CROSSING_JUMP_P flags on all jump insns. */
2232 update_crossing_jump_flags (void)
2238 FOR_EACH_BB_FN (bb
, cfun
)
2239 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2240 if (e
->flags
& EDGE_CROSSING
)
2242 if (JUMP_P (BB_END (bb
))
2243 /* Some flags were added during fix_up_fall_thru_edges, via
2244 force_nonfallthru_and_redirect. */
2245 && !CROSSING_JUMP_P (BB_END (bb
)))
2246 CROSSING_JUMP_P (BB_END (bb
)) = 1;
2251 /* Reorder basic blocks using the software trace cache (STC) algorithm. */
2254 reorder_basic_blocks_software_trace_cache (void)
2257 fprintf (dump_file
, "\nReordering with the STC algorithm.\n\n");
2261 struct trace
*traces
;
2263 /* We are estimating the length of uncond jump insn only once since the code
2264 for getting the insn length always returns the minimal length now. */
2265 if (uncond_jump_length
== 0)
2266 uncond_jump_length
= get_uncond_jump_length ();
2268 /* We need to know some information for each basic block. */
2269 array_size
= GET_ARRAY_SIZE (last_basic_block_for_fn (cfun
));
2270 bbd
= XNEWVEC (bbro_basic_block_data
, array_size
);
2271 for (i
= 0; i
< array_size
; i
++)
2273 bbd
[i
].start_of_trace
= -1;
2274 bbd
[i
].end_of_trace
= -1;
2275 bbd
[i
].in_trace
= -1;
2277 bbd
[i
].priority
= -1;
2282 traces
= XNEWVEC (struct trace
, n_basic_blocks_for_fn (cfun
));
2284 find_traces (&n_traces
, traces
);
2285 connect_traces (n_traces
, traces
);
2290 /* Return true if edge E1 is more desirable as a fallthrough edge than
2294 edge_order (edge e1
, edge e2
)
2296 return EDGE_FREQUENCY (e1
) > EDGE_FREQUENCY (e2
);
2299 /* Reorder basic blocks using the "simple" algorithm. This tries to
2300 maximize the dynamic number of branches that are fallthrough, without
2301 copying instructions. The algorithm is greedy, looking at the most
2302 frequently executed branch first. */
2305 reorder_basic_blocks_simple (void)
2308 fprintf (dump_file
, "\nReordering with the \"simple\" algorithm.\n\n");
2310 edge
*edges
= new edge
[2 * n_basic_blocks_for_fn (cfun
)];
2312 /* First, collect all edges that can be optimized by reordering blocks:
2313 simple jumps and conditional jumps, as well as the function entry edge. */
2316 edges
[n
++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun
), 0);
2319 FOR_EACH_BB_FN (bb
, cfun
)
2321 rtx_insn
*end
= BB_END (bb
);
2323 if (computed_jump_p (end
) || tablejump_p (end
, NULL
, NULL
))
2326 /* We cannot optimize asm goto. */
2327 if (JUMP_P (end
) && extract_asm_operands (end
))
2330 if (single_succ_p (bb
))
2331 edges
[n
++] = EDGE_SUCC (bb
, 0);
2332 else if (any_condjump_p (end
))
2334 edge e0
= EDGE_SUCC (bb
, 0);
2335 edge e1
= EDGE_SUCC (bb
, 1);
2336 /* When optimizing for size it is best to keep the original
2337 fallthrough edges. */
2338 if (e1
->flags
& EDGE_FALLTHRU
)
2345 /* Sort the edges, the most desirable first. When optimizing for size
2346 all edges are equally desirable. */
2348 if (optimize_function_for_speed_p (cfun
))
2349 std::stable_sort (edges
, edges
+ n
, edge_order
);
2351 /* Now decide which of those edges to make fallthrough edges. We set
2352 BB_VISITED if a block already has a fallthrough successor assigned
2353 to it. We make ->AUX of an endpoint point to the opposite endpoint
2354 of a sequence of blocks that fall through, and ->AUX will be NULL
2355 for a block that is in such a sequence but not an endpoint anymore.
2357 To start with, everything points to itself, nothing is assigned yet. */
2359 FOR_ALL_BB_FN (bb
, cfun
)
2362 bb
->flags
&= ~BB_VISITED
;
2365 EXIT_BLOCK_PTR_FOR_FN (cfun
)->aux
= 0;
2367 /* Now for all edges, the most desirable first, see if that edge can
2368 connect two sequences. If it can, update AUX and BB_VISITED; if it
2369 cannot, zero out the edge in the table. */
2371 for (int j
= 0; j
< n
; j
++)
2375 basic_block tail_a
= e
->src
;
2376 basic_block head_b
= e
->dest
;
2377 basic_block head_a
= (basic_block
) tail_a
->aux
;
2378 basic_block tail_b
= (basic_block
) head_b
->aux
;
2380 /* An edge cannot connect two sequences if:
2381 - it crosses partitions;
2382 - its src is not a current endpoint;
2383 - its dest is not a current endpoint;
2384 - or, it would create a loop. */
2386 if (e
->flags
& EDGE_CROSSING
2387 || tail_a
->flags
& BB_VISITED
2389 || (!(head_b
->flags
& BB_VISITED
) && head_b
!= tail_b
)
2390 || tail_a
== tail_b
)
2398 head_a
->aux
= tail_b
;
2399 tail_b
->aux
= head_a
;
2400 tail_a
->flags
|= BB_VISITED
;
2403 /* Put the pieces together, in the same order that the start blocks of
2404 the sequences already had. The hot/cold partitioning gives a little
2405 complication: as a first pass only do this for blocks in the same
2406 partition as the start block, and (if there is anything left to do)
2407 in a second pass handle the other partition. */
2409 basic_block last_tail
= (basic_block
) ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
;
2411 int current_partition
= BB_PARTITION (last_tail
);
2412 bool need_another_pass
= true;
2414 for (int pass
= 0; pass
< 2 && need_another_pass
; pass
++)
2416 need_another_pass
= false;
2418 FOR_EACH_BB_FN (bb
, cfun
)
2419 if ((bb
->flags
& BB_VISITED
&& bb
->aux
) || bb
->aux
== bb
)
2421 if (BB_PARTITION (bb
) != current_partition
)
2423 need_another_pass
= true;
2427 last_tail
->aux
= bb
;
2428 last_tail
= (basic_block
) bb
->aux
;
2431 current_partition
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
2436 /* Finally, link all the chosen fallthrough edges. */
2438 for (int j
= 0; j
< n
; j
++)
2440 edges
[j
]->src
->aux
= edges
[j
]->dest
;
2444 /* If the entry edge no longer falls through we have to make a new
2445 block so it can do so again. */
2447 edge e
= EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun
), 0);
2448 if (e
->dest
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
)
2450 force_nonfallthru (e
);
2451 e
->src
->aux
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->aux
;
2452 BB_COPY_PARTITION (e
->src
, e
->dest
);
2456 /* Reorder basic blocks. The main entry point to this file. */
2459 reorder_basic_blocks (void)
2461 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT
);
2463 if (n_basic_blocks_for_fn (cfun
) <= NUM_FIXED_BLOCKS
+ 1)
2466 set_edge_can_fallthru_flag ();
2467 mark_dfs_back_edges ();
2469 switch (flag_reorder_blocks_algorithm
)
2471 case REORDER_BLOCKS_ALGORITHM_SIMPLE
:
2472 reorder_basic_blocks_simple ();
2475 case REORDER_BLOCKS_ALGORITHM_STC
:
2476 reorder_basic_blocks_software_trace_cache ();
2483 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2487 if (dump_flags
& TDF_DETAILS
)
2488 dump_reg_info (dump_file
);
2489 dump_flow_info (dump_file
, dump_flags
);
2492 /* Signal that rtl_verify_flow_info_1 can now verify that there
2493 is at most one switch between hot/cold sections. */
2494 crtl
->bb_reorder_complete
= true;
2497 /* Determine which partition the first basic block in the function
2498 belongs to, then find the first basic block in the current function
2499 that belongs to a different section, and insert a
2500 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2501 instruction stream. When writing out the assembly code,
2502 encountering this note will make the compiler switch between the
2503 hot and cold text sections. */
2506 insert_section_boundary_note (void)
2509 bool switched_sections
= false;
2510 int current_partition
= 0;
2512 if (!crtl
->has_bb_partition
)
2515 FOR_EACH_BB_FN (bb
, cfun
)
2517 if (!current_partition
)
2518 current_partition
= BB_PARTITION (bb
);
2519 if (BB_PARTITION (bb
) != current_partition
)
2521 gcc_assert (!switched_sections
);
2522 switched_sections
= true;
2523 emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS
, BB_HEAD (bb
));
2524 current_partition
= BB_PARTITION (bb
);
2531 const pass_data pass_data_reorder_blocks
=
2533 RTL_PASS
, /* type */
2535 OPTGROUP_NONE
, /* optinfo_flags */
2536 TV_REORDER_BLOCKS
, /* tv_id */
2537 0, /* properties_required */
2538 0, /* properties_provided */
2539 0, /* properties_destroyed */
2540 0, /* todo_flags_start */
2541 0, /* todo_flags_finish */
2544 class pass_reorder_blocks
: public rtl_opt_pass
2547 pass_reorder_blocks (gcc::context
*ctxt
)
2548 : rtl_opt_pass (pass_data_reorder_blocks
, ctxt
)
2551 /* opt_pass methods: */
2552 virtual bool gate (function
*)
2554 if (targetm
.cannot_modify_jumps_p ())
2556 return (optimize
> 0
2557 && (flag_reorder_blocks
|| flag_reorder_blocks_and_partition
));
2560 virtual unsigned int execute (function
*);
2562 }; // class pass_reorder_blocks
2565 pass_reorder_blocks::execute (function
*fun
)
2569 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2570 splitting possibly introduced more crossjumping opportunities. */
2571 cfg_layout_initialize (CLEANUP_EXPENSIVE
);
2573 reorder_basic_blocks ();
2574 cleanup_cfg (CLEANUP_EXPENSIVE
);
2576 FOR_EACH_BB_FN (bb
, fun
)
2577 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (fun
))
2578 bb
->aux
= bb
->next_bb
;
2579 cfg_layout_finalize ();
2587 make_pass_reorder_blocks (gcc::context
*ctxt
)
2589 return new pass_reorder_blocks (ctxt
);
2592 /* Duplicate a block (that we already know ends in a computed jump) into its
2593 predecessors, where possible. Return whether anything is changed. */
2595 maybe_duplicate_computed_goto (basic_block bb
, int max_size
)
2597 if (single_pred_p (bb
))
2600 /* Make sure that the block is small enough. */
2602 FOR_BB_INSNS (bb
, insn
)
2605 max_size
-= get_attr_min_length (insn
);
2610 bool changed
= false;
2613 for (ei
= ei_start (bb
->preds
); (e
= ei_safe_edge (ei
)); )
2615 basic_block pred
= e
->src
;
2617 /* Do not duplicate BB into PRED if that is the last predecessor, or if
2618 we cannot merge a copy of BB with PRED. */
2619 if (single_pred_p (bb
)
2620 || !single_succ_p (pred
)
2621 || e
->flags
& EDGE_COMPLEX
2622 || pred
->index
< NUM_FIXED_BLOCKS
2623 || (JUMP_P (BB_END (pred
)) && !simplejump_p (BB_END (pred
)))
2624 || (JUMP_P (BB_END (pred
)) && CROSSING_JUMP_P (BB_END (pred
))))
2631 fprintf (dump_file
, "Duplicating computed goto bb %d into bb %d\n",
2632 bb
->index
, e
->src
->index
);
2634 /* Remember if PRED can be duplicated; if so, the copy of BB merged
2635 with PRED can be duplicated as well. */
2636 bool can_dup_more
= can_duplicate_block_p (pred
);
2638 /* Make a copy of BB, merge it into PRED. */
2639 basic_block copy
= duplicate_block (bb
, e
, NULL
);
2640 emit_barrier_after_bb (copy
);
2641 reorder_insns_nobb (BB_HEAD (copy
), BB_END (copy
), BB_END (pred
));
2642 merge_blocks (pred
, copy
);
2646 /* Try to merge the resulting merged PRED into further predecessors. */
2648 maybe_duplicate_computed_goto (pred
, max_size
);
2654 /* Duplicate the blocks containing computed gotos. This basically unfactors
2655 computed gotos that were factored early on in the compilation process to
2656 speed up edge based data flow. We used to not unfactor them again, which
2657 can seriously pessimize code with many computed jumps in the source code,
2658 such as interpreters. See e.g. PR15242. */
2660 duplicate_computed_gotos (function
*fun
)
2662 /* We are estimating the length of uncond jump insn only once
2663 since the code for getting the insn length always returns
2664 the minimal length now. */
2665 if (uncond_jump_length
== 0)
2666 uncond_jump_length
= get_uncond_jump_length ();
2668 /* Never copy a block larger than this. */
2670 = uncond_jump_length
* PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS
);
2672 bool changed
= false;
2674 /* Try to duplicate all blocks that end in a computed jump and that
2675 can be duplicated at all. */
2677 FOR_EACH_BB_FN (bb
, fun
)
2678 if (computed_jump_p (BB_END (bb
)) && can_duplicate_block_p (bb
))
2679 changed
|= maybe_duplicate_computed_goto (bb
, max_size
);
2681 /* Duplicating blocks will redirect edges and may cause hot blocks
2682 previously reached by both hot and cold blocks to become dominated
2683 only by cold blocks. */
2685 fixup_partitions ();
2690 const pass_data pass_data_duplicate_computed_gotos
=
2692 RTL_PASS
, /* type */
2693 "compgotos", /* name */
2694 OPTGROUP_NONE
, /* optinfo_flags */
2695 TV_REORDER_BLOCKS
, /* tv_id */
2696 0, /* properties_required */
2697 0, /* properties_provided */
2698 0, /* properties_destroyed */
2699 0, /* todo_flags_start */
2700 0, /* todo_flags_finish */
2703 class pass_duplicate_computed_gotos
: public rtl_opt_pass
2706 pass_duplicate_computed_gotos (gcc::context
*ctxt
)
2707 : rtl_opt_pass (pass_data_duplicate_computed_gotos
, ctxt
)
2710 /* opt_pass methods: */
2711 virtual bool gate (function
*);
2712 virtual unsigned int execute (function
*);
2714 }; // class pass_duplicate_computed_gotos
2717 pass_duplicate_computed_gotos::gate (function
*fun
)
2719 if (targetm
.cannot_modify_jumps_p ())
2721 return (optimize
> 0
2722 && flag_expensive_optimizations
2723 && ! optimize_function_for_size_p (fun
));
2727 pass_duplicate_computed_gotos::execute (function
*fun
)
2729 duplicate_computed_gotos (fun
);
2737 make_pass_duplicate_computed_gotos (gcc::context
*ctxt
)
2739 return new pass_duplicate_computed_gotos (ctxt
);
2742 /* This function is the main 'entrance' for the optimization that
2743 partitions hot and cold basic blocks into separate sections of the
2744 .o file (to improve performance and cache locality). Ideally it
2745 would be called after all optimizations that rearrange the CFG have
2746 been called. However part of this optimization may introduce new
2747 register usage, so it must be called before register allocation has
2748 occurred. This means that this optimization is actually called
2749 well before the optimization that reorders basic blocks (see
2752 This optimization checks the feedback information to determine
2753 which basic blocks are hot/cold, updates flags on the basic blocks
2754 to indicate which section they belong in. This information is
2755 later used for writing out sections in the .o file. Because hot
2756 and cold sections can be arbitrarily large (within the bounds of
2757 memory), far beyond the size of a single function, it is necessary
2758 to fix up all edges that cross section boundaries, to make sure the
2759 instructions used can actually span the required distance. The
2760 fixes are described below.
2762 Fall-through edges must be changed into jumps; it is not safe or
2763 legal to fall through across a section boundary. Whenever a
2764 fall-through edge crossing a section boundary is encountered, a new
2765 basic block is inserted (in the same section as the fall-through
2766 source), and the fall through edge is redirected to the new basic
2767 block. The new basic block contains an unconditional jump to the
2768 original fall-through target. (If the unconditional jump is
2769 insufficient to cross section boundaries, that is dealt with a
2770 little later, see below).
2772 In order to deal with architectures that have short conditional
2773 branches (which cannot span all of memory) we take any conditional
2774 jump that attempts to cross a section boundary and add a level of
2775 indirection: it becomes a conditional jump to a new basic block, in
2776 the same section. The new basic block contains an unconditional
2777 jump to the original target, in the other section.
2779 For those architectures whose unconditional branch is also
2780 incapable of reaching all of memory, those unconditional jumps are
2781 converted into indirect jumps, through a register.
2783 IMPORTANT NOTE: This optimization causes some messy interactions
2784 with the cfg cleanup optimizations; those optimizations want to
2785 merge blocks wherever possible, and to collapse indirect jump
2786 sequences (change "A jumps to B jumps to C" directly into "A jumps
2787 to C"). Those optimizations can undo the jump fixes that
2788 partitioning is required to make (see above), in order to ensure
2789 that jumps attempting to cross section boundaries are really able
2790 to cover whatever distance the jump requires (on many architectures
2791 conditional or unconditional jumps are not able to reach all of
2792 memory). Therefore tests have to be inserted into each such
2793 optimization to make sure that it does not undo stuff necessary to
2794 cross partition boundaries. This would be much less of a problem
2795 if we could perform this optimization later in the compilation, but
2796 unfortunately the fact that we may need to create indirect jumps
2797 (through registers) requires that this optimization be performed
2798 before register allocation.
2800 Hot and cold basic blocks are partitioned and put in separate
2801 sections of the .o file, to reduce paging and improve cache
2802 performance (hopefully). This can result in bits of code from the
2803 same function being widely separated in the .o file. However this
2804 is not obvious to the current bb structure. Therefore we must take
2805 care to ensure that: 1). There are no fall_thru edges that cross
2806 between sections; 2). For those architectures which have "short"
2807 conditional branches, all conditional branches that attempt to
2808 cross between sections are converted to unconditional branches;
2809 and, 3). For those architectures which have "short" unconditional
2810 branches, all unconditional branches that attempt to cross between
2811 sections are converted to indirect jumps.
2813 The code for fixing up fall_thru edges that cross between hot and
2814 cold basic blocks does so by creating new basic blocks containing
2815 unconditional branches to the appropriate label in the "other"
2816 section. The new basic block is then put in the same (hot or cold)
2817 section as the original conditional branch, and the fall_thru edge
2818 is modified to fall into the new basic block instead. By adding
2819 this level of indirection we end up with only unconditional branches
2820 crossing between hot and cold sections.
2822 Conditional branches are dealt with by adding a level of indirection.
2823 A new basic block is added in the same (hot/cold) section as the
2824 conditional branch, and the conditional branch is retargeted to the
2825 new basic block. The new basic block contains an unconditional branch
2826 to the original target of the conditional branch (in the other section).
2828 Unconditional branches are dealt with by converting them into
2833 const pass_data pass_data_partition_blocks
=
2835 RTL_PASS
, /* type */
2836 "bbpart", /* name */
2837 OPTGROUP_NONE
, /* optinfo_flags */
2838 TV_REORDER_BLOCKS
, /* tv_id */
2839 PROP_cfglayout
, /* properties_required */
2840 0, /* properties_provided */
2841 0, /* properties_destroyed */
2842 0, /* todo_flags_start */
2843 0, /* todo_flags_finish */
2846 class pass_partition_blocks
: public rtl_opt_pass
2849 pass_partition_blocks (gcc::context
*ctxt
)
2850 : rtl_opt_pass (pass_data_partition_blocks
, ctxt
)
2853 /* opt_pass methods: */
2854 virtual bool gate (function
*);
2855 virtual unsigned int execute (function
*);
2857 }; // class pass_partition_blocks
2860 pass_partition_blocks::gate (function
*fun
)
2862 /* The optimization to partition hot/cold basic blocks into separate
2863 sections of the .o file does not work well with linkonce or with
2864 user defined section attributes. Don't call it if either case
2866 return (flag_reorder_blocks_and_partition
2868 /* See pass_reorder_blocks::gate. We should not partition if
2869 we are going to omit the reordering. */
2870 && optimize_function_for_speed_p (fun
)
2871 && !DECL_COMDAT_GROUP (current_function_decl
)
2872 && !lookup_attribute ("section", DECL_ATTRIBUTES (fun
->decl
)));
2876 pass_partition_blocks::execute (function
*fun
)
2878 vec
<edge
> crossing_edges
;
2880 if (n_basic_blocks_for_fn (fun
) <= NUM_FIXED_BLOCKS
+ 1)
2883 df_set_flags (DF_DEFER_INSN_RESCAN
);
2885 crossing_edges
= find_rarely_executed_basic_blocks_and_crossing_edges ();
2886 if (!crossing_edges
.exists ())
2889 crtl
->has_bb_partition
= true;
2891 /* Make sure the source of any crossing edge ends in a jump and the
2892 destination of any crossing edge has a label. */
2893 add_labels_and_missing_jumps (crossing_edges
);
2895 /* Convert all crossing fall_thru edges to non-crossing fall
2896 thrus to unconditional jumps (that jump to the original fall
2898 fix_up_fall_thru_edges ();
2900 /* If the architecture does not have conditional branches that can
2901 span all of memory, convert crossing conditional branches into
2902 crossing unconditional branches. */
2903 if (!HAS_LONG_COND_BRANCH
)
2904 fix_crossing_conditional_branches ();
2906 /* If the architecture does not have unconditional branches that
2907 can span all of memory, convert crossing unconditional branches
2908 into indirect jumps. Since adding an indirect jump also adds
2909 a new register usage, update the register usage information as
2911 if (!HAS_LONG_UNCOND_BRANCH
)
2912 fix_crossing_unconditional_branches ();
2914 update_crossing_jump_flags ();
2916 /* Clear bb->aux fields that the above routines were using. */
2917 clear_aux_for_blocks ();
2919 crossing_edges
.release ();
2921 /* ??? FIXME: DF generates the bb info for a block immediately.
2922 And by immediately, I mean *during* creation of the block.
2924 #0 df_bb_refs_collect
2925 #1 in df_bb_refs_record
2926 #2 in create_basic_block_structure
2928 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2929 will *always* fail, because no edges can have been added to the
2930 block yet. Which of course means we don't add the right
2931 artificial refs, which means we fail df_verify (much) later.
2933 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2934 that we also shouldn't grab data from the new blocks those new
2935 insns are in either. In this way one can create the block, link
2936 it up properly, and have everything Just Work later, when deferred
2937 insns are processed.
2939 In the meantime, we have no other option but to throw away all
2940 of the DF data and recompute it all. */
2941 if (fun
->eh
->lp_array
)
2943 df_finish_pass (true);
2944 df_scan_alloc (NULL
);
2946 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2947 data. We blindly generated all of them when creating the new
2948 landing pad. Delete those assignments we don't use. */
2949 df_set_flags (DF_LR_RUN_DCE
);
2959 make_pass_partition_blocks (gcc::context
*ctxt
)
2961 return new pass_partition_blocks (ctxt
);