/* Basic block reordering routines for the GNU compiler.
   Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
   2012 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see the
   function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the actual basic block is
   lower than Branch Threshold, or the frequency of the successor is lower
   than Exec Threshold, the successor will be the seed in one of the next
   rounds.  Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero so that the
   remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round, and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, the pass first checks whether there is an edge
   from the last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there
   exists a basic block BB such that BB is a successor of the last bb of one
   trace and BB is a predecessor of the first block of another trace.  In
   this case, BB is duplicated and the traces are connected through this
   duplicate.
   The rest of the traces are simply connected, so there will be a jump to
   the beginning of the rest of each trace.
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
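/* A minimal, self-contained sketch (not GCC code) of the round structure
   described above: blocks that are not "hot enough" for the current round
   are deferred to a later round, and the last round's threshold of zero
   picks up everything that remains.  All names below are hypothetical
   illustrations, kept out of the build with #if 0.  */
#if 0
#include <stdio.h>

static void
sketch_rounds (void)
{
  int freq[6] = { 900, 40, 500, 10, 300, 0 };   /* per-block "frequencies" */
  int exec_th[3] = { 500, 50, 0 };              /* decreasing thresholds */
  int picked[6] = { 0, 0, 0, 0, 0, 0 };
  int round, b;

  for (round = 0; round < 3; round++)
    for (b = 0; b < 6; b++)
      if (!picked[b] && freq[b] >= exec_th[round])
        {
          /* In the real pass the block would start or extend a trace
             here; the sketch only records the accepting round.  */
          picked[b] = 1;
          printf ("block %d accepted in round %d\n", b, round);
        }
}
#endif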
#include "coretypes.h"
#include "diagnostic-core.h"
#include "toplev.h" /* user_defined_section_attribute */
#include "tree-pass.h"
#include "bb-reorder.h"
/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
#define N_ROUNDS 5

/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */

#ifndef HAVE_return
#define HAVE_return 0
#define gen_return() NULL_RTX
#endif
struct target_bb_reorder default_target_bb_reorder;
#if SWITCHABLE_TARGET
struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
#endif

#define uncond_jump_length \
  (this_target_bb_reorder->x_uncond_jump_length)
/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};

/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};

/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
   block the edge destination is not duplicated while connecting traces.  */
#define DUPLICATION_THRESHOLD 100
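
/* For example (illustrative arithmetic only): with REG_BR_PROB_BASE equal
   to 10000, round 0 passes REG_BR_PROB_BASE * branch_threshold[0] / 1000
   == 4000 to find_traces_1_round, so an edge needs a probability of at
   least 40% to extend a trace in that round; by round 3 the threshold is
   0 and any probability qualifies.  */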
/* Structure to hold needed information for each basic block.  */
typedef struct bbro_basic_block_data_def
{
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
  int start_of_trace;

  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
  int end_of_trace;

  /* Which trace is the bb in?  */
  int in_trace;

  /* Which trace was this bb visited in?  */
  int visited;

  /* Which heap is BB in (if any)?  */
  fibheap_t heap;

  /* Which heap node is BB in (if any)?  */
  fibnode_t node;
} bbro_basic_block_data;
/* The current size of the following dynamic array.  */
static int array_size;

/* The array which holds needed information for basic blocks.  */
static bbro_basic_block_data *bbd;

/* To avoid frequent reallocation the size of arrays is greater than needed,
   the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
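
/* For example, GET_ARRAY_SIZE (100) is ((100 / 4) + 1) * 5 == 130 and
   GET_ARRAY_SIZE (7) is ((7 / 4) + 1) * 5 == 10, so the allocation always
   carries at least the promised 25% headroom.  */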
/* Free the memory and set the pointer to NULL.  */
#define FREE(P) (gcc_assert (P), free (P), P = 0)
/* Structure for holding information about a trace.  */
struct trace
{
  /* First and last basic block of the trace.  */
  basic_block first, last;

  /* The round of the STC creation which this trace was found in.  */
  int round;

  /* The length (i.e. the number of basic blocks) of the trace.  */
  int length;
};

/* Maximum frequency and count of one of the entry blocks.  */
static int max_entry_frequency;
static gcov_type max_entry_count;
/* Local function prototypes.  */
static void find_traces (int *, struct trace *);
static basic_block rotate_loop (edge, struct trace *, int);
static void mark_bb_visited (basic_block, int);
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
				 int, fibheap_t *, int);
static basic_block copy_bb (basic_block, edge, basic_block, int);
static fibheapkey_t bb_to_key (basic_block);
static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
			   const_edge);
static void connect_traces (int, struct trace *);
static bool copy_bb_p (const_basic_block, int);
static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
/* Return the trace number in which BB was visited.  */

static int
bb_visited_trace (const_basic_block bb)
{
  gcc_assert (bb->index < array_size);
  return bbd[bb->index].visited;
}
/* This function marks BB that it was visited in trace number TRACE.  */

static void
mark_bb_visited (basic_block bb, int trace)
{
  bbd[bb->index].visited = trace;
  if (bbd[bb->index].heap)
    {
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;
    }
}
/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are:
   1) the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be collected
   into traces until the very last round); or 2) there will be another
   round, and the basic block is not "hot enough" for the current round
   of trace collection.  */

static bool
push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
		      int exec_th, gcov_type count_th)
{
  bool there_exists_another_round;
  bool block_not_hot_enough;

  there_exists_another_round = round < number_of_rounds - 1;

  block_not_hot_enough = (bb->frequency < exec_th
			  || bb->count < count_th
			  || probably_never_executed_bb_p (cfun, bb));

  return there_exists_another_round && block_not_hot_enough;
}
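
/* For instance (illustrative numbers only): with number_of_rounds == 5, a
   block examined in round 1 with frequency 30 against exec_th == 50 is
   pushed to round 2, while any block examined in the final round
   (round == number_of_rounds - 1) is always kept, because no further
   round exists.  */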
/* Find the traces for Software Trace Cache.  Chain each trace through
   RBI ()->next.  Store the number of traces to N_TRACES and the
   description of traces to TRACES.  */

static void
find_traces (int *n_traces, struct trace *traces)
{
  int i;
  int number_of_rounds;
  edge e;
  edge_iterator ei;
  fibheap_t heap;

  /* Add one extra round of trace collection when partitioning hot/cold
     basic blocks into separate sections.  The last round is for all the
     cold blocks (and ONLY the cold blocks).  */

  number_of_rounds = N_ROUNDS - 1;

  /* Insert entry points of function into heap.  */
  heap = fibheap_new ();
  max_entry_frequency = 0;
  max_entry_count = 0;
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      bbd[e->dest->index].heap = heap;
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
						 e->dest);
      if (e->dest->frequency > max_entry_frequency)
	max_entry_frequency = e->dest->frequency;
      if (e->dest->count > max_entry_count)
	max_entry_count = e->dest->count;
    }

  /* Find the traces.  */
  for (i = 0; i < number_of_rounds; i++)
    {
      gcov_type count_threshold;

      if (dump_file)
	fprintf (dump_file, "STC - round %d\n", i + 1);

      if (max_entry_count < INT_MAX / 1000)
	count_threshold = max_entry_count * exec_threshold[i] / 1000;
      else
	count_threshold = max_entry_count / 1000 * exec_threshold[i];

      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
			   max_entry_frequency * exec_threshold[i] / 1000,
			   count_threshold, traces, n_traces, i, &heap,
			   number_of_rounds);
    }
  fibheap_delete (heap);

  if (dump_file)
    {
      basic_block bb;

      for (i = 0; i < *n_traces; i++)
	{
	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
		   traces[i].round + 1);
	  for (bb = traces[i].first; bb != traces[i].last;
	       bb = (basic_block) bb->aux)
	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
	}
    }
}
/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
   (with sequential number TRACE_N).  */

static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
{
  basic_block bb;

  /* Information about the best end (end after rotation) of the loop.  */
  basic_block best_bb = NULL;
  edge best_edge = NULL;
  int best_freq = -1;
  gcov_type best_count = -1;
  /* The best edge is preferred when its destination is not visited yet
     or is a start block of some trace.  */
  bool is_preferred = false;

  /* Find the most frequent edge that goes out from current trace.  */
  bb = back_edge->dest;
  do
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && bb_visited_trace (e->dest) != trace_n
	    && (e->flags & EDGE_CAN_FALLTHRU)
	    && !(e->flags & EDGE_COMPLEX))
	  {
	    if (is_preferred)
	      {
		/* The best edge is preferred.  */
		if (!bb_visited_trace (e->dest)
		    || bbd[e->dest->index].start_of_trace >= 0)
		  {
		    /* The current edge E is also preferred.  */
		    int freq = EDGE_FREQUENCY (e);
		    if (freq > best_freq || e->count > best_count)
		      {
			best_freq = freq;
			best_count = e->count;
			best_edge = e;
			best_bb = bb;
		      }
		  }
	      }
	    else
	      {
		if (!bb_visited_trace (e->dest)
		    || bbd[e->dest->index].start_of_trace >= 0)
		  {
		    /* The current edge E is preferred.  */
		    is_preferred = true;
		    best_freq = EDGE_FREQUENCY (e);
		    best_count = e->count;
		    best_edge = e;
		    best_bb = bb;
		  }
		else
		  {
		    int freq = EDGE_FREQUENCY (e);
		    if (!best_edge || freq > best_freq || e->count > best_count)
		      {
			best_freq = freq;
			best_count = e->count;
			best_edge = e;
			best_bb = bb;
		      }
		  }
	      }
	  }
      bb = (basic_block) bb->aux;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block
	 of the trace.  */
      if (back_edge->dest == trace->first)
	{
	  trace->first = (basic_block) best_bb->aux;
	}
      else
	{
	  basic_block prev_bb;

	  for (prev_bb = trace->first;
	       prev_bb->aux != back_edge->dest;
	       prev_bb = (basic_block) prev_bb->aux)
	    ;
	  prev_bb->aux = best_bb->aux;

	  /* Try to get rid of uncond jump to cond jump.  */
	  if (single_succ_p (prev_bb))
	    {
	      basic_block header = single_succ (prev_bb);

	      /* Duplicate HEADER if it is a small block containing cond jump
		 in the end.  */
	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
				     NULL_RTX))
		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
	    }
	}
    }
  else
    {
      /* We have not found suitable loop tail so do no rotation.  */
      best_bb = back_edge->src;
    }
  best_bb->aux = NULL;
  return best_bb;
}
/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
   not include basic blocks whose probability is lower than BRANCH_TH or whose
   frequency is lower than EXEC_TH into traces (or whose count is lower than
   COUNT_TH).  Store the new traces into TRACES and modify the number of
   traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
   The function expects starting basic blocks to be in *HEAP; it deletes
   *HEAP and stores starting points for the next round into a new *HEAP.  */

static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
		     struct trace *traces, int *n_traces, int round,
		     fibheap_t *heap, int number_of_rounds)
{
  /* Heap for discarded basic blocks which are possible starting points for
     the next round.  */
  fibheap_t new_heap = fibheap_new ();

  while (!fibheap_empty (*heap))
    {
      basic_block bb;
      struct trace *trace;
      edge best_edge, e;
      fibheapkey_t key;
      edge_iterator ei;

      bb = (basic_block) fibheap_extract_min (*heap);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;

      if (dump_file)
	fprintf (dump_file, "Getting bb %d\n", bb->index);

      /* If the BB's frequency is too low, send BB to the next round.  When
	 partitioning hot/cold blocks into separate sections, make sure all
	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
	 round.  */
      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
				count_th))
	{
	  int key = bb_to_key (bb);
	  bbd[bb->index].heap = new_heap;
	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Possible start point of next round: %d (key: %d)\n",
		     bb->index, key);
	  continue;
	}

      trace = traces + *n_traces;
      trace->first = bb;
      trace->round = round;
      trace->length = 0;
      bbd[bb->index].in_trace = *n_traces;
      (*n_traces)++;

      do
	{
	  int prob, freq;
	  bool ends_in_call;

	  /* The probability and frequency of the best edge.  */
	  int best_prob = INT_MIN / 2;
	  int best_freq = INT_MIN / 2;

	  best_edge = NULL;
	  mark_bb_visited (bb, *n_traces);
	  trace->length++;

	  if (dump_file)
	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
		     bb->index, *n_traces - 1);

	  ends_in_call = block_ends_with_call_p (bb);

	  /* Select the successor that will be placed after BB.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      gcc_assert (!(e->flags & EDGE_FAKE));

	      if (e->dest == EXIT_BLOCK_PTR)
		continue;

	      if (bb_visited_trace (e->dest)
		  && bb_visited_trace (e->dest) != *n_traces)
		continue;

	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
		continue;

	      prob = e->probability;
	      freq = e->dest->frequency;

	      /* The only sensible preference for a call instruction is the
		 fallthru edge.  Don't bother selecting anything else.  */
	      if (ends_in_call)
		{
		  if (e->flags & EDGE_CAN_FALLTHRU)
		    {
		      best_edge = e;
		      best_prob = prob;
		      best_freq = freq;
		    }
		  continue;
		}

	      /* Edge that cannot be fallthru or improbable or infrequent
		 successor (i.e. it is an unsuitable successor).  */
	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
		  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
		  || e->count < count_th)
		continue;

	      /* If partitioning hot/cold basic blocks, don't consider edges
		 that cross section boundaries.  */

	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
				 best_edge))
		{
		  best_edge = e;
		  best_prob = prob;
		  best_freq = freq;
		}
	    }

	  /* If the best destination has multiple predecessors, and can be
	     duplicated cheaper than a jump, don't allow it to be added
	     to a trace.  We'll duplicate it when connecting traces.  */
	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
	      && copy_bb_p (best_edge->dest, 0))
	    best_edge = NULL;

	  /* Add all non-selected successors to the heaps.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (e == best_edge
		  || e->dest == EXIT_BLOCK_PTR
		  || bb_visited_trace (e->dest))
		continue;

	      key = bb_to_key (e->dest);

	      if (bbd[e->dest->index].heap)
		{
		  /* E->DEST is already in some heap.  */
		  if (key != bbd[e->dest->index].node->key)
		    {
		      if (dump_file)
			{
			  fprintf (dump_file,
				   "Changing key for bb %d from %ld to %ld.\n",
				   e->dest->index,
				   (long) bbd[e->dest->index].node->key,
				   key);
			}
		      fibheap_replace_key (bbd[e->dest->index].heap,
					   bbd[e->dest->index].node, key);
		    }
		}
	      else
		{
		  fibheap_t which_heap = *heap;

		  prob = e->probability;
		  freq = EDGE_FREQUENCY (e);

		  if (!(e->flags & EDGE_CAN_FALLTHRU)
		      || (e->flags & EDGE_COMPLEX)
		      || prob < branch_th || freq < exec_th
		      || e->count < count_th)
		    {
		      /* When partitioning hot/cold basic blocks, make sure
			 the cold blocks (and only the cold blocks) all get
			 pushed to the last round of trace collection.  */
		      if (push_to_next_round_p (e->dest, round,
						number_of_rounds,
						exec_th, count_th))
			which_heap = new_heap;
		    }

		  bbd[e->dest->index].heap = which_heap;
		  bbd[e->dest->index].node = fibheap_insert (which_heap,
							     key, e->dest);

		  if (dump_file)
		    fprintf (dump_file,
			     "  Possible start of %s round: %d (key: %ld)\n",
			     (which_heap == new_heap) ? "next" : "this",
			     e->dest->index, (long) key);
		}
	    }

	  if (best_edge) /* Suitable successor was found.  */
	    {
	      if (bb_visited_trace (best_edge->dest) == *n_traces)
		{
		  /* We do nothing with one basic block loops.  */
		  if (best_edge->dest != bb)
		    {
		      if (EDGE_FREQUENCY (best_edge)
			  > 4 * best_edge->dest->frequency / 5)
			{
			  /* The loop has at least 4 iterations.  If the loop
			     header is not the first block of the function
			     we can rotate the loop.  */
			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
			    {
			      if (dump_file)
				fprintf (dump_file,
					 "Rotating loop %d - %d\n",
					 best_edge->dest->index, bb->index);
			      bb->aux = best_edge->dest;
			      bbd[best_edge->dest->index].in_trace =
							     *n_traces - 1;
			      bb = rotate_loop (best_edge, trace, *n_traces);
			    }
			}
		      else
			{
			  /* The loop has less than 4 iterations.  */
			  if (single_succ_p (bb)
			      && copy_bb_p (best_edge->dest,
					    optimize_edge_for_speed_p
					      (best_edge)))
			    {
			      bb = copy_bb (best_edge->dest, best_edge, bb,
					    *n_traces);
			      trace->length++;
			    }
			}
		    }

		  /* Terminate the trace.  */
		  break;
		}
	      else
		{
		  /* Check for a situation

		    A
		   /|
		  B |
		   \|
		    C

		  where
		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
		    >= EDGE_FREQUENCY (AC).
		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
		  Best ordering is then A B C.

		  This situation is created for example by:

		  if (*)
		    B;
		  C;  */

		  FOR_EACH_EDGE (e, ei, bb->succs)
		    if (e != best_edge
			&& (e->flags & EDGE_CAN_FALLTHRU)
			&& !(e->flags & EDGE_COMPLEX)
			&& !bb_visited_trace (e->dest)
			&& single_pred_p (e->dest)
			&& !(e->flags & EDGE_CROSSING)
			&& single_succ_p (e->dest)
			&& (single_succ_edge (e->dest)->flags
			    & EDGE_CAN_FALLTHRU)
			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
			&& single_succ (e->dest) == best_edge->dest
			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
		      {
			best_edge = e;
			if (dump_file)
			  fprintf (dump_file, "Selecting BB %d\n",
				   best_edge->dest->index);
			break;
		      }

		  bb->aux = best_edge->dest;
		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
		  bb = best_edge->dest;
		}
	    }
	}
      while (best_edge);

      trace->last = bb;
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
      bbd[trace->last->index].end_of_trace = *n_traces - 1;

      /* The trace is terminated so we have to recount the keys in heap
	 (some block can have a lower key because now one of its predecessors
	 is an end of the trace).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest == EXIT_BLOCK_PTR
	      || bb_visited_trace (e->dest))
	    continue;

	  if (bbd[e->dest->index].heap)
	    {
	      key = bb_to_key (e->dest);
	      if (key != bbd[e->dest->index].node->key)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Changing key for bb %d from %ld to %ld.\n",
			     e->dest->index,
			     (long) bbd[e->dest->index].node->key, key);
		  fibheap_replace_key (bbd[e->dest->index].heap,
				       bbd[e->dest->index].node,
				       key);
		}
	    }
	}
    }

  fibheap_delete (*heap);

  /* "Return" the new heap.  */
  *heap = new_heap;
}
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to trace after BB, mark OLD_BB visited and update pass' data structures
   (TRACE is a number of trace which OLD_BB is duplicated to).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
{
  basic_block new_bb;

  new_bb = duplicate_block (old_bb, e, bb);
  BB_COPY_PARTITION (new_bb, old_bb);

  gcc_assert (e->dest == new_bb);

  if (dump_file)
    fprintf (dump_file,
	     "Duplicated bb %d (created bb %d)\n",
	     old_bb->index, new_bb->index);

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      /* We need to extend the bbd array.  */
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
      for (i = array_size; i < new_size; i++)
	{
	  bbd[i].start_of_trace = -1;
	  bbd[i].end_of_trace = -1;
	  bbd[i].in_trace = -1;
	  bbd[i].visited = 0;
	  bbd[i].heap = NULL;
	  bbd[i].node = NULL;
	}
      array_size = new_size;

      if (dump_file)
	fprintf (dump_file,
		 "Growing the dynamic array to %d elements.\n",
		 array_size);
    }

  gcc_assert (!bb_visited_trace (e->dest));
  mark_bb_visited (new_bb, trace);
  new_bb->aux = bb->aux;
  bb->aux = new_bb;

  bbd[new_bb->index].in_trace = trace;

  return new_bb;
}
/* Compute and return the key (for the heap) of the basic block BB.  */

static fibheapkey_t
bb_to_key (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int priority = 0;

  /* Do not start in probably never executed blocks.  */

  if (BB_PARTITION (bb) == BB_COLD_PARTITION
      || probably_never_executed_bb_p (cfun, bb))
    return BB_FREQ_MAX;

  /* Prefer blocks whose predecessor is an end of some trace
     or whose predecessor edge is EDGE_DFS_BACK.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
	  || (e->flags & EDGE_DFS_BACK))
	{
	  int edge_freq = EDGE_FREQUENCY (e);

	  if (edge_freq > priority)
	    priority = edge_freq;
	}
    }

  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);

  return -bb->frequency;
}
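
/* For example (illustrative numbers only): with BB_FREQ_MAX of 10000, a
   block of frequency 800 whose best qualifying predecessor edge has
   frequency 600 gets the key -(100 * 10000 + 100 * 600 + 800), i.e.
   -1060800, while the same block without such a predecessor gets -800;
   since the heap extracts the minimum key, the prioritized block is
   chosen first.  */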
/* Return true when the edge E from basic block BB is better than the
   temporary best edge (details are in function).  The probability of edge E
   is PROB.  The frequency of the successor is FREQ.  The current best
   probability is BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered to be equivalent when PROB does not differ much
   from BEST_PROB; similarly for frequency.  */
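
/* For instance (illustrative numbers only): with BEST_PROB of 7000 the
   tolerance computed below is 700, so any PROB from 6300 through 7700 is
   treated as equivalent to BEST_PROB and the decision falls through to
   the frequency comparison.  */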
static bool
better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
	       int best_prob, int best_freq, const_edge cur_best_edge)
{
  bool is_better_edge;

  /* The BEST_* values do not have to be best, but can be a bit smaller than
     the maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
  else if (prob < best_prob - diff_prob)
    /* The edge has lower probability than the temporary best edge.  */
    is_better_edge = false;
  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
       probabilities.  The higher frequency of a successor now means
       that there is another edge going into that successor.
       This successor has lower frequency so it is better.  */
    is_better_edge = true;
  else if (freq > best_freq + diff_freq)
    /* This successor has higher frequency so it is worse.  */
    is_better_edge = false;
  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Select the previous successor.  */
    is_better_edge = true;
  else
    is_better_edge = false;

  /* If we are doing hot/cold partitioning, make sure that we always favor
     non-crossing edges over crossing edges.  */

  if (!is_better_edge
      && flag_reorder_blocks_and_partition
      && cur_best_edge
      && (cur_best_edge->flags & EDGE_CROSSING)
      && !(e->flags & EDGE_CROSSING))
    is_better_edge = true;

  return is_better_edge;
}
/* Connect traces in array TRACES, N_TRACES is the count of traces.  */

static void
connect_traces (int n_traces, struct trace *traces)
{
  int i;
  bool *connected;
  bool two_passes;
  int last_trace;
  int current_pass;
  int current_partition;
  int freq_threshold;
  gcov_type count_threshold;

  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
  if (max_entry_count < INT_MAX / 1000)
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
  else
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;

  connected = XCNEWVEC (bool, n_traces);
  last_trace = -1;
  current_pass = 1;
  current_partition = BB_PARTITION (traces[0].first);
  two_passes = false;

  if (flag_reorder_blocks_and_partition)
    for (i = 0; i < n_traces && !two_passes; i++)
      if (BB_PARTITION (traces[0].first)
	  != BB_PARTITION (traces[i].first))
	two_passes = true;

  for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (i >= n_traces)
	{
	  gcc_assert (two_passes && current_pass == 1);
	  i = 0;
	  t = i;
	  current_pass = 2;
	  if (current_partition == BB_HOT_PARTITION)
	    current_partition = BB_COLD_PARTITION;
	  else
	    current_partition = BB_HOT_PARTITION;
	}

      if (connected[t])
	continue;

      if (two_passes
	  && BB_PARTITION (traces[t].first) != current_partition)
	continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
	{
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
	    {
	      int si = e->src->index;

	      if (e->src != ENTRY_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[si].end_of_trace >= 0
		  && !connected[bbd[si].end_of_trace]
		  && (BB_PARTITION (e->src) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[si].end_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[si].end_of_trace].length;
		}
	    }
	  if (best)
	    {
	      best->src->aux = best->dest;
	      t2 = bbd[best->src->index].end_of_trace;
	      connected[t2] = true;

	      if (dump_file)
		fprintf (dump_file, "Connection: %d %d\n",
			 best->src->index, best->dest->index);
	    }
	  else
	    break;
	}

      if (last_trace >= 0)
	traces[last_trace].last->aux = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
	{
	  /* Find the continuation of the chain.  */
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
	    {
	      int di = e->dest->index;

	      if (e->dest != EXIT_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[di].start_of_trace >= 0
		  && !connected[bbd[di].start_of_trace]
		  && (BB_PARTITION (e->dest) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[di].start_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[di].start_of_trace].length;
		}
	    }

	  if (best)
	    {
	      if (dump_file)
		fprintf (dump_file, "Connection: %d %d\n",
			 best->src->index, best->dest->index);
	      t = bbd[best->dest->index].start_of_trace;
	      traces[last_trace].last->aux = traces[t].first;
	      connected[t] = true;
	      last_trace = t;
	    }
	  else
	    {
	      /* Try to connect the traces by duplication of 1 block.  */
	      edge e2;
	      basic_block next_bb = NULL;
	      bool try_copy = false;

	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
		if (e->dest != EXIT_BLOCK_PTR
		    && (e->flags & EDGE_CAN_FALLTHRU)
		    && !(e->flags & EDGE_COMPLEX)
		    && (!best || e->probability > best->probability))
		  {
		    edge_iterator ei;
		    edge best2 = NULL;
		    int best2_len = 0;

		    /* If the destination is a start of a trace which is only
		       one block long, then no need to search the successor
		       blocks of the trace.  Accept it.  */
		    if (bbd[e->dest->index].start_of_trace >= 0
			&& traces[bbd[e->dest->index].start_of_trace].length
			   == 1)
		      {
			best = e;
			try_copy = true;
			continue;
		      }

		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
		      {
			int di = e2->dest->index;

			if (e2->dest == EXIT_BLOCK_PTR
			    || ((e2->flags & EDGE_CAN_FALLTHRU)
				&& !(e2->flags & EDGE_COMPLEX)
				&& bbd[di].start_of_trace >= 0
				&& !connected[bbd[di].start_of_trace]
				&& (BB_PARTITION (e2->dest)
				    == current_partition)
				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
				&& (e2->count >= count_threshold)
				&& (!best2
				    || e2->probability > best2->probability
				    || (e2->probability == best2->probability
					&& traces[bbd[di].start_of_trace].length
					   > best2_len))))
			  {
			    best = e;
			    best2 = e2;
			    if (e2->dest != EXIT_BLOCK_PTR)
			      best2_len
				= traces[bbd[di].start_of_trace].length;
			    else
			      best2_len = INT_MAX;
			    next_bb = e2->dest;
			    try_copy = true;
			  }
		      }
		  }

	      if (flag_reorder_blocks_and_partition)
		try_copy = false;

	      /* Copy tiny blocks always; copy larger blocks only when the
		 edge is traversed frequently enough.  */
	      if (try_copy
		  && copy_bb_p (best->dest,
				optimize_edge_for_speed_p (best)
				&& EDGE_FREQUENCY (best) >= freq_threshold
				&& best->count >= count_threshold))
		{
		  basic_block new_bb;

		  if (dump_file)
		    {
		      fprintf (dump_file, "Connection: %d %d ",
			       traces[t].last->index, best->dest->index);
		      if (!next_bb)
			fputc ('\n', dump_file);
		      else if (next_bb == EXIT_BLOCK_PTR)
			fprintf (dump_file, "exit\n");
		      else
			fprintf (dump_file, "%d\n", next_bb->index);
		    }

		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
		  traces[t].last = new_bb;
		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
		    {
		      t = bbd[next_bb->index].start_of_trace;
		      traces[last_trace].last->aux = traces[t].first;
		      connected[t] = true;
		      last_trace = t;
		    }
		  else
		    break;	/* Stop finding the successor traces.  */
		}
	      else
		break;	/* Stop finding the successor traces.  */
	    }
	}
    }

  if (dump_file)
    {
      basic_block bb;

      fprintf (dump_file, "Final order:\n");
      for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
	fprintf (dump_file, "%d ", bb->index);
      fprintf (dump_file, "\n");
    }

  FREE (connected);
}
/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */

static bool
copy_bb_p (const_basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx insn;

  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  if (code_may_grow && optimize_bb_for_speed_p (bb))
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
	size += get_attr_min_length (insn);
    }

  if (size <= max_size)
    return true;

  if (dump_file)
    fprintf (dump_file,
	     "Block %d can't be copied because its size = %d.\n",
	     bb->index, size);

  return false;
}
/* Return the length of unconditional jump instruction.  */

int
get_uncond_jump_length (void)
{
  rtx label, jump;
  int length;

  label = emit_label_before (gen_label_rtx (), get_insns ());
  jump = emit_jump_insn (gen_jump (label));

  length = get_attr_min_length (jump);

  delete_insn (jump);
  delete_insn (label);
  return length;
}
/* Emit a barrier into the footer of BB.  */

static void
emit_barrier_after_bb (basic_block bb)
{
  rtx barrier = emit_barrier_after (BB_END (bb));
  BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier);
}
/* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
   Duplicate the landing pad and split the edges so that no EH edge
   crosses partitions.  */

static void
fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
{
  eh_landing_pad new_lp;
  basic_block new_bb, last_bb, post_bb;
  rtx new_label, jump, post_label;
  unsigned new_partition;
  edge_iterator ei;
  edge e;

  /* Generate the new landing-pad structure.  */
  new_lp = gen_eh_landing_pad (old_lp->region);
  new_lp->post_landing_pad = old_lp->post_landing_pad;
  new_lp->landing_pad = gen_label_rtx ();
  LABEL_PRESERVE_P (new_lp->landing_pad) = 1;

  /* Put appropriate instructions in new bb.  */
  new_label = emit_label (new_lp->landing_pad);

  expand_dw2_landing_pad_for_region (old_lp->region);

  post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
  post_bb = single_succ (post_bb);
  post_label = block_label (post_bb);
  jump = emit_jump_insn (gen_jump (post_label));
  JUMP_LABEL (jump) = post_label;

  /* Create new basic block to be dest for lp.  */
  last_bb = EXIT_BLOCK_PTR->prev_bb;
  new_bb = create_basic_block (new_label, jump, last_bb);
  new_bb->aux = last_bb->aux;
  last_bb->aux = new_bb;

  emit_barrier_after_bb (new_bb);

  make_edge (new_bb, post_bb, 0);

  /* Make sure new bb is in the other partition.  */
  new_partition = BB_PARTITION (old_bb);
  new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
  BB_SET_PARTITION (new_bb, new_partition);

  /* Fix up the edges.  */
  for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
    if (BB_PARTITION (e->src) == new_partition)
      {
	rtx insn = BB_END (e->src);
	rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);

	gcc_assert (note != NULL);
	gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
	XEXP (note, 0) = GEN_INT (new_lp->index);

	/* Adjust the edge to the new destination.  */
	redirect_edge_succ (e, new_bb);
      }
    else
      ei_next (&ei);
}
/* Find the basic blocks that are rarely executed and need to be moved to
   a separate section of the .o file (to cut down on paging and improve
   cache locality).  Return a vector of all edges that cross.  */

static VEC(edge, heap) *
find_rarely_executed_basic_blocks_and_crossing_edges (void)
{
  VEC(edge, heap) *crossing_edges = NULL;
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* Mark which partition (hot/cold) each basic block belongs in.  */
  FOR_EACH_BB (bb)
    {
      if (probably_never_executed_bb_p (cfun, bb))
	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
      else
	BB_SET_PARTITION (bb, BB_HOT_PARTITION);
    }

  /* The format of .gcc_except_table does not allow landing pads to
     be in a different partition from the throw.  Fix this by either
     moving or duplicating the landing pads.  */
  if (cfun->eh->lp_array)
    {
      unsigned i;
      eh_landing_pad lp;

      FOR_EACH_VEC_ELT (eh_landing_pad, cfun->eh->lp_array, i, lp)
	{
	  bool all_same, all_diff;

	  if (lp == NULL
	      || lp->landing_pad == NULL_RTX
	      || !LABEL_P (lp->landing_pad))
	    continue;

	  all_same = all_diff = true;
	  bb = BLOCK_FOR_INSN (lp->landing_pad);
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      gcc_assert (e->flags & EDGE_EH);
	      if (BB_PARTITION (bb) == BB_PARTITION (e->src))
		all_diff = false;
	      else
		all_same = false;
	    }

	  if (all_same)
	    ;
	  else if (all_diff)
	    {
	      int which = BB_PARTITION (bb);
	      which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
	      BB_SET_PARTITION (bb, which);
	    }
	  else
	    fix_up_crossing_landing_pad (lp, bb);
	}
    }

  /* Mark every edge that crosses between sections.  */

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      {
	unsigned int flags = e->flags;

	/* We should never have EDGE_CROSSING set yet.  */
	gcc_checking_assert ((flags & EDGE_CROSSING) == 0);

	if (e->src != ENTRY_BLOCK_PTR
	    && e->dest != EXIT_BLOCK_PTR
	    && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
	  {
	    VEC_safe_push (edge, heap, crossing_edges, e);
	    flags |= EDGE_CROSSING;
	  }

	/* Now that we've split eh edges as appropriate, allow landing pads
	   to be merged with the post-landing pads.  */
	flags &= ~EDGE_PRESERVE;

	e->flags = flags;
      }

  return crossing_edges;
}
/* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */

static void
set_edge_can_fallthru_flag (void)
{
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  e->flags &= ~EDGE_CAN_FALLTHRU;

	  /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
	  if (e->flags & EDGE_FALLTHRU)
	    e->flags |= EDGE_CAN_FALLTHRU;
	}

      /* If the BB ends with an invertible condjump all (2) edges are
	 CAN_FALLTHRU edges.  */
      if (EDGE_COUNT (bb->succs) != 2)
	continue;
      if (!any_condjump_p (BB_END (bb)))
	continue;
      if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
	continue;
      invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
      EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
      EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
    }
}
/* If any destination of a crossing edge does not have a label, add label;
   Convert any easy fall-through crossing edges to unconditional jumps.  */

static void
add_labels_and_missing_jumps (VEC(edge, heap) *crossing_edges)
{
  size_t i;
  edge e;

  FOR_EACH_VEC_ELT (edge, crossing_edges, i, e)
    {
      basic_block src = e->src;
      basic_block dest = e->dest;
      rtx label, new_jump;

      if (dest == EXIT_BLOCK_PTR)
	continue;

      /* Make sure dest has a label.  */
      label = block_label (dest);

      /* Nothing to do for non-fallthru edges.  */
      if (src == ENTRY_BLOCK_PTR)
	continue;
      if ((e->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* If the block does not end with a control flow insn, then we
	 can trivially add a jump to the end to fixup the crossing.
	 Otherwise the jump will have to go in a new bb, which will
	 be handled by fix_up_fall_thru_edges function.  */
      if (control_flow_insn_p (BB_END (src)))
	continue;

      /* Make sure there's only one successor.  */
      gcc_assert (single_succ_p (src));

      new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
      BB_END (src) = new_jump;
      JUMP_LABEL (new_jump) = label;
      LABEL_NUSES (label) += 1;

      emit_barrier_after_bb (src);

      /* Mark edge as non-fallthru.  */
      e->flags &= ~EDGE_FALLTHRU;
    }
}
/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump or end with a call
   instruction; we've already dealt with fall-through edges for blocks
   that didn't have a conditional jump or didn't end with call instruction
   in the call to add_labels_and_missing_jumps).  Convert the fall-through
   edge to non-crossing edge by inserting a new bb to fall-through into.
   The new bb will contain an unconditional jump (crossing edge) to the
   original fall through destination.  */

static void
fix_up_fall_thru_edges (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  edge succ1;
  edge succ2;
  edge fall_thru;
  edge cond_jump = NULL;
  edge e;
  bool cond_jump_crosses;
  int invert_worked;
  rtx old_jump;
  rtx fall_thru_label;

  FOR_EACH_BB (cur_bb)
    {
      fall_thru = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* Find the fall-through edge.  */

      if (succ1
	  && (succ1->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ1;
	  cond_jump = succ2;
	}
      else if (succ2
	       && (succ2->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ2;
	  cond_jump = succ1;
	}
      else if (succ1
	       && (block_ends_with_call_p (cur_bb)
		   || can_throw_internal (BB_END (cur_bb))))
	{
	  edge_iterator ei;

	  /* Find EDGE_CAN_FALLTHRU edge.  */
	  FOR_EACH_EDGE (e, ei, cur_bb->succs)
	    if (e->flags & EDGE_CAN_FALLTHRU)
	      {
		fall_thru = e;
		break;
	      }
	}

      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
	{
	  /* Check to see if the fall-thru edge is a crossing edge.  */

	  if (fall_thru->flags & EDGE_CROSSING)
	    {
	      /* The fall_thru edge crosses; now check the cond jump edge, if
		 it exists.  */

	      cond_jump_crosses = true;
	      invert_worked = 0;
	      old_jump = BB_END (cur_bb);

	      /* Find the jump instruction, if there is one.  */

	      if (cond_jump)
		{
		  if (!(cond_jump->flags & EDGE_CROSSING))
		    cond_jump_crosses = false;

		  /* We know the fall-thru edge crosses; if the cond
		     jump edge does NOT cross, and its destination is the
		     next block in the bb order, invert the jump
		     (i.e. fix it so the fall through does not cross and
		     the cond jump does).  */

		  if (!cond_jump_crosses
		      && cur_bb->aux == cond_jump->dest)
		    {
		      /* Find label in fall_thru block.  We've already added
			 any missing labels, so there must be one.  */

		      fall_thru_label = block_label (fall_thru->dest);

		      if (old_jump && JUMP_P (old_jump) && fall_thru_label)
			invert_worked = invert_jump (old_jump,
						     fall_thru_label, 0);
		      if (invert_worked)
			{
			  fall_thru->flags &= ~EDGE_FALLTHRU;
			  cond_jump->flags |= EDGE_FALLTHRU;
			  update_br_prob_note (cur_bb);
			  e = fall_thru;
			  fall_thru = cond_jump;
			  cond_jump = e;
			  cond_jump->flags |= EDGE_CROSSING;
			  fall_thru->flags &= ~EDGE_CROSSING;
			}
		    }
		}

	      if (cond_jump_crosses || !invert_worked)
		{
		  /* This is the case where both edges out of the basic
		     block are crossing edges.  Here we will fix up the
		     fall through edge.  The jump edge will be taken care
		     of later.  The EDGE_CROSSING flag of fall_thru edge
		     is unset before the call to force_nonfallthru
		     function because if a new basic-block is created
		     this edge remains in the current section boundary
		     while the edge between new_bb and the fall_thru->dest
		     becomes EDGE_CROSSING.  */

		  fall_thru->flags &= ~EDGE_CROSSING;
		  new_bb = force_nonfallthru (fall_thru);

		  if (new_bb)
		    {
		      new_bb->aux = cur_bb->aux;
		      cur_bb->aux = new_bb;

		      /* Make sure new fall-through bb is in same
			 partition as bb it's falling through from.  */

		      BB_COPY_PARTITION (new_bb, cur_bb);
		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
		    }
		  else
		    {
		      /* If a new basic-block was not created, restore
			 the EDGE_CROSSING flag.  */
		      fall_thru->flags |= EDGE_CROSSING;
		    }

		  /* Add barrier after new jump.  */
		  emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
		}
	    }
	}
    }
}
/* This function checks the destination block of a "crossing jump" to
   see if it has any crossing predecessors that begin with a code label
   and end with an unconditional jump.  If so, it returns that predecessor
   block.  (This is to avoid creating lots of new basic blocks that all
   contain unconditional jumps to the same destination).  */

static basic_block
find_jump_block (basic_block jump_dest)
{
  basic_block source_bb = NULL;
  edge e;
  edge_iterator ei;
  rtx insn;

  FOR_EACH_EDGE (e, ei, jump_dest->preds)
    if (e->flags & EDGE_CROSSING)
      {
	basic_block src = e->src;

	/* Check each predecessor to see if it has a label, and contains
	   only one executable instruction, which is an unconditional jump.
	   If so, we can use it.  */

	if (LABEL_P (BB_HEAD (src)))
	  for (insn = BB_HEAD (src);
	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn)
		  && insn == BB_END (src)
		  && JUMP_P (insn)
		  && !any_condjump_p (insn))
		{
		  source_bb = src;
		  break;
		}
	    }

	if (source_bb)
	  break;
      }

  return source_bb;
}
/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block dest;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx old_jump;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx new_label;

  FOR_EACH_BB (cur_bb)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
	 can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
	crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
	crossing_edge = succ2;

      if (crossing_edge)
	{
	  old_jump = BB_END (cur_bb);

	  /* Check to make sure the jump instruction is a
	     conditional jump.  */

	  set_src = NULL_RTX;

	  if (any_condjump_p (old_jump))
	    {
	      if (GET_CODE (PATTERN (old_jump)) == SET)
		set_src = SET_SRC (PATTERN (old_jump));
	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
		{
		  set_src = XVECEXP (PATTERN (old_jump), 0,0);
		  if (GET_CODE (set_src) == SET)
		    set_src = SET_SRC (set_src);
		  else
		    set_src = NULL_RTX;
		}
	    }

	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
	    {
	      if (GET_CODE (XEXP (set_src, 1)) == PC)
		old_label = XEXP (set_src, 2);
	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
		old_label = XEXP (set_src, 1);

	      /* Check to see if new bb for jumping to that dest has
		 already been created; if so, use it; if not, create
		 a new one.  */

	      new_bb = find_jump_block (crossing_edge->dest);

	      if (new_bb)
		new_label = block_label (new_bb);
	      else
		{
		  basic_block last_bb;
		  rtx new_jump;

		  /* Create new basic block to be dest for
		     conditional jump.  */

		  /* Put appropriate instructions in new bb.  */

		  new_label = gen_label_rtx ();
		  emit_label (new_label);

		  gcc_assert (GET_CODE (old_label) == LABEL_REF);
		  old_label = JUMP_LABEL (old_jump);
		  new_jump = emit_jump_insn (gen_jump (old_label));
		  JUMP_LABEL (new_jump) = old_label;

		  last_bb = EXIT_BLOCK_PTR->prev_bb;
		  new_bb = create_basic_block (new_label, new_jump, last_bb);
		  new_bb->aux = last_bb->aux;
		  last_bb->aux = new_bb;

		  emit_barrier_after_bb (new_bb);

		  /* Make sure new bb is in same partition as source
		     of conditional branch.  */
		  BB_COPY_PARTITION (new_bb, cur_bb);
		}

	      /* Make old jump branch to new bb.  */

	      redirect_jump (old_jump, new_label, 0);

	      /* Remove crossing_edge as predecessor of 'dest'.  */

	      dest = crossing_edge->dest;

	      redirect_edge_succ (crossing_edge, new_bb);

	      /* Make a new edge from new_bb to old dest; new edge
		 will be a successor for new_bb and a predecessor
		 for 'dest'.  */

	      if (EDGE_COUNT (new_bb->succs) == 0)
		new_edge = make_edge (new_bb, dest, 0);
	      else
		new_edge = EDGE_SUCC (new_bb, 0);

	      crossing_edge->flags &= ~EDGE_CROSSING;
	      new_edge->flags |= EDGE_CROSSING;
	    }
	}
    }
}
/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx last_insn;
  rtx label;
  rtx label_addr;
  rtx indirect_jump_sequence;
  rtx jump_insn = NULL_RTX;
  rtx new_reg;
  rtx cur_insn;
  rtx label2, table;
  edge succ;

  FOR_EACH_BB (cur_bb)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
	continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
	 this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
	  && (succ->flags & EDGE_CROSSING))
	{
	  gcc_assert (!any_condjump_p (last_insn));

	  /* Make sure the jump is not already an indirect or table jump.  */

	  if (!computed_jump_p (last_insn)
	      && !tablejump_p (last_insn, &label2, &table))
	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create
		 reference of label, as target for jump.  */

	      label = JUMP_LABEL (last_insn);
	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
	      LABEL_NUSES (label) += 1;

	      /* Get a register to use for the indirect jump.  */

	      new_reg = gen_reg_rtx (Pmode);

	      /* Generate the indirect jump sequence.  */

	      start_sequence ();
	      emit_move_insn (new_reg, label_addr);
	      emit_indirect_jump (new_reg);
	      indirect_jump_sequence = get_insns ();
	      end_sequence ();

	      /* Make sure every instruction in the new jump sequence has
		 its basic block set to be cur_bb.  */

	      for (cur_insn = indirect_jump_sequence; cur_insn;
		   cur_insn = NEXT_INSN (cur_insn))
		{
		  if (!BARRIER_P (cur_insn))
		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
		  if (JUMP_P (cur_insn))
		    jump_insn = cur_insn;
		}

	      /* Insert the new (indirect) jump sequence immediately before
		 the unconditional jump, then delete the unconditional jump.  */

	      emit_insn_before (indirect_jump_sequence, last_insn);
	      delete_insn (last_insn);

	      /* Make BB_END for cur_bb be the jump instruction (NOT the
		 barrier instruction at the end of the sequence...).  */

	      BB_END (cur_bb) = jump_insn;
	    }
	}
    }
}
/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
	  && JUMP_P (BB_END (e->src)))
	add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
}
/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
	current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
	{
	  if (switched_sections)
	    {
	      error ("multiple hot/cold transitions found (bb %i)",
		     bb->index);
	      err = 1;
	    }
	  else
	    {
	      switched_sections = true;
	      current_partition = BB_PARTITION (bb);
	    }
	}
    }

  gcc_assert (!err);
}
/* Reorder basic blocks.  The main entry point to this file.  */

static void
reorder_basic_blocks (void)
{
  int n_traces;
  int i;
  struct trace *traces;

  gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We are estimating the length of uncond jump insn only once since the code
     for getting the insn length always returns the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = XNEWVEC (bbro_basic_block_data, array_size);
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].visited = 0;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = XNEWVEC (struct trace, n_basic_blocks);
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  relink_block_chain (/*stay_in_cfglayout_mode=*/true);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	dump_reg_info (dump_file);
      dump_flow_info (dump_file, dump_flags);
    }

  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}
/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */

static void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (!flag_reorder_blocks_and_partition)
    return;

  FOR_EACH_BB (bb)
    {
      if (!first_partition)
	first_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != first_partition)
	{
	  new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
				       BB_HEAD (bb));
	  /* ??? This kind of note always lives between basic blocks,
	     but add_insn_before will set BLOCK_FOR_INSN anyway.  */
	  BLOCK_FOR_INSN (new_note) = NULL;
	  break;
	}
    }
}
/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used to not unfactor them again, which
   can seriously pessimize code with many computed jumps in the source
   code, such as interpreters.  See e.g. PR15242.  */
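
/* For background (illustrative only, not part of this pass): a factored
   dispatch funnels every opcode of an interpreter through one computed
   goto, as in the GNU C fragment below; duplicating that single dispatch
   block back into its predecessors gives each handler its own indirect
   jump, which branch predictors handle much better.  */
#if 0
static void
run (const unsigned char *pc)
{
  static void *dispatch[] = { &&op_nop, &&op_halt };

  goto *dispatch[*pc];		/* the factored computed goto */
 op_nop:
  pc++;
  goto *dispatch[*pc];		/* after unfactoring: per-handler jump */
 op_halt:
  return;
}
#endif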
static bool
gate_duplicate_computed_gotos (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  return (optimize > 0
	  && flag_expensive_optimizations
	  && ! optimize_function_for_size_p (cfun));
}
static unsigned int
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return 0;

  cfg_layout_initialize (0);

  /* We are estimating the length of uncond jump insn only once
     since the code for getting the insn length always returns
     the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size
    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
     mark it in the candidates.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
	bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
	continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
	  || !can_duplicate_block_p (bb))
	continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
	if (INSN_P (insn))
	  {
	    size += get_attr_min_length (insn);
	    if (size > max_size)
	      break;
	  }
      if (size > max_size)
	continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
	all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
	continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->flags & BB_VISITED)
	continue;

      bb->flags |= BB_VISITED;

      /* BB must have one outgoing edge.  That edge must not lead to
	 the exit block or the next block.
	 The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
	  || single_succ (bb) == EXIT_BLOCK_PTR
	  || single_succ (bb) == bb->next_bb
	  || single_pred_p (single_succ (bb)))
	continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
	continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->flags |= BB_VISITED;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
  return 0;
}
struct rtl_opt_pass pass_duplicate_computed_gotos =
{
 {
  RTL_PASS,
  "compgotos",				/* name */
  gate_duplicate_computed_gotos,	/* gate */
  duplicate_computed_gotos,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_REORDER_BLOCKS,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_rtl_sharing,		/* todo_flags_finish */
 }
};
/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function reorder_basic_blocks).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic blocks
   to indicate which section they belong in.  This information is
   later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below).

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above), in order to ensure
   that jumps attempting to cross section boundaries are really able
   to cover whatever distance the jump requires (on many architectures
   conditional or unconditional jumps are not able to reach all of
   memory).  Therefore tests have to be inserted into each such
   optimization to make sure that it does not undo stuff necessary to
   cross partition boundaries.  This would be much less of a problem
   if we could perform this optimization later in the compilation, but
   unfortunately the fact that we may need to create indirect jumps
   (through registers) requires that this optimization be performed
   before register allocation.

   Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However this
   is not obvious to the current bb structure.  Therefore we must take
   care to ensure that: 1) there are no fall_thru edges that cross
   between sections; 2) for those architectures which have "short"
   conditional branches, all conditional branches that attempt to
   cross between sections are converted to unconditional branches;
   and, 3) for those architectures which have "short" unconditional
   branches, all unconditional branches that attempt to cross between
   sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */
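
/* An illustrative sketch only (block names are hypothetical): if hot block
   A falls through into cold block C, a new hot block A' is inserted so
   that no fall-through crosses the section boundary:

     before:  A (hot)  --fallthru-->  C (cold)
     after:   A (hot)  --fallthru-->  A' (hot): jmp C  --crossing-->  C (cold)

   Only the unconditional jump in A' crosses sections, and if the target's
   unconditional jump cannot span all of memory it is later rewritten as an
   indirect jump through a register.  */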
static unsigned
partition_hot_cold_basic_blocks (void)
{
  VEC(edge, heap) *crossing_edges;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return 0;

  df_set_flags (DF_DEFER_INSN_RESCAN);

  crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
  if (crossing_edges == NULL)
    return 0;

  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */
  add_labels_and_missing_jumps (crossing_edges);

  /* Convert all crossing fall_thru edges to non-crossing fall-thrus by
     adding unconditional jumps (that jump to the original fall-through
     dest).  */
  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */
  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */
  if (!HAS_LONG_UNCOND_BRANCH)
    fix_crossing_unconditional_branches ();

  add_reg_crossing_jump_notes ();

  /* Clear bb->aux fields that the above routines were using.  */
  clear_aux_for_blocks ();

  VEC_free (edge, heap, crossing_edges);

  /* ??? FIXME: DF generates the bb info for a block immediately.
     And by immediately, I mean *during* creation of the block.

	#0  df_bb_refs_collect
	#1  in df_bb_refs_record
	#2  in create_basic_block_structure

     Which means that the bb_has_eh_pred test in df_bb_refs_collect
     will *always* fail, because no edges can have been added to the
     block yet.  Which of course means we don't add the right
     artificial refs, which means we fail df_verify (much) later.

     Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
     that we also shouldn't grab data from the new blocks those new
     insns are in either.  In this way one can create the block, link
     it up properly, and have everything Just Work later, when deferred
     insns are processed.

     In the meantime, we have no other option but to throw away all
     of the DF data and recompute it all.  */
  if (cfun->eh->lp_array)
    {
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
	 data.  We blindly generated all of them when creating the new
	 landing pad.  Delete those assignments we don't use.  */
      df_set_flags (DF_LR_RUN_DCE);
      df_analyze ();
    }

  return TODO_verify_flow | TODO_verify_rtl_sharing;
}
static bool
gate_handle_reorder_blocks (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  /* Don't reorder blocks when optimizing for size because extra jump insns
     may be created; also a barrier may create extra padding.

     More correctly we should have a block reordering mode that tried to
     minimize the combined size of all the jumps.  This would more or less
     automatically remove extra jumps, but would also try to use more short
     jumps instead of long jumps.  */
  if (!optimize_function_for_speed_p (cfun))
    return false;
  return (optimize > 0
	  && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
}

/* Reorder basic blocks.  */
static unsigned int
rest_of_handle_reorder_blocks (void)
{
  basic_block bb;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  cfg_layout_initialize (CLEANUP_EXPENSIVE);

  reorder_basic_blocks ();
  cleanup_cfg (CLEANUP_EXPENSIVE);

  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  cfg_layout_finalize ();

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
  return 0;
}

struct rtl_opt_pass pass_reorder_blocks =
{
 {
  RTL_PASS,
  "bbro",				/* name */
  gate_handle_reorder_blocks,		/* gate */
  rest_of_handle_reorder_blocks,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_REORDER_BLOCKS,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_rtl_sharing,		/* todo_flags_finish */
 }
};
static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */
  return (flag_reorder_blocks_and_partition
	  /* See gate_handle_reorder_blocks.  We should not partition if
	     we are going to omit the reordering.  */
	  && optimize_function_for_speed_p (cfun)
	  && !DECL_ONE_ONLY (current_function_decl)
	  && !user_defined_section_attribute);
}

struct rtl_opt_pass pass_partition_blocks =
{
 {
  RTL_PASS,
  "bbpart",				/* name */
  gate_handle_partition_blocks,		/* gate */
  partition_hot_cold_basic_blocks,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_REORDER_BLOCKS,			/* tv_id */
  PROP_cfglayout,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0					/* todo_flags_finish */
 }
};