1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
3 2012 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This (greedy) algorithm constructs traces in several rounds.
22 The construction starts from "seeds". The seed for the first round
 23    is the entry point of the function.  When there is more than one seed,
 24    the one with the lowest key in the heap is selected first
25 (see function bb_to_key). Then the algorithm repeatedly adds the most
26 probable successor to the end of a trace. Finally it connects the traces.
28 There are two parameters: Branch Threshold and Exec Threshold.
 29    If the probability of the edge to a successor of the current basic block is
 30    lower than Branch Threshold, or the frequency of the successor is lower than
 31    Exec Threshold, the successor will be the seed in one of the next rounds.
32 Each round has these parameters lower than the previous one.
33 The last round has to have these parameters set to zero
34 so that the remaining blocks are picked up.
36 The algorithm selects the most probable successor from all unvisited
37 successors and successors that have been added to this trace.
 38    The other successors (that have not been "sent" to the next round) will be
39 other seeds for this round and the secondary traces will start in them.
40 If the successor has not been visited in this trace it is added to the trace
41 (however, there is some heuristic for simple branches).
 42    If the successor has been visited in this trace, a loop has been found.
43 If the loop has many iterations the loop is rotated so that the
44 source block of the most probable edge going out from the loop
45 is the last block of the trace.
46 If the loop has few iterations and there is no edge from the last block of
 47    the loop going out of the loop, the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
 56    The remaining traces are simply connected, so there will be a jump to the
 57    beginning of each remaining trace.
60 References:
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
 64    http://citeseer.nj.nec.com/15361.html
*/
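/* A minimal illustration of the construction (the blocks and probabilities
   are made up, not taken from the reference above):  suppose block A ends in
   a conditional branch to B with probability 90% and to C with probability
   10%, and both B and C fall into D.  Round 1 seeds the heap with A,
   repeatedly appends the most probable unvisited successor, and produces the
   trace A B D.  The edge A->C falls below the round's branch threshold, so C
   becomes a seed for a later round and starts its own (secondary) trace.  */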
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "output.h"
76 #include "fibheap.h"
77 #include "target.h"
78 #include "function.h"
79 #include "tm_p.h"
80 #include "obstack.h"
81 #include "expr.h"
82 #include "params.h"
83 #include "diagnostic-core.h"
84 #include "toplev.h" /* user_defined_section_attribute */
85 #include "tree-pass.h"
86 #include "df.h"
87 #include "bb-reorder.h"
88 #include "except.h"
90 /* The number of rounds. In most cases there will only be 4 rounds, but
91 when partitioning hot and cold basic blocks into separate sections of
 92    the .o file there will be an extra round.  */
93 #define N_ROUNDS 5
95 /* Stubs in case we don't have a return insn.
 96    We have to check at runtime too, not only at compile time.  */
98 #ifndef HAVE_return
99 #define HAVE_return 0
100 #define gen_return() NULL_RTX
101 #endif
104 struct target_bb_reorder default_target_bb_reorder;
105 #if SWITCHABLE_TARGET
106 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
107 #endif
109 #define uncond_jump_length \
110 (this_target_bb_reorder->x_uncond_jump_length)
112 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
113 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
115 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
116 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
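/* Roughly speaking, with the defaults above the first round only extends a
   trace along edges whose probability is at least 400/1000 of
   REG_BR_PROB_BASE and whose frequency is at least 500/1000 of the hottest
   entry block's; anything below the thresholds is deferred to a later, less
   demanding round, and the zero-threshold rounds at the end pick up whatever
   is left.  */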
118 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
119 block the edge destination is not duplicated while connecting traces. */
120 #define DUPLICATION_THRESHOLD 100
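/* In other words, while connecting traces a non-trivial destination block is
   duplicated only when the connecting edge carries at least 10% of the
   frequency/count of the hottest entry block (tiny blocks are always copied;
   see the freq_threshold/count_threshold computation in connect_traces).  */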
122 /* Structure to hold needed information for each basic block. */
123 typedef struct bbro_basic_block_data_def
125 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
126 int start_of_trace;
128 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
129 int end_of_trace;
131 /* Which trace is the bb in? */
132 int in_trace;
134 /* Which trace was this bb visited in? */
135 int visited;
137 /* Which heap is BB in (if any)? */
138 fibheap_t heap;
140 /* Which heap node is BB in (if any)? */
141 fibnode_t node;
142 } bbro_basic_block_data;
144 /* The current size of the following dynamic array. */
145 static int array_size;
147 /* The array which holds needed information for basic blocks. */
148 static bbro_basic_block_data *bbd;
 150 /* To avoid frequent reallocation the size of the array is greater than needed;
 151    the number of elements is (not less than) 1.25 * size_wanted.  */
152 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
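/* For instance, GET_ARRAY_SIZE (100) == ((100 / 4) + 1) * 5 == 130, i.e. a
   bit more than the 125 elements that 1.25 * 100 would require.  */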
154 /* Free the memory and set the pointer to NULL. */
155 #define FREE(P) (gcc_assert (P), free (P), P = 0)
157 /* Structure for holding information about a trace. */
158 struct trace
160 /* First and last basic block of the trace. */
161 basic_block first, last;
163 /* The round of the STC creation which this trace was found in. */
164 int round;
166 /* The length (i.e. the number of basic blocks) of the trace. */
167 int length;
170 /* Maximum frequency and count of one of the entry blocks. */
171 static int max_entry_frequency;
172 static gcov_type max_entry_count;
174 /* Local function prototypes. */
175 static void find_traces (int *, struct trace *);
176 static basic_block rotate_loop (edge, struct trace *, int);
177 static void mark_bb_visited (basic_block, int);
178 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
179 int, fibheap_t *, int);
180 static basic_block copy_bb (basic_block, edge, basic_block, int);
181 static fibheapkey_t bb_to_key (basic_block);
182 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge);
183 static void connect_traces (int, struct trace *);
184 static bool copy_bb_p (const_basic_block, int);
185 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
187 /* Return the trace number in which BB was visited. */
189 static int
190 bb_visited_trace (const_basic_block bb)
192 gcc_assert (bb->index < array_size);
193 return bbd[bb->index].visited;
 196 /* Mark BB as visited in trace number TRACE.  */
198 static void
199 mark_bb_visited (basic_block bb, int trace)
201 bbd[bb->index].visited = trace;
202 if (bbd[bb->index].heap)
204 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
205 bbd[bb->index].heap = NULL;
206 bbd[bb->index].node = NULL;
 210 /* Check to see if bb should be pushed into the next round of trace
 211    collection or not.  Reasons for pushing the block forward are:
 212    1) the block is cold, we are doing partitioning, and there will be
 213    another round (cold partition blocks are not supposed to be
 214    collected into traces until the very last round); or
 215    2) there will be another round, and the basic block is not "hot enough"
 216    for the current round of trace collection.  */
218 static bool
219 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
220 int exec_th, gcov_type count_th)
222 bool there_exists_another_round;
223 bool block_not_hot_enough;
225 there_exists_another_round = round < number_of_rounds - 1;
227 block_not_hot_enough = (bb->frequency < exec_th
228 || bb->count < count_th
229 || probably_never_executed_bb_p (bb));
231 if (there_exists_another_round
232 && block_not_hot_enough)
233 return true;
234 else
235 return false;
 238 /* Find the traces for Software Trace Cache.  Chain each trace through
 239    RBI()->next.  Store the number of traces in *N_TRACES and the
 240    descriptions of the traces in TRACES.  */
242 static void
243 find_traces (int *n_traces, struct trace *traces)
245 int i;
246 int number_of_rounds;
247 edge e;
248 edge_iterator ei;
249 fibheap_t heap;
251 /* Add one extra round of trace collection when partitioning hot/cold
252 basic blocks into separate sections. The last round is for all the
253 cold blocks (and ONLY the cold blocks). */
255 number_of_rounds = N_ROUNDS - 1;
257 /* Insert entry points of function into heap. */
258 heap = fibheap_new ();
259 max_entry_frequency = 0;
260 max_entry_count = 0;
261 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
263 bbd[e->dest->index].heap = heap;
264 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
265 e->dest);
266 if (e->dest->frequency > max_entry_frequency)
267 max_entry_frequency = e->dest->frequency;
268 if (e->dest->count > max_entry_count)
269 max_entry_count = e->dest->count;
272 /* Find the traces. */
273 for (i = 0; i < number_of_rounds; i++)
275 gcov_type count_threshold;
277 if (dump_file)
278 fprintf (dump_file, "STC - round %d\n", i + 1);
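      /* Scale max_entry_count by exec_threshold[i] per mille; the two
	 orderings below differ only in whether we multiply or divide first,
	 so that the intermediate product cannot overflow when the profile
	 count is large.  */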
280 if (max_entry_count < INT_MAX / 1000)
281 count_threshold = max_entry_count * exec_threshold[i] / 1000;
282 else
283 count_threshold = max_entry_count / 1000 * exec_threshold[i];
285 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
286 max_entry_frequency * exec_threshold[i] / 1000,
287 count_threshold, traces, n_traces, i, &heap,
288 number_of_rounds);
290 fibheap_delete (heap);
292 if (dump_file)
294 for (i = 0; i < *n_traces; i++)
296 basic_block bb;
297 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
298 traces[i].round + 1);
299 for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
300 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
301 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
303 fflush (dump_file);
307 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
308 (with sequential number TRACE_N). */
310 static basic_block
311 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
313 basic_block bb;
315 /* Information about the best end (end after rotation) of the loop. */
316 basic_block best_bb = NULL;
317 edge best_edge = NULL;
318 int best_freq = -1;
319 gcov_type best_count = -1;
320 /* The best edge is preferred when its destination is not visited yet
321 or is a start block of some trace. */
322 bool is_preferred = false;
324 /* Find the most frequent edge that goes out from current trace. */
325 bb = back_edge->dest;
328 edge e;
329 edge_iterator ei;
331 FOR_EACH_EDGE (e, ei, bb->succs)
332 if (e->dest != EXIT_BLOCK_PTR
333 && bb_visited_trace (e->dest) != trace_n
334 && (e->flags & EDGE_CAN_FALLTHRU)
335 && !(e->flags & EDGE_COMPLEX))
337 if (is_preferred)
339 /* The best edge is preferred. */
340 if (!bb_visited_trace (e->dest)
341 || bbd[e->dest->index].start_of_trace >= 0)
343 /* The current edge E is also preferred. */
344 int freq = EDGE_FREQUENCY (e);
345 if (freq > best_freq || e->count > best_count)
347 best_freq = freq;
348 best_count = e->count;
349 best_edge = e;
350 best_bb = bb;
354 else
356 if (!bb_visited_trace (e->dest)
357 || bbd[e->dest->index].start_of_trace >= 0)
359 /* The current edge E is preferred. */
360 is_preferred = true;
361 best_freq = EDGE_FREQUENCY (e);
362 best_count = e->count;
363 best_edge = e;
364 best_bb = bb;
366 else
368 int freq = EDGE_FREQUENCY (e);
369 if (!best_edge || freq > best_freq || e->count > best_count)
371 best_freq = freq;
372 best_count = e->count;
373 best_edge = e;
374 best_bb = bb;
379 bb = (basic_block) bb->aux;
381 while (bb != back_edge->dest);
383 if (best_bb)
385 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
386 the trace. */
387 if (back_edge->dest == trace->first)
389 trace->first = (basic_block) best_bb->aux;
391 else
393 basic_block prev_bb;
395 for (prev_bb = trace->first;
396 prev_bb->aux != back_edge->dest;
397 prev_bb = (basic_block) prev_bb->aux)
399 prev_bb->aux = best_bb->aux;
401 /* Try to get rid of uncond jump to cond jump. */
402 if (single_succ_p (prev_bb))
404 basic_block header = single_succ (prev_bb);
406 /* Duplicate HEADER if it is a small block containing cond jump
407 in the end. */
408 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
409 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
410 NULL_RTX))
411 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
415 else
417 /* We have not found suitable loop tail so do no rotation. */
418 best_bb = back_edge->src;
420 best_bb->aux = NULL;
421 return best_bb;
 424 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e. do
 425    not include into traces basic blocks whose probability is lower than BRANCH_TH,
 426    whose frequency is lower than EXEC_TH, or whose count is lower than COUNT_TH.
 427    Store the new traces into TRACES and modify the number of traces *N_TRACES.
 428    Set the round (which the traces belong to) to ROUND.  The function expects
 429    starting basic blocks to be in *HEAP; at the end it deletes *HEAP and stores
 430    starting points for the next round into a new *HEAP.  */
432 static void
433 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
434 struct trace *traces, int *n_traces, int round,
435 fibheap_t *heap, int number_of_rounds)
437 /* Heap for discarded basic blocks which are possible starting points for
438 the next round. */
439 fibheap_t new_heap = fibheap_new ();
441 while (!fibheap_empty (*heap))
443 basic_block bb;
444 struct trace *trace;
445 edge best_edge, e;
446 fibheapkey_t key;
447 edge_iterator ei;
449 bb = (basic_block) fibheap_extract_min (*heap);
450 bbd[bb->index].heap = NULL;
451 bbd[bb->index].node = NULL;
453 if (dump_file)
454 fprintf (dump_file, "Getting bb %d\n", bb->index);
 456       /* If the BB's frequency is too low, send BB to the next round.  When
457 partitioning hot/cold blocks into separate sections, make sure all
458 the cold blocks (and ONLY the cold blocks) go into the (extra) final
459 round. */
461 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
462 count_th))
464 int key = bb_to_key (bb);
465 bbd[bb->index].heap = new_heap;
466 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
468 if (dump_file)
469 fprintf (dump_file,
470 " Possible start point of next round: %d (key: %d)\n",
471 bb->index, key);
472 continue;
475 trace = traces + *n_traces;
476 trace->first = bb;
477 trace->round = round;
478 trace->length = 0;
479 bbd[bb->index].in_trace = *n_traces;
480 (*n_traces)++;
484 int prob, freq;
485 bool ends_in_call;
487 /* The probability and frequency of the best edge. */
488 int best_prob = INT_MIN / 2;
489 int best_freq = INT_MIN / 2;
491 best_edge = NULL;
492 mark_bb_visited (bb, *n_traces);
493 trace->length++;
495 if (dump_file)
496 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
497 bb->index, *n_traces - 1);
499 ends_in_call = block_ends_with_call_p (bb);
501 /* Select the successor that will be placed after BB. */
502 FOR_EACH_EDGE (e, ei, bb->succs)
504 gcc_assert (!(e->flags & EDGE_FAKE));
506 if (e->dest == EXIT_BLOCK_PTR)
507 continue;
509 if (bb_visited_trace (e->dest)
510 && bb_visited_trace (e->dest) != *n_traces)
511 continue;
513 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
514 continue;
516 prob = e->probability;
517 freq = e->dest->frequency;
519 /* The only sensible preference for a call instruction is the
520 fallthru edge. Don't bother selecting anything else. */
521 if (ends_in_call)
523 if (e->flags & EDGE_CAN_FALLTHRU)
525 best_edge = e;
526 best_prob = prob;
527 best_freq = freq;
529 continue;
532 /* Edge that cannot be fallthru or improbable or infrequent
533 successor (i.e. it is unsuitable successor). */
534 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
535 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
536 || e->count < count_th)
537 continue;
539 /* If partitioning hot/cold basic blocks, don't consider edges
540 that cross section boundaries. */
542 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
543 best_edge))
545 best_edge = e;
546 best_prob = prob;
547 best_freq = freq;
551 /* If the best destination has multiple predecessors, and can be
552 duplicated cheaper than a jump, don't allow it to be added
553 to a trace. We'll duplicate it when connecting traces. */
554 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
555 && copy_bb_p (best_edge->dest, 0))
556 best_edge = NULL;
558 /* Add all non-selected successors to the heaps. */
559 FOR_EACH_EDGE (e, ei, bb->succs)
561 if (e == best_edge
562 || e->dest == EXIT_BLOCK_PTR
563 || bb_visited_trace (e->dest))
564 continue;
566 key = bb_to_key (e->dest);
568 if (bbd[e->dest->index].heap)
570 /* E->DEST is already in some heap. */
571 if (key != bbd[e->dest->index].node->key)
573 if (dump_file)
575 fprintf (dump_file,
576 "Changing key for bb %d from %ld to %ld.\n",
577 e->dest->index,
578 (long) bbd[e->dest->index].node->key,
579 key);
581 fibheap_replace_key (bbd[e->dest->index].heap,
582 bbd[e->dest->index].node, key);
585 else
587 fibheap_t which_heap = *heap;
589 prob = e->probability;
590 freq = EDGE_FREQUENCY (e);
592 if (!(e->flags & EDGE_CAN_FALLTHRU)
593 || (e->flags & EDGE_COMPLEX)
594 || prob < branch_th || freq < exec_th
595 || e->count < count_th)
597 /* When partitioning hot/cold basic blocks, make sure
598 the cold blocks (and only the cold blocks) all get
599 pushed to the last round of trace collection. */
601 if (push_to_next_round_p (e->dest, round,
602 number_of_rounds,
603 exec_th, count_th))
604 which_heap = new_heap;
607 bbd[e->dest->index].heap = which_heap;
608 bbd[e->dest->index].node = fibheap_insert (which_heap,
609 key, e->dest);
611 if (dump_file)
613 fprintf (dump_file,
614 " Possible start of %s round: %d (key: %ld)\n",
615 (which_heap == new_heap) ? "next" : "this",
616 e->dest->index, (long) key);
622 if (best_edge) /* Suitable successor was found. */
624 if (bb_visited_trace (best_edge->dest) == *n_traces)
626 /* We do nothing with one basic block loops. */
627 if (best_edge->dest != bb)
629 if (EDGE_FREQUENCY (best_edge)
630 > 4 * best_edge->dest->frequency / 5)
632 /* The loop has at least 4 iterations. If the loop
633 header is not the first block of the function
634 we can rotate the loop. */
636 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
638 if (dump_file)
640 fprintf (dump_file,
641 "Rotating loop %d - %d\n",
642 best_edge->dest->index, bb->index);
644 bb->aux = best_edge->dest;
645 bbd[best_edge->dest->index].in_trace =
646 (*n_traces) - 1;
647 bb = rotate_loop (best_edge, trace, *n_traces);
650 else
652 /* The loop has less than 4 iterations. */
654 if (single_succ_p (bb)
655 && copy_bb_p (best_edge->dest,
656 optimize_edge_for_speed_p (best_edge)))
658 bb = copy_bb (best_edge->dest, best_edge, bb,
659 *n_traces);
660 trace->length++;
665 /* Terminate the trace. */
666 break;
668 else
 670	      /* Check for a situation

		    A
		   /|
		  B |
		   \|
		    C

 678		 where
 679		 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
 680		 >= EDGE_FREQUENCY (AC).
 681		 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
 682		 Best ordering is then A B C.

 684		 This situation is created for example by:

 686		 if (A) B;
		 C;
	      */
691 FOR_EACH_EDGE (e, ei, bb->succs)
692 if (e != best_edge
693 && (e->flags & EDGE_CAN_FALLTHRU)
694 && !(e->flags & EDGE_COMPLEX)
695 && !bb_visited_trace (e->dest)
696 && single_pred_p (e->dest)
697 && !(e->flags & EDGE_CROSSING)
698 && single_succ_p (e->dest)
699 && (single_succ_edge (e->dest)->flags
700 & EDGE_CAN_FALLTHRU)
701 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
702 && single_succ (e->dest) == best_edge->dest
703 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
705 best_edge = e;
706 if (dump_file)
707 fprintf (dump_file, "Selecting BB %d\n",
708 best_edge->dest->index);
709 break;
712 bb->aux = best_edge->dest;
713 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
714 bb = best_edge->dest;
718 while (best_edge);
719 trace->last = bb;
720 bbd[trace->first->index].start_of_trace = *n_traces - 1;
721 bbd[trace->last->index].end_of_trace = *n_traces - 1;
 723       /* The trace is terminated so we have to recompute the keys in the heap
724 (some block can have a lower key because now one of its predecessors
725 is an end of the trace). */
726 FOR_EACH_EDGE (e, ei, bb->succs)
728 if (e->dest == EXIT_BLOCK_PTR
729 || bb_visited_trace (e->dest))
730 continue;
732 if (bbd[e->dest->index].heap)
734 key = bb_to_key (e->dest);
735 if (key != bbd[e->dest->index].node->key)
737 if (dump_file)
739 fprintf (dump_file,
740 "Changing key for bb %d from %ld to %ld.\n",
741 e->dest->index,
742 (long) bbd[e->dest->index].node->key, key);
744 fibheap_replace_key (bbd[e->dest->index].heap,
745 bbd[e->dest->index].node,
746 key);
752 fibheap_delete (*heap);
754 /* "Return" the new heap. */
755 *heap = new_heap;
 758 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
 759    it to the trace after BB, mark OLD_BB visited and update the pass' data
 760    structures (TRACE is the number of the trace which OLD_BB is duplicated into).  */
762 static basic_block
763 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
765 basic_block new_bb;
767 new_bb = duplicate_block (old_bb, e, bb);
768 BB_COPY_PARTITION (new_bb, old_bb);
770 gcc_assert (e->dest == new_bb);
772 if (dump_file)
773 fprintf (dump_file,
774 "Duplicated bb %d (created bb %d)\n",
775 old_bb->index, new_bb->index);
777 if (new_bb->index >= array_size || last_basic_block > array_size)
779 int i;
780 int new_size;
782 new_size = MAX (last_basic_block, new_bb->index + 1);
783 new_size = GET_ARRAY_SIZE (new_size);
784 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
785 for (i = array_size; i < new_size; i++)
787 bbd[i].start_of_trace = -1;
788 bbd[i].end_of_trace = -1;
789 bbd[i].in_trace = -1;
790 bbd[i].visited = 0;
791 bbd[i].heap = NULL;
792 bbd[i].node = NULL;
794 array_size = new_size;
796 if (dump_file)
798 fprintf (dump_file,
799 "Growing the dynamic array to %d elements.\n",
800 array_size);
804 gcc_assert (!bb_visited_trace (e->dest));
805 mark_bb_visited (new_bb, trace);
806 new_bb->aux = bb->aux;
807 bb->aux = new_bb;
809 bbd[new_bb->index].in_trace = trace;
811 return new_bb;
814 /* Compute and return the key (for the heap) of the basic block BB. */
816 static fibheapkey_t
817 bb_to_key (basic_block bb)
819 edge e;
820 edge_iterator ei;
821 int priority = 0;
823 /* Do not start in probably never executed blocks. */
825 if (BB_PARTITION (bb) == BB_COLD_PARTITION
826 || probably_never_executed_bb_p (bb))
827 return BB_FREQ_MAX;
829 /* Prefer blocks whose predecessor is an end of some trace
830 or whose predecessor edge is EDGE_DFS_BACK. */
831 FOR_EACH_EDGE (e, ei, bb->preds)
833 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
834 || (e->flags & EDGE_DFS_BACK))
836 int edge_freq = EDGE_FREQUENCY (e);
838 if (edge_freq > priority)
839 priority = edge_freq;
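  /* A worked example with made-up numbers: a block of frequency 10 whose best
     trace-end (or DFS-back) predecessor edge has frequency 30 gets the key
     -(100 * BB_FREQ_MAX + 100 * 30 + 10), while the same block with no such
     predecessor gets -10; since fibheap_extract_min takes the smallest key
     first, the former is always preferred as a trace start.  */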
843 if (priority)
844 /* The block with priority should have significantly lower key. */
845 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
846 return -bb->frequency;
849 /* Return true when the edge E from basic block BB is better than the temporary
850 best edge (details are in function). The probability of edge E is PROB. The
851 frequency of the successor is FREQ. The current best probability is
852 BEST_PROB, the best frequency is BEST_FREQ.
853 The edge is considered to be equivalent when PROB does not differ much from
854 BEST_PROB; similarly for frequency. */
856 static bool
857 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob,
858 int best_freq, const_edge cur_best_edge)
860 bool is_better_edge;
862 /* The BEST_* values do not have to be best, but can be a bit smaller than
863 maximum values. */
864 int diff_prob = best_prob / 10;
865 int diff_freq = best_freq / 10;
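  /* E.g. (hypothetical numbers) with BEST_PROB == 5000 the tolerance is 500,
     so any PROB in (4500, 5500) is treated as equivalent and the decision
     falls through to the frequency comparison below.  */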
867 if (prob > best_prob + diff_prob)
868 /* The edge has higher probability than the temporary best edge. */
869 is_better_edge = true;
870 else if (prob < best_prob - diff_prob)
871 /* The edge has lower probability than the temporary best edge. */
872 is_better_edge = false;
873 else if (freq < best_freq - diff_freq)
874 /* The edge and the temporary best edge have almost equivalent
875 probabilities. The higher frequency of a successor now means
876 that there is another edge going into that successor.
877 This successor has lower frequency so it is better. */
878 is_better_edge = true;
879 else if (freq > best_freq + diff_freq)
880 /* This successor has higher frequency so it is worse. */
881 is_better_edge = false;
882 else if (e->dest->prev_bb == bb)
883 /* The edges have equivalent probabilities and the successors
884 have equivalent frequencies. Select the previous successor. */
885 is_better_edge = true;
886 else
887 is_better_edge = false;
889 /* If we are doing hot/cold partitioning, make sure that we always favor
890 non-crossing edges over crossing edges. */
892 if (!is_better_edge
893 && flag_reorder_blocks_and_partition
894 && cur_best_edge
895 && (cur_best_edge->flags & EDGE_CROSSING)
896 && !(e->flags & EDGE_CROSSING))
897 is_better_edge = true;
899 return is_better_edge;
902 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
904 static void
905 connect_traces (int n_traces, struct trace *traces)
907 int i;
908 bool *connected;
909 bool two_passes;
910 int last_trace;
911 int current_pass;
912 int current_partition;
913 int freq_threshold;
914 gcov_type count_threshold;
916 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
917 if (max_entry_count < INT_MAX / 1000)
918 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
919 else
920 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
922 connected = XCNEWVEC (bool, n_traces);
923 last_trace = -1;
924 current_pass = 1;
925 current_partition = BB_PARTITION (traces[0].first);
926 two_passes = false;
928 if (flag_reorder_blocks_and_partition)
929 for (i = 0; i < n_traces && !two_passes; i++)
930 if (BB_PARTITION (traces[0].first)
931 != BB_PARTITION (traces[i].first))
932 two_passes = true;
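  /* For each not yet connected trace: first walk backwards, chaining in the
     best predecessor traces; then walk forwards through the best successor
     traces; when no direct fallthru successor trace exists, try to connect
     two traces through a duplicate of a single block (see the file header).  */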
934 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
936 int t = i;
937 int t2;
938 edge e, best;
939 int best_len;
941 if (i >= n_traces)
943 gcc_assert (two_passes && current_pass == 1);
944 i = 0;
945 t = i;
946 current_pass = 2;
947 if (current_partition == BB_HOT_PARTITION)
948 current_partition = BB_COLD_PARTITION;
949 else
950 current_partition = BB_HOT_PARTITION;
953 if (connected[t])
954 continue;
956 if (two_passes
957 && BB_PARTITION (traces[t].first) != current_partition)
958 continue;
960 connected[t] = true;
962 /* Find the predecessor traces. */
963 for (t2 = t; t2 > 0;)
965 edge_iterator ei;
966 best = NULL;
967 best_len = 0;
968 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
970 int si = e->src->index;
972 if (e->src != ENTRY_BLOCK_PTR
973 && (e->flags & EDGE_CAN_FALLTHRU)
974 && !(e->flags & EDGE_COMPLEX)
975 && bbd[si].end_of_trace >= 0
976 && !connected[bbd[si].end_of_trace]
977 && (BB_PARTITION (e->src) == current_partition)
978 && (!best
979 || e->probability > best->probability
980 || (e->probability == best->probability
981 && traces[bbd[si].end_of_trace].length > best_len)))
983 best = e;
984 best_len = traces[bbd[si].end_of_trace].length;
987 if (best)
989 best->src->aux = best->dest;
990 t2 = bbd[best->src->index].end_of_trace;
991 connected[t2] = true;
993 if (dump_file)
995 fprintf (dump_file, "Connection: %d %d\n",
996 best->src->index, best->dest->index);
999 else
1000 break;
1003 if (last_trace >= 0)
1004 traces[last_trace].last->aux = traces[t2].first;
1005 last_trace = t;
1007 /* Find the successor traces. */
1008 while (1)
1010 /* Find the continuation of the chain. */
1011 edge_iterator ei;
1012 best = NULL;
1013 best_len = 0;
1014 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1016 int di = e->dest->index;
1018 if (e->dest != EXIT_BLOCK_PTR
1019 && (e->flags & EDGE_CAN_FALLTHRU)
1020 && !(e->flags & EDGE_COMPLEX)
1021 && bbd[di].start_of_trace >= 0
1022 && !connected[bbd[di].start_of_trace]
1023 && (BB_PARTITION (e->dest) == current_partition)
1024 && (!best
1025 || e->probability > best->probability
1026 || (e->probability == best->probability
1027 && traces[bbd[di].start_of_trace].length > best_len)))
1029 best = e;
1030 best_len = traces[bbd[di].start_of_trace].length;
1034 if (best)
1036 if (dump_file)
1038 fprintf (dump_file, "Connection: %d %d\n",
1039 best->src->index, best->dest->index);
1041 t = bbd[best->dest->index].start_of_trace;
1042 traces[last_trace].last->aux = traces[t].first;
1043 connected[t] = true;
1044 last_trace = t;
1046 else
1048 /* Try to connect the traces by duplication of 1 block. */
1049 edge e2;
1050 basic_block next_bb = NULL;
1051 bool try_copy = false;
1053 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1054 if (e->dest != EXIT_BLOCK_PTR
1055 && (e->flags & EDGE_CAN_FALLTHRU)
1056 && !(e->flags & EDGE_COMPLEX)
1057 && (!best || e->probability > best->probability))
1059 edge_iterator ei;
1060 edge best2 = NULL;
1061 int best2_len = 0;
1063 /* If the destination is a start of a trace which is only
1064 one block long, then no need to search the successor
1065 blocks of the trace. Accept it. */
1066 if (bbd[e->dest->index].start_of_trace >= 0
1067 && traces[bbd[e->dest->index].start_of_trace].length
1068 == 1)
1070 best = e;
1071 try_copy = true;
1072 continue;
1075 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1077 int di = e2->dest->index;
1079 if (e2->dest == EXIT_BLOCK_PTR
1080 || ((e2->flags & EDGE_CAN_FALLTHRU)
1081 && !(e2->flags & EDGE_COMPLEX)
1082 && bbd[di].start_of_trace >= 0
1083 && !connected[bbd[di].start_of_trace]
1084 && (BB_PARTITION (e2->dest) == current_partition)
1085 && (EDGE_FREQUENCY (e2) >= freq_threshold)
1086 && (e2->count >= count_threshold)
1087 && (!best2
1088 || e2->probability > best2->probability
1089 || (e2->probability == best2->probability
1090 && traces[bbd[di].start_of_trace].length
1091 > best2_len))))
1093 best = e;
1094 best2 = e2;
1095 if (e2->dest != EXIT_BLOCK_PTR)
1096 best2_len = traces[bbd[di].start_of_trace].length;
1097 else
1098 best2_len = INT_MAX;
1099 next_bb = e2->dest;
1100 try_copy = true;
1105 if (flag_reorder_blocks_and_partition)
1106 try_copy = false;
1108 /* Copy tiny blocks always; copy larger blocks only when the
1109 edge is traversed frequently enough. */
1110 if (try_copy
1111 && copy_bb_p (best->dest,
1112 optimize_edge_for_speed_p (best)
1113 && EDGE_FREQUENCY (best) >= freq_threshold
1114 && best->count >= count_threshold))
1116 basic_block new_bb;
1118 if (dump_file)
1120 fprintf (dump_file, "Connection: %d %d ",
1121 traces[t].last->index, best->dest->index);
1122 if (!next_bb)
1123 fputc ('\n', dump_file);
1124 else if (next_bb == EXIT_BLOCK_PTR)
1125 fprintf (dump_file, "exit\n");
1126 else
1127 fprintf (dump_file, "%d\n", next_bb->index);
1130 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1131 traces[t].last = new_bb;
1132 if (next_bb && next_bb != EXIT_BLOCK_PTR)
1134 t = bbd[next_bb->index].start_of_trace;
1135 traces[last_trace].last->aux = traces[t].first;
1136 connected[t] = true;
1137 last_trace = t;
1139 else
1140 break; /* Stop finding the successor traces. */
1142 else
1143 break; /* Stop finding the successor traces. */
1148 if (dump_file)
1150 basic_block bb;
1152 fprintf (dump_file, "Final order:\n");
1153 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1154 fprintf (dump_file, "%d ", bb->index);
1155 fprintf (dump_file, "\n");
1156 fflush (dump_file);
1159 FREE (connected);
1162 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1163 when code size is allowed to grow by duplication. */
1165 static bool
1166 copy_bb_p (const_basic_block bb, int code_may_grow)
1168 int size = 0;
1169 int max_size = uncond_jump_length;
1170 rtx insn;
1172 if (!bb->frequency)
1173 return false;
1174 if (EDGE_COUNT (bb->preds) < 2)
1175 return false;
1176 if (!can_duplicate_block_p (bb))
1177 return false;
1179 /* Avoid duplicating blocks which have many successors (PR/13430). */
1180 if (EDGE_COUNT (bb->succs) > 8)
1181 return false;
1183 if (code_may_grow && optimize_bb_for_speed_p (bb))
1184 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1186 FOR_BB_INSNS (bb, insn)
1188 if (INSN_P (insn))
1189 size += get_attr_min_length (insn);
1192 if (size <= max_size)
1193 return true;
1195 if (dump_file)
1197 fprintf (dump_file,
1198 "Block %d can't be copied because its size = %d.\n",
1199 bb->index, size);
1202 return false;
 1205 /* Return the length of an unconditional jump instruction.  */

int
 1208 get_uncond_jump_length (void)
1210 rtx label, jump;
1211 int length;
1213 label = emit_label_before (gen_label_rtx (), get_insns ());
1214 jump = emit_jump_insn (gen_jump (label));
1216 length = get_attr_min_length (jump);
1218 delete_insn (jump);
1219 delete_insn (label);
1220 return length;
1223 /* Emit a barrier into the footer of BB. */
1225 static void
1226 emit_barrier_after_bb (basic_block bb)
1228 rtx barrier = emit_barrier_after (BB_END (bb));
1229 BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier);
1232 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1233 Duplicate the landing pad and split the edges so that no EH edge
1234 crosses partitions. */
1236 static void
1237 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1239 eh_landing_pad new_lp;
1240 basic_block new_bb, last_bb, post_bb;
1241 rtx new_label, jump, post_label;
1242 unsigned new_partition;
1243 edge_iterator ei;
1244 edge e;
1246 /* Generate the new landing-pad structure. */
1247 new_lp = gen_eh_landing_pad (old_lp->region);
1248 new_lp->post_landing_pad = old_lp->post_landing_pad;
1249 new_lp->landing_pad = gen_label_rtx ();
1250 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1252 /* Put appropriate instructions in new bb. */
1253 new_label = emit_label (new_lp->landing_pad);
1255 expand_dw2_landing_pad_for_region (old_lp->region);
1257 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1258 post_bb = single_succ (post_bb);
1259 post_label = block_label (post_bb);
1260 jump = emit_jump_insn (gen_jump (post_label));
1261 JUMP_LABEL (jump) = post_label;
1263 /* Create new basic block to be dest for lp. */
1264 last_bb = EXIT_BLOCK_PTR->prev_bb;
1265 new_bb = create_basic_block (new_label, jump, last_bb);
1266 new_bb->aux = last_bb->aux;
1267 last_bb->aux = new_bb;
1269 emit_barrier_after_bb (new_bb);
1271 make_edge (new_bb, post_bb, 0);
1273 /* Make sure new bb is in the other partition. */
1274 new_partition = BB_PARTITION (old_bb);
1275 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1276 BB_SET_PARTITION (new_bb, new_partition);
1278 /* Fix up the edges. */
1279 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1280 if (BB_PARTITION (e->src) == new_partition)
1282 rtx insn = BB_END (e->src);
1283 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1285 gcc_assert (note != NULL);
1286 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1287 XEXP (note, 0) = GEN_INT (new_lp->index);
1289 /* Adjust the edge to the new destination. */
1290 redirect_edge_succ (e, new_bb);
1292 else
1293 ei_next (&ei);
1296 /* Find the basic blocks that are rarely executed and need to be moved to
1297 a separate section of the .o file (to cut down on paging and improve
1298 cache locality). Return a vector of all edges that cross. */
1300 static VEC(edge, heap) *
1301 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1303 VEC(edge, heap) *crossing_edges = NULL;
1304 basic_block bb;
1305 edge e;
1306 edge_iterator ei;
1308 /* Mark which partition (hot/cold) each basic block belongs in. */
1309 FOR_EACH_BB (bb)
1311 if (probably_never_executed_bb_p (bb))
1312 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1313 else
1314 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
 1317   /* The format of .gcc_except_table does not allow landing pads to
 1318      be in a different partition from the throw.  Fix this by either
1319 moving or duplicating the landing pads. */
1320 if (cfun->eh->lp_array)
1322 unsigned i;
1323 eh_landing_pad lp;
1325 FOR_EACH_VEC_ELT (eh_landing_pad, cfun->eh->lp_array, i, lp)
1327 bool all_same, all_diff;
1329 if (lp == NULL
1330 || lp->landing_pad == NULL_RTX
1331 || !LABEL_P (lp->landing_pad))
1332 continue;
1334 all_same = all_diff = true;
1335 bb = BLOCK_FOR_INSN (lp->landing_pad);
1336 FOR_EACH_EDGE (e, ei, bb->preds)
1338 gcc_assert (e->flags & EDGE_EH);
1339 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1340 all_diff = false;
1341 else
1342 all_same = false;
 1345	  if (all_same)
	    ;
1347 else if (all_diff)
1349 int which = BB_PARTITION (bb);
1350 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1351 BB_SET_PARTITION (bb, which);
1353 else
1354 fix_up_crossing_landing_pad (lp, bb);
1358 /* Mark every edge that crosses between sections. */
1360 FOR_EACH_BB (bb)
1361 FOR_EACH_EDGE (e, ei, bb->succs)
1363 unsigned int flags = e->flags;
1365 /* We should never have EDGE_CROSSING set yet. */
1366 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1368 if (e->src != ENTRY_BLOCK_PTR
1369 && e->dest != EXIT_BLOCK_PTR
1370 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1372 VEC_safe_push (edge, heap, crossing_edges, e);
1373 flags |= EDGE_CROSSING;
1376 /* Now that we've split eh edges as appropriate, allow landing pads
1377 to be merged with the post-landing pads. */
1378 flags &= ~EDGE_PRESERVE;
1380 e->flags = flags;
1383 return crossing_edges;
1386 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1388 static void
1389 set_edge_can_fallthru_flag (void)
1391 basic_block bb;
1393 FOR_EACH_BB (bb)
1395 edge e;
1396 edge_iterator ei;
1398 FOR_EACH_EDGE (e, ei, bb->succs)
1400 e->flags &= ~EDGE_CAN_FALLTHRU;
1402 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1403 if (e->flags & EDGE_FALLTHRU)
1404 e->flags |= EDGE_CAN_FALLTHRU;
1407 /* If the BB ends with an invertible condjump all (2) edges are
1408 CAN_FALLTHRU edges. */
1409 if (EDGE_COUNT (bb->succs) != 2)
1410 continue;
1411 if (!any_condjump_p (BB_END (bb)))
1412 continue;
1413 if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1414 continue;
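      /* The inversion above was only a probe to prove the condjump is
	 invertible; invert it back so the instruction is unchanged.  */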
1415 invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1416 EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1417 EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
 1421 /* If any destination of a crossing edge does not have a label, add a label;
 1422    convert any easy fall-through crossing edges to unconditional jumps.  */
1424 static void
1425 add_labels_and_missing_jumps (VEC(edge, heap) *crossing_edges)
1427 size_t i;
1428 edge e;
1430 FOR_EACH_VEC_ELT (edge, crossing_edges, i, e)
1432 basic_block src = e->src;
1433 basic_block dest = e->dest;
1434 rtx label, new_jump;
1436 if (dest == EXIT_BLOCK_PTR)
1437 continue;
1439 /* Make sure dest has a label. */
1440 label = block_label (dest);
1442 /* Nothing to do for non-fallthru edges. */
1443 if (src == ENTRY_BLOCK_PTR)
1444 continue;
1445 if ((e->flags & EDGE_FALLTHRU) == 0)
1446 continue;
1448 /* If the block does not end with a control flow insn, then we
1449 can trivially add a jump to the end to fixup the crossing.
1450 Otherwise the jump will have to go in a new bb, which will
1451 be handled by fix_up_fall_thru_edges function. */
1452 if (control_flow_insn_p (BB_END (src)))
1453 continue;
1455 /* Make sure there's only one successor. */
1456 gcc_assert (single_succ_p (src));
1458 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1459 BB_END (src) = new_jump;
1460 JUMP_LABEL (new_jump) = label;
1461 LABEL_NUSES (label) += 1;
1463 emit_barrier_after_bb (src);
1465 /* Mark edge as non-fallthru. */
1466 e->flags &= ~EDGE_FALLTHRU;
1470 /* Find any bb's where the fall-through edge is a crossing edge (note that
1471 these bb's must also contain a conditional jump or end with a call
1472 instruction; we've already dealt with fall-through edges for blocks
1473 that didn't have a conditional jump or didn't end with call instruction
1474 in the call to add_labels_and_missing_jumps). Convert the fall-through
1475 edge to non-crossing edge by inserting a new bb to fall-through into.
1476 The new bb will contain an unconditional jump (crossing edge) to the
1477 original fall through destination. */
1479 static void
1480 fix_up_fall_thru_edges (void)
1482 basic_block cur_bb;
1483 basic_block new_bb;
1484 edge succ1;
1485 edge succ2;
1486 edge fall_thru;
1487 edge cond_jump = NULL;
1488 edge e;
1489 bool cond_jump_crosses;
1490 int invert_worked;
1491 rtx old_jump;
1492 rtx fall_thru_label;
1494 FOR_EACH_BB (cur_bb)
1496 fall_thru = NULL;
1497 if (EDGE_COUNT (cur_bb->succs) > 0)
1498 succ1 = EDGE_SUCC (cur_bb, 0);
1499 else
1500 succ1 = NULL;
1502 if (EDGE_COUNT (cur_bb->succs) > 1)
1503 succ2 = EDGE_SUCC (cur_bb, 1);
1504 else
1505 succ2 = NULL;
1507 /* Find the fall-through edge. */
1509 if (succ1
1510 && (succ1->flags & EDGE_FALLTHRU))
1512 fall_thru = succ1;
1513 cond_jump = succ2;
1515 else if (succ2
1516 && (succ2->flags & EDGE_FALLTHRU))
1518 fall_thru = succ2;
1519 cond_jump = succ1;
1521 else if (succ1
1522 && (block_ends_with_call_p (cur_bb)
1523 || can_throw_internal (BB_END (cur_bb))))
1525 edge e;
1526 edge_iterator ei;
1528 /* Find EDGE_CAN_FALLTHRU edge. */
1529 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1530 if (e->flags & EDGE_CAN_FALLTHRU)
1532 fall_thru = e;
1533 break;
1537 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1539 /* Check to see if the fall-thru edge is a crossing edge. */
1541 if (fall_thru->flags & EDGE_CROSSING)
1543 /* The fall_thru edge crosses; now check the cond jump edge, if
1544 it exists. */
1546 cond_jump_crosses = true;
1547 invert_worked = 0;
1548 old_jump = BB_END (cur_bb);
1550 /* Find the jump instruction, if there is one. */
1552 if (cond_jump)
1554 if (!(cond_jump->flags & EDGE_CROSSING))
1555 cond_jump_crosses = false;
1557 /* We know the fall-thru edge crosses; if the cond
1558 jump edge does NOT cross, and its destination is the
1559 next block in the bb order, invert the jump
1560 (i.e. fix it so the fall through does not cross and
1561 the cond jump does). */
1563 if (!cond_jump_crosses
1564 && cur_bb->aux == cond_jump->dest)
1566 /* Find label in fall_thru block. We've already added
1567 any missing labels, so there must be one. */
1569 fall_thru_label = block_label (fall_thru->dest);
1571 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1572 invert_worked = invert_jump (old_jump,
1573 fall_thru_label,0);
1574 if (invert_worked)
1576 fall_thru->flags &= ~EDGE_FALLTHRU;
1577 cond_jump->flags |= EDGE_FALLTHRU;
1578 update_br_prob_note (cur_bb);
1579 e = fall_thru;
1580 fall_thru = cond_jump;
1581 cond_jump = e;
1582 cond_jump->flags |= EDGE_CROSSING;
1583 fall_thru->flags &= ~EDGE_CROSSING;
1588 if (cond_jump_crosses || !invert_worked)
1590 /* This is the case where both edges out of the basic
1591 block are crossing edges. Here we will fix up the
1592 fall through edge. The jump edge will be taken care
1593 of later. The EDGE_CROSSING flag of fall_thru edge
1594 is unset before the call to force_nonfallthru
1595 function because if a new basic-block is created
1596 this edge remains in the current section boundary
1597 while the edge between new_bb and the fall_thru->dest
1598 becomes EDGE_CROSSING. */
1600 fall_thru->flags &= ~EDGE_CROSSING;
1601 new_bb = force_nonfallthru (fall_thru);
1603 if (new_bb)
1605 new_bb->aux = cur_bb->aux;
1606 cur_bb->aux = new_bb;
1608 /* Make sure new fall-through bb is in same
1609 partition as bb it's falling through from. */
1611 BB_COPY_PARTITION (new_bb, cur_bb);
1612 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1614 else
1616 /* If a new basic-block was not created; restore
1617 the EDGE_CROSSING flag. */
1618 fall_thru->flags |= EDGE_CROSSING;
1621 /* Add barrier after new jump */
1622 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1629 /* This function checks the destination block of a "crossing jump" to
1630 see if it has any crossing predecessors that begin with a code label
1631 and end with an unconditional jump. If so, it returns that predecessor
1632 block. (This is to avoid creating lots of new basic blocks that all
1633 contain unconditional jumps to the same destination). */
1635 static basic_block
1636 find_jump_block (basic_block jump_dest)
1638 basic_block source_bb = NULL;
1639 edge e;
1640 rtx insn;
1641 edge_iterator ei;
1643 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1644 if (e->flags & EDGE_CROSSING)
1646 basic_block src = e->src;
1648 /* Check each predecessor to see if it has a label, and contains
1649 only one executable instruction, which is an unconditional jump.
1650 If so, we can use it. */
1652 if (LABEL_P (BB_HEAD (src)))
1653 for (insn = BB_HEAD (src);
1654 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1655 insn = NEXT_INSN (insn))
1657 if (INSN_P (insn)
1658 && insn == BB_END (src)
1659 && JUMP_P (insn)
1660 && !any_condjump_p (insn))
1662 source_bb = src;
1663 break;
1667 if (source_bb)
1668 break;
1671 return source_bb;
1674 /* Find all BB's with conditional jumps that are crossing edges;
1675 insert a new bb and make the conditional jump branch to the new
1676 bb instead (make the new bb same color so conditional branch won't
1677 be a 'crossing' edge). Insert an unconditional jump from the
1678 new bb to the original destination of the conditional jump. */
1680 static void
1681 fix_crossing_conditional_branches (void)
1683 basic_block cur_bb;
1684 basic_block new_bb;
1685 basic_block dest;
1686 edge succ1;
1687 edge succ2;
1688 edge crossing_edge;
1689 edge new_edge;
1690 rtx old_jump;
1691 rtx set_src;
1692 rtx old_label = NULL_RTX;
1693 rtx new_label;
1695 FOR_EACH_BB (cur_bb)
1697 crossing_edge = NULL;
1698 if (EDGE_COUNT (cur_bb->succs) > 0)
1699 succ1 = EDGE_SUCC (cur_bb, 0);
1700 else
1701 succ1 = NULL;
1703 if (EDGE_COUNT (cur_bb->succs) > 1)
1704 succ2 = EDGE_SUCC (cur_bb, 1);
1705 else
1706 succ2 = NULL;
1708 /* We already took care of fall-through edges, so only one successor
1709 can be a crossing edge. */
1711 if (succ1 && (succ1->flags & EDGE_CROSSING))
1712 crossing_edge = succ1;
1713 else if (succ2 && (succ2->flags & EDGE_CROSSING))
1714 crossing_edge = succ2;
1716 if (crossing_edge)
1718 old_jump = BB_END (cur_bb);
1720 /* Check to make sure the jump instruction is a
1721 conditional jump. */
1723 set_src = NULL_RTX;
1725 if (any_condjump_p (old_jump))
1727 if (GET_CODE (PATTERN (old_jump)) == SET)
1728 set_src = SET_SRC (PATTERN (old_jump));
1729 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1731 set_src = XVECEXP (PATTERN (old_jump), 0,0);
1732 if (GET_CODE (set_src) == SET)
1733 set_src = SET_SRC (set_src);
1734 else
1735 set_src = NULL_RTX;
1739 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1741 if (GET_CODE (XEXP (set_src, 1)) == PC)
1742 old_label = XEXP (set_src, 2);
1743 else if (GET_CODE (XEXP (set_src, 2)) == PC)
1744 old_label = XEXP (set_src, 1);
1746 /* Check to see if new bb for jumping to that dest has
1747 already been created; if so, use it; if not, create
1748 a new one. */
1750 new_bb = find_jump_block (crossing_edge->dest);
1752 if (new_bb)
1753 new_label = block_label (new_bb);
1754 else
1756 basic_block last_bb;
1757 rtx new_jump;
1759 /* Create new basic block to be dest for
1760 conditional jump. */
1762 /* Put appropriate instructions in new bb. */
1764 new_label = gen_label_rtx ();
1765 emit_label (new_label);
1767 gcc_assert (GET_CODE (old_label) == LABEL_REF);
1768 old_label = JUMP_LABEL (old_jump);
1769 new_jump = emit_jump_insn (gen_jump (old_label));
1770 JUMP_LABEL (new_jump) = old_label;
1772 last_bb = EXIT_BLOCK_PTR->prev_bb;
1773 new_bb = create_basic_block (new_label, new_jump, last_bb);
1774 new_bb->aux = last_bb->aux;
1775 last_bb->aux = new_bb;
1777 emit_barrier_after_bb (new_bb);
1779 /* Make sure new bb is in same partition as source
1780 of conditional branch. */
1781 BB_COPY_PARTITION (new_bb, cur_bb);
1784 /* Make old jump branch to new bb. */
1786 redirect_jump (old_jump, new_label, 0);
1788 /* Remove crossing_edge as predecessor of 'dest'. */
1790 dest = crossing_edge->dest;
1792 redirect_edge_succ (crossing_edge, new_bb);
1794 /* Make a new edge from new_bb to old dest; new edge
1795 will be a successor for new_bb and a predecessor
1796 for 'dest'. */
1798 if (EDGE_COUNT (new_bb->succs) == 0)
1799 new_edge = make_edge (new_bb, dest, 0);
1800 else
1801 new_edge = EDGE_SUCC (new_bb, 0);
1803 crossing_edge->flags &= ~EDGE_CROSSING;
1804 new_edge->flags |= EDGE_CROSSING;
1810 /* Find any unconditional branches that cross between hot and cold
1811 sections. Convert them into indirect jumps instead. */
1813 static void
1814 fix_crossing_unconditional_branches (void)
1816 basic_block cur_bb;
1817 rtx last_insn;
1818 rtx label;
1819 rtx label_addr;
1820 rtx indirect_jump_sequence;
1821 rtx jump_insn = NULL_RTX;
1822 rtx new_reg;
1823 rtx cur_insn;
1824 edge succ;
1826 FOR_EACH_BB (cur_bb)
1828 last_insn = BB_END (cur_bb);
1830 if (EDGE_COUNT (cur_bb->succs) < 1)
1831 continue;
1833 succ = EDGE_SUCC (cur_bb, 0);
1835 /* Check to see if bb ends in a crossing (unconditional) jump. At
1836 this point, no crossing jumps should be conditional. */
1838 if (JUMP_P (last_insn)
1839 && (succ->flags & EDGE_CROSSING))
1841 rtx label2, table;
1843 gcc_assert (!any_condjump_p (last_insn));
1845 /* Make sure the jump is not already an indirect or table jump. */
1847 if (!computed_jump_p (last_insn)
1848 && !tablejump_p (last_insn, &label2, &table))
1850 /* We have found a "crossing" unconditional branch. Now
 1851	       we must convert it to an indirect jump.  First create a
 1852	       reference to the label, as the target for the jump.  */
1854 label = JUMP_LABEL (last_insn);
1855 label_addr = gen_rtx_LABEL_REF (Pmode, label);
1856 LABEL_NUSES (label) += 1;
1858 /* Get a register to use for the indirect jump. */
1860 new_reg = gen_reg_rtx (Pmode);
 1862	    /* Generate the indirect jump sequence.  */
1864 start_sequence ();
1865 emit_move_insn (new_reg, label_addr);
1866 emit_indirect_jump (new_reg);
1867 indirect_jump_sequence = get_insns ();
1868 end_sequence ();
1870 /* Make sure every instruction in the new jump sequence has
1871 its basic block set to be cur_bb. */
1873 for (cur_insn = indirect_jump_sequence; cur_insn;
1874 cur_insn = NEXT_INSN (cur_insn))
1876 if (!BARRIER_P (cur_insn))
1877 BLOCK_FOR_INSN (cur_insn) = cur_bb;
1878 if (JUMP_P (cur_insn))
1879 jump_insn = cur_insn;
1882 /* Insert the new (indirect) jump sequence immediately before
1883 the unconditional jump, then delete the unconditional jump. */
1885 emit_insn_before (indirect_jump_sequence, last_insn);
1886 delete_insn (last_insn);
1888 /* Make BB_END for cur_bb be the jump instruction (NOT the
1889 barrier instruction at the end of the sequence...). */
1891 BB_END (cur_bb) = jump_insn;
1897 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
1899 static void
1900 add_reg_crossing_jump_notes (void)
1902 basic_block bb;
1903 edge e;
1904 edge_iterator ei;
1906 FOR_EACH_BB (bb)
1907 FOR_EACH_EDGE (e, ei, bb->succs)
1908 if ((e->flags & EDGE_CROSSING)
1909 && JUMP_P (BB_END (e->src)))
1910 add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
1913 /* Verify, in the basic block chain, that there is at most one switch
1914 between hot/cold partitions. This is modelled on
1915 rtl_verify_flow_info_1, but it cannot go inside that function
1916 because this condition will not be true until after
1917 reorder_basic_blocks is called. */
1919 static void
1920 verify_hot_cold_block_grouping (void)
1922 basic_block bb;
1923 int err = 0;
1924 bool switched_sections = false;
1925 int current_partition = 0;
1927 FOR_EACH_BB (bb)
1929 if (!current_partition)
1930 current_partition = BB_PARTITION (bb);
1931 if (BB_PARTITION (bb) != current_partition)
1933 if (switched_sections)
1935 error ("multiple hot/cold transitions found (bb %i)",
1936 bb->index);
1937 err = 1;
1939 else
1941 switched_sections = true;
1942 current_partition = BB_PARTITION (bb);
1947 gcc_assert(!err);
 1950 /* Reorder basic blocks.  The main entry point to this file.  */
1953 static void
1954 reorder_basic_blocks (void)
1956 int n_traces;
1957 int i;
1958 struct trace *traces;
1960 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
1962 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1963 return;
1965 set_edge_can_fallthru_flag ();
1966 mark_dfs_back_edges ();
1968 /* We are estimating the length of uncond jump insn only once since the code
1969 for getting the insn length always returns the minimal length now. */
1970 if (uncond_jump_length == 0)
1971 uncond_jump_length = get_uncond_jump_length ();
1973 /* We need to know some information for each basic block. */
1974 array_size = GET_ARRAY_SIZE (last_basic_block);
1975 bbd = XNEWVEC (bbro_basic_block_data, array_size);
1976 for (i = 0; i < array_size; i++)
1978 bbd[i].start_of_trace = -1;
1979 bbd[i].end_of_trace = -1;
1980 bbd[i].in_trace = -1;
1981 bbd[i].visited = 0;
1982 bbd[i].heap = NULL;
1983 bbd[i].node = NULL;
1986 traces = XNEWVEC (struct trace, n_basic_blocks);
1987 n_traces = 0;
1988 find_traces (&n_traces, traces);
1989 connect_traces (n_traces, traces);
1990 FREE (traces);
1991 FREE (bbd);
1993 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
1995 if (dump_file)
1997 if (dump_flags & TDF_DETAILS)
1998 dump_reg_info (dump_file);
1999 dump_flow_info (dump_file, dump_flags);
2002 if (flag_reorder_blocks_and_partition)
2003 verify_hot_cold_block_grouping ();
2006 /* Determine which partition the first basic block in the function
2007 belongs to, then find the first basic block in the current function
2008 that belongs to a different section, and insert a
2009 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2010 instruction stream. When writing out the assembly code,
2011 encountering this note will make the compiler switch between the
2012 hot and cold text sections. */
2014 static void
2015 insert_section_boundary_note (void)
2017 basic_block bb;
2018 rtx new_note;
2019 int first_partition = 0;
2021 if (!flag_reorder_blocks_and_partition)
2022 return;
2024 FOR_EACH_BB (bb)
2026 if (!first_partition)
2027 first_partition = BB_PARTITION (bb);
2028 if (BB_PARTITION (bb) != first_partition)
2030 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
2031 BB_HEAD (bb));
2032 /* ??? This kind of note always lives between basic blocks,
2033 but add_insn_before will set BLOCK_FOR_INSN anyway. */
2034 BLOCK_FOR_INSN (new_note) = NULL;
2035 break;
2040 /* Duplicate the blocks containing computed gotos. This basically unfactors
2041 computed gotos that were factored early on in the compilation process to
 2042    speed up edge based data flow.  We used to not unfactor them again,
2043 which can seriously pessimize code with many computed jumps in the source
2044 code, such as interpreters. See e.g. PR15242. */
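/* A sketch of the effect on a (hypothetical) bytecode interpreter: after
   factoring, every opcode handler jumps to one shared block ending in
   "goto *dispatch_table[op];".  This pass duplicates that dispatch block
   back into each handler, so every handler ends in its own indirect jump,
   which typically predicts much better than a single shared one.  */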
2046 static bool
2047 gate_duplicate_computed_gotos (void)
2049 if (targetm.cannot_modify_jumps_p ())
2050 return false;
2051 return (optimize > 0
2052 && flag_expensive_optimizations
2053 && ! optimize_function_for_size_p (cfun));
2057 static unsigned int
2058 duplicate_computed_gotos (void)
2060 basic_block bb, new_bb;
2061 bitmap candidates;
2062 int max_size;
2064 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2065 return 0;
2067 clear_bb_flags ();
2068 cfg_layout_initialize (0);
2070 /* We are estimating the length of uncond jump insn only once
2071 since the code for getting the insn length always returns
2072 the minimal length now. */
2073 if (uncond_jump_length == 0)
2074 uncond_jump_length = get_uncond_jump_length ();
2076 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2077 candidates = BITMAP_ALLOC (NULL);
2079 /* Look for blocks that end in a computed jump, and see if such blocks
2080 are suitable for unfactoring. If a block is a candidate for unfactoring,
2081 mark it in the candidates. */
2082 FOR_EACH_BB (bb)
2083 {
2084 rtx insn;
2085 edge e;
2086 edge_iterator ei;
2087 int size, all_flags;
2089 /* Build the reorder chain for the original order of blocks. */
2090 if (bb->next_bb != EXIT_BLOCK_PTR)
2091 bb->aux = bb->next_bb;
2093 /* Obviously the block has to end in a computed jump. */
2094 if (!computed_jump_p (BB_END (bb)))
2095 continue;
2097 /* Only consider blocks that can be duplicated. */
2098 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2099 || !can_duplicate_block_p (bb))
2100 continue;
2102 /* Make sure that the block is small enough. */
2103 size = 0;
2104 FOR_BB_INSNS (bb, insn)
2105 if (INSN_P (insn))
2106 {
2107 size += get_attr_min_length (insn);
2108 if (size > max_size)
2109 break;
2110 }
2111 if (size > max_size)
2112 continue;
2114 /* Final check: there must not be any incoming abnormal edges. */
2115 all_flags = 0;
2116 FOR_EACH_EDGE (e, ei, bb->preds)
2117 all_flags |= e->flags;
2118 if (all_flags & EDGE_COMPLEX)
2119 continue;
2121 bitmap_set_bit (candidates, bb->index);
2122 }
2124 /* Nothing to do if there is no computed jump here. */
2125 if (bitmap_empty_p (candidates))
2126 goto done;
2128 /* Duplicate computed gotos. */
2129 FOR_EACH_BB (bb)
2130 {
2131 if (bb->flags & BB_VISITED)
2132 continue;
2134 bb->flags |= BB_VISITED;
2136 /* BB must have one outgoing edge. That edge must not lead to
2137 the exit block or the next block.
2138 The destination must have more than one predecessor. */
2139 if (!single_succ_p (bb)
2140 || single_succ (bb) == EXIT_BLOCK_PTR
2141 || single_succ (bb) == bb->next_bb
2142 || single_pred_p (single_succ (bb)))
2143 continue;
2145 /* The successor block has to be a duplication candidate. */
2146 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2147 continue;
2149 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2150 new_bb->aux = bb->aux;
2151 bb->aux = new_bb;
2152 new_bb->flags |= BB_VISITED;
2153 }
2155 done:
2156 cfg_layout_finalize ();
2158 BITMAP_FREE (candidates);
2159 return 0;
2160 }
2162 struct rtl_opt_pass pass_duplicate_computed_gotos =
2163 {
2164 {
2165 RTL_PASS,
2166 "compgotos", /* name */
2167 gate_duplicate_computed_gotos, /* gate */
2168 duplicate_computed_gotos, /* execute */
2169 NULL, /* sub */
2170 NULL, /* next */
2171 0, /* static_pass_number */
2172 TV_REORDER_BLOCKS, /* tv_id */
2173 0, /* properties_required */
2174 0, /* properties_provided */
2175 0, /* properties_destroyed */
2176 0, /* todo_flags_start */
2177 TODO_verify_rtl_sharing, /* todo_flags_finish */
2178 }
2179 };
2182 /* This function is the main 'entrance' for the optimization that
2183 partitions hot and cold basic blocks into separate sections of the
2184 .o file (to improve performance and cache locality). Ideally it
2185 would be called after all optimizations that rearrange the CFG have
2186 been called. However part of this optimization may introduce new
2187 register usage, so it must be called before register allocation has
2188 occurred. This means that this optimization is actually called
2189 well before the optimization that reorders basic blocks (see
2190 function above).
2192 This optimization checks the feedback information to determine
2193 which basic blocks are hot or cold, and updates flags on the basic blocks
2194 to indicate which section they belong in. This information is
2195 later used for writing out sections in the .o file. Because hot
2196 and cold sections can be arbitrarily large (within the bounds of
2197 memory), far beyond the size of a single function, it is necessary
2198 to fix up all edges that cross section boundaries, to make sure the
2199 instructions used can actually span the required distance. The
2200 fixes are described below.
2202 Fall-through edges must be changed into jumps; it is not safe or
2203 legal to fall through across a section boundary. Whenever a
2204 fall-through edge crossing a section boundary is encountered, a new
2205 basic block is inserted (in the same section as the fall-through
2206 source), and the fall through edge is redirected to the new basic
2207 block. The new basic block contains an unconditional jump to the
2208 original fall-through target. (If the unconditional jump is
2209 insufficient to cross section boundaries, that is dealt with a
2210 little later, see below).
2212 In order to deal with architectures that have short conditional
2213 branches (which cannot span all of memory) we take any conditional
2214 jump that attempts to cross a section boundary and add a level of
2215 indirection: it becomes a conditional jump to a new basic block, in
2216 the same section. The new basic block contains an unconditional
2217 jump to the original target, in the other section.
2219 For those architectures whose unconditional branch is also
2220 incapable of reaching all of memory, those unconditional jumps are
2221 converted into indirect jumps, through a register.
2223 IMPORTANT NOTE: This optimization causes some messy interactions
2224 with the cfg cleanup optimizations; those optimizations want to
2225 merge blocks wherever possible, and to collapse indirect jump
2226 sequences (change "A jumps to B jumps to C" directly into "A jumps
2227 to C"). Those optimizations can undo the jump fixes that
2228 partitioning is required to make (see above), in order to ensure
2229 that jumps attempting to cross section boundaries are really able
2230 to cover whatever distance the jump requires (on many architectures
2231 conditional or unconditional jumps are not able to reach all of
2232 memory). Therefore tests have to be inserted into each such
2233 optimization to make sure that it does not undo stuff necessary to
2234 cross partition boundaries. This would be much less of a problem
2235 if we could perform this optimization later in the compilation, but
2236 unfortunately the fact that we may need to create indirect jumps
2237 (through registers) requires that this optimization be performed
2238 before register allocation.
2240 Hot and cold basic blocks are partitioned and put in separate
2241 sections of the .o file, to reduce paging and improve cache
2242 performance (hopefully). This can result in bits of code from the
2243 same function being widely separated in the .o file. However this
2244 is not apparent from the current bb structure. Therefore we must take
2245 care to ensure that: 1). There are no fall_thru edges that cross
2246 between sections; 2). For those architectures which have "short"
2247 conditional branches, all conditional branches that attempt to
2248 cross between sections are converted to unconditional branches;
2249 and, 3). For those architectures which have "short" unconditional
2250 branches, all unconditional branches that attempt to cross between
2251 sections are converted to indirect jumps.
2253 The code for fixing up fall_thru edges that cross between hot and
2254 cold basic blocks does so by creating new basic blocks containing
2255 unconditional branches to the appropriate label in the "other"
2256 section. The new basic block is then put in the same (hot or cold)
2257 section as the original conditional branch, and the fall_thru edge
2258 is modified to fall into the new basic block instead. By adding
2259 this level of indirection we end up with only unconditional branches
2260 crossing between hot and cold sections.
2262 Conditional branches are dealt with by adding a level of indirection.
2263 A new basic block is added in the same (hot/cold) section as the
2264 conditional branch, and the conditional branch is retargeted to the
2265 new basic block. The new basic block contains an unconditional branch
2266 to the original target of the conditional branch (in the other section).
2268 Unconditional branches are dealt with by converting them into
2269 indirect jumps. */
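/* A standalone sketch of the kind of function this pass splits (names are
   invented for illustration; only standard C plus __builtin_expect is
   used).  With profile feedback and -freorder-blocks-and-partition, the
   rarely executed error path below is marked cold and emitted in the cold
   text section while the loop stays hot; the conditional branch from the
   loop into that path is exactly the sort of section-crossing edge the
   fix-ups described above must repair.  */

#include <stdio.h>
#include <stdlib.h>

long
sum_nonnegative (const int *v, unsigned long n)
{
  long sum = 0;
  unsigned long i;

  for (i = 0; i < n; i++)
    {
      if (__builtin_expect (v[i] < 0, 0))
        {
          /* Cold path: rarely taken, placed in the cold partition.  */
          fprintf (stderr, "negative element at index %lu\n", i);
          abort ();
        }
      sum += v[i];
    }
  return sum;
}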
2271 static unsigned
2272 partition_hot_cold_basic_blocks (void)
2273 {
2274 VEC(edge, heap) *crossing_edges;
2276 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2277 return 0;
2279 df_set_flags (DF_DEFER_INSN_RESCAN);
2281 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2282 if (crossing_edges == NULL)
2283 return 0;
2285 /* Make sure the source of any crossing edge ends in a jump and the
2286 destination of any crossing edge has a label. */
2287 add_labels_and_missing_jumps (crossing_edges);
2289 /* Convert each crossing fall_thru edge into a non-crossing fall
2290 thru to a new basic block that ends in an unconditional jump
2291 to the original fall-through dest. */
2292 fix_up_fall_thru_edges ();
2294 /* If the architecture does not have conditional branches that can
2295 span all of memory, convert crossing conditional branches into
2296 crossing unconditional branches. */
2297 if (!HAS_LONG_COND_BRANCH)
2298 fix_crossing_conditional_branches ();
2300 /* If the architecture does not have unconditional branches that
2301 can span all of memory, convert crossing unconditional branches
2302 into indirect jumps. Since adding an indirect jump also adds
2303 a new register usage, update the register usage information as
2304 well. */
2305 if (!HAS_LONG_UNCOND_BRANCH)
2306 fix_crossing_unconditional_branches ();
2308 add_reg_crossing_jump_notes ();
2310 /* Clear bb->aux fields that the above routines were using. */
2311 clear_aux_for_blocks ();
2313 VEC_free (edge, heap, crossing_edges);
2315 /* ??? FIXME: DF generates the bb info for a block immediately.
2316 And by immediately, I mean *during* creation of the block.
2318 #0 df_bb_refs_collect
2319 #1 in df_bb_refs_record
2320 #2 in create_basic_block_structure
2322 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2323 will *always* fail, because no edges can have been added to the
2324 block yet. Which of course means we don't add the right
2325 artificial refs, which means we fail df_verify (much) later.
2327 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2328 that we also shouldn't grab data from the new blocks those new
2329 insns are in either. In this way one can create the block, link
2330 it up properly, and have everything Just Work later, when deferred
2331 insns are processed.
2333 In the meantime, we have no other option but to throw away all
2334 of the DF data and recompute it all. */
2335 if (cfun->eh->lp_array)
2336 {
2337 df_finish_pass (true);
2338 df_scan_alloc (NULL);
2339 df_scan_blocks ();
2340 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2341 data. We blindly generated all of them when creating the new
2342 landing pad. Delete those assignments we don't use. */
2343 df_set_flags (DF_LR_RUN_DCE);
2344 df_analyze ();
2345 }
2347 return TODO_verify_flow | TODO_verify_rtl_sharing;
2348 }
2350 static bool
2351 gate_handle_reorder_blocks (void)
2352 {
2353 if (targetm.cannot_modify_jumps_p ())
2354 return false;
2355 /* Don't reorder blocks when optimizing for size because extra jump insns may
2356 be created; also barriers may create extra padding.
2358 More correctly we should have a block reordering mode that tried to
2359 minimize the combined size of all the jumps. This would more or less
2360 automatically remove extra jumps, but would also try to use more short
2361 jumps instead of long jumps. */
2362 if (!optimize_function_for_speed_p (cfun))
2363 return false;
2364 return (optimize > 0
2365 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2366 }
2369 /* Reorder basic blocks. */
2370 static unsigned int
2371 rest_of_handle_reorder_blocks (void)
2372 {
2373 basic_block bb;
2375 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2376 splitting possibly introduced more crossjumping opportunities. */
2377 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2379 reorder_basic_blocks ();
2380 cleanup_cfg (CLEANUP_EXPENSIVE);
2382 FOR_EACH_BB (bb)
2383 if (bb->next_bb != EXIT_BLOCK_PTR)
2384 bb->aux = bb->next_bb;
2385 cfg_layout_finalize ();
2387 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */
2388 insert_section_boundary_note ();
2389 return 0;
2390 }
2392 struct rtl_opt_pass pass_reorder_blocks =
2393 {
2394 {
2395 RTL_PASS,
2396 "bbro", /* name */
2397 gate_handle_reorder_blocks, /* gate */
2398 rest_of_handle_reorder_blocks, /* execute */
2399 NULL, /* sub */
2400 NULL, /* next */
2401 0, /* static_pass_number */
2402 TV_REORDER_BLOCKS, /* tv_id */
2403 0, /* properties_required */
2404 0, /* properties_provided */
2405 0, /* properties_destroyed */
2406 0, /* todo_flags_start */
2407 TODO_verify_rtl_sharing, /* todo_flags_finish */
2408 }
2409 };
2411 static bool
2412 gate_handle_partition_blocks (void)
2413 {
2414 /* The optimization to partition hot/cold basic blocks into separate
2415 sections of the .o file does not work well with linkonce or with
2416 user defined section attributes. Don't call it if either case
2417 arises. */
2418 return (flag_reorder_blocks_and_partition
2419 && optimize
2420 /* See gate_handle_reorder_blocks. We should not partition if
2421 we are going to omit the reordering. */
2422 && optimize_function_for_speed_p (cfun)
2423 && !DECL_ONE_ONLY (current_function_decl)
2424 && !user_defined_section_attribute);
2425 }
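/* A standalone sketch of source that this gate declines to partition: a
   user-defined section attribute makes the front end set
   user_defined_section_attribute, so the gate above returns false and no
   hot/cold splitting is attempted.  The function and section names are
   invented for illustration.  */

void special_handler (void) __attribute__ ((section (".special_text")));

void
special_handler (void)
{
  /* Body placed in a user-chosen section, which partitioning must leave
     alone.  */
}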
2427 struct rtl_opt_pass pass_partition_blocks =
2428 {
2429 {
2430 RTL_PASS,
2431 "bbpart", /* name */
2432 gate_handle_partition_blocks, /* gate */
2433 partition_hot_cold_basic_blocks, /* execute */
2434 NULL, /* sub */
2435 NULL, /* next */
2436 0, /* static_pass_number */
2437 TV_REORDER_BLOCKS, /* tv_id */
2438 PROP_cfglayout, /* properties_required */
2439 0, /* properties_provided */
2440 0, /* properties_destroyed */
2441 0, /* todo_flags_start */
2442 0 /* todo_flags_finish */
2443 }
2444 };