gcc/bb-reorder.c

/* Basic block reordering routines for the GNU compiler.
   Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see function
   bb_to_key).  Then the algorithm repeatedly adds the most probable
   successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block
   is lower than Branch Threshold, or the frequency of the successor is
   lower than Exec Threshold, the successor will be a seed in one of the
   next rounds.  Each round has these parameters lower than the previous
   one.  The last round has to have these parameters set to zero so that
   the remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will
   be other seeds for this round, and the secondary traces will start from
   them.  If the successor has not been visited in this trace, it is added
   to the trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block
   of the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, the algorithm first checks whether there is an
   edge from the last block of one trace to the first block of another
   trace.  When there are still some unconnected traces it checks whether
   there exists a basic block BB such that BB is a successor of the last
   bb of one trace and BB is a predecessor of the first block of another
   trace.  In this case, BB is duplicated and the traces are connected
   through this duplicate.  The remaining traces are simply connected, so
   there will be a jump to the beginning of the next trace.

   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html  */
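
/* An illustrative example of the algorithm above (the block numbers and
   probabilities are invented for this comment).  Suppose the CFG is

	0 -> 1  (90%)	1 -> 2  (80%)	2 -> 4  (100%)
	0 -> 3  (10%)	1 -> 4  (20%)	3 -> 4  (100%)

   In the first round the seed is the entry successor, block 0, and the
   trace grows 0, 1, 2, 4 by always appending the most probable suitable
   successor.  The edge 0 -> 3 is below the first round's Branch Threshold
   (10% < 40%), so block 3 is "sent" to a later round, where it becomes
   the seed of a secondary trace.  Connecting the traces then produces the
   layout 0 1 2 4 3, with a jump from 3 back to the already placed
   block 4.  */
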
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "timevar.h"
76 #include "output.h"
77 #include "cfglayout.h"
78 #include "fibheap.h"
79 #include "target.h"
80 #include "function.h"
81 #include "tm_p.h"
82 #include "obstack.h"
83 #include "expr.h"
84 #include "params.h"
85 #include "toplev.h"
/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
#define N_ROUNDS 5

/* Stubs in case we don't have a return insn.
   We have to check at run time too, not only at compile time.  */

#ifndef HAVE_return
#define HAVE_return 0
#define gen_return() NULL_RTX
#endif

/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};

/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
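
/* For example: REG_BR_PROB_BASE is 10000, so with branch_threshold[0] == 400
   an edge must have probability of at least 400/1000 * 10000 == 4000 (40%)
   to be followed in the first round, and a block must have at least half
   (500/1000) of the maximal entry frequency to be hot enough for it.  The
   zero entries in the last rounds accept everything that remains.  */
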
/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
   block the edge destination is not duplicated while connecting traces.  */
#define DUPLICATION_THRESHOLD 100

/* Length of unconditional jump instruction.  */
static int uncond_jump_length;

/* Structure to hold needed information for each basic block.  */
typedef struct bbro_basic_block_data_def
{
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
  int start_of_trace;

  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
  int end_of_trace;

  /* Which trace is the bb in?  */
  int in_trace;

  /* Which heap is BB in (if any)?  */
  fibheap_t heap;

  /* Which heap node is BB in (if any)?  */
  fibnode_t node;
} bbro_basic_block_data;

/* The current size of the following dynamic array.  */
static int array_size;

/* The array which holds needed information for basic blocks.  */
static bbro_basic_block_data *bbd;

/* To avoid frequent reallocation, the size of the array is greater than
   needed; the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
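
/* For example, GET_ARRAY_SIZE (100) is ((100 / 4) + 1) * 5 == 130, which is
   at least the promised 1.25 * 100 == 125; the "+ 1" keeps the result
   strictly greater than X even when X is already divisible by 4.  */
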
/* Free the memory and set the pointer to NULL.  */
#define FREE(P) (gcc_assert (P), free (P), P = 0)

/* Structure for holding information about a trace.  */
struct trace
{
  /* First and last basic block of the trace.  */
  basic_block first, last;

  /* The round of the STC creation which this trace was found in.  */
  int round;

  /* The length (i.e. the number of basic blocks) of the trace.  */
  int length;
};

/* Maximum frequency and count of one of the entry blocks.  */
static int max_entry_frequency;
static gcov_type max_entry_count;

/* Local function prototypes.  */
static void find_traces (int *, struct trace *);
static basic_block rotate_loop (edge, struct trace *, int);
static void mark_bb_visited (basic_block, int);
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
				 int, fibheap_t *, int);
static basic_block copy_bb (basic_block, edge, basic_block, int);
static fibheapkey_t bb_to_key (basic_block);
static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
static void connect_traces (int, struct trace *);
static bool copy_bb_p (basic_block, int);
static int get_uncond_jump_length (void);
static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
								  int *,
								  int *);
static void add_labels_and_missing_jumps (edge *, int);
static void add_reg_crossing_jump_notes (void);
static void fix_up_fall_thru_edges (void);
static void fix_edges_for_rarely_executed_code (edge *, int);
static void fix_crossing_conditional_branches (void);
static void fix_crossing_unconditional_branches (void);

/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are 1).
   If the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be
   collected into traces until the very last round); or 2). There will
   be another round, and the basic block is not "hot enough" for the
   current round of trace collection.  */

static bool
push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
		      int exec_th, gcov_type count_th)
{
  bool there_exists_another_round;
  bool block_not_hot_enough;

  there_exists_another_round = round < number_of_rounds - 1;

  block_not_hot_enough = (bb->frequency < exec_th
			  || bb->count < count_th
			  || probably_never_executed_bb_p (bb));

  if (there_exists_another_round
      && block_not_hot_enough)
    return true;
  else
    return false;
}

/* Find the traces for Software Trace Cache.  Chain each trace through
   RBI()->next.  Store the number of traces to N_TRACES and description of
   traces to TRACES.  */

static void
find_traces (int *n_traces, struct trace *traces)
{
  int i;
  int number_of_rounds;
  edge e;
  edge_iterator ei;
  fibheap_t heap;

  /* Add one extra round of trace collection when partitioning hot/cold
     basic blocks into separate sections.  The last round is for all the
     cold blocks (and ONLY the cold blocks).  */

  number_of_rounds = N_ROUNDS - 1;

  /* Insert entry points of function into heap.  */
  heap = fibheap_new ();
  max_entry_frequency = 0;
  max_entry_count = 0;
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      bbd[e->dest->index].heap = heap;
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
						 e->dest);
      if (e->dest->frequency > max_entry_frequency)
	max_entry_frequency = e->dest->frequency;
      if (e->dest->count > max_entry_count)
	max_entry_count = e->dest->count;
    }

  /* Find the traces.  */
  for (i = 0; i < number_of_rounds; i++)
    {
      gcov_type count_threshold;

      if (dump_file)
	fprintf (dump_file, "STC - round %d\n", i + 1);

      if (max_entry_count < INT_MAX / 1000)
	count_threshold = max_entry_count * exec_threshold[i] / 1000;
      else
	count_threshold = max_entry_count / 1000 * exec_threshold[i];
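
      /* Note on the split above: exec_threshold[i] is at most 1000, so
	 when max_entry_count < INT_MAX / 1000 the product cannot overflow
	 and multiplying first is exact.  Otherwise we divide first; that
	 loses at most 999 of the count, negligible at such magnitudes.
	 E.g. for max_entry_count == 3000000000 and exec_threshold[i] == 500
	 the second form yields 3000000 * 500 == 1500000000.  */
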
      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
			   max_entry_frequency * exec_threshold[i] / 1000,
			   count_threshold, traces, n_traces, i, &heap,
			   number_of_rounds);
    }
  fibheap_delete (heap);

  if (dump_file)
    {
      for (i = 0; i < *n_traces; i++)
	{
	  basic_block bb;
	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
		   traces[i].round + 1);
	  for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
	}
      fflush (dump_file);
    }
}

/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
   (with sequential number TRACE_N).  */

static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
{
  basic_block bb;

  /* Information about the best end (end after rotation) of the loop.  */
  basic_block best_bb = NULL;
  edge best_edge = NULL;
  int best_freq = -1;
  gcov_type best_count = -1;
  /* The best edge is preferred when its destination is not visited yet
     or is a start block of some trace.  */
  bool is_preferred = false;

  /* Find the most frequent edge that goes out from current trace.  */
  bb = back_edge->dest;
  do
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && e->dest->il.rtl->visited != trace_n
	    && (e->flags & EDGE_CAN_FALLTHRU)
	    && !(e->flags & EDGE_COMPLEX))
	  {
	    if (is_preferred)
	      {
		/* The best edge is preferred.  */
		if (!e->dest->il.rtl->visited
		    || bbd[e->dest->index].start_of_trace >= 0)
		  {
		    /* The current edge E is also preferred.  */
		    int freq = EDGE_FREQUENCY (e);
		    if (freq > best_freq || e->count > best_count)
		      {
			best_freq = freq;
			best_count = e->count;
			best_edge = e;
			best_bb = bb;
		      }
		  }
	      }
	    else
	      {
		if (!e->dest->il.rtl->visited
		    || bbd[e->dest->index].start_of_trace >= 0)
		  {
		    /* The current edge E is preferred.  */
		    is_preferred = true;
		    best_freq = EDGE_FREQUENCY (e);
		    best_count = e->count;
		    best_edge = e;
		    best_bb = bb;
		  }
		else
		  {
		    int freq = EDGE_FREQUENCY (e);
		    if (!best_edge || freq > best_freq || e->count > best_count)
		      {
			best_freq = freq;
			best_count = e->count;
			best_edge = e;
			best_bb = bb;
		      }
		  }
	      }
	  }
      bb = bb->aux;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block
	 of the trace.  */
      if (back_edge->dest == trace->first)
	{
	  trace->first = best_bb->aux;
	}
      else
	{
	  basic_block prev_bb;

	  for (prev_bb = trace->first;
	       prev_bb->aux != back_edge->dest;
	       prev_bb = prev_bb->aux)
	    ;
	  prev_bb->aux = best_bb->aux;

	  /* Try to get rid of uncond jump to cond jump.  */
	  if (single_succ_p (prev_bb))
	    {
	      basic_block header = single_succ (prev_bb);

	      /* Duplicate HEADER if it is a small block containing cond jump
		 in the end.  */
	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
				     NULL_RTX))
		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
	    }
	}
    }
  else
    {
      /* We have not found suitable loop tail so do no rotation.  */
      best_bb = back_edge->src;
    }
  best_bb->aux = NULL;
  return best_bb;
}
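
/* For example (illustrative): if a trace ends with the loop H -> A -> B
   with back edge B -> H, and the most frequent edge leaving the loop is
   A -> X, then best_bb is A and the loop part of the trace is relinked to
   B, H, A.  The source of the best exit edge is now the last block of the
   trace, so the layout can continue naturally through A -> X.  */
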
/* This function marks BB that it was visited in trace number TRACE.  */

static void
mark_bb_visited (basic_block bb, int trace)
{
  bb->il.rtl->visited = trace;
  if (bbd[bb->index].heap)
    {
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;
    }
}

/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include into traces basic blocks whose probability is lower than
   BRANCH_TH or whose frequency is lower than EXEC_TH (or whose count is
   lower than COUNT_TH).  It stores the new traces into TRACES and modifies
   the number of traces *N_TRACES.  Sets the round (which the trace belongs
   to) to ROUND.  It expects that starting basic blocks are in *HEAP and at
   the end it deletes *HEAP and stores starting points for the next round
   into new *HEAP.  */
static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
		     struct trace *traces, int *n_traces, int round,
		     fibheap_t *heap, int number_of_rounds)
{
  /* Heap for discarded basic blocks which are possible starting points for
     the next round.  */
  fibheap_t new_heap = fibheap_new ();

  while (!fibheap_empty (*heap))
    {
      basic_block bb;
      struct trace *trace;
      edge best_edge, e;
      fibheapkey_t key;
      edge_iterator ei;

      bb = fibheap_extract_min (*heap);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;

      if (dump_file)
	fprintf (dump_file, "Getting bb %d\n", bb->index);

      /* If the BB's frequency is too low, send BB to the next round.  When
	 partitioning hot/cold blocks into separate sections, make sure all
	 the cold blocks (and ONLY the cold blocks) go into the (extra)
	 final round.  */

      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
				count_th))
	{
	  int key = bb_to_key (bb);
	  bbd[bb->index].heap = new_heap;
	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Possible start point of next round: %d (key: %d)\n",
		     bb->index, key);
	  continue;
	}

      trace = traces + *n_traces;
      trace->first = bb;
      trace->round = round;
      trace->length = 0;
      bbd[bb->index].in_trace = *n_traces;
      (*n_traces)++;

      do
	{
	  int prob, freq;
	  bool ends_in_call;

	  /* The probability and frequency of the best edge.  */
	  int best_prob = INT_MIN / 2;
	  int best_freq = INT_MIN / 2;

	  best_edge = NULL;
	  mark_bb_visited (bb, *n_traces);
	  trace->length++;

	  if (dump_file)
	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
		     bb->index, *n_traces - 1);

	  ends_in_call = block_ends_with_call_p (bb);

	  /* Select the successor that will be placed after BB.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      gcc_assert (!(e->flags & EDGE_FAKE));

	      if (e->dest == EXIT_BLOCK_PTR)
		continue;

	      if (e->dest->il.rtl->visited
		  && e->dest->il.rtl->visited != *n_traces)
		continue;

	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
		continue;

	      prob = e->probability;
	      freq = e->dest->frequency;

	      /* The only sensible preference for a call instruction is the
		 fallthru edge.  Don't bother selecting anything else.  */
	      if (ends_in_call)
		{
		  if (e->flags & EDGE_CAN_FALLTHRU)
		    {
		      best_edge = e;
		      best_prob = prob;
		      best_freq = freq;
		    }
		  continue;
		}

	      /* Edge that cannot be fallthru or improbable or infrequent
		 successor (i.e. it is an unsuitable successor).  */
	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
		  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
		  || e->count < count_th)
		continue;

	      /* If partitioning hot/cold basic blocks, don't consider edges
		 that cross section boundaries.  */

	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
				 best_edge))
		{
		  best_edge = e;
		  best_prob = prob;
		  best_freq = freq;
		}
	    }

	  /* If the best destination has multiple predecessors, and can be
	     duplicated cheaper than a jump, don't allow it to be added
	     to a trace.  We'll duplicate it when connecting traces.  */
	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
	      && copy_bb_p (best_edge->dest, 0))
	    best_edge = NULL;

	  /* Add all non-selected successors to the heaps.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (e == best_edge
		  || e->dest == EXIT_BLOCK_PTR
		  || e->dest->il.rtl->visited)
		continue;

	      key = bb_to_key (e->dest);

	      if (bbd[e->dest->index].heap)
		{
		  /* E->DEST is already in some heap.  */
		  if (key != bbd[e->dest->index].node->key)
		    {
		      if (dump_file)
			{
			  fprintf (dump_file,
				   "Changing key for bb %d from %ld to %ld.\n",
				   e->dest->index,
				   (long) bbd[e->dest->index].node->key,
				   key);
			}
		      fibheap_replace_key (bbd[e->dest->index].heap,
					   bbd[e->dest->index].node, key);
		    }
		}
	      else
		{
		  fibheap_t which_heap = *heap;

		  prob = e->probability;
		  freq = EDGE_FREQUENCY (e);

		  if (!(e->flags & EDGE_CAN_FALLTHRU)
		      || (e->flags & EDGE_COMPLEX)
		      || prob < branch_th || freq < exec_th
		      || e->count < count_th)
		    {
		      /* When partitioning hot/cold basic blocks, make sure
			 the cold blocks (and only the cold blocks) all get
			 pushed to the last round of trace collection.  */

		      if (push_to_next_round_p (e->dest, round,
						number_of_rounds,
						exec_th, count_th))
			which_heap = new_heap;
		    }

		  bbd[e->dest->index].heap = which_heap;
		  bbd[e->dest->index].node = fibheap_insert (which_heap,
							     key, e->dest);

		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "  Possible start of %s round: %d (key: %ld)\n",
			       (which_heap == new_heap) ? "next" : "this",
			       e->dest->index, (long) key);
		    }
		}
	    }

	  if (best_edge) /* Suitable successor was found.  */
	    {
	      if (best_edge->dest->il.rtl->visited == *n_traces)
		{
		  /* We do nothing with one basic block loops.  */
		  if (best_edge->dest != bb)
		    {
		      if (EDGE_FREQUENCY (best_edge)
			  > 4 * best_edge->dest->frequency / 5)
			{
			  /* The loop has at least 4 iterations.  If the loop
			     header is not the first block of the function
			     we can rotate the loop.  */

			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
			    {
			      if (dump_file)
				{
				  fprintf (dump_file,
					   "Rotating loop %d - %d\n",
					   best_edge->dest->index, bb->index);
				}
			      bb->aux = best_edge->dest;
			      bbd[best_edge->dest->index].in_trace =
							   (*n_traces) - 1;
			      bb = rotate_loop (best_edge, trace, *n_traces);
			    }
			}
		      else
			{
			  /* The loop has less than 4 iterations.  */

			  if (single_succ_p (bb)
			      && copy_bb_p (best_edge->dest, !optimize_size))
			    {
			      bb = copy_bb (best_edge->dest, best_edge, bb,
					    *n_traces);
			      trace->length++;
			    }
			}
		    }

		  /* Terminate the trace.  */
		  break;
		}
	      else
		{
		  /* Check for a situation

		      A
		     /|
		    B |
		     \|
		      C

		     where
		     EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
		     >= EDGE_FREQUENCY (AC).
		     (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
		     Best ordering is then A B C.

		     This situation is created for example by:

		     if (A) B;
		     C;
		     */

		  FOR_EACH_EDGE (e, ei, bb->succs)
		    if (e != best_edge
			&& (e->flags & EDGE_CAN_FALLTHRU)
			&& !(e->flags & EDGE_COMPLEX)
			&& !e->dest->il.rtl->visited
			&& single_pred_p (e->dest)
			&& !(e->flags & EDGE_CROSSING)
			&& single_succ_p (e->dest)
			&& (single_succ_edge (e->dest)->flags
			    & EDGE_CAN_FALLTHRU)
			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
			&& single_succ (e->dest) == best_edge->dest
			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
		      {
			best_edge = e;
			if (dump_file)
			  fprintf (dump_file, "Selecting BB %d\n",
				   best_edge->dest->index);
			break;
		      }

		  bb->aux = best_edge->dest;
		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
		  bb = best_edge->dest;
		}
	    }
	}
      while (best_edge);
      trace->last = bb;
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
      bbd[trace->last->index].end_of_trace = *n_traces - 1;

      /* The trace is terminated so we have to recount the keys in heap
	 (some block can have a lower key because now one of its predecessors
	 is an end of the trace).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest == EXIT_BLOCK_PTR
	      || e->dest->il.rtl->visited)
	    continue;

	  if (bbd[e->dest->index].heap)
	    {
	      key = bb_to_key (e->dest);
	      if (key != bbd[e->dest->index].node->key)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "Changing key for bb %d from %ld to %ld.\n",
			       e->dest->index,
			       (long) bbd[e->dest->index].node->key, key);
		    }
		  fibheap_replace_key (bbd[e->dest->index].heap,
				       bbd[e->dest->index].node,
				       key);
		}
	    }
	}
    }

  fibheap_delete (*heap);

  /* "Return" the new heap.  */
  *heap = new_heap;
}

/* Create a duplicate of the basic block OLD_BB and redirect edge E to it,
   add it to the trace after BB, mark OLD_BB visited and update pass' data
   structures (TRACE is the number of the trace which OLD_BB is duplicated
   to).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
{
  basic_block new_bb;

  new_bb = duplicate_block (old_bb, e);
  BB_COPY_PARTITION (new_bb, old_bb);

  gcc_assert (e->dest == new_bb);
  gcc_assert (!e->dest->il.rtl->visited);

  if (dump_file)
    fprintf (dump_file,
	     "Duplicated bb %d (created bb %d)\n",
	     old_bb->index, new_bb->index);
  new_bb->il.rtl->visited = trace;
  new_bb->aux = bb->aux;
  bb->aux = new_bb;

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
      for (i = array_size; i < new_size; i++)
	{
	  bbd[i].start_of_trace = -1;
	  bbd[i].in_trace = -1;
	  bbd[i].end_of_trace = -1;
	  bbd[i].heap = NULL;
	  bbd[i].node = NULL;
	}
      array_size = new_size;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "Growing the dynamic array to %d elements.\n",
		   array_size);
	}
    }

  bbd[new_bb->index].in_trace = trace;

  return new_bb;
}

/* Compute and return the key (for the heap) of the basic block BB.  */

static fibheapkey_t
bb_to_key (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int priority = 0;

  /* Do not start in probably never executed blocks.  */

  if (BB_PARTITION (bb) == BB_COLD_PARTITION
      || probably_never_executed_bb_p (bb))
    return BB_FREQ_MAX;

  /* Prefer blocks whose predecessor is an end of some trace
     or whose predecessor edge is EDGE_DFS_BACK.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
	  || (e->flags & EDGE_DFS_BACK))
	{
	  int edge_freq = EDGE_FREQUENCY (e);

	  if (edge_freq > priority)
	    priority = edge_freq;
	}
    }

  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
  return -bb->frequency;
}
/* Return true when the edge E from basic block BB is better than the
   temporary best edge (details are in function).  The probability of edge E
   is PROB.  The frequency of the successor is FREQ.  The current best
   probability is BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered to be equivalent when PROB does not differ much
   from BEST_PROB; similarly for frequency.  */

static bool
better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
	       int best_freq, edge cur_best_edge)
{
  bool is_better_edge;

  /* The BEST_* values do not have to be best, but can be a bit smaller than
     maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
  else if (prob < best_prob - diff_prob)
    /* The edge has lower probability than the temporary best edge.  */
    is_better_edge = false;
  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
       probabilities.  The higher frequency of a successor now means
       that there is another edge going into that successor.
       This successor has lower frequency so it is better.  */
    is_better_edge = true;
  else if (freq > best_freq + diff_freq)
    /* This successor has higher frequency so it is worse.  */
    is_better_edge = false;
  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Select the previous successor.  */
    is_better_edge = true;
  else
    is_better_edge = false;

  /* If we are doing hot/cold partitioning, make sure that we always favor
     non-crossing edges over crossing edges.  */

  if (!is_better_edge
      && flag_reorder_blocks_and_partition
      && cur_best_edge
      && (cur_best_edge->flags & EDGE_CROSSING)
      && !(e->flags & EDGE_CROSSING))
    is_better_edge = true;

  return is_better_edge;
}
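
/* Worked example (invented numbers): with best_prob == 8000 the window is
   +/- 800, so an edge with prob == 7500 counts as equivalent and the
   decision falls through to the frequency tests, while an edge with
   prob == 9000 wins outright.  Among equivalent edges the successor with
   the *lower* frequency is preferred: a higher-frequency successor must
   have other incoming edges, so it will get its chance to start a trace
   anyway.  */
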
/* Connect traces in array TRACES, N_TRACES is the count of traces.  */

static void
connect_traces (int n_traces, struct trace *traces)
{
  int i;
  bool *connected;
  bool two_passes;
  int last_trace;
  int current_pass;
  int current_partition;
  int freq_threshold;
  gcov_type count_threshold;

  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
  if (max_entry_count < INT_MAX / 1000)
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
  else
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;

  connected = xcalloc (n_traces, sizeof (bool));
  last_trace = -1;
  current_pass = 1;
  current_partition = BB_PARTITION (traces[0].first);
  two_passes = false;

  if (flag_reorder_blocks_and_partition)
    for (i = 0; i < n_traces && !two_passes; i++)
      if (BB_PARTITION (traces[0].first)
	  != BB_PARTITION (traces[i].first))
	two_passes = true;

  for (i = 0; i < n_traces || (two_passes && current_pass == 1); i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (i >= n_traces)
	{
	  gcc_assert (two_passes && current_pass == 1);
	  i = 0;
	  t = i;
	  current_pass = 2;
	  if (current_partition == BB_HOT_PARTITION)
	    current_partition = BB_COLD_PARTITION;
	  else
	    current_partition = BB_HOT_PARTITION;
	}

      if (connected[t])
	continue;

      if (two_passes
	  && BB_PARTITION (traces[t].first) != current_partition)
	continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
	{
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
	    {
	      int si = e->src->index;

	      if (e->src != ENTRY_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[si].end_of_trace >= 0
		  && !connected[bbd[si].end_of_trace]
		  && (BB_PARTITION (e->src) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[si].end_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[si].end_of_trace].length;
		}
	    }
	  if (best)
	    {
	      best->src->aux = best->dest;
	      t2 = bbd[best->src->index].end_of_trace;
	      connected[t2] = true;

	      if (dump_file)
		{
		  fprintf (dump_file, "Connection: %d %d\n",
			   best->src->index, best->dest->index);
		}
	    }
	  else
	    break;
	}

      if (last_trace >= 0)
	traces[last_trace].last->aux = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
	{
	  /* Find the continuation of the chain.  */
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
	    {
	      int di = e->dest->index;

	      if (e->dest != EXIT_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[di].start_of_trace >= 0
		  && !connected[bbd[di].start_of_trace]
		  && (BB_PARTITION (e->dest) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[di].start_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[di].start_of_trace].length;
		}
	    }

	  if (best)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Connection: %d %d\n",
			   best->src->index, best->dest->index);
		}
	      t = bbd[best->dest->index].start_of_trace;
	      traces[last_trace].last->aux = traces[t].first;
	      connected[t] = true;
	      last_trace = t;
	    }
	  else
	    {
	      /* Try to connect the traces by duplication of 1 block.  */
	      edge e2;
	      basic_block next_bb = NULL;
	      bool try_copy = false;

	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
		if (e->dest != EXIT_BLOCK_PTR
		    && (e->flags & EDGE_CAN_FALLTHRU)
		    && !(e->flags & EDGE_COMPLEX)
		    && (!best || e->probability > best->probability))
		  {
		    edge_iterator ei;
		    edge best2 = NULL;
		    int best2_len = 0;

		    /* If the destination is a start of a trace which is only
		       one block long, then no need to search the successor
		       blocks of the trace.  Accept it.  */
		    if (bbd[e->dest->index].start_of_trace >= 0
			&& traces[bbd[e->dest->index].start_of_trace].length
			   == 1)
		      {
			best = e;
			try_copy = true;
			continue;
		      }

		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
		      {
			int di = e2->dest->index;

			if (e2->dest == EXIT_BLOCK_PTR
			    || ((e2->flags & EDGE_CAN_FALLTHRU)
				&& !(e2->flags & EDGE_COMPLEX)
				&& bbd[di].start_of_trace >= 0
				&& !connected[bbd[di].start_of_trace]
				&& (BB_PARTITION (e2->dest) == current_partition)
				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
				&& (e2->count >= count_threshold)
				&& (!best2
				    || e2->probability > best2->probability
				    || (e2->probability == best2->probability
					&& traces[bbd[di].start_of_trace].length
					   > best2_len))))
			  {
			    best = e;
			    best2 = e2;
			    if (e2->dest != EXIT_BLOCK_PTR)
			      best2_len = traces[bbd[di].start_of_trace].length;
			    else
			      best2_len = INT_MAX;
			    next_bb = e2->dest;
			    try_copy = true;
			  }
		      }
		  }

	      if (flag_reorder_blocks_and_partition)
		try_copy = false;

	      /* Copy tiny blocks always; copy larger blocks only when the
		 edge is traversed frequently enough.  */
	      if (try_copy
		  && copy_bb_p (best->dest,
				!optimize_size
				&& EDGE_FREQUENCY (best) >= freq_threshold
				&& best->count >= count_threshold))
		{
		  basic_block new_bb;

		  if (dump_file)
		    {
		      fprintf (dump_file, "Connection: %d %d ",
			       traces[t].last->index, best->dest->index);
		      if (!next_bb)
			fputc ('\n', dump_file);
		      else if (next_bb == EXIT_BLOCK_PTR)
			fprintf (dump_file, "exit\n");
		      else
			fprintf (dump_file, "%d\n", next_bb->index);
		    }

		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
		  traces[t].last = new_bb;
		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
		    {
		      t = bbd[next_bb->index].start_of_trace;
		      traces[last_trace].last->aux = traces[t].first;
		      connected[t] = true;
		      last_trace = t;
		    }
		  else
		    break;	/* Stop finding the successor traces.  */
		}
	      else
		break;	/* Stop finding the successor traces.  */
	    }
	}
    }

  if (dump_file)
    {
      basic_block bb;

      fprintf (dump_file, "Final order:\n");
      for (bb = traces[0].first; bb; bb = bb->aux)
	fprintf (dump_file, "%d ", bb->index);
      fprintf (dump_file, "\n");
      fflush (dump_file);
    }

  FREE (connected);
}

/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */

static bool
copy_bb_p (basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx insn;

  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  if (code_may_grow && maybe_hot_bb_p (bb))
    max_size *= 8;
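
  /* That is, a hot block may be duplicated even if it grows the code by up
     to eight unconditional-jump lengths; e.g. assuming a 5-byte jump insn
     on the target, blocks of up to 40 bytes qualify (the exact unit is
     whatever get_uncond_jump_length () measured).  */
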
  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
	size += get_attr_length (insn);
    }

  if (size <= max_size)
    return true;

  if (dump_file)
    {
      fprintf (dump_file,
	       "Block %d can't be copied because its size = %d.\n",
	       bb->index, size);
    }

  return false;
}

/* Return the length of unconditional jump instruction.  */

static int
get_uncond_jump_length (void)
{
  rtx label, jump;
  int length;

  label = emit_label_before (gen_label_rtx (), get_insns ());
  jump = emit_jump_insn (gen_jump (label));

  length = get_attr_length (jump);

  delete_insn (jump);
  delete_insn (label);
  return length;
}

/* Find the basic blocks that are rarely executed and need to be moved to
   a separate section of the .o file (to cut down on paging and improve
   cache locality).  */

static void
find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
						      int *n_crossing_edges,
						      int *max_idx)
{
  basic_block bb;
  bool has_hot_blocks = false;
  edge e;
  int i;
  edge_iterator ei;

  /* Mark which partition (hot/cold) each basic block belongs in.  */

  FOR_EACH_BB (bb)
    {
      if (probably_never_executed_bb_p (bb))
	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
      else
	{
	  BB_SET_PARTITION (bb, BB_HOT_PARTITION);
	  has_hot_blocks = true;
	}
    }

  /* Mark every edge that crosses between sections.  */

  i = 0;
  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      {
	if (e->src != ENTRY_BLOCK_PTR
	    && e->dest != EXIT_BLOCK_PTR
	    && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
	  {
	    e->flags |= EDGE_CROSSING;
	    if (i == *max_idx)
	      {
		*max_idx *= 2;
		crossing_edges = xrealloc (crossing_edges,
					   (*max_idx) * sizeof (edge));
	      }
	    crossing_edges[i++] = e;
	  }
	else
	  e->flags &= ~EDGE_CROSSING;
      }
  *n_crossing_edges = i;
}

/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
   a jump) to unconditional jumps.  */

static void
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
{
  int i;
  basic_block src;
  basic_block dest;
  rtx label;
  rtx barrier;
  rtx new_jump;

  for (i = 0; i < n_crossing_edges; i++)
    {
      if (crossing_edges[i])
	{
	  src = crossing_edges[i]->src;
	  dest = crossing_edges[i]->dest;

	  /* Make sure dest has a label.  */

	  if (dest && (dest != EXIT_BLOCK_PTR))
	    {
	      label = block_label (dest);

	      /* Make sure source block ends with a jump.  */

	      if (src && (src != ENTRY_BLOCK_PTR))
		{
		  if (!JUMP_P (BB_END (src)))
		    /* bb just falls through.  */
		    {
		      /* Make sure there's only one successor.  */
		      gcc_assert (single_succ_p (src));

		      /* Find label in dest block.  */
		      label = block_label (dest);

		      new_jump = emit_jump_insn_after (gen_jump (label),
						       BB_END (src));
		      barrier = emit_barrier_after (new_jump);
		      JUMP_LABEL (new_jump) = label;
		      LABEL_NUSES (label) += 1;
		      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
		      /* Mark edge as non-fallthru.  */
		      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
		    } /* end: 'if (!JUMP_P ...'  */
		} /* end: 'if (src && src != ENTRY_BLOCK_PTR ...'  */
	    } /* end: 'if (dest && dest != EXIT_BLOCK_PTR ...'  */
	} /* end: 'if (crossing_edges[i] ...'  */
    } /* end for loop  */
}

/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump; we've already
   dealt with fall-through edges for blocks that didn't have a
   conditional jump in the call to add_labels_and_missing_jumps).
   Convert the fall-through edge to a non-crossing edge by inserting a
   new bb to fall through into.  The new bb will contain an
   unconditional jump (crossing edge) to the original fall through
   destination.  */

static void
fix_up_fall_thru_edges (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  edge succ1;
  edge succ2;
  edge fall_thru;
  edge cond_jump = NULL;
  edge e;
  bool cond_jump_crosses;
  int invert_worked;
  rtx old_jump;
  rtx fall_thru_label;
  rtx barrier;

  FOR_EACH_BB (cur_bb)
    {
      fall_thru = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* Find the fall-through edge.  */

      if (succ1
	  && (succ1->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ1;
	  cond_jump = succ2;
	}
      else if (succ2
	       && (succ2->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ2;
	  cond_jump = succ1;
	}

      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
	{
	  /* Check to see if the fall-thru edge is a crossing edge.  */

	  if (fall_thru->flags & EDGE_CROSSING)
	    {
	      /* The fall_thru edge crosses; now check the cond jump edge, if
		 it exists.  */

	      cond_jump_crosses = true;
	      invert_worked = 0;
	      old_jump = BB_END (cur_bb);

	      /* Find the jump instruction, if there is one.  */

	      if (cond_jump)
		{
		  if (!(cond_jump->flags & EDGE_CROSSING))
		    cond_jump_crosses = false;

		  /* We know the fall-thru edge crosses; if the cond
		     jump edge does NOT cross, and its destination is the
		     next block in the bb order, invert the jump
		     (i.e. fix it so the fall thru does not cross and
		     the cond jump does).  */

		  if (!cond_jump_crosses
		      && cur_bb->aux == cond_jump->dest)
		    {
		      /* Find label in fall_thru block.  We've already added
			 any missing labels, so there must be one.  */

		      fall_thru_label = block_label (fall_thru->dest);

		      if (old_jump && fall_thru_label)
			invert_worked = invert_jump (old_jump,
						     fall_thru_label, 0);
		      if (invert_worked)
			{
			  fall_thru->flags &= ~EDGE_FALLTHRU;
			  cond_jump->flags |= EDGE_FALLTHRU;
			  update_br_prob_note (cur_bb);
			  e = fall_thru;
			  fall_thru = cond_jump;
			  cond_jump = e;
			  cond_jump->flags |= EDGE_CROSSING;
			  fall_thru->flags &= ~EDGE_CROSSING;
			}
		    }
		}

	      if (cond_jump_crosses || !invert_worked)
		{
		  /* This is the case where both edges out of the basic
		     block are crossing edges.  Here we will fix up the
		     fall through edge.  The jump edge will be taken care
		     of later.  */

		  new_bb = force_nonfallthru (fall_thru);

		  if (new_bb)
		    {
		      new_bb->aux = cur_bb->aux;
		      cur_bb->aux = new_bb;

		      /* Make sure new fall-through bb is in same
			 partition as bb it's falling through from.  */

		      BB_COPY_PARTITION (new_bb, cur_bb);
		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
		    }

		  /* Add barrier after new jump.  */

		  if (new_bb)
		    {
		      barrier = emit_barrier_after (BB_END (new_bb));
		      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
								  barrier);
		    }
		  else
		    {
		      barrier = emit_barrier_after (BB_END (cur_bb));
		      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
								  barrier);
		    }
		}
	    }
	}
    }
}
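
/* Sketch of the transformation above (illustrative).  If hot block A ends
   in a conditional jump and falls through into cold block C,

	A (hot):  if (cc) goto L;	 A (hot):  if (cc) goto L;
	  <falls through into C>   ==>	   <falls through into N>
	C (cold): ...			 N (hot):  goto C;
					 C (cold): ...

   then the new block N lives in A's section, so the only edge that still
   crosses sections is the unconditional jump N -> C, which later fixups
   know how to lengthen if its target is out of range.  */
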
/* This function checks the destination block of a "crossing jump" to
   see if it has any crossing predecessors that begin with a code label
   and end with an unconditional jump.  If so, it returns that predecessor
   block.  (This is to avoid creating lots of new basic blocks that all
   contain unconditional jumps to the same destination.)  */

static basic_block
find_jump_block (basic_block jump_dest)
{
  basic_block source_bb = NULL;
  edge e;
  rtx insn;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, jump_dest->preds)
    if (e->flags & EDGE_CROSSING)
      {
	basic_block src = e->src;

	/* Check each predecessor to see if it has a label, and contains
	   only one executable instruction, which is an unconditional jump.
	   If so, we can use it.  */

	if (LABEL_P (BB_HEAD (src)))
	  for (insn = BB_HEAD (src);
	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn)
		  && insn == BB_END (src)
		  && JUMP_P (insn)
		  && !any_condjump_p (insn))
		{
		  source_bb = src;
		  break;
		}
	    }

	if (source_bb)
	  break;
      }

  return source_bb;
}

/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block last_bb;
  basic_block dest;
  basic_block prev_bb;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx old_jump;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx new_label;
  rtx new_jump;
  rtx barrier;

  last_bb = EXIT_BLOCK_PTR->prev_bb;

  FOR_EACH_BB (cur_bb)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
	 can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
	crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
	crossing_edge = succ2;

      if (crossing_edge)
	{
	  old_jump = BB_END (cur_bb);

	  /* Check to make sure the jump instruction is a
	     conditional jump.  */

	  set_src = NULL_RTX;

	  if (any_condjump_p (old_jump))
	    {
	      if (GET_CODE (PATTERN (old_jump)) == SET)
		set_src = SET_SRC (PATTERN (old_jump));
	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
		{
		  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
		  if (GET_CODE (set_src) == SET)
		    set_src = SET_SRC (set_src);
		  else
		    set_src = NULL_RTX;
		}
	    }

	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
	    {
	      if (GET_CODE (XEXP (set_src, 1)) == PC)
		old_label = XEXP (set_src, 2);
	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
		old_label = XEXP (set_src, 1);

	      /* Check to see if new bb for jumping to that dest has
		 already been created; if so, use it; if not, create
		 a new one.  */

	      new_bb = find_jump_block (crossing_edge->dest);

	      if (new_bb)
		new_label = block_label (new_bb);
	      else
		{
		  /* Create new basic block to be dest for
		     conditional jump.  */

		  new_bb = create_basic_block (NULL, NULL, last_bb);
		  new_bb->aux = last_bb->aux;
		  last_bb->aux = new_bb;
		  prev_bb = last_bb;
		  last_bb = new_bb;

		  /* Update register liveness information.  */

		  new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (&reg_obstack);
		  new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (&reg_obstack);
		  COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
				prev_bb->il.rtl->global_live_at_end);
		  COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
				prev_bb->il.rtl->global_live_at_end);

		  /* Put appropriate instructions in new bb.  */

		  new_label = gen_label_rtx ();
		  emit_label_before (new_label, BB_HEAD (new_bb));
		  BB_HEAD (new_bb) = new_label;

		  if (GET_CODE (old_label) == LABEL_REF)
		    {
		      old_label = JUMP_LABEL (old_jump);
		      new_jump = emit_jump_insn_after (gen_jump
						       (old_label),
						       BB_END (new_bb));
		    }
		  else
		    {
		      gcc_assert (HAVE_return
				  && GET_CODE (old_label) == RETURN);
		      new_jump = emit_jump_insn_after (gen_return (),
						       BB_END (new_bb));
		    }

		  barrier = emit_barrier_after (new_jump);
		  JUMP_LABEL (new_jump) = old_label;
		  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
							      barrier);

		  /* Make sure new bb is in same partition as source
		     of conditional branch.  */
		  BB_COPY_PARTITION (new_bb, cur_bb);
		}

	      /* Make old jump branch to new bb.  */

	      redirect_jump (old_jump, new_label, 0);

	      /* Remove crossing_edge as predecessor of 'dest'.  */

	      dest = crossing_edge->dest;

	      redirect_edge_succ (crossing_edge, new_bb);

	      /* Make a new edge from new_bb to old dest; new edge
		 will be a successor for new_bb and a predecessor
		 for 'dest'.  */

	      if (EDGE_COUNT (new_bb->succs) == 0)
		new_edge = make_edge (new_bb, dest, 0);
	      else
		new_edge = EDGE_SUCC (new_bb, 0);

	      crossing_edge->flags &= ~EDGE_CROSSING;
	      new_edge->flags |= EDGE_CROSSING;
	    }
	}
    }
}
/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx last_insn;
  rtx label;
  rtx label_addr;
  rtx indirect_jump_sequence;
  rtx jump_insn = NULL_RTX;
  rtx new_reg;
  rtx cur_insn;
  edge succ;

  FOR_EACH_BB (cur_bb)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
	continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
	 this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
	  && (succ->flags & EDGE_CROSSING))
	{
	  rtx label2, table;

	  gcc_assert (!any_condjump_p (last_insn));

	  /* Make sure the jump is not already an indirect or table jump.  */

	  if (!computed_jump_p (last_insn)
	      && !tablejump_p (last_insn, &label2, &table))
	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create
		 reference of label, as target for jump.  */

	      label = JUMP_LABEL (last_insn);
	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
	      LABEL_NUSES (label) += 1;

	      /* Get a register to use for the indirect jump.  */

	      new_reg = gen_reg_rtx (Pmode);

	      /* Generate the indirect jump sequence.  */

	      start_sequence ();
	      emit_move_insn (new_reg, label_addr);
	      emit_indirect_jump (new_reg);
	      indirect_jump_sequence = get_insns ();
	      end_sequence ();

	      /* Make sure every instruction in the new jump sequence has
		 its basic block set to be cur_bb.  */

	      for (cur_insn = indirect_jump_sequence; cur_insn;
		   cur_insn = NEXT_INSN (cur_insn))
		{
		  BLOCK_FOR_INSN (cur_insn) = cur_bb;
		  if (JUMP_P (cur_insn))
		    jump_insn = cur_insn;
		}

	      /* Insert the new (indirect) jump sequence immediately before
		 the unconditional jump, then delete the unconditional jump.  */

	      emit_insn_before (indirect_jump_sequence, last_insn);
	      delete_insn (last_insn);

	      /* Make BB_END for cur_bb be the jump instruction (NOT the
		 barrier instruction at the end of the sequence...).  */

	      BB_END (cur_bb) = jump_insn;
	    }
	}
    }
}
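
/* The effect is roughly this (illustrative; the actual instructions are
   target dependent):

	jmp .L_cold	  ==>	mov $.L_cold, reg
				jmp *reg

   An indirect jump through a register can reach any address, so it is not
   limited by the displacement range of a direct unconditional branch.  */
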
/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
	  && JUMP_P (BB_END (e->src)))
	REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
							 NULL_RTX,
							 REG_NOTES (BB_END
								    (e->src)));
}

/* Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However this
   is not obvious from the current bb structure.  Therefore we must take
   care to ensure that: 1). There are no fall_thru edges that cross
   between sections; 2). For those architectures which have "short"
   conditional branches, all conditional branches that attempt to
   cross between sections are converted to unconditional branches;
   and, 3). For those architectures which have "short" unconditional
   branches, all unconditional branches that attempt to cross between
   sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */
static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
				    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Convert all crossing fall_thru edges into non-crossing fall thrus by
     adding unconditional jumps (that jump to the original fall thru
     dest).  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    {
      fix_crossing_unconditional_branches ();
      reg_scan (get_insns (), max_reg_num ());
    }

  add_reg_crossing_jump_notes ();
}

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
	current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
	{
	  if (switched_sections)
	    {
	      error ("Multiple hot/cold transitions found (bb %i)",
		     bb->index);
	      err = 1;
	    }
	  else
	    {
	      switched_sections = true;
	      current_partition = BB_PARTITION (bb);
	    }
	}
    }

  gcc_assert (!err);
}

/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
   the set of flags to pass to cfg_layout_initialize().  */

void
reorder_basic_blocks (unsigned int flags)
{
  int n_traces;
  int i;
  struct trace *traces;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  timevar_push (TV_REORDER_BLOCKS);

  cfg_layout_initialize (flags);

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We are estimating the length of uncond jump insn only once since the
     code for getting the insn length always returns the minimal length
     now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = xmalloc (n_basic_blocks * sizeof (struct trace));
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  if (dump_file)
    dump_flow_info (dump_file);

  cfg_layout_finalize ();
  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();

  timevar_pop (TV_REORDER_BLOCKS);
}

/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */

void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (flag_reorder_blocks_and_partition)
    FOR_EACH_BB (bb)
      {
	if (!first_partition)
	  first_partition = BB_PARTITION (bb);
	if (BB_PARTITION (bb) != first_partition)
	  {
	    new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
					 BB_HEAD (bb));
	    break;
	  }
      }
}

/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used not to unfactor them again, which
   can seriously pessimize code with many computed jumps in the source code,
   such as interpreters.  See e.g. PR15242.  */

void
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  timevar_push (TV_REORDER_BLOCKS);

  cfg_layout_initialize (0);

  /* We are estimating the length of uncond jump insn only once
     since the code for getting the insn length always returns
     the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for
     unfactoring, mark it in the candidates.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
	bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
	continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
	  || !can_duplicate_block_p (bb))
	continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
	if (INSN_P (insn))
	  {
	    size += get_attr_length (insn);
	    if (size > max_size)
	      break;
	  }
      if (size > max_size)
	continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
	all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
	continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
	continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
	 the exit block or the next block.
	 The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
	  || single_succ (bb) == EXIT_BLOCK_PTR
	  || single_succ (bb) == bb->next_bb
	  || single_pred_p (single_succ (bb)))
	continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
	continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb));
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);

  timevar_pop (TV_REORDER_BLOCKS);
}
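
/* As an illustration of the unfactoring done above, a threaded interpreter
   typically looks like

	op_add: ...; goto dispatch;	  op_add: ...; goto *table[*pc++];
	op_sub: ...; goto dispatch;  ==>  op_sub: ...; goto *table[*pc++];
	dispatch: goto *table[*pc++];

   Each handler that used to jump to the shared dispatch block receives its
   own copy of the computed goto, giving the branch predictor one indirect
   jump per opcode instead of a single shared, hard-to-predict one (see
   PR15242 mentioned above).  */
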
/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information
   is later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below.)

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above), in order to ensure
   that jumps attempting to cross section boundaries are really able
   to cover whatever distance the jump requires (on many architectures
   conditional or unconditional jumps are not able to reach all of
   memory).  Therefore tests have to be inserted into each such
   optimization to make sure that it does not undo the fixes necessary
   for crossing partition boundaries.  This would be much less of a
   problem if we could perform this optimization later in the
   compilation, but unfortunately the fact that we may need to create
   indirect jumps (through registers) requires that this optimization
   be performed before register allocation.  */
void
partition_hot_cold_basic_blocks (void)
{
  basic_block cur_bb;
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= 1)
    return;

  crossing_edges = xcalloc (max_edges, sizeof (edge));

  cfg_layout_initialize (0);

  FOR_EACH_BB (cur_bb)
    if (cur_bb->index >= 0
	&& cur_bb->next_bb->index >= 0)
      cur_bb->aux = cur_bb->next_bb;

  find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
							&n_crossing_edges,
							&max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);

  cfg_layout_finalize ();
}