kernel - support dummy reallocblks in devfs
[dragonfly.git] / contrib / gcc-5.0 / gcc / bb-reorder.c
blob7d5073b024717a2fcf60a99512342ab30d4a6008
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This (greedy) algorithm constructs traces in several rounds.
21 The construction starts from "seeds". The seed for the first round
22 is the entry point of the function.  When there is more than one seed,
23 the one with the lowest key in the heap is selected first (see bb_to_key).
24 Then the algorithm repeatedly adds the most probable successor to the end
25 of a trace. Finally it connects the traces.
27 There are two parameters: Branch Threshold and Exec Threshold.
28 If the probability of an edge to a successor of the current basic block is
29 lower than Branch Threshold or its frequency is lower than Exec Threshold,
30 then the successor will be the seed in one of the next rounds.
31 Each round has these parameters lower than the previous one.
32 The last round has to have these parameters set to zero so that the
33 remaining blocks are picked up.
35 The algorithm selects the most probable successor from all unvisited
36 successors and successors that have been added to this trace.
37 The other successors (that have not been "sent" to the next round) will be
38 other seeds for this round and the secondary traces will start from them.
39 If the successor has not been visited in this trace, it is added to the
40 trace (however, there is some heuristic for simple branches).
41 If the successor has been visited in this trace, a loop has been found.
42 If the loop has many iterations, the loop is rotated so that the source
43 block of the most probable edge going out of the loop is the last block
44 of the trace.
45 If the loop has few iterations and there is no edge from the last block of
46 the loop going out of the loop, the loop header is duplicated.
48 When connecting traces, the algorithm first checks whether there is an edge
49 from the last block of a trace to the first block of another trace.
50 When there are still some unconnected traces it checks whether there exists
51 a basic block BB such that BB is a successor of the last block of a trace
52 and BB is a predecessor of the first block of another trace. In this case,
53 BB is duplicated, added at the end of the first trace and the traces are
54 connected through it.
55 The rest of traces are simply connected so there will be a jump to the
56 beginning of the rest of traces.
58 The above description is for the full algorithm, which is used when the
59 function is optimized for speed. When the function is optimized for size,
60 in order to reduce long jumps and connect more fallthru edges, the
61 algorithm is modified as follows:
62 (1) Break long traces to short ones. A trace is broken at a block that has
63 multiple predecessors/ successors during trace discovery. When connecting
64 traces, only connect Trace n with Trace n + 1. This change reduces most
65 long jumps compared with the above algorithm.
66 (2) Ignore the edge probability and frequency for fallthru edges.
67 (3) Keep the original order of blocks when there is no chance to fall
68 through. We rely on the results of cfg_cleanup.
70 To implement the change for code size optimization, block's index is
71 selected as the key and all traces are found in one round.
73 References:
75 "Software Trace Cache"
76 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
77 http://citeseer.nj.nec.com/15361.html
81 #include "config.h"
82 #include "system.h"
83 #include "coretypes.h"
84 #include "tm.h"
85 #include "hash-set.h"
86 #include "machmode.h"
87 #include "vec.h"
88 #include "double-int.h"
89 #include "input.h"
90 #include "alias.h"
91 #include "symtab.h"
92 #include "wide-int.h"
93 #include "inchash.h"
94 #include "tree.h"
95 #include "rtl.h"
96 #include "regs.h"
97 #include "flags.h"
98 #include "output.h"
99 #include "target.h"
100 #include "hashtab.h"
101 #include "hard-reg-set.h"
102 #include "function.h"
103 #include "tm_p.h"
104 #include "obstack.h"
105 #include "statistics.h"
106 #include "real.h"
107 #include "fixed-value.h"
108 #include "insn-config.h"
109 #include "expmed.h"
110 #include "dojump.h"
111 #include "explow.h"
112 #include "calls.h"
113 #include "emit-rtl.h"
114 #include "varasm.h"
115 #include "stmt.h"
116 #include "expr.h"
117 #include "optabs.h"
118 #include "params.h"
119 #include "diagnostic-core.h"
120 #include "toplev.h" /* user_defined_section_attribute */
121 #include "tree-pass.h"
122 #include "dominance.h"
123 #include "cfg.h"
124 #include "cfgrtl.h"
125 #include "cfganal.h"
126 #include "cfgbuild.h"
127 #include "cfgcleanup.h"
128 #include "predict.h"
129 #include "basic-block.h"
130 #include "df.h"
131 #include "bb-reorder.h"
132 #include "hash-map.h"
133 #include "is-a.h"
134 #include "plugin-api.h"
135 #include "ipa-ref.h"
136 #include "cgraph.h"
137 #include "except.h"
138 #include "fibonacci_heap.h"
140 /* The number of rounds. In most cases there will only be 4 rounds, but
141 when partitioning hot and cold basic blocks into separate sections of
142 the object file there will be an extra round. */
143 #define N_ROUNDS 5
145 /* Stubs in case we don't have a return insn.
146 We have to check at run time too, not only compile time. */
148 #ifndef HAVE_return
149 #define HAVE_return 0
150 #define gen_return() NULL_RTX
151 #endif
154 struct target_bb_reorder default_target_bb_reorder;
155 #if SWITCHABLE_TARGET
156 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
157 #endif
159 #define uncond_jump_length \
160 (this_target_bb_reorder->x_uncond_jump_length)
162 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
163 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
165 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
166 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
168 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
169 block the edge destination is not duplicated while connecting traces. */
170 #define DUPLICATION_THRESHOLD 100
172 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
173 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
175 /* Structure to hold needed information for each basic block. */
176 typedef struct bbro_basic_block_data_def
178 /* Which trace is the bb start of (-1 means it is not a start of any). */
179 int start_of_trace;
181 /* Which trace is the bb end of (-1 means it is not an end of any). */
182 int end_of_trace;
184 /* Which trace is the bb in? */
185 int in_trace;
187 /* Which trace was this bb visited in? */
188 int visited;
190 /* Cached maximum frequency of interesting incoming edges.
191 Minus one means not yet computed. */
192 int priority;
194 /* Which heap is BB in (if any)? */
195 bb_heap_t *heap;
197 /* Which heap node is BB in (if any)? */
198 bb_heap_node_t *node;
199 } bbro_basic_block_data;
201 /* The current size of the following dynamic array. */
202 static int array_size;
204 /* The array which holds needed information for basic blocks. */
205 static bbro_basic_block_data *bbd;
207 /* To avoid frequent reallocation the size of arrays is greater than needed,
208 the number of elements is (not less than) 1.25 * size_wanted. */
209 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
211 /* Free the memory and set the pointer to NULL. */
212 #define FREE(P) (gcc_assert (P), free (P), P = 0)
214 /* Structure for holding information about a trace. */
215 struct trace
217 /* First and last basic block of the trace. */
218 basic_block first, last;
220 /* The round of the STC creation which this trace was found in. */
221 int round;
223 /* The length (i.e. the number of basic blocks) of the trace. */
224 int length;
227 /* Maximum frequency and count of one of the entry blocks. */
228 static int max_entry_frequency;
229 static gcov_type max_entry_count;
231 /* Local function prototypes. */
232 static void find_traces (int *, struct trace *);
233 static basic_block rotate_loop (edge, struct trace *, int);
234 static void mark_bb_visited (basic_block, int);
235 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
236 int, bb_heap_t **, int);
237 static basic_block copy_bb (basic_block, edge, basic_block, int);
238 static long bb_to_key (basic_block);
239 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
240 const_edge);
241 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
242 struct trace *);
243 static void connect_traces (int, struct trace *);
244 static bool copy_bb_p (const_basic_block, int);
245 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
247 /* Return the trace number in which BB was visited. */
249 static int
250 bb_visited_trace (const_basic_block bb)
252 gcc_assert (bb->index < array_size);
253 return bbd[bb->index].visited;
256 /* This function marks BB that it was visited in trace number TRACE. */
258 static void
259 mark_bb_visited (basic_block bb, int trace)
261 bbd[bb->index].visited = trace;
262 if (bbd[bb->index].heap)
264 bbd[bb->index].heap->delete_node (bbd[bb->index].node);
265 bbd[bb->index].heap = NULL;
266 bbd[bb->index].node = NULL;
270 /* Check to see if bb should be pushed into the next round of trace
271 collections or not. Reasons for pushing the block forward are 1).
272 If the block is cold, we are doing partitioning, and there will be
273 another round (cold partition blocks are not supposed to be
274 collected into traces until the very last round); or 2). There will
275 be another round, and the basic block is not "hot enough" for the
276 current round of trace collection. */
278 static bool
279 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
280 int exec_th, gcov_type count_th)
282 bool there_exists_another_round;
283 bool block_not_hot_enough;
285 there_exists_another_round = round < number_of_rounds - 1;
287 block_not_hot_enough = (bb->frequency < exec_th
288 || bb->count < count_th
289 || probably_never_executed_bb_p (cfun, bb));
291 if (there_exists_another_round
292 && block_not_hot_enough)
293 return true;
294 else
295 return false;
298 /* Find the traces for Software Trace Cache. Chain each trace through
299 RBI()->next. Store the number of traces to N_TRACES and description of
300 traces to TRACES. */
302 static void
303 find_traces (int *n_traces, struct trace *traces)
305 int i;
306 int number_of_rounds;
307 edge e;
308 edge_iterator ei;
309 bb_heap_t *heap = new bb_heap_t (LONG_MIN);
311 /* Add one extra round of trace collection when partitioning hot/cold
312 basic blocks into separate sections. The last round is for all the
313 cold blocks (and ONLY the cold blocks). */
315 number_of_rounds = N_ROUNDS - 1;
317 /* Insert entry points of function into heap. */
318 max_entry_frequency = 0;
319 max_entry_count = 0;
320 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
322 bbd[e->dest->index].heap = heap;
323 bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
324 if (e->dest->frequency > max_entry_frequency)
325 max_entry_frequency = e->dest->frequency;
326 if (e->dest->count > max_entry_count)
327 max_entry_count = e->dest->count;
330 /* Find the traces. */
331 for (i = 0; i < number_of_rounds; i++)
333 gcov_type count_threshold;
335 if (dump_file)
336 fprintf (dump_file, "STC - round %d\n", i + 1);
338 if (max_entry_count < INT_MAX / 1000)
339 count_threshold = max_entry_count * exec_threshold[i] / 1000;
340 else
341 count_threshold = max_entry_count / 1000 * exec_threshold[i];
343 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
344 max_entry_frequency * exec_threshold[i] / 1000,
345 count_threshold, traces, n_traces, i, &heap,
346 number_of_rounds);
348 delete heap;
350 if (dump_file)
352 for (i = 0; i < *n_traces; i++)
354 basic_block bb;
355 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
356 traces[i].round + 1);
357 for (bb = traces[i].first;
358 bb != traces[i].last;
359 bb = (basic_block) bb->aux)
360 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
361 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
363 fflush (dump_file);
367 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
368 (with sequential number TRACE_N). */
370 static basic_block
371 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
373 basic_block bb;
375 /* Information about the best end (end after rotation) of the loop. */
376 basic_block best_bb = NULL;
377 edge best_edge = NULL;
378 int best_freq = -1;
379 gcov_type best_count = -1;
380 /* The best edge is preferred when its destination is not visited yet
381 or is a start block of some trace. */
382 bool is_preferred = false;
384 /* Find the most frequent edge that goes out from current trace. */
385 bb = back_edge->dest;
388 edge e;
389 edge_iterator ei;
391 FOR_EACH_EDGE (e, ei, bb->succs)
392 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
393 && bb_visited_trace (e->dest) != trace_n
394 && (e->flags & EDGE_CAN_FALLTHRU)
395 && !(e->flags & EDGE_COMPLEX))
397 if (is_preferred)
399 /* The best edge is preferred. */
400 if (!bb_visited_trace (e->dest)
401 || bbd[e->dest->index].start_of_trace >= 0)
403 /* The current edge E is also preferred. */
404 int freq = EDGE_FREQUENCY (e);
405 if (freq > best_freq || e->count > best_count)
407 best_freq = freq;
408 best_count = e->count;
409 best_edge = e;
410 best_bb = bb;
414 else
416 if (!bb_visited_trace (e->dest)
417 || bbd[e->dest->index].start_of_trace >= 0)
419 /* The current edge E is preferred. */
420 is_preferred = true;
421 best_freq = EDGE_FREQUENCY (e);
422 best_count = e->count;
423 best_edge = e;
424 best_bb = bb;
426 else
428 int freq = EDGE_FREQUENCY (e);
429 if (!best_edge || freq > best_freq || e->count > best_count)
431 best_freq = freq;
432 best_count = e->count;
433 best_edge = e;
434 best_bb = bb;
439 bb = (basic_block) bb->aux;
441 while (bb != back_edge->dest);
443 if (best_bb)
445 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
446 the trace. */
447 if (back_edge->dest == trace->first)
449 trace->first = (basic_block) best_bb->aux;
451 else
453 basic_block prev_bb;
455 for (prev_bb = trace->first;
456 prev_bb->aux != back_edge->dest;
457 prev_bb = (basic_block) prev_bb->aux)
459 prev_bb->aux = best_bb->aux;
461 /* Try to get rid of uncond jump to cond jump. */
462 if (single_succ_p (prev_bb))
464 basic_block header = single_succ (prev_bb);
466 /* Duplicate HEADER if it is a small block containing cond jump
467 in the end. */
468 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
469 && !CROSSING_JUMP_P (BB_END (header)))
470 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
474 else
476 /* We have not found suitable loop tail so do no rotation. */
477 best_bb = back_edge->src;
479 best_bb->aux = NULL;
480 return best_bb;
483 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
484 not include basic blocks whose probability is lower than BRANCH_TH or whose
485 frequency is lower than EXEC_TH into traces (or whose count is lower than
486 COUNT_TH). Store the new traces into TRACES and modify the number of
487 traces *N_TRACES. Set the round (which the trace belongs to) to ROUND.
488 The function expects starting basic blocks to be in *HEAP and will delete
489 *HEAP and store starting points for the next round into new *HEAP. */
491 static void
492 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
493 struct trace *traces, int *n_traces, int round,
494 bb_heap_t **heap, int number_of_rounds)
496 /* Heap for discarded basic blocks which are possible starting points for
497 the next round. */
498 bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
499 bool for_size = optimize_function_for_size_p (cfun);
501 while (!(*heap)->empty ())
503 basic_block bb;
504 struct trace *trace;
505 edge best_edge, e;
506 long key;
507 edge_iterator ei;
509 bb = (*heap)->extract_min ();
510 bbd[bb->index].heap = NULL;
511 bbd[bb->index].node = NULL;
513 if (dump_file)
514 fprintf (dump_file, "Getting bb %d\n", bb->index);
516 /* If the BB's frequency is too low, send BB to the next round. When
517 partitioning hot/cold blocks into separate sections, make sure all
518 the cold blocks (and ONLY the cold blocks) go into the (extra) final
519 round. When optimizing for size, do not push to next round. */
521 if (!for_size
522 && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
523 count_th))
525 int key = bb_to_key (bb);
526 bbd[bb->index].heap = new_heap;
527 bbd[bb->index].node = new_heap->insert (key, bb);
529 if (dump_file)
530 fprintf (dump_file,
531 " Possible start point of next round: %d (key: %d)\n",
532 bb->index, key);
533 continue;
536 trace = traces + *n_traces;
537 trace->first = bb;
538 trace->round = round;
539 trace->length = 0;
540 bbd[bb->index].in_trace = *n_traces;
541 (*n_traces)++;
545 int prob, freq;
546 bool ends_in_call;
548 /* The probability and frequency of the best edge. */
549 int best_prob = INT_MIN / 2;
550 int best_freq = INT_MIN / 2;
552 best_edge = NULL;
553 mark_bb_visited (bb, *n_traces);
554 trace->length++;
556 if (dump_file)
557 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
558 bb->index, *n_traces - 1);
560 ends_in_call = block_ends_with_call_p (bb);
562 /* Select the successor that will be placed after BB. */
563 FOR_EACH_EDGE (e, ei, bb->succs)
565 gcc_assert (!(e->flags & EDGE_FAKE));
567 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
568 continue;
570 if (bb_visited_trace (e->dest)
571 && bb_visited_trace (e->dest) != *n_traces)
572 continue;
574 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
575 continue;
577 prob = e->probability;
578 freq = e->dest->frequency;
580 /* The only sensible preference for a call instruction is the
581 fallthru edge. Don't bother selecting anything else. */
582 if (ends_in_call)
584 if (e->flags & EDGE_CAN_FALLTHRU)
586 best_edge = e;
587 best_prob = prob;
588 best_freq = freq;
590 continue;
593 /* Edge that cannot be fallthru or improbable or infrequent
594 successor (i.e. it is unsuitable successor). When optimizing
595 for size, ignore the probability and frequency. */
596 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
597 || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
598 || e->count < count_th) && (!for_size)))
599 continue;
601 /* If partitioning hot/cold basic blocks, don't consider edges
602 that cross section boundaries. */
604 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
605 best_edge))
607 best_edge = e;
608 best_prob = prob;
609 best_freq = freq;
613 /* If the best destination has multiple predecessors, and can be
614 duplicated cheaper than a jump, don't allow it to be added
615 to a trace. We'll duplicate it when connecting traces. */
616 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
617 && copy_bb_p (best_edge->dest, 0))
618 best_edge = NULL;
620 /* If the best destination has multiple successors or predecessors,
621 don't allow it to be added when optimizing for size. This makes
622 sure predecessors with smaller index are handled before the best
623 destinarion. It breaks long trace and reduces long jumps.
625 Take if-then-else as an example.
631 If we do not remove the best edge B->D/C->D, the final order might
632 be A B D ... C. C is at the end of the program. If D's successors
633 and D are complicated, might need long jumps for A->C and C->D.
634 Similar issue for order: A C D ... B.
636 After removing the best edge, the final result will be ABCD/ ACBD.
637 It does not add jump compared with the previous order. But it
638 reduces the possibility of long jumps. */
639 if (best_edge && for_size
640 && (EDGE_COUNT (best_edge->dest->succs) > 1
641 || EDGE_COUNT (best_edge->dest->preds) > 1))
642 best_edge = NULL;
644 /* Add all non-selected successors to the heaps. */
645 FOR_EACH_EDGE (e, ei, bb->succs)
647 if (e == best_edge
648 || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
649 || bb_visited_trace (e->dest))
650 continue;
652 key = bb_to_key (e->dest);
654 if (bbd[e->dest->index].heap)
656 /* E->DEST is already in some heap. */
657 if (key != bbd[e->dest->index].node->get_key ())
659 if (dump_file)
661 fprintf (dump_file,
662 "Changing key for bb %d from %ld to %ld.\n",
663 e->dest->index,
664 (long) bbd[e->dest->index].node->get_key (),
665 key);
667 bbd[e->dest->index].heap->replace_key
668 (bbd[e->dest->index].node, key);
671 else
673 bb_heap_t *which_heap = *heap;
675 prob = e->probability;
676 freq = EDGE_FREQUENCY (e);
678 if (!(e->flags & EDGE_CAN_FALLTHRU)
679 || (e->flags & EDGE_COMPLEX)
680 || prob < branch_th || freq < exec_th
681 || e->count < count_th)
683 /* When partitioning hot/cold basic blocks, make sure
684 the cold blocks (and only the cold blocks) all get
685 pushed to the last round of trace collection. When
686 optimizing for size, do not push to next round. */
688 if (!for_size && push_to_next_round_p (e->dest, round,
689 number_of_rounds,
690 exec_th, count_th))
691 which_heap = new_heap;
694 bbd[e->dest->index].heap = which_heap;
695 bbd[e->dest->index].node = which_heap->insert (key, e->dest);
697 if (dump_file)
699 fprintf (dump_file,
700 " Possible start of %s round: %d (key: %ld)\n",
701 (which_heap == new_heap) ? "next" : "this",
702 e->dest->index, (long) key);
708 if (best_edge) /* Suitable successor was found. */
710 if (bb_visited_trace (best_edge->dest) == *n_traces)
712 /* We do nothing with one basic block loops. */
713 if (best_edge->dest != bb)
715 if (EDGE_FREQUENCY (best_edge)
716 > 4 * best_edge->dest->frequency / 5)
718 /* The loop has at least 4 iterations. If the loop
719 header is not the first block of the function
720 we can rotate the loop. */
722 if (best_edge->dest
723 != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
725 if (dump_file)
727 fprintf (dump_file,
728 "Rotating loop %d - %d\n",
729 best_edge->dest->index, bb->index);
731 bb->aux = best_edge->dest;
732 bbd[best_edge->dest->index].in_trace =
733 (*n_traces) - 1;
734 bb = rotate_loop (best_edge, trace, *n_traces);
737 else
739 /* The loop has less than 4 iterations. */
741 if (single_succ_p (bb)
742 && copy_bb_p (best_edge->dest,
743 optimize_edge_for_speed_p
744 (best_edge)))
746 bb = copy_bb (best_edge->dest, best_edge, bb,
747 *n_traces);
748 trace->length++;
753 /* Terminate the trace. */
754 break;
756 else
758 /* Check for a situation
766 where
767 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
768 >= EDGE_FREQUENCY (AC).
769 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
770 Best ordering is then A B C.
772 When optimizing for size, A B C is always the best order.
774 This situation is created for example by:
776 if (A) B;
781 FOR_EACH_EDGE (e, ei, bb->succs)
782 if (e != best_edge
783 && (e->flags & EDGE_CAN_FALLTHRU)
784 && !(e->flags & EDGE_COMPLEX)
785 && !bb_visited_trace (e->dest)
786 && single_pred_p (e->dest)
787 && !(e->flags & EDGE_CROSSING)
788 && single_succ_p (e->dest)
789 && (single_succ_edge (e->dest)->flags
790 & EDGE_CAN_FALLTHRU)
791 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
792 && single_succ (e->dest) == best_edge->dest
793 && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
794 || for_size))
796 best_edge = e;
797 if (dump_file)
798 fprintf (dump_file, "Selecting BB %d\n",
799 best_edge->dest->index);
800 break;
803 bb->aux = best_edge->dest;
804 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
805 bb = best_edge->dest;
809 while (best_edge);
810 trace->last = bb;
811 bbd[trace->first->index].start_of_trace = *n_traces - 1;
812 if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
814 bbd[trace->last->index].end_of_trace = *n_traces - 1;
815 /* Update the cached maximum frequency for interesting predecessor
816 edges for successors of the new trace end. */
817 FOR_EACH_EDGE (e, ei, trace->last->succs)
818 if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
819 bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
822 /* The trace is terminated so we have to recount the keys in heap
823 (some block can have a lower key because now one of its predecessors
824 is an end of the trace). */
825 FOR_EACH_EDGE (e, ei, bb->succs)
827 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
828 || bb_visited_trace (e->dest))
829 continue;
831 if (bbd[e->dest->index].heap)
833 key = bb_to_key (e->dest);
834 if (key != bbd[e->dest->index].node->get_key ())
836 if (dump_file)
838 fprintf (dump_file,
839 "Changing key for bb %d from %ld to %ld.\n",
840 e->dest->index,
841 (long) bbd[e->dest->index].node->get_key (), key);
843 bbd[e->dest->index].heap->replace_key
844 (bbd[e->dest->index].node, key);
850 delete (*heap);
852 /* "Return" the new heap. */
853 *heap = new_heap;
856 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
857 it to trace after BB, mark OLD_BB visited and update pass' data structures
858 (TRACE is a number of trace which OLD_BB is duplicated to). */
860 static basic_block
861 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
863 basic_block new_bb;
865 new_bb = duplicate_block (old_bb, e, bb);
866 BB_COPY_PARTITION (new_bb, old_bb);
868 gcc_assert (e->dest == new_bb);
870 if (dump_file)
871 fprintf (dump_file,
872 "Duplicated bb %d (created bb %d)\n",
873 old_bb->index, new_bb->index);
875 if (new_bb->index >= array_size
876 || last_basic_block_for_fn (cfun) > array_size)
878 int i;
879 int new_size;
881 new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
882 new_size = GET_ARRAY_SIZE (new_size);
883 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
884 for (i = array_size; i < new_size; i++)
886 bbd[i].start_of_trace = -1;
887 bbd[i].end_of_trace = -1;
888 bbd[i].in_trace = -1;
889 bbd[i].visited = 0;
890 bbd[i].priority = -1;
891 bbd[i].heap = NULL;
892 bbd[i].node = NULL;
894 array_size = new_size;
896 if (dump_file)
898 fprintf (dump_file,
899 "Growing the dynamic array to %d elements.\n",
900 array_size);
904 gcc_assert (!bb_visited_trace (e->dest));
905 mark_bb_visited (new_bb, trace);
906 new_bb->aux = bb->aux;
907 bb->aux = new_bb;
909 bbd[new_bb->index].in_trace = trace;
911 return new_bb;
914 /* Compute and return the key (for the heap) of the basic block BB. */
916 static long
917 bb_to_key (basic_block bb)
919 edge e;
920 edge_iterator ei;
922 /* Use index as key to align with its original order. */
923 if (optimize_function_for_size_p (cfun))
924 return bb->index;
926 /* Do not start in probably never executed blocks. */
928 if (BB_PARTITION (bb) == BB_COLD_PARTITION
929 || probably_never_executed_bb_p (cfun, bb))
930 return BB_FREQ_MAX;
932 /* Prefer blocks whose predecessor is an end of some trace
933 or whose predecessor edge is EDGE_DFS_BACK. */
934 int priority = bbd[bb->index].priority;
935 if (priority == -1)
937 priority = 0;
938 FOR_EACH_EDGE (e, ei, bb->preds)
940 if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
941 && bbd[e->src->index].end_of_trace >= 0)
942 || (e->flags & EDGE_DFS_BACK))
944 int edge_freq = EDGE_FREQUENCY (e);
946 if (edge_freq > priority)
947 priority = edge_freq;
950 bbd[bb->index].priority = priority;
953 if (priority)
954 /* The block with priority should have significantly lower key. */
955 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
957 return -bb->frequency;
960 /* Return true when the edge E from basic block BB is better than the temporary
961 best edge (details are in function). The probability of edge E is PROB. The
962 frequency of the successor is FREQ. The current best probability is
963 BEST_PROB, the best frequency is BEST_FREQ.
964 The edge is considered to be equivalent when PROB does not differ much from
965 BEST_PROB; similarly for frequency. */
967 static bool
968 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
969 int best_prob, int best_freq, const_edge cur_best_edge)
971 bool is_better_edge;
973 /* The BEST_* values do not have to be best, but can be a bit smaller than
974 maximum values. */
975 int diff_prob = best_prob / 10;
976 int diff_freq = best_freq / 10;
978 /* The smaller one is better to keep the original order. */
979 if (optimize_function_for_size_p (cfun))
980 return !cur_best_edge
981 || cur_best_edge->dest->index > e->dest->index;
983 if (prob > best_prob + diff_prob)
984 /* The edge has higher probability than the temporary best edge. */
985 is_better_edge = true;
986 else if (prob < best_prob - diff_prob)
987 /* The edge has lower probability than the temporary best edge. */
988 is_better_edge = false;
989 else if (freq < best_freq - diff_freq)
990 /* The edge and the temporary best edge have almost equivalent
991 probabilities. The higher frequency of a successor now means
992 that there is another edge going into that successor.
993 This successor has lower frequency so it is better. */
994 is_better_edge = true;
995 else if (freq > best_freq + diff_freq)
996 /* This successor has higher frequency so it is worse. */
997 is_better_edge = false;
998 else if (e->dest->prev_bb == bb)
999 /* The edges have equivalent probabilities and the successors
1000 have equivalent frequencies. Select the previous successor. */
1001 is_better_edge = true;
1002 else
1003 is_better_edge = false;
1005 /* If we are doing hot/cold partitioning, make sure that we always favor
1006 non-crossing edges over crossing edges. */
1008 if (!is_better_edge
1009 && flag_reorder_blocks_and_partition
1010 && cur_best_edge
1011 && (cur_best_edge->flags & EDGE_CROSSING)
1012 && !(e->flags & EDGE_CROSSING))
1013 is_better_edge = true;
1015 return is_better_edge;
1018 /* Return true when the edge E is better than the temporary best edge
1019 CUR_BEST_EDGE. If SRC_INDEX_P is true, the function compares the src bb of
1020 E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1021 BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1022 TRACES record the information about traces.
1023 When optimizing for size, the edge with smaller index is better.
1024 When optimizing for speed, the edge with bigger probability or longer trace
1025 is better. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1027 static bool
1028 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1029 const_edge cur_best_edge, struct trace *traces)
1031 int e_index;
1032 int b_index;
1033 bool is_better_edge;
/* Any edge beats no edge at all.  */
1035 if (!cur_best_edge)
1036 return true;
/* Size path: ignore probabilities entirely; prefer the edge whose relevant
   block has the smaller index so the original block order is preserved.  */
1038 if (optimize_function_for_size_p (cfun))
1040 e_index = src_index_p ? e->src->index : e->dest->index;
1041 b_index = src_index_p ? cur_best_edge->src->index
1042 : cur_best_edge->dest->index;
1043 /* The smaller one is better to keep the original order. */
1044 return b_index > e_index;
/* Speed path.  When comparing src blocks we are looking for a predecessor
   trace, hence the tiebreak consults end_of_trace; the dest branch below
   mirrors this with start_of_trace for successor traces.  */
1047 if (src_index_p)
1049 e_index = e->src->index;
1051 if (e->probability > cur_best_edge->probability)
1052 /* The edge has higher probability than the temporary best edge. */
1053 is_better_edge = true;
1054 else if (e->probability < cur_best_edge->probability)
1055 /* The edge has lower probability than the temporary best edge. */
1056 is_better_edge = false;
1057 else if (traces[bbd[e_index].end_of_trace].length > best_len)
1058 /* The edge and the temporary best edge have equivalent probabilities.
1059 The edge with longer trace is better. */
1060 is_better_edge = true;
1061 else
1062 is_better_edge = false;
1064 else
1066 e_index = e->dest->index;
1068 if (e->probability > cur_best_edge->probability)
1069 /* The edge has higher probability than the temporary best edge. */
1070 is_better_edge = true;
1071 else if (e->probability < cur_best_edge->probability)
1072 /* The edge has lower probability than the temporary best edge. */
1073 is_better_edge = false;
1074 else if (traces[bbd[e_index].start_of_trace].length > best_len)
1075 /* The edge and the temporary best edge have equivalent probabilities.
1076 The edge with longer trace is better. */
1077 is_better_edge = true;
1078 else
1079 is_better_edge = false;
1082 return is_better_edge;
1085 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
/* Chains traces together via the basic blocks' AUX pointers so the final
   linear order can be read off starting at traces[0].first.  When hot/cold
   partitioning is active, connection runs in two passes so each partition is
   laid out contiguously.  May duplicate a single block (copy_bb) to join two
   traces when no direct connection exists.
   NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1087 static void
1088 connect_traces (int n_traces, struct trace *traces)
1090 int i;
1091 bool *connected;
1092 bool two_passes;
1093 int last_trace;
1094 int current_pass;
1095 int current_partition;
1096 int freq_threshold;
1097 gcov_type count_threshold;
1098 bool for_size = optimize_function_for_size_p (cfun);
/* Thresholds controlling when block duplication is profitable; the count
   computation is ordered to avoid gcov_type overflow for huge counts.  */
1100 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1101 if (max_entry_count < INT_MAX / 1000)
1102 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1103 else
1104 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1106 connected = XCNEWVEC (bool, n_traces);
1107 last_trace = -1;
1108 current_pass = 1;
1109 current_partition = BB_PARTITION (traces[0].first);
1110 two_passes = false;
/* A second pass is needed only if traces actually live in two different
   partitions.  */
1112 if (crtl->has_bb_partition)
1113 for (i = 0; i < n_traces && !two_passes; i++)
1114 if (BB_PARTITION (traces[0].first)
1115 != BB_PARTITION (traces[i].first))
1116 two_passes = true;
1118 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1120 int t = i;
1121 int t2;
1122 edge e, best;
1123 int best_len;
/* End of pass 1: restart the scan for the other partition.  */
1125 if (i >= n_traces)
1127 gcc_assert (two_passes && current_pass == 1);
1128 i = 0;
1129 t = i;
1130 current_pass = 2;
1131 if (current_partition == BB_HOT_PARTITION)
1132 current_partition = BB_COLD_PARTITION;
1133 else
1134 current_partition = BB_HOT_PARTITION;
1137 if (connected[t])
1138 continue;
1140 if (two_passes
1141 && BB_PARTITION (traces[t].first) != current_partition)
1142 continue;
1144 connected[t] = true;
1146 /* Find the predecessor traces. */
/* Walk backwards from trace T, repeatedly picking the best unconnected
   predecessor trace whose end can fall through into the current head.  */
1147 for (t2 = t; t2 > 0;)
1149 edge_iterator ei;
1150 best = NULL;
1151 best_len = 0;
1152 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1154 int si = e->src->index;
1156 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1157 && (e->flags & EDGE_CAN_FALLTHRU)
1158 && !(e->flags & EDGE_COMPLEX)
1159 && bbd[si].end_of_trace >= 0
1160 && !connected[bbd[si].end_of_trace]
1161 && (BB_PARTITION (e->src) == current_partition)
1162 && connect_better_edge_p (e, true, best_len, best, traces))
1164 best = e;
1165 best_len = traces[bbd[si].end_of_trace].length;
1168 if (best)
1170 best->src->aux = best->dest;
1171 t2 = bbd[best->src->index].end_of_trace;
1172 connected[t2] = true;
1174 if (dump_file)
1176 fprintf (dump_file, "Connection: %d %d\n",
1177 best->src->index, best->dest->index);
1180 else
1181 break;
/* Append the chain just built (starting at trace T2) after the previously
   emitted trace.  */
1184 if (last_trace >= 0)
1185 traces[last_trace].last->aux = traces[t2].first;
1186 last_trace = t;
1188 /* Find the successor traces. */
1189 while (1)
1191 /* Find the continuation of the chain. */
1192 edge_iterator ei;
1193 best = NULL;
1194 best_len = 0;
1195 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1197 int di = e->dest->index;
1199 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1200 && (e->flags & EDGE_CAN_FALLTHRU)
1201 && !(e->flags & EDGE_COMPLEX)
1202 && bbd[di].start_of_trace >= 0
1203 && !connected[bbd[di].start_of_trace]
1204 && (BB_PARTITION (e->dest) == current_partition)
1205 && connect_better_edge_p (e, false, best_len, best, traces))
1207 best = e;
1208 best_len = traces[bbd[di].start_of_trace].length;
/* Size path: be conservative — only connect in near-original order to keep
   jumps short; never duplicate blocks.  */
1212 if (for_size)
1214 if (!best)
1215 /* Stop finding the successor traces. */
1216 break;
1218 /* It is OK to connect block n with block n + 1 or a block
1219 before n. For others, only connect to the loop header. */
1220 if (best->dest->index > (traces[t].last->index + 1))
1222 int count = EDGE_COUNT (best->dest->preds);
1224 FOR_EACH_EDGE (e, ei, best->dest->preds)
1225 if (e->flags & EDGE_DFS_BACK)
1226 count--;
1228 /* If dest has multiple predecessors, skip it. We expect
1229 that one predecessor with smaller index connects with it
1230 later. */
1231 if (count != 1)
1232 break;
1235 /* Only connect Trace n with Trace n + 1. It is conservative
1236 to keep the order as close as possible to the original order.
1237 It also helps to reduce long jumps. */
1238 if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1239 break;
1241 if (dump_file)
1242 fprintf (dump_file, "Connection: %d %d\n",
1243 best->src->index, best->dest->index);
1245 t = bbd[best->dest->index].start_of_trace;
1246 traces[last_trace].last->aux = traces[t].first;
1247 connected[t] = true;
1248 last_trace = t;
/* Speed path with a direct best successor: just chain it on.  */
1250 else if (best)
1252 if (dump_file)
1254 fprintf (dump_file, "Connection: %d %d\n",
1255 best->src->index, best->dest->index);
1257 t = bbd[best->dest->index].start_of_trace;
1258 traces[last_trace].last->aux = traces[t].first;
1259 connected[t] = true;
1260 last_trace = t;
1262 else
1264 /* Try to connect the traces by duplication of 1 block. */
/* No direct successor trace; look one block ahead (via E then E2) for a
   trace reachable by copying the intermediate block E->dest.  */
1265 edge e2;
1266 basic_block next_bb = NULL;
1267 bool try_copy = false;
1269 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1270 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1271 && (e->flags & EDGE_CAN_FALLTHRU)
1272 && !(e->flags & EDGE_COMPLEX)
1273 && (!best || e->probability > best->probability))
1275 edge_iterator ei;
1276 edge best2 = NULL;
1277 int best2_len = 0;
1279 /* If the destination is a start of a trace which is only
1280 one block long, then no need to search the successor
1281 blocks of the trace. Accept it. */
1282 if (bbd[e->dest->index].start_of_trace >= 0
1283 && traces[bbd[e->dest->index].start_of_trace].length
1284 == 1)
1286 best = e;
1287 try_copy = true;
1288 continue;
1291 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1293 int di = e2->dest->index;
1295 if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1296 || ((e2->flags & EDGE_CAN_FALLTHRU)
1297 && !(e2->flags & EDGE_COMPLEX)
1298 && bbd[di].start_of_trace >= 0
1299 && !connected[bbd[di].start_of_trace]
1300 && BB_PARTITION (e2->dest) == current_partition
1301 && EDGE_FREQUENCY (e2) >= freq_threshold
1302 && e2->count >= count_threshold
1303 && (!best2
1304 || e2->probability > best2->probability
1305 || (e2->probability == best2->probability
1306 && traces[bbd[di].start_of_trace].length
1307 > best2_len))))
1309 best = e;
1310 best2 = e2;
1311 if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1312 best2_len = traces[bbd[di].start_of_trace].length;
1313 else
1314 best2_len = INT_MAX;
1315 next_bb = e2->dest;
1316 try_copy = true;
/* Duplication is disabled entirely under hot/cold partitioning.  */
1321 if (crtl->has_bb_partition)
1322 try_copy = false;
1324 /* Copy tiny blocks always; copy larger blocks only when the
1325 edge is traversed frequently enough. */
1326 if (try_copy
1327 && copy_bb_p (best->dest,
1328 optimize_edge_for_speed_p (best)
1329 && EDGE_FREQUENCY (best) >= freq_threshold
1330 && best->count >= count_threshold))
1332 basic_block new_bb;
1334 if (dump_file)
1336 fprintf (dump_file, "Connection: %d %d ",
1337 traces[t].last->index, best->dest->index);
1338 if (!next_bb)
1339 fputc ('\n', dump_file);
1340 else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1341 fprintf (dump_file, "exit\n");
1342 else
1343 fprintf (dump_file, "%d\n", next_bb->index);
1346 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1347 traces[t].last = new_bb;
1348 if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1350 t = bbd[next_bb->index].start_of_trace;
1351 traces[last_trace].last->aux = traces[t].first;
1352 connected[t] = true;
1353 last_trace = t;
1355 else
1356 break; /* Stop finding the successor traces. */
1358 else
1359 break; /* Stop finding the successor traces. */
/* Dump the final block order reached by following the AUX chain.  */
1364 if (dump_file)
1366 basic_block bb;
1368 fprintf (dump_file, "Final order:\n");
1369 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1370 fprintf (dump_file, "%d ", bb->index);
1371 fprintf (dump_file, "\n");
1372 fflush (dump_file);
1375 FREE (connected);
1378 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1379 when code size is allowed to grow by duplication. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1381 static bool
1382 copy_bb_p (const_basic_block bb, int code_may_grow)
1384 int size = 0;
/* Baseline budget: a copy must be cheaper than the unconditional jump it
   replaces, unless CODE_MAY_GROW relaxes this below.  */
1385 int max_size = uncond_jump_length;
1386 rtx_insn *insn;
/* Never copy a never-executed block, a block with fewer than two
   predecessors (no jump would be saved), or one the CFG layer cannot
   duplicate.  */
1388 if (!bb->frequency)
1389 return false;
1390 if (EDGE_COUNT (bb->preds) < 2)
1391 return false;
1392 if (!can_duplicate_block_p (bb))
1393 return false;
1395 /* Avoid duplicating blocks which have many successors (PR/13430). */
1396 if (EDGE_COUNT (bb->succs) > 8)
1397 return false;
1399 if (code_may_grow && optimize_bb_for_speed_p (bb))
1400 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
/* Estimate the block's size as the sum of minimal insn lengths.  */
1402 FOR_BB_INSNS (bb, insn)
1404 if (INSN_P (insn))
1405 size += get_attr_min_length (insn);
1408 if (size <= max_size)
1409 return true;
1411 if (dump_file)
1413 fprintf (dump_file,
1414 "Block %d can't be copied because its size = %d.\n",
1415 bb->index, size);
1418 return false;
1421 /* Return the length of unconditional jump instruction. */
/* Emits a throwaway label+jump into a temporary sequence purely to measure
   the target's minimal jump length; the sequence is discarded.
   NOTE(review): the return-type line (presumably "int") appears to have been
   dropped by the extraction along with brace-only lines — verify against
   upstream gcc-5 bb-reorder.c.  */
1424 get_uncond_jump_length (void)
1426 rtx_insn *label, *jump;
1427 int length;
1429 start_sequence ();
1430 label = emit_label (gen_label_rtx ());
1431 jump = emit_jump_insn (gen_jump (label));
1432 length = get_attr_min_length (jump);
1433 end_sequence ();
1435 return length;
1438 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1439 Duplicate the landing pad and split the edges so that no EH edge
1440 crosses partitions. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1442 static void
1443 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1445 eh_landing_pad new_lp;
1446 basic_block new_bb, last_bb, post_bb;
1447 rtx_insn *new_label, *jump;
1448 rtx post_label;
1449 unsigned new_partition;
1450 edge_iterator ei;
1451 edge e;
1453 /* Generate the new landing-pad structure. */
1454 new_lp = gen_eh_landing_pad (old_lp->region);
1455 new_lp->post_landing_pad = old_lp->post_landing_pad;
1456 new_lp->landing_pad = gen_label_rtx ();
1457 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1459 /* Put appropriate instructions in new bb. */
1460 new_label = emit_label (new_lp->landing_pad);
1462 expand_dw2_landing_pad_for_region (old_lp->region);
/* The new pad ends with a jump to the (shared) post-landing-pad block.  */
1464 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1465 post_bb = single_succ (post_bb);
1466 post_label = block_label (post_bb);
1467 jump = emit_jump_insn (gen_jump (post_label));
1468 JUMP_LABEL (jump) = post_label;
1470 /* Create new basic block to be dest for lp. */
1471 last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1472 new_bb = create_basic_block (new_label, jump, last_bb);
/* Splice new_bb into the AUX layout chain right after last_bb.  */
1473 new_bb->aux = last_bb->aux;
1474 last_bb->aux = new_bb;
1476 emit_barrier_after_bb (new_bb);
1478 make_edge (new_bb, post_bb, 0);
1480 /* Make sure new bb is in the other partition. */
/* XOR with both partition bits flips HOT <-> COLD.  */
1481 new_partition = BB_PARTITION (old_bb);
1482 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1483 BB_SET_PARTITION (new_bb, new_partition);
1485 /* Fix up the edges. */
/* Redirect every throwing predecessor that lives in the other partition to
   the duplicated pad, rewriting its REG_EH_REGION note to the new lp index.
   ei is only advanced on the non-redirect path because redirect_edge_succ
   removes E from old_bb->preds.  */
1486 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1487 if (BB_PARTITION (e->src) == new_partition)
1489 rtx_insn *insn = BB_END (e->src);
1490 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1492 gcc_assert (note != NULL);
1493 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1494 XEXP (note, 0) = GEN_INT (new_lp->index);
1496 /* Adjust the edge to the new destination. */
1497 redirect_edge_succ (e, new_bb);
1499 else
1500 ei_next (&ei);
1504 /* Ensure that all hot bbs are included in a hot path through the
1505 procedure. This is done by calling this function twice, once
1506 with WALK_UP true (to look for paths from the entry to hot bbs) and
1507 once with WALK_UP false (to look for paths from hot bbs to the exit).
1508 Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1509 to BBS_IN_HOT_PARTITION. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1511 static unsigned int
1512 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1513 vec<basic_block> *bbs_in_hot_partition)
1515 /* Callers check this. */
1516 gcc_checking_assert (cold_bb_count);
1518 /* Keep examining hot bbs while we still have some left to check
1519 and there are remaining cold bbs. */
/* Worklist of hot blocks; copy so pushes here don't disturb the caller's
   vector ordering.  */
1520 vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1521 while (! hot_bbs_to_check.is_empty ()
1522 && cold_bb_count)
1524 basic_block bb = hot_bbs_to_check.pop ();
1525 vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1526 edge e;
1527 edge_iterator ei;
1528 int highest_probability = 0;
1529 int highest_freq = 0;
1530 gcov_type highest_count = 0;
1531 bool found = false;
1533 /* Walk the preds/succs and check if there is at least one already
1534 marked hot. Keep track of the most frequent pred/succ so that we
1535 can mark it hot if we don't find one. */
1536 FOR_EACH_EDGE (e, ei, edges)
1538 basic_block reach_bb = walk_up ? e->src : e->dest;
/* Back edges don't count as connecting paths.  */
1540 if (e->flags & EDGE_DFS_BACK)
1541 continue;
1543 if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1545 found = true;
1546 break;
1548 /* The following loop will look for the hottest edge via
1549 the edge count, if it is non-zero, then fallback to the edge
1550 frequency and finally the edge probability. */
1551 if (e->count > highest_count)
1552 highest_count = e->count;
1553 int edge_freq = EDGE_FREQUENCY (e);
1554 if (edge_freq > highest_freq)
1555 highest_freq = edge_freq;
1556 if (e->probability > highest_probability)
1557 highest_probability = e->probability;
1560 /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1561 block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1562 then the most frequent pred (or succ) needs to be adjusted. In the
1563 case where multiple preds/succs have the same frequency (e.g. a
1564 50-50 branch), then both will be adjusted. */
1565 if (found)
1566 continue;
1568 FOR_EACH_EDGE (e, ei, edges)
1570 if (e->flags & EDGE_DFS_BACK)
1571 continue;
1572 /* Select the hottest edge using the edge count, if it is non-zero,
1573 then fallback to the edge frequency and finally the edge
1574 probability. */
1575 if (highest_count)
1577 if (e->count < highest_count)
1578 continue;
1580 else if (highest_freq)
1582 if (EDGE_FREQUENCY (e) < highest_freq)
1583 continue;
1585 else if (e->probability < highest_probability)
1586 continue;
1588 basic_block reach_bb = walk_up ? e->src : e->dest;
1590 /* We have a hot bb with an immediate dominator that is cold.
1591 The dominator needs to be re-marked hot. */
1592 BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1593 cold_bb_count--;
1595 /* Now we need to examine newly-hot reach_bb to see if it is also
1596 dominated by a cold bb. */
1597 bbs_in_hot_partition->safe_push (reach_bb);
1598 hot_bbs_to_check.safe_push (reach_bb);
1602 return cold_bb_count;
1606 /* Find the basic blocks that are rarely executed and need to be moved to
1607 a separate section of the .o file (to cut down on paging and improve
1608 cache locality). Return a vector of all edges that cross. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1610 static vec<edge>
1611 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1613 vec<edge> crossing_edges = vNULL;
1614 basic_block bb;
1615 edge e;
1616 edge_iterator ei;
1617 unsigned int cold_bb_count = 0;
1618 auto_vec<basic_block> bbs_in_hot_partition;
1620 /* Mark which partition (hot/cold) each basic block belongs in. */
1621 FOR_EACH_BB_FN (bb, cfun)
1623 bool cold_bb = false;
1625 if (probably_never_executed_bb_p (cfun, bb))
1627 /* Handle profile insanities created by upstream optimizations
1628 by also checking the incoming edge weights. If there is a non-cold
1629 incoming edge, conservatively prevent this block from being split
1630 into the cold section. */
1631 cold_bb = true;
1632 FOR_EACH_EDGE (e, ei, bb->preds)
1633 if (!probably_never_executed_edge_p (cfun, e))
1635 cold_bb = false;
1636 break;
1639 if (cold_bb)
1641 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1642 cold_bb_count++;
1644 else
1646 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1647 bbs_in_hot_partition.safe_push (bb);
1651 /* Ensure that hot bbs are included along a hot path from the entry to exit.
1652 Several different possibilities may include cold bbs along all paths
1653 to/from a hot bb. One is that there are edge weight insanities
1654 due to optimization phases that do not properly update basic block profile
1655 counts. The second is that the entry of the function may not be hot, because
1656 it is entered fewer times than the number of profile training runs, but there
1657 is a loop inside the function that causes blocks within the function to be
1658 above the threshold for hotness. This is fixed by walking up from hot bbs
1659 to the entry block, and then down from hot bbs to the exit, performing
1660 partitioning fixups as necessary. */
1661 if (cold_bb_count)
/* sanitize_hot_paths relies on EDGE_DFS_BACK being up to date.  */
1663 mark_dfs_back_edges ();
1664 cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1665 &bbs_in_hot_partition);
1666 if (cold_bb_count)
1667 sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1670 /* The format of .gcc_except_table does not allow landing pads to
1671 be in a different partition as the throw. Fix this by either
1672 moving or duplicating the landing pads. */
1673 if (cfun->eh->lp_array)
1675 unsigned i;
1676 eh_landing_pad lp;
1678 FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1680 bool all_same, all_diff;
1682 if (lp == NULL
1683 || lp->landing_pad == NULL_RTX
1684 || !LABEL_P (lp->landing_pad))
1685 continue;
/* Classify the pad's EH predecessors: all in the same partition as the
   pad, all in the other partition, or mixed.  */
1687 all_same = all_diff = true;
1688 bb = BLOCK_FOR_INSN (lp->landing_pad);
1689 FOR_EACH_EDGE (e, ei, bb->preds)
1691 gcc_assert (e->flags & EDGE_EH);
1692 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1693 all_diff = false;
1694 else
1695 all_same = false;
/* all_same: nothing to do.  all_diff: just move the pad to the other
   partition.  mixed: duplicate the pad.  */
1698 if (all_same)
1700 else if (all_diff)
1702 int which = BB_PARTITION (bb);
1703 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1704 BB_SET_PARTITION (bb, which);
1706 else
1707 fix_up_crossing_landing_pad (lp, bb);
1711 /* Mark every edge that crosses between sections. */
1713 FOR_EACH_BB_FN (bb, cfun)
1714 FOR_EACH_EDGE (e, ei, bb->succs)
1716 unsigned int flags = e->flags;
1718 /* We should never have EDGE_CROSSING set yet. */
1719 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1721 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1722 && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1723 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1725 crossing_edges.safe_push (e);
1726 flags |= EDGE_CROSSING;
1729 /* Now that we've split eh edges as appropriate, allow landing pads
1730 to be merged with the post-landing pads. */
1731 flags &= ~EDGE_PRESERVE;
1733 e->flags = flags;
1736 return crossing_edges;
1739 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1741 static void
1742 set_edge_can_fallthru_flag (void)
1744 basic_block bb;
1746 FOR_EACH_BB_FN (bb, cfun)
1748 edge e;
1749 edge_iterator ei;
1751 FOR_EACH_EDGE (e, ei, bb->succs)
1753 e->flags &= ~EDGE_CAN_FALLTHRU;
1755 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1756 if (e->flags & EDGE_FALLTHRU)
1757 e->flags |= EDGE_CAN_FALLTHRU;
1760 /* If the BB ends with an invertible condjump all (2) edges are
1761 CAN_FALLTHRU edges. */
1762 if (EDGE_COUNT (bb->succs) != 2)
1763 continue;
1764 if (!any_condjump_p (BB_END (bb)))
1765 continue;
1766 if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1767 continue;
1768 invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1769 EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1770 EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1774 /* If any destination of a crossing edge does not have a label, add label;
1775 Convert any easy fall-through crossing edges to unconditional jumps. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1777 static void
1778 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1780 size_t i;
1781 edge e;
1783 FOR_EACH_VEC_ELT (crossing_edges, i, e)
1785 basic_block src = e->src;
1786 basic_block dest = e->dest;
1787 rtx label;
1788 rtx_insn *new_jump;
1790 if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1791 continue;
1793 /* Make sure dest has a label. */
/* block_label creates the label as a side effect if it is missing.  */
1794 label = block_label (dest);
1796 /* Nothing to do for non-fallthru edges. */
1797 if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1798 continue;
1799 if ((e->flags & EDGE_FALLTHRU) == 0)
1800 continue;
1802 /* If the block does not end with a control flow insn, then we
1803 can trivially add a jump to the end to fixup the crossing.
1804 Otherwise the jump will have to go in a new bb, which will
1805 be handled by fix_up_fall_thru_edges function. */
1806 if (control_flow_insn_p (BB_END (src)))
1807 continue;
1809 /* Make sure there's only one successor. */
1810 gcc_assert (single_succ_p (src));
1812 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1813 BB_END (src) = new_jump;
1814 JUMP_LABEL (new_jump) = label;
1815 LABEL_NUSES (label) += 1;
1817 emit_barrier_after_bb (src);
1819 /* Mark edge as non-fallthru. */
1820 e->flags &= ~EDGE_FALLTHRU;
1824 /* Find any bb's where the fall-through edge is a crossing edge (note that
1825 these bb's must also contain a conditional jump or end with a call
1826 instruction; we've already dealt with fall-through edges for blocks
1827 that didn't have a conditional jump or didn't end with call instruction
1828 in the call to add_labels_and_missing_jumps). Convert the fall-through
1829 edge to non-crossing edge by inserting a new bb to fall-through into.
1830 The new bb will contain an unconditional jump (crossing edge) to the
1831 original fall through destination. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1833 static void
1834 fix_up_fall_thru_edges (void)
1836 basic_block cur_bb;
1837 basic_block new_bb;
1838 edge succ1;
1839 edge succ2;
1840 edge fall_thru;
1841 edge cond_jump = NULL;
1842 edge e;
1843 bool cond_jump_crosses;
1844 int invert_worked;
1845 rtx_insn *old_jump;
1846 rtx fall_thru_label;
1848 FOR_EACH_BB_FN (cur_bb, cfun)
1850 fall_thru = NULL;
1851 if (EDGE_COUNT (cur_bb->succs) > 0)
1852 succ1 = EDGE_SUCC (cur_bb, 0);
1853 else
1854 succ1 = NULL;
1856 if (EDGE_COUNT (cur_bb->succs) > 1)
1857 succ2 = EDGE_SUCC (cur_bb, 1);
1858 else
1859 succ2 = NULL;
1861 /* Find the fall-through edge. */
1863 if (succ1
1864 && (succ1->flags & EDGE_FALLTHRU))
1866 fall_thru = succ1;
1867 cond_jump = succ2;
1869 else if (succ2
1870 && (succ2->flags & EDGE_FALLTHRU))
1872 fall_thru = succ2;
1873 cond_jump = succ1;
/* Calls and throwing insns may have more than two successors; scan for the
   fallthru edge among them.  */
1875 else if (succ1
1876 && (block_ends_with_call_p (cur_bb)
1877 || can_throw_internal (BB_END (cur_bb))))
1879 edge e;
1880 edge_iterator ei;
1882 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1883 if (e->flags & EDGE_FALLTHRU)
1885 fall_thru = e;
1886 break;
1890 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1892 /* Check to see if the fall-thru edge is a crossing edge. */
1894 if (fall_thru->flags & EDGE_CROSSING)
1896 /* The fall_thru edge crosses; now check the cond jump edge, if
1897 it exists. */
1899 cond_jump_crosses = true;
1900 invert_worked = 0;
1901 old_jump = BB_END (cur_bb);
1903 /* Find the jump instruction, if there is one. */
1905 if (cond_jump)
1907 if (!(cond_jump->flags & EDGE_CROSSING))
1908 cond_jump_crosses = false;
1910 /* We know the fall-thru edge crosses; if the cond
1911 jump edge does NOT cross, and its destination is the
1912 next block in the bb order, invert the jump
1913 (i.e. fix it so the fall through does not cross and
1914 the cond jump does). */
1916 if (!cond_jump_crosses)
1918 /* Find label in fall_thru block. We've already added
1919 any missing labels, so there must be one. */
1921 fall_thru_label = block_label (fall_thru->dest);
1923 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1924 invert_worked = invert_jump (old_jump,
1925 fall_thru_label,0);
1926 if (invert_worked)
/* Swap the roles of the two edges and their crossing flags to match
   the inverted jump.  */
1928 fall_thru->flags &= ~EDGE_FALLTHRU;
1929 cond_jump->flags |= EDGE_FALLTHRU;
1930 update_br_prob_note (cur_bb);
1931 e = fall_thru;
1932 fall_thru = cond_jump;
1933 cond_jump = e;
1934 cond_jump->flags |= EDGE_CROSSING;
1935 fall_thru->flags &= ~EDGE_CROSSING;
1940 if (cond_jump_crosses || !invert_worked)
1942 /* This is the case where both edges out of the basic
1943 block are crossing edges. Here we will fix up the
1944 fall through edge. The jump edge will be taken care
1945 of later. The EDGE_CROSSING flag of fall_thru edge
1946 is unset before the call to force_nonfallthru
1947 function because if a new basic-block is created
1948 this edge remains in the current section boundary
1949 while the edge between new_bb and the fall_thru->dest
1950 becomes EDGE_CROSSING. */
1952 fall_thru->flags &= ~EDGE_CROSSING;
1953 new_bb = force_nonfallthru (fall_thru);
1955 if (new_bb)
1957 new_bb->aux = cur_bb->aux;
1958 cur_bb->aux = new_bb;
1960 /* This is done by force_nonfallthru_and_redirect. */
1961 gcc_assert (BB_PARTITION (new_bb)
1962 == BB_PARTITION (cur_bb));
1964 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1966 else
1968 /* If a new basic-block was not created; restore
1969 the EDGE_CROSSING flag. */
1970 fall_thru->flags |= EDGE_CROSSING;
1973 /* Add barrier after new jump */
1974 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1981 /* This function checks the destination block of a "crossing jump" to
1982 see if it has any crossing predecessors that begin with a code label
1983 and end with an unconditional jump. If so, it returns that predecessor
1984 block. (This is to avoid creating lots of new basic blocks that all
1985 contain unconditional jumps to the same destination). */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
1987 static basic_block
1988 find_jump_block (basic_block jump_dest)
1990 basic_block source_bb = NULL;
1991 edge e;
1992 rtx_insn *insn;
1993 edge_iterator ei;
1995 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1996 if (e->flags & EDGE_CROSSING)
1998 basic_block src = e->src;
2000 /* Check each predecessor to see if it has a label, and contains
2001 only one executable instruction, which is an unconditional jump.
2002 If so, we can use it. */
/* The scan skips non-INSN lines (notes etc.); it accepts src only when the
   first real insn is also the block's last insn and is an unconditional
   jump.  */
2004 if (LABEL_P (BB_HEAD (src)))
2005 for (insn = BB_HEAD (src);
2006 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2007 insn = NEXT_INSN (insn))
2009 if (INSN_P (insn)
2010 && insn == BB_END (src)
2011 && JUMP_P (insn)
2012 && !any_condjump_p (insn))
2014 source_bb = src;
2015 break;
2019 if (source_bb)
2020 break;
2023 return source_bb;
2026 /* Find all BB's with conditional jumps that are crossing edges;
2027 insert a new bb and make the conditional jump branch to the new
2028 bb instead (make the new bb same color so conditional branch won't
2029 be a 'crossing' edge). Insert an unconditional jump from the
2030 new bb to the original destination of the conditional jump. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
2032 static void
2033 fix_crossing_conditional_branches (void)
2035 basic_block cur_bb;
2036 basic_block new_bb;
2037 basic_block dest;
2038 edge succ1;
2039 edge succ2;
2040 edge crossing_edge;
2041 edge new_edge;
2042 rtx_insn *old_jump;
2043 rtx set_src;
2044 rtx old_label = NULL_RTX;
2045 rtx new_label;
2047 FOR_EACH_BB_FN (cur_bb, cfun)
2049 crossing_edge = NULL;
2050 if (EDGE_COUNT (cur_bb->succs) > 0)
2051 succ1 = EDGE_SUCC (cur_bb, 0);
2052 else
2053 succ1 = NULL;
2055 if (EDGE_COUNT (cur_bb->succs) > 1)
2056 succ2 = EDGE_SUCC (cur_bb, 1);
2057 else
2058 succ2 = NULL;
2060 /* We already took care of fall-through edges, so only one successor
2061 can be a crossing edge. */
2063 if (succ1 && (succ1->flags & EDGE_CROSSING))
2064 crossing_edge = succ1;
2065 else if (succ2 && (succ2->flags & EDGE_CROSSING))
2066 crossing_edge = succ2;
2068 if (crossing_edge)
2070 old_jump = BB_END (cur_bb);
2072 /* Check to make sure the jump instruction is a
2073 conditional jump. */
2075 set_src = NULL_RTX;
2077 if (any_condjump_p (old_jump))
/* Extract the IF_THEN_ELSE source from either a plain SET pattern or the
   first element of a PARALLEL.  */
2079 if (GET_CODE (PATTERN (old_jump)) == SET)
2080 set_src = SET_SRC (PATTERN (old_jump));
2081 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2083 set_src = XVECEXP (PATTERN (old_jump), 0,0);
2084 if (GET_CODE (set_src) == SET)
2085 set_src = SET_SRC (set_src);
2086 else
2087 set_src = NULL_RTX;
2091 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
/* The taken-branch label is whichever arm of the IF_THEN_ELSE is not PC.  */
2093 if (GET_CODE (XEXP (set_src, 1)) == PC)
2094 old_label = XEXP (set_src, 2);
2095 else if (GET_CODE (XEXP (set_src, 2)) == PC)
2096 old_label = XEXP (set_src, 1);
2098 /* Check to see if new bb for jumping to that dest has
2099 already been created; if so, use it; if not, create
2100 a new one. */
2102 new_bb = find_jump_block (crossing_edge->dest);
2104 if (new_bb)
2105 new_label = block_label (new_bb);
2106 else
2108 basic_block last_bb;
2109 rtx_insn *new_jump;
2111 /* Create new basic block to be dest for
2112 conditional jump. */
2114 /* Put appropriate instructions in new bb. */
2116 new_label = gen_label_rtx ();
2117 emit_label (new_label);
2119 gcc_assert (GET_CODE (old_label) == LABEL_REF);
/* Use the insn's JUMP_LABEL (a CODE_LABEL) rather than the LABEL_REF
   wrapper when emitting the trampoline jump.  */
2120 old_label = JUMP_LABEL (old_jump);
2121 new_jump = emit_jump_insn (gen_jump (old_label));
2122 JUMP_LABEL (new_jump) = old_label;
2124 last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2125 new_bb = create_basic_block (new_label, new_jump, last_bb);
2126 new_bb->aux = last_bb->aux;
2127 last_bb->aux = new_bb;
2129 emit_barrier_after_bb (new_bb);
2131 /* Make sure new bb is in same partition as source
2132 of conditional branch. */
2133 BB_COPY_PARTITION (new_bb, cur_bb);
2136 /* Make old jump branch to new bb. */
2138 redirect_jump (old_jump, new_label, 0);
2140 /* Remove crossing_edge as predecessor of 'dest'. */
2142 dest = crossing_edge->dest;
2144 redirect_edge_succ (crossing_edge, new_bb);
2146 /* Make a new edge from new_bb to old dest; new edge
2147 will be a successor for new_bb and a predecessor
2148 for 'dest'. */
2150 if (EDGE_COUNT (new_bb->succs) == 0)
2151 new_edge = make_edge (new_bb, dest, 0);
2152 else
2153 new_edge = EDGE_SUCC (new_bb, 0);
/* The conditional branch now stays within its partition; the trampoline's
   unconditional jump becomes the crossing edge instead.  */
2155 crossing_edge->flags &= ~EDGE_CROSSING;
2156 new_edge->flags |= EDGE_CROSSING;
2162 /* Find any unconditional branches that cross between hot and cold
2163 sections. Convert them into indirect jumps instead. */
/* NOTE(review): this extract embeds blob line numbers and drops brace-only
   lines; compare with upstream gcc-5 bb-reorder.c before editing.  */
2165 static void
2166 fix_crossing_unconditional_branches (void)
2168 basic_block cur_bb;
2169 rtx_insn *last_insn;
2170 rtx label;
2171 rtx label_addr;
2172 rtx_insn *indirect_jump_sequence;
2173 rtx_insn *jump_insn = NULL;
2174 rtx new_reg;
2175 rtx_insn *cur_insn;
2176 edge succ;
2178 FOR_EACH_BB_FN (cur_bb, cfun)
2180 last_insn = BB_END (cur_bb);
2182 if (EDGE_COUNT (cur_bb->succs) < 1)
2183 continue;
2185 succ = EDGE_SUCC (cur_bb, 0);
2187 /* Check to see if bb ends in a crossing (unconditional) jump. At
2188 this point, no crossing jumps should be conditional. */
2190 if (JUMP_P (last_insn)
2191 && (succ->flags & EDGE_CROSSING))
2193 gcc_assert (!any_condjump_p (last_insn));
2195 /* Make sure the jump is not already an indirect or table jump. */
2197 if (!computed_jump_p (last_insn)
2198 && !tablejump_p (last_insn, NULL, NULL))
2200 /* We have found a "crossing" unconditional branch. Now
2201 we must convert it to an indirect jump. First create
2202 reference of label, as target for jump. */
2204 label = JUMP_LABEL (last_insn);
2205 label_addr = gen_rtx_LABEL_REF (Pmode, label);
2206 LABEL_NUSES (label) += 1;
2208 /* Get a register to use for the indirect jump. */
2210 new_reg = gen_reg_rtx (Pmode);
2212 /* Generate indirect the jump sequence. */
2214 start_sequence ();
2215 emit_move_insn (new_reg, label_addr);
2216 emit_indirect_jump (new_reg);
2217 indirect_jump_sequence = get_insns ();
2218 end_sequence ();
2220 /* Make sure every instruction in the new jump sequence has
2221 its basic block set to be cur_bb. */
2223 for (cur_insn = indirect_jump_sequence; cur_insn;
2224 cur_insn = NEXT_INSN (cur_insn))
2226 if (!BARRIER_P (cur_insn))
2227 BLOCK_FOR_INSN (cur_insn) = cur_bb;
/* Remember the JUMP insn of the sequence; it becomes the new BB_END.  */
2228 if (JUMP_P (cur_insn))
2229 jump_insn = cur_insn;
2232 /* Insert the new (indirect) jump sequence immediately before
2233 the unconditional jump, then delete the unconditional jump. */
2235 emit_insn_before (indirect_jump_sequence, last_insn);
2236 delete_insn (last_insn);
2238 JUMP_LABEL (jump_insn) = label;
2239 LABEL_NUSES (label)++;
2241 /* Make BB_END for cur_bb be the jump instruction (NOT the
2242 barrier instruction at the end of the sequence...). */
2244 BB_END (cur_bb) = jump_insn;
2250 /* Update CROSSING_JUMP_P flags on all jump insns. */
2252 static void
2253 update_crossing_jump_flags (void)
2255 basic_block bb;
2256 edge e;
2257 edge_iterator ei;
/* For each block that has at least one crossing successor edge, mark
   its ending jump (if any) with CROSSING_JUMP_P.  */
2259 FOR_EACH_BB_FN (bb, cfun)
2260 FOR_EACH_EDGE (e, ei, bb->succs)
2261 if (e->flags & EDGE_CROSSING)
2263 if (JUMP_P (BB_END (bb))
2264 /* Some flags were added during fix_up_fall_thru_edges, via
2265 force_nonfallthru_and_redirect. */
2266 && !CROSSING_JUMP_P (BB_END (bb)))
2267 CROSSING_JUMP_P (BB_END (bb)) = 1;
/* One crossing edge is enough to flag the jump; stop scanning this
   block's edges and move to the next block.  */
2268 break;
2272 /* Reorder basic blocks.  The main entry point to this file.  The
2273 function takes no arguments; the caller (pass_reorder_blocks::execute)
is responsible for entering cfglayout mode with the desired flags
before calling it.  */
2275 static void
2276 reorder_basic_blocks (void)
2278 int n_traces;
2279 int i;
2280 struct trace *traces;
/* This algorithm only works in cfglayout mode.  */
2282 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
/* Nothing to reorder when there is at most one real block.  */
2284 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2285 return;
2287 set_edge_can_fallthru_flag ();
2288 mark_dfs_back_edges ();
2290 /* We are estimating the length of uncond jump insn only once since the code
2291 for getting the insn length always returns the minimal length now. */
2292 if (uncond_jump_length == 0)
2293 uncond_jump_length = get_uncond_jump_length ();
2295 /* We need to know some information for each basic block. */
2296 array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2297 bbd = XNEWVEC (bbro_basic_block_data, array_size);
/* Initialize per-block bookkeeping to "not yet in any trace".  */
2298 for (i = 0; i < array_size; i++)
2300 bbd[i].start_of_trace = -1;
2301 bbd[i].end_of_trace = -1;
2302 bbd[i].in_trace = -1;
2303 bbd[i].visited = 0;
2304 bbd[i].priority = -1;
2305 bbd[i].heap = NULL;
2306 bbd[i].node = NULL;
/* Build the traces greedily, then glue them together into the final
   block order.  */
2309 traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2310 n_traces = 0;
2311 find_traces (&n_traces, traces);
2312 connect_traces (n_traces, traces);
2313 FREE (traces);
2314 FREE (bbd);
2316 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2318 if (dump_file)
2320 if (dump_flags & TDF_DETAILS)
2321 dump_reg_info (dump_file);
2322 dump_flow_info (dump_file, dump_flags);
2325 /* Signal that rtl_verify_flow_info_1 can now verify that there
2326 is at most one switch between hot/cold sections. */
2327 crtl->bb_reorder_complete = true;
2330 /* Determine which partition the first basic block in the function
2331 belongs to, then find the first basic block in the current function
2332 that belongs to a different section, and insert a
2333 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2334 instruction stream. When writing out the assembly code,
2335 encountering this note will make the compiler switch between the
2336 hot and cold text sections. */
2338 void
2339 insert_section_boundary_note (void)
2341 basic_block bb;
2342 bool switched_sections = false;
/* 0 means "partition of the first block not yet seen".  */
2343 int current_partition = 0;
/* Only relevant when hot/cold partitioning actually happened.  */
2345 if (!crtl->has_bb_partition)
2346 return;
2348 FOR_EACH_BB_FN (bb, cfun)
2350 if (!current_partition)
2351 current_partition = BB_PARTITION (bb);
2352 if (BB_PARTITION (bb) != current_partition)
/* After reordering there must be exactly one hot/cold transition,
   hence the assert that we have not already switched.  */
2354 gcc_assert (!switched_sections);
2355 switched_sections = true;
2356 emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2357 current_partition = BB_PARTITION (bb);
2362 namespace {
/* Pass metadata for the "bbro" basic-block reordering RTL pass.  */
2364 const pass_data pass_data_reorder_blocks =
2366 RTL_PASS, /* type */
2367 "bbro", /* name */
2368 OPTGROUP_NONE, /* optinfo_flags */
2369 TV_REORDER_BLOCKS, /* tv_id */
2370 0, /* properties_required */
2371 0, /* properties_provided */
2372 0, /* properties_destroyed */
2373 0, /* todo_flags_start */
2374 0, /* todo_flags_finish */
/* Pass wrapper: runs trace-based block reordering late in RTL
   compilation.  */
2377 class pass_reorder_blocks : public rtl_opt_pass
2379 public:
2380 pass_reorder_blocks (gcc::context *ctxt)
2381 : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2384 /* opt_pass methods: */
/* Run only when the target allows jump modification and the user asked
   for reordering (-freorder-blocks or -freorder-blocks-and-partition).  */
2385 virtual bool gate (function *)
2387 if (targetm.cannot_modify_jumps_p ())
2388 return false;
2389 return (optimize > 0
2390 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2393 virtual unsigned int execute (function *);
2395 }; // class pass_reorder_blocks
/* Execute the reordering pass: enter cfglayout mode, reorder, clean up,
   record the new order in bb->aux, and leave cfglayout mode.  Returns 0
   (no TODO flags).  */
2397 unsigned int
2398 pass_reorder_blocks::execute (function *fun)
2400 basic_block bb;
2402 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2403 splitting possibly introduced more crossjumping opportunities. */
2404 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2406 reorder_basic_blocks ();
2407 cleanup_cfg (CLEANUP_EXPENSIVE);
/* Record the chosen linear order in the aux chain, which
   cfg_layout_finalize uses to rebuild the insn stream.  */
2409 FOR_EACH_BB_FN (bb, fun)
2410 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2411 bb->aux = bb->next_bb;
2412 cfg_layout_finalize ();
2414 return 0;
2417 } // anon namespace
/* Factory used by the pass manager to instantiate the bbro pass.  */
2419 rtl_opt_pass *
2420 make_pass_reorder_blocks (gcc::context *ctxt)
2422 return new pass_reorder_blocks (ctxt);
2425 /* Duplicate the blocks containing computed gotos. This basically unfactors
2426 computed gotos that were factored early on in the compilation process to
2427 speed up edge based data flow. We used to not unfactor them again,
2428 which can seriously pessimize code with many computed jumps in the source
2429 code, such as interpreters. See e.g. PR15242. */
2431 namespace {
/* Pass metadata for the "compgotos" computed-goto unfactoring pass.  */
2433 const pass_data pass_data_duplicate_computed_gotos =
2435 RTL_PASS, /* type */
2436 "compgotos", /* name */
2437 OPTGROUP_NONE, /* optinfo_flags */
2438 TV_REORDER_BLOCKS, /* tv_id */
2439 0, /* properties_required */
2440 0, /* properties_provided */
2441 0, /* properties_destroyed */
2442 0, /* todo_flags_start */
2443 0, /* todo_flags_finish */
/* Pass wrapper; gate and execute are defined out of line below.  */
2446 class pass_duplicate_computed_gotos : public rtl_opt_pass
2448 public:
2449 pass_duplicate_computed_gotos (gcc::context *ctxt)
2450 : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2453 /* opt_pass methods: */
2454 virtual bool gate (function *);
2455 virtual unsigned int execute (function *);
2457 }; // class pass_duplicate_computed_gotos
/* Gate: duplication grows code, so only run when optimizing for speed
   with expensive optimizations enabled, and only if the target lets us
   rewrite jumps at all.  */
2459 bool
2460 pass_duplicate_computed_gotos::gate (function *fun)
2462 if (targetm.cannot_modify_jumps_p ())
2463 return false;
2464 return (optimize > 0
2465 && flag_expensive_optimizations
2466 && ! optimize_function_for_size_p (fun));
/* Execute the compgotos pass: find small blocks ending in a computed
   jump, then duplicate each such block into predecessors that reach it
   by an unconditional edge, so every interpreter-style dispatch gets
   its own indirect jump.  Returns 0 (no TODO flags).  */
2469 unsigned int
2470 pass_duplicate_computed_gotos::execute (function *fun)
2472 basic_block bb, new_bb;
/* Bitmap of block indices that are safe and profitable to duplicate.  */
2473 bitmap candidates;
/* Size budget (in minimal insn lengths) for a duplicatable block.  */
2474 int max_size;
2475 bool changed = false;
2477 if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2478 return 0;
2480 clear_bb_flags ();
2481 cfg_layout_initialize (0);
2483 /* We are estimating the length of uncond jump insn only once
2484 since the code for getting the insn length always returns
2485 the minimal length now. */
2486 if (uncond_jump_length == 0)
2487 uncond_jump_length = get_uncond_jump_length ();
2489 max_size
2490 = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2491 candidates = BITMAP_ALLOC (NULL);
2493 /* Look for blocks that end in a computed jump, and see if such blocks
2494 are suitable for unfactoring. If a block is a candidate for unfactoring,
2495 mark it in the candidates. */
2496 FOR_EACH_BB_FN (bb, fun)
2498 rtx_insn *insn;
2499 edge e;
2500 edge_iterator ei;
2501 int size, all_flags;
2503 /* Build the reorder chain for the original order of blocks. */
2504 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2505 bb->aux = bb->next_bb;
2507 /* Obviously the block has to end in a computed jump. */
2508 if (!computed_jump_p (BB_END (bb)))
2509 continue;
2511 /* Only consider blocks that can be duplicated. */
2512 if (CROSSING_JUMP_P (BB_END (bb))
2513 || !can_duplicate_block_p (bb))
2514 continue;
2516 /* Make sure that the block is small enough. */
2517 size = 0;
2518 FOR_BB_INSNS (bb, insn)
2519 if (INSN_P (insn))
2521 size += get_attr_min_length (insn);
2522 if (size > max_size)
2523 break;
2525 if (size > max_size)
2526 continue;
2528 /* Final check: there must not be any incoming abnormal edges. */
2529 all_flags = 0;
2530 FOR_EACH_EDGE (e, ei, bb->preds)
2531 all_flags |= e->flags;
2532 if (all_flags & EDGE_COMPLEX)
2533 continue;
2535 bitmap_set_bit (candidates, bb->index);
2538 /* Nothing to do if there is no computed jump here. */
2539 if (bitmap_empty_p (candidates))
2540 goto done;
2542 /* Duplicate computed gotos. */
2543 FOR_EACH_BB_FN (bb, fun)
/* BB_VISITED prevents re-processing; freshly created copies are also
   marked visited below so they are not considered again.  */
2545 if (bb->flags & BB_VISITED)
2546 continue;
2548 bb->flags |= BB_VISITED;
2550 /* BB must have one outgoing edge. That edge must not lead to
2551 the exit block or the next block.
2552 The destination must have more than one predecessor. */
2553 if (!single_succ_p (bb)
2554 || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2555 || single_succ (bb) == bb->next_bb
2556 || single_pred_p (single_succ (bb)))
2557 continue;
2559 /* The successor block has to be a duplication candidate. */
2560 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2561 continue;
2563 /* Don't duplicate a partition crossing edge, which requires difficult
2564 fixup. */
2565 if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2566 continue;
/* Clone the candidate successor into BB's path and splice the copy
   into the aux (layout) chain right after BB.  */
2568 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2569 new_bb->aux = bb->aux;
2570 bb->aux = new_bb;
2571 new_bb->flags |= BB_VISITED;
2572 changed = true;
2575 done:
2576 if (changed)
2578 /* Duplicating blocks above will redirect edges and may cause hot
2579 blocks previously reached by both hot and cold blocks to become
2580 dominated only by cold blocks. */
2581 fixup_partitions ();
2583 /* Merge the duplicated blocks into predecessors, when possible. */
2584 cfg_layout_finalize ();
2585 cleanup_cfg (0);
2587 else
2588 cfg_layout_finalize ();
2590 BITMAP_FREE (candidates);
2591 return 0;
2594 } // anon namespace
/* Factory used by the pass manager to instantiate the compgotos pass.  */
2596 rtl_opt_pass *
2597 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2599 return new pass_duplicate_computed_gotos (ctxt);
2602 /* This function is the main 'entrance' for the optimization that
2603 partitions hot and cold basic blocks into separate sections of the
2604 .o file (to improve performance and cache locality). Ideally it
2605 would be called after all optimizations that rearrange the CFG have
2606 been called. However part of this optimization may introduce new
2607 register usage, so it must be called before register allocation has
2608 occurred. This means that this optimization is actually called
2609 well before the optimization that reorders basic blocks (see
2610 function above).
2612 This optimization checks the feedback information to determine
2613 which basic blocks are hot/cold, updates flags on the basic blocks
2614 to indicate which section they belong in. This information is
2615 later used for writing out sections in the .o file. Because hot
2616 and cold sections can be arbitrarily large (within the bounds of
2617 memory), far beyond the size of a single function, it is necessary
2618 to fix up all edges that cross section boundaries, to make sure the
2619 instructions used can actually span the required distance. The
2620 fixes are described below.
2622 Fall-through edges must be changed into jumps; it is not safe or
2623 legal to fall through across a section boundary. Whenever a
2624 fall-through edge crossing a section boundary is encountered, a new
2625 basic block is inserted (in the same section as the fall-through
2626 source), and the fall through edge is redirected to the new basic
2627 block. The new basic block contains an unconditional jump to the
2628 original fall-through target. (If the unconditional jump is
2629 insufficient to cross section boundaries, that is dealt with a
2630 little later, see below).
2632 In order to deal with architectures that have short conditional
2633 branches (which cannot span all of memory) we take any conditional
2634 jump that attempts to cross a section boundary and add a level of
2635 indirection: it becomes a conditional jump to a new basic block, in
2636 the same section. The new basic block contains an unconditional
2637 jump to the original target, in the other section.
2639 For those architectures whose unconditional branch is also
2640 incapable of reaching all of memory, those unconditional jumps are
2641 converted into indirect jumps, through a register.
2643 IMPORTANT NOTE: This optimization causes some messy interactions
2644 with the cfg cleanup optimizations; those optimizations want to
2645 merge blocks wherever possible, and to collapse indirect jump
2646 sequences (change "A jumps to B jumps to C" directly into "A jumps
2647 to C"). Those optimizations can undo the jump fixes that
2648 partitioning is required to make (see above), in order to ensure
2649 that jumps attempting to cross section boundaries are really able
2650 to cover whatever distance the jump requires (on many architectures
2651 conditional or unconditional jumps are not able to reach all of
2652 memory). Therefore tests have to be inserted into each such
2653 optimization to make sure that it does not undo stuff necessary to
2654 cross partition boundaries. This would be much less of a problem
2655 if we could perform this optimization later in the compilation, but
2656 unfortunately the fact that we may need to create indirect jumps
2657 (through registers) requires that this optimization be performed
2658 before register allocation.
2660 Hot and cold basic blocks are partitioned and put in separate
2661 sections of the .o file, to reduce paging and improve cache
2662 performance (hopefully). This can result in bits of code from the
2663 same function being widely separated in the .o file. However this
2664 is not obvious to the current bb structure. Therefore we must take
2665 care to ensure that: 1). There are no fall_thru edges that cross
2666 between sections; 2). For those architectures which have "short"
2667 conditional branches, all conditional branches that attempt to
2668 cross between sections are converted to unconditional branches;
2669 and, 3). For those architectures which have "short" unconditional
2670 branches, all unconditional branches that attempt to cross between
2671 sections are converted to indirect jumps.
2673 The code for fixing up fall_thru edges that cross between hot and
2674 cold basic blocks does so by creating new basic blocks containing
2675 unconditional branches to the appropriate label in the "other"
2676 section. The new basic block is then put in the same (hot or cold)
2677 section as the original conditional branch, and the fall_thru edge
2678 is modified to fall into the new basic block instead. By adding
2679 this level of indirection we end up with only unconditional branches
2680 crossing between hot and cold sections.
2682 Conditional branches are dealt with by adding a level of indirection.
2683 A new basic block is added in the same (hot/cold) section as the
2684 conditional branch, and the conditional branch is retargeted to the
2685 new basic block. The new basic block contains an unconditional branch
2686 to the original target of the conditional branch (in the other section).
2688 Unconditional branches are dealt with by converting them into
2689 indirect jumps. */
2691 namespace {
/* Pass metadata for the "bbpart" hot/cold partitioning RTL pass; it
   requires cfglayout mode (PROP_cfglayout).  */
2693 const pass_data pass_data_partition_blocks =
2695 RTL_PASS, /* type */
2696 "bbpart", /* name */
2697 OPTGROUP_NONE, /* optinfo_flags */
2698 TV_REORDER_BLOCKS, /* tv_id */
2699 PROP_cfglayout, /* properties_required */
2700 0, /* properties_provided */
2701 0, /* properties_destroyed */
2702 0, /* todo_flags_start */
2703 0, /* todo_flags_finish */
/* Pass wrapper; gate and execute are defined out of line below.  */
2706 class pass_partition_blocks : public rtl_opt_pass
2708 public:
2709 pass_partition_blocks (gcc::context *ctxt)
2710 : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2713 /* opt_pass methods: */
2714 virtual bool gate (function *);
2715 virtual unsigned int execute (function *);
2717 }; // class pass_partition_blocks
/* Gate: partition only with -freorder-blocks-and-partition, when
   optimizing the function for speed, and never for COMDAT functions or
   functions with user-defined section attributes.  */
2719 bool
2720 pass_partition_blocks::gate (function *fun)
2722 /* The optimization to partition hot/cold basic blocks into separate
2723 sections of the .o file does not work well with linkonce or with
2724 user defined section attributes. Don't call it if either case
2725 arises. */
2726 return (flag_reorder_blocks_and_partition
2727 && optimize
2728 /* See gate_handle_reorder_blocks. We should not partition if
2729 we are going to omit the reordering. */
2730 && optimize_function_for_speed_p (fun)
2731 && !DECL_COMDAT_GROUP (current_function_decl)
2732 && !user_defined_section_attribute);
/* Execute the bbpart pass: mark blocks hot/cold, then fix up every edge
   that crosses the section boundary (add jumps/labels, convert
   fall-throughs, and lengthen short conditional/unconditional branches
   as the target requires).  Returns 0 (no TODO flags).  */
2735 unsigned
2736 pass_partition_blocks::execute (function *fun)
2738 vec<edge> crossing_edges;
2740 if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2741 return 0;
2743 df_set_flags (DF_DEFER_INSN_RESCAN);
/* Classify blocks as hot or cold and collect the crossing edges; an
   empty vector means nothing was partitioned, so there is no work.  */
2745 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2746 if (!crossing_edges.exists ())
2747 return 0;
2749 crtl->has_bb_partition = true;
2751 /* Make sure the source of any crossing edge ends in a jump and the
2752 destination of any crossing edge has a label. */
2753 add_labels_and_missing_jumps (crossing_edges);
2755 /* Convert all crossing fall_thru edges to non-crossing fall
2756 thrus to unconditional jumps (that jump to the original fall
2757 through dest). */
2758 fix_up_fall_thru_edges ();
2760 /* If the architecture does not have conditional branches that can
2761 span all of memory, convert crossing conditional branches into
2762 crossing unconditional branches. */
2763 if (!HAS_LONG_COND_BRANCH)
2764 fix_crossing_conditional_branches ();
2766 /* If the architecture does not have unconditional branches that
2767 can span all of memory, convert crossing unconditional branches
2768 into indirect jumps. Since adding an indirect jump also adds
2769 a new register usage, update the register usage information as
2770 well. */
2771 if (!HAS_LONG_UNCOND_BRANCH)
2772 fix_crossing_unconditional_branches ();
2774 update_crossing_jump_flags ();
2776 /* Clear bb->aux fields that the above routines were using. */
2777 clear_aux_for_blocks ();
2779 crossing_edges.release ();
2781 /* ??? FIXME: DF generates the bb info for a block immediately.
2782 And by immediately, I mean *during* creation of the block.
2784 #0 df_bb_refs_collect
2785 #1 in df_bb_refs_record
2786 #2 in create_basic_block_structure
2788 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2789 will *always* fail, because no edges can have been added to the
2790 block yet. Which of course means we don't add the right
2791 artificial refs, which means we fail df_verify (much) later.
2793 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2794 that we also shouldn't grab data from the new blocks those new
2795 insns are in either. In this way one can create the block, link
2796 it up properly, and have everything Just Work later, when deferred
2797 insns are processed.
2799 In the meantime, we have no other option but to throw away all
2800 of the DF data and recompute it all. */
2801 if (fun->eh->lp_array)
2803 df_finish_pass (true);
2804 df_scan_alloc (NULL);
2805 df_scan_blocks ();
2806 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2807 data. We blindly generated all of them when creating the new
2808 landing pad. Delete those assignments we don't use. */
2809 df_set_flags (DF_LR_RUN_DCE);
2810 df_analyze ();
2813 return 0;
2816 } // anon namespace
/* Factory used by the pass manager to instantiate the bbpart pass.  */
2818 rtl_opt_pass *
2819 make_pass_partition_blocks (gcc::context *ctxt)
2821 return new pass_partition_blocks (ctxt);