/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
/* Real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* Random guesstimation given names.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 10 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)
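
/* Illustrative values (an added example): assuming REG_BR_PROB_BASE
   == 10000, its usual value, these work out to PROB_VERY_UNLIKELY == 999
   (just under 10%), PROB_EVEN == 5000 (50%), PROB_VERY_LIKELY == 9001
   (just over 90%) and PROB_ALWAYS == 10000 (100%).  */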
static bool predicted_by_p (basic_block, enum br_predictor);
static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (enum br_predictor, int, basic_block, int);
static void estimate_loops_at_level (struct loop *loop);
static void propagate_freq (struct loop *);
static void estimate_bb_frequencies (struct loops *);
static void counts_to_freqs (void);
static void process_note_predictions (basic_block, int *, dominance_info,
				      dominance_info);
static void process_note_prediction (basic_block, int *, dominance_info,
				     dominance_info, int, int);
static bool last_basic_block_p (basic_block);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */
#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
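
/* Worked example (added for illustration): assuming REG_BR_PROB_BASE
   == 10000, HITRATE (72) is (int) (72 * 10000 + 50) / 100 == 7200, i.e.
   a 72% hitrate in the fixed-point representation; the "+ 50" term
   rounds to the nearest unit should REG_BR_PROB_BASE not be a multiple
   of 100.  */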
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}
/* Return true in case BB is cold and should be optimized for size.  */

bool
probably_cold_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities
      && (bb->count
	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return true;
  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return true;
  return false;
}
/* Return true in case BB is probably never executed.  */
bool
probably_never_executed_bb_p (basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  return false;
}
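
/* Rounded-division example (added for illustration): with
   profile_info->runs == 10, a block with count 4 gives
   (4 + 10 / 2) / 10 == 0 and is considered never executed, while
   count 5 gives (5 + 5) / 10 == 1 and is not; a block must therefore
   execute at least half as many times as the program ran to escape
   this predicate.  */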
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

static bool
predicted_by_p (basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (bb->end))
    return false;
  for (note = REG_NOTES (bb->end); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}
void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  if (!any_condjump_p (insn))
    abort ();
  if (!flag_guess_branch_prob)
    return;

  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PRED,
			 gen_rtx_CONCAT (VOIDmode,
					 GEN_INT ((int) predictor),
					 GEN_INT ((int) probability)),
			 REG_NOTES (insn));
}
/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}
/* Predict edge E with given probability if possible.  */

void
predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = e->src->end;

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
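
/* Example (added for illustration): predicting edge E as taken with
   probability == 9000 stores 9000 directly when E is the branch edge;
   when E is the fallthrough edge the note instead records
   REG_BR_PROB_BASE - 9000 == 1000, since REG_BR_PRED notes always
   describe the probability of the branch being taken.  */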
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (rtx insn)
{
  return (GET_CODE (insn) == JUMP_INSN
	  && any_condjump_p (insn)
	  && BLOCK_FOR_INSN (insn)->succ->succ_next);
}
/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}
/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}
/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e = bb->succ;

  if (!rtl_dump_file)
    return;

  while (e && (e->flags & EDGE_FALLTHRU))
    e = e->succ_next;

  fprintf (rtl_dump_file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (rtl_dump_file, "  exec ");
      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (rtl_dump_file, "  hit ");
	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (rtl_dump_file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (rtl_dump_file, "\n");
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  rtx *pnote = &REG_NOTES (insn);
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (rtl_dump_file)
    fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by the predictor with the smallest index.  In the future we will use
     better probability combination techniques.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use the no_prediction heuristic; in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (PRED_DS_THEORY, combined_probability, bb, !first_match);
      dump_prediction (PRED_FIRST_MATCH, best_probability, bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_BR_PROB,
			     GEN_INT (combined_probability), REG_NOTES (insn));

      /* Save the prediction into the CFG in case we are seeing a
	 non-degenerated conditional jump.  */
      if (bb->succ->succ_next)
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
}
/* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references.  Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
   predictions).  */

void
estimate_probability (struct loops *loops_info)
{
  dominance_info dominators, post_dominators;
  basic_block bb;
  unsigned i;

  connect_infinite_loops_to_exit ();
  dominators = calculate_dominance_info (CDI_DOMINATORS);
  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  for (i = 1; i < loops_info->num; i++)
    {
      basic_block bb, *bbs;
      unsigned j;
      int exits;
      struct loop *loop = loops_info->parray[i];
      struct loop_desc desc;
      unsigned HOST_WIDE_INT niter;

      flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
      exits = loop->num_exits;

      if (simple_loop_p (loops_info, loop, &desc)
	  && desc.const_iter)
	{
	  int prob;
	  niter = desc.niter + 1;
	  if (niter == 0)	/* We might overflow here.  */
	    niter = desc.niter;

	  prob = (REG_BR_PROB_BASE
		  - (REG_BR_PROB_BASE + niter / 2) / niter);
	  /* The branch prediction algorithm gives 0 frequency for everything
	     after the end of a loop that has 0 probability of finishing.  */
	  if (prob == REG_BR_PROB_BASE)
	    prob = REG_BR_PROB_BASE - 1;
	  predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS, prob);
	}
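
      /* Worked example (added for illustration): with REG_BR_PROB_BASE
	 == 10000 and a loop known to iterate niter == 100 times,
	 prob = 10000 - (10000 + 50) / 100 = 9900, i.e. the loop branch
	 is predicted taken 99% of the time, matching a loop that exits
	 on one iteration in a hundred.  */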
      bbs = get_loop_body (loop);
      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
	  if (!can_predict_insn_p (bb->end)
	      || predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  for (e = bb->succ; e; e = e->succ_next)
	    if (e->dest == loop->header
		&& e->src == loop->latch)
	      {
		header_found = 1;
		predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
	      }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found)
	    for (e = bb->succ; e; e = e->succ_next)
	      if (e->dest->index < 0
		  || !flow_bb_inside_loop_p (loop, e->dest))
		predict_edge
		  (e, PRED_LOOP_EXIT,
		   (REG_BR_PROB_BASE
		    - predictor_info[(int) PRED_LOOP_EXIT].hitrate)
		   / exits);
	}
      free (bbs);
    }

  /* Attempt to predict conditional jumps using a number of heuristics.  */
  FOR_EACH_BB (bb)
    {
      rtx last_insn = bb->end;
      rtx cond, earliest;
      edge e;

      if (! can_predict_insn_p (last_insn))
	continue;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  /* Predict early returns to be probable, as we've already taken
	     care of error returns and others are often used for fast paths
	     through functions.  */
	  if ((e->dest == EXIT_BLOCK_PTR
	       || (e->dest->succ && !e->dest->succ->succ_next
		   && e->dest->succ->dest == EXIT_BLOCK_PTR))
	      && !predicted_by_p (bb, PRED_NULL_RETURN)
	      && !predicted_by_p (bb, PRED_CONST_RETURN)
	      && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
	      && !last_basic_block_p (e->dest))
	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);

	  /* Look for a block we are guarding (i.e. we dominate it,
	     but it doesn't postdominate us).  */
	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
	      && dominated_by_p (dominators, e->dest, e->src)
	      && !dominated_by_p (post_dominators, e->src, e->dest))
	    {
	      rtx insn;

	      /* The call heuristic claims that a guarded function call
		 is improbable.  This is because such calls are often used
		 to signal exceptional situations such as printing error
		 messages.  */
	      for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
		   insn = NEXT_INSN (insn))
		if (GET_CODE (insn) == CALL_INSN
		    /* Constant and pure calls are hardly used to signal
		       something exceptional.  */
		    && ! CONST_OR_PURE_CALL_P (insn))
		  {
		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		    break;
		  }
	    }
	}

      cond = get_condition (last_insn, &earliest);
      if (! cond)
	continue;
529 /* Try "pointer heuristic."
530 A comparison ptr == 0 is predicted as false.
531 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
532 if (GET_RTX_CLASS (GET_CODE (cond
)) == '<'
533 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
534 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
536 if (GET_CODE (cond
) == EQ
)
537 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
538 else if (GET_CODE (cond
) == NE
)
539 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
543 /* Try "opcode heuristic."
544 EQ tests are usually false and NE tests are usually true. Also,
545 most quantities are positive, so we can make the appropriate guesses
546 about signed comparisons against zero. */
547 switch (GET_CODE (cond
))
550 /* Unconditional branch. */
551 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
552 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
557 /* Floating point comparisons appears to behave in a very
558 unpredictable way because of special role of = tests in
560 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
562 /* Comparisons with 0 are often used for booleans and there is
563 nothing useful to predict about them. */
564 else if (XEXP (cond
, 1) == const0_rtx
565 || XEXP (cond
, 0) == const0_rtx
)
568 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
573 /* Floating point comparisons appears to behave in a very
574 unpredictable way because of special role of = tests in
576 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
578 /* Comparisons with 0 are often used for booleans and there is
579 nothing useful to predict about them. */
580 else if (XEXP (cond
, 1) == const0_rtx
581 || XEXP (cond
, 0) == const0_rtx
)
584 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
588 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
592 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
597 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
598 || XEXP (cond
, 1) == constm1_rtx
)
599 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
604 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
605 || XEXP (cond
, 1) == constm1_rtx
)
606 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
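
  /* Concrete readings of the opcode heuristic (added for illustration):
     "if (x != y)" on two non-constant integers is predicted taken and
     "if (x == y)" not taken (PRED_OPCODE_NONEQUAL); "if (x < 0)" is
     predicted not taken and "if (x >= 0)" taken (PRED_OPCODE_POSITIVE).
     EQ/NE comparisons against zero are deliberately left unpredicted,
     as they are usually boolean tests.  */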

  /* Attach the combined probability to each conditional jump.  */
  FOR_EACH_BB (bb)
    if (GET_CODE (bb->end) == JUMP_INSN
	&& any_condjump_p (bb->end)
	&& bb->succ->succ_next != NULL)
      combine_predictions_for_insn (bb->end, bb);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);

  remove_fake_edges ();
  estimate_bb_frequencies (loops_info);
}
/* __builtin_expect dropped tokens into the insn stream describing expected
   values of registers.  Generate branch probabilities based on these
   expected values.  */

void
expected_value_to_br_prob (void)
{
  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  /* Look for expected value notes.  */
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
	    {
	      ev = NOTE_EXPECTED_VALUE (insn);
	      ev_reg = XEXP (ev, 0);
	      delete_insn (insn);
	    }
	  continue;

	case CODE_LABEL:
	  /* Never propagate across labels.  */
	  ev = NULL_RTX;
	  continue;

	case JUMP_INSN:
	  /* Look for simple conditional branches.  If we haven't got an
	     expected value yet, no point going further.  */
	  if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX
	      || ! any_condjump_p (insn))
	    continue;
	  break;

	default:
	  /* Look for insns that clobber the EV register.  */
	  if (ev && reg_set_p (ev_reg, insn))
	    ev = NULL_RTX;
	  continue;
	}

      /* Collect the branch condition, hopefully relative to EV_REG.  */
      /* ???  At present we'll miss things like
		(expected_value (eq r70 0))
		(set r71 -1)
		(set r80 (lt r70 r71))
		(set pc (if_then_else (ne r80 0) ...))
	 as canonicalize_condition will render this to us as
		(lt r70, r71)
	 Could use cselib to try and reduce this further.  */
      cond = XEXP (SET_SRC (pc_set (insn)), 0);
      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
      if (! cond || XEXP (cond, 0) != ev_reg
	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
	continue;

      /* Substitute and simplify.  Given that the expression we're
	 building involves two constants, we should wind up with either
	 true or false.  */
      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
			     XEXP (ev, 1), XEXP (cond, 1));
      cond = simplify_rtx (cond);

      /* Turn the condition into a scaled branch probability.  */
      if (cond != const_true_rtx && cond != const0_rtx)
	abort ();
      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
			cond == const_true_rtx ? TAKEN : NOT_TAKEN);
    }
}
/* Check whether this is the last basic block of the function.  Commonly
   there is one extra common cleanup block.  */
static bool
last_basic_block_p (basic_block bb)
{
  if (bb == EXIT_BLOCK_PTR)
    return false;

  return (bb->next_bb == EXIT_BLOCK_PTR
	  || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
	      && bb->succ && !bb->succ->succ_next
	      && bb->succ->dest->next_bb == EXIT_BLOCK_PTR));
}
/* Set branch probabilities according to PRED and FLAGS.  HEADS[bb->index]
   should be the index of the basic block in which we need to alter branch
   predictions (i.e. the first of our dominators such that we do not
   post-dominate it) (but we fill this information on demand, so -1 may be
   there in case this was not needed yet).  */

static void
process_note_prediction (basic_block bb, int *heads,
			 dominance_info dominators,
			 dominance_info post_dominators, int pred,
			 int flags)
{
  edge e;
  int y;
  bool taken;

  taken = flags & IS_TAKEN;

  if (heads[bb->index] < 0)
    {
      /* This is the first time we need this field in the heads array, so
         find the first dominator that we do not post-dominate (we are
         using already known members of the heads array).  */
      basic_block ai = bb;
      basic_block next_ai = get_immediate_dominator (dominators, bb);
      int head;

      while (heads[next_ai->index] < 0)
	{
	  if (!dominated_by_p (post_dominators, next_ai, bb))
	    break;
	  heads[next_ai->index] = ai->index;
	  ai = next_ai;
	  next_ai = get_immediate_dominator (dominators, next_ai);
	}
      if (!dominated_by_p (post_dominators, next_ai, bb))
	head = next_ai->index;
      else
	head = heads[next_ai->index];
      while (next_ai != bb)
	{
	  next_ai = ai;
	  if (heads[ai->index] == ENTRY_BLOCK)
	    ai = ENTRY_BLOCK_PTR;
	  else
	    ai = BASIC_BLOCK (heads[ai->index]);
	  heads[next_ai->index] = head;
	}
    }
  y = heads[bb->index];

  /* Now find the edge that leads to our branch and apply the prediction.  */

  if (y == last_basic_block || !can_predict_insn_p (BASIC_BLOCK (y)->end))
    return;
  for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
    if (e->dest->index >= 0
	&& dominated_by_p (post_dominators, e->dest, bb))
      predict_edge_def (e, pred, taken);
}
/* Gathers NOTE_INSN_PREDICTIONs in the given basic block and turns them
   into branch probabilities.  For a description of the heads array, see
   process_note_prediction.  */

static void
process_note_predictions (basic_block bb, int *heads,
			  dominance_info dominators,
			  dominance_info post_dominators)
{
  rtx insn;
  edge e;

  /* Additionally, we check here for blocks with no successors.  */
  int contained_noreturn_call = 0;
  int was_bb_head = 0;
  int noreturn_block = 1;

  for (insn = bb->end; insn;
       was_bb_head |= (insn == bb->head), insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != NOTE)
	{
	  if (was_bb_head)
	    break;
	  else
	    {
	      /* Noreturn calls cause the program to exit, therefore they
	         are always predicted as not taken.  */
	      if (GET_CODE (insn) == CALL_INSN
		  && find_reg_note (insn, REG_NORETURN, NULL))
		contained_noreturn_call = 1;
	      continue;
	    }
	}
      if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PREDICTION)
	{
	  int alg = (int) NOTE_PREDICTION_ALG (insn);
	  /* Process single prediction note.  */
	  process_note_prediction (bb,
				   heads,
				   dominators,
				   post_dominators,
				   alg, (int) NOTE_PREDICTION_FLAGS (insn));
	  delete_insn (insn);
	}
    }
  for (e = bb->succ; e; e = e->succ_next)
    if (!(e->flags & EDGE_FAKE))
      noreturn_block = 0;
  if (contained_noreturn_call
      || noreturn_block)
    {
      /* This block ended for reasons other than a return.  If it is
         because of a noreturn call, the branch to it should certainly
         not be taken.  Otherwise it is probably some error recovery.  */
      process_note_prediction (bb,
			       heads,
			       dominators,
			       post_dominators, PRED_NORETURN, NOT_TAKEN);
    }
}
/* Gathers NOTE_INSN_PREDICTIONs and turns them into
   branch probabilities.  */

void
note_prediction_to_br_prob (void)
{
  basic_block bb;
  dominance_info post_dominators, dominators;
  int *heads;

  /* To enable handling of noreturn blocks.  */
  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();

  post_dominators = calculate_dominance_info (CDI_POST_DOMINATORS);
  dominators = calculate_dominance_info (CDI_DOMINATORS);

  heads = xmalloc (sizeof (int) * last_basic_block);
  memset (heads, -1, sizeof (int) * last_basic_block);
  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;

  /* Process all prediction notes.  */
  FOR_EACH_BB (bb)
    process_note_predictions (bb, heads, dominators, post_dominators);

  free_dominance_info (post_dominators);
  free_dominance_info (dominators);
  free (heads);

  remove_fake_edges ();
}
/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* True if block needs to be visited in propagate_freq.  */
  unsigned int tovisit:1;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* If the edge is a loopback edge, this is the probability that the
     edge will be reached given that the header is.  The estimated number
     of iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
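
/* Example (added for illustration): a back edge with
   back_edge_prob == 0.75 implies the loop header is expected to
   execute 1 / (1 - 0.75) == 4 times per entry into the loop.  */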
/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for LOOP.  */

static void
propagate_freq (struct loop *loop)
{
  basic_block head = loop->header;
  basic_block bb;
  basic_block last;
  edge e;
  basic_block nextbb;

  /* For each basic block we need to visit, count the number of its
     predecessors we need to visit first.  */
  FOR_EACH_BB (bb)
    {
      if (BLOCK_INFO (bb)->tovisit)
	{
	  int count = 0;

	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      count++;
	    else if (BLOCK_INFO (e->src)->tovisit
		     && rtl_dump_file && !EDGE_INFO (e)->back_edge)
	      fprintf (rtl_dump_file,
		       "Irreducible region hit, ignoring edge to %i->%i\n",
		       e->src->index, bb->index);
	  BLOCK_INFO (bb)->npredecessors = count;
	}
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  for (e = bb->pred; e; e = e->pred_next)
	    if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
	      abort ();
#endif

	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/* frequency += (e->probability
				 * BLOCK_INFO (e->src)->frequency /
				 REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      BLOCK_INFO (bb)->tovisit = 0;

      /* Compute back edge frequencies.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (e->dest == head)
	  {
	    sreal tmp;

	    /* EDGE_INFO (e)->back_edge_prob
		  = ((e->probability * BLOCK_INFO (bb)->frequency)
		     / REG_BR_PROB_BASE); */

	    sreal_init (&tmp, e->probability, 0);
	    sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	    sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		       &tmp, &real_inv_br_prob_base);
	  }

      /* Propagate to successor blocks.  */
      for (e = bb->succ; e; e = e->succ_next)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
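
/* Worked example for the cyclic-probability computation above (added
   for illustration): a loop header entered with frequency 1 whose
   single back edge carries back_edge_prob == 0.9 gets frequency
   1 / (1 - 0.9) == 10, i.e. it is expected to run ten times per entry.
   Should cyclic_probability reach 1, it is clamped to real_almost_one
   first so the division stays finite.  */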
/* Estimate probabilities of loopback edges in loops at the same nest
   level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;

      estimate_loops_at_level (loop->inner);

      if (loop->latch->succ)  /* Do not do this for the dummy function loop.  */
	{
	  /* Find the current loop back edge and mark it.  */
	  e = loop_latch_edge (loop);
	  EDGE_INFO (e)->back_edge = 1;
	}

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	BLOCK_INFO (bbs[i])->tovisit = 1;
      free (bbs);
      propagate_freq (loop);
    }
}
/* Convert counts measured by profile-driven feedback to frequencies.  */

static void
counts_to_freqs (void)
{
  gcov_type count_max = 1;
  basic_block bb;

  FOR_EACH_BB (bb)
    count_max = MAX (bb->count, count_max);

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
}
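
/* Scaling example (added for illustration): assuming BB_FREQ_MAX
   == 10000 and count_max == 1000, a block executed 250 times gets
   frequency == (250 * 10000 + 500) / 1000 == 2500; the "+ count_max / 2"
   term rounds to the nearest frequency unit instead of truncating.  */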
/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of the prologue and epilogue, or in
   doing inlining, at the expense of code size growth.  THRESHOLD is the
   limit on the number of instructions the function may execute on average
   and still be considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We can not compute accurately for large thresholds due to scaled
     frequencies.  */
  if (threshold > BB_FREQ_MAX)
    abort ();

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = bb->head; insn != NEXT_INSN (bb->end);
	   insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
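
/* Example (added for illustration): if ENTRY_BLOCK_PTR->frequency is
   1000 and THRESHOLD is 20, then limit == 20000.  Every active insn
   contributes its block's frequency to SUM, so SUM exceeds the limit
   exactly when the function is estimated to execute more than 20 insns
   per invocation on average.  */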
/* Estimate basic block frequencies from the given branch probabilities.  */

static void
estimate_bb_frequencies (struct loops *loops)
{
  basic_block bb;
  sreal freq_max;

  if (flag_branch_probabilities)
    counts_to_freqs ();
  else
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
	{
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}
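
      /* Note on the initializers above (added for illustration):
	 sreal_init takes a significand and a binary exponent, so
	 sreal_init (&real_one_half, 1, -1) encodes 1 * 2^-1 == 0.5,
	 the rounding term added before frequencies are converted back
	 to integers below.  */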

      mark_dfs_back_edges ();

      /* Fill in the probability values in the flowgraph based on the
         REG_BR_PROB notes.  */
      FOR_EACH_BB (bb)
	{
	  rtx last_insn = bb->end;

	  if (!can_predict_insn_p (last_insn))
	    {
	      /* We can predict only conditional jumps at the moment.
	         Expect each edge to be equally probable.
	         ?? In the future we want to make abnormal edges improbable.  */
	      int nedges = 0;
	      edge e;

	      for (e = bb->succ; e; e = e->succ_next)
		{
		  nedges++;
		  if (e->probability != 0)
		    break;
		}
	      if (!e)
		for (e = bb->succ; e; e = e->succ_next)
		  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
	    }
	}

      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  edge e;

	  BLOCK_INFO (bb)->tovisit = 0;
	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops_at_level (loops->tree_root);

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }

  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
/* Decide whether the function is hot, cold or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    return;
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (bb))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}
/* Choose the appropriate section for the function.  */
static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
	 but this requires more work as the frequency needs to match
	 for all generated objects, so we need to merge the frequency
	 of all instances.  For now just never set the frequency for
	 these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;
  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl) =
      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}