1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
56 #include "tree-flow.h"
58 #include "tree-dump.h"
59 #include "tree-pass.h"
62 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
63 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
64 static sreal real_zero
, real_one
, real_almost_one
, real_br_prob_base
,
65 real_inv_br_prob_base
, real_one_half
, real_bb_freq_max
;
67 /* Random guesstimation given names. */
68 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
69 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
70 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
71 #define PROB_ALWAYS (REG_BR_PROB_BASE)
73 static void combine_predictions_for_insn (rtx
, basic_block
);
74 static void dump_prediction (FILE *, enum br_predictor
, int, basic_block
, int);
75 static void estimate_loops_at_level (struct loop
*loop
);
76 static void propagate_freq (struct loop
*);
77 static void estimate_bb_frequencies (struct loops
*);
78 static int counts_to_freqs (void);
79 static bool last_basic_block_p (basic_block
);
80 static void compute_function_frequency (void);
81 static void choose_function_section (void);
82 static bool can_predict_insn_p (rtx
);
84 /* Information we hold about each branch predictor.
85 Filled using information from predict.def. */
89 const char *const name
; /* Name used in the debugging dumps. */
90 const int hitrate
; /* Expected hitrate used by
91 predict_insn_def call. */
95 /* Use given predictor without Dempster-Shaffer theory if it matches
96 using first_match heuristics. */
97 #define PRED_FLAG_FIRST_MATCH 1
99 /* Recompute hitrate in percent to our representation. */
101 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
103 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
104 static const struct predictor_info predictor_info
[]= {
105 #include "predict.def"
107 /* Upper bound on predictors. */
112 /* Return true in case BB can be CPU intensive and should be optimized
113 for maximal performance. */
116 maybe_hot_bb_p (basic_block bb
)
118 if (profile_info
&& flag_branch_probabilities
120 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
122 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
127 /* Return true in case BB is cold and should be optimized for size. */
130 probably_cold_bb_p (basic_block bb
)
132 if (profile_info
&& flag_branch_probabilities
134 < profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
136 if (bb
->frequency
< BB_FREQ_MAX
/ PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION
))
141 /* Return true in case BB is probably never executed. */
143 probably_never_executed_bb_p (basic_block bb
)
145 if (profile_info
&& flag_branch_probabilities
)
146 return ((bb
->count
+ profile_info
->runs
/ 2) / profile_info
->runs
) == 0;
150 /* Return true if the one of outgoing edges is already predicted by
154 rtl_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
157 if (!INSN_P (BB_END (bb
)))
159 for (note
= REG_NOTES (BB_END (bb
)); note
; note
= XEXP (note
, 1))
160 if (REG_NOTE_KIND (note
) == REG_BR_PRED
161 && INTVAL (XEXP (XEXP (note
, 0), 0)) == (int)predictor
)
166 /* Return true if the one of outgoing edges is already predicted by
170 tree_predicted_by_p (basic_block bb
, enum br_predictor predictor
)
172 struct edge_prediction
*i
= bb_ann (bb
)->predictions
;
173 for (i
= bb_ann (bb
)->predictions
; i
; i
= i
->next
)
174 if (i
->predictor
== predictor
)
180 predict_insn (rtx insn
, enum br_predictor predictor
, int probability
)
182 if (!any_condjump_p (insn
))
184 if (!flag_guess_branch_prob
)
188 = gen_rtx_EXPR_LIST (REG_BR_PRED
,
189 gen_rtx_CONCAT (VOIDmode
,
190 GEN_INT ((int) predictor
),
191 GEN_INT ((int) probability
)),
195 /* Predict insn by given predictor. */
198 predict_insn_def (rtx insn
, enum br_predictor predictor
,
199 enum prediction taken
)
201 int probability
= predictor_info
[(int) predictor
].hitrate
;
204 probability
= REG_BR_PROB_BASE
- probability
;
206 predict_insn (insn
, predictor
, probability
);
209 /* Predict edge E with given probability if possible. */
212 rtl_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
215 last_insn
= BB_END (e
->src
);
217 /* We can store the branch prediction information only about
218 conditional jumps. */
219 if (!any_condjump_p (last_insn
))
222 /* We always store probability of branching. */
223 if (e
->flags
& EDGE_FALLTHRU
)
224 probability
= REG_BR_PROB_BASE
- probability
;
226 predict_insn (last_insn
, predictor
, probability
);
229 /* Predict edge E with the given PROBABILITY. */
231 tree_predict_edge (edge e
, enum br_predictor predictor
, int probability
)
233 struct edge_prediction
*i
= ggc_alloc (sizeof (struct edge_prediction
));
235 i
->next
= bb_ann (e
->src
)->predictions
;
236 bb_ann (e
->src
)->predictions
= i
;
237 i
->probability
= probability
;
238 i
->predictor
= predictor
;
242 /* Return true when we can store prediction on insn INSN.
243 At the moment we represent predictions only on conditional
244 jumps, not at computed jump or other complicated cases. */
246 can_predict_insn_p (rtx insn
)
248 return (JUMP_P (insn
)
249 && any_condjump_p (insn
)
250 && EDGE_COUNT (BLOCK_FOR_INSN (insn
)->succs
) >= 2);
253 /* Predict edge E by given predictor if possible. */
256 predict_edge_def (edge e
, enum br_predictor predictor
,
257 enum prediction taken
)
259 int probability
= predictor_info
[(int) predictor
].hitrate
;
262 probability
= REG_BR_PROB_BASE
- probability
;
264 predict_edge (e
, predictor
, probability
);
267 /* Invert all branch predictions or probability notes in the INSN. This needs
268 to be done each time we invert the condition used by the jump. */
271 invert_br_probabilities (rtx insn
)
275 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
276 if (REG_NOTE_KIND (note
) == REG_BR_PROB
)
277 XEXP (note
, 0) = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (note
, 0)));
278 else if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
279 XEXP (XEXP (note
, 0), 1)
280 = GEN_INT (REG_BR_PROB_BASE
- INTVAL (XEXP (XEXP (note
, 0), 1)));
283 /* Dump information about the branch prediction to the output file. */
286 dump_prediction (FILE *file
, enum br_predictor predictor
, int probability
,
287 basic_block bb
, int used
)
295 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
297 if (! (e
->flags
& EDGE_FALLTHRU
))
301 fprintf (file
, " %s heuristics%s: %.1f%%",
302 predictor_info
[predictor
].name
,
303 used
? "" : " (ignored)", probability
* 100.0 / REG_BR_PROB_BASE
);
307 fprintf (file
, " exec ");
308 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, bb
->count
);
311 fprintf (file
, " hit ");
312 fprintf (file
, HOST_WIDEST_INT_PRINT_DEC
, e
->count
);
313 fprintf (file
, " (%.1f%%)", e
->count
* 100.0 / bb
->count
);
317 fprintf (file
, "\n");
320 /* We can not predict the probabilities of ougtoing edges of bb. Set them
321 evenly and hope for the best. */
323 set_even_probabilities (basic_block bb
)
329 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
331 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
335 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
337 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
338 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
344 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
345 note if not already present. Remove now useless REG_BR_PRED notes. */
348 combine_predictions_for_insn (rtx insn
, basic_block bb
)
353 int best_probability
= PROB_EVEN
;
354 int best_predictor
= END_PREDICTORS
;
355 int combined_probability
= REG_BR_PROB_BASE
/ 2;
357 bool first_match
= false;
360 if (!can_predict_insn_p (insn
))
362 set_even_probabilities (bb
);
366 prob_note
= find_reg_note (insn
, REG_BR_PROB
, 0);
367 pnote
= ®_NOTES (insn
);
369 fprintf (dump_file
, "Predictions for insn %i bb %i\n", INSN_UID (insn
),
372 /* We implement "first match" heuristics and use probability guessed
373 by predictor with smallest index. */
374 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
375 if (REG_NOTE_KIND (note
) == REG_BR_PRED
)
377 int predictor
= INTVAL (XEXP (XEXP (note
, 0), 0));
378 int probability
= INTVAL (XEXP (XEXP (note
, 0), 1));
381 if (best_predictor
> predictor
)
382 best_probability
= probability
, best_predictor
= predictor
;
384 d
= (combined_probability
* probability
385 + (REG_BR_PROB_BASE
- combined_probability
)
386 * (REG_BR_PROB_BASE
- probability
));
388 /* Use FP math to avoid overflows of 32bit integers. */
390 /* If one probability is 0% and one 100%, avoid division by zero. */
391 combined_probability
= REG_BR_PROB_BASE
/ 2;
393 combined_probability
= (((double) combined_probability
) * probability
394 * REG_BR_PROB_BASE
/ d
+ 0.5);
397 /* Decide which heuristic to use. In case we didn't match anything,
398 use no_prediction heuristic, in case we did match, use either
399 first match or Dempster-Shaffer theory depending on the flags. */
401 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
405 dump_prediction (dump_file
, PRED_NO_PREDICTION
,
406 combined_probability
, bb
, true);
409 dump_prediction (dump_file
, PRED_DS_THEORY
, combined_probability
,
411 dump_prediction (dump_file
, PRED_FIRST_MATCH
, best_probability
,
416 combined_probability
= best_probability
;
417 dump_prediction (dump_file
, PRED_COMBINED
, combined_probability
, bb
, true);
421 if (REG_NOTE_KIND (*pnote
) == REG_BR_PRED
)
423 int predictor
= INTVAL (XEXP (XEXP (*pnote
, 0), 0));
424 int probability
= INTVAL (XEXP (XEXP (*pnote
, 0), 1));
426 dump_prediction (dump_file
, predictor
, probability
, bb
,
427 !first_match
|| best_predictor
== predictor
);
428 *pnote
= XEXP (*pnote
, 1);
431 pnote
= &XEXP (*pnote
, 1);
437 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
438 GEN_INT (combined_probability
), REG_NOTES (insn
));
440 /* Save the prediction into CFG in case we are seeing non-degenerated
442 if (EDGE_COUNT (bb
->succs
) > 1)
444 BRANCH_EDGE (bb
)->probability
= combined_probability
;
445 FALLTHRU_EDGE (bb
)->probability
446 = REG_BR_PROB_BASE
- combined_probability
;
451 /* Combine predictions into single probability and store them into CFG.
452 Remove now useless prediction entries. */
455 combine_predictions_for_bb (FILE *file
, basic_block bb
)
457 int best_probability
= PROB_EVEN
;
458 int best_predictor
= END_PREDICTORS
;
459 int combined_probability
= REG_BR_PROB_BASE
/ 2;
461 bool first_match
= false;
463 struct edge_prediction
*pred
;
465 edge e
, first
= NULL
, second
= NULL
;
468 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
470 if (!(e
->flags
& (EDGE_EH
| EDGE_FAKE
)))
473 if (first
&& !second
)
480 /* When there is no successor or only one choice, prediction is easy.
482 We are lazy for now and predict only basic blocks with two outgoing
483 edges. It is possible to predict generic case too, but we have to
484 ignore first match heuristics and do more involved combining. Implement
489 set_even_probabilities (bb
);
490 bb_ann (bb
)->predictions
= NULL
;
492 fprintf (file
, "%i edges in bb %i predicted to even probabilities\n",
498 fprintf (file
, "Predictions for bb %i\n", bb
->index
);
500 /* We implement "first match" heuristics and use probability guessed
501 by predictor with smallest index. */
502 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
504 int predictor
= pred
->predictor
;
505 int probability
= pred
->probability
;
507 if (pred
->edge
!= first
)
508 probability
= REG_BR_PROB_BASE
- probability
;
511 if (best_predictor
> predictor
)
512 best_probability
= probability
, best_predictor
= predictor
;
514 d
= (combined_probability
* probability
515 + (REG_BR_PROB_BASE
- combined_probability
)
516 * (REG_BR_PROB_BASE
- probability
));
518 /* Use FP math to avoid overflows of 32bit integers. */
520 /* If one probability is 0% and one 100%, avoid division by zero. */
521 combined_probability
= REG_BR_PROB_BASE
/ 2;
523 combined_probability
= (((double) combined_probability
) * probability
524 * REG_BR_PROB_BASE
/ d
+ 0.5);
527 /* Decide which heuristic to use. In case we didn't match anything,
528 use no_prediction heuristic, in case we did match, use either
529 first match or Dempster-Shaffer theory depending on the flags. */
531 if (predictor_info
[best_predictor
].flags
& PRED_FLAG_FIRST_MATCH
)
535 dump_prediction (file
, PRED_NO_PREDICTION
, combined_probability
, bb
, true);
538 dump_prediction (file
, PRED_DS_THEORY
, combined_probability
, bb
,
540 dump_prediction (file
, PRED_FIRST_MATCH
, best_probability
, bb
,
545 combined_probability
= best_probability
;
546 dump_prediction (file
, PRED_COMBINED
, combined_probability
, bb
, true);
548 for (pred
= bb_ann (bb
)->predictions
; pred
; pred
= pred
->next
)
550 int predictor
= pred
->predictor
;
551 int probability
= pred
->probability
;
553 if (pred
->edge
!= EDGE_SUCC (bb
, 0))
554 probability
= REG_BR_PROB_BASE
- probability
;
555 dump_prediction (file
, predictor
, probability
, bb
,
556 !first_match
|| best_predictor
== predictor
);
558 bb_ann (bb
)->predictions
= NULL
;
562 first
->probability
= combined_probability
;
563 second
->probability
= REG_BR_PROB_BASE
- combined_probability
;
567 /* Predict edge probabilities by exploiting loop structure.
568 When SIMPLELOOPS is set, attempt to count number of iterations by analyzing
571 predict_loops (struct loops
*loops_info
, bool simpleloops
)
575 /* Try to predict out blocks in a loop that are not part of a
577 for (i
= 1; i
< loops_info
->num
; i
++)
579 basic_block bb
, *bbs
;
582 struct loop
*loop
= loops_info
->parray
[i
];
583 struct niter_desc desc
;
584 unsigned HOST_WIDE_INT niter
;
586 flow_loop_scan (loop
, LOOP_EXIT_EDGES
);
587 exits
= loop
->num_exits
;
591 iv_analysis_loop_init (loop
);
592 find_simple_exit (loop
, &desc
);
594 if (desc
.simple_p
&& desc
.const_iter
)
597 niter
= desc
.niter
+ 1;
598 if (niter
== 0) /* We might overflow here. */
601 prob
= (REG_BR_PROB_BASE
602 - (REG_BR_PROB_BASE
+ niter
/2) / niter
);
603 /* Branch prediction algorithm gives 0 frequency for everything
604 after the end of loop for loop having 0 probability to finish. */
605 if (prob
== REG_BR_PROB_BASE
)
606 prob
= REG_BR_PROB_BASE
- 1;
607 predict_edge (desc
.in_edge
, PRED_LOOP_ITERATIONS
,
612 bbs
= get_loop_body (loop
);
614 for (j
= 0; j
< loop
->num_nodes
; j
++)
616 int header_found
= 0;
622 /* Bypass loop heuristics on continue statement. These
623 statements construct loops via "non-loop" constructs
624 in the source language and are better to be handled
626 if ((simpleloops
&& !can_predict_insn_p (BB_END (bb
)))
627 || predicted_by_p (bb
, PRED_CONTINUE
))
630 /* Loop branch heuristics - predict an edge back to a
631 loop's head as taken. */
632 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
634 if (e
->dest
== loop
->header
635 && e
->src
== loop
->latch
)
638 predict_edge_def (e
, PRED_LOOP_BRANCH
, TAKEN
);
642 /* Loop exit heuristics - predict an edge exiting the loop if the
643 conditional has no loop header successors as not taken. */
645 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
647 if (e
->dest
->index
< 0
648 || !flow_bb_inside_loop_p (loop
, e
->dest
))
652 - predictor_info
[(int) PRED_LOOP_EXIT
].hitrate
)
657 /* Free basic blocks from get_loop_body. */
662 /* Attempt to predict probabilities of BB outgoing edges using local
665 bb_estimate_probability_locally (basic_block bb
)
667 rtx last_insn
= BB_END (bb
);
670 if (! can_predict_insn_p (last_insn
))
672 cond
= get_condition (last_insn
, NULL
, false, false);
676 /* Try "pointer heuristic."
677 A comparison ptr == 0 is predicted as false.
678 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
679 if (COMPARISON_P (cond
)
680 && ((REG_P (XEXP (cond
, 0)) && REG_POINTER (XEXP (cond
, 0)))
681 || (REG_P (XEXP (cond
, 1)) && REG_POINTER (XEXP (cond
, 1)))))
683 if (GET_CODE (cond
) == EQ
)
684 predict_insn_def (last_insn
, PRED_POINTER
, NOT_TAKEN
);
685 else if (GET_CODE (cond
) == NE
)
686 predict_insn_def (last_insn
, PRED_POINTER
, TAKEN
);
690 /* Try "opcode heuristic."
691 EQ tests are usually false and NE tests are usually true. Also,
692 most quantities are positive, so we can make the appropriate guesses
693 about signed comparisons against zero. */
694 switch (GET_CODE (cond
))
697 /* Unconditional branch. */
698 predict_insn_def (last_insn
, PRED_UNCONDITIONAL
,
699 cond
== const0_rtx
? NOT_TAKEN
: TAKEN
);
704 /* Floating point comparisons appears to behave in a very
705 unpredictable way because of special role of = tests in
707 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
709 /* Comparisons with 0 are often used for booleans and there is
710 nothing useful to predict about them. */
711 else if (XEXP (cond
, 1) == const0_rtx
712 || XEXP (cond
, 0) == const0_rtx
)
715 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, NOT_TAKEN
);
720 /* Floating point comparisons appears to behave in a very
721 unpredictable way because of special role of = tests in
723 if (FLOAT_MODE_P (GET_MODE (XEXP (cond
, 0))))
725 /* Comparisons with 0 are often used for booleans and there is
726 nothing useful to predict about them. */
727 else if (XEXP (cond
, 1) == const0_rtx
728 || XEXP (cond
, 0) == const0_rtx
)
731 predict_insn_def (last_insn
, PRED_OPCODE_NONEQUAL
, TAKEN
);
735 predict_insn_def (last_insn
, PRED_FPOPCODE
, TAKEN
);
739 predict_insn_def (last_insn
, PRED_FPOPCODE
, NOT_TAKEN
);
744 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
745 || XEXP (cond
, 1) == constm1_rtx
)
746 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, NOT_TAKEN
);
751 if (XEXP (cond
, 1) == const0_rtx
|| XEXP (cond
, 1) == const1_rtx
752 || XEXP (cond
, 1) == constm1_rtx
)
753 predict_insn_def (last_insn
, PRED_OPCODE_POSITIVE
, TAKEN
);
761 /* Statically estimate the probability that a branch will be taken and produce
762 estimated profile. When profile feedback is present never executed portions
763 of function gets estimated. */
766 estimate_probability (struct loops
*loops_info
)
770 connect_infinite_loops_to_exit ();
771 calculate_dominance_info (CDI_DOMINATORS
);
772 calculate_dominance_info (CDI_POST_DOMINATORS
);
774 predict_loops (loops_info
, true);
778 /* Attempt to predict conditional jumps using a number of heuristics. */
781 rtx last_insn
= BB_END (bb
);
785 if (! can_predict_insn_p (last_insn
))
788 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
790 /* Predict early returns to be probable, as we've already taken
791 care for error returns and other are often used for fast paths
793 if ((e
->dest
== EXIT_BLOCK_PTR
794 || (EDGE_COUNT (e
->dest
->succs
) == 1
795 && EDGE_SUCC (e
->dest
, 0)->dest
== EXIT_BLOCK_PTR
))
796 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
797 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
798 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
799 && !last_basic_block_p (e
->dest
))
800 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
802 /* Look for block we are guarding (ie we dominate it,
803 but it doesn't postdominate us). */
804 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
805 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
806 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
810 /* The call heuristic claims that a guarded function call
811 is improbable. This is because such calls are often used
812 to signal exceptional situations such as printing error
814 for (insn
= BB_HEAD (e
->dest
); insn
!= NEXT_INSN (BB_END (e
->dest
));
815 insn
= NEXT_INSN (insn
))
817 /* Constant and pure calls are hardly used to signalize
818 something exceptional. */
819 && ! CONST_OR_PURE_CALL_P (insn
))
821 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
826 bb_estimate_probability_locally (bb
);
829 /* Attach the combined probability to each conditional jump. */
831 if (JUMP_P (BB_END (bb
))
832 && any_condjump_p (BB_END (bb
))
833 && EDGE_COUNT (bb
->succs
) >= 2)
834 combine_predictions_for_insn (BB_END (bb
), bb
);
836 remove_fake_exit_edges ();
837 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
842 rtx last_insn
= BB_END (bb
);
844 if (!can_predict_insn_p (last_insn
))
846 /* We can predict only conditional jumps at the moment.
847 Expect each edge to be equally probable.
848 ?? In the future we want to make abnormal edges improbable. */
852 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
855 if (e
->probability
!= 0)
859 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
861 e
->probability
= (REG_BR_PROB_BASE
+ nedges
/ 2) / nedges
;
865 estimate_bb_frequencies (loops_info
);
866 free_dominance_info (CDI_POST_DOMINATORS
);
867 if (profile_status
== PROFILE_ABSENT
)
868 profile_status
= PROFILE_GUESSED
;
871 /* Set edge->probability for each succestor edge of BB. */
873 guess_outgoing_edge_probabilities (basic_block bb
)
875 bb_estimate_probability_locally (bb
);
876 combine_predictions_for_insn (BB_END (bb
), bb
);
880 /* Predict using opcode of the last statement in basic block. */
882 tree_predict_by_opcode (basic_block bb
)
884 tree stmt
= last_stmt (bb
);
891 if (!stmt
|| TREE_CODE (stmt
) != COND_EXPR
)
893 FOR_EACH_EDGE (then_edge
, ei
, bb
->succs
)
895 if (then_edge
->flags
& EDGE_TRUE_VALUE
)
898 cond
= TREE_OPERAND (stmt
, 0);
899 if (TREE_CODE_CLASS (TREE_CODE (cond
)) != '<')
901 op0
= TREE_OPERAND (cond
, 0);
902 type
= TREE_TYPE (op0
);
903 /* Try "pointer heuristic."
904 A comparison ptr == 0 is predicted as false.
905 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
906 if (POINTER_TYPE_P (type
))
908 if (TREE_CODE (cond
) == EQ_EXPR
)
909 predict_edge_def (then_edge
, PRED_TREE_POINTER
, NOT_TAKEN
);
910 else if (TREE_CODE (cond
) == NE_EXPR
)
911 predict_edge_def (then_edge
, PRED_TREE_POINTER
, TAKEN
);
915 /* Try "opcode heuristic."
916 EQ tests are usually false and NE tests are usually true. Also,
917 most quantities are positive, so we can make the appropriate guesses
918 about signed comparisons against zero. */
919 switch (TREE_CODE (cond
))
923 /* Floating point comparisons appears to behave in a very
924 unpredictable way because of special role of = tests in
926 if (FLOAT_TYPE_P (type
))
928 /* Comparisons with 0 are often used for booleans and there is
929 nothing useful to predict about them. */
930 else if (integer_zerop (op0
)
931 || integer_zerop (TREE_OPERAND (cond
, 1)))
934 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, NOT_TAKEN
);
939 /* Floating point comparisons appears to behave in a very
940 unpredictable way because of special role of = tests in
942 if (FLOAT_TYPE_P (type
))
944 /* Comparisons with 0 are often used for booleans and there is
945 nothing useful to predict about them. */
946 else if (integer_zerop (op0
)
947 || integer_zerop (TREE_OPERAND (cond
, 1)))
950 predict_edge_def (then_edge
, PRED_TREE_OPCODE_NONEQUAL
, TAKEN
);
954 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, TAKEN
);
958 predict_edge_def (then_edge
, PRED_TREE_FPOPCODE
, NOT_TAKEN
);
963 if (integer_zerop (TREE_OPERAND (cond
, 1))
964 || integer_onep (TREE_OPERAND (cond
, 1))
965 || integer_all_onesp (TREE_OPERAND (cond
, 1))
966 || real_zerop (TREE_OPERAND (cond
, 1))
967 || real_onep (TREE_OPERAND (cond
, 1))
968 || real_minus_onep (TREE_OPERAND (cond
, 1)))
969 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, NOT_TAKEN
);
974 if (integer_zerop (TREE_OPERAND (cond
, 1))
975 || integer_onep (TREE_OPERAND (cond
, 1))
976 || integer_all_onesp (TREE_OPERAND (cond
, 1))
977 || real_zerop (TREE_OPERAND (cond
, 1))
978 || real_onep (TREE_OPERAND (cond
, 1))
979 || real_minus_onep (TREE_OPERAND (cond
, 1)))
980 predict_edge_def (then_edge
, PRED_TREE_OPCODE_POSITIVE
, TAKEN
);
988 /* Predict branch probabilities and estimate profile of the tree CFG. */
990 tree_estimate_probability (void)
993 struct loops loops_info
;
995 flow_loops_find (&loops_info
, LOOP_TREE
);
996 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
997 flow_loops_dump (&loops_info
, dump_file
, NULL
, 0);
999 connect_infinite_loops_to_exit ();
1000 calculate_dominance_info (CDI_DOMINATORS
);
1001 calculate_dominance_info (CDI_POST_DOMINATORS
);
1003 predict_loops (&loops_info
, false);
1010 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1012 /* Predict early returns to be probable, as we've already taken
1013 care for error returns and other are often used for fast paths
1014 trought function. */
1015 if ((e
->dest
== EXIT_BLOCK_PTR
1016 || (EDGE_COUNT (e
->dest
->succs
) == 1
1017 && EDGE_SUCC (e
->dest
, 0)->dest
== EXIT_BLOCK_PTR
))
1018 && !predicted_by_p (bb
, PRED_NULL_RETURN
)
1019 && !predicted_by_p (bb
, PRED_CONST_RETURN
)
1020 && !predicted_by_p (bb
, PRED_NEGATIVE_RETURN
)
1021 && !last_basic_block_p (e
->dest
))
1022 predict_edge_def (e
, PRED_EARLY_RETURN
, TAKEN
);
1024 /* Look for block we are guarding (ie we dominate it,
1025 but it doesn't postdominate us). */
1026 if (e
->dest
!= EXIT_BLOCK_PTR
&& e
->dest
!= bb
1027 && dominated_by_p (CDI_DOMINATORS
, e
->dest
, e
->src
)
1028 && !dominated_by_p (CDI_POST_DOMINATORS
, e
->src
, e
->dest
))
1030 block_stmt_iterator bi
;
1032 /* The call heuristic claims that a guarded function call
1033 is improbable. This is because such calls are often used
1034 to signal exceptional situations such as printing error
1036 for (bi
= bsi_start (e
->dest
); !bsi_end_p (bi
);
1039 tree stmt
= bsi_stmt (bi
);
1040 if ((TREE_CODE (stmt
) == CALL_EXPR
1041 || (TREE_CODE (stmt
) == MODIFY_EXPR
1042 && TREE_CODE (TREE_OPERAND (stmt
, 1)) == CALL_EXPR
))
1043 /* Constant and pure calls are hardly used to signalize
1044 something exceptional. */
1045 && TREE_SIDE_EFFECTS (stmt
))
1047 predict_edge_def (e
, PRED_CALL
, NOT_TAKEN
);
1053 tree_predict_by_opcode (bb
);
1056 combine_predictions_for_bb (dump_file
, bb
);
1058 estimate_bb_frequencies (&loops_info
);
1059 free_dominance_info (CDI_POST_DOMINATORS
);
1060 remove_fake_exit_edges ();
1061 flow_loops_free (&loops_info
);
1062 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1063 dump_tree_cfg (dump_file
, dump_flags
);
1064 if (profile_status
== PROFILE_ABSENT
)
1065 profile_status
= PROFILE_GUESSED
;
1068 /* __builtin_expect dropped tokens into the insn stream describing expected
1069 values of registers. Generate branch probabilities based off these
1073 expected_value_to_br_prob (void)
1075 rtx insn
, cond
, ev
= NULL_RTX
, ev_reg
= NULL_RTX
;
1077 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1079 switch (GET_CODE (insn
))
1082 /* Look for expected value notes. */
1083 if (NOTE_LINE_NUMBER (insn
) == NOTE_INSN_EXPECTED_VALUE
)
1085 ev
= NOTE_EXPECTED_VALUE (insn
);
1086 ev_reg
= XEXP (ev
, 0);
1092 /* Never propagate across labels. */
1097 /* Look for simple conditional branches. If we haven't got an
1098 expected value yet, no point going further. */
1099 if (!JUMP_P (insn
) || ev
== NULL_RTX
1100 || ! any_condjump_p (insn
))
1105 /* Look for insns that clobber the EV register. */
1106 if (ev
&& reg_set_p (ev_reg
, insn
))
1111 /* Collect the branch condition, hopefully relative to EV_REG. */
1112 /* ??? At present we'll miss things like
1113 (expected_value (eq r70 0))
1115 (set r80 (lt r70 r71))
1116 (set pc (if_then_else (ne r80 0) ...))
1117 as canonicalize_condition will render this to us as
1119 Could use cselib to try and reduce this further. */
1120 cond
= XEXP (SET_SRC (pc_set (insn
)), 0);
1121 cond
= canonicalize_condition (insn
, cond
, 0, NULL
, ev_reg
,
1123 if (! cond
|| XEXP (cond
, 0) != ev_reg
1124 || GET_CODE (XEXP (cond
, 1)) != CONST_INT
)
1127 /* Substitute and simplify. Given that the expression we're
1128 building involves two constants, we should wind up with either
1130 cond
= gen_rtx_fmt_ee (GET_CODE (cond
), VOIDmode
,
1131 XEXP (ev
, 1), XEXP (cond
, 1));
1132 cond
= simplify_rtx (cond
);
1134 /* Turn the condition into a scaled branch probability. */
1135 if (cond
!= const_true_rtx
&& cond
!= const0_rtx
)
1137 predict_insn_def (insn
, PRED_BUILTIN_EXPECT
,
1138 cond
== const_true_rtx
? TAKEN
: NOT_TAKEN
);
1142 /* Check whether this is the last basic block of function. Commonly
1143 there is one extra common cleanup block. */
1145 last_basic_block_p (basic_block bb
)
1147 if (bb
== EXIT_BLOCK_PTR
)
1150 return (bb
->next_bb
== EXIT_BLOCK_PTR
1151 || (bb
->next_bb
->next_bb
== EXIT_BLOCK_PTR
1152 && EDGE_COUNT (bb
->succs
) == 1
1153 && EDGE_SUCC (bb
, 0)->dest
->next_bb
== EXIT_BLOCK_PTR
));
1156 /* This is used to carry information about basic blocks. It is
1157 attached to the AUX field of the standard CFG block. */
1159 typedef struct block_info_def
1161 /* Estimated frequency of execution of basic_block. */
1164 /* To keep queue of basic blocks to process. */
1167 /* True if block needs to be visited in propagate_freq. */
1168 unsigned int tovisit
:1;
1170 /* Number of predecessors we need to visit first. */
1174 /* Similar information for edges. */
1175 typedef struct edge_info_def
1177 /* In case edge is an loopback edge, the probability edge will be reached
1178 in case header is. Estimated number of iterations of the loop can be
1179 then computed as 1 / (1 - back_edge_prob). */
1180 sreal back_edge_prob
;
1181 /* True if the edge is an loopback edge in the natural loop. */
1182 unsigned int back_edge
:1;
1185 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1186 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1188 /* Helper function for estimate_bb_frequencies.
1189 Propagate the frequencies for LOOP. */
1192 propagate_freq (struct loop
*loop
)
1194 basic_block head
= loop
->header
;
1200 /* For each basic block we need to visit count number of his predecessors
1201 we need to visit first. */
1202 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1204 if (BLOCK_INFO (bb
)->tovisit
)
1209 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1211 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1213 else if (BLOCK_INFO (e
->src
)->tovisit
1214 && dump_file
&& !EDGE_INFO (e
)->back_edge
)
1216 "Irreducible region hit, ignoring edge to %i->%i\n",
1217 e
->src
->index
, bb
->index
);
1219 BLOCK_INFO (bb
)->npredecessors
= count
;
1223 memcpy (&BLOCK_INFO (head
)->frequency
, &real_one
, sizeof (real_one
));
1225 for (bb
= head
; bb
; bb
= nextbb
)
1228 sreal cyclic_probability
, frequency
;
1230 memcpy (&cyclic_probability
, &real_zero
, sizeof (real_zero
));
1231 memcpy (&frequency
, &real_zero
, sizeof (real_zero
));
1233 nextbb
= BLOCK_INFO (bb
)->next
;
1234 BLOCK_INFO (bb
)->next
= NULL
;
1236 /* Compute frequency of basic block. */
1239 #ifdef ENABLE_CHECKING
1240 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1242 if (BLOCK_INFO (e
->src
)->tovisit
&& !(e
->flags
& EDGE_DFS_BACK
))
1247 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1249 if (EDGE_INFO (e
)->back_edge
)
1251 sreal_add (&cyclic_probability
, &cyclic_probability
,
1252 &EDGE_INFO (e
)->back_edge_prob
);
1254 else if (!(e
->flags
& EDGE_DFS_BACK
))
1258 /* frequency += (e->probability
1259 * BLOCK_INFO (e->src)->frequency /
1260 REG_BR_PROB_BASE); */
1262 sreal_init (&tmp
, e
->probability
, 0);
1263 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (e
->src
)->frequency
);
1264 sreal_mul (&tmp
, &tmp
, &real_inv_br_prob_base
);
1265 sreal_add (&frequency
, &frequency
, &tmp
);
1269 if (sreal_compare (&cyclic_probability
, &real_zero
) == 0)
1271 memcpy (&BLOCK_INFO (bb
)->frequency
, &frequency
,
1272 sizeof (frequency
));
1276 if (sreal_compare (&cyclic_probability
, &real_almost_one
) > 0)
1278 memcpy (&cyclic_probability
, &real_almost_one
,
1279 sizeof (real_almost_one
));
1282 /* BLOCK_INFO (bb)->frequency = frequency
1283 / (1 - cyclic_probability) */
1285 sreal_sub (&cyclic_probability
, &real_one
, &cyclic_probability
);
1286 sreal_div (&BLOCK_INFO (bb
)->frequency
,
1287 &frequency
, &cyclic_probability
);
1291 BLOCK_INFO (bb
)->tovisit
= 0;
1293 /* Compute back edge frequencies. */
1294 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1296 if (e
->dest
== head
)
1300 /* EDGE_INFO (e)->back_edge_prob
1301 = ((e->probability * BLOCK_INFO (bb)->frequency)
1302 / REG_BR_PROB_BASE); */
1304 sreal_init (&tmp
, e
->probability
, 0);
1305 sreal_mul (&tmp
, &tmp
, &BLOCK_INFO (bb
)->frequency
);
1306 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1307 &tmp
, &real_inv_br_prob_base
);
1311 /* Propagate to successor blocks. */
1312 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1314 if (!(e
->flags
& EDGE_DFS_BACK
)
1315 && BLOCK_INFO (e
->dest
)->npredecessors
)
1317 BLOCK_INFO (e
->dest
)->npredecessors
--;
1318 if (!BLOCK_INFO (e
->dest
)->npredecessors
)
1323 BLOCK_INFO (last
)->next
= e
->dest
;
1332 /* Estimate probabilities of loopback edges in loops at same nest level. */
1335 estimate_loops_at_level (struct loop
*first_loop
)
1339 for (loop
= first_loop
; loop
; loop
= loop
->next
)
1345 estimate_loops_at_level (loop
->inner
);
1347 /* Do not do this for dummy function loop. */
1348 if (EDGE_COUNT (loop
->latch
->succs
) > 0)
1350 /* Find current loop back edge and mark it. */
1351 e
= loop_latch_edge (loop
);
1352 EDGE_INFO (e
)->back_edge
= 1;
1355 bbs
= get_loop_body (loop
);
1356 for (i
= 0; i
< loop
->num_nodes
; i
++)
1357 BLOCK_INFO (bbs
[i
])->tovisit
= 1;
1359 propagate_freq (loop
);
1363 /* Convert counts measured by profile driven feedback to frequencies.
1364 Return nonzero iff there was any nonzero execution count. */
1367 counts_to_freqs (void)
1369 gcov_type count_max
, true_count_max
= 0;
1373 true_count_max
= MAX (bb
->count
, true_count_max
);
1375 count_max
= MAX (true_count_max
, 1);
1376 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1377 bb
->frequency
= (bb
->count
* BB_FREQ_MAX
+ count_max
/ 2) / count_max
;
1378 return true_count_max
;
1381 /* Return true if function is likely to be expensive, so there is no point to
1382 optimize performance of prologue, epilogue or do inlining at the expense
1383 of code size growth. THRESHOLD is the limit of number of instructions
1384 function can execute at average to be still considered not expensive. */
1387 expensive_function_p (int threshold
)
1389 unsigned int sum
= 0;
1393 /* We can not compute accurately for large thresholds due to scaled
1395 if (threshold
> BB_FREQ_MAX
)
1398 /* Frequencies are out of range. This either means that function contains
1399 internal loop executing more than BB_FREQ_MAX times or profile feedback
1400 is available and function has not been executed at all. */
1401 if (ENTRY_BLOCK_PTR
->frequency
== 0)
1404 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1405 limit
= ENTRY_BLOCK_PTR
->frequency
* threshold
;
1410 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
1411 insn
= NEXT_INSN (insn
))
1412 if (active_insn_p (insn
))
1414 sum
+= bb
->frequency
;
1423 /* Estimate basic blocks frequency by given branch probabilities. */
1426 estimate_bb_frequencies (struct loops
*loops
)
1431 if (!flag_branch_probabilities
|| !counts_to_freqs ())
1433 static int real_values_initialized
= 0;
1435 if (!real_values_initialized
)
1437 real_values_initialized
= 1;
1438 sreal_init (&real_zero
, 0, 0);
1439 sreal_init (&real_one
, 1, 0);
1440 sreal_init (&real_br_prob_base
, REG_BR_PROB_BASE
, 0);
1441 sreal_init (&real_bb_freq_max
, BB_FREQ_MAX
, 0);
1442 sreal_init (&real_one_half
, 1, -1);
1443 sreal_div (&real_inv_br_prob_base
, &real_one
, &real_br_prob_base
);
1444 sreal_sub (&real_almost_one
, &real_one
, &real_inv_br_prob_base
);
1447 mark_dfs_back_edges ();
1449 EDGE_SUCC (ENTRY_BLOCK_PTR
, 0)->probability
= REG_BR_PROB_BASE
;
1451 /* Set up block info for each basic block. */
1452 alloc_aux_for_blocks (sizeof (struct block_info_def
));
1453 alloc_aux_for_edges (sizeof (struct edge_info_def
));
1454 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1459 BLOCK_INFO (bb
)->tovisit
= 0;
1460 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1462 sreal_init (&EDGE_INFO (e
)->back_edge_prob
, e
->probability
, 0);
1463 sreal_mul (&EDGE_INFO (e
)->back_edge_prob
,
1464 &EDGE_INFO (e
)->back_edge_prob
,
1465 &real_inv_br_prob_base
);
1469 /* First compute probabilities locally for each loop from innermost
1470 to outermost to examine probabilities for back edges. */
1471 estimate_loops_at_level (loops
->tree_root
);
1473 memcpy (&freq_max
, &real_zero
, sizeof (real_zero
));
1475 if (sreal_compare (&freq_max
, &BLOCK_INFO (bb
)->frequency
) < 0)
1476 memcpy (&freq_max
, &BLOCK_INFO (bb
)->frequency
, sizeof (freq_max
));
1478 sreal_div (&freq_max
, &real_bb_freq_max
, &freq_max
);
1479 FOR_BB_BETWEEN (bb
, ENTRY_BLOCK_PTR
, NULL
, next_bb
)
1483 sreal_mul (&tmp
, &BLOCK_INFO (bb
)->frequency
, &freq_max
);
1484 sreal_add (&tmp
, &tmp
, &real_one_half
);
1485 bb
->frequency
= sreal_to_int (&tmp
);
1488 free_aux_for_blocks ();
1489 free_aux_for_edges ();
1491 compute_function_frequency ();
1492 if (flag_reorder_functions
)
1493 choose_function_section ();
1496 /* Decide whether function is hot, cold or unlikely executed. */
1498 compute_function_frequency (void)
1502 if (!profile_info
|| !flag_branch_probabilities
)
1504 cfun
->function_frequency
= FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
;
1507 if (maybe_hot_bb_p (bb
))
1509 cfun
->function_frequency
= FUNCTION_FREQUENCY_HOT
;
1512 if (!probably_never_executed_bb_p (bb
))
1513 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
1517 /* Choose appropriate section for the function. */
1519 choose_function_section (void)
1521 if (DECL_SECTION_NAME (current_function_decl
)
1522 || !targetm
.have_named_sections
1523 /* Theoretically we can split the gnu.linkonce text section too,
1524 but this requires more work as the frequency needs to match
1525 for all generated objects so we need to merge the frequency
1526 of all instances. For now just never set frequency for these. */
1527 || DECL_ONE_ONLY (current_function_decl
))
1530 /* If we are doing the partitioning optimization, let the optimization
1531 choose the correct section into which to put things. */
1533 if (flag_reorder_blocks_and_partition
)
1536 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_HOT
)
1537 DECL_SECTION_NAME (current_function_decl
) =
1538 build_string (strlen (HOT_TEXT_SECTION_NAME
), HOT_TEXT_SECTION_NAME
);
1539 if (cfun
->function_frequency
== FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
1540 DECL_SECTION_NAME (current_function_decl
) =
1541 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME
),
1542 UNLIKELY_EXECUTED_TEXT_SECTION_NAME
);
1546 struct tree_opt_pass pass_profile
=
1548 "profile", /* name */
1550 tree_estimate_probability
, /* execute */
1553 0, /* static_pass_number */
1554 TV_BRANCH_PROB
, /* tv_id */
1555 PROP_cfg
, /* properties_required */
1556 0, /* properties_provided */
1557 0, /* properties_destroyed */
1558 0, /* todo_flags_start */
1559 TODO_ggc_collect
| TODO_verify_ssa
, /* todo_flags_finish */