1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
44 #include "tree-pass.h"
/* Hooks selecting either the RTL-based or the tree-based implementation
   of this pass; set by {rtl,tree}_register_value_prof_hooks.  */
static struct value_prof_hooks *value_prof_hooks;
48 /* This is the vector of histograms. Created in find_values_to_profile.
49 During profile generation, freed by instrument_values.
50 During profile use, freed by value_profile_transformations. */
52 static histogram_values static_values
= NULL
;
54 /* In this file value profile based optimizations are placed. Currently the
55 following optimizations are implemented (for more detailed descriptions
56 see comments at value_profile_transformations):
58 1) Division/modulo specialization. Provided that we can determine that the
59 operands of the division have some special properties, we may use it to
60 produce more effective code.
61 2) Speculative prefetching. If we are able to determine that the difference
62 between addresses accessed by a memory reference is usually constant, we
63 may add the prefetch instructions.
65 Every such optimization should add its requirements for profiled values to
66 insn_values_to_profile function. This function is called from branch_prob
67 in profile.c and the requested values are instrumented by it in the first
68 compilation with -fprofile-arcs. The optimization may then read the
69 gathered data in the second compilation with -fbranch-probabilities.
71 There are currently two versions, RTL-based and tree-based. Over time
72 the RTL-based version may go away.
74 In the RTL-based version, the measured data is appended as REG_VALUE_PROFILE
75 note to the instrumented insn. The argument to the note consists of an
76 EXPR_LIST where its members have the following meaning (from the first to
79 -- type of information gathered (HIST_TYPE*)
80 -- the expression that is profiled
81 -- list of counters starting from the first one.
83 In the tree-based version, the measured data is pointed to from the histograms
84 field of the statement annotation of the instrumented insns. It is
85 kept as a linked list of struct histogram_value_t's, which contain the
86 same information as above. */
88 /* For speculative prefetching, the range in that we do not prefetch (because
89 we assume that it will be in cache anyway). The asymmetry between min and
90 max range is trying to reflect the fact that the sequential prefetching
91 of the data is commonly done directly by hardware. Nevertheless, these
92 values are just a guess and should of course be target-specific.
94 FIXME: There is no tree form of speculative prefetching as yet.
96 FIXME: A better approach to instrumentation in the profile-generation
97 pass is to generate calls to magic library functions (to be added to
98 libgcc) rather than inline code. This approach will probably be
99 necessary to get tree-based speculative prefetching working in a useful
100 fashion, as inline code bloats things so much the rest of the compiler has
101 serious problems dealing with it (judging from the rtl behavior). */
/* Deltas inside [NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX] are assumed
   to already be in cache (hardware handles short sequential strides), so
   no speculative prefetch is emitted for them.  Target-overridable.
   The extraction had dropped the closing #endif of each guard; restored.  */
#ifndef NOPREFETCH_RANGE_MIN
#define NOPREFETCH_RANGE_MIN (-16)
#endif
#ifndef NOPREFETCH_RANGE_MAX
#define NOPREFETCH_RANGE_MAX 32
#endif
110 static void rtl_divmod_values_to_profile (rtx
, histogram_values
*);
112 static bool insn_prefetch_values_to_profile (rtx
, histogram_values
*);
113 static int find_mem_reference_1 (rtx
*, void *);
114 static void find_mem_reference_2 (rtx
, rtx
, void *);
115 static bool find_mem_reference (rtx
, rtx
*, int *);
118 static void rtl_values_to_profile (rtx
, histogram_values
*);
119 static rtx
rtl_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
120 rtx
, gcov_type
, int);
121 static rtx
rtl_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
, int);
122 static rtx
rtl_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
125 static rtx
gen_speculative_prefetch (rtx
, gcov_type
, int);
127 static bool rtl_divmod_fixed_value_transform (rtx
);
128 static bool rtl_mod_pow2_value_transform (rtx
);
129 static bool rtl_mod_subtract_transform (rtx
);
131 static bool speculative_prefetching_transform (rtx
);
133 static tree
tree_divmod_fixed_value (tree
, tree
, tree
, tree
,
134 tree
, int, gcov_type
, gcov_type
);
135 static tree
tree_mod_pow2 (tree
, tree
, tree
, tree
, int, gcov_type
, gcov_type
);
136 static tree
tree_mod_subtract (tree
, tree
, tree
, tree
, int, int, int,
137 gcov_type
, gcov_type
, gcov_type
);
138 static bool tree_divmod_fixed_value_transform (tree
);
139 static bool tree_mod_pow2_value_transform (tree
);
140 static bool tree_mod_subtract_transform (tree
);
143 /* Find values inside INSN for that we want to measure histograms for
144 division/modulo optimization and stores them to VALUES. */
146 rtl_divmod_values_to_profile (rtx insn
, histogram_values
*values
)
148 rtx set
, set_src
, op1
, op2
;
149 enum machine_mode mode
;
150 histogram_value hist
;
155 set
= single_set (insn
);
159 mode
= GET_MODE (SET_DEST (set
));
160 if (!INTEGRAL_MODE_P (mode
))
163 set_src
= SET_SRC (set
);
164 switch (GET_CODE (set_src
))
170 op1
= XEXP (set_src
, 0);
171 op2
= XEXP (set_src
, 1);
172 if (side_effects_p (op2
))
175 /* Check for a special case where the divisor is power of 2. */
176 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
178 hist
= ggc_alloc (sizeof (*hist
));
179 hist
->hvalue
.rtl
.value
= op2
;
180 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
181 hist
->hvalue
.rtl
.mode
= mode
;
182 hist
->hvalue
.rtl
.insn
= insn
;
183 hist
->type
= HIST_TYPE_POW2
;
184 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
187 /* Check whether the divisor is not in fact a constant. */
188 if (!CONSTANT_P (op2
))
190 hist
= ggc_alloc (sizeof (*hist
));
191 hist
->hvalue
.rtl
.value
= op2
;
192 hist
->hvalue
.rtl
.mode
= mode
;
193 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
194 hist
->hvalue
.rtl
.insn
= insn
;
195 hist
->type
= HIST_TYPE_SINGLE_VALUE
;
196 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
199 /* For mod, check whether it is not often a noop (or replaceable by
200 a few subtractions). */
201 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
205 hist
= ggc_alloc (sizeof (*hist
));
207 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
208 hist
->hvalue
.rtl
.value
= force_operand (tmp
, NULL_RTX
);
209 hist
->hvalue
.rtl
.seq
= get_insns ();
211 hist
->hvalue
.rtl
.mode
= mode
;
212 hist
->hvalue
.rtl
.insn
= insn
;
213 hist
->type
= HIST_TYPE_INTERVAL
;
214 hist
->hdata
.intvl
.int_start
= 0;
215 hist
->hdata
.intvl
.steps
= 2;
216 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
227 /* Called from find_mem_reference through for_each_rtx, finds a memory
228 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
229 to *RET and the traversing of the expression is interrupted by returning 1.
230 Otherwise 0 is returned. */
233 find_mem_reference_1 (rtx
*expr
, void *ret
)
245 /* Called form find_mem_reference through note_stores to find out whether
246 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
247 FMR2_WRITE is set to true. */
249 static int fmr2_write
;
251 find_mem_reference_2 (rtx expr
, rtx pat ATTRIBUTE_UNUSED
, void *mem
)
257 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
258 if it is a write of the mem. Return false if no memory reference is found,
262 find_mem_reference (rtx insn
, rtx
*mem
, int *write
)
265 for_each_rtx (&PATTERN (insn
), find_mem_reference_1
, mem
);
271 note_stores (PATTERN (insn
), find_mem_reference_2
, *mem
);
276 /* Find values inside INSN for that we want to measure histograms for
277 a speculative prefetching. Add them to the list VALUES.
278 Returns true if such we found any such value, false otherwise. */
281 insn_prefetch_values_to_profile (rtx insn
, histogram_values
* values
)
285 histogram_value hist
;
287 /* It only makes sense to look for memory references in ordinary insns. */
288 if (!NONJUMP_INSN_P (insn
))
291 if (!find_mem_reference (insn
, &mem
, &write
))
294 address
= XEXP (mem
, 0);
295 if (side_effects_p (address
))
298 if (CONSTANT_P (address
))
301 hist
= ggc_alloc (sizeof (*hist
));
302 hist
->hvalue
.rtl
.value
= address
;
303 hist
->hvalue
.rtl
.mode
= GET_MODE (address
);
304 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
305 hist
->hvalue
.rtl
.insn
= insn
;
306 hist
->type
= HIST_TYPE_CONST_DELTA
;
307 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
312 /* Find values inside INSN for that we want to measure histograms and adds
313 them to list VALUES (increasing the record of its length in N_VALUES). */
315 rtl_values_to_profile (rtx insn
, histogram_values
*values
)
317 if (flag_value_profile_transformations
)
318 rtl_divmod_values_to_profile (insn
, values
);
321 if (flag_speculative_prefetching
)
322 insn_prefetch_values_to_profile (insn
, values
);
326 /* Find list of values for that we want to measure histograms. */
328 rtl_find_values_to_profile (histogram_values
*values
)
331 unsigned i
, libcall_level
;
332 histogram_value hist
;
334 life_analysis (NULL
, PROP_DEATH_NOTES
);
338 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
339 rtl_values_to_profile (insn
, values
);
340 static_values
= *values
;
342 for (i
= 0; VEC_iterate (histogram_value
, *values
, i
, hist
); i
++)
346 case HIST_TYPE_INTERVAL
:
349 "Interval counter for insn %d, range %d -- %d.\n",
350 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
),
351 hist
->hdata
.intvl
.int_start
,
352 (hist
->hdata
.intvl
.int_start
353 + hist
->hdata
.intvl
.steps
- 1));
354 hist
->n_counters
= hist
->hdata
.intvl
.steps
+ 2;
360 "Pow2 counter for insn %d.\n",
361 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
362 hist
->n_counters
= 2;
365 case HIST_TYPE_SINGLE_VALUE
:
368 "Single value counter for insn %d.\n",
369 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
370 hist
->n_counters
= 3;
373 case HIST_TYPE_CONST_DELTA
:
376 "Constant delta counter for insn %d.\n",
377 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
378 hist
->n_counters
= 4;
385 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
388 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
389 them to identify and exploit properties of values that are hard to analyze
392 We do following transformations:
398 where b is almost always a constant N is transformed to
411 where b is almost always a power of 2 and the division is unsigned
412 TODO -- handle signed case as well
414 if ((b & (b - 1)) == 0)
419 Note that when b = 0, no error will occur and x = a; this is correct,
420 as result of such operation is undefined.
426 where a is almost always less then b and the division is unsigned
427 TODO -- handle signed case as well
437 where a is almost always less then 2 * b and the division is unsigned
438 TODO -- handle signed case as well
446 It would be possible to continue analogically for K * b for other small
447 K's, but it is probably not useful.
451 Read or write of mem[address], where the value of address changes usually
452 by a constant C != 0 between the following accesses to the computation; with
453 -fspeculative-prefetching we then add a prefetch of address + C before
454 the insn. This handles prefetching of several interesting cases in addition
455 to a simple prefetching for addresses that are induction variables, e. g.
456 linked lists allocated sequentially (even in case they are processed
459 TODO -- we should also check whether there is not (usually) a small
460 difference with the adjacent memory references, so that we do
461 not issue overlapping prefetches. Also we should employ some
462 heuristics to eliminate cases where prefetching evidently spoils
464 -- it should somehow cooperate with the loop optimizer prefetching
468 There are other useful cases that could be handled by a similar mechanism,
471 for (i = 0; i < n; i++)
474 transform to (for constant N):
477 for (i = 0; i < N; i++)
480 for (i = 0; i < n; i++)
482 making unroller happy. Since this may grow the code significantly,
483 we would have to be very careful here. */
486 rtl_value_profile_transformations (void)
491 for (insn
= get_insns (); insn
; insn
= next
)
493 next
= NEXT_INSN (insn
);
498 /* Scan for insn carrying a histogram. */
499 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
502 /* Ignore cold areas -- we are growing a code. */
503 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
508 fprintf (dump_file
, "Trying transformations on insn %d\n",
510 print_rtl_single (dump_file
, insn
);
513 /* Transformations: */
514 if (flag_value_profile_transformations
515 && (rtl_mod_subtract_transform (insn
)
516 || rtl_divmod_fixed_value_transform (insn
)
517 || rtl_mod_pow2_value_transform (insn
)))
520 if (flag_speculative_prefetching
521 && speculative_prefetching_transform (insn
))
528 commit_edge_insertions ();
529 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
535 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
536 and OP2, whose value is expected to be VALUE, result TARGET and
537 probability of taking the optimal path PROB). */
539 rtl_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
540 rtx target
, rtx op1
, rtx op2
, gcov_type value
,
544 rtx neq_label
= gen_label_rtx ();
545 rtx end_label
= gen_label_rtx ();
552 tmp
= gen_reg_rtx (mode
);
553 emit_move_insn (tmp
, copy_rtx (op2
));
558 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
559 NULL_RTX
, neq_label
);
561 /* Add branch probability to jump we just created. */
562 jump
= get_last_insn ();
563 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
564 GEN_INT (REG_BR_PROB_BASE
- prob
),
567 tmp1
= simplify_gen_binary (operation
, mode
,
568 copy_rtx (op1
), GEN_INT (value
));
569 tmp1
= force_operand (tmp1
, target
);
571 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
573 emit_jump_insn (gen_jump (end_label
));
576 emit_label (neq_label
);
577 tmp1
= simplify_gen_binary (operation
, mode
,
578 copy_rtx (op1
), copy_rtx (tmp
));
579 tmp1
= force_operand (tmp1
, target
);
581 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
583 emit_label (end_label
);
585 sequence
= get_insns ();
587 rebuild_jump_labels (sequence
);
591 /* Do transform 1) on INSN if applicable. */
593 rtl_divmod_fixed_value_transform (rtx insn
)
595 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
597 enum machine_mode mode
;
598 gcov_type val
, count
, all
;
602 set
= single_set (insn
);
606 set_src
= SET_SRC (set
);
607 set_dest
= SET_DEST (set
);
608 code
= GET_CODE (set_src
);
609 mode
= GET_MODE (set_dest
);
611 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
613 op1
= XEXP (set_src
, false);
614 op2
= XEXP (set_src
, 1);
616 for (histogram
= REG_NOTES (insn
);
618 histogram
= XEXP (histogram
, 1))
619 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
620 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
626 histogram
= XEXP (XEXP (histogram
, 0), 1);
627 value
= XEXP (histogram
, 0);
628 histogram
= XEXP (histogram
, 1);
629 val
= INTVAL (XEXP (histogram
, 0));
630 histogram
= XEXP (histogram
, 1);
631 count
= INTVAL (XEXP (histogram
, 0));
632 histogram
= XEXP (histogram
, 1);
633 all
= INTVAL (XEXP (histogram
, 0));
635 /* We require that count be at least half of all; this means
636 that for the transformation to fire the value must be constant
637 at least 50% of time (and 75% gives the guarantee of usage). */
638 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
642 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
645 /* Compute probability of taking the optimal path. */
646 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
648 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
651 insert_insn_on_edge (
652 rtl_divmod_fixed_value (mode
, code
, set_dest
,
653 op1
, op2
, val
, prob
), e
);
658 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
659 and OP2, result TARGET and probability of taking the optimal path PROB). */
661 rtl_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
662 rtx op1
, rtx op2
, int prob
)
664 rtx tmp
, tmp1
, tmp2
, tmp3
, jump
;
665 rtx neq_label
= gen_label_rtx ();
666 rtx end_label
= gen_label_rtx ();
673 tmp
= gen_reg_rtx (mode
);
674 emit_move_insn (tmp
, copy_rtx (op2
));
679 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
681 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
683 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
684 NULL_RTX
, neq_label
);
686 /* Add branch probability to jump we just created. */
687 jump
= get_last_insn ();
688 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
689 GEN_INT (REG_BR_PROB_BASE
- prob
),
692 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
695 emit_move_insn (copy_rtx (target
), tmp3
);
696 emit_jump_insn (gen_jump (end_label
));
699 emit_label (neq_label
);
700 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
701 tmp1
= force_operand (tmp1
, target
);
703 emit_move_insn (target
, tmp1
);
705 emit_label (end_label
);
707 sequence
= get_insns ();
709 rebuild_jump_labels (sequence
);
713 /* Do transform 2) on INSN if applicable. */
715 rtl_mod_pow2_value_transform (rtx insn
)
717 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
719 enum machine_mode mode
;
720 gcov_type wrong_values
, count
;
724 set
= single_set (insn
);
728 set_src
= SET_SRC (set
);
729 set_dest
= SET_DEST (set
);
730 code
= GET_CODE (set_src
);
731 mode
= GET_MODE (set_dest
);
735 op1
= XEXP (set_src
, 0);
736 op2
= XEXP (set_src
, 1);
738 for (histogram
= REG_NOTES (insn
);
740 histogram
= XEXP (histogram
, 1))
741 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
742 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
748 histogram
= XEXP (XEXP (histogram
, 0), 1);
749 value
= XEXP (histogram
, 0);
750 histogram
= XEXP (histogram
, 1);
751 wrong_values
= INTVAL (XEXP (histogram
, 0));
752 histogram
= XEXP (histogram
, 1);
753 count
= INTVAL (XEXP (histogram
, 0));
755 if (!rtx_equal_p (op2
, value
))
758 /* We require that we hit a power of two at least half of all evaluations. */
759 if (count
< wrong_values
)
763 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
766 /* Compute probability of taking the optimal path. */
767 all
= count
+ wrong_values
;
768 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
770 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
773 insert_insn_on_edge (
774 rtl_mod_pow2 (mode
, code
, set_dest
, op1
, op2
, prob
), e
);
779 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
780 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
781 probability of taking the optimal path(s) PROB1 and PROB2). */
783 rtl_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
784 rtx target
, rtx op1
, rtx op2
, int sub
, int prob1
, int prob2
)
787 rtx end_label
= gen_label_rtx ();
795 tmp
= gen_reg_rtx (mode
);
796 emit_move_insn (tmp
, copy_rtx (op2
));
801 emit_move_insn (target
, copy_rtx (op1
));
802 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
803 NULL_RTX
, end_label
);
805 /* Add branch probability to jump we just created. */
806 jump
= get_last_insn ();
807 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
808 GEN_INT (prob1
), REG_NOTES (jump
));
810 for (i
= 0; i
< sub
; i
++)
812 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
815 emit_move_insn (target
, tmp1
);
816 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
817 NULL_RTX
, end_label
);
819 /* Add branch probability to jump we just created. */
820 jump
= get_last_insn ();
821 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
822 GEN_INT (prob2
), REG_NOTES (jump
));
825 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
826 tmp1
= force_operand (tmp1
, target
);
828 emit_move_insn (target
, tmp1
);
830 emit_label (end_label
);
832 sequence
= get_insns ();
834 rebuild_jump_labels (sequence
);
838 /* Do transforms 3) and 4) on INSN if applicable. */
840 rtl_mod_subtract_transform (rtx insn
)
842 rtx set
, set_src
, set_dest
, op1
, op2
, histogram
;
844 enum machine_mode mode
;
845 gcov_type wrong_values
, counts
[2], count
, all
;
849 set
= single_set (insn
);
853 set_src
= SET_SRC (set
);
854 set_dest
= SET_DEST (set
);
855 code
= GET_CODE (set_src
);
856 mode
= GET_MODE (set_dest
);
860 op1
= XEXP (set_src
, 0);
861 op2
= XEXP (set_src
, 1);
863 for (histogram
= REG_NOTES (insn
);
865 histogram
= XEXP (histogram
, 1))
866 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
867 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
873 histogram
= XEXP (XEXP (histogram
, 0), 1);
874 histogram
= XEXP (histogram
, 1);
877 for (i
= 0; i
< 2; i
++)
879 counts
[i
] = INTVAL (XEXP (histogram
, 0));
881 histogram
= XEXP (histogram
, 1);
883 wrong_values
= INTVAL (XEXP (histogram
, 0));
884 histogram
= XEXP (histogram
, 1);
885 wrong_values
+= INTVAL (XEXP (histogram
, 0));
888 /* We require that we use just subtractions in at least 50% of all
891 for (i
= 0; i
< 2; i
++)
894 if (count
* 2 >= all
)
902 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
905 /* Compute probability of taking the optimal path(s). */
906 prob1
= (counts
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
907 prob2
= (counts
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
909 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
912 insert_insn_on_edge (
913 rtl_mod_subtract (mode
, code
, set_dest
,
914 op1
, op2
, i
, prob1
, prob2
), e
);
920 /* Generate code for transformation 5 for mem with ADDRESS and a constant
921 step DELTA. WRITE is true if the reference is a store to mem. */
924 gen_speculative_prefetch (rtx address
, gcov_type delta
, int write
)
929 /* TODO: we do the prefetching for just one iteration ahead, which
930 often is not enough. */
932 if (offsettable_address_p (0, VOIDmode
, address
))
933 tmp
= plus_constant (copy_rtx (address
), delta
);
936 tmp
= simplify_gen_binary (PLUS
, Pmode
,
937 copy_rtx (address
), GEN_INT (delta
));
938 tmp
= force_operand (tmp
, NULL
);
940 if (! (*insn_data
[(int)CODE_FOR_prefetch
].operand
[0].predicate
)
941 (tmp
, insn_data
[(int)CODE_FOR_prefetch
].operand
[0].mode
))
942 tmp
= force_reg (Pmode
, tmp
);
943 emit_insn (gen_prefetch (tmp
, GEN_INT (write
), GEN_INT (3)));
944 sequence
= get_insns ();
950 /* Do transform 5) on INSN if applicable. */
953 speculative_prefetching_transform (rtx insn
)
955 rtx histogram
, value
;
956 gcov_type val
, count
, all
;
961 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
964 if (!find_mem_reference (insn
, &mem
, &write
))
967 address
= XEXP (mem
, 0);
968 if (side_effects_p (address
))
971 if (CONSTANT_P (address
))
974 for (histogram
= REG_NOTES (insn
);
976 histogram
= XEXP (histogram
, 1))
977 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
978 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA
))
984 histogram
= XEXP (XEXP (histogram
, 0), 1);
985 value
= XEXP (histogram
, 0);
986 histogram
= XEXP (histogram
, 1);
987 /* Skip last value referenced. */
988 histogram
= XEXP (histogram
, 1);
989 val
= INTVAL (XEXP (histogram
, 0));
990 histogram
= XEXP (histogram
, 1);
991 count
= INTVAL (XEXP (histogram
, 0));
992 histogram
= XEXP (histogram
, 1);
993 all
= INTVAL (XEXP (histogram
, 0));
995 /* With that few executions we do not really have a reason to optimize the
996 statement, and more importantly, the data about differences of addresses
997 are spoiled by the first item that had no previous value to compare
1002 /* We require that count be at least half of all; this means
1003 that for the transformation to fire the value must be constant
1004 at least 50% of time (and 75% gives the guarantee of usage). */
1005 if (!rtx_equal_p (address
, value
) || 2 * count
< all
)
1008 /* If the difference is too small, it does not make too much sense to
1009 prefetch, as the memory is probably already in cache. */
1010 if (val
>= NOPREFETCH_RANGE_MIN
&& val
<= NOPREFETCH_RANGE_MAX
)
1014 fprintf (dump_file
, "Speculative prefetching for insn %d\n",
1017 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
1019 insert_insn_on_edge (gen_speculative_prefetch (address
, val
, write
), e
);
1023 #endif /* HAVE_prefetch */
1025 /* Tree based transformations. */
1027 tree_value_profile_transformations (void)
1030 block_stmt_iterator bsi
;
1031 bool changed
= false;
1035 /* Ignore cold areas -- we are enlarging the code. */
1036 if (!maybe_hot_bb_p (bb
))
1039 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1041 tree stmt
= bsi_stmt (bsi
);
1042 stmt_ann_t ann
= get_stmt_ann (stmt
);
1043 histogram_value th
= ann
->histograms
;
1049 fprintf (dump_file
, "Trying transformations on insn ");
1050 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
1053 /* Transformations: */
1054 /* The order of things in this conditional controls which
1055 transformation is used when more than one is applicable. */
1056 /* It is expected that any code added by the transformations
1057 will be added before the current statement, and that the
1058 current statement remain valid (although possibly
1059 modified) upon return. */
1060 if (flag_value_profile_transformations
1061 && (tree_mod_subtract_transform (stmt
)
1062 || tree_divmod_fixed_value_transform (stmt
)
1063 || tree_mod_pow2_value_transform (stmt
)))
1066 /* Original statement may no longer be in the same block. */
1067 bb
= bb_for_stmt (stmt
);
1070 /* Free extra storage from compute_value_histograms. */
1073 free (th
->hvalue
.tree
.counters
);
1074 th
= th
->hvalue
.tree
.next
;
1076 ann
->histograms
= 0;
1088 /* Generate code for transformation 1 (with OPERATION, operands OP1
1089 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
1090 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1091 within roundoff error). This generates the result into a temp and returns
1092 the temp; it does not replace or alter the original STMT. */
1094 tree_divmod_fixed_value (tree stmt
, tree operation
,
1095 tree op1
, tree op2
, tree value
, int prob
, gcov_type count
,
1098 tree stmt1
, stmt2
, stmt3
;
1099 tree tmp1
, tmp2
, tmpv
;
1100 tree label_decl1
= create_artificial_label ();
1101 tree label_decl2
= create_artificial_label ();
1102 tree label_decl3
= create_artificial_label ();
1103 tree label1
, label2
, label3
;
1104 tree bb1end
, bb2end
, bb3end
;
1105 basic_block bb
, bb2
, bb3
, bb4
;
1106 tree optype
= TREE_TYPE (operation
);
1107 edge e12
, e13
, e23
, e24
, e34
;
1108 block_stmt_iterator bsi
;
1110 bb
= bb_for_stmt (stmt
);
1111 bsi
= bsi_for_stmt (stmt
);
1113 tmpv
= create_tmp_var (optype
, "PROF");
1114 tmp1
= create_tmp_var (optype
, "PROF");
1115 stmt1
= build2 (MODIFY_EXPR
, optype
, tmpv
, fold_convert (optype
, value
));
1116 stmt2
= build2 (MODIFY_EXPR
, optype
, tmp1
, op2
);
1117 stmt3
= build3 (COND_EXPR
, void_type_node
,
1118 build2 (NE_EXPR
, boolean_type_node
, tmp1
, tmpv
),
1119 build1 (GOTO_EXPR
, void_type_node
, label_decl2
),
1120 build1 (GOTO_EXPR
, void_type_node
, label_decl1
));
1121 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1122 bsi_insert_before (&bsi
, stmt2
, BSI_SAME_STMT
);
1123 bsi_insert_before (&bsi
, stmt3
, BSI_SAME_STMT
);
1126 tmp2
= create_tmp_var (optype
, "PROF");
1127 label1
= build1 (LABEL_EXPR
, void_type_node
, label_decl1
);
1128 stmt1
= build2 (MODIFY_EXPR
, optype
, tmp2
,
1129 build2 (TREE_CODE (operation
), optype
, op1
, tmpv
));
1130 bsi_insert_before (&bsi
, label1
, BSI_SAME_STMT
);
1131 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1134 label2
= build1 (LABEL_EXPR
, void_type_node
, label_decl2
);
1135 stmt1
= build2 (MODIFY_EXPR
, optype
, tmp2
,
1136 build2 (TREE_CODE (operation
), optype
, op1
, op2
));
1137 bsi_insert_before (&bsi
, label2
, BSI_SAME_STMT
);
1138 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1141 label3
= build1 (LABEL_EXPR
, void_type_node
, label_decl3
);
1142 bsi_insert_before (&bsi
, label3
, BSI_SAME_STMT
);
1145 /* Edge e23 connects bb2 to bb3, etc. */
1146 e12
= split_block (bb
, bb1end
);
1149 e23
= split_block (bb2
, bb2end
);
1151 bb3
->count
= all
- count
;
1152 e34
= split_block (bb3
, bb3end
);
1156 e12
->flags
&= ~EDGE_FALLTHRU
;
1157 e12
->flags
|= EDGE_FALSE_VALUE
;
1158 e12
->probability
= prob
;
1161 e13
= make_edge (bb
, bb3
, EDGE_TRUE_VALUE
);
1162 e13
->probability
= REG_BR_PROB_BASE
- prob
;
1163 e13
->count
= all
- count
;
1167 e24
= make_edge (bb2
, bb4
, EDGE_FALLTHRU
);
1168 e24
->probability
= REG_BR_PROB_BASE
;
1171 e34
->probability
= REG_BR_PROB_BASE
;
1172 e34
->count
= all
- count
;
1177 /* Do transform 1) on INSN if applicable. */
1179 tree_divmod_fixed_value_transform (tree stmt
)
1181 stmt_ann_t ann
= get_stmt_ann (stmt
);
1182 histogram_value histogram
;
1183 enum tree_code code
;
1184 gcov_type val
, count
, all
;
1185 tree modify
, op
, op1
, op2
, result
, value
, tree_val
;
1189 if (TREE_CODE (stmt
) == RETURN_EXPR
1190 && TREE_OPERAND (stmt
, 0)
1191 && TREE_CODE (TREE_OPERAND (stmt
, 0)) == MODIFY_EXPR
)
1192 modify
= TREE_OPERAND (stmt
, 0);
1193 if (TREE_CODE (modify
) != MODIFY_EXPR
)
1195 op
= TREE_OPERAND (modify
, 1);
1196 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
1198 code
= TREE_CODE (op
);
1200 if (code
!= TRUNC_DIV_EXPR
&& code
!= TRUNC_MOD_EXPR
)
1203 op1
= TREE_OPERAND (op
, 0);
1204 op2
= TREE_OPERAND (op
, 1);
1205 if (!ann
->histograms
)
1208 for (histogram
= ann
->histograms
; histogram
; histogram
= histogram
->hvalue
.tree
.next
)
1209 if (histogram
->type
== HIST_TYPE_SINGLE_VALUE
)
1215 value
= histogram
->hvalue
.tree
.value
;
1216 val
= histogram
->hvalue
.tree
.counters
[0];
1217 count
= histogram
->hvalue
.tree
.counters
[1];
1218 all
= histogram
->hvalue
.tree
.counters
[2];
1220 /* We require that count is at least half of all; this means
1221 that for the transformation to fire the value must be constant
1222 at least 50% of time (and 75% gives the guarantee of usage). */
1223 if (simple_cst_equal (op2
, value
) != 1 || 2 * count
< all
)
1226 /* Compute probability of taking the optimal path. */
1227 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
1229 tree_val
= build_int_cst_wide (get_gcov_type (),
1230 (unsigned HOST_WIDE_INT
) val
,
1231 val
>> (HOST_BITS_PER_WIDE_INT
- 1) >> 1);
1232 result
= tree_divmod_fixed_value (stmt
, op
, op1
, op2
, tree_val
, prob
, count
, all
);
1236 fprintf (dump_file
, "Div/mod by constant ");
1237 print_generic_expr (dump_file
, value
, TDF_SLIM
);
1238 fprintf (dump_file
, "=");
1239 print_generic_expr (dump_file
, tree_val
, TDF_SLIM
);
1240 fprintf (dump_file
, " transformation on insn ");
1241 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
1244 TREE_OPERAND (modify
, 1) = result
;
/* Generate code for transformation 2 (with OPERATION, operands OP1
   and OP2, parent modify-expr STMT and probability of taking the optimal
   path PROB, which is equivalent to COUNT/ALL within roundoff error).
   This generates the result into a temp and returns
   the temp; it does not replace or alter the original STMT.  */

static tree
tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
	       gcov_type count, gcov_type all)
{
  tree stmt1, stmt2, stmt3, stmt4;
  tree tmp1, tmp2, tmp3;
  tree label_decl1 = create_artificial_label ();
  tree label_decl2 = create_artificial_label ();
  tree label_decl3 = create_artificial_label ();
  tree label1, label2, label3;
  tree bb1end, bb2end, bb3end;
  basic_block bb, bb2, bb3, bb4;
  tree optype = TREE_TYPE (operation);
  edge e12, e13, e23, e24, e34;
  block_stmt_iterator bsi;
  /* RESULT receives either the masked value (power-of-2 divisor) or the
     original modulo, depending on the runtime test built below.  */
  tree result = create_tmp_var (optype, "PROF");

  /* NOTE(review): structural lines (braces, dest/count assignments,
     return) were dropped by the extraction this chunk came from and have
     been restored from upstream GCC; verify against the repository copy.  */

  bb = bb_for_stmt (stmt);
  bsi = bsi_for_stmt (stmt);

  /* Block 1: test whether op2 is a power of 2,
     i.e. whether (op2 & (op2 - 1)) != 0.  */
  tmp1 = create_tmp_var (optype, "PROF");
  tmp2 = create_tmp_var (optype, "PROF");
  tmp3 = create_tmp_var (optype, "PROF");
  stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
  stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
		  build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
  stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
		  build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
  stmt4 = build3 (COND_EXPR, void_type_node,
		  build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
		  build1 (GOTO_EXPR, void_type_node, label_decl2),
		  build1 (GOTO_EXPR, void_type_node, label_decl1));
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
  bb1end = stmt4;

  /* Block 2: the fast path -- result = op1 & (op2 - 1).  */
  /* tmp2 == op2-1 inherited from previous block */
  label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (BIT_AND_EXPR, optype, op1, tmp2));
  bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb2end = stmt1;

  /* Block 3: fallback -- perform the original (modulo) operation.  */
  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (TREE_CODE (operation), optype, op1, op2));
  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb3end = stmt1;

  /* Block 4: join point after either path.  */
  label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
  bsi_insert_before (&bsi, label3, BSI_SAME_STMT);

  /* Fix CFG.  */
  /* Edge e23 connects bb2 to bb3, etc.  */
  e12 = split_block (bb, bb1end);
  bb2 = e12->dest;
  bb2->count = count;
  e23 = split_block (bb2, bb2end);
  bb3 = e23->dest;
  bb3->count = all - count;
  e34 = split_block (bb3, bb3end);
  bb4 = e34->dest;
  bb4->count = all;

  /* The condition is "not a power of 2"; the FALSE edge falls through to
     the fast path in bb2 and carries the profiled probability PROB.  */
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_FALSE_VALUE;
  e12->probability = prob;
  e12->count = count;

  e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
  e13->probability = REG_BR_PROB_BASE - prob;
  e13->count = all - count;

  remove_edge (e23);

  e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
  e24->probability = REG_BR_PROB_BASE;
  e24->count = count;

  e34->probability = REG_BR_PROB_BASE;
  e34->count = all - count;

  return result;
}
/* Do transform 2) on INSN if applicable.  */

static bool
tree_mod_pow2_value_transform (tree stmt)
{
  stmt_ann_t ann = get_stmt_ann (stmt);
  histogram_value histogram;
  enum tree_code code;
  gcov_type count, wrong_values, all;
  tree modify, op, op1, op2, result, value;
  int prob;

  /* NOTE(review): dropped structural lines (early returns, braces)
     restored from upstream GCC; verify against the repository copy.  */

  /* Look through a RETURN_EXPR to the MODIFY_EXPR it wraps, if any.  */
  modify = stmt;
  if (TREE_CODE (stmt) == RETURN_EXPR
      && TREE_OPERAND (stmt, 0)
      && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
    modify = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (modify) != MODIFY_EXPR)
    return false;
  op = TREE_OPERAND (modify, 1);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
    return false;
  code = TREE_CODE (op);

  /* Only an unsigned modulo can be replaced by a mask.  */
  if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
    return false;

  op1 = TREE_OPERAND (op, 0);
  op2 = TREE_OPERAND (op, 1);
  if (!ann->histograms)
    return false;

  /* Find the power-of-2 histogram recorded for this statement.  */
  for (histogram = ann->histograms; histogram;
       histogram = histogram->hvalue.tree.next)
    if (histogram->type == HIST_TYPE_POW2)
      break;

  if (!histogram)
    return false;

  value = histogram->hvalue.tree.value;
  wrong_values = histogram->hvalue.tree.counters[0];
  count = histogram->hvalue.tree.counters[1];

  /* We require that we hit a power of 2 at least half of all evaluations. */
  if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
    return false;

  if (dump_file)
    {
      fprintf (dump_file, "Mod power of 2 transformation on insn ");
      print_generic_stmt (dump_file, stmt, TDF_SLIM);
    }

  /* Compute probability of taking the optimal path.  */
  all = count + wrong_values;
  prob = (count * REG_BR_PROB_BASE + all / 2) / all;

  result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);

  /* Redirect the original statement to use the specialized result.  */
  TREE_OPERAND (modify, 1) = result;

  return true;
}
/* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
   and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
   support.  Currently only NCOUNTS==0 or 1 is supported and this is
   built into this interface.  The probabilities of taking the optimal
   paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
   COUNT2/ALL respectively within roundoff error).  This generates the
   result into a temp and returns the temp; it does not replace or alter
   the original STMT.  */
/* FIXME: Generalize the interface to handle NCOUNTS > 1.  */

static tree
tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
		   int prob1, int prob2, int ncounts,
		   gcov_type count1, gcov_type count2, gcov_type all)
{
  tree stmt1, stmt2, stmt3;
  tree tmp1;
  tree label_decl1 = create_artificial_label ();
  tree label_decl2 = create_artificial_label ();
  tree label_decl3 = create_artificial_label ();
  tree label1, label2, label3;
  tree bb1end, bb2end = NULL_TREE, bb3end;
  basic_block bb, bb2, bb3, bb4;
  tree optype = TREE_TYPE (operation);
  edge e12, e23 = 0, e24, e34, e14;
  block_stmt_iterator bsi;
  tree result = create_tmp_var (optype, "PROF");

  /* NOTE(review): dropped structural lines (braces, dest/count
     assignments, return) restored from upstream GCC; verify against the
     repository copy.  */

  bb = bb_for_stmt (stmt);
  bsi = bsi_for_stmt (stmt);

  /* Block 1: if op1 < op2 the modulo is a no-op (result == op1).  */
  tmp1 = create_tmp_var (optype, "PROF");
  stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
  stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
  stmt3 = build3 (COND_EXPR, void_type_node,
		  build2 (LT_EXPR, boolean_type_node, result, tmp1),
		  build1 (GOTO_EXPR, void_type_node, label_decl3),
		  build1 (GOTO_EXPR, void_type_node,
			  ncounts ? label_decl1 : label_decl2));
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
  bb1end = stmt3;

  if (ncounts)	/* Assumed to be 0 or 1 */
    {
      /* Block 2: one subtraction may suffice; retry the comparison.  */
      label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
      stmt1 = build2 (MODIFY_EXPR, optype, result,
		      build2 (MINUS_EXPR, optype, result, tmp1));
      stmt2 = build3 (COND_EXPR, void_type_node,
		      build2 (LT_EXPR, boolean_type_node, result, tmp1),
		      build1 (GOTO_EXPR, void_type_node, label_decl3),
		      build1 (GOTO_EXPR, void_type_node, label_decl2));
      bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
      bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
      bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
      bb2end = stmt2;
    }

  /* Fallback case. */
  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
  stmt1 = build2 (MODIFY_EXPR, optype, result,
		  build2 (TREE_CODE (operation), optype, result, tmp1));
  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
  bb3end = stmt1;

  /* Join point.  */
  label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
  bsi_insert_before (&bsi, label3, BSI_SAME_STMT);

  /* Fix CFG.  */
  /* Edge e23 connects bb2 to bb3, etc. */
  /* However block 3 is optional; if it is not there, references
     to 3 really refer to block 2. */
  e12 = split_block (bb, bb1end);
  bb2 = e12->dest;
  bb2->count = all - count1;

  if (ncounts)	/* Assumed to be 0 or 1.  */
    {
      e23 = split_block (bb2, bb2end);
      bb3 = e23->dest;
      bb3->count = all - count1 - count2;
    }

  e34 = split_block (ncounts ? bb3 : bb2, bb3end);
  bb4 = e34->dest;
  bb4->count = all;

  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_FALSE_VALUE;
  e12->probability = REG_BR_PROB_BASE - prob1;
  e12->count = all - count1;

  e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
  e14->probability = prob1;
  e14->count = count1;

  if (ncounts)  /* Assumed to be 0 or 1.  */
    {
      e23->flags &= ~EDGE_FALLTHRU;
      e23->flags |= EDGE_FALSE_VALUE;
      e23->count = all - count1 - count2;
      e23->probability = REG_BR_PROB_BASE - prob2;

      e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
      e24->probability = prob2;
      e24->count = count2;
    }

  e34->probability = REG_BR_PROB_BASE;
  e34->count = all - count1 - count2;

  return result;
}
/* Do transforms 3) and 4) on INSN if applicable.  */

static bool
tree_mod_subtract_transform (tree stmt)
{
  stmt_ann_t ann = get_stmt_ann (stmt);
  histogram_value histogram;
  enum tree_code code;
  gcov_type count, wrong_values, all;
  tree modify, op, op1, op2, result, value;
  int prob1, prob2;
  unsigned int i;

  /* NOTE(review): dropped structural lines (declarations of i/prob1/prob2,
     early returns, braces) restored from upstream GCC; verify against the
     repository copy.  */

  /* Look through a RETURN_EXPR to the MODIFY_EXPR it wraps, if any.  */
  modify = stmt;
  if (TREE_CODE (stmt) == RETURN_EXPR
      && TREE_OPERAND (stmt, 0)
      && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
    modify = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (modify) != MODIFY_EXPR)
    return false;
  op = TREE_OPERAND (modify, 1);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
    return false;
  code = TREE_CODE (op);

  /* Only an unsigned modulo can be replaced by subtractions.  */
  if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
    return false;

  op1 = TREE_OPERAND (op, 0);
  op2 = TREE_OPERAND (op, 1);
  if (!ann->histograms)
    return false;

  /* Find the interval histogram of op1/op2 recorded for this statement.  */
  for (histogram = ann->histograms; histogram;
       histogram = histogram->hvalue.tree.next)
    if (histogram->type == HIST_TYPE_INTERVAL)
      break;

  if (!histogram)
    return false;

  value = histogram->hvalue.tree.value;
  all = 0;
  wrong_values = 0;
  for (i = 0; i < histogram->hdata.intvl.steps; i++)
    all += histogram->hvalue.tree.counters[i];

  /* Counters [steps] and [steps+1] hold the out-of-range hits.  */
  wrong_values += histogram->hvalue.tree.counters[i];
  wrong_values += histogram->hvalue.tree.counters[i+1];
  all += wrong_values;

  /* We require that we use just subtractions in at least 50% of all
     evaluations.  */
  count = 0;
  for (i = 0; i < histogram->hdata.intvl.steps; i++)
    {
      count += histogram->hvalue.tree.counters[i];
      if (count * 2 >= all)
	break;
    }
  /* I is intentionally read after the loop: it is the number of
     subtractions needed to cover at least half of all evaluations.  */
  if (i == histogram->hdata.intvl.steps)
    return false;

  if (dump_file)
    {
      fprintf (dump_file, "Mod subtract transformation on insn ");
      print_generic_stmt (dump_file, stmt, TDF_SLIM);
    }

  /* Compute probability of taking the optimal path(s).  */
  prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2)
	  / all;
  prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2)
	  / all;

  /* In practice, "steps" is always 2.  This interface reflects this,
     and will need to be changed if "steps" can change.  */
  result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
			      histogram->hvalue.tree.counters[0],
			      histogram->hvalue.tree.counters[1], all);

  TREE_OPERAND (modify, 1) = result;

  return true;
}
/* Connection to the outside world.  */
/* Struct for IR-dependent hooks.  */
struct value_prof_hooks
{
  /* Find list of values for which we want to measure histograms.  */
  void (*find_values_to_profile) (histogram_values *);

  /* Identify and exploit properties of values that are hard to analyze
     statically.  See value-prof.c for more detail.  */
  bool (*value_profile_transformations) (void);
};
/* Hooks for RTL-based versions (the only ones that currently work).  */
static struct value_prof_hooks rtl_value_prof_hooks =
{
  rtl_find_values_to_profile,
  rtl_value_profile_transformations
};

/* Install the RTL-based hooks; the assert checks we are not in tree
   (SSA) IR mode.  */
void
rtl_register_value_prof_hooks (void)
{
  value_prof_hooks = &rtl_value_prof_hooks;
  gcc_assert (!ir_type ());
}
/* Find values inside STMT for that we want to measure histograms for
   division/modulo optimization.  */

static void
tree_divmod_values_to_profile (tree stmt, histogram_values *values)
{
  tree assign, lhs, rhs, divisor, op0, type;
  histogram_value hist;

  /* NOTE(review): dropped structural lines (else branch, returns, braces)
     restored from upstream GCC; verify against the repository copy.  */

  /* Look through a RETURN_EXPR to the assignment it wraps, if any.  */
  if (TREE_CODE (stmt) == RETURN_EXPR)
    assign = TREE_OPERAND (stmt, 0);
  else
    assign = stmt;

  if (!assign
      || TREE_CODE (assign) != MODIFY_EXPR)
    return;
  lhs = TREE_OPERAND (assign, 0);
  type = TREE_TYPE (lhs);
  if (!INTEGRAL_TYPE_P (type))
    return;

  rhs = TREE_OPERAND (assign, 1);
  switch (TREE_CODE (rhs))
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      divisor = TREE_OPERAND (rhs, 1);
      op0 = TREE_OPERAND (rhs, 0);

      /* At most three histograms are pushed below.  */
      VEC_reserve (histogram_value, heap, *values, 3);

      if (is_gimple_reg (divisor))
	{
	  /* Check for the case where the divisor is the same value most
	     of the time.  */
	  hist = ggc_alloc (sizeof (*hist));
	  hist->hvalue.tree.value = divisor;
	  hist->hvalue.tree.stmt = stmt;
	  hist->type = HIST_TYPE_SINGLE_VALUE;
	  VEC_quick_push (histogram_value, *values, hist);
	}

      /* For mod, check whether it is not often a noop (or replaceable by
	 a few subtractions).  */
      if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
	  && TYPE_UNSIGNED (type))
	{
	  /* Check for a special case where the divisor is power of 2.  */
	  hist = ggc_alloc (sizeof (*hist));
	  hist->hvalue.tree.value = divisor;
	  hist->hvalue.tree.stmt = stmt;
	  hist->type = HIST_TYPE_POW2;
	  VEC_quick_push (histogram_value, *values, hist);

	  /* Interval histogram of op0/divisor, used by the mod-subtract
	     transformation.  */
	  hist = ggc_alloc (sizeof (*hist));
	  hist->hvalue.tree.stmt = stmt;
	  hist->hvalue.tree.value
		  = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
	  hist->type = HIST_TYPE_INTERVAL;
	  hist->hdata.intvl.int_start = 0;
	  hist->hdata.intvl.steps = 2;
	  VEC_quick_push (histogram_value, *values, hist);
	}
      return;

    default:
      return;
    }
}
1699 /* Find values inside STMT for that we want to measure histograms and adds
1700 them to list VALUES. */
1703 tree_values_to_profile (tree stmt
, histogram_values
*values
)
1705 if (flag_value_profile_transformations
)
1706 tree_divmod_values_to_profile (stmt
, values
);
/* Scan the whole function for values to profile and set the number of
   gcov counters each histogram needs.  */

static void
tree_find_values_to_profile (histogram_values *values)
{
  basic_block bb;
  block_stmt_iterator bsi;
  unsigned i;
  histogram_value hist;

  /* NOTE(review): dropped structural lines (declarations of bb/i,
     FOR_EACH_BB, braces, default case) restored from upstream GCC; verify
     against the repository copy.  */

  *values = NULL;
  FOR_EACH_BB (bb)
    for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
      tree_values_to_profile (bsi_stmt (bsi), values);
  /* Remember the vector so value_profile_transformations can free it.  */
  static_values = *values;

  for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
    {
      switch (hist->type)
	{
	case HIST_TYPE_INTERVAL:
	  if (dump_file)
	    {
	      fprintf (dump_file, "Interval counter for tree ");
	      print_generic_expr (dump_file, hist->hvalue.tree.stmt,
				  TDF_SLIM);
	      fprintf (dump_file, ", range %d -- %d.\n",
		       hist->hdata.intvl.int_start,
		       (hist->hdata.intvl.int_start
			+ hist->hdata.intvl.steps - 1));
	    }
	  /* One counter per step plus two for out-of-range hits.  */
	  hist->n_counters = hist->hdata.intvl.steps + 2;
	  break;

	case HIST_TYPE_POW2:
	  if (dump_file)
	    {
	      fprintf (dump_file, "Pow2 counter for tree ");
	      print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
	      fprintf (dump_file, ".\n");
	    }
	  hist->n_counters = 2;
	  break;

	case HIST_TYPE_SINGLE_VALUE:
	  if (dump_file)
	    {
	      fprintf (dump_file, "Single value counter for tree ");
	      print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
	      fprintf (dump_file, ".\n");
	    }
	  hist->n_counters = 3;
	  break;

	case HIST_TYPE_CONST_DELTA:
	  if (dump_file)
	    {
	      fprintf (dump_file, "Constant delta counter for tree ");
	      print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
	      fprintf (dump_file, ".\n");
	    }
	  hist->n_counters = 4;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Hooks for the tree-SSA based implementation.  */
static struct value_prof_hooks tree_value_prof_hooks = {
  tree_find_values_to_profile,
  tree_value_profile_transformations
};

/* Install the tree-based hooks; the assert checks we are in tree (SSA)
   IR mode.  */
void
tree_register_value_prof_hooks (void)
{
  value_prof_hooks = &tree_value_prof_hooks;
  gcc_assert (ir_type ());
}
1789 /* IR-independent entry points. */
1791 find_values_to_profile (histogram_values
*values
)
1793 (value_prof_hooks
->find_values_to_profile
) (values
);
1797 value_profile_transformations (void)
1799 bool retval
= (value_prof_hooks
->value_profile_transformations
) ();
1800 VEC_free (histogram_value
, heap
, static_values
);
1805 gate_handle_value_profile_transformations (void)
1807 return flag_branch_probabilities
1808 && flag_profile_values
1809 && !flag_tree_based_profiling
1810 && (flag_value_profile_transformations
1811 || flag_speculative_prefetching
);
/* Do optimizations based on expression value profiles.  */
static void
rest_of_handle_value_profile_transformations (void)
{
  /* If any transformation fired, new blocks and edges were inserted;
     clean the CFG up afterwards.  */
  if (value_profile_transformations ())
    cleanup_cfg (CLEANUP_EXPENSIVE);
}
1823 struct tree_opt_pass pass_value_profile_transformations
=
1826 gate_handle_value_profile_transformations
, /* gate */
1827 rest_of_handle_value_profile_transformations
, /* execute */
1830 0, /* static_pass_number */
1832 0, /* properties_required */
1833 0, /* properties_provided */
1834 0, /* properties_destroyed */
1835 0, /* todo_flags_start */
1836 TODO_dump_func
, /* todo_flags_finish */