1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
/* Dispatch table selecting the RTL- or tree-level implementation of
   value profiling; presumably installed by a registration function
   outside this chunk -- TODO confirm.  */
44 static struct value_prof_hooks
*value_prof_hooks
;
46 /* This is the vector of histograms. Created in find_values_to_profile.
47 During profile generation, freed by instrument_values.
48 During profile use, freed by value_profile_transformations. */
50 static histogram_values static_values
= NULL
;
52 /* In this file value profile based optimizations are placed. Currently the
53 following optimizations are implemented (for more detailed descriptions
54 see comments at value_profile_transformations):
56 1) Division/modulo specialization. Provided that we can determine that the
57 operands of the division have some special properties, we may use it to
58 produce more effective code.
59 2) Speculative prefetching. If we are able to determine that the difference
60 between addresses accessed by a memory reference is usually constant, we
61 may add the prefetch instructions.
63 Every such optimization should add its requirements for profiled values to
64 insn_values_to_profile function. This function is called from branch_prob
65 in profile.c and the requested values are instrumented by it in the first
66 compilation with -fprofile-arcs. The optimization may then read the
67 gathered data in the second compilation with -fbranch-probabilities.
69 There are currently two versions, RTL-based and tree-based. Over time
70 the RTL-based version may go away.
72 In the RTL-based version, the measured data is appended as REG_VALUE_PROFILE
73 note to the instrumented insn. The argument to the note consists of an
74 EXPR_LIST where its members have the following meaning (from the first to
77 -- type of information gathered (HIST_TYPE*)
78 -- the expression that is profiled
79 -- list of counters starting from the first one.
81 In the tree-based version, the measured data is pointed to from the histograms
82 field of the statement annotation of the instrumented insns. It is
83 kept as a linked list of struct histogram_value_t's, which contain the
84 same information as above. */
86 /* For speculative prefetching, the range in that we do not prefetch (because
87 we assume that it will be in cache anyway). The asymmetry between min and
88 max range is trying to reflect the fact that the sequential prefetching
89 of the data is commonly done directly by hardware. Nevertheless, these
90 values are just a guess and should of course be target-specific.
92 FIXME: There is no tree form of speculative prefetching as yet.
94 FIXME: A better approach to instrumentation in the profile-generation
95 pass is to generate calls to magic library functions (to be added to
96 libgcc) rather than inline code. This approach will probably be
97 necessary to get tree-based speculative prefetching working in a useful
98 fashion, as inline code bloats things so much the rest of the compiler has
99 serious problems dealing with it (judging from the rtl behavior). */
/* Address-delta window [MIN, MAX] inside which no prefetch is emitted
   (see the comment above: such small strides are assumed to be covered
   by hardware sequential prefetching).  The #ifndef guards let a target
   override these defaults.  NOTE(review): the matching #endif lines are
   not visible in this extraction.  */
101 #ifndef NOPREFETCH_RANGE_MIN
102 #define NOPREFETCH_RANGE_MIN (-16)
104 #ifndef NOPREFETCH_RANGE_MAX
105 #define NOPREFETCH_RANGE_MAX 32
/* Forward declarations for the file-static RTL- and tree-level helpers
   defined below.  */
108 static void rtl_divmod_values_to_profile (rtx
, histogram_values
*);
110 static bool insn_prefetch_values_to_profile (rtx
, histogram_values
*);
111 static int find_mem_reference_1 (rtx
*, void *);
112 static void find_mem_reference_2 (rtx
, rtx
, void *);
113 static bool find_mem_reference (rtx
, rtx
*, int *);
116 static void rtl_values_to_profile (rtx
, histogram_values
*);
117 static rtx
rtl_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
118 rtx
, gcov_type
, int);
119 static rtx
rtl_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
, int);
120 static rtx
rtl_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
123 static rtx
gen_speculative_prefetch (rtx
, gcov_type
, int);
125 static bool rtl_divmod_fixed_value_transform (rtx
);
126 static bool rtl_mod_pow2_value_transform (rtx
);
127 static bool rtl_mod_subtract_transform (rtx
);
129 static bool speculative_prefetching_transform (rtx
);
131 static tree
tree_divmod_fixed_value (tree
, tree
, tree
, tree
,
132 tree
, int, gcov_type
, gcov_type
);
133 static tree
tree_mod_pow2 (tree
, tree
, tree
, tree
, int, gcov_type
, gcov_type
);
134 static tree
tree_mod_subtract (tree
, tree
, tree
, tree
, int, int, int,
135 gcov_type
, gcov_type
, gcov_type
);
136 static bool tree_divmod_fixed_value_transform (tree
);
137 static bool tree_mod_pow2_value_transform (tree
);
138 static bool tree_mod_subtract_transform (tree
);
141 /* Find values inside INSN for that we want to measure histograms for
142 division/modulo optimization and stores them to VALUES.  */
/* NOTE(review): the embedded original line numbers jump (144->146,
   158->161, ...), so braces, early returns and switch labels of this
   function are missing from this extraction.  */
144 rtl_divmod_values_to_profile (rtx insn
, histogram_values
*values
)
146 rtx set
, set_src
, op1
, op2
;
147 enum machine_mode mode
;
148 histogram_value hist
;
153 set
= single_set (insn
);
157 mode
= GET_MODE (SET_DEST (set
));
/* Only integral modes are interesting for div/mod profiling.  */
158 if (!INTEGRAL_MODE_P (mode
))
161 set_src
= SET_SRC (set
);
162 switch (GET_CODE (set_src
))
168 op1
= XEXP (set_src
, 0);
169 op2
= XEXP (set_src
, 1);
/* A divisor with side effects cannot be re-evaluated by the counters.  */
170 if (side_effects_p (op2
))
173 /* Check for a special case where the divisor is power of 2.  */
174 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
176 hist
= ggc_alloc (sizeof (*hist
));
177 hist
->hvalue
.rtl
.value
= op2
;
178 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
179 hist
->hvalue
.rtl
.mode
= mode
;
180 hist
->hvalue
.rtl
.insn
= insn
;
181 hist
->type
= HIST_TYPE_POW2
;
182 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
185 /* Check whether the divisor is not in fact a constant.  */
186 if (!CONSTANT_P (op2
))
188 hist
= ggc_alloc (sizeof (*hist
));
189 hist
->hvalue
.rtl
.value
= op2
;
190 hist
->hvalue
.rtl
.mode
= mode
;
191 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
192 hist
->hvalue
.rtl
.insn
= insn
;
193 hist
->type
= HIST_TYPE_SINGLE_VALUE
;
194 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
197 /* For mod, check whether it is not often a noop (or replaceable by
198 a few subtractions).  */
199 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
203 hist
= ggc_alloc (sizeof (*hist
));
/* Profile op1/op2: an interval histogram on the quotient (starting at 0,
   2 steps) tells how often the mod is a no-op or a few subtractions.  */
205 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
206 hist
->hvalue
.rtl
.value
= force_operand (tmp
, NULL_RTX
);
207 hist
->hvalue
.rtl
.seq
= get_insns ();
209 hist
->hvalue
.rtl
.mode
= mode
;
210 hist
->hvalue
.rtl
.insn
= insn
;
211 hist
->type
= HIST_TYPE_INTERVAL
;
212 hist
->hdata
.intvl
.int_start
= 0;
213 hist
->hdata
.intvl
.steps
= 2;
214 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
225 /* Called from find_mem_reference through for_each_rtx, finds a memory
226 reference.  I.e. if *EXPR is a MEM, the reference to this MEM is stored
227 to *RET and the traversing of the expression is interrupted by returning 1.
228 Otherwise 0 is returned.  */
/* NOTE(review): the function body was dropped by the extraction
   (original lines 232-241 are missing); only the signature remains.  */
231 find_mem_reference_1 (rtx
*expr
, void *ret
)
243 /* Called from find_mem_reference through note_stores to find out whether
244 the memory reference MEM is a store.  I.e. if EXPR == MEM, the variable
245 FMR2_WRITE is set to true.  */
/* Communication variable for find_mem_reference_2; set when the found
   MEM is written to.  NOTE(review): the function body was dropped by
   the extraction; only the signature remains.  */
247 static int fmr2_write
;
249 find_mem_reference_2 (rtx expr
, rtx pat ATTRIBUTE_UNUSED
, void *mem
)
255 /* Find a memory reference inside INSN, return it in MEM.  Set WRITE to true
256 if it is a write of the mem.  Return false if no memory reference is found,
   true otherwise.  */
260 find_mem_reference (rtx insn
, rtx
*mem
, int *write
)
/* First locate a MEM in the pattern...  */
263 for_each_rtx (&PATTERN (insn
), find_mem_reference_1
, mem
);
/* ...then ask note_stores whether that MEM is stored to (sets
   fmr2_write via find_mem_reference_2).  */
269 note_stores (PATTERN (insn
), find_mem_reference_2
, *mem
);
274 /* Find values inside INSN for that we want to measure histograms for
275 a speculative prefetching.  Add them to the list VALUES.
276 Returns true if we found any such value, false otherwise.  */
/* NOTE(review): interior lines (braces/returns) are missing from this
   extraction -- the embedded original line numbers jump.  */
279 insn_prefetch_values_to_profile (rtx insn
, histogram_values
* values
)
283 histogram_value hist
;
285 /* It only makes sense to look for memory references in ordinary insns.  */
286 if (!NONJUMP_INSN_P (insn
))
289 if (!find_mem_reference (insn
, &mem
, &write
))
292 address
= XEXP (mem
, 0);
/* Addresses with side effects or constant addresses are not useful
   candidates for a measured address delta.  */
293 if (side_effects_p (address
))
296 if (CONSTANT_P (address
))
299 hist
= ggc_alloc (sizeof (*hist
));
300 hist
->hvalue
.rtl
.value
= address
;
301 hist
->hvalue
.rtl
.mode
= GET_MODE (address
);
302 hist
->hvalue
.rtl
.seq
= NULL_RTX
;
303 hist
->hvalue
.rtl
.insn
= insn
;
304 hist
->type
= HIST_TYPE_CONST_DELTA
;
305 VEC_safe_push (histogram_value
, heap
, *values
, hist
);
310 /* Find values inside INSN for that we want to measure histograms and adds
311 them to list VALUES (increasing the record of its length in N_VALUES).  */
/* Simple dispatcher: each profiling flag enables its own collector.  */
313 rtl_values_to_profile (rtx insn
, histogram_values
*values
)
315 if (flag_value_profile_transformations
)
316 rtl_divmod_values_to_profile (insn
, values
);
319 if (flag_speculative_prefetching
)
320 insn_prefetch_values_to_profile (insn
, values
);
324 /* Find list of values for that we want to measure histograms.  */
/* NOTE(review): extraction gaps -- switch framing, dump_file guards and
   several case labels of the original are missing here.  */
326 rtl_find_values_to_profile (histogram_values
*values
)
329 unsigned i
, libcall_level
;
330 histogram_value hist
;
/* Death notes are needed by the collectors below.  */
332 life_analysis (NULL
, PROP_DEATH_NOTES
);
336 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
337 rtl_values_to_profile (insn
, values
);
/* Remember the list so it can be freed later (see comment at
   static_values above).  */
338 static_values
= *values
;
/* Assign the number of profile counters each histogram kind needs.  */
340 for (i
= 0; VEC_iterate (histogram_value
, *values
, i
, hist
); i
++)
344 case HIST_TYPE_INTERVAL
:
347 "Interval counter for insn %d, range %d -- %d.\n",
348 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
),
349 hist
->hdata
.intvl
.int_start
,
350 (hist
->hdata
.intvl
.int_start
351 + hist
->hdata
.intvl
.steps
- 1));
/* steps in-range buckets plus two out-of-range buckets.  */
352 hist
->n_counters
= hist
->hdata
.intvl
.steps
+ 2;
358 "Pow2 counter for insn %d.\n",
359 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
360 hist
->n_counters
= 2;
363 case HIST_TYPE_SINGLE_VALUE
:
366 "Single value counter for insn %d.\n",
367 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
368 hist
->n_counters
= 3;
371 case HIST_TYPE_CONST_DELTA
:
374 "Constant delta counter for insn %d.\n",
375 INSN_UID ((rtx
)hist
->hvalue
.rtl
.insn
));
376 hist
->n_counters
= 4;
383 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
386 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
387 them to identify and exploit properties of values that are hard to analyze
390 We do following transformations:
396 where b is almost always a constant N is transformed to
409 where b is almost always a power of 2 and the division is unsigned
410 TODO -- handle signed case as well
412 if ((b & (b - 1)) == 0)
417 Note that when b = 0, no error will occur and x = a; this is correct,
418 as result of such operation is undefined.
424 where a is almost always less than b and the division is unsigned
425 TODO -- handle signed case as well
435 where a is almost always less than 2 * b and the division is unsigned
436 TODO -- handle signed case as well
444 It would be possible to continue analogically for K * b for other small
445 K's, but it is probably not useful.
449 Read or write of mem[address], where the value of address changes usually
450 by a constant C != 0 between the following accesses to the computation; with
451 -fspeculative-prefetching we then add a prefetch of address + C before
452 the insn. This handles prefetching of several interesting cases in addition
453 to a simple prefetching for addresses that are induction variables, e. g.
454 linked lists allocated sequentially (even in case they are processed
457 TODO -- we should also check whether there is not (usually) a small
458 difference with the adjacent memory references, so that we do
459 not issue overlapping prefetches. Also we should employ some
460 heuristics to eliminate cases where prefetching evidently spoils
462 -- it should somehow cooperate with the loop optimizer prefetching
466 There are other useful cases that could be handled by a similar mechanism,
469 for (i = 0; i < n; i++)
472 transform to (for constant N):
475 for (i = 0; i < N; i++)
478 for (i = 0; i < n; i++)
480 making unroller happy. Since this may grow the code significantly,
481 we would have to be very careful here. */
/* Walk all insns and apply the profitable RTL value-profile
   transformations; the new code is queued on edges and committed at the
   end.  NOTE(review): extraction gaps -- braces, the `changed' flag and
   the return statement of the original are missing here.  */
484 rtl_value_profile_transformations (void)
489 for (insn
= get_insns (); insn
; insn
= next
)
/* Remember the successor now -- a transformation may split the block
   around INSN.  */
491 next
= NEXT_INSN (insn
);
496 /* Scan for insn carrying a histogram.  */
497 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
500 /* Ignore cold areas -- we are growing a code.  */
501 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
506 fprintf (dump_file
, "Trying transformations on insn %d\n",
508 print_rtl_single (dump_file
, insn
);
511 /* Transformations: */
512 if (flag_value_profile_transformations
513 && (rtl_mod_subtract_transform (insn
)
514 || rtl_divmod_fixed_value_transform (insn
)
515 || rtl_mod_pow2_value_transform (insn
)))
518 if (flag_speculative_prefetching
519 && speculative_prefetching_transform (insn
))
/* Materialize all code the transformations queued on edges.  */
526 commit_edge_insertions ();
527 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
533 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
534 and OP2, whose value is expected to be VALUE, result TARGET and
535 probability of taking the optimal path PROB).  */
/* NOTE(review): extraction gaps -- start_sequence/end_sequence framing
   and the return of the built sequence are missing from this view.  */
537 rtl_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
538 rtx target
, rtx op1
, rtx op2
, gcov_type value
,
542 rtx neq_label
= gen_label_rtx ();
543 rtx end_label
= gen_label_rtx ();
/* Copy OP2 so the comparison cannot clobber the original operand.  */
550 tmp
= gen_reg_rtx (mode
);
551 emit_move_insn (tmp
, copy_rtx (op2
));
/* If the divisor differs from the expected constant, take the slow path.  */
556 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
557 NULL_RTX
, neq_label
);
559 /* Add branch probability to jump we just created.  */
560 jump
= get_last_insn ();
561 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
562 GEN_INT (REG_BR_PROB_BASE
- prob
),
/* Fast path: divide/mod by the known constant VALUE.  */
565 tmp1
= simplify_gen_binary (operation
, mode
,
566 copy_rtx (op1
), GEN_INT (value
))
567 tmp1
= force_operand (tmp1
, target
);
569 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
571 emit_jump_insn (gen_jump (end_label
));
/* Slow path: the generic operation with the run-time divisor.  */
574 emit_label (neq_label
);
575 tmp1
= simplify_gen_binary (operation
, mode
,
576 copy_rtx (op1
), copy_rtx (tmp
));
577 tmp1
= force_operand (tmp1
, target
);
579 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
581 emit_label (end_label
);
583 sequence
= get_insns ();
585 rebuild_jump_labels (sequence
);
589 /* Do transform 1) on INSN if applicable.  Reads the SINGLE_VALUE
   histogram attached as a REG_VALUE_PROFILE note, and if the divisor
   matched one constant at least half of the time, queues specialized
   code (rtl_divmod_fixed_value) on the edge before INSN.
   NOTE(review): extraction gaps -- braces and early returns of the
   original are missing from this view.  */
591 rtl_divmod_fixed_value_transform (rtx insn
)
593 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
595 enum machine_mode mode
;
596 gcov_type val
, count
, all
;
600 set
= single_set (insn
);
604 set_src
= SET_SRC (set
);
605 set_dest
= SET_DEST (set
);
606 code
= GET_CODE (set_src
);
607 mode
= GET_MODE (set_dest
);
609 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
/* Fixed: use the integer operand index 0, not the boolean literal
   `false', matching the sibling transforms below.  */
611 op1
= XEXP (set_src
, 0);
612 op2
= XEXP (set_src
, 1);
/* Locate the SINGLE_VALUE histogram note; GEN_INT comparison works
   because small CONST_INTs are shared.  */
614 for (histogram
= REG_NOTES (insn
);
616 histogram
= XEXP (histogram
, 1))
617 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
618 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
/* Unpack the note: profiled expression, most common value, its count,
   and the total number of executions.  */
624 histogram
= XEXP (XEXP (histogram
, 0), 1);
625 value
= XEXP (histogram
, 0);
626 histogram
= XEXP (histogram
, 1);
627 val
= INTVAL (XEXP (histogram
, 0));
628 histogram
= XEXP (histogram
, 1);
629 count
= INTVAL (XEXP (histogram
, 0));
630 histogram
= XEXP (histogram
, 1);
631 all
= INTVAL (XEXP (histogram
, 0));
633 /* We require that count be at least half of all; this means
634 that for the transformation to fire the value must be constant
635 at least 50% of time (and 75% gives the guarantee of usage).  */
636 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
640 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
643 /* Compute probability of taking the optimal path.  */
644 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
646 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
649 insert_insn_on_edge (
650 rtl_divmod_fixed_value (mode
, code
, set_dest
,
651 op1
, op2
, val
, prob
), e
);
656 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
657 and OP2, result TARGET and probability of taking the optimal path PROB).  */
/* NOTE(review): extraction gaps -- sequence framing and the return of
   the built sequence are missing from this view.  */
659 rtl_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
660 rtx op1
, rtx op2
, int prob
)
662 rtx tmp
, tmp1
, tmp2
, tmp3
, jump
;
663 rtx neq_label
= gen_label_rtx ();
664 rtx end_label
= gen_label_rtx ();
/* Copy OP2 so the power-of-two test cannot clobber the original.  */
671 tmp
= gen_reg_rtx (mode
);
672 emit_move_insn (tmp
, copy_rtx (op2
));
/* Power-of-two test: b is a power of 2 iff (b & (b - 1)) == 0.  */
677 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
679 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
681 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
682 NULL_RTX
, neq_label
);
684 /* Add branch probability to jump we just created.  */
685 jump
= get_last_insn ();
686 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
687 GEN_INT (REG_BR_PROB_BASE
- prob
),
/* Fast path: a % b == a & (b - 1) for power-of-two b.  */
690 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
693 emit_move_insn (copy_rtx (target
), tmp3
);
694 emit_jump_insn (gen_jump (end_label
));
/* Slow path: the generic modulo.  */
697 emit_label (neq_label
);
698 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
699 tmp1
= force_operand (tmp1
, target
);
701 emit_move_insn (target
, tmp1
);
703 emit_label (end_label
);
705 sequence
= get_insns ();
707 rebuild_jump_labels (sequence
);
711 /* Do transform 2) on INSN if applicable.  */
/* NOTE(review): extraction gaps -- braces and early returns of the
   original are missing from this view.  */
713 rtl_mod_pow2_value_transform (rtx insn
)
715 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
717 enum machine_mode mode
;
718 gcov_type wrong_values
, count
;
722 set
= single_set (insn
);
726 set_src
= SET_SRC (set
);
727 set_dest
= SET_DEST (set
);
728 code
= GET_CODE (set_src
);
729 mode
= GET_MODE (set_dest
);
733 op1
= XEXP (set_src
, 0);
734 op2
= XEXP (set_src
, 1);
/* Locate the POW2 histogram note attached to INSN.  */
736 for (histogram
= REG_NOTES (insn
);
738 histogram
= XEXP (histogram
, 1))
739 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
740 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
/* Unpack: profiled expression, number of non-power-of-2 divisors,
   number of power-of-2 divisors.  */
746 histogram
= XEXP (XEXP (histogram
, 0), 1);
747 value
= XEXP (histogram
, 0);
748 histogram
= XEXP (histogram
, 1);
749 wrong_values
= INTVAL (XEXP (histogram
, 0));
750 histogram
= XEXP (histogram
, 1);
751 count
= INTVAL (XEXP (histogram
, 0));
753 if (!rtx_equal_p (op2
, value
))
756 /* We require that we hit a power of two at least half of all evaluations.  */
757 if (count
< wrong_values
)
761 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
764 /* Compute probability of taking the optimal path.  */
765 all
= count
+ wrong_values
;
766 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
768 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
771 insert_insn_on_edge (
772 rtl_mod_pow2 (mode
, code
, set_dest
, op1
, op2
, prob
), e
);
777 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
778 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
779 probability of taking the optimal path(s) PROB1 and PROB2).  */
/* NOTE(review): extraction gaps -- sequence framing and the return of
   the built sequence are missing from this view.  */
781 rtl_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
782 rtx target
, rtx op1
, rtx op2
, int sub
, int prob1
, int prob2
)
785 rtx end_label
= gen_label_rtx ();
/* Copy OP2 so repeated comparisons cannot clobber the original.  */
793 tmp
= gen_reg_rtx (mode
);
794 emit_move_insn (tmp
, copy_rtx (op2
));
/* a % b == a when a < b (unsigned): done if already smaller.  */
799 emit_move_insn (target
, copy_rtx (op1
));
800 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
801 NULL_RTX
, end_label
);
803 /* Add branch probability to jump we just created.  */
804 jump
= get_last_insn ();
805 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
806 GEN_INT (prob1
), REG_NOTES (jump
));
/* Otherwise try up to SUB subtractions of b before falling back.  */
808 for (i
= 0; i
< sub
; i
++)
810 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
813 emit_move_insn (target
, tmp1
);
814 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
815 NULL_RTX
, end_label
);
817 /* Add branch probability to jump we just created.  */
818 jump
= get_last_insn ();
819 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
820 GEN_INT (prob2
), REG_NOTES (jump
));
/* Fall-back path: the generic modulo on the reduced value.  */
823 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
824 tmp1
= force_operand (tmp1
, target
);
826 emit_move_insn (target
, tmp1
);
828 emit_label (end_label
);
830 sequence
= get_insns ();
832 rebuild_jump_labels (sequence
);
836 /* Do transforms 3) and 4) on INSN if applicable.  */
/* NOTE(review): extraction gaps -- braces, early returns and the loop
   body that accumulates COUNT are missing from this view.  */
838 rtl_mod_subtract_transform (rtx insn
)
840 rtx set
, set_src
, set_dest
, op1
, op2
, histogram
;
842 enum machine_mode mode
;
843 gcov_type wrong_values
, counts
[2], count
, all
;
847 set
= single_set (insn
);
851 set_src
= SET_SRC (set
);
852 set_dest
= SET_DEST (set
);
853 code
= GET_CODE (set_src
);
854 mode
= GET_MODE (set_dest
);
858 op1
= XEXP (set_src
, 0);
859 op2
= XEXP (set_src
, 1);
/* Locate the INTERVAL histogram note attached to INSN.  */
861 for (histogram
= REG_NOTES (insn
);
863 histogram
= XEXP (histogram
, 1))
864 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
865 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
871 histogram
= XEXP (XEXP (histogram
, 0), 1);
872 histogram
= XEXP (histogram
, 1);
/* counts[0]: quotient was 0 (mod is a no-op); counts[1]: quotient was 1
   (one subtraction suffices).  */
875 for (i
= 0; i
< 2; i
++)
877 counts
[i
] = INTVAL (XEXP (histogram
, 0));
879 histogram
= XEXP (histogram
, 1);
/* Remaining two counters are the out-of-range executions.  */
881 wrong_values
= INTVAL (XEXP (histogram
, 0));
882 histogram
= XEXP (histogram
, 1);
883 wrong_values
+= INTVAL (XEXP (histogram
, 0));
886 /* We require that we use just subtractions in at least 50% of all
   evaluations.  */
889 for (i
= 0; i
< 2; i
++)
892 if (count
* 2 >= all
)
900 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
903 /* Compute probability of taking the optimal path(s).  */
904 prob1
= (counts
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
905 prob2
= (counts
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
907 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
910 insert_insn_on_edge (
911 rtl_mod_subtract (mode
, code
, set_dest
,
912 op1
, op2
, i
, prob1
, prob2
), e
);
918 /* Generate code for transformation 5 for mem with ADDRESS and a constant
919 step DELTA.  WRITE is true if the reference is a store to mem.  */
/* NOTE(review): extraction gaps -- sequence framing and the return of
   the built sequence are missing from this view.  */
922 gen_speculative_prefetch (rtx address
, gcov_type delta
, int write
)
927 /* TODO: we do the prefetching for just one iteration ahead, which
928 often is not enough.  */
/* Build ADDRESS + DELTA, via plus_constant when the addressing mode
   allows an offset directly.  */
930 if (offsettable_address_p (0, VOIDmode
, address
))
931 tmp
= plus_constant (copy_rtx (address
), delta
);
934 tmp
= simplify_gen_binary (PLUS
, Pmode
,
935 copy_rtx (address
), GEN_INT (delta
));
936 tmp
= force_operand (tmp
, NULL
);
/* Legitimize the operand for the target's prefetch pattern if needed.  */
938 if (! (*insn_data
[(int)CODE_FOR_prefetch
].operand
[0].predicate
)
939 (tmp
, insn_data
[(int)CODE_FOR_prefetch
].operand
[0].mode
))
940 tmp
= force_reg (Pmode
, tmp
);
941 emit_insn (gen_prefetch (tmp
, GEN_INT (write
), GEN_INT (3)));
942 sequence
= get_insns ();
948 /* Do transform 5) on INSN if applicable.  */
/* NOTE(review): extraction gaps -- braces and early returns of the
   original are missing from this view.  */
951 speculative_prefetching_transform (rtx insn
)
953 rtx histogram
, value
;
954 gcov_type val
, count
, all
;
/* Do not grow cold code.  */
959 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
962 if (!find_mem_reference (insn
, &mem
, &write
))
965 address
= XEXP (mem
, 0);
966 if (side_effects_p (address
))
969 if (CONSTANT_P (address
))
/* Locate the CONST_DELTA histogram note attached to INSN.  */
972 for (histogram
= REG_NOTES (insn
);
974 histogram
= XEXP (histogram
, 1))
975 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
976 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA
))
982 histogram
= XEXP (XEXP (histogram
, 0), 1);
983 value
= XEXP (histogram
, 0);
984 histogram
= XEXP (histogram
, 1);
985 /* Skip last value referenced.  */
986 histogram
= XEXP (histogram
, 1);
987 val
= INTVAL (XEXP (histogram
, 0));
988 histogram
= XEXP (histogram
, 1);
989 count
= INTVAL (XEXP (histogram
, 0));
990 histogram
= XEXP (histogram
, 1);
991 all
= INTVAL (XEXP (histogram
, 0));
993 /* With that few executions we do not really have a reason to optimize the
994 statement, and more importantly, the data about differences of addresses
995 are spoiled by the first item that had no previous value to compare
   with.  */
1000 /* We require that count be at least half of all; this means
1001 that for the transformation to fire the value must be constant
1002 at least 50% of time (and 75% gives the guarantee of usage).  */
1003 if (!rtx_equal_p (address
, value
) || 2 * count
< all
)
1006 /* If the difference is too small, it does not make too much sense to
1007 prefetch, as the memory is probably already in cache.  */
1008 if (val
>= NOPREFETCH_RANGE_MIN
&& val
<= NOPREFETCH_RANGE_MAX
)
1012 fprintf (dump_file
, "Speculative prefetching for insn %d\n",
1015 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
1017 insert_insn_on_edge (gen_speculative_prefetch (address
, val
, write
), e
);
1021 #endif /* HAVE_prefetch */
1023 /* Tree based transformations. */
/* Walk all statements of all hot basic blocks and apply the tree-level
   value-profile transformations; returns whether anything changed.
   NOTE(review): extraction gaps -- the FOR_EACH_BB loop header, braces
   and the return statement of the original are missing here.  */
1025 tree_value_profile_transformations (void)
1028 block_stmt_iterator bsi
;
1029 bool changed
= false;
1033 /* Ignore cold areas -- we are enlarging the code.  */
1034 if (!maybe_hot_bb_p (bb
))
1037 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1039 tree stmt
= bsi_stmt (bsi
);
1040 stmt_ann_t ann
= get_stmt_ann (stmt
);
1041 histogram_value th
= ann
->histograms
;
1047 fprintf (dump_file
, "Trying transformations on insn ");
1048 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
1051 /* Transformations: */
1052 /* The order of things in this conditional controls which
1053 transformation is used when more than one is applicable. */
1054 /* It is expected that any code added by the transformations
1055 will be added before the current statement, and that the
1056 current statement remain valid (although possibly
1057 modified) upon return. */
1058 if (flag_value_profile_transformations
1059 && (tree_mod_subtract_transform (stmt
)
1060 || tree_divmod_fixed_value_transform (stmt
)
1061 || tree_mod_pow2_value_transform (stmt
)))
1064 /* Original statement may no longer be in the same block. */
1065 bb
= bb_for_stmt (stmt
);
1068 /* Free extra storage from compute_value_histograms. */
1071 free (th
->hvalue
.tree
.counters
);
1072 th
= th
->hvalue
.tree
.next
;
1074 ann
->histograms
= 0;
1086 /* Generate code for transformation 1 (with OPERATION, operands OP1
1087 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
1088 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1089 within roundoff error).  This generates the result into a temp and returns
1090 the temp; it does not replace or alter the original STMT. */
/* NOTE(review): extraction gaps -- the bb1end/bb2end/bb3end assignments,
   some edge/count bookkeeping and the return of tmp2 are missing from
   this view.  */
1092 tree_divmod_fixed_value (tree stmt
, tree operation
,
1093 tree op1
, tree op2
, tree value
, int prob
, gcov_type count
,
1096 tree stmt1
, stmt2
, stmt3
;
1097 tree tmp1
, tmp2
, tmpv
;
1098 tree label_decl1
= create_artificial_label ();
1099 tree label_decl2
= create_artificial_label ();
1100 tree label_decl3
= create_artificial_label ();
1101 tree label1
, label2
, label3
;
1102 tree bb1end
, bb2end
, bb3end
;
1103 basic_block bb
, bb2
, bb3
, bb4
;
1104 tree optype
= TREE_TYPE (operation
);
1105 edge e12
, e13
, e23
, e24
, e34
;
1106 block_stmt_iterator bsi
;
1108 bb
= bb_for_stmt (stmt
);
1109 bsi
= bsi_for_stmt (stmt
);
/* bb: compare the run-time divisor (tmp1) against the expected
   constant VALUE (tmpv) and branch.  */
1111 tmpv
= create_tmp_var (optype
, "PROF");
1112 tmp1
= create_tmp_var (optype
, "PROF");
1113 stmt1
= build2 (MODIFY_EXPR
, optype
, tmpv
, fold_convert (optype
, value
));
1114 stmt2
= build2 (MODIFY_EXPR
, optype
, tmp1
, op2
);
1115 stmt3
= build3 (COND_EXPR
, void_type_node
,
1116 build2 (NE_EXPR
, boolean_type_node
, tmp1
, tmpv
),
1117 build1 (GOTO_EXPR
, void_type_node
, label_decl2
),
1118 build1 (GOTO_EXPR
, void_type_node
, label_decl1
));
1119 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1120 bsi_insert_before (&bsi
, stmt2
, BSI_SAME_STMT
);
1121 bsi_insert_before (&bsi
, stmt3
, BSI_SAME_STMT
);
/* bb2 (fast path): operation with the known constant divisor tmpv.  */
1124 tmp2
= create_tmp_var (optype
, "PROF");
1125 label1
= build1 (LABEL_EXPR
, void_type_node
, label_decl1
);
1126 stmt1
= build2 (MODIFY_EXPR
, optype
, tmp2
,
1127 build2 (TREE_CODE (operation
), optype
, op1
, tmpv
));
1128 bsi_insert_before (&bsi
, label1
, BSI_SAME_STMT
);
1129 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
/* bb3 (slow path): the generic operation with the run-time divisor.  */
1132 label2
= build1 (LABEL_EXPR
, void_type_node
, label_decl2
);
1133 stmt1
= build2 (MODIFY_EXPR
, optype
, tmp2
,
1134 build2 (TREE_CODE (operation
), optype
, op1
, op2
));
1135 bsi_insert_before (&bsi
, label2
, BSI_SAME_STMT
);
1136 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
/* bb4: join label after both paths.  */
1139 label3
= build1 (LABEL_EXPR
, void_type_node
, label_decl3
);
1140 bsi_insert_before (&bsi
, label3
, BSI_SAME_STMT
);
1143 /* Edge e23 connects bb2 to bb3, etc. */
1144 e12
= split_block (bb
, bb1end
);
1147 e23
= split_block (bb2
, bb2end
);
1149 bb3
->count
= all
- count
;
1150 e34
= split_block (bb3
, bb3end
);
/* Rewire e12 as the false (equal) edge with probability PROB.  */
1154 e12
->flags
&= ~EDGE_FALLTHRU
;
1155 e12
->flags
|= EDGE_FALSE_VALUE
;
1156 e12
->probability
= prob
;
1159 e13
= make_edge (bb
, bb3
, EDGE_TRUE_VALUE
);
1160 e13
->probability
= REG_BR_PROB_BASE
- prob
;
1161 e13
->count
= all
- count
;
1165 e24
= make_edge (bb2
, bb4
, EDGE_FALLTHRU
);
1166 e24
->probability
= REG_BR_PROB_BASE
;
1169 e34
->probability
= REG_BR_PROB_BASE
;
1170 e34
->count
= all
- count
;
1175 /* Do transform 1) on INSN if applicable. */
/* NOTE(review): extraction gaps -- braces and early returns of the
   original are missing from this view.  */
1177 tree_divmod_fixed_value_transform (tree stmt
)
1179 stmt_ann_t ann
= get_stmt_ann (stmt
);
1180 histogram_value histogram
;
1181 enum tree_code code
;
1182 gcov_type val
, count
, all
;
1183 tree modify
, op
, op1
, op2
, result
, value
, tree_val
;
/* Look through `return (x = a / b)' to the embedded MODIFY_EXPR.  */
1187 if (TREE_CODE (stmt
) == RETURN_EXPR
1188 && TREE_OPERAND (stmt
, 0)
1189 && TREE_CODE (TREE_OPERAND (stmt
, 0)) == MODIFY_EXPR
)
1190 modify
= TREE_OPERAND (stmt
, 0);
1191 if (TREE_CODE (modify
) != MODIFY_EXPR
)
1193 op
= TREE_OPERAND (modify
, 1);
1194 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
1196 code
= TREE_CODE (op
);
1198 if (code
!= TRUNC_DIV_EXPR
&& code
!= TRUNC_MOD_EXPR
)
1201 op1
= TREE_OPERAND (op
, 0);
1202 op2
= TREE_OPERAND (op
, 1);
1203 if (!ann
->histograms
)
/* Find a SINGLE_VALUE histogram on this statement's annotation.  */
1206 for (histogram
= ann
->histograms
; histogram
; histogram
= histogram
->hvalue
.tree
.next
)
1207 if (histogram
->type
== HIST_TYPE_SINGLE_VALUE
)
/* counters[0] = the common value, [1] = its hit count, [2] = total.  */
1213 value
= histogram
->hvalue
.tree
.value
;
1214 val
= histogram
->hvalue
.tree
.counters
[0];
1215 count
= histogram
->hvalue
.tree
.counters
[1];
1216 all
= histogram
->hvalue
.tree
.counters
[2];
1218 /* We require that count is at least half of all; this means
1219 that for the transformation to fire the value must be constant
1220 at least 50% of time (and 75% gives the guarantee of usage). */
1221 if (simple_cst_equal (op2
, value
) != 1 || 2 * count
< all
)
1224 /* Compute probability of taking the optimal path. */
1225 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
/* Build the gcov-width constant from the 64-bit counter value.  */
1227 tree_val
= build_int_cst_wide (get_gcov_type (),
1228 (unsigned HOST_WIDE_INT
) val
,
1229 val
>> (HOST_BITS_PER_WIDE_INT
- 1) >> 1);
1230 result
= tree_divmod_fixed_value (stmt
, op
, op1
, op2
, tree_val
, prob
, count
, all
);
1234 fprintf (dump_file
, "Div/mod by constant ");
1235 print_generic_expr (dump_file
, value
, TDF_SLIM
);
1236 fprintf (dump_file
, "=");
1237 print_generic_expr (dump_file
, tree_val
, TDF_SLIM
);
1238 fprintf (dump_file
, " transformation on insn ");
1239 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
/* Replace the division's RHS with the temp computed by the new code.  */
1242 TREE_OPERAND (modify
, 1) = result
;
1247 /* Generate code for transformation 2 (with OPERATION, operands OP1
1248 and OP2, parent modify-expr STMT and probability of taking the optimal
1249 path PROB, which is equivalent to COUNT/ALL within roundoff error).
1250 This generates the result into a temp and returns
1251 the temp; it does not replace or alter the original STMT. */
/* NOTE(review): mangled extraction -- the return-type line, braces, the
   assignments to bb1end/bb2end/bb3end and the final return of RESULT are
   not visible; treat as read-only until restored from the original file.
   What IS visible builds, before STMT:
     bb1: tmp1 = op2; tmp2 = op2 - 1; tmp3 = tmp2 & tmp1;
          if (tmp3 != 0) goto label2; else goto label1;
     bb2 (label1, op2 was a power of 2): result = op1 & tmp2;
     bb3 (label2, fallback): result = op1 <operation> op2;
   then splits the blocks and wires edge probabilities/counts from
   PROB/COUNT/ALL.  */
1253 tree_mod_pow2 (tree stmt
, tree operation
, tree op1
, tree op2
, int prob
,
1254 gcov_type count
, gcov_type all
)
1256 tree stmt1
, stmt2
, stmt3
, stmt4
;
1257 tree tmp1
, tmp2
, tmp3
;
1258 tree label_decl1
= create_artificial_label ();
1259 tree label_decl2
= create_artificial_label ();
1260 tree label_decl3
= create_artificial_label ();
1261 tree label1
, label2
, label3
;
1262 tree bb1end
, bb2end
, bb3end
;
1263 basic_block bb
, bb2
, bb3
, bb4
;
1264 tree optype
= TREE_TYPE (operation
);
1265 edge e12
, e13
, e23
, e24
, e34
;
1266 block_stmt_iterator bsi
;
1267 tree result
= create_tmp_var (optype
, "PROF");
1269 bb
= bb_for_stmt (stmt
);
1270 bsi
= bsi_for_stmt (stmt
);
/* Build the three temporaries and the power-of-2 test
   tmp3 = (op2 - 1) & op2; tmp3 == 0 iff op2 is a power of 2.  */
1272 tmp1
= create_tmp_var (optype
, "PROF");
1273 tmp2
= create_tmp_var (optype
, "PROF");
1274 tmp3
= create_tmp_var (optype
, "PROF");
1275 stmt1
= build2 (MODIFY_EXPR
, optype
, tmp1
, fold_convert (optype
, op2
));
1276 stmt2
= build2 (MODIFY_EXPR
, optype
, tmp2
,
1277 build2 (PLUS_EXPR
, optype
, op2
, integer_minus_one_node
));
1278 stmt3
= build2 (MODIFY_EXPR
, optype
, tmp3
,
1279 build2 (BIT_AND_EXPR
, optype
, tmp2
, tmp1
));
1280 stmt4
= build3 (COND_EXPR
, void_type_node
,
1281 build2 (NE_EXPR
, boolean_type_node
, tmp3
, integer_zero_node
),
1282 build1 (GOTO_EXPR
, void_type_node
, label_decl2
),
1283 build1 (GOTO_EXPR
, void_type_node
, label_decl1
));
1284 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1285 bsi_insert_before (&bsi
, stmt2
, BSI_SAME_STMT
);
1286 bsi_insert_before (&bsi
, stmt3
, BSI_SAME_STMT
);
1287 bsi_insert_before (&bsi
, stmt4
, BSI_SAME_STMT
);
/* Fast path: modulus by a power of 2 is a bitwise AND with op2-1.  */
1290 /* tmp2 == op2-1 inherited from previous block */
1291 label1
= build1 (LABEL_EXPR
, void_type_node
, label_decl1
);
1292 stmt1
= build2 (MODIFY_EXPR
, optype
, result
,
1293 build2 (BIT_AND_EXPR
, optype
, op1
, tmp2
));
1294 bsi_insert_before (&bsi
, label1
, BSI_SAME_STMT
);
1295 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
/* Fallback: recompute with the original operation's tree code.  */
1298 label2
= build1 (LABEL_EXPR
, void_type_node
, label_decl2
);
1299 stmt1
= build2 (MODIFY_EXPR
, optype
, result
,
1300 build2 (TREE_CODE (operation
), optype
, op1
, op2
));
1301 bsi_insert_before (&bsi
, label2
, BSI_SAME_STMT
);
1302 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
/* Join label after both paths.  */
1305 label3
= build1 (LABEL_EXPR
, void_type_node
, label_decl3
);
1306 bsi_insert_before (&bsi
, label3
, BSI_SAME_STMT
);
1309 /* Edge e23 connects bb2 to bb3, etc. */
/* NOTE(review): bb1end/bb2end/bb3end are used here but their assignments
   are among the dropped lines -- confirm against the original source.  */
1310 e12
= split_block (bb
, bb1end
);
1313 e23
= split_block (bb2
, bb2end
);
1315 bb3
->count
= all
- count
;
1316 e34
= split_block (bb3
, bb3end
);
/* The false edge of the COND_EXPR (tmp3 == 0, i.e. power of 2) falls
   into bb2, the fast path, taken with probability PROB.  */
1320 e12
->flags
&= ~EDGE_FALLTHRU
;
1321 e12
->flags
|= EDGE_FALSE_VALUE
;
1322 e12
->probability
= prob
;
1325 e13
= make_edge (bb
, bb3
, EDGE_TRUE_VALUE
);
1326 e13
->probability
= REG_BR_PROB_BASE
- prob
;
1327 e13
->count
= all
- count
;
1331 e24
= make_edge (bb2
, bb4
, EDGE_FALLTHRU
);
1332 e24
->probability
= REG_BR_PROB_BASE
;
1335 e34
->probability
= REG_BR_PROB_BASE
;
1336 e34
->count
= all
- count
;
1341 /* Do transform 2) on INSN if applicable. */
/* NOTE(review): mangled extraction -- the return-type header, braces,
   the early "return false" after each guard below and the declaration
   of PROB are not visible.  Visible logic: for a MODIFY_EXPR (possibly
   wrapped in a RETURN_EXPR) whose RHS is an unsigned TRUNC_MOD_EXPR,
   look up the HIST_TYPE_POW2 histogram measured for the divisor;
   counters[0] counts non-power-of-2 hits ("wrong values"), counters[1]
   power-of-2 hits.  */
1343 tree_mod_pow2_value_transform (tree stmt
)
1345 stmt_ann_t ann
= get_stmt_ann (stmt
);
1346 histogram_value histogram
;
1347 enum tree_code code
;
1348 gcov_type count
, wrong_values
, all
;
1349 tree modify
, op
, op1
, op2
, result
, value
;
/* Unwrap "return expr;" to the underlying MODIFY_EXPR.  */
1353 if (TREE_CODE (stmt
) == RETURN_EXPR
1354 && TREE_OPERAND (stmt
, 0)
1355 && TREE_CODE (TREE_OPERAND (stmt
, 0)) == MODIFY_EXPR
)
1356 modify
= TREE_OPERAND (stmt
, 0);
1357 if (TREE_CODE (modify
) != MODIFY_EXPR
)
1359 op
= TREE_OPERAND (modify
, 1);
1360 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
1362 code
= TREE_CODE (op
);
/* Only unsigned modulo benefits from the power-of-2 masking trick.  */
1364 if (code
!= TRUNC_MOD_EXPR
|| !TYPE_UNSIGNED (TREE_TYPE (op
)))
1367 op1
= TREE_OPERAND (op
, 0);
1368 op2
= TREE_OPERAND (op
, 1);
1369 if (!ann
->histograms
)
1372 for (histogram
= ann
->histograms
; histogram
; histogram
= histogram
->hvalue
.tree
.next
)
1373 if (histogram
->type
== HIST_TYPE_POW2
)
1379 value
= histogram
->hvalue
.tree
.value
;
1380 wrong_values
= histogram
->hvalue
.tree
.counters
[0];
1381 count
= histogram
->hvalue
.tree
.counters
[1];
1383 /* We require that we hit a power of 2 at least half of all evaluations. */
1384 if (simple_cst_equal (op2
, value
) != 1 || count
< wrong_values
)
1389 fprintf (dump_file
, "Mod power of 2 transformation on insn ");
1390 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
1393 /* Compute probability of taking the optimal path. */
1394 all
= count
+ wrong_values
;
1395 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
/* Emit the three-block expansion and redirect the RHS to its result.  */
1397 result
= tree_mod_pow2 (stmt
, op
, op1
, op2
, prob
, count
, all
);
1399 TREE_OPERAND (modify
, 1) = result
;
1404 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
1405 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
1406 support. Currently only NCOUNTS==0 or 1 is supported and this is
1407 built into this interface. The probabilities of taking the optimal
1408 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
1409 COUNT2/ALL respectively within roundoff error). This generates the
1410 result into a temp and returns the temp; it does not replace or alter
1411 the original STMT. */
1412 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
/* NOTE(review): mangled extraction -- the return-type line, braces, the
   declaration of tmp1, the bb1end/bb2end/bb3end assignments and the
   final return of RESULT are not visible.  Visible logic emits:
     result = op1; tmp1 = op2;
     if (result < tmp1) goto done;             (mod is a no-op)
     [if NCOUNTS: result -= tmp1;
                  if (result < tmp1) goto done;]  (one subtraction)
     fallback: result = result <operation> tmp1;
   then splits blocks and sets edge probabilities/counts from
   PROB1/PROB2 and COUNT1/COUNT2/ALL.  */
1415 tree_mod_subtract (tree stmt
, tree operation
, tree op1
, tree op2
,
1416 int prob1
, int prob2
, int ncounts
,
1417 gcov_type count1
, gcov_type count2
, gcov_type all
)
1419 tree stmt1
, stmt2
, stmt3
;
1421 tree label_decl1
= create_artificial_label ();
1422 tree label_decl2
= create_artificial_label ();
1423 tree label_decl3
= create_artificial_label ();
1424 tree label1
, label2
, label3
;
1425 tree bb1end
, bb2end
= NULL_TREE
, bb3end
;
1426 basic_block bb
, bb2
, bb3
, bb4
;
1427 tree optype
= TREE_TYPE (operation
);
1428 edge e12
, e23
= 0, e24
, e34
, e14
;
1429 block_stmt_iterator bsi
;
1430 tree result
= create_tmp_var (optype
, "PROF");
1432 bb
= bb_for_stmt (stmt
);
1433 bsi
= bsi_for_stmt (stmt
);
/* bb1: copy operands into temps and test whether op1 is already
   smaller than op2 (then the modulus is op1 itself).  */
1435 tmp1
= create_tmp_var (optype
, "PROF");
1436 stmt1
= build2 (MODIFY_EXPR
, optype
, result
, op1
);
1437 stmt2
= build2 (MODIFY_EXPR
, optype
, tmp1
, op2
);
1438 stmt3
= build3 (COND_EXPR
, void_type_node
,
1439 build2 (LT_EXPR
, boolean_type_node
, result
, tmp1
),
1440 build1 (GOTO_EXPR
, void_type_node
, label_decl3
),
1441 build1 (GOTO_EXPR
, void_type_node
,
1442 ncounts
? label_decl1
: label_decl2
));
1443 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1444 bsi_insert_before (&bsi
, stmt2
, BSI_SAME_STMT
);
1445 bsi_insert_before (&bsi
, stmt3
, BSI_SAME_STMT
);
/* Optional bb2: try a single subtraction before falling back.  */
1448 if (ncounts
) /* Assumed to be 0 or 1 */
1450 label1
= build1 (LABEL_EXPR
, void_type_node
, label_decl1
);
1451 stmt1
= build2 (MODIFY_EXPR
, optype
, result
,
1452 build2 (MINUS_EXPR
, optype
, result
, tmp1
));
1453 stmt2
= build3 (COND_EXPR
, void_type_node
,
1454 build2 (LT_EXPR
, boolean_type_node
, result
, tmp1
),
1455 build1 (GOTO_EXPR
, void_type_node
, label_decl3
),
1456 build1 (GOTO_EXPR
, void_type_node
, label_decl2
));
1457 bsi_insert_before (&bsi
, label1
, BSI_SAME_STMT
);
1458 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
1459 bsi_insert_before (&bsi
, stmt2
, BSI_SAME_STMT
);
1463 /* Fallback case. */
1464 label2
= build1 (LABEL_EXPR
, void_type_node
, label_decl2
);
1465 stmt1
= build2 (MODIFY_EXPR
, optype
, result
,
1466 build2 (TREE_CODE (operation
), optype
, result
, tmp1
));
1467 bsi_insert_before (&bsi
, label2
, BSI_SAME_STMT
);
1468 bsi_insert_before (&bsi
, stmt1
, BSI_SAME_STMT
);
/* Join label after all paths.  */
1471 label3
= build1 (LABEL_EXPR
, void_type_node
, label_decl3
);
1472 bsi_insert_before (&bsi
, label3
, BSI_SAME_STMT
);
1475 /* Edge e23 connects bb2 to bb3, etc. */
1476 /* However block 3 is optional; if it is not there, references
1477 to 3 really refer to block 2. */
/* NOTE(review): bb1end/bb3end (and bb2end when ncounts) are used here
   but their assignments were dropped by extraction -- confirm against
   the original source.  */
1478 e12
= split_block (bb
, bb1end
);
1480 bb2
->count
= all
- count1
;
1482 if (ncounts
) /* Assumed to be 0 or 1. */
1484 e23
= split_block (bb2
, bb2end
);
1486 bb3
->count
= all
- count1
- count2
;
1489 e34
= split_block (ncounts
? bb3
: bb2
, bb3end
);
/* bb1's false edge (op1 >= op2) continues to the subtraction/fallback
   blocks; the true edge jumps straight to the join (no-op case).  */
1493 e12
->flags
&= ~EDGE_FALLTHRU
;
1494 e12
->flags
|= EDGE_FALSE_VALUE
;
1495 e12
->probability
= REG_BR_PROB_BASE
- prob1
;
1496 e12
->count
= all
- count1
;
1498 e14
= make_edge (bb
, bb4
, EDGE_TRUE_VALUE
);
1499 e14
->probability
= prob1
;
1500 e14
->count
= count1
;
1502 if (ncounts
) /* Assumed to be 0 or 1. */
1504 e23
->flags
&= ~EDGE_FALLTHRU
;
1505 e23
->flags
|= EDGE_FALSE_VALUE
;
1506 e23
->count
= all
- count1
- count2
;
1507 e23
->probability
= REG_BR_PROB_BASE
- prob2
;
1509 e24
= make_edge (bb2
, bb4
, EDGE_TRUE_VALUE
);
1510 e24
->probability
= prob2
;
1511 e24
->count
= count2
;
1514 e34
->probability
= REG_BR_PROB_BASE
;
1515 e34
->count
= all
- count1
- count2
;
1520 /* Do transforms 3) and 4) on INSN if applicable. */
/* NOTE(review): mangled extraction -- the return-type header, braces,
   early returns, and the declarations/zero-initializations of
   i/prob1/prob2/all/count/wrong_values are not visible.  Visible
   logic: for an unsigned TRUNC_MOD_EXPR, read the HIST_TYPE_INTERVAL
   histogram; counters[0..steps-1] count evaluations needing that many
   subtractions, plus two overflow counters folded into wrong_values.  */
1522 tree_mod_subtract_transform (tree stmt
)
1524 stmt_ann_t ann
= get_stmt_ann (stmt
);
1525 histogram_value histogram
;
1526 enum tree_code code
;
1527 gcov_type count
, wrong_values
, all
;
1528 tree modify
, op
, op1
, op2
, result
, value
;
/* Unwrap "return expr;" to the underlying MODIFY_EXPR.  */
1533 if (TREE_CODE (stmt
) == RETURN_EXPR
1534 && TREE_OPERAND (stmt
, 0)
1535 && TREE_CODE (TREE_OPERAND (stmt
, 0)) == MODIFY_EXPR
)
1536 modify
= TREE_OPERAND (stmt
, 0);
1537 if (TREE_CODE (modify
) != MODIFY_EXPR
)
1539 op
= TREE_OPERAND (modify
, 1);
1540 if (!INTEGRAL_TYPE_P (TREE_TYPE (op
)))
1542 code
= TREE_CODE (op
);
/* Only unsigned modulo is handled.  */
1544 if (code
!= TRUNC_MOD_EXPR
|| !TYPE_UNSIGNED (TREE_TYPE (op
)))
1547 op1
= TREE_OPERAND (op
, 0);
1548 op2
= TREE_OPERAND (op
, 1);
1549 if (!ann
->histograms
)
1552 for (histogram
= ann
->histograms
; histogram
; histogram
= histogram
->hvalue
.tree
.next
)
1553 if (histogram
->type
== HIST_TYPE_INTERVAL
)
1559 value
= histogram
->hvalue
.tree
.value
;
/* Sum every counter into ALL; the two trailing counters are the
   out-of-range ("wrong value") hits.  */
1562 for (i
= 0; i
< histogram
->hdata
.intvl
.steps
; i
++)
1563 all
+= histogram
->hvalue
.tree
.counters
[i
];
1565 wrong_values
+= histogram
->hvalue
.tree
.counters
[i
];
1566 wrong_values
+= histogram
->hvalue
.tree
.counters
[i
+1];
1567 all
+= wrong_values
;
/* Find the smallest number of subtractions covering >= 50% of all
   evaluations; I is that cutoff on loop exit.  */
1569 /* We require that we use just subtractions in at least 50% of all
1572 for (i
= 0; i
< histogram
->hdata
.intvl
.steps
; i
++)
1574 count
+= histogram
->hvalue
.tree
.counters
[i
];
1575 if (count
* 2 >= all
)
1578 if (i
== histogram
->hdata
.intvl
.steps
)
1583 fprintf (dump_file
, "Mod subtract transformation on insn ");
1584 print_generic_stmt (dump_file
, stmt
, TDF_SLIM
);
1587 /* Compute probability of taking the optimal path(s). */
1588 prob1
= (histogram
->hvalue
.tree
.counters
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
1589 prob2
= (histogram
->hvalue
.tree
.counters
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
1591 /* In practice, "steps" is always 2. This interface reflects this,
1592 and will need to be changed if "steps" can change. */
1593 result
= tree_mod_subtract (stmt
, op
, op1
, op2
, prob1
, prob2
, i
,
1594 histogram
->hvalue
.tree
.counters
[0],
1595 histogram
->hvalue
.tree
.counters
[1], all
);
/* Redirect the RHS of the assignment to the expanded result.  */
1597 TREE_OPERAND (modify
, 1) = result
;
1602 /* Connection to the outside world. */
1603 /* Struct for IR-dependent hooks. */
/* Dispatch table selected by {rtl,tree}_register_value_prof_hooks
   below.  NOTE(review): the closing "};" of this struct was lost in
   extraction.  */
1604 struct value_prof_hooks
{
1605 /* Find list of values for which we want to measure histograms. */
1606 void (*find_values_to_profile
) (histogram_values
*);
1608 /* Identify and exploit properties of values that are hard to analyze
1609 statically. See value-prof.c for more detail. */
1610 bool (*value_profile_transformations
) (void);
1613 /* Hooks for RTL-based versions (the only ones that currently work). */
/* NOTE(review): the initializer's braces were dropped by extraction;
   the two members are the RTL find/transform entry points.  */
1614 static struct value_prof_hooks rtl_value_prof_hooks
=
1616 rtl_find_values_to_profile
,
1617 rtl_value_profile_transformations
/* Select the RTL hook set; the assert documents that this must happen
   while the IR is still RTL (not tree SSA).  NOTE(review): return type
   and braces dropped by extraction.  */
1621 rtl_register_value_prof_hooks (void)
1623 value_prof_hooks
= &rtl_value_prof_hooks
;
1624 gcc_assert (!ir_type ());
1627 /* Find values inside STMT for that we want to measure histograms for
1628 division/modulo optimization. */
/* NOTE(review): mangled extraction -- the return-type header, braces,
   the non-RETURN_EXPR branch of the assignment to ASSIGN, the early
   returns and the switch's other cases/default are not visible.
   Visible logic: for a TRUNC_DIV_EXPR/TRUNC_MOD_EXPR RHS of integral
   type, queue up to three histograms keyed on the divisor --
   SINGLE_VALUE (constant-divisor transform), POW2, and an INTERVAL on
   the quotient op0/divisor (mod-subtract transform) -- matching the
   consumers above.  */
1630 tree_divmod_values_to_profile (tree stmt
, histogram_values
*values
)
1632 tree assign
, lhs
, rhs
, divisor
, op0
, type
;
1633 histogram_value hist
;
1635 if (TREE_CODE (stmt
) == RETURN_EXPR
)
1636 assign
= TREE_OPERAND (stmt
, 0);
1641 || TREE_CODE (assign
) != MODIFY_EXPR
)
1643 lhs
= TREE_OPERAND (assign
, 0);
1644 type
= TREE_TYPE (lhs
);
1645 if (!INTEGRAL_TYPE_P (type
))
1648 rhs
= TREE_OPERAND (assign
, 1);
1649 switch (TREE_CODE (rhs
))
1651 case TRUNC_DIV_EXPR
:
1652 case TRUNC_MOD_EXPR
:
1653 divisor
= TREE_OPERAND (rhs
, 1);
1654 op0
= TREE_OPERAND (rhs
, 0);
/* At most three histograms can be pushed for one statement.  */
1656 VEC_reserve (histogram_value
, heap
, *values
, 3);
1658 if (is_gimple_reg (divisor
))
1660 /* Check for the case where the divisor is the same value most
1662 hist
= ggc_alloc (sizeof (*hist
));
1663 hist
->hvalue
.tree
.value
= divisor
;
1664 hist
->hvalue
.tree
.stmt
= stmt
;
1665 hist
->type
= HIST_TYPE_SINGLE_VALUE
;
1666 VEC_quick_push (histogram_value
, *values
, hist
);
1669 /* For mod, check whether it is not often a noop (or replaceable by
1670 a few subtractions). */
1671 if (TREE_CODE (rhs
) == TRUNC_MOD_EXPR
1672 && TYPE_UNSIGNED (type
))
1674 /* Check for a special case where the divisor is power of 2. */
1675 hist
= ggc_alloc (sizeof (*hist
));
1676 hist
->hvalue
.tree
.value
= divisor
;
1677 hist
->hvalue
.tree
.stmt
= stmt
;
1678 hist
->type
= HIST_TYPE_POW2
;
1679 VEC_quick_push (histogram_value
, *values
, hist
);
/* Interval histogram on the quotient: how many subtractions a
   mod-by-subtract expansion would need.  */
1681 hist
= ggc_alloc (sizeof (*hist
));
1682 hist
->hvalue
.tree
.stmt
= stmt
;
1683 hist
->hvalue
.tree
.value
1684 = build2 (TRUNC_DIV_EXPR
, type
, op0
, divisor
);
1685 hist
->type
= HIST_TYPE_INTERVAL
;
1686 hist
->hdata
.intvl
.int_start
= 0;
1687 hist
->hdata
.intvl
.steps
= 2;
1688 VEC_quick_push (histogram_value
, *values
, hist
);
1697 /* Find values inside STMT for that we want to measure histograms and adds
1698 them to list VALUES. */
/* Currently only division/modulo candidates are collected, and only
   when the value-profile-transformations flag is enabled.
   NOTE(review): return-type header and braces dropped by extraction.  */
1701 tree_values_to_profile (tree stmt
, histogram_values
*values
)
1703 if (flag_value_profile_transformations
)
1704 tree_divmod_values_to_profile (stmt
, values
);
/* Tree-level implementation of the find_values_to_profile hook: collect
   candidates from every statement, stash the vector in STATIC_VALUES,
   then set hist->n_counters per histogram type (and describe each one
   in the dump file when dumping is enabled).
   NOTE(review): mangled extraction -- the header, braces, the
   enclosing basic-block loop header, the switch head, "break"s and the
   default case are not visible.  */
1708 tree_find_values_to_profile (histogram_values
*values
)
1711 block_stmt_iterator bsi
;
1713 histogram_value hist
;
1717 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1718 tree_values_to_profile (bsi_stmt (bsi
), values
);
/* Remember the vector so it can be freed after profiling.  */
1719 static_values
= *values
;
1721 for (i
= 0; VEC_iterate (histogram_value
, *values
, i
, hist
); i
++)
/* steps counters for the interval plus two out-of-range counters.  */
1725 case HIST_TYPE_INTERVAL
:
1728 fprintf (dump_file
, "Interval counter for tree ");
1729 print_generic_expr (dump_file
, hist
->hvalue
.tree
.stmt
,
1731 fprintf (dump_file
, ", range %d -- %d.\n",
1732 hist
->hdata
.intvl
.int_start
,
1733 (hist
->hdata
.intvl
.int_start
1734 + hist
->hdata
.intvl
.steps
- 1));
1736 hist
->n_counters
= hist
->hdata
.intvl
.steps
+ 2;
1739 case HIST_TYPE_POW2
:
1742 fprintf (dump_file
, "Pow2 counter for tree ");
1743 print_generic_expr (dump_file
, hist
->hvalue
.tree
.stmt
, TDF_SLIM
);
1744 fprintf (dump_file
, ".\n");
1746 hist
->n_counters
= 2;
1749 case HIST_TYPE_SINGLE_VALUE
:
1752 fprintf (dump_file
, "Single value counter for tree ");
1753 print_generic_expr (dump_file
, hist
->hvalue
.tree
.stmt
, TDF_SLIM
);
1754 fprintf (dump_file
, ".\n");
1756 hist
->n_counters
= 3;
1759 case HIST_TYPE_CONST_DELTA
:
1762 fprintf (dump_file
, "Constant delta counter for tree ");
1763 print_generic_expr (dump_file
, hist
->hvalue
.tree
.stmt
, TDF_SLIM
);
1764 fprintf (dump_file
, ".\n");
1766 hist
->n_counters
= 4;
/* Tree-SSA flavours of the value-profiling hooks.
   NOTE(review): initializer braces lost in extraction.  */
1775 static struct value_prof_hooks tree_value_prof_hooks
= {
1776 tree_find_values_to_profile
,
1777 tree_value_profile_transformations
/* Select the tree hook set; the assert documents that the tree IR must
   be in use.  NOTE(review): return type and braces dropped by
   extraction.  */
1781 tree_register_value_prof_hooks (void)
1783 value_prof_hooks
= &tree_value_prof_hooks
;
1784 gcc_assert (ir_type ());
1787 /* IR-independent entry points. */
/* Dispatch to whichever hook set was registered (RTL or tree).
   NOTE(review): return-type header and braces dropped by extraction.  */
1789 find_values_to_profile (histogram_values
*values
)
1791 (value_prof_hooks
->find_values_to_profile
) (values
);
1795 value_profile_transformations (void)
1797 bool retval
= (value_prof_hooks
->value_profile_transformations
) ();
1798 VEC_free (histogram_value
, heap
, static_values
);