1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
/* Currently-installed set of IR-specific hooks (RTL-based or tree-based);
   installed by rtl_register_value_prof_hooks / tree_register_value_prof_hooks
   below and dispatched through by the IR-independent entry points at the
   bottom of this file.  */
38 static struct value_prof_hooks
*value_prof_hooks
;
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
64 /* For speculative prefetching, the range in which we do not prefetch (because
65 we assume that it will be in cache anyway). The asymmetry between min and
66 max range is trying to reflect the fact that the sequential prefetching
67 of the data is commonly done directly by hardware. Nevertheless, these
68 values are just a guess and should of course be target-specific. */
70 #ifndef NOPREFETCH_RANGE_MIN
71 #define NOPREFETCH_RANGE_MIN (-16)
73 #ifndef NOPREFETCH_RANGE_MAX
74 #define NOPREFETCH_RANGE_MAX 32
77 static void insn_divmod_values_to_profile (rtx
, histogram_values
*);
79 static bool insn_prefetch_values_to_profile (rtx
, histogram_values
*);
80 static int find_mem_reference_1 (rtx
*, void *);
81 static void find_mem_reference_2 (rtx
, rtx
, void *);
82 static bool find_mem_reference (rtx
, rtx
*, int *);
85 static void insn_values_to_profile (rtx
, histogram_values
*);
86 static rtx
gen_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
88 static rtx
gen_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
, int);
89 static rtx
gen_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
92 static rtx
gen_speculative_prefetch (rtx
, gcov_type
, int);
94 static bool divmod_fixed_value_transform (rtx insn
);
95 static bool mod_pow2_value_transform (rtx
);
96 static bool mod_subtract_transform (rtx
);
98 static bool speculative_prefetching_transform (rtx
);
101 /* Find values inside INSN for which we want to measure histograms for
102 division/modulo optimization and store them to VALUES. */
/* Scan INSN for division/modulo operations whose operands are worth
   profiling, and push a histogram_value request for each onto *VALUES.
   Three kinds of histograms are requested below: HIST_TYPE_POW2 (divisor
   often a power of 2), HIST_TYPE_SINGLE_VALUE (divisor often one constant),
   and HIST_TYPE_INTERVAL on op1/op2 (mod often a noop or a few subtracts).
   NOTE(review): this extracted text appears truncated -- braces, early
   returns and several statements from the original are missing; do not
   treat it as compilable as-is.  */
104 insn_divmod_values_to_profile (rtx insn
, histogram_values
*values
)
106 rtx set
, set_src
, op1
, op2
;
107 enum machine_mode mode
;
108 histogram_value hist
;
/* Only single-set insns are considered; the destination mode must be
   integral for div/mod profiling to make sense.  */
113 set
= single_set (insn
);
117 mode
= GET_MODE (SET_DEST (set
))
;
118 if (!INTEGRAL_MODE_P (mode
))
121 set_src
= SET_SRC (set
);
122 switch (GET_CODE (set_src
))
128 op1
= XEXP (set_src
, 0);
129 op2
= XEXP (set_src
, 1);
/* A divisor with side effects cannot be re-evaluated by the
   instrumentation code, so give up on it.  */
130 if (side_effects_p (op2
))
133 /* Check for a special case where the divisor is power of 2. */
134 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
136 hist
= ggc_alloc (sizeof (*hist
));
138 hist
->seq
= NULL_RTX
;
141 hist
->type
= HIST_TYPE_POW2
;
142 hist
->hdata
.pow2
.may_be_other
= 1;
143 VEC_safe_push (histogram_value
, *values
, hist
);
146 /* Check whether the divisor is not in fact a constant. */
147 if (!CONSTANT_P (op2
))
149 hist
= ggc_alloc (sizeof (*hist
));
152 hist
->seq
= NULL_RTX
;
154 hist
->type
= HIST_TYPE_SINGLE_VALUE
;
155 VEC_safe_push (histogram_value
, *values
, hist
);
158 /* For mod, check whether it is not often a noop (or replaceable by
159 a few subtractions). */
160 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
164 hist
= ggc_alloc (sizeof (*hist
));
/* The profiled value here is op1 / op2; hist->seq holds the insns
   emitted by force_operand to compute it.  */
166 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
167 hist
->value
= force_operand (tmp
, NULL_RTX
);
168 hist
->seq
= get_insns ();
/* Interval [0, 1]: counts quotients of 0 and 1, plus under/overflow
   buckets (may_be_less / may_be_more).  */
172 hist
->type
= HIST_TYPE_INTERVAL
;
173 hist
->hdata
.intvl
.int_start
= 0;
174 hist
->hdata
.intvl
.steps
= 2;
175 hist
->hdata
.intvl
.may_be_less
= 1;
176 hist
->hdata
.intvl
.may_be_more
= 1;
177 VEC_safe_push (histogram_value
, *values
, hist
);
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
/* for_each_rtx callback used by find_mem_reference: when *EXPR is a MEM,
   record it through RET and stop the traversal by returning 1 (per the
   comment above); otherwise return 0 to continue.
   NOTE(review): the extracted text is truncated; the store through RET and
   the return statements are missing from this view.  */
194 find_mem_reference_1 (rtx
*expr
, void *ret
)
198 if (GET_CODE (*expr
) == MEM
)
206 /* Called from find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
/* Communication flag for find_mem_reference_2: set when the MEM passed via
   the note_stores data pointer turns out to be stored to.  */
210 static int fmr2_write
;
/* note_stores callback: per the comment above, sets FMR2_WRITE when
   EXPR == MEM, i.e. when the memory reference is a store destination.
   NOTE(review): body truncated in this extraction.  */
212 find_mem_reference_2 (rtx expr
, rtx pat ATTRIBUTE_UNUSED
, void *mem
)
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
/* Find a memory reference inside INSN's pattern, returning it in *MEM and
   setting *WRITE when it is a store (detected via the find_mem_reference_2
   note_stores callback and the fmr2_write flag).
   NOTE(review): truncated -- the fmr2_write reset/readback and return value
   handling are missing from this view.  */
223 find_mem_reference (rtx insn
, rtx
*mem
, int *write
)
/* First locate any MEM in the pattern ...  */
226 for_each_rtx (&PATTERN (insn
), find_mem_reference_1
, mem
);
/* ... then classify it as read or write via note_stores.  */
232 note_stores (PATTERN (insn
), find_mem_reference_2
, *mem
);
237 /* Find values inside INSN for which we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if we found any such value, false otherwise. */
/* Scan INSN for a memory reference whose address is worth profiling for
   speculative prefetching, and push a HIST_TYPE_CONST_DELTA request onto
   *VALUES.  Addresses with side effects or constant addresses are skipped.
   NOTE(review): extracted text is truncated -- early returns, local
   declarations for mem/address/write, and the final return are missing.  */
242 insn_prefetch_values_to_profile (rtx insn
, histogram_values
*values
)
246 histogram_value hist
;
248 /* It only makes sense to look for memory references in ordinary insns. */
249 if (GET_CODE (insn
) != INSN
)
252 if (!find_mem_reference (insn
, &mem
, &write
))
255 address
= XEXP (mem
, 0);
/* An address with side effects cannot be re-evaluated safely.  */
256 if (side_effects_p (address
))
/* A constant address never changes, so there is nothing to learn.  */
259 if (CONSTANT_P (address
))
262 hist
= ggc_alloc (sizeof (*hist
));
263 hist
->value
= address
;
264 hist
->mode
= GET_MODE (address
);
265 hist
->seq
= NULL_RTX
;
267 hist
->type
= HIST_TYPE_CONST_DELTA
;
268 VEC_safe_push (histogram_value
, *values
, hist
);
273 /* Find values inside INSN for which we want to measure histograms and add
274 them to list VALUES (increasing the record of its length in N_VALUES). */
/* Dispatcher: collect all histogram requests for INSN into VALUES,
   gated by the corresponding optimization flags.  */
276 insn_values_to_profile (rtx insn
, histogram_values
*values
)
278 if (flag_value_profile_transformations
)
279 insn_divmod_values_to_profile (insn
, values
);
282 if (flag_speculative_prefetching
)
283 insn_prefetch_values_to_profile (insn
, values
);
287 /* Find list of values for that we want to measure histograms. */
/* RTL hook: walk all insns, collect histogram requests into *VALUES
   (skipping insns inside libcall sequences, tracked via REG_LIBCALL /
   REG_RETVAL notes and libcall_level), then assign each request its
   n_counters according to its histogram type, with a dump-file report.
   NOTE(review): extracted text is truncated -- the switch header, several
   case labels, dump_file guards and fprintf openings are missing here.  */
289 rtl_find_values_to_profile (histogram_values
*values
)
292 unsigned i
, libcall_level
;
/* Death notes are needed before scanning; recomputed via life_analysis.  */
294 life_analysis (NULL
, PROP_DEATH_NOTES
);
296 *values
= VEC_alloc (histogram_value
, 0);
298 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
300 if (find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
))
303 /* Do not instrument values inside libcalls (we are going to split block
304 due to instrumentation, and libcall blocks should be local to a single
307 insn_values_to_profile (insn
, values
);
309 if (find_reg_note (insn
, REG_RETVAL
, NULL_RTX
))
311 gcc_assert (libcall_level
> 0);
/* Every libcall opened must have been closed by end of scan.  */
315 gcc_assert (libcall_level
== 0);
/* Second pass: size each histogram (number of gcov counters).  */
317 for (i
= 0; i
< VEC_length (histogram_value
, *values
); i
++)
319 histogram_value hist
= VEC_index (histogram_value
, *values
, i
);
323 case HIST_TYPE_INTERVAL
:
326 "Interval counter for insn %d, range %d -- %d.\n",
327 INSN_UID ((rtx
)hist
->insn
),
328 hist
->hdata
.intvl
.int_start
,
329 (hist
->hdata
.intvl
.int_start
330 + hist
->hdata
.intvl
.steps
- 1));
/* steps buckets plus optional below-range / above-range buckets.  */
331 hist
->n_counters
= hist
->hdata
.intvl
.steps
+
332 (hist
->hdata
.intvl
.may_be_less
? 1 : 0) +
333 (hist
->hdata
.intvl
.may_be_more
? 1 : 0);
339 "Pow2 counter for insn %d.\n",
340 INSN_UID ((rtx
)hist
->insn
));
/* One counter per bit of the mode, plus an "other" bucket.  */
342 = GET_MODE_BITSIZE (hist
->mode
)
343 + (hist
->hdata
.pow2
.may_be_other
? 1 : 0);
346 case HIST_TYPE_SINGLE_VALUE
:
349 "Single value counter for insn %d.\n",
350 INSN_UID ((rtx
)hist
->insn
));
351 hist
->n_counters
= 3;
354 case HIST_TYPE_CONST_DELTA
:
357 "Constant delta counter for insn %d.\n",
358 INSN_UID ((rtx
)hist
->insn
));
359 hist
->n_counters
= 4;
/* Register info may have been invalidated above; rebuild it.  */
366 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
369 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
370 them to identify and exploit properties of values that are hard to analyze
373 We do following transformations:
379 where b is almost always a constant N is transformed to
392 where b is almost always a power of 2 and the division is unsigned
393 TODO -- handle signed case as well
395 if ((b & (b - 1)) == 0)
400 Note that when b = 0, no error will occur and x = a; this is correct,
401 as result of such operation is undefined.
407 where a is almost always less than b and the division is unsigned
408 TODO -- handle signed case as well
418 where a is almost always less than 2 * b and the division is unsigned
419 TODO -- handle signed case as well
427 It would be possible to continue analogically for K * b for other small
428 K's, but it is probably not useful.
432 Read or write of mem[address], where the value of address changes usually
433 by a constant C != 0 between the following accesses to the computation; with
434 -fspeculative-prefetching we then add a prefetch of address + C before
435 the insn. This handles prefetching of several interesting cases in addition
436 to a simple prefetching for addresses that are induction variables, e. g.
437 linked lists allocated sequentially (even in case they are processed
440 TODO -- we should also check whether there is not (usually) a small
441 difference with the adjacent memory references, so that we do
442 not issue overlapping prefetches. Also we should employ some
443 heuristics to eliminate cases where prefetching evidently spoils
445 -- it should somehow cooperate with the loop optimizer prefetching
449 There are other useful cases that could be handled by a similar mechanism,
452 for (i = 0; i < n; i++)
455 transform to (for constant N):
458 for (i = 0; i < N; i++)
461 for (i = 0; i < n; i++)
463 making unroller happy. Since this may grow the code significantly,
464 we would have to be very careful here. */
/* RTL hook: main driver of the profile-based transformations.  Walks all
   insns, and for each hot insn carrying a REG_VALUE_PROFILE note tries the
   div/mod transformations and speculative prefetching in turn; queued edge
   insertions are committed at the end.  Uses NEXT_INSN captured up front
   since transformations may split blocks around INSN.
   NOTE(review): truncated -- declarations, the "changed" tracking and the
   return statement are missing from this extraction.  */
467 rtl_value_profile_transformations (void)
472 for (insn
= get_insns (); insn
; insn
= next
)
474 next
= NEXT_INSN (insn
);
479 /* Scan for insn carrying a histogram. */
480 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
483 /* Ignore cold areas -- we are growing a code. */
484 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
489 fprintf (dump_file
, "Trying transformations on insn %d\n",
491 print_rtl_single (dump_file
, insn
);
494 /* Transformations: */
495 if (flag_value_profile_transformations
496 && (mod_subtract_transform (insn
)
497 || divmod_fixed_value_transform (insn
)
498 || mod_pow2_value_transform (insn
)))
501 if (flag_speculative_prefetching
502 && speculative_prefetching_transform (insn
))
/* Materialize all insns queued on edges by the transforms above.  */
509 commit_edge_insertions ();
510 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
516 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
517 and OP2, whose value is expected to be VALUE, result TARGET and
518 probability of taking the optimal path PROB). */
/* Emit the replacement sequence for transformation 1: TARGET = OP1 OPERATION
   OP2, specialized for the case OP2 == VALUE (taken with probability PROB).
   Compares a copy of OP2 against VALUE; the equal path computes the
   operation with the constant, the neq path recomputes with the runtime
   value.  Returns the emitted insn sequence (per start/end sequence calls
   missing from this truncated extraction).
   NOTE(review): the parameter list, locals and sequence bracketing are
   incomplete in this view.  */
520 gen_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
521 rtx target
, rtx op1
, rtx op2
, gcov_type value
,
525 rtx neq_label
= gen_label_rtx ();
526 rtx end_label
= gen_label_rtx ();
/* Copy OP2 into a fresh pseudo so it is evaluated exactly once.  */
533 tmp
= gen_reg_rtx (mode
);
534 emit_move_insn (tmp
, copy_rtx (op2
));
539 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
540 NULL_RTX
, neq_label
);
542 /* Add branch probability to jump we just created. */
543 jump
= get_last_insn ();
544 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
545 GEN_INT (REG_BR_PROB_BASE
- prob
),
/* Fast path: divisor known to be VALUE.  */
548 tmp1
= simplify_gen_binary (operation
, mode
,
549 copy_rtx (op1
), GEN_INT (value
));
550 tmp1
= force_operand (tmp1
, target
);
552 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
554 emit_jump_insn (gen_jump (end_label
));
/* Slow path: recompute with the actual runtime divisor.  */
557 emit_label (neq_label
);
558 tmp1
= simplify_gen_binary (operation
, mode
,
559 copy_rtx (op1
), copy_rtx (tmp
));
560 tmp1
= force_operand (tmp1
, target
);
562 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
564 emit_label (end_label
);
566 sequence
= get_insns ();
568 rebuild_jump_labels (sequence
);
572 /* Do transform 1) on INSN if applicable. */
/* Do transform 1) on INSN if applicable: if INSN is a DIV/MOD/UDIV/UMOD
   whose HIST_TYPE_SINGLE_VALUE histogram shows the divisor equals one
   constant at least 50% of the time, replace it with a specialized
   sequence generated by gen_divmod_fixed_value, inserted on the edge
   created by splitting the block before INSN.
   NOTE(review): this extracted text is truncated -- early returns, the
   histogram-not-found bailout and the final return are missing.
   Fix applied: XEXP (set_src, false) used the boolean literal as an
   operand index; changed to 0 for consistency with the sibling
   transforms (see mod_pow2_value_transform).  */
574 divmod_fixed_value_transform (rtx insn
)
576 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
578 enum machine_mode mode
;
579 gcov_type val
, count
, all
;
583 set
= single_set (insn
);
587 set_src
= SET_SRC (set
);
588 set_dest
= SET_DEST (set
);
589 code
= GET_CODE (set_src
);
590 mode
= GET_MODE (set_dest
);
/* Only plain division/modulo operations are handled.  */
592 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
594 op1
= XEXP (set_src
, 0);
595 op2
= XEXP (set_src
, 1);
/* Locate the single-value histogram note on INSN.  */
597 for (histogram
= REG_NOTES (insn
);
599 histogram
= XEXP (histogram
, 1))
600 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
601 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
/* Unpack the note: profiled expression, most common value, its count,
   and the total number of evaluations.  */
607 histogram
= XEXP (XEXP (histogram
, 0), 1);
608 value
= XEXP (histogram
, 0);
609 histogram
= XEXP (histogram
, 1);
610 val
= INTVAL (XEXP (histogram
, 0));
611 histogram
= XEXP (histogram
, 1);
612 count
= INTVAL (XEXP (histogram
, 0));
613 histogram
= XEXP (histogram
, 1);
614 all
= INTVAL (XEXP (histogram
, 0));
616 /* We require that count be at least half of all; this means
617 that for the transformation to fire the value must be constant
618 at least 50% of time (and 75% gives the guarantee of usage). */
619 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
623 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
626 /* Compute probability of taking the optimal path. */
627 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
629 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
632 insert_insn_on_edge (
633 gen_divmod_fixed_value (mode
, code
, set_dest
,
634 op1
, op2
, val
, prob
), e
);
639 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
640 and OP2, result TARGET and probability of taking the optimal path PROB). */
/* Emit the replacement sequence for transformation 2: TARGET = OP1 OPERATION
   OP2 specialized for OP2 being a power of 2 (with probability PROB).
   Tests (op2 & (op2 - 1)) == 0 at runtime; the power-of-2 path computes
   op1 & (op2 - 1), the other path performs the original operation.
   NOTE(review): truncated extraction -- sequence bracketing, some
   expand_simple_binop argument tails and the return are missing.  */
642 gen_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
643 rtx op1
, rtx op2
, int prob
)
645 rtx tmp
, tmp1
, tmp2
, tmp3
, jump
;
646 rtx neq_label
= gen_label_rtx ();
647 rtx end_label
= gen_label_rtx ();
/* Evaluate OP2 once into a fresh pseudo.  */
654 tmp
= gen_reg_rtx (mode
);
655 emit_move_insn (tmp
, copy_rtx (op2
));
/* tmp1 = op2 - 1; tmp2 = op2 & (op2 - 1); zero iff op2 is a power of 2
   (note the b = 0 case is deliberately allowed, see file comment).  */
660 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
662 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
664 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
665 NULL_RTX
, neq_label
);
667 /* Add branch probability to jump we just created. */
668 jump
= get_last_insn ();
669 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
670 GEN_INT (REG_BR_PROB_BASE
- prob
),
/* Fast path: op1 % op2 == op1 & (op2 - 1) for power-of-2 op2.  */
673 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
676 emit_move_insn (copy_rtx (target
), tmp3
);
677 emit_jump_insn (gen_jump (end_label
));
/* Slow path: original operation.  */
680 emit_label (neq_label
);
681 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
682 tmp1
= force_operand (tmp1
, target
);
684 emit_move_insn (target
, tmp1
);
686 emit_label (end_label
);
688 sequence
= get_insns ();
690 rebuild_jump_labels (sequence
);
694 /* Do transform 2) on INSN if applicable. */
/* Do transform 2) on INSN if applicable: if INSN's HIST_TYPE_POW2
   histogram shows the UMOD divisor is a power of 2 at least half the
   time, insert the gen_mod_pow2 replacement sequence on the split edge.
   NOTE(review): truncated extraction -- early returns, the UMOD code
   check, count initialization and the final return are missing.  */
696 mod_pow2_value_transform (rtx insn
)
698 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
700 enum machine_mode mode
;
701 gcov_type wrong_values
, count
;
705 set
= single_set (insn
);
709 set_src
= SET_SRC (set
);
710 set_dest
= SET_DEST (set
);
711 code
= GET_CODE (set_src
);
712 mode
= GET_MODE (set_dest
);
716 op1
= XEXP (set_src
, 0);
717 op2
= XEXP (set_src
, 1);
/* Locate the pow2 histogram note on INSN.  */
719 for (histogram
= REG_NOTES (insn
);
721 histogram
= XEXP (histogram
, 1))
722 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
723 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
/* Unpack: profiled expression, "other" bucket, then one counter per
   bit of the mode summed into count.  */
729 histogram
= XEXP (XEXP (histogram
, 0), 1);
730 value
= XEXP (histogram
, 0);
731 histogram
= XEXP (histogram
, 1);
732 wrong_values
=INTVAL (XEXP (histogram
, 0));
733 histogram
= XEXP (histogram
, 1);
736 for (i
= 0; i
< GET_MODE_BITSIZE (mode
); i
++)
738 count
+= INTVAL (XEXP (histogram
, 0));
739 histogram
= XEXP (histogram
, 1);
742 if (!rtx_equal_p (op2
, value
))
745 /* We require that we hit a power of two at least half of all evaluations. */
746 if (count
< wrong_values
)
750 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
753 /* Compute probability of taking the optimal path. */
754 all
= count
+ wrong_values
;
755 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
757 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
760 insert_insn_on_edge (
761 gen_mod_pow2 (mode
, code
, set_dest
, op1
, op2
, prob
), e
);
766 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
767 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
768 probability of taking the optimal path(s) PROB1 and PROB2). */
/* Emit the replacement sequence for transformations 3 and 4: compute
   TARGET = OP1 OPERATION OP2 by at most SUB conditional subtractions of
   OP2 (for the common a < b, resp. a < 2*b cases), falling back to the
   original operation afterwards.  PROB1/PROB2 annotate the two kinds of
   early-exit branches.
   NOTE(review): truncated extraction -- locals, sequence bracketing and
   the return are missing from this view.  */
770 gen_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
771 rtx target
, rtx op1
, rtx op2
, int sub
, int prob1
, int prob2
)
774 rtx end_label
= gen_label_rtx ();
/* Evaluate OP2 once into a fresh pseudo.  */
782 tmp
= gen_reg_rtx (mode
);
783 emit_move_insn (tmp
, copy_rtx (op2
));
/* If op1 < op2 already, op1 % op2 == op1 and we are done.  */
788 emit_move_insn (target
, copy_rtx (op1
));
789 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
790 NULL_RTX
, end_label
);
792 /* Add branch probability to jump we just created. */
793 jump
= get_last_insn ();
794 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
795 GEN_INT (prob1
), REG_NOTES (jump
));
/* Up to SUB rounds of: target -= op2; exit as soon as target < op2.  */
797 for (i
= 0; i
< sub
; i
++)
799 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
802 emit_move_insn (target
, tmp1
);
803 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
804 NULL_RTX
, end_label
);
806 /* Add branch probability to jump we just created. */
807 jump
= get_last_insn ();
808 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
809 GEN_INT (prob2
), REG_NOTES (jump
));
/* Fallback: perform the original operation on the reduced value.  */
812 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
813 tmp1
= force_operand (tmp1
, target
);
815 emit_move_insn (target
, tmp1
);
817 emit_label (end_label
);
819 sequence
= get_insns ();
821 rebuild_jump_labels (sequence
);
825 /* Do transforms 3) and 4) on INSN if applicable. */
/* Do transforms 3) and 4) on INSN if applicable: if INSN's interval
   histogram shows a / b is usually 0 (or 0..1), replace the mod by at
   most i subtractions via gen_mod_subtract on the split edge.
   NOTE(review): truncated extraction -- early returns, the UMOD check,
   the count/all accumulation in the 50% test, and the final return are
   missing from this view.  */
827 mod_subtract_transform (rtx insn
)
829 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
831 enum machine_mode mode
;
832 gcov_type wrong_values
, counts
[2], count
, all
;
836 set
= single_set (insn
);
840 set_src
= SET_SRC (set
);
841 set_dest
= SET_DEST (set
);
842 code
= GET_CODE (set_src
);
843 mode
= GET_MODE (set_dest
);
847 op1
= XEXP (set_src
, 0);
848 op2
= XEXP (set_src
, 1);
/* Locate the interval histogram note on INSN.  */
850 for (histogram
= REG_NOTES (insn
);
852 histogram
= XEXP (histogram
, 1))
853 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
854 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
/* Unpack: profiled expression, then the two in-range bucket counts
   (quotient 0 and 1), then the below/above-range buckets which both
   count as wrong_values.  */
860 histogram
= XEXP (XEXP (histogram
, 0), 1);
861 value
= XEXP (histogram
, 0);
862 histogram
= XEXP (histogram
, 1);
865 for (i
= 0; i
< 2; i
++)
867 counts
[i
] = INTVAL (XEXP (histogram
, 0));
869 histogram
= XEXP (histogram
, 1);
871 wrong_values
= INTVAL (XEXP (histogram
, 0));
872 histogram
= XEXP (histogram
, 1);
873 wrong_values
+= INTVAL (XEXP (histogram
, 0));
876 /* We require that we use just subtractions in at least 50% of all
/* Choose the smallest i whose cumulative count clears the threshold.  */
879 for (i
= 0; i
< 2; i
++)
882 if (count
* 2 >= all
)
890 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
893 /* Compute probability of taking the optimal path(s). */
894 prob1
= (counts
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
895 prob2
= (counts
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
897 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
900 insert_insn_on_edge (
901 gen_mod_subtract (mode
, code
, set_dest
,
902 op1
, op2
, i
, prob1
, prob2
), e
);
908 /* Generate code for transformation 5 for mem with ADDRESS and a constant
909 step DELTA. WRITE is true if the reference is a store to mem. */
/* Emit the replacement sequence for transformation 5: a prefetch of
   ADDRESS + DELTA (the profiled constant stride), WRITE marking a store
   reference.  The prefetch locality hint is hard-coded to 3.
   NOTE(review): truncated extraction -- locals, the else of the
   offsettable_address_p test, sequence bracketing and the return are
   missing from this view.  */
912 gen_speculative_prefetch (rtx address
, gcov_type delta
, int write
)
917 /* TODO: we do the prefetching for just one iteration ahead, which
918 often is not enough. */
/* Prefer a simple offset form when the target accepts it; otherwise
   build the PLUS explicitly and force it into an operand.  */
920 if (offsettable_address_p (0, VOIDmode
, address
))
921 tmp
= plus_constant (copy_rtx (address
), delta
);
924 tmp
= simplify_gen_binary (PLUS
, Pmode
,
925 copy_rtx (address
), GEN_INT (delta
));
926 tmp
= force_operand (tmp
, NULL
);
/* Honor the prefetch pattern's operand predicate; fall back to a
   register when the computed address form is not accepted.  */
928 if (! (*insn_data
[(int)CODE_FOR_prefetch
].operand
[0].predicate
)
929 (tmp
, insn_data
[(int)CODE_FOR_prefetch
].operand
[0].mode
))
930 tmp
= force_reg (Pmode
, tmp
);
931 emit_insn (gen_prefetch (tmp
, GEN_INT (write
), GEN_INT (3)));
932 sequence
= get_insns ();
938 /* Do transform 5) on INSN if applicable. */
/* Do transform 5) on INSN if applicable: if INSN's HIST_TYPE_CONST_DELTA
   histogram shows the referenced address usually advances by a constant
   stride outside the no-prefetch range, insert a speculative prefetch of
   address + stride via gen_speculative_prefetch on the split edge.
   NOTE(review): truncated extraction -- locals for mem/address/write,
   early returns, the low-execution-count bailout and the final return
   are missing from this view.  */
941 speculative_prefetching_transform (rtx insn
)
943 rtx histogram
, value
;
944 gcov_type val
, count
, all
;
/* Ignore cold blocks -- prefetching grows the code.  */
949 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
952 if (!find_mem_reference (insn
, &mem
, &write
))
955 address
= XEXP (mem
, 0);
956 if (side_effects_p (address
))
959 if (CONSTANT_P (address
))
/* Locate the constant-delta histogram note on INSN.  */
962 for (histogram
= REG_NOTES (insn
);
964 histogram
= XEXP (histogram
, 1))
965 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
966 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA
))
/* Unpack: profiled expression, skip the last-value counter, then the
   most common delta, its count, and the total evaluations.  */
972 histogram
= XEXP (XEXP (histogram
, 0), 1);
973 value
= XEXP (histogram
, 0);
974 histogram
= XEXP (histogram
, 1);
975 /* Skip last value referenced. */
976 histogram
= XEXP (histogram
, 1);
977 val
= INTVAL (XEXP (histogram
, 0));
978 histogram
= XEXP (histogram
, 1);
979 count
= INTVAL (XEXP (histogram
, 0));
980 histogram
= XEXP (histogram
, 1);
981 all
= INTVAL (XEXP (histogram
, 0));
983 /* With that few executions we do not really have a reason to optimize the
984 statement, and more importantly, the data about differences of addresses
985 are spoiled by the first item that had no previous value to compare
990 /* We require that count be at least half of all; this means
991 that for the transformation to fire the value must be constant
992 at least 50% of time (and 75% gives the guarantee of usage). */
993 if (!rtx_equal_p (address
, value
) || 2 * count
< all
)
996 /* If the difference is too small, it does not make too much sense to
997 prefetch, as the memory is probably already in cache. */
998 if (val
>= NOPREFETCH_RANGE_MIN
&& val
<= NOPREFETCH_RANGE_MAX
)
1002 fprintf (dump_file
, "Speculative prefetching for insn %d\n",
1005 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
1007 insert_insn_on_edge (gen_speculative_prefetch (address
, val
, write
), e
);
1011 #endif /* HAVE_prefetch */
1013 /* Connection to the outside world. */
1014 /* Struct for IR-dependent hooks. */
1015 struct value_prof_hooks
{
1016 /* Find list of values for which we want to measure histograms. */
1017 void (*find_values_to_profile
) (histogram_values
*);
1019 /* Identify and exploit properties of values that are hard to analyze
1020 statically. See value-prof.c for more detail. */
1021 bool (*value_profile_transformations
) (void);
1024 /* Hooks for RTL-based versions (the only ones that currently work). */
/* The RTL-based hook implementations and their installer.  */
1025 static struct value_prof_hooks rtl_value_prof_hooks
=
1027 rtl_find_values_to_profile
,
1028 rtl_value_profile_transformations
/* Install the RTL hooks as the active implementation.  */
1032 rtl_register_value_prof_hooks (void)
1034 value_prof_hooks
= &rtl_value_prof_hooks
;
1039 /* Tree-based versions are stubs for now. */
/* Stub implementations for the tree IR (per the comment above, tree-based
   versions are not implemented yet).  */
1041 tree_find_values_to_profile (histogram_values
*values ATTRIBUTE_UNUSED
)
1047 tree_value_profile_transformations (void)
1052 static struct value_prof_hooks tree_value_prof_hooks
= {
1053 tree_find_values_to_profile
,
1054 tree_value_profile_transformations
/* Install the tree stubs as the active implementation.  */
1058 tree_register_value_prof_hooks (void)
1060 value_prof_hooks
= &tree_value_prof_hooks
;
1065 /* IR-independent entry points. */
/* IR-independent entry point: delegate to the installed hook.  */
1067 find_values_to_profile (histogram_values
*values
)
1069 (value_prof_hooks
->find_values_to_profile
) (values
);
/* IR-independent entry point: delegate to the installed hook and
   propagate whether any transformation was performed.  */
1073 value_profile_transformations (void)
1075 return (value_prof_hooks
->value_profile_transformations
) ();