1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
/* The set of IR-specific hooks (RTL or tree) currently in effect.  */
static struct value_prof_hooks *value_prof_hooks;
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
64 /* For speculative prefetching, the range in that we do not prefetch (because
65 we assume that it will be in cache anyway). The asymmetry between min and
66 max range is trying to reflect the fact that the sequential prefetching
67 of the data is commonly done directly by hardware. Nevertheless, these
68 values are just a guess and should of course be target-specific. */
/* Overridable per-target; defaults below.  Each #ifndef must be closed --
   the guards were previously unbalanced (missing #endif).  */
#ifndef NOPREFETCH_RANGE_MIN
#define NOPREFETCH_RANGE_MIN (-16)
#endif
#ifndef NOPREFETCH_RANGE_MAX
#define NOPREFETCH_RANGE_MAX 32
#endif
77 static void insn_divmod_values_to_profile (rtx
, histogram_values
*);
79 static bool insn_prefetch_values_to_profile (rtx
, histogram_values
*);
80 static int find_mem_reference_1 (rtx
*, void *);
81 static void find_mem_reference_2 (rtx
, rtx
, void *);
82 static bool find_mem_reference (rtx
, rtx
*, int *);
85 static void insn_values_to_profile (rtx
, histogram_values
*);
86 static rtx
gen_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
88 static rtx
gen_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
, int);
89 static rtx
gen_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
92 static rtx
gen_speculative_prefetch (rtx
, gcov_type
, int);
94 static bool divmod_fixed_value_transform (rtx insn
);
95 static bool mod_pow2_value_transform (rtx
);
96 static bool mod_subtract_transform (rtx
);
98 static bool speculative_prefetching_transform (rtx
);
101 /* Find values inside INSN for that we want to measure histograms for
102 division/modulo optimization and stores them to VALUES. */
104 insn_divmod_values_to_profile (rtx insn
, histogram_values
*values
)
106 rtx set
, set_src
, op1
, op2
;
107 enum machine_mode mode
;
108 histogram_value hist
;
113 set
= single_set (insn
);
117 mode
= GET_MODE (SET_DEST (set
));
118 if (!INTEGRAL_MODE_P (mode
))
121 set_src
= SET_SRC (set
);
122 switch (GET_CODE (set_src
))
128 op1
= XEXP (set_src
, 0);
129 op2
= XEXP (set_src
, 1);
130 if (side_effects_p (op2
))
133 /* Check for a special case where the divisor is power of 2. */
134 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
136 hist
= ggc_alloc (sizeof (*hist
));
138 hist
->seq
= NULL_RTX
;
141 hist
->type
= HIST_TYPE_POW2
;
142 hist
->hdata
.pow2
.may_be_other
= 1;
143 VEC_safe_push (histogram_value
, *values
, hist
);
146 /* Check whether the divisor is not in fact a constant. */
147 if (!CONSTANT_P (op2
))
149 hist
= ggc_alloc (sizeof (*hist
));
152 hist
->seq
= NULL_RTX
;
154 hist
->type
= HIST_TYPE_SINGLE_VALUE
;
155 VEC_safe_push (histogram_value
, *values
, hist
);
158 /* For mod, check whether it is not often a noop (or replaceable by
159 a few subtractions). */
160 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
164 hist
= ggc_alloc (sizeof (*hist
));
166 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
167 hist
->value
= force_operand (tmp
, NULL_RTX
);
168 hist
->seq
= get_insns ();
172 hist
->type
= HIST_TYPE_INTERVAL
;
173 hist
->hdata
.intvl
.int_start
= 0;
174 hist
->hdata
.intvl
.steps
= 2;
175 hist
->hdata
.intvl
.may_be_less
= 1;
176 hist
->hdata
.intvl
.may_be_more
= 1;
177 VEC_safe_push (histogram_value
, *values
, hist
);
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
194 find_mem_reference_1 (rtx
*expr
, void *ret
)
198 if (GET_CODE (*expr
) == MEM
)
206 /* Called form find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
210 static int fmr2_write
;
212 find_mem_reference_2 (rtx expr
, rtx pat ATTRIBUTE_UNUSED
, void *mem
)
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
223 find_mem_reference (rtx insn
, rtx
*mem
, int *write
)
226 for_each_rtx (&PATTERN (insn
), find_mem_reference_1
, mem
);
232 note_stores (PATTERN (insn
), find_mem_reference_2
, *mem
);
237 /* Find values inside INSN for that we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if such we found any such value, false otherwise. */
242 insn_prefetch_values_to_profile (rtx insn
, histogram_values
*values
)
246 histogram_value hist
;
251 if (!find_mem_reference (insn
, &mem
, &write
))
254 address
= XEXP (mem
, 0);
255 if (side_effects_p (address
))
258 if (CONSTANT_P (address
))
261 hist
= ggc_alloc (sizeof (*hist
));
262 hist
->value
= address
;
263 hist
->mode
= GET_MODE (address
);
264 hist
->seq
= NULL_RTX
;
266 hist
->type
= HIST_TYPE_CONST_DELTA
;
267 VEC_safe_push (histogram_value
, *values
, hist
);
272 /* Find values inside INSN for that we want to measure histograms and adds
273 them to list VALUES (increasing the record of its length in N_VALUES). */
275 insn_values_to_profile (rtx insn
, histogram_values
*values
)
277 if (flag_value_profile_transformations
)
278 insn_divmod_values_to_profile (insn
, values
);
281 if (flag_speculative_prefetching
)
282 insn_prefetch_values_to_profile (insn
, values
);
286 /* Find list of values for that we want to measure histograms. */
288 rtl_find_values_to_profile (histogram_values
*values
)
293 life_analysis (NULL
, PROP_DEATH_NOTES
);
295 *values
= VEC_alloc (histogram_value
, 0);
296 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
297 insn_values_to_profile (insn
, values
);
299 for (i
= 0; i
< VEC_length (histogram_value
, *values
); i
++)
301 histogram_value hist
= VEC_index (histogram_value
, *values
, i
);
305 case HIST_TYPE_INTERVAL
:
308 "Interval counter for insn %d, range %d -- %d.\n",
309 INSN_UID ((rtx
)hist
->insn
),
310 hist
->hdata
.intvl
.int_start
,
311 (hist
->hdata
.intvl
.int_start
312 + hist
->hdata
.intvl
.steps
- 1));
313 hist
->n_counters
= hist
->hdata
.intvl
.steps
+
314 (hist
->hdata
.intvl
.may_be_less
? 1 : 0) +
315 (hist
->hdata
.intvl
.may_be_more
? 1 : 0);
321 "Pow2 counter for insn %d.\n",
322 INSN_UID ((rtx
)hist
->insn
));
324 = GET_MODE_BITSIZE (hist
->mode
)
325 + (hist
->hdata
.pow2
.may_be_other
? 1 : 0);
328 case HIST_TYPE_SINGLE_VALUE
:
331 "Single value counter for insn %d.\n",
332 INSN_UID ((rtx
)hist
->insn
));
333 hist
->n_counters
= 3;
336 case HIST_TYPE_CONST_DELTA
:
339 "Constant delta counter for insn %d.\n",
340 INSN_UID ((rtx
)hist
->insn
));
341 hist
->n_counters
= 4;
348 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
351 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
352 them to identify and exploit properties of values that are hard to analyze
355 We do following transformations:
361 where b is almost always a constant N is transformed to
374 where b is almost always a power of 2 and the division is unsigned
375 TODO -- handle signed case as well
377 if ((b & (b - 1)) == 0)
382 Note that when b = 0, no error will occur and x = a; this is correct,
383 as result of such operation is undefined.
389      where a is almost always less than b and the division is unsigned
390 TODO -- handle signed case as well
400      where a is almost always less than 2 * b and the division is unsigned
401 TODO -- handle signed case as well
409 It would be possible to continue analogically for K * b for other small
410 K's, but it is probably not useful.
414 Read or write of mem[address], where the value of address changes usually
415 by a constant C != 0 between the following accesses to the computation; with
416 -fspeculative-prefetching we then add a prefetch of address + C before
417 the insn. This handles prefetching of several interesting cases in addition
418 to a simple prefetching for addresses that are induction variables, e. g.
419 linked lists allocated sequentially (even in case they are processed
422 TODO -- we should also check whether there is not (usually) a small
423 difference with the adjacent memory references, so that we do
424 not issue overlapping prefetches. Also we should employ some
425 heuristics to eliminate cases where prefetching evidently spoils
427 -- it should somehow cooperate with the loop optimizer prefetching
431 There are other useful cases that could be handled by a similar mechanism,
434 for (i = 0; i < n; i++)
437 transform to (for constant N):
440 for (i = 0; i < N; i++)
443 for (i = 0; i < n; i++)
445 making unroller happy. Since this may grow the code significantly,
446 we would have to be very careful here. */
449 rtl_value_profile_transformations (void)
454 for (insn
= get_insns (); insn
; insn
= next
)
456 next
= NEXT_INSN (insn
);
461 /* Scan for insn carrying a histogram. */
462 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
465 /* Ignore cold areas -- we are growing a code. */
466 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
471 fprintf (dump_file
, "Trying transformations on insn %d\n",
473 print_rtl_single (dump_file
, insn
);
476 /* Transformations: */
477 if (flag_value_profile_transformations
478 && (mod_subtract_transform (insn
)
479 || divmod_fixed_value_transform (insn
)
480 || mod_pow2_value_transform (insn
)))
483 if (flag_speculative_prefetching
484 && speculative_prefetching_transform (insn
))
491 commit_edge_insertions ();
492 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
498 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
499 and OP2, whose value is expected to be VALUE, result TARGET and
500 probability of taking the optimal path PROB). */
502 gen_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
503 rtx target
, rtx op1
, rtx op2
, gcov_type value
,
507 rtx neq_label
= gen_label_rtx ();
508 rtx end_label
= gen_label_rtx ();
515 tmp
= gen_reg_rtx (mode
);
516 emit_move_insn (tmp
, copy_rtx (op2
));
521 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
522 NULL_RTX
, neq_label
);
524 /* Add branch probability to jump we just created. */
525 jump
= get_last_insn ();
526 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
527 GEN_INT (REG_BR_PROB_BASE
- prob
),
530 tmp1
= simplify_gen_binary (operation
, mode
,
531 copy_rtx (op1
), GEN_INT (value
));
532 tmp1
= force_operand (tmp1
, target
);
534 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
536 emit_jump_insn (gen_jump (end_label
));
539 emit_label (neq_label
);
540 tmp1
= simplify_gen_binary (operation
, mode
,
541 copy_rtx (op1
), copy_rtx (tmp
));
542 tmp1
= force_operand (tmp1
, target
);
544 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
546 emit_label (end_label
);
548 sequence
= get_insns ();
550 rebuild_jump_labels (sequence
);
554 /* Do transform 1) on INSN if applicable. */
556 divmod_fixed_value_transform (rtx insn
)
558 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
560 enum machine_mode mode
;
561 gcov_type val
, count
, all
;
565 set
= single_set (insn
);
569 set_src
= SET_SRC (set
);
570 set_dest
= SET_DEST (set
);
571 code
= GET_CODE (set_src
);
572 mode
= GET_MODE (set_dest
);
574 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
576 op1
= XEXP (set_src
, false);
577 op2
= XEXP (set_src
, 1);
579 for (histogram
= REG_NOTES (insn
);
581 histogram
= XEXP (histogram
, 1))
582 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
583 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
589 histogram
= XEXP (XEXP (histogram
, 0), 1);
590 value
= XEXP (histogram
, 0);
591 histogram
= XEXP (histogram
, 1);
592 val
= INTVAL (XEXP (histogram
, 0));
593 histogram
= XEXP (histogram
, 1);
594 count
= INTVAL (XEXP (histogram
, 0));
595 histogram
= XEXP (histogram
, 1);
596 all
= INTVAL (XEXP (histogram
, 0));
598 /* We require that count is at least half of all; this means
599 that for the transformation to fire the value must be constant
600 at least 50% of time (and 75% gives the guarantee of usage). */
601 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
605 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
608 /* Compute probability of taking the optimal path. */
609 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
611 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
614 insert_insn_on_edge (
615 gen_divmod_fixed_value (mode
, code
, set_dest
,
616 op1
, op2
, val
, prob
), e
);
621 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
622 and OP2, result TARGET and probability of taking the optimal path PROB). */
624 gen_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
625 rtx op1
, rtx op2
, int prob
)
627 rtx tmp
, tmp1
, tmp2
, tmp3
, jump
;
628 rtx neq_label
= gen_label_rtx ();
629 rtx end_label
= gen_label_rtx ();
636 tmp
= gen_reg_rtx (mode
);
637 emit_move_insn (tmp
, copy_rtx (op2
));
642 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
644 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
646 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
647 NULL_RTX
, neq_label
);
649 /* Add branch probability to jump we just created. */
650 jump
= get_last_insn ();
651 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
652 GEN_INT (REG_BR_PROB_BASE
- prob
),
655 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
658 emit_move_insn (copy_rtx (target
), tmp3
);
659 emit_jump_insn (gen_jump (end_label
));
662 emit_label (neq_label
);
663 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
664 tmp1
= force_operand (tmp1
, target
);
666 emit_move_insn (target
, tmp1
);
668 emit_label (end_label
);
670 sequence
= get_insns ();
672 rebuild_jump_labels (sequence
);
676 /* Do transform 2) on INSN if applicable. */
678 mod_pow2_value_transform (rtx insn
)
680 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
682 enum machine_mode mode
;
683 gcov_type wrong_values
, count
;
687 set
= single_set (insn
);
691 set_src
= SET_SRC (set
);
692 set_dest
= SET_DEST (set
);
693 code
= GET_CODE (set_src
);
694 mode
= GET_MODE (set_dest
);
698 op1
= XEXP (set_src
, 0);
699 op2
= XEXP (set_src
, 1);
701 for (histogram
= REG_NOTES (insn
);
703 histogram
= XEXP (histogram
, 1))
704 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
705 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
711 histogram
= XEXP (XEXP (histogram
, 0), 1);
712 value
= XEXP (histogram
, 0);
713 histogram
= XEXP (histogram
, 1);
714 wrong_values
=INTVAL (XEXP (histogram
, 0));
715 histogram
= XEXP (histogram
, 1);
718 for (i
= 0; i
< GET_MODE_BITSIZE (mode
); i
++)
720 count
+= INTVAL (XEXP (histogram
, 0));
721 histogram
= XEXP (histogram
, 1);
724 if (!rtx_equal_p (op2
, value
))
727 /* We require that we hit a power of two at least half of all evaluations. */
728 if (count
< wrong_values
)
732 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
735 /* Compute probability of taking the optimal path. */
736 all
= count
+ wrong_values
;
737 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
739 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
742 insert_insn_on_edge (
743 gen_mod_pow2 (mode
, code
, set_dest
, op1
, op2
, prob
), e
);
748 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
749 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
750 probability of taking the optimal path(s) PROB1 and PROB2). */
752 gen_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
753 rtx target
, rtx op1
, rtx op2
, int sub
, int prob1
, int prob2
)
756 rtx end_label
= gen_label_rtx ();
764 tmp
= gen_reg_rtx (mode
);
765 emit_move_insn (tmp
, copy_rtx (op2
));
770 emit_move_insn (target
, copy_rtx (op1
));
771 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
772 NULL_RTX
, end_label
);
774 /* Add branch probability to jump we just created. */
775 jump
= get_last_insn ();
776 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
777 GEN_INT (prob1
), REG_NOTES (jump
));
779 for (i
= 0; i
< sub
; i
++)
781 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
784 emit_move_insn (target
, tmp1
);
785 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
786 NULL_RTX
, end_label
);
788 /* Add branch probability to jump we just created. */
789 jump
= get_last_insn ();
790 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
791 GEN_INT (prob2
), REG_NOTES (jump
));
794 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
795 tmp1
= force_operand (tmp1
, target
);
797 emit_move_insn (target
, tmp1
);
799 emit_label (end_label
);
801 sequence
= get_insns ();
803 rebuild_jump_labels (sequence
);
807 /* Do transforms 3) and 4) on INSN if applicable. */
809 mod_subtract_transform (rtx insn
)
811 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
813 enum machine_mode mode
;
814 gcov_type wrong_values
, counts
[2], count
, all
;
818 set
= single_set (insn
);
822 set_src
= SET_SRC (set
);
823 set_dest
= SET_DEST (set
);
824 code
= GET_CODE (set_src
);
825 mode
= GET_MODE (set_dest
);
829 op1
= XEXP (set_src
, 0);
830 op2
= XEXP (set_src
, 1);
832 for (histogram
= REG_NOTES (insn
);
834 histogram
= XEXP (histogram
, 1))
835 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
836 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
842 histogram
= XEXP (XEXP (histogram
, 0), 1);
843 value
= XEXP (histogram
, 0);
844 histogram
= XEXP (histogram
, 1);
847 for (i
= 0; i
< 2; i
++)
849 counts
[i
] = INTVAL (XEXP (histogram
, 0));
851 histogram
= XEXP (histogram
, 1);
853 wrong_values
= INTVAL (XEXP (histogram
, 0));
854 histogram
= XEXP (histogram
, 1);
855 wrong_values
+= INTVAL (XEXP (histogram
, 0));
858 /* We require that we use just subtractions in at least 50% of all
861 for (i
= 0; i
< 2; i
++)
864 if (count
* 2 >= all
)
872 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
875 /* Compute probability of taking the optimal path(s). */
876 prob1
= (counts
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
877 prob2
= (counts
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
879 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
882 insert_insn_on_edge (
883 gen_mod_subtract (mode
, code
, set_dest
,
884 op1
, op2
, i
, prob1
, prob2
), e
);
890 /* Generate code for transformation 5 for mem with ADDRESS and a constant
891 step DELTA. WRITE is true if the reference is a store to mem. */
894 gen_speculative_prefetch (rtx address
, gcov_type delta
, int write
)
899 /* TODO: we do the prefetching for just one iteration ahead, which
900 often is not enough. */
902 if (offsettable_address_p (0, VOIDmode
, address
))
903 tmp
= plus_constant (copy_rtx (address
), delta
);
906 tmp
= simplify_gen_binary (PLUS
, Pmode
,
907 copy_rtx (address
), GEN_INT (delta
));
908 tmp
= force_operand (tmp
, NULL
);
910 if (! (*insn_data
[(int)CODE_FOR_prefetch
].operand
[0].predicate
)
911 (tmp
, insn_data
[(int)CODE_FOR_prefetch
].operand
[0].mode
))
912 tmp
= force_reg (Pmode
, tmp
);
913 emit_insn (gen_prefetch (tmp
, GEN_INT (write
), GEN_INT (3)));
914 sequence
= get_insns ();
920 /* Do transform 5) on INSN if applicable. */
923 speculative_prefetching_transform (rtx insn
)
925 rtx histogram
, value
;
926 gcov_type val
, count
, all
;
931 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
934 if (!find_mem_reference (insn
, &mem
, &write
))
937 address
= XEXP (mem
, 0);
938 if (side_effects_p (address
))
941 if (CONSTANT_P (address
))
944 for (histogram
= REG_NOTES (insn
);
946 histogram
= XEXP (histogram
, 1))
947 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
948 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA
))
954 histogram
= XEXP (XEXP (histogram
, 0), 1);
955 value
= XEXP (histogram
, 0);
956 histogram
= XEXP (histogram
, 1);
957 /* Skip last value referenced. */
958 histogram
= XEXP (histogram
, 1);
959 val
= INTVAL (XEXP (histogram
, 0));
960 histogram
= XEXP (histogram
, 1);
961 count
= INTVAL (XEXP (histogram
, 0));
962 histogram
= XEXP (histogram
, 1);
963 all
= INTVAL (XEXP (histogram
, 0));
965 /* With that few executions we do not really have a reason to optimize the
966 statement, and more importantly, the data about differences of addresses
967 are spoiled by the first item that had no previous value to compare
972 /* We require that count is at least half of all; this means
973 that for the transformation to fire the value must be constant
974 at least 50% of time (and 75% gives the guarantee of usage). */
975 if (!rtx_equal_p (address
, value
) || 2 * count
< all
)
978 /* If the difference is too small, it does not make too much sense to
979 prefetch, as the memory is probably already in cache. */
980 if (val
>= NOPREFETCH_RANGE_MIN
&& val
<= NOPREFETCH_RANGE_MAX
)
984 fprintf (dump_file
, "Speculative prefetching for insn %d\n",
987 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
989 insert_insn_on_edge (gen_speculative_prefetch (address
, val
, write
), e
);
993 #endif /* HAVE_prefetch */
995 /* Connection to the outside world. */
996 /* Struct for IR-dependent hooks. */
997 struct value_prof_hooks
{
998 /* Find list of values for which we want to measure histograms. */
999 void (*find_values_to_profile
) (histogram_values
*);
1001 /* Identify and exploit properties of values that are hard to analyze
1002 statically. See value-prof.c for more detail. */
1003 bool (*value_profile_transformations
) (void);
1006 /* Hooks for RTL-based versions (the only ones that currently work). */
1007 static struct value_prof_hooks rtl_value_prof_hooks
=
1009 rtl_find_values_to_profile
,
1010 rtl_value_profile_transformations
1014 rtl_register_value_prof_hooks (void)
1016 value_prof_hooks
= &rtl_value_prof_hooks
;
1021 /* Tree-based versions are stubs for now. */
1023 tree_find_values_to_profile (histogram_values
*values ATTRIBUTE_UNUSED
)
1029 tree_value_profile_transformations (void)
1034 static struct value_prof_hooks tree_value_prof_hooks
= {
1035 tree_find_values_to_profile
,
1036 tree_value_profile_transformations
1040 tree_register_value_prof_hooks (void)
1042 value_prof_hooks
= &tree_value_prof_hooks
;
1047 /* IR-independent entry points. */
1049 find_values_to_profile (histogram_values
*values
)
1051 (value_prof_hooks
->find_values_to_profile
) (values
);
1055 value_profile_transformations (void)
1057 return (value_prof_hooks
->value_profile_transformations
) ();