1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
37 static struct value_prof_hooks
*value_prof_hooks
;
39 /* In this file value profile based optimizations will be placed (none are
40 here just now, but they are hopefully coming soon).
42 Every such optimization should add its requirements for profiled values to
43 insn_values_to_profile function. This function is called from branch_prob
44 in profile.c and the requested values are instrumented by it in the first
45 compilation with -fprofile-arcs. The optimization may then read the
46 gathered data in the second compilation with -fbranch-probabilities.
47 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
48 insn. The argument to the note consists of an EXPR_LIST where its
49 members have the following meaning (from the first to the last):
51 -- type of information gathered (HIST_TYPE*)
52 -- the expression that is profiled
53 -- list of counters starting from the first one. */
55 static void insn_divmod_values_to_profile (rtx
, unsigned *,
56 struct histogram_value
**);
57 static void insn_values_to_profile (rtx
, unsigned *, struct histogram_value
**);
58 static rtx
gen_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
60 static rtx
gen_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
);
61 static rtx
gen_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
63 static bool divmod_fixed_value_transform (rtx insn
);
64 static bool mod_pow2_value_transform (rtx
);
65 static bool mod_subtract_transform (rtx
);
67 /* Release the list of VALUES of length N_VALUES for that we want to measure
70 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED
,
71 struct histogram_value
*values
)
76 /* Find values inside INSN for that we want to measure histograms for
77 division/modulo optimization. */
79 insn_divmod_values_to_profile (rtx insn
, unsigned *n_values
,
80 struct histogram_value
**values
)
82 rtx set
, set_src
, op1
, op2
;
83 enum machine_mode mode
;
88 set
= single_set (insn
);
92 mode
= GET_MODE (SET_DEST (set
));
93 if (!INTEGRAL_MODE_P (mode
))
96 set_src
= SET_SRC (set
);
97 switch (GET_CODE (set_src
))
103 op1
= XEXP (set_src
, 0);
104 op2
= XEXP (set_src
, 1);
105 if (side_effects_p (op2
))
108 /* Check for a special case where the divisor is power of 2. */
109 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
111 *values
= xrealloc (*values
,
113 * sizeof (struct histogram_value
));
114 (*values
)[*n_values
].value
= op2
;
115 (*values
)[*n_values
].seq
= NULL_RTX
;
116 (*values
)[*n_values
].mode
= mode
;
117 (*values
)[*n_values
].insn
= insn
;
118 (*values
)[*n_values
].type
= HIST_TYPE_POW2
;
119 (*values
)[*n_values
].hdata
.pow2
.may_be_other
= 1;
123 /* Check whether the divisor is not in fact a constant. */
124 if (!CONSTANT_P (op2
))
126 *values
= xrealloc (*values
,
128 * sizeof (struct histogram_value
));
129 (*values
)[*n_values
].value
= op2
;
130 (*values
)[*n_values
].mode
= mode
;
131 (*values
)[*n_values
].seq
= NULL_RTX
;
132 (*values
)[*n_values
].insn
= insn
;
133 (*values
)[*n_values
].type
= HIST_TYPE_SINGLE_VALUE
;
137 /* For mod, check whether it is not often a noop (or replaceable by
138 a few subtractions). */
139 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
143 *values
= xrealloc (*values
,
145 * sizeof (struct histogram_value
));
147 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
148 (*values
)[*n_values
].value
= force_operand (tmp
, NULL_RTX
);
149 (*values
)[*n_values
].seq
= get_insns ();
151 (*values
)[*n_values
].mode
= mode
;
152 (*values
)[*n_values
].insn
= insn
;
153 (*values
)[*n_values
].type
= HIST_TYPE_INTERVAL
;
154 (*values
)[*n_values
].hdata
.intvl
.int_start
= 0;
155 (*values
)[*n_values
].hdata
.intvl
.steps
= 2;
156 (*values
)[*n_values
].hdata
.intvl
.may_be_less
= 1;
157 (*values
)[*n_values
].hdata
.intvl
.may_be_more
= 1;
167 /* Find values inside INSN for that we want to measure histograms and adds
168 them to list VALUES (increasing the record of its length in N_VALUES). */
170 insn_values_to_profile (rtx insn
,
172 struct histogram_value
**values
)
174 if (flag_value_profile_transformations
)
175 insn_divmod_values_to_profile (insn
, n_values
, values
);
178 /* Find list of values for that we want to measure histograms. */
180 rtl_find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
185 life_analysis (NULL
, PROP_DEATH_NOTES
);
189 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
190 insn_values_to_profile (insn
, n_values
, values
);
192 for (i
= 0; i
< *n_values
; i
++)
194 switch ((*values
)[i
].type
)
196 case HIST_TYPE_INTERVAL
:
199 "Interval counter for insn %d, range %d -- %d.\n",
200 INSN_UID ((rtx
)(*values
)[i
].insn
),
201 (*values
)[i
].hdata
.intvl
.int_start
,
202 ((*values
)[i
].hdata
.intvl
.int_start
203 + (*values
)[i
].hdata
.intvl
.steps
- 1));
204 (*values
)[i
].n_counters
= (*values
)[i
].hdata
.intvl
.steps
+
205 ((*values
)[i
].hdata
.intvl
.may_be_less
? 1 : 0) +
206 ((*values
)[i
].hdata
.intvl
.may_be_more
? 1 : 0);
212 "Pow2 counter for insn %d.\n",
213 INSN_UID ((rtx
)(*values
)[i
].insn
));
214 (*values
)[i
].n_counters
215 = GET_MODE_BITSIZE ((*values
)[i
].mode
)
216 + ((*values
)[i
].hdata
.pow2
.may_be_other
? 1 : 0);
219 case HIST_TYPE_SINGLE_VALUE
:
222 "Single value counter for insn %d.\n",
223 INSN_UID ((rtx
)(*values
)[i
].insn
));
224 (*values
)[i
].n_counters
= 3;
227 case HIST_TYPE_CONST_DELTA
:
230 "Constant delta counter for insn %d.\n",
231 INSN_UID ((rtx
)(*values
)[i
].insn
));
232 (*values
)[i
].n_counters
= 4;
239 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
242 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
243 them to identify and exploit properties of values that are hard to analyze
246 We do following transformations:
252 where b is almost always a constant N is transformed to
265 where b is almost always a power of 2 and the division is unsigned
266 TODO -- handle signed case as well
268 if ((b & (b - 1)) == 0)
273 Note that when b = 0, no error will occur and x = a; this is correct,
274 as result of such operation is undefined.
280 where a is almost always less then b and the division is unsigned
281 TODO -- handle signed case as well
291 where a is almost always less then 2 * b and the division is unsigned
292 TODO -- handle signed case as well
300 It would be possible to continue analogically for K * b for other small
301 K's, but it is probably not useful.
305 There are other useful cases that could be handled by a similar mechanism,
308 for (i = 0; i < n; i++)
311 transform to (for constant N):
314 for (i = 0; i < N; i++)
317 for (i = 0; i < n; i++)
319 making unroller happy. Since this may grow the code significantly,
320 we would have to be very careful here. */
323 rtl_value_profile_transformations (void)
328 for (insn
= get_insns (); insn
; insn
= next
)
330 next
= NEXT_INSN (insn
);
335 /* Scan for insn carrying a histogram. */
336 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
339 /* Ignore cold areas -- we are growing a code. */
340 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
345 fprintf (dump_file
, "Trying transformations on insn %d\n",
347 print_rtl_single (dump_file
, insn
);
350 /* Transformations: */
351 if (flag_value_profile_transformations
352 && (mod_subtract_transform (insn
)
353 || divmod_fixed_value_transform (insn
)
354 || mod_pow2_value_transform (insn
)))
360 commit_edge_insertions ();
361 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
367 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
368 and OP2 whose value is expected to be VALUE and result TARGET). */
370 gen_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
371 rtx target
, rtx op1
, rtx op2
, gcov_type value
)
374 rtx neq_label
= gen_label_rtx ();
375 rtx end_label
= gen_label_rtx ();
382 tmp
= gen_reg_rtx (mode
);
383 emit_move_insn (tmp
, copy_rtx (op2
));
388 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
389 NULL_RTX
, neq_label
);
390 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), GEN_INT (value
));
391 tmp1
= force_operand (tmp1
, target
);
393 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
395 emit_jump_insn (gen_jump (end_label
));
398 emit_label (neq_label
);
399 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
400 tmp1
= force_operand (tmp1
, target
);
402 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
404 emit_label (end_label
);
406 sequence
= get_insns ();
408 rebuild_jump_labels (sequence
);
412 /* Do transform 1) on INSN if applicable. */
414 divmod_fixed_value_transform (rtx insn
)
416 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
418 enum machine_mode mode
;
419 gcov_type val
, count
, all
;
422 set
= single_set (insn
);
426 set_src
= SET_SRC (set
);
427 set_dest
= SET_DEST (set
);
428 code
= GET_CODE (set_src
);
429 mode
= GET_MODE (set_dest
);
431 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
433 op1
= XEXP (set_src
, false);
434 op2
= XEXP (set_src
, 1);
436 for (histogram
= REG_NOTES (insn
);
438 histogram
= XEXP (histogram
, 1))
439 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
440 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
446 histogram
= XEXP (XEXP (histogram
, 0), 1);
447 value
= XEXP (histogram
, 0);
448 histogram
= XEXP (histogram
, 1);
449 val
= INTVAL (XEXP (histogram
, 0));
450 histogram
= XEXP (histogram
, 1);
451 count
= INTVAL (XEXP (histogram
, 0));
452 histogram
= XEXP (histogram
, 1);
453 all
= INTVAL (XEXP (histogram
, 0));
455 /* We require that count is at least half of all; this means
456 that for the transformation to fire the value must be constant
457 at least 50% of time (and 75% gives the guarantee of usage). */
458 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
462 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
465 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
468 insert_insn_on_edge (
469 gen_divmod_fixed_value (mode
, code
, set_dest
, op1
, op2
, val
), e
);
474 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
475 and OP2 and result TARGET). */
477 gen_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
480 rtx tmp
, tmp1
, tmp2
, tmp3
;
481 rtx neq_label
= gen_label_rtx ();
482 rtx end_label
= gen_label_rtx ();
489 tmp
= gen_reg_rtx (mode
);
490 emit_move_insn (tmp
, copy_rtx (op2
));
495 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
497 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
499 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
500 NULL_RTX
, neq_label
);
501 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
504 emit_move_insn (copy_rtx (target
), tmp3
);
505 emit_jump_insn (gen_jump (end_label
));
508 emit_label (neq_label
);
509 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
510 tmp1
= force_operand (tmp1
, target
);
512 emit_move_insn (target
, tmp1
);
514 emit_label (end_label
);
516 sequence
= get_insns ();
518 rebuild_jump_labels (sequence
);
522 /* Do transform 2) on INSN if applicable. */
524 mod_pow2_value_transform (rtx insn
)
526 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
528 enum machine_mode mode
;
529 gcov_type wrong_values
, count
;
533 set
= single_set (insn
);
537 set_src
= SET_SRC (set
);
538 set_dest
= SET_DEST (set
);
539 code
= GET_CODE (set_src
);
540 mode
= GET_MODE (set_dest
);
544 op1
= XEXP (set_src
, 0);
545 op2
= XEXP (set_src
, 1);
547 for (histogram
= REG_NOTES (insn
);
549 histogram
= XEXP (histogram
, 1))
550 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
551 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
557 histogram
= XEXP (XEXP (histogram
, 0), 1);
558 value
= XEXP (histogram
, 0);
559 histogram
= XEXP (histogram
, 1);
560 wrong_values
=INTVAL (XEXP (histogram
, 0));
561 histogram
= XEXP (histogram
, 1);
564 for (i
= 0; i
< GET_MODE_BITSIZE (mode
); i
++)
566 count
+= INTVAL (XEXP (histogram
, 0));
567 histogram
= XEXP (histogram
, 1);
570 if (!rtx_equal_p (op2
, value
))
573 /* We require that we hit a power of two at least half of all evaluations. */
574 if (count
< wrong_values
)
578 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
581 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
584 insert_insn_on_edge (
585 gen_mod_pow2 (mode
, code
, set_dest
, op1
, op2
), e
);
590 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
591 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
593 gen_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
594 rtx target
, rtx op1
, rtx op2
, int sub
)
597 rtx end_label
= gen_label_rtx ();
605 tmp
= gen_reg_rtx (mode
);
606 emit_move_insn (tmp
, copy_rtx (op2
));
611 emit_move_insn (target
, copy_rtx (op1
));
612 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
613 NULL_RTX
, end_label
);
616 for (i
= 0; i
< sub
; i
++)
618 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
621 emit_move_insn (target
, tmp1
);
622 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
623 NULL_RTX
, end_label
);
626 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
627 tmp1
= force_operand (tmp1
, target
);
629 emit_move_insn (target
, tmp1
);
631 emit_label (end_label
);
633 sequence
= get_insns ();
635 rebuild_jump_labels (sequence
);
639 /* Do transforms 3) and 4) on INSN if applicable. */
641 mod_subtract_transform (rtx insn
)
643 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
645 enum machine_mode mode
;
646 gcov_type wrong_values
, counts
[2], count
, all
;
650 set
= single_set (insn
);
654 set_src
= SET_SRC (set
);
655 set_dest
= SET_DEST (set
);
656 code
= GET_CODE (set_src
);
657 mode
= GET_MODE (set_dest
);
661 op1
= XEXP (set_src
, 0);
662 op2
= XEXP (set_src
, 1);
664 for (histogram
= REG_NOTES (insn
);
666 histogram
= XEXP (histogram
, 1))
667 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
668 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
674 histogram
= XEXP (XEXP (histogram
, 0), 1);
675 value
= XEXP (histogram
, 0);
676 histogram
= XEXP (histogram
, 1);
679 for (i
= 0; i
< 2; i
++)
681 counts
[i
] = INTVAL (XEXP (histogram
, 0));
683 histogram
= XEXP (histogram
, 1);
685 wrong_values
= INTVAL (XEXP (histogram
, 0));
686 histogram
= XEXP (histogram
, 1);
687 wrong_values
+= INTVAL (XEXP (histogram
, 0));
690 /* We require that we use just subtractions in at least 50% of all
693 for (i
= 0; i
< 2; i
++)
696 if (count
* 2 >= all
)
704 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
707 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
710 insert_insn_on_edge (
711 gen_mod_subtract (mode
, code
, set_dest
, op1
, op2
, i
), e
);
716 /* Connection to the outside world. */
717 /* Struct for IR-dependent hooks. */
718 struct value_prof_hooks
{
719 /* Find list of values for which we want to measure histograms. */
720 void (*find_values_to_profile
) (unsigned *, struct histogram_value
**);
722 /* Identify and exploit properties of values that are hard to analyze
723 statically. See value-prof.c for more detail. */
724 bool (*value_profile_transformations
) (void);
727 /* Hooks for RTL-based versions (the only ones that currently work). */
728 static struct value_prof_hooks rtl_value_prof_hooks
=
730 rtl_find_values_to_profile
,
731 rtl_value_profile_transformations
735 rtl_register_value_prof_hooks (void)
737 value_prof_hooks
= &rtl_value_prof_hooks
;
742 /* Tree-based versions are stubs for now. */
744 tree_find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
752 tree_value_profile_transformations (void)
757 static struct value_prof_hooks tree_value_prof_hooks
= {
758 tree_find_values_to_profile
,
759 tree_value_profile_transformations
763 tree_register_value_prof_hooks (void)
765 value_prof_hooks
= &tree_value_prof_hooks
;
770 /* IR-independent entry points. */
772 find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
774 (value_prof_hooks
->find_values_to_profile
) (n_values
, values
);
778 value_profile_transformations (void)
780 return (value_prof_hooks
->value_profile_transformations
) ();