1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
37 static struct value_prof_hooks
*value_prof_hooks
;
39 /* In this file value profile based optimizations will be placed (none are
40 here just now, but they are hopefully coming soon).
42 Every such optimization should add its requirements for profiled values to
43 insn_values_to_profile function. This function is called from branch_prob
44 in profile.c and the requested values are instrumented by it in the first
45 compilation with -fprofile-arcs. The optimization may then read the
46 gathered data in the second compilation with -fbranch-probabilities.
47 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
48 insn. The argument to the note consists of an EXPR_LIST where its
49 members have the following meaning (from the first to the last):
51 -- type of information gathered (HIST_TYPE*)
52 -- the expression that is profiled
53 -- list of counters starting from the first one. */
55 static void insn_divmod_values_to_profile (rtx
, unsigned *,
56 struct histogram_value
**);
57 static void insn_values_to_profile (rtx
, unsigned *, struct histogram_value
**);
58 static rtx
gen_divmod_fixed_value (enum machine_mode
, enum rtx_code
, rtx
, rtx
,
60 static rtx
gen_mod_pow2 (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
, int);
61 static rtx
gen_mod_subtract (enum machine_mode
, enum rtx_code
, rtx
, rtx
, rtx
,
63 static bool divmod_fixed_value_transform (rtx insn
);
64 static bool mod_pow2_value_transform (rtx
);
65 static bool mod_subtract_transform (rtx
);
67 /* Release the list of VALUES of length N_VALUES for that we want to measure
70 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED
,
71 struct histogram_value
*values
)
76 /* Find values inside INSN for that we want to measure histograms for
77 division/modulo optimization. */
79 insn_divmod_values_to_profile (rtx insn
, unsigned *n_values
,
80 struct histogram_value
**values
)
82 rtx set
, set_src
, op1
, op2
;
83 enum machine_mode mode
;
88 set
= single_set (insn
);
92 mode
= GET_MODE (SET_DEST (set
));
93 if (!INTEGRAL_MODE_P (mode
))
96 set_src
= SET_SRC (set
);
97 switch (GET_CODE (set_src
))
103 op1
= XEXP (set_src
, 0);
104 op2
= XEXP (set_src
, 1);
105 if (side_effects_p (op2
))
108 /* Check for a special case where the divisor is power of 2. */
109 if ((GET_CODE (set_src
) == UMOD
) && !CONSTANT_P (op2
))
111 *values
= xrealloc (*values
,
113 * sizeof (struct histogram_value
));
114 (*values
)[*n_values
].value
= op2
;
115 (*values
)[*n_values
].seq
= NULL_RTX
;
116 (*values
)[*n_values
].mode
= mode
;
117 (*values
)[*n_values
].insn
= insn
;
118 (*values
)[*n_values
].type
= HIST_TYPE_POW2
;
119 (*values
)[*n_values
].hdata
.pow2
.may_be_other
= 1;
123 /* Check whether the divisor is not in fact a constant. */
124 if (!CONSTANT_P (op2
))
126 *values
= xrealloc (*values
,
128 * sizeof (struct histogram_value
));
129 (*values
)[*n_values
].value
= op2
;
130 (*values
)[*n_values
].mode
= mode
;
131 (*values
)[*n_values
].seq
= NULL_RTX
;
132 (*values
)[*n_values
].insn
= insn
;
133 (*values
)[*n_values
].type
= HIST_TYPE_SINGLE_VALUE
;
137 /* For mod, check whether it is not often a noop (or replaceable by
138 a few subtractions). */
139 if (GET_CODE (set_src
) == UMOD
&& !side_effects_p (op1
))
143 *values
= xrealloc (*values
,
145 * sizeof (struct histogram_value
));
147 tmp
= simplify_gen_binary (DIV
, mode
, copy_rtx (op1
), copy_rtx (op2
));
148 (*values
)[*n_values
].value
= force_operand (tmp
, NULL_RTX
);
149 (*values
)[*n_values
].seq
= get_insns ();
151 (*values
)[*n_values
].mode
= mode
;
152 (*values
)[*n_values
].insn
= insn
;
153 (*values
)[*n_values
].type
= HIST_TYPE_INTERVAL
;
154 (*values
)[*n_values
].hdata
.intvl
.int_start
= 0;
155 (*values
)[*n_values
].hdata
.intvl
.steps
= 2;
156 (*values
)[*n_values
].hdata
.intvl
.may_be_less
= 1;
157 (*values
)[*n_values
].hdata
.intvl
.may_be_more
= 1;
167 /* Find values inside INSN for that we want to measure histograms and adds
168 them to list VALUES (increasing the record of its length in N_VALUES). */
170 insn_values_to_profile (rtx insn
,
172 struct histogram_value
**values
)
174 if (flag_value_profile_transformations
)
175 insn_divmod_values_to_profile (insn
, n_values
, values
);
178 /* Find list of values for that we want to measure histograms. */
180 rtl_find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
185 life_analysis (NULL
, PROP_DEATH_NOTES
);
189 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
190 insn_values_to_profile (insn
, n_values
, values
);
192 for (i
= 0; i
< *n_values
; i
++)
194 switch ((*values
)[i
].type
)
196 case HIST_TYPE_INTERVAL
:
199 "Interval counter for insn %d, range %d -- %d.\n",
200 INSN_UID ((rtx
)(*values
)[i
].insn
),
201 (*values
)[i
].hdata
.intvl
.int_start
,
202 ((*values
)[i
].hdata
.intvl
.int_start
203 + (*values
)[i
].hdata
.intvl
.steps
- 1));
204 (*values
)[i
].n_counters
= (*values
)[i
].hdata
.intvl
.steps
+
205 ((*values
)[i
].hdata
.intvl
.may_be_less
? 1 : 0) +
206 ((*values
)[i
].hdata
.intvl
.may_be_more
? 1 : 0);
212 "Pow2 counter for insn %d.\n",
213 INSN_UID ((rtx
)(*values
)[i
].insn
));
214 (*values
)[i
].n_counters
215 = GET_MODE_BITSIZE ((*values
)[i
].mode
)
216 + ((*values
)[i
].hdata
.pow2
.may_be_other
? 1 : 0);
219 case HIST_TYPE_SINGLE_VALUE
:
222 "Single value counter for insn %d.\n",
223 INSN_UID ((rtx
)(*values
)[i
].insn
));
224 (*values
)[i
].n_counters
= 3;
227 case HIST_TYPE_CONST_DELTA
:
230 "Constant delta counter for insn %d.\n",
231 INSN_UID ((rtx
)(*values
)[i
].insn
));
232 (*values
)[i
].n_counters
= 4;
239 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
242 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
243 them to identify and exploit properties of values that are hard to analyze
246 We do following transformations:
252 where b is almost always a constant N is transformed to
265 where b is almost always a power of 2 and the division is unsigned
266 TODO -- handle signed case as well
268 if ((b & (b - 1)) == 0)
273 Note that when b = 0, no error will occur and x = a; this is correct,
274 as result of such operation is undefined.
280 where a is almost always less then b and the division is unsigned
281 TODO -- handle signed case as well
291 where a is almost always less then 2 * b and the division is unsigned
292 TODO -- handle signed case as well
300 It would be possible to continue analogically for K * b for other small
301 K's, but it is probably not useful.
305 There are other useful cases that could be handled by a similar mechanism,
308 for (i = 0; i < n; i++)
311 transform to (for constant N):
314 for (i = 0; i < N; i++)
317 for (i = 0; i < n; i++)
319 making unroller happy. Since this may grow the code significantly,
320 we would have to be very careful here. */
323 rtl_value_profile_transformations (void)
328 for (insn
= get_insns (); insn
; insn
= next
)
330 next
= NEXT_INSN (insn
);
335 /* Scan for insn carrying a histogram. */
336 if (!find_reg_note (insn
, REG_VALUE_PROFILE
, 0))
339 /* Ignore cold areas -- we are growing a code. */
340 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn
)))
345 fprintf (dump_file
, "Trying transformations on insn %d\n",
347 print_rtl_single (dump_file
, insn
);
350 /* Transformations: */
351 if (flag_value_profile_transformations
352 && (mod_subtract_transform (insn
)
353 || divmod_fixed_value_transform (insn
)
354 || mod_pow2_value_transform (insn
)))
360 commit_edge_insertions ();
361 allocate_reg_info (max_reg_num (), FALSE
, FALSE
);
367 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
368 and OP2, whose value is expected to be VALUE, result TARGET and
369 probability of taking the optimal path PROB). */
371 gen_divmod_fixed_value (enum machine_mode mode
, enum rtx_code operation
,
372 rtx target
, rtx op1
, rtx op2
, gcov_type value
,
376 rtx neq_label
= gen_label_rtx ();
377 rtx end_label
= gen_label_rtx ();
384 tmp
= gen_reg_rtx (mode
);
385 emit_move_insn (tmp
, copy_rtx (op2
));
390 do_compare_rtx_and_jump (tmp
, GEN_INT (value
), NE
, 0, mode
, NULL_RTX
,
391 NULL_RTX
, neq_label
);
393 /* Add branch probability to jump we just created. */
394 jump
= get_last_insn ();
395 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
396 GEN_INT (REG_BR_PROB_BASE
- prob
),
399 tmp1
= simplify_gen_binary (operation
, mode
,
400 copy_rtx (op1
), GEN_INT (value
));
401 tmp1
= force_operand (tmp1
, target
);
403 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
405 emit_jump_insn (gen_jump (end_label
));
408 emit_label (neq_label
);
409 tmp1
= simplify_gen_binary (operation
, mode
,
410 copy_rtx (op1
), copy_rtx (tmp
));
411 tmp1
= force_operand (tmp1
, target
);
413 emit_move_insn (copy_rtx (target
), copy_rtx (tmp1
));
415 emit_label (end_label
);
417 sequence
= get_insns ();
419 rebuild_jump_labels (sequence
);
423 /* Do transform 1) on INSN if applicable. */
425 divmod_fixed_value_transform (rtx insn
)
427 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
429 enum machine_mode mode
;
430 gcov_type val
, count
, all
;
434 set
= single_set (insn
);
438 set_src
= SET_SRC (set
);
439 set_dest
= SET_DEST (set
);
440 code
= GET_CODE (set_src
);
441 mode
= GET_MODE (set_dest
);
443 if (code
!= DIV
&& code
!= MOD
&& code
!= UDIV
&& code
!= UMOD
)
445 op1
= XEXP (set_src
, false);
446 op2
= XEXP (set_src
, 1);
448 for (histogram
= REG_NOTES (insn
);
450 histogram
= XEXP (histogram
, 1))
451 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
452 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE
))
458 histogram
= XEXP (XEXP (histogram
, 0), 1);
459 value
= XEXP (histogram
, 0);
460 histogram
= XEXP (histogram
, 1);
461 val
= INTVAL (XEXP (histogram
, 0));
462 histogram
= XEXP (histogram
, 1);
463 count
= INTVAL (XEXP (histogram
, 0));
464 histogram
= XEXP (histogram
, 1);
465 all
= INTVAL (XEXP (histogram
, 0));
467 /* We require that count is at least half of all; this means
468 that for the transformation to fire the value must be constant
469 at least 50% of time (and 75% gives the guarantee of usage). */
470 if (!rtx_equal_p (op2
, value
) || 2 * count
< all
)
474 fprintf (dump_file
, "Div/mod by constant transformation on insn %d\n",
477 /* Compute probability of taking the optimal path. */
478 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
480 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
483 insert_insn_on_edge (
484 gen_divmod_fixed_value (mode
, code
, set_dest
,
485 op1
, op2
, val
, prob
), e
);
490 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
491 and OP2, result TARGET and probability of taking the optimal path PROB). */
493 gen_mod_pow2 (enum machine_mode mode
, enum rtx_code operation
, rtx target
,
494 rtx op1
, rtx op2
, int prob
)
496 rtx tmp
, tmp1
, tmp2
, tmp3
, jump
;
497 rtx neq_label
= gen_label_rtx ();
498 rtx end_label
= gen_label_rtx ();
505 tmp
= gen_reg_rtx (mode
);
506 emit_move_insn (tmp
, copy_rtx (op2
));
511 tmp1
= expand_simple_binop (mode
, PLUS
, tmp
, constm1_rtx
, NULL_RTX
,
513 tmp2
= expand_simple_binop (mode
, AND
, tmp
, tmp1
, NULL_RTX
,
515 do_compare_rtx_and_jump (tmp2
, const0_rtx
, NE
, 0, mode
, NULL_RTX
,
516 NULL_RTX
, neq_label
);
518 /* Add branch probability to jump we just created. */
519 jump
= get_last_insn ();
520 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
521 GEN_INT (REG_BR_PROB_BASE
- prob
),
524 tmp3
= expand_simple_binop (mode
, AND
, op1
, tmp1
, target
,
527 emit_move_insn (copy_rtx (target
), tmp3
);
528 emit_jump_insn (gen_jump (end_label
));
531 emit_label (neq_label
);
532 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (op1
), copy_rtx (tmp
));
533 tmp1
= force_operand (tmp1
, target
);
535 emit_move_insn (target
, tmp1
);
537 emit_label (end_label
);
539 sequence
= get_insns ();
541 rebuild_jump_labels (sequence
);
545 /* Do transform 2) on INSN if applicable. */
547 mod_pow2_value_transform (rtx insn
)
549 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
551 enum machine_mode mode
;
552 gcov_type wrong_values
, count
;
556 set
= single_set (insn
);
560 set_src
= SET_SRC (set
);
561 set_dest
= SET_DEST (set
);
562 code
= GET_CODE (set_src
);
563 mode
= GET_MODE (set_dest
);
567 op1
= XEXP (set_src
, 0);
568 op2
= XEXP (set_src
, 1);
570 for (histogram
= REG_NOTES (insn
);
572 histogram
= XEXP (histogram
, 1))
573 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
574 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_POW2
))
580 histogram
= XEXP (XEXP (histogram
, 0), 1);
581 value
= XEXP (histogram
, 0);
582 histogram
= XEXP (histogram
, 1);
583 wrong_values
=INTVAL (XEXP (histogram
, 0));
584 histogram
= XEXP (histogram
, 1);
587 for (i
= 0; i
< GET_MODE_BITSIZE (mode
); i
++)
589 count
+= INTVAL (XEXP (histogram
, 0));
590 histogram
= XEXP (histogram
, 1);
593 if (!rtx_equal_p (op2
, value
))
596 /* We require that we hit a power of two at least half of all evaluations. */
597 if (count
< wrong_values
)
601 fprintf (dump_file
, "Mod power of 2 transformation on insn %d\n",
604 /* Compute probability of taking the optimal path. */
605 all
= count
+ wrong_values
;
606 prob
= (count
* REG_BR_PROB_BASE
+ all
/ 2) / all
;
608 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
611 insert_insn_on_edge (
612 gen_mod_pow2 (mode
, code
, set_dest
, op1
, op2
, prob
), e
);
617 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
618 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
619 probability of taking the optimal path(s) PROB1 and PROB2). */
621 gen_mod_subtract (enum machine_mode mode
, enum rtx_code operation
,
622 rtx target
, rtx op1
, rtx op2
, int sub
, int prob1
, int prob2
)
625 rtx end_label
= gen_label_rtx ();
633 tmp
= gen_reg_rtx (mode
);
634 emit_move_insn (tmp
, copy_rtx (op2
));
639 emit_move_insn (target
, copy_rtx (op1
));
640 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
641 NULL_RTX
, end_label
);
643 /* Add branch probability to jump we just created. */
644 jump
= get_last_insn ();
645 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
646 GEN_INT (prob1
), REG_NOTES (jump
));
648 for (i
= 0; i
< sub
; i
++)
650 tmp1
= expand_simple_binop (mode
, MINUS
, target
, tmp
, target
,
653 emit_move_insn (target
, tmp1
);
654 do_compare_rtx_and_jump (target
, tmp
, LTU
, 0, mode
, NULL_RTX
,
655 NULL_RTX
, end_label
);
657 /* Add branch probability to jump we just created. */
658 jump
= get_last_insn ();
659 REG_NOTES (jump
) = gen_rtx_EXPR_LIST (REG_BR_PROB
,
660 GEN_INT (prob2
), REG_NOTES (jump
));
663 tmp1
= simplify_gen_binary (operation
, mode
, copy_rtx (target
), copy_rtx (tmp
));
664 tmp1
= force_operand (tmp1
, target
);
666 emit_move_insn (target
, tmp1
);
668 emit_label (end_label
);
670 sequence
= get_insns ();
672 rebuild_jump_labels (sequence
);
676 /* Do transforms 3) and 4) on INSN if applicable. */
678 mod_subtract_transform (rtx insn
)
680 rtx set
, set_src
, set_dest
, op1
, op2
, value
, histogram
;
682 enum machine_mode mode
;
683 gcov_type wrong_values
, counts
[2], count
, all
;
687 set
= single_set (insn
);
691 set_src
= SET_SRC (set
);
692 set_dest
= SET_DEST (set
);
693 code
= GET_CODE (set_src
);
694 mode
= GET_MODE (set_dest
);
698 op1
= XEXP (set_src
, 0);
699 op2
= XEXP (set_src
, 1);
701 for (histogram
= REG_NOTES (insn
);
703 histogram
= XEXP (histogram
, 1))
704 if (REG_NOTE_KIND (histogram
) == REG_VALUE_PROFILE
705 && XEXP (XEXP (histogram
, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL
))
711 histogram
= XEXP (XEXP (histogram
, 0), 1);
712 value
= XEXP (histogram
, 0);
713 histogram
= XEXP (histogram
, 1);
716 for (i
= 0; i
< 2; i
++)
718 counts
[i
] = INTVAL (XEXP (histogram
, 0));
720 histogram
= XEXP (histogram
, 1);
722 wrong_values
= INTVAL (XEXP (histogram
, 0));
723 histogram
= XEXP (histogram
, 1);
724 wrong_values
+= INTVAL (XEXP (histogram
, 0));
727 /* We require that we use just subtractions in at least 50% of all
730 for (i
= 0; i
< 2; i
++)
733 if (count
* 2 >= all
)
741 fprintf (dump_file
, "Mod subtract transformation on insn %d\n",
744 /* Compute probability of taking the optimal path(s). */
745 prob1
= (counts
[0] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
746 prob2
= (counts
[1] * REG_BR_PROB_BASE
+ all
/ 2) / all
;
748 e
= split_block (BLOCK_FOR_INSN (insn
), PREV_INSN (insn
));
751 insert_insn_on_edge (
752 gen_mod_subtract (mode
, code
, set_dest
,
753 op1
, op2
, i
, prob1
, prob2
), e
);
758 /* Connection to the outside world. */
759 /* Struct for IR-dependent hooks. */
760 struct value_prof_hooks
{
761 /* Find list of values for which we want to measure histograms. */
762 void (*find_values_to_profile
) (unsigned *, struct histogram_value
**);
764 /* Identify and exploit properties of values that are hard to analyze
765 statically. See value-prof.c for more detail. */
766 bool (*value_profile_transformations
) (void);
769 /* Hooks for RTL-based versions (the only ones that currently work). */
770 static struct value_prof_hooks rtl_value_prof_hooks
=
772 rtl_find_values_to_profile
,
773 rtl_value_profile_transformations
777 rtl_register_value_prof_hooks (void)
779 value_prof_hooks
= &rtl_value_prof_hooks
;
784 /* Tree-based versions are stubs for now. */
786 tree_find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
794 tree_value_profile_transformations (void)
799 static struct value_prof_hooks tree_value_prof_hooks
= {
800 tree_find_values_to_profile
,
801 tree_value_profile_transformations
805 tree_register_value_prof_hooks (void)
807 value_prof_hooks
= &tree_value_prof_hooks
;
812 /* IR-independent entry points. */
814 find_values_to_profile (unsigned *n_values
, struct histogram_value
**values
)
816 (value_prof_hooks
->find_values_to_profile
) (n_values
, values
);
820 value_profile_transformations (void)
822 return (value_prof_hooks
->value_profile_transformations
) ();