* system.h: Poison NO_RECURSIVE_FUNCTION_CSE.
[official-gcc.git] / gcc / value-prof.c
blob514b98f5190cf613a6ac617f22dfc983449e67e1
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
37 static struct value_prof_hooks *value_prof_hooks;
39 /* In this file value profile based optimizations will be placed (none are
40 here just now, but they are hopefully coming soon).
42 Every such optimization should add its requirements for profiled values to
43 insn_values_to_profile function. This function is called from branch_prob
44 in profile.c and the requested values are instrumented by it in the first
45 compilation with -fprofile-arcs. The optimization may then read the
46 gathered data in the second compilation with -fbranch-probabilities.
47 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
48 insn. The argument to the note consists of an EXPR_LIST where its
49 members have the following meaning (from the first to the last):
51 -- type of information gathered (HIST_TYPE*)
52 -- the expression that is profiled
53 -- list of counters starting from the first one. */
55 static void insn_divmod_values_to_profile (rtx, unsigned *,
56 struct histogram_value **);
57 static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
58 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
59 rtx, gcov_type);
60 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
61 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
62 int);
63 static bool divmod_fixed_value_transform (rtx insn);
64 static bool mod_pow2_value_transform (rtx);
65 static bool mod_subtract_transform (rtx);
67 /* Release the list of VALUES of length N_VALUES for that we want to measure
68 histograms. */
69 void
70 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
71 struct histogram_value *values)
73 free (values);
76 /* Find values inside INSN for that we want to measure histograms for
77 division/modulo optimization. */
78 static void
79 insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
80 struct histogram_value **values)
82 rtx set, set_src, op1, op2;
83 enum machine_mode mode;
85 if (!INSN_P (insn))
86 return;
88 set = single_set (insn);
89 if (!set)
90 return;
92 mode = GET_MODE (SET_DEST (set));
93 if (!INTEGRAL_MODE_P (mode))
94 return;
96 set_src = SET_SRC (set);
97 switch (GET_CODE (set_src))
99 case DIV:
100 case MOD:
101 case UDIV:
102 case UMOD:
103 op1 = XEXP (set_src, 0);
104 op2 = XEXP (set_src, 1);
105 if (side_effects_p (op2))
106 return;
108 /* Check for a special case where the divisor is power of 2. */
109 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
111 *values = xrealloc (*values,
112 (*n_values + 1)
113 * sizeof (struct histogram_value));
114 (*values)[*n_values].value = op2;
115 (*values)[*n_values].seq = NULL_RTX;
116 (*values)[*n_values].mode = mode;
117 (*values)[*n_values].insn = insn;
118 (*values)[*n_values].type = HIST_TYPE_POW2;
119 (*values)[*n_values].hdata.pow2.may_be_other = 1;
120 (*n_values)++;
123 /* Check whether the divisor is not in fact a constant. */
124 if (!CONSTANT_P (op2))
126 *values = xrealloc (*values,
127 (*n_values + 1)
128 * sizeof (struct histogram_value));
129 (*values)[*n_values].value = op2;
130 (*values)[*n_values].mode = mode;
131 (*values)[*n_values].seq = NULL_RTX;
132 (*values)[*n_values].insn = insn;
133 (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
134 (*n_values)++;
137 /* For mod, check whether it is not often a noop (or replaceable by
138 a few subtractions). */
139 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
141 rtx tmp;
143 *values = xrealloc (*values,
144 (*n_values + 1)
145 * sizeof (struct histogram_value));
146 start_sequence ();
147 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
148 (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
149 (*values)[*n_values].seq = get_insns ();
150 end_sequence ();
151 (*values)[*n_values].mode = mode;
152 (*values)[*n_values].insn = insn;
153 (*values)[*n_values].type = HIST_TYPE_INTERVAL;
154 (*values)[*n_values].hdata.intvl.int_start = 0;
155 (*values)[*n_values].hdata.intvl.steps = 2;
156 (*values)[*n_values].hdata.intvl.may_be_less = 1;
157 (*values)[*n_values].hdata.intvl.may_be_more = 1;
158 (*n_values)++;
160 return;
162 default:
163 return;
167 /* Find values inside INSN for that we want to measure histograms and adds
168 them to list VALUES (increasing the record of its length in N_VALUES). */
169 static void
170 insn_values_to_profile (rtx insn,
171 unsigned *n_values,
172 struct histogram_value **values)
174 if (flag_value_profile_transformations)
175 insn_divmod_values_to_profile (insn, n_values, values);
178 /* Find list of values for that we want to measure histograms. */
179 static void
180 rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
182 rtx insn;
183 unsigned i;
185 life_analysis (NULL, PROP_DEATH_NOTES);
187 *n_values = 0;
188 *values = NULL;
189 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
190 insn_values_to_profile (insn, n_values, values);
192 for (i = 0; i < *n_values; i++)
194 switch ((*values)[i].type)
196 case HIST_TYPE_INTERVAL:
197 if (dump_file)
198 fprintf (dump_file,
199 "Interval counter for insn %d, range %d -- %d.\n",
200 INSN_UID ((rtx)(*values)[i].insn),
201 (*values)[i].hdata.intvl.int_start,
202 ((*values)[i].hdata.intvl.int_start
203 + (*values)[i].hdata.intvl.steps - 1));
204 (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
205 ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
206 ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
207 break;
209 case HIST_TYPE_POW2:
210 if (dump_file)
211 fprintf (dump_file,
212 "Pow2 counter for insn %d.\n",
213 INSN_UID ((rtx)(*values)[i].insn));
214 (*values)[i].n_counters
215 = GET_MODE_BITSIZE ((*values)[i].mode)
216 + ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
217 break;
219 case HIST_TYPE_SINGLE_VALUE:
220 if (dump_file)
221 fprintf (dump_file,
222 "Single value counter for insn %d.\n",
223 INSN_UID ((rtx)(*values)[i].insn));
224 (*values)[i].n_counters = 3;
225 break;
227 case HIST_TYPE_CONST_DELTA:
228 if (dump_file)
229 fprintf (dump_file,
230 "Constant delta counter for insn %d.\n",
231 INSN_UID ((rtx)(*values)[i].insn));
232 (*values)[i].n_counters = 4;
233 break;
235 default:
236 abort ();
239 allocate_reg_info (max_reg_num (), FALSE, FALSE);
242 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
243 them to identify and exploit properties of values that are hard to analyze
244 statically.
246 We do following transformations:
250 x = a / b;
252 where b is almost always a constant N is transformed to
254 if (b == N)
255 x = a / N;
256 else
257 x = a / b;
259 Analogically with %
263 x = a % b
265 where b is almost always a power of 2 and the division is unsigned
266 TODO -- handle signed case as well
268 if ((b & (b - 1)) == 0)
269 x = a & (b - 1);
270 else
271 x = x % b;
273 Note that when b = 0, no error will occur and x = a; this is correct,
274 as result of such operation is undefined.
278 x = a % b
280 where a is almost always less then b and the division is unsigned
281 TODO -- handle signed case as well
283 x = a;
284 if (x >= b)
285 x %= b;
289 x = a % b
291 where a is almost always less then 2 * b and the division is unsigned
292 TODO -- handle signed case as well
294 x = a;
295 if (x >= b)
296 x -= b;
297 if (x >= b)
298 x %= b;
300 It would be possible to continue analogically for K * b for other small
301 K's, but it is probably not useful.
303 TODO:
305 There are other useful cases that could be handled by a similar mechanism,
306 for example:
308 for (i = 0; i < n; i++)
311 transform to (for constant N):
313 if (n == N)
314 for (i = 0; i < N; i++)
316 else
317 for (i = 0; i < n; i++)
319 making unroller happy. Since this may grow the code significantly,
320 we would have to be very careful here. */
322 static bool
323 rtl_value_profile_transformations (void)
325 rtx insn, next;
326 int changed = false;
328 for (insn = get_insns (); insn; insn = next)
330 next = NEXT_INSN (insn);
332 if (!INSN_P (insn))
333 continue;
335 /* Scan for insn carrying a histogram. */
336 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
337 continue;
339 /* Ignore cold areas -- we are growing a code. */
340 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
341 continue;
343 if (dump_file)
345 fprintf (dump_file, "Trying transformations on insn %d\n",
346 INSN_UID (insn));
347 print_rtl_single (dump_file, insn);
350 /* Transformations: */
351 if (flag_value_profile_transformations
352 && (mod_subtract_transform (insn)
353 || divmod_fixed_value_transform (insn)
354 || mod_pow2_value_transform (insn)))
355 changed = true;
358 if (changed)
360 commit_edge_insertions ();
361 allocate_reg_info (max_reg_num (), FALSE, FALSE);
364 return changed;
367 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
368 and OP2 whose value is expected to be VALUE and result TARGET). */
369 static rtx
370 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
371 rtx target, rtx op1, rtx op2, gcov_type value)
373 rtx tmp, tmp1;
374 rtx neq_label = gen_label_rtx ();
375 rtx end_label = gen_label_rtx ();
376 rtx sequence;
378 start_sequence ();
380 if (!REG_P (op2))
382 tmp = gen_reg_rtx (mode);
383 emit_move_insn (tmp, copy_rtx (op2));
385 else
386 tmp = op2;
388 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
389 NULL_RTX, neq_label);
390 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
391 tmp1 = force_operand (tmp1, target);
392 if (tmp1 != target)
393 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
395 emit_jump_insn (gen_jump (end_label));
396 emit_barrier ();
398 emit_label (neq_label);
399 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
400 tmp1 = force_operand (tmp1, target);
401 if (tmp1 != target)
402 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
404 emit_label (end_label);
406 sequence = get_insns ();
407 end_sequence ();
408 rebuild_jump_labels (sequence);
409 return sequence;
412 /* Do transform 1) on INSN if applicable. */
413 static bool
414 divmod_fixed_value_transform (rtx insn)
416 rtx set, set_src, set_dest, op1, op2, value, histogram;
417 enum rtx_code code;
418 enum machine_mode mode;
419 gcov_type val, count, all;
420 edge e;
422 set = single_set (insn);
423 if (!set)
424 return false;
426 set_src = SET_SRC (set);
427 set_dest = SET_DEST (set);
428 code = GET_CODE (set_src);
429 mode = GET_MODE (set_dest);
431 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
432 return false;
433 op1 = XEXP (set_src, false);
434 op2 = XEXP (set_src, 1);
436 for (histogram = REG_NOTES (insn);
437 histogram;
438 histogram = XEXP (histogram, 1))
439 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
440 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
441 break;
443 if (!histogram)
444 return false;
446 histogram = XEXP (XEXP (histogram, 0), 1);
447 value = XEXP (histogram, 0);
448 histogram = XEXP (histogram, 1);
449 val = INTVAL (XEXP (histogram, 0));
450 histogram = XEXP (histogram, 1);
451 count = INTVAL (XEXP (histogram, 0));
452 histogram = XEXP (histogram, 1);
453 all = INTVAL (XEXP (histogram, 0));
455 /* We require that count is at least half of all; this means
456 that for the transformation to fire the value must be constant
457 at least 50% of time (and 75% gives the guarantee of usage). */
458 if (!rtx_equal_p (op2, value) || 2 * count < all)
459 return false;
461 if (dump_file)
462 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
463 INSN_UID (insn));
465 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
466 delete_insn (insn);
468 insert_insn_on_edge (
469 gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
471 return true;
474 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
475 and OP2 and result TARGET). */
476 static rtx
477 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
478 rtx op1, rtx op2)
480 rtx tmp, tmp1, tmp2, tmp3;
481 rtx neq_label = gen_label_rtx ();
482 rtx end_label = gen_label_rtx ();
483 rtx sequence;
485 start_sequence ();
487 if (!REG_P (op2))
489 tmp = gen_reg_rtx (mode);
490 emit_move_insn (tmp, copy_rtx (op2));
492 else
493 tmp = op2;
495 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
496 0, OPTAB_WIDEN);
497 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
498 0, OPTAB_WIDEN);
499 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
500 NULL_RTX, neq_label);
501 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
502 0, OPTAB_WIDEN);
503 if (tmp3 != target)
504 emit_move_insn (copy_rtx (target), tmp3);
505 emit_jump_insn (gen_jump (end_label));
506 emit_barrier ();
508 emit_label (neq_label);
509 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
510 tmp1 = force_operand (tmp1, target);
511 if (tmp1 != target)
512 emit_move_insn (target, tmp1);
514 emit_label (end_label);
516 sequence = get_insns ();
517 end_sequence ();
518 rebuild_jump_labels (sequence);
519 return sequence;
522 /* Do transform 2) on INSN if applicable. */
523 static bool
524 mod_pow2_value_transform (rtx insn)
526 rtx set, set_src, set_dest, op1, op2, value, histogram;
527 enum rtx_code code;
528 enum machine_mode mode;
529 gcov_type wrong_values, count;
530 edge e;
531 int i;
533 set = single_set (insn);
534 if (!set)
535 return false;
537 set_src = SET_SRC (set);
538 set_dest = SET_DEST (set);
539 code = GET_CODE (set_src);
540 mode = GET_MODE (set_dest);
542 if (code != UMOD)
543 return false;
544 op1 = XEXP (set_src, 0);
545 op2 = XEXP (set_src, 1);
547 for (histogram = REG_NOTES (insn);
548 histogram;
549 histogram = XEXP (histogram, 1))
550 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
551 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
552 break;
554 if (!histogram)
555 return false;
557 histogram = XEXP (XEXP (histogram, 0), 1);
558 value = XEXP (histogram, 0);
559 histogram = XEXP (histogram, 1);
560 wrong_values =INTVAL (XEXP (histogram, 0));
561 histogram = XEXP (histogram, 1);
563 count = 0;
564 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
566 count += INTVAL (XEXP (histogram, 0));
567 histogram = XEXP (histogram, 1);
570 if (!rtx_equal_p (op2, value))
571 return false;
573 /* We require that we hit a power of two at least half of all evaluations. */
574 if (count < wrong_values)
575 return false;
577 if (dump_file)
578 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
579 INSN_UID (insn));
581 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
582 delete_insn (insn);
584 insert_insn_on_edge (
585 gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
587 return true;
590 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
591 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
592 static rtx
593 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
594 rtx target, rtx op1, rtx op2, int sub)
596 rtx tmp, tmp1;
597 rtx end_label = gen_label_rtx ();
598 rtx sequence;
599 int i;
601 start_sequence ();
603 if (!REG_P (op2))
605 tmp = gen_reg_rtx (mode);
606 emit_move_insn (tmp, copy_rtx (op2));
608 else
609 tmp = op2;
611 emit_move_insn (target, copy_rtx (op1));
612 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
613 NULL_RTX, end_label);
616 for (i = 0; i < sub; i++)
618 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
619 0, OPTAB_WIDEN);
620 if (tmp1 != target)
621 emit_move_insn (target, tmp1);
622 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
623 NULL_RTX, end_label);
626 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
627 tmp1 = force_operand (tmp1, target);
628 if (tmp1 != target)
629 emit_move_insn (target, tmp1);
631 emit_label (end_label);
633 sequence = get_insns ();
634 end_sequence ();
635 rebuild_jump_labels (sequence);
636 return sequence;
639 /* Do transforms 3) and 4) on INSN if applicable. */
640 static bool
641 mod_subtract_transform (rtx insn)
643 rtx set, set_src, set_dest, op1, op2, value, histogram;
644 enum rtx_code code;
645 enum machine_mode mode;
646 gcov_type wrong_values, counts[2], count, all;
647 edge e;
648 int i;
650 set = single_set (insn);
651 if (!set)
652 return false;
654 set_src = SET_SRC (set);
655 set_dest = SET_DEST (set);
656 code = GET_CODE (set_src);
657 mode = GET_MODE (set_dest);
659 if (code != UMOD)
660 return false;
661 op1 = XEXP (set_src, 0);
662 op2 = XEXP (set_src, 1);
664 for (histogram = REG_NOTES (insn);
665 histogram;
666 histogram = XEXP (histogram, 1))
667 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
668 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
669 break;
671 if (!histogram)
672 return false;
674 histogram = XEXP (XEXP (histogram, 0), 1);
675 value = XEXP (histogram, 0);
676 histogram = XEXP (histogram, 1);
678 all = 0;
679 for (i = 0; i < 2; i++)
681 counts[i] = INTVAL (XEXP (histogram, 0));
682 all += counts[i];
683 histogram = XEXP (histogram, 1);
685 wrong_values = INTVAL (XEXP (histogram, 0));
686 histogram = XEXP (histogram, 1);
687 wrong_values += INTVAL (XEXP (histogram, 0));
688 all += wrong_values;
690 /* We require that we use just subtractions in at least 50% of all
691 evaluations. */
692 count = 0;
693 for (i = 0; i < 2; i++)
695 count += counts[i];
696 if (count * 2 >= all)
697 break;
700 if (i == 2)
701 return false;
703 if (dump_file)
704 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
705 INSN_UID (insn));
707 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
708 delete_insn (insn);
710 insert_insn_on_edge (
711 gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);
713 return true;
716 /* Connection to the outside world. */
717 /* Struct for IR-dependent hooks. */
718 struct value_prof_hooks {
719 /* Find list of values for which we want to measure histograms. */
720 void (*find_values_to_profile) (unsigned *, struct histogram_value **);
722 /* Identify and exploit properties of values that are hard to analyze
723 statically. See value-prof.c for more detail. */
724 bool (*value_profile_transformations) (void);
727 /* Hooks for RTL-based versions (the only ones that currently work). */
728 static struct value_prof_hooks rtl_value_prof_hooks =
730 rtl_find_values_to_profile,
731 rtl_value_profile_transformations
734 void
735 rtl_register_value_prof_hooks (void)
737 value_prof_hooks = &rtl_value_prof_hooks;
738 if (ir_type ())
739 abort ();
742 /* Tree-based versions are stubs for now. */
743 static void
744 tree_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
746 (void)n_values;
747 (void)values;
748 abort ();
751 static bool
752 tree_value_profile_transformations (void)
754 abort ();
757 static struct value_prof_hooks tree_value_prof_hooks = {
758 tree_find_values_to_profile,
759 tree_value_profile_transformations
762 void
763 tree_register_value_prof_hooks (void)
765 value_prof_hooks = &tree_value_prof_hooks;
766 if (!ir_type ())
767 abort ();
770 /* IR-independent entry points. */
771 void
772 find_values_to_profile (unsigned *n_values, struct histogram_value **values)
774 (value_prof_hooks->find_values_to_profile) (n_values, values);
777 bool
778 value_profile_transformations (void)
780 return (value_prof_hooks->value_profile_transformations) ();