1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
44 static struct value_prof_hooks *value_prof_hooks;
46 /* This is the vector of histograms. Created in find_values_to_profile.
47 During profile generation, freed by instrument_values.
48 During profile use, freed by value_profile_transformations. */
50 static histogram_values static_values = NULL;
52 /* This file implements value-profile-based optimizations. Currently the
53 following optimizations are implemented (for more detailed descriptions
54 see the comments at value_profile_transformations):
56 1) Division/modulo specialization. Provided that we can determine that the
57 operands of the division have some special properties, we may use this
58 knowledge to produce more efficient code.
59 2) Speculative prefetching. If we are able to determine that the difference
60 between addresses accessed by a memory reference is usually constant, we
61 may add prefetch instructions.
63 Every such optimization should add its requirements for profiled values to
64 insn_values_to_profile function. This function is called from branch_prob
65 in profile.c and the requested values are instrumented by it in the first
66 compilation with -fprofile-arcs. The optimization may then read the
67 gathered data in the second compilation with -fbranch-probabilities.
69 There are currently two versions, RTL-based and tree-based. Over time
70 the RTL-based version may go away.
72 In the RTL-based version, the measured data is appended as a REG_VALUE_PROFILE
73 note to the instrumented insn. The argument of the note is an
74 EXPR_LIST whose members have the following meaning (from the first to
75 the last):
77 -- type of information gathered (HIST_TYPE*)
78 -- the expression that is profiled
79 -- list of counters starting from the first one.
81 In the tree-based version, the measured data is pointed to from the histograms
82 field of the statement annotation of the instrumented statements. It is
83 kept as a linked list of struct histogram_value_t's, which contain the
84 same information as above. */
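/* Schematically, for a HIST_TYPE_SINGLE_VALUE histogram the note argument
   walked by rtl_divmod_fixed_value_transform below nests roughly as follows
   (a sketch of the EXPR_LIST shape, not literal RTL dump syntax):

     (expr_list (const_int HIST_TYPE_SINGLE_VALUE)
       (expr_list <profiled expression>
         (expr_list (const_int most-common-value)
           (expr_list (const_int count-of-that-value)
             (expr_list (const_int count-of-all-executions) nil)))))  */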
86 /* For speculative prefetching, the range in which we do not prefetch (because
87 we assume that it will be in cache anyway). The asymmetry between the min and
88 max range tries to reflect the fact that sequential prefetching
89 of the data is commonly done directly by hardware. Nevertheless, these
90 values are just a guess and should of course be target-specific.
92 FIXME: There is no tree form of speculative prefetching as yet.
94 FIXME: A better approach to instrumentation in the profile-generation
95 pass is to generate calls to magic library functions (to be added to
96 libgcc) rather than inline code. This approach will probably be
97 necessary to get tree-based speculative prefetching working in a useful
98 fashion, as inline code bloats things so much the rest of the compiler has
99 serious problems dealing with it (judging from the rtl behavior). */
101 #ifndef NOPREFETCH_RANGE_MIN
102 #define NOPREFETCH_RANGE_MIN (-16)
103 #endif
104 #ifndef NOPREFETCH_RANGE_MAX
105 #define NOPREFETCH_RANGE_MAX 32
106 #endif
108 static void rtl_divmod_values_to_profile (rtx, histogram_values *);
109 #ifdef HAVE_prefetch
110 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
111 static int find_mem_reference_1 (rtx *, void *);
112 static void find_mem_reference_2 (rtx, rtx, void *);
113 static bool find_mem_reference (rtx, rtx *, int *);
114 #endif
116 static void rtl_values_to_profile (rtx, histogram_values *);
117 static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
118 rtx, gcov_type, int);
119 static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
120 static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
121 int, int, int);
122 #ifdef HAVE_prefetch
123 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
124 #endif
125 static bool rtl_divmod_fixed_value_transform (rtx);
126 static bool rtl_mod_pow2_value_transform (rtx);
127 static bool rtl_mod_subtract_transform (rtx);
128 #ifdef HAVE_prefetch
129 static bool speculative_prefetching_transform (rtx);
130 #endif
131 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
132 tree, int, gcov_type, gcov_type);
133 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
134 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
135 gcov_type, gcov_type, gcov_type);
136 static bool tree_divmod_fixed_value_transform (tree);
137 static bool tree_mod_pow2_value_transform (tree);
138 static bool tree_mod_subtract_transform (tree);
141 /* Find values inside INSN for which we want to measure histograms for
142 division/modulo optimization and store them to VALUES. */
143 static void
144 rtl_divmod_values_to_profile (rtx insn, histogram_values *values)
146 rtx set, set_src, op1, op2;
147 enum machine_mode mode;
148 histogram_value hist;
150 if (!INSN_P (insn))
151 return;
153 set = single_set (insn);
154 if (!set)
155 return;
157 mode = GET_MODE (SET_DEST (set));
158 if (!INTEGRAL_MODE_P (mode))
159 return;
161 set_src = SET_SRC (set);
162 switch (GET_CODE (set_src))
164 case DIV:
165 case MOD:
166 case UDIV:
167 case UMOD:
168 op1 = XEXP (set_src, 0);
169 op2 = XEXP (set_src, 1);
170 if (side_effects_p (op2))
171 return;
173 /* Check for a special case where the divisor is a power of 2. */
174 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
176 hist = ggc_alloc (sizeof (*hist));
177 hist->hvalue.rtl.value = op2;
178 hist->hvalue.rtl.seq = NULL_RTX;
179 hist->hvalue.rtl.mode = mode;
180 hist->hvalue.rtl.insn = insn;
181 hist->type = HIST_TYPE_POW2;
182 VEC_safe_push (histogram_value, heap, *values, hist);
185 /* Check whether the divisor is not in fact a constant. */
186 if (!CONSTANT_P (op2))
188 hist = ggc_alloc (sizeof (*hist));
189 hist->hvalue.rtl.value = op2;
190 hist->hvalue.rtl.mode = mode;
191 hist->hvalue.rtl.seq = NULL_RTX;
192 hist->hvalue.rtl.insn = insn;
193 hist->type = HIST_TYPE_SINGLE_VALUE;
194 VEC_safe_push (histogram_value, heap, *values, hist);
197 /* For mod, check whether it is often a noop (or replaceable by
198 a few subtractions). */
199 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
201 rtx tmp;
203 hist = ggc_alloc (sizeof (*hist));
204 start_sequence ();
205 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
206 hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX);
207 hist->hvalue.rtl.seq = get_insns ();
208 end_sequence ();
209 hist->hvalue.rtl.mode = mode;
210 hist->hvalue.rtl.insn = insn;
211 hist->type = HIST_TYPE_INTERVAL;
212 hist->hdata.intvl.int_start = 0;
213 hist->hdata.intvl.steps = 2;
214 VEC_safe_push (histogram_value, heap, *values, hist);
216 return;
218 default:
219 return;
223 #ifdef HAVE_prefetch
225 /* Called from find_mem_reference through for_each_rtx, finds a memory
226 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
227 to *RET and the traversing of the expression is interrupted by returning 1.
228 Otherwise 0 is returned. */
230 static int
231 find_mem_reference_1 (rtx *expr, void *ret)
233 rtx *mem = ret;
235 if (MEM_P (*expr))
237 *mem = *expr;
238 return 1;
240 return 0;
243 /* Called from find_mem_reference through note_stores to find out whether
244 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
245 FMR2_WRITE is set to true. */
247 static int fmr2_write;
248 static void
249 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
251 if (expr == mem)
252 fmr2_write = true;
255 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
256 if it is a write of the mem. Return false if no memory reference is found,
257 true otherwise. */
259 static bool
260 find_mem_reference (rtx insn, rtx *mem, int *write)
262 *mem = NULL_RTX;
263 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
265 if (!*mem)
266 return false;
268 fmr2_write = false;
269 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
270 *write = fmr2_write;
271 return true;
274 /* Find values inside INSN for which we want to measure histograms for
275 speculative prefetching. Add them to the list VALUES.
276 Returns true if we found any such value, false otherwise. */
278 static bool
279 insn_prefetch_values_to_profile (rtx insn, histogram_values* values)
281 rtx mem, address;
282 int write;
283 histogram_value hist;
285 /* It only makes sense to look for memory references in ordinary insns. */
286 if (!NONJUMP_INSN_P (insn))
287 return false;
289 if (!find_mem_reference (insn, &mem, &write))
290 return false;
292 address = XEXP (mem, 0);
293 if (side_effects_p (address))
294 return false;
296 if (CONSTANT_P (address))
297 return false;
299 hist = ggc_alloc (sizeof (*hist));
300 hist->hvalue.rtl.value = address;
301 hist->hvalue.rtl.mode = GET_MODE (address);
302 hist->hvalue.rtl.seq = NULL_RTX;
303 hist->hvalue.rtl.insn = insn;
304 hist->type = HIST_TYPE_CONST_DELTA;
305 VEC_safe_push (histogram_value, heap, *values, hist);
307 return true;
309 #endif
310 /* Find values inside INSN for which we want to measure histograms and add
311 them to the list VALUES. */
312 static void
313 rtl_values_to_profile (rtx insn, histogram_values *values)
315 if (flag_value_profile_transformations)
316 rtl_divmod_values_to_profile (insn, values);
318 #ifdef HAVE_prefetch
319 if (flag_speculative_prefetching)
320 insn_prefetch_values_to_profile (insn, values);
321 #endif
324 /* Find the list of values for which we want to measure histograms. */
325 static void
326 rtl_find_values_to_profile (histogram_values *values)
328 rtx insn;
329 unsigned i, libcall_level;
330 histogram_value hist;
332 life_analysis (NULL, PROP_DEATH_NOTES);
334 *values = NULL;
335 libcall_level = 0;
336 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
337 rtl_values_to_profile (insn, values);
338 static_values = *values;
340 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
342 switch (hist->type)
344 case HIST_TYPE_INTERVAL:
345 if (dump_file)
346 fprintf (dump_file,
347 "Interval counter for insn %d, range %d -- %d.\n",
348 INSN_UID ((rtx)hist->hvalue.rtl.insn),
349 hist->hdata.intvl.int_start,
350 (hist->hdata.intvl.int_start
351 + hist->hdata.intvl.steps - 1));
352 hist->n_counters = hist->hdata.intvl.steps + 2;
353 break;
355 case HIST_TYPE_POW2:
356 if (dump_file)
357 fprintf (dump_file,
358 "Pow2 counter for insn %d.\n",
359 INSN_UID ((rtx)hist->hvalue.rtl.insn));
360 hist->n_counters = 2;
361 break;
363 case HIST_TYPE_SINGLE_VALUE:
364 if (dump_file)
365 fprintf (dump_file,
366 "Single value counter for insn %d.\n",
367 INSN_UID ((rtx)hist->hvalue.rtl.insn));
368 hist->n_counters = 3;
369 break;
371 case HIST_TYPE_CONST_DELTA:
372 if (dump_file)
373 fprintf (dump_file,
374 "Constant delta counter for insn %d.\n",
375 INSN_UID ((rtx)hist->hvalue.rtl.insn));
376 hist->n_counters = 4;
377 break;
379 default:
380 gcc_unreachable ();
383 allocate_reg_info (max_reg_num (), FALSE, FALSE);
386 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
387 them to identify and exploit properties of values that are hard to analyze
388 statically.
390 We do the following transformations:
394 1) x = a / b;
396 where b is almost always a constant N is transformed to
398 if (b == N)
399 x = a / N;
400 else
401 x = a / b;
403 Analogously for %.
407 2) x = a % b
409 where b is almost always a power of 2 and the division is unsigned
410 TODO -- handle signed case as well
412 if ((b & (b - 1)) == 0)
413 x = a & (b - 1);
414 else
415 x = x % b;
417 Note that when b = 0, no error will occur and x = a; this is correct,
418 as the result of such an operation is undefined.
422 3) x = a % b
424 where a is almost always less than b and the division is unsigned
425 TODO -- handle signed case as well
427 x = a;
428 if (x >= b)
429 x %= b;
433 4) x = a % b
435 where a is almost always less than 2 * b and the division is unsigned
436 TODO -- handle signed case as well
438 x = a;
439 if (x >= b)
440 x -= b;
441 if (x >= b)
442 x %= b;
444 It would be possible to continue analogously for K * b for other small
445 K's, but it is probably not useful.
449 5) Read or write of mem[address], where the value of address usually changes
450 by a constant C != 0 between successive accesses; with
451 -fspeculative-prefetching we then add a prefetch of address + C before
452 the insn. This handles prefetching in several interesting cases in addition
453 to simple prefetching of addresses that are induction variables, e.g.
454 linked lists allocated sequentially (even if they are processed
455 recursively).
457 TODO -- we should also check whether there is not (usually) a small
458 difference with the adjacent memory references, so that we do
459 not issue overlapping prefetches. Also we should employ some
460 heuristics to eliminate cases where prefetching evidently spoils
461 the code.
462 -- it should somehow cooperate with the loop optimizer prefetching
464 TODO:
466 There are other useful cases that could be handled by a similar mechanism,
467 for example:
469 for (i = 0; i < n; i++)
472 transform to (for constant N):
474 if (n == N)
475 for (i = 0; i < N; i++)
477 else
478 for (i = 0; i < n; i++)
480 making the unroller happy. Since this may grow the code significantly,
481 we would have to be very careful here. */
483 static bool
484 rtl_value_profile_transformations (void)
486 rtx insn, next;
487 int changed = false;
489 for (insn = get_insns (); insn; insn = next)
491 next = NEXT_INSN (insn);
493 if (!INSN_P (insn))
494 continue;
496 /* Scan for insn carrying a histogram. */
497 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
498 continue;
500 /* Ignore cold areas -- we are growing the code. */
501 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
502 continue;
504 if (dump_file)
506 fprintf (dump_file, "Trying transformations on insn %d\n",
507 INSN_UID (insn));
508 print_rtl_single (dump_file, insn);
511 /* Transformations: */
512 if (flag_value_profile_transformations
513 && (rtl_mod_subtract_transform (insn)
514 || rtl_divmod_fixed_value_transform (insn)
515 || rtl_mod_pow2_value_transform (insn)))
516 changed = true;
517 #ifdef HAVE_prefetch
518 if (flag_speculative_prefetching
519 && speculative_prefetching_transform (insn))
520 changed = true;
521 #endif
524 if (changed)
526 commit_edge_insertions ();
527 allocate_reg_info (max_reg_num (), FALSE, FALSE);
530 return changed;
533 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
534 and OP2, whose value is expected to be VALUE, result TARGET and
535 probability of taking the optimal path PROB). */
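/* Roughly, the sequence built below does the following (a sketch in C-like
   pseudocode; OP stands for OPERATION and the label names are illustrative):

     tmp = op2;
     if (tmp != VALUE)
       goto neq_label;
     target = op1 OP VALUE;        <-- fast path, the divisor is the constant
     goto end_label;
   neq_label:
     target = op1 OP tmp;          <-- general case
   end_label:
     ;  */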
536 static rtx
537 rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
538 rtx target, rtx op1, rtx op2, gcov_type value,
539 int prob)
541 rtx tmp, tmp1, jump;
542 rtx neq_label = gen_label_rtx ();
543 rtx end_label = gen_label_rtx ();
544 rtx sequence;
546 start_sequence ();
548 if (!REG_P (op2))
550 tmp = gen_reg_rtx (mode);
551 emit_move_insn (tmp, copy_rtx (op2));
553 else
554 tmp = op2;
556 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
557 NULL_RTX, neq_label);
559 /* Add branch probability to jump we just created. */
560 jump = get_last_insn ();
561 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
562 GEN_INT (REG_BR_PROB_BASE - prob),
563 REG_NOTES (jump));
565 tmp1 = simplify_gen_binary (operation, mode,
566 copy_rtx (op1), GEN_INT (value));
567 tmp1 = force_operand (tmp1, target);
568 if (tmp1 != target)
569 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
571 emit_jump_insn (gen_jump (end_label));
572 emit_barrier ();
574 emit_label (neq_label);
575 tmp1 = simplify_gen_binary (operation, mode,
576 copy_rtx (op1), copy_rtx (tmp));
577 tmp1 = force_operand (tmp1, target);
578 if (tmp1 != target)
579 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
581 emit_label (end_label);
583 sequence = get_insns ();
584 end_sequence ();
585 rebuild_jump_labels (sequence);
586 return sequence;
589 /* Do transform 1) on INSN if applicable. */
590 static bool
591 rtl_divmod_fixed_value_transform (rtx insn)
593 rtx set, set_src, set_dest, op1, op2, value, histogram;
594 enum rtx_code code;
595 enum machine_mode mode;
596 gcov_type val, count, all;
597 edge e;
598 int prob;
600 set = single_set (insn);
601 if (!set)
602 return false;
604 set_src = SET_SRC (set);
605 set_dest = SET_DEST (set);
606 code = GET_CODE (set_src);
607 mode = GET_MODE (set_dest);
609 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
610 return false;
611 op1 = XEXP (set_src, 0);
612 op2 = XEXP (set_src, 1);
614 for (histogram = REG_NOTES (insn);
615 histogram;
616 histogram = XEXP (histogram, 1))
617 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
618 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
619 break;
621 if (!histogram)
622 return false;
624 histogram = XEXP (XEXP (histogram, 0), 1);
625 value = XEXP (histogram, 0);
626 histogram = XEXP (histogram, 1);
627 val = INTVAL (XEXP (histogram, 0));
628 histogram = XEXP (histogram, 1);
629 count = INTVAL (XEXP (histogram, 0));
630 histogram = XEXP (histogram, 1);
631 all = INTVAL (XEXP (histogram, 0));
633 /* We require that count be at least half of all; this means
634 that for the transformation to fire the value must be constant
635 at least 50% of the time (and 75% gives the guarantee of usage). */
636 if (!rtx_equal_p (op2, value) || 2 * count < all)
637 return false;
639 if (dump_file)
640 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
641 INSN_UID (insn));
643 /* Compute probability of taking the optimal path. */
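  /* I.e. count / all rescaled to REG_BR_PROB_BASE and rounded to the nearest
     integer; e.g. count == 3 and all == 4 give prob == 3 * REG_BR_PROB_BASE / 4.  */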
644 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
646 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
647 delete_insn (insn);
649 insert_insn_on_edge (
650 rtl_divmod_fixed_value (mode, code, set_dest,
651 op1, op2, val, prob), e);
653 return true;
656 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
657 and OP2, result TARGET and probability of taking the optimal path PROB). */
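/* A sketch of the sequence built below, in C-like pseudocode (the label
   names are illustrative):

     tmp = op2;
     tmp1 = tmp - 1;
     tmp2 = tmp & tmp1;
     if (tmp2 != 0)
       goto neq_label;             <-- op2 is not a power of 2
     target = op1 & tmp1;          <-- cheap mask instead of the modulo
     goto end_label;
   neq_label:
     target = op1 % tmp;           <-- general unsigned modulo
   end_label:
     ;  */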
658 static rtx
659 rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
660 rtx op1, rtx op2, int prob)
662 rtx tmp, tmp1, tmp2, tmp3, jump;
663 rtx neq_label = gen_label_rtx ();
664 rtx end_label = gen_label_rtx ();
665 rtx sequence;
667 start_sequence ();
669 if (!REG_P (op2))
671 tmp = gen_reg_rtx (mode);
672 emit_move_insn (tmp, copy_rtx (op2));
674 else
675 tmp = op2;
677 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
678 0, OPTAB_WIDEN);
679 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
680 0, OPTAB_WIDEN);
681 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
682 NULL_RTX, neq_label);
684 /* Add branch probability to jump we just created. */
685 jump = get_last_insn ();
686 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
687 GEN_INT (REG_BR_PROB_BASE - prob),
688 REG_NOTES (jump));
690 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
691 0, OPTAB_WIDEN);
692 if (tmp3 != target)
693 emit_move_insn (copy_rtx (target), tmp3);
694 emit_jump_insn (gen_jump (end_label));
695 emit_barrier ();
697 emit_label (neq_label);
698 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
699 tmp1 = force_operand (tmp1, target);
700 if (tmp1 != target)
701 emit_move_insn (target, tmp1);
703 emit_label (end_label);
705 sequence = get_insns ();
706 end_sequence ();
707 rebuild_jump_labels (sequence);
708 return sequence;
711 /* Do transform 2) on INSN if applicable. */
712 static bool
713 rtl_mod_pow2_value_transform (rtx insn)
715 rtx set, set_src, set_dest, op1, op2, value, histogram;
716 enum rtx_code code;
717 enum machine_mode mode;
718 gcov_type wrong_values, count;
719 edge e;
720 int all, prob;
722 set = single_set (insn);
723 if (!set)
724 return false;
726 set_src = SET_SRC (set);
727 set_dest = SET_DEST (set);
728 code = GET_CODE (set_src);
729 mode = GET_MODE (set_dest);
731 if (code != UMOD)
732 return false;
733 op1 = XEXP (set_src, 0);
734 op2 = XEXP (set_src, 1);
736 for (histogram = REG_NOTES (insn);
737 histogram;
738 histogram = XEXP (histogram, 1))
739 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
740 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
741 break;
743 if (!histogram)
744 return false;
746 histogram = XEXP (XEXP (histogram, 0), 1);
747 value = XEXP (histogram, 0);
748 histogram = XEXP (histogram, 1);
749 wrong_values = INTVAL (XEXP (histogram, 0));
750 histogram = XEXP (histogram, 1);
751 count = INTVAL (XEXP (histogram, 0));
753 if (!rtx_equal_p (op2, value))
754 return false;
756 /* We require that we hit a power of two in at least half of all evaluations. */
757 if (count < wrong_values)
758 return false;
760 if (dump_file)
761 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
762 INSN_UID (insn));
764 /* Compute probability of taking the optimal path. */
765 all = count + wrong_values;
766 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
768 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
769 delete_insn (insn);
771 insert_insn_on_edge (
772 rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
774 return true;
777 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
778 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
779 probability of taking the optimal path(s) PROB1 and PROB2). */
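/* A sketch of the sequence built below for SUB == 1, in C-like pseudocode
   (the label name is illustrative):

     tmp = op2;
     target = op1;
     if (target < tmp)
       goto end_label;             <-- already reduced
     target = target - tmp;
     if (target < tmp)
       goto end_label;             <-- one subtraction was enough
     target = target % tmp;        <-- fall back to the real modulo
   end_label:
     ;  */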
780 static rtx
781 rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation,
782 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
784 rtx tmp, tmp1, jump;
785 rtx end_label = gen_label_rtx ();
786 rtx sequence;
787 int i;
789 start_sequence ();
791 if (!REG_P (op2))
793 tmp = gen_reg_rtx (mode);
794 emit_move_insn (tmp, copy_rtx (op2));
796 else
797 tmp = op2;
799 emit_move_insn (target, copy_rtx (op1));
800 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
801 NULL_RTX, end_label);
803 /* Add branch probability to jump we just created. */
804 jump = get_last_insn ();
805 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
806 GEN_INT (prob1), REG_NOTES (jump));
808 for (i = 0; i < sub; i++)
810 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
811 0, OPTAB_WIDEN);
812 if (tmp1 != target)
813 emit_move_insn (target, tmp1);
814 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
815 NULL_RTX, end_label);
817 /* Add branch probability to jump we just created. */
818 jump = get_last_insn ();
819 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
820 GEN_INT (prob2), REG_NOTES (jump));
823 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
824 tmp1 = force_operand (tmp1, target);
825 if (tmp1 != target)
826 emit_move_insn (target, tmp1);
828 emit_label (end_label);
830 sequence = get_insns ();
831 end_sequence ();
832 rebuild_jump_labels (sequence);
833 return sequence;
836 /* Do transforms 3) and 4) on INSN if applicable. */
837 static bool
838 rtl_mod_subtract_transform (rtx insn)
840 rtx set, set_src, set_dest, op1, op2, histogram;
841 enum rtx_code code;
842 enum machine_mode mode;
843 gcov_type wrong_values, counts[2], count, all;
844 edge e;
845 int i, prob1, prob2;
847 set = single_set (insn);
848 if (!set)
849 return false;
851 set_src = SET_SRC (set);
852 set_dest = SET_DEST (set);
853 code = GET_CODE (set_src);
854 mode = GET_MODE (set_dest);
856 if (code != UMOD)
857 return false;
858 op1 = XEXP (set_src, 0);
859 op2 = XEXP (set_src, 1);
861 for (histogram = REG_NOTES (insn);
862 histogram;
863 histogram = XEXP (histogram, 1))
864 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
865 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
866 break;
868 if (!histogram)
869 return false;
871 histogram = XEXP (XEXP (histogram, 0), 1);
872 histogram = XEXP (histogram, 1);
874 all = 0;
875 for (i = 0; i < 2; i++)
877 counts[i] = INTVAL (XEXP (histogram, 0));
878 all += counts[i];
879 histogram = XEXP (histogram, 1);
881 wrong_values = INTVAL (XEXP (histogram, 0));
882 histogram = XEXP (histogram, 1);
883 wrong_values += INTVAL (XEXP (histogram, 0));
884 all += wrong_values;
886 /* We require that we use just subtractions in at least 50% of all
887 evaluations. */
888 count = 0;
889 for (i = 0; i < 2; i++)
891 count += counts[i];
892 if (count * 2 >= all)
893 break;
896 if (i == 2)
897 return false;
899 if (dump_file)
900 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
901 INSN_UID (insn));
903 /* Compute probability of taking the optimal path(s). */
904 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
905 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
907 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
908 delete_insn (insn);
910 insert_insn_on_edge (
911 rtl_mod_subtract (mode, code, set_dest,
912 op1, op2, i, prob1, prob2), e);
914 return true;
917 #ifdef HAVE_prefetch
918 /* Generate code for transformation 5 for mem with ADDRESS and a constant
919 step DELTA. WRITE is true if the reference is a store to mem. */
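/* In effect the emitted sequence is just (a sketch):

     tmp = ADDRESS + DELTA;
     prefetch (tmp, WRITE, 3);     <-- locality hint 3, as in gen_prefetch below

   i.e. we prefetch the address that the next execution of the insn is
   expected to touch.  */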
921 static rtx
922 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
924 rtx tmp;
925 rtx sequence;
927 /* TODO: we do the prefetching for just one iteration ahead, which
928 often is not enough. */
929 start_sequence ();
930 if (offsettable_address_p (0, VOIDmode, address))
931 tmp = plus_constant (copy_rtx (address), delta);
932 else
934 tmp = simplify_gen_binary (PLUS, Pmode,
935 copy_rtx (address), GEN_INT (delta));
936 tmp = force_operand (tmp, NULL);
938 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
939 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
940 tmp = force_reg (Pmode, tmp);
941 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
942 sequence = get_insns ();
943 end_sequence ();
945 return sequence;
948 /* Do transform 5) on INSN if applicable. */
950 static bool
951 speculative_prefetching_transform (rtx insn)
953 rtx histogram, value;
954 gcov_type val, count, all;
955 edge e;
956 rtx mem, address;
957 int write;
959 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
960 return false;
962 if (!find_mem_reference (insn, &mem, &write))
963 return false;
965 address = XEXP (mem, 0);
966 if (side_effects_p (address))
967 return false;
969 if (CONSTANT_P (address))
970 return false;
972 for (histogram = REG_NOTES (insn);
973 histogram;
974 histogram = XEXP (histogram, 1))
975 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
976 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
977 break;
979 if (!histogram)
980 return false;
982 histogram = XEXP (XEXP (histogram, 0), 1);
983 value = XEXP (histogram, 0);
984 histogram = XEXP (histogram, 1);
985 /* Skip last value referenced. */
986 histogram = XEXP (histogram, 1);
987 val = INTVAL (XEXP (histogram, 0));
988 histogram = XEXP (histogram, 1);
989 count = INTVAL (XEXP (histogram, 0));
990 histogram = XEXP (histogram, 1);
991 all = INTVAL (XEXP (histogram, 0));
993 /* With so few executions we do not really have a reason to optimize the
994 statement, and more importantly, the data about address differences
995 are spoiled by the first item, which had no previous value to compare
996 with. */
997 if (all < 4)
998 return false;
1000 /* We require that count be at least half of all; this means
1001 that for the transformation to fire the value must be constant
1002 at least 50% of the time (and 75% gives the guarantee of usage). */
1003 if (!rtx_equal_p (address, value) || 2 * count < all)
1004 return false;
1006 /* If the difference is too small, it does not make much sense to
1007 prefetch, as the memory is probably already in cache. */
1008 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
1009 return false;
1011 if (dump_file)
1012 fprintf (dump_file, "Speculative prefetching for insn %d\n",
1013 INSN_UID (insn));
1015 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
1017 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
1019 return true;
1021 #endif /* HAVE_prefetch */
1023 /* Tree based transformations. */
1024 static bool
1025 tree_value_profile_transformations (void)
1027 basic_block bb;
1028 block_stmt_iterator bsi;
1029 bool changed = false;
1031 FOR_EACH_BB (bb)
1033 /* Ignore cold areas -- we are enlarging the code. */
1034 if (!maybe_hot_bb_p (bb))
1035 continue;
1037 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1039 tree stmt = bsi_stmt (bsi);
1040 stmt_ann_t ann = get_stmt_ann (stmt);
1041 histogram_value th = ann->histograms;
1042 if (!th)
1043 continue;
1045 if (dump_file)
1047 fprintf (dump_file, "Trying transformations on insn ");
1048 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1051 /* Transformations: */
1052 /* The order of things in this conditional controls which
1053 transformation is used when more than one is applicable. */
1054 /* It is expected that any code added by the transformations
1055 will be added before the current statement, and that the
1056 current statement remain valid (although possibly
1057 modified) upon return. */
1058 if (flag_value_profile_transformations
1059 && (tree_mod_subtract_transform (stmt)
1060 || tree_divmod_fixed_value_transform (stmt)
1061 || tree_mod_pow2_value_transform (stmt)))
1063 changed = true;
1064 /* Original statement may no longer be in the same block. */
1065 bb = bb_for_stmt (stmt);
1068 /* Free extra storage from compute_value_histograms. */
1069 while (th)
1071 free (th->hvalue.tree.counters);
1072 th = th->hvalue.tree.next;
1074 ann->histograms = 0;
1078 if (changed)
1080 counts_to_freqs ();
1083 return changed;
1086 /* Generate code for transformation 1 (with OPERATION, operands OP1
1087 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
1088 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1089 within roundoff error). This generates the result into a temp and returns
1090 the temp; it does not replace or alter the original STMT. */
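/* A sketch of the GIMPLE built below for "op1 OP op2" with expected divisor
   VALUE (the PROF_* temporaries correspond to the create_tmp_var calls;
   label names are illustrative):

     PROF_v = VALUE;
     PROF_1 = op2;
     if (PROF_1 != PROF_v) goto lab2; else goto lab1;
   lab1:
     PROF_2 = op1 OP PROF_v;       <-- divisor known to be VALUE
     goto lab3;
   lab2:
     PROF_2 = op1 OP op2;          <-- general case
   lab3:
     ;

   The returned temporary (PROF_2 here) is stored into the original lhs by
   the caller.  */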
1091 static tree
1092 tree_divmod_fixed_value (tree stmt, tree operation,
1093 tree op1, tree op2, tree value, int prob, gcov_type count,
1094 gcov_type all)
1096 tree stmt1, stmt2, stmt3;
1097 tree tmp1, tmp2, tmpv;
1098 tree label_decl1 = create_artificial_label ();
1099 tree label_decl2 = create_artificial_label ();
1100 tree label_decl3 = create_artificial_label ();
1101 tree label1, label2, label3;
1102 tree bb1end, bb2end, bb3end;
1103 basic_block bb, bb2, bb3, bb4;
1104 tree optype = TREE_TYPE (operation);
1105 edge e12, e13, e23, e24, e34;
1106 block_stmt_iterator bsi;
1108 bb = bb_for_stmt (stmt);
1109 bsi = bsi_for_stmt (stmt);
1111 tmpv = create_tmp_var (optype, "PROF");
1112 tmp1 = create_tmp_var (optype, "PROF");
1113 stmt1 = build2 (MODIFY_EXPR, optype, tmpv, fold_convert (optype, value));
1114 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1115 stmt3 = build3 (COND_EXPR, void_type_node,
1116 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
1117 build1 (GOTO_EXPR, void_type_node, label_decl2),
1118 build1 (GOTO_EXPR, void_type_node, label_decl1));
1119 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1120 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1121 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1122 bb1end = stmt3;
1124 tmp2 = create_tmp_var (optype, "PROF");
1125 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1126 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1127 build2 (TREE_CODE (operation), optype, op1, tmpv));
1128 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1129 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1130 bb2end = stmt1;
1132 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1133 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1134 build2 (TREE_CODE (operation), optype, op1, op2));
1135 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1136 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1137 bb3end = stmt1;
1139 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1140 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1142 /* Fix CFG. */
1143 /* Edge e23 connects bb2 to bb3, etc. */
1144 e12 = split_block (bb, bb1end);
1145 bb2 = e12->dest;
1146 bb2->count = count;
1147 e23 = split_block (bb2, bb2end);
1148 bb3 = e23->dest;
1149 bb3->count = all - count;
1150 e34 = split_block (bb3, bb3end);
1151 bb4 = e34->dest;
1152 bb4->count = all;
1154 e12->flags &= ~EDGE_FALLTHRU;
1155 e12->flags |= EDGE_FALSE_VALUE;
1156 e12->probability = prob;
1157 e12->count = count;
1159 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1160 e13->probability = REG_BR_PROB_BASE - prob;
1161 e13->count = all - count;
1163 remove_edge (e23);
1165 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1166 e24->probability = REG_BR_PROB_BASE;
1167 e24->count = count;
1169 e34->probability = REG_BR_PROB_BASE;
1170 e34->count = all - count;
1172 return tmp2;
1175 /* Do transform 1) on STMT if applicable. */
1176 static bool
1177 tree_divmod_fixed_value_transform (tree stmt)
1179 stmt_ann_t ann = get_stmt_ann (stmt);
1180 histogram_value histogram;
1181 enum tree_code code;
1182 gcov_type val, count, all;
1183 tree modify, op, op1, op2, result, value, tree_val;
1184 int prob;
1186 modify = stmt;
1187 if (TREE_CODE (stmt) == RETURN_EXPR
1188 && TREE_OPERAND (stmt, 0)
1189 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1190 modify = TREE_OPERAND (stmt, 0);
1191 if (TREE_CODE (modify) != MODIFY_EXPR)
1192 return false;
1193 op = TREE_OPERAND (modify, 1);
1194 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1195 return false;
1196 code = TREE_CODE (op);
1198 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
1199 return false;
1201 op1 = TREE_OPERAND (op, 0);
1202 op2 = TREE_OPERAND (op, 1);
1203 if (!ann->histograms)
1204 return false;
1206 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1207 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
1208 break;
1210 if (!histogram)
1211 return false;
1213 value = histogram->hvalue.tree.value;
1214 val = histogram->hvalue.tree.counters[0];
1215 count = histogram->hvalue.tree.counters[1];
1216 all = histogram->hvalue.tree.counters[2];
1218 /* We require that count be at least half of all; this means
1219 that for the transformation to fire the value must be constant
1220 at least 50% of the time (and 75% gives the guarantee of usage). */
1221 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
1222 return false;
1224 if (dump_file)
1226 fprintf (dump_file, "Div/mod by constant transformation on insn ");
1227 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1230 /* Compute probability of taking the optimal path. */
1231 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1233 tree_val = build_int_cst_wide (get_gcov_type (),
1234 (unsigned HOST_WIDE_INT) val,
1235 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
1236 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
1238 TREE_OPERAND (modify, 1) = result;
1240 return true;
1243 /* Generate code for transformation 2 (with OPERATION, operands OP1
1244 and OP2, parent modify-expr STMT and probability of taking the optimal
1245 path PROB, which is equivalent to COUNT/ALL within roundoff error).
1246 This generates the result into a temp and returns
1247 the temp; it does not replace or alter the original STMT. */
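/* A sketch of the GIMPLE built below for unsigned "op1 % op2" (label names
   are illustrative):

     PROF_1 = op2;
     PROF_2 = op2 - 1;
     PROF_3 = PROF_2 & PROF_1;
     if (PROF_3 != 0) goto lab2; else goto lab1;
   lab1:
     result = op1 & PROF_2;        <-- op2 was a power of 2
     goto lab3;
   lab2:
     result = op1 % op2;           <-- general case
   lab3:
     ;  */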
1248 static tree
1249 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
1250 gcov_type count, gcov_type all)
1252 tree stmt1, stmt2, stmt3, stmt4;
1253 tree tmp1, tmp2, tmp3;
1254 tree label_decl1 = create_artificial_label ();
1255 tree label_decl2 = create_artificial_label ();
1256 tree label_decl3 = create_artificial_label ();
1257 tree label1, label2, label3;
1258 tree bb1end, bb2end, bb3end;
1259 basic_block bb, bb2, bb3, bb4;
1260 tree optype = TREE_TYPE (operation);
1261 edge e12, e13, e23, e24, e34;
1262 block_stmt_iterator bsi;
1263 tree result = create_tmp_var (optype, "PROF");
1265 bb = bb_for_stmt (stmt);
1266 bsi = bsi_for_stmt (stmt);
1268 tmp1 = create_tmp_var (optype, "PROF");
1269 tmp2 = create_tmp_var (optype, "PROF");
1270 tmp3 = create_tmp_var (optype, "PROF");
1271 stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
1272 stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
1273 build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
1274 stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
1275 build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
1276 stmt4 = build3 (COND_EXPR, void_type_node,
1277 build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
1278 build1 (GOTO_EXPR, void_type_node, label_decl2),
1279 build1 (GOTO_EXPR, void_type_node, label_decl1));
1280 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1281 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1282 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1283 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
1284 bb1end = stmt4;
1286 /* tmp2 == op2-1 inherited from previous block */
1287 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1288 stmt1 = build2 (MODIFY_EXPR, optype, result,
1289 build2 (BIT_AND_EXPR, optype, op1, tmp2));
1290 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1291 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1292 bb2end = stmt1;
1294 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1295 stmt1 = build2 (MODIFY_EXPR, optype, result,
1296 build2 (TREE_CODE (operation), optype, op1, op2));
1297 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1298 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1299 bb3end = stmt1;
1301 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1302 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1304 /* Fix CFG. */
1305 /* Edge e23 connects bb2 to bb3, etc. */
1306 e12 = split_block (bb, bb1end);
1307 bb2 = e12->dest;
1308 bb2->count = count;
1309 e23 = split_block (bb2, bb2end);
1310 bb3 = e23->dest;
1311 bb3->count = all - count;
1312 e34 = split_block (bb3, bb3end);
1313 bb4 = e34->dest;
1314 bb4->count = all;
1316 e12->flags &= ~EDGE_FALLTHRU;
1317 e12->flags |= EDGE_FALSE_VALUE;
1318 e12->probability = prob;
1319 e12->count = count;
1321 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1322 e13->probability = REG_BR_PROB_BASE - prob;
1323 e13->count = all - count;
1325 remove_edge (e23);
1327 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1328 e24->probability = REG_BR_PROB_BASE;
1329 e24->count = count;
1331 e34->probability = REG_BR_PROB_BASE;
1332 e34->count = all - count;
1334 return result;
1337 /* Do transform 2) on STMT if applicable. */
1338 static bool
1339 tree_mod_pow2_value_transform (tree stmt)
1341 stmt_ann_t ann = get_stmt_ann (stmt);
1342 histogram_value histogram;
1343 enum tree_code code;
1344 gcov_type count, wrong_values, all;
1345 tree modify, op, op1, op2, result, value;
1346 int prob;
1348 modify = stmt;
1349 if (TREE_CODE (stmt) == RETURN_EXPR
1350 && TREE_OPERAND (stmt, 0)
1351 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1352 modify = TREE_OPERAND (stmt, 0);
1353 if (TREE_CODE (modify) != MODIFY_EXPR)
1354 return false;
1355 op = TREE_OPERAND (modify, 1);
1356 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1357 return false;
1358 code = TREE_CODE (op);
1360 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1361 return false;
1363 op1 = TREE_OPERAND (op, 0);
1364 op2 = TREE_OPERAND (op, 1);
1365 if (!ann->histograms)
1366 return false;
1368 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1369 if (histogram->type == HIST_TYPE_POW2)
1370 break;
1372 if (!histogram)
1373 return false;
1375 value = histogram->hvalue.tree.value;
1376 wrong_values = histogram->hvalue.tree.counters[0];
1377 count = histogram->hvalue.tree.counters[1];
1379 /* We require that we hit a power of 2 in at least half of all evaluations. */
1380 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
1381 return false;
1383 if (dump_file)
1385 fprintf (dump_file, "Mod power of 2 transformation on insn ");
1386 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1389 /* Compute probability of taking the optimal path. */
1390 all = count + wrong_values;
1391 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1393 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
1395 TREE_OPERAND (modify, 1) = result;
1397 return true;
1400 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
1401 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
1402 support. Currently only NCOUNTS==0 or 1 is supported and this is
1403 built into this interface. The probabilities of taking the optimal
1404 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
1405 COUNT2/ALL respectively within roundoff error). This generates the
1406 result into a temp and returns the temp; it does not replace or alter
1407 the original STMT. */
1408 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
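/* A sketch of the GIMPLE built below for NCOUNTS == 1 (with NCOUNTS == 0 the
   middle block is simply omitted; label names are illustrative):

     result = op1;
     PROF_1 = op2;
     if (result < PROF_1) goto lab3; else goto lab1;
   lab1:
     result = result - PROF_1;
     if (result < PROF_1) goto lab3; else goto lab2;
   lab2:
     result = result % PROF_1;     <-- fall back to the real modulo
   lab3:
     ;  */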
1410 static tree
1411 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
1412 int prob1, int prob2, int ncounts,
1413 gcov_type count1, gcov_type count2, gcov_type all)
1415 tree stmt1, stmt2, stmt3;
1416 tree tmp1;
1417 tree label_decl1 = create_artificial_label ();
1418 tree label_decl2 = create_artificial_label ();
1419 tree label_decl3 = create_artificial_label ();
1420 tree label1, label2, label3;
1421 tree bb1end, bb2end = NULL_TREE, bb3end;
1422 basic_block bb, bb2, bb3, bb4;
1423 tree optype = TREE_TYPE (operation);
1424 edge e12, e23 = 0, e24, e34, e14;
1425 block_stmt_iterator bsi;
1426 tree result = create_tmp_var (optype, "PROF");
1428 bb = bb_for_stmt (stmt);
1429 bsi = bsi_for_stmt (stmt);
1431 tmp1 = create_tmp_var (optype, "PROF");
1432 stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
1433 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1434 stmt3 = build3 (COND_EXPR, void_type_node,
1435 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1436 build1 (GOTO_EXPR, void_type_node, label_decl3),
1437 build1 (GOTO_EXPR, void_type_node,
1438 ncounts ? label_decl1 : label_decl2));
1439 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1440 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1441 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1442 bb1end = stmt3;
1444 if (ncounts) /* Assumed to be 0 or 1 */
1446 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1447 stmt1 = build2 (MODIFY_EXPR, optype, result,
1448 build2 (MINUS_EXPR, optype, result, tmp1));
1449 stmt2 = build3 (COND_EXPR, void_type_node,
1450 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1451 build1 (GOTO_EXPR, void_type_node, label_decl3),
1452 build1 (GOTO_EXPR, void_type_node, label_decl2));
1453 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1454 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1455 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1456 bb2end = stmt2;
1459 /* Fallback case. */
1460 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1461 stmt1 = build2 (MODIFY_EXPR, optype, result,
1462 build2 (TREE_CODE (operation), optype, result, tmp1));
1463 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1464 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1465 bb3end = stmt1;
1467 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1468 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1470 /* Fix CFG. */
1471 /* Edge e23 connects bb2 to bb3, etc. */
1472 /* However block 3 is optional; if it is not there, references
1473 to 3 really refer to block 2. */
1474 e12 = split_block (bb, bb1end);
1475 bb2 = e12->dest;
1476 bb2->count = all - count1;
1478 if (ncounts) /* Assumed to be 0 or 1. */
1480 e23 = split_block (bb2, bb2end);
1481 bb3 = e23->dest;
1482 bb3->count = all - count1 - count2;
1485 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
1486 bb4 = e34->dest;
1487 bb4->count = all;
1489 e12->flags &= ~EDGE_FALLTHRU;
1490 e12->flags |= EDGE_FALSE_VALUE;
1491 e12->probability = REG_BR_PROB_BASE - prob1;
1492 e12->count = count1;
1494 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
1495 e14->probability = prob1;
1496 e14->count = all - count1;
1498 if (ncounts) /* Assumed to be 0 or 1. */
1500 e23->flags &= ~EDGE_FALLTHRU;
1501 e23->flags |= EDGE_FALSE_VALUE;
1502 e23->count = all - count1 - count2;
1503 e23->probability = REG_BR_PROB_BASE - prob2;
1505 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
1506 e24->probability = prob2;
1507 e24->count = count2;
1510 e34->probability = REG_BR_PROB_BASE;
1511 e34->count = all - count1 - count2;
1513 return result;
1516 /* Do transforms 3) and 4) on STMT if applicable. */
1517 static bool
1518 tree_mod_subtract_transform (tree stmt)
1520 stmt_ann_t ann = get_stmt_ann (stmt);
1521 histogram_value histogram;
1522 enum tree_code code;
1523 gcov_type count, wrong_values, all;
1524 tree modify, op, op1, op2, result, value;
1525 int prob1, prob2;
1526 unsigned int i;
1528 modify = stmt;
1529 if (TREE_CODE (stmt) == RETURN_EXPR
1530 && TREE_OPERAND (stmt, 0)
1531 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1532 modify = TREE_OPERAND (stmt, 0);
1533 if (TREE_CODE (modify) != MODIFY_EXPR)
1534 return false;
1535 op = TREE_OPERAND (modify, 1);
1536 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1537 return false;
1538 code = TREE_CODE (op);
1540 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1541 return false;
1543 op1 = TREE_OPERAND (op, 0);
1544 op2 = TREE_OPERAND (op, 1);
1545 if (!ann->histograms)
1546 return false;
1548 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1549 if (histogram->type == HIST_TYPE_INTERVAL)
1550 break;
1552 if (!histogram)
1553 return false;
1555 value = histogram->hvalue.tree.value;
1556 all = 0;
1557 wrong_values = 0;
1558 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1559 all += histogram->hvalue.tree.counters[i];
1561 wrong_values += histogram->hvalue.tree.counters[i];
1562 wrong_values += histogram->hvalue.tree.counters[i+1];
1563 all += wrong_values;
1565 /* We require that we use just subtractions in at least 50% of all
1566 evaluations. */
1567 count = 0;
1568 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1570 count += histogram->hvalue.tree.counters[i];
1571 if (count * 2 >= all)
1572 break;
1574 if (i == histogram->hdata.intvl.steps)
1575 return false;
1577 if (dump_file)
1579 fprintf (dump_file, "Mod subtract transformation on insn ");
1580 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1583 /* Compute probability of taking the optimal path(s). */
1584 prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
1585 prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
1587 /* In practice, "steps" is always 2. This interface reflects this,
1588 and will need to be changed if "steps" can change. */
1589 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
1590 histogram->hvalue.tree.counters[0],
1591 histogram->hvalue.tree.counters[1], all);
1593 TREE_OPERAND (modify, 1) = result;
1595 return true;
1598 /* Connection to the outside world. */
1599 /* Struct for IR-dependent hooks. */
1600 struct value_prof_hooks {
1601 /* Find list of values for which we want to measure histograms. */
1602 void (*find_values_to_profile) (histogram_values *);
1604 /* Identify and exploit properties of values that are hard to analyze
1605 statically. See value-prof.c for more detail. */
1606 bool (*value_profile_transformations) (void);
1609 /* Hooks for RTL-based versions (the only ones that currently work). */
1610 static struct value_prof_hooks rtl_value_prof_hooks =
1612 rtl_find_values_to_profile,
1613 rtl_value_profile_transformations
1616 void
1617 rtl_register_value_prof_hooks (void)
1619 value_prof_hooks = &rtl_value_prof_hooks;
1620 gcc_assert (!ir_type ());
1623 /* Find values inside STMT for which we want to measure histograms for
1624 division/modulo optimization. */
1625 static void
1626 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
1628 tree assign, lhs, rhs, divisor, op0, type;
1629 histogram_value hist;
1631 if (TREE_CODE (stmt) == RETURN_EXPR)
1632 assign = TREE_OPERAND (stmt, 0);
1633 else
1634 assign = stmt;
1636 if (!assign
1637 || TREE_CODE (assign) != MODIFY_EXPR)
1638 return;
1639 lhs = TREE_OPERAND (assign, 0);
1640 type = TREE_TYPE (lhs);
1641 if (!INTEGRAL_TYPE_P (type))
1642 return;
1644 rhs = TREE_OPERAND (assign, 1);
1645 switch (TREE_CODE (rhs))
1647 case TRUNC_DIV_EXPR:
1648 case TRUNC_MOD_EXPR:
1649 divisor = TREE_OPERAND (rhs, 1);
1650 op0 = TREE_OPERAND (rhs, 0);
1652 VEC_reserve (histogram_value, heap, *values, 3);
1654 if (is_gimple_reg (divisor))
1656 /* Check for a special case where the divisor is power(s) of 2.
1657 This is more aggressive than the RTL version, under the
1658 assumption that later phases will reduce / or % by power of 2
1659 to something clever most of the time. Signed or unsigned. */
1660 hist = ggc_alloc (sizeof (*hist));
1661 hist->hvalue.tree.value = divisor;
1662 hist->hvalue.tree.stmt = stmt;
1663 hist->type = HIST_TYPE_POW2;
1664 VEC_quick_push (histogram_value, *values, hist);
1666 /* Check for the case where the divisor is the same value most
1667 of the time. */
1668 hist = ggc_alloc (sizeof (*hist));
1669 hist->hvalue.tree.value = divisor;
1670 hist->hvalue.tree.stmt = stmt;
1671 hist->type = HIST_TYPE_SINGLE_VALUE;
1672 VEC_quick_push (histogram_value, *values, hist);
1675 /* For mod, check whether it is often a noop (or replaceable by
1676 a few subtractions). */
1677 if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
1678 && TYPE_UNSIGNED (type))
1680 hist = ggc_alloc (sizeof (*hist));
1681 hist->hvalue.tree.stmt = stmt;
1682 hist->hvalue.tree.value
1683 = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
1684 hist->type = HIST_TYPE_INTERVAL;
1685 hist->hdata.intvl.int_start = 0;
1686 hist->hdata.intvl.steps = 2;
1687 VEC_quick_push (histogram_value, *values, hist);
1689 return;
1691 default:
1692 return;
1696 /* Find values inside STMT for which we want to measure histograms and add
1697 them to the list VALUES. */
1699 static void
1700 tree_values_to_profile (tree stmt, histogram_values *values)
1702 if (flag_value_profile_transformations)
1703 tree_divmod_values_to_profile (stmt, values);
1706 static void
1707 tree_find_values_to_profile (histogram_values *values)
1709 basic_block bb;
1710 block_stmt_iterator bsi;
1711 unsigned i;
1712 histogram_value hist;
1714 *values = NULL;
1715 FOR_EACH_BB (bb)
1716 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717 tree_values_to_profile (bsi_stmt (bsi), values);
1718 static_values = *values;
1720 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1722 switch (hist->type)
1724 case HIST_TYPE_INTERVAL:
1725 if (dump_file)
1727 fprintf (dump_file, "Interval counter for tree ");
1728 print_generic_expr (dump_file, hist->hvalue.tree.stmt,
1729 TDF_SLIM);
1730 fprintf (dump_file, ", range %d -- %d.\n",
1731 hist->hdata.intvl.int_start,
1732 (hist->hdata.intvl.int_start
1733 + hist->hdata.intvl.steps - 1));
1735 hist->n_counters = hist->hdata.intvl.steps + 2;
1736 break;
1738 case HIST_TYPE_POW2:
1739 if (dump_file)
1741 fprintf (dump_file, "Pow2 counter for tree ");
1742 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1743 fprintf (dump_file, ".\n");
1745 hist->n_counters = 2;
1746 break;
1748 case HIST_TYPE_SINGLE_VALUE:
1749 if (dump_file)
1751 fprintf (dump_file, "Single value counter for tree ");
1752 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1753 fprintf (dump_file, ".\n");
1755 hist->n_counters = 3;
1756 break;
1758 case HIST_TYPE_CONST_DELTA:
1759 if (dump_file)
1761 fprintf (dump_file, "Constant delta counter for tree ");
1762 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1763 fprintf (dump_file, ".\n");
1765 hist->n_counters = 4;
1766 break;
1768 default:
1769 gcc_unreachable ();
1774 static struct value_prof_hooks tree_value_prof_hooks = {
1775 tree_find_values_to_profile,
1776 tree_value_profile_transformations
1779 void
1780 tree_register_value_prof_hooks (void)
1782 value_prof_hooks = &tree_value_prof_hooks;
1783 gcc_assert (ir_type ());
1786 /* IR-independent entry points. */
1787 void
1788 find_values_to_profile (histogram_values *values)
1790 (value_prof_hooks->find_values_to_profile) (values);
1793 bool
1794 value_profile_transformations (void)
1796 bool retval = (value_prof_hooks->value_profile_transformations) ();
1797 VEC_free (histogram_value, heap, static_values);
1798 return retval;