[official-gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
44 static struct value_prof_hooks *value_prof_hooks;
46 /* This is the vector of histograms. Created in find_values_to_profile.
47 During profile generation, freed by instrument_values.
48 During profile use, freed by value_profile_transformations. */
50 static histogram_values static_values = NULL;
52 /* This file implements value-profile-based optimizations. Currently the
53 following optimizations are implemented (for more detailed descriptions
54 see the comments at value_profile_transformations):
56 1) Division/modulo specialization. If we can determine that the operands
57 of the division have some special properties, we may use this knowledge to
58 produce more efficient code.
59 2) Speculative prefetching. If we can determine that the difference
60 between the addresses accessed by a memory reference is usually constant,
61 we may emit prefetch instructions.
63 Every such optimization should add its requirements for profiled values to
64 the insn_values_to_profile function. This function is called from branch_prob
65 in profile.c, and the requested values are instrumented by it in the first
66 compilation with -fprofile-arcs. The optimization may then read the
67 gathered data in the second compilation with -fbranch-probabilities.
69 There are currently two versions, RTL-based and tree-based. Over time
70 the RTL-based version may go away.
72 In the RTL-based version, the measured data is appended as a REG_VALUE_PROFILE
73 note to the instrumented insn. The argument of the note is an
74 EXPR_LIST whose members have the following meaning (from the first to
75 the last):
77 -- type of information gathered (HIST_TYPE*)
78 -- the expression that is profiled
79 -- list of counters starting from the first one.
81 In the tree-based version, the measured data is pointed to from the histograms
82 field of the statement annotation of the instrumented statements. It is
83 kept as a linked list of struct histogram_value_t's, which contain the
84 same information as above. */
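/* For illustration, a sketch (not emitted verbatim by this file) of the note
   attached for a single-value histogram on the divisor B of "x = a / b":

     (expr_list:REG_VALUE_PROFILE
        (expr_list (const_int HIST_TYPE_SINGLE_VALUE)
           (expr_list (reg B)
              (expr_list (const_int most-common-value)
                 (expr_list (const_int count-of-that-value)
                    (expr_list (const_int total-executions) (nil))))))
        ...remaining notes of the insn...)

   This is the chain that e.g. rtl_divmod_fixed_value_transform walks below
   with repeated XEXP (histogram, 0) / XEXP (histogram, 1) accesses.  */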
86 /* For speculative prefetching, the range in which we do not prefetch (because
87 we assume that the data will be in the cache anyway). The asymmetry between the
88 min and max range tries to reflect the fact that sequential prefetching
89 of data is commonly done directly by the hardware. Nevertheless, these
90 values are just a guess and should of course be target-specific.
92 FIXME: There is no tree form of speculative prefetching as yet.
94 FIXME: A better approach to instrumentation in the profile-generation
95 pass is to generate calls to magic library functions (to be added to
96 libgcc) rather than inline code. This approach will probably be
97 necessary to get tree-based speculative prefetching working in a useful
98 fashion, as inline code bloats things so much the rest of the compiler has
99 serious problems dealing with it (judging from the rtl behavior). */
101 #ifndef NOPREFETCH_RANGE_MIN
102 #define NOPREFETCH_RANGE_MIN (-16)
103 #endif
104 #ifndef NOPREFETCH_RANGE_MAX
105 #define NOPREFETCH_RANGE_MAX 32
106 #endif
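/* Worked example of the defaults above (numbers for illustration only): a
   measured stride of +24 falls inside [-16, 32] and is therefore not
   prefetched, while a stride of +64 (or -64) is outside the range and makes
   speculative_prefetching_transform emit a prefetch of address + 64
   (resp. address - 64) ahead of the access.  */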
108 static void rtl_divmod_values_to_profile (rtx, histogram_values *);
109 #ifdef HAVE_prefetch
110 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
111 static int find_mem_reference_1 (rtx *, void *);
112 static void find_mem_reference_2 (rtx, rtx, void *);
113 static bool find_mem_reference (rtx, rtx *, int *);
114 #endif
116 static void rtl_values_to_profile (rtx, histogram_values *);
117 static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
118 rtx, gcov_type, int);
119 static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
120 static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
121 int, int, int);
122 #ifdef HAVE_prefetch
123 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
124 #endif
125 static bool rtl_divmod_fixed_value_transform (rtx);
126 static bool rtl_mod_pow2_value_transform (rtx);
127 static bool rtl_mod_subtract_transform (rtx);
128 #ifdef HAVE_prefetch
129 static bool speculative_prefetching_transform (rtx);
130 #endif
131 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
132 tree, int, gcov_type, gcov_type);
133 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
134 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
135 gcov_type, gcov_type, gcov_type);
136 static bool tree_divmod_fixed_value_transform (tree);
137 static bool tree_mod_pow2_value_transform (tree);
138 static bool tree_mod_subtract_transform (tree);
141 /* Find values inside INSN for which we want to measure histograms for the
142 division/modulo optimization and store them in VALUES. */
143 static void
144 rtl_divmod_values_to_profile (rtx insn, histogram_values *values)
146 rtx set, set_src, op1, op2;
147 enum machine_mode mode;
148 histogram_value hist;
150 if (!INSN_P (insn))
151 return;
153 set = single_set (insn);
154 if (!set)
155 return;
157 mode = GET_MODE (SET_DEST (set));
158 if (!INTEGRAL_MODE_P (mode))
159 return;
161 set_src = SET_SRC (set);
162 switch (GET_CODE (set_src))
164 case DIV:
165 case MOD:
166 case UDIV:
167 case UMOD:
168 op1 = XEXP (set_src, 0);
169 op2 = XEXP (set_src, 1);
170 if (side_effects_p (op2))
171 return;
173 /* Check for the special case where the divisor is a power of 2. */
174 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
176 hist = ggc_alloc (sizeof (*hist));
177 hist->hvalue.rtl.value = op2;
178 hist->hvalue.rtl.seq = NULL_RTX;
179 hist->hvalue.rtl.mode = mode;
180 hist->hvalue.rtl.insn = insn;
181 hist->type = HIST_TYPE_POW2;
182 VEC_safe_push (histogram_value, heap, *values, hist);
185 /* Check whether the divisor is not in fact a constant. */
186 if (!CONSTANT_P (op2))
188 hist = ggc_alloc (sizeof (*hist));
189 hist->hvalue.rtl.value = op2;
190 hist->hvalue.rtl.mode = mode;
191 hist->hvalue.rtl.seq = NULL_RTX;
192 hist->hvalue.rtl.insn = insn;
193 hist->type = HIST_TYPE_SINGLE_VALUE;
194 VEC_safe_push (histogram_value, heap, *values, hist);
197 /* For mod, check whether it is often a noop (or replaceable by
198 a few subtractions). */
199 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
201 rtx tmp;
203 hist = ggc_alloc (sizeof (*hist));
204 start_sequence ();
205 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
206 hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX);
207 hist->hvalue.rtl.seq = get_insns ();
208 end_sequence ();
209 hist->hvalue.rtl.mode = mode;
210 hist->hvalue.rtl.insn = insn;
211 hist->type = HIST_TYPE_INTERVAL;
212 hist->hdata.intvl.int_start = 0;
213 hist->hdata.intvl.steps = 2;
214 VEC_safe_push (histogram_value, heap, *values, hist);
216 return;
218 default:
219 return;
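/* For illustration, a summary of what the code above requests (no additional
   logic): for an insn computing an unsigned "x = a % b" with non-constant
   register b, three histograms are pushed onto *VALUES:
     -- HIST_TYPE_POW2 on b          (is b usually a power of 2?)
     -- HIST_TYPE_SINGLE_VALUE on b  (is b usually one particular value?)
     -- HIST_TYPE_INTERVAL on a / b with int_start 0 and 2 steps
        (is the quotient usually 0 or 1, i.e. is the mod a noop or a single
        subtraction?).  */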
223 #ifdef HAVE_prefetch
225 /* Called from find_mem_reference through for_each_rtx; finds a memory
226 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
227 in *RET and the traversal of the expression is interrupted by returning 1.
228 Otherwise 0 is returned. */
230 static int
231 find_mem_reference_1 (rtx *expr, void *ret)
233 rtx *mem = ret;
235 if (MEM_P (*expr))
237 *mem = *expr;
238 return 1;
240 return 0;
243 /* Called from find_mem_reference through note_stores to find out whether
244 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
245 FMR2_WRITE is set to true. */
247 static int fmr2_write;
248 static void
249 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
251 if (expr == mem)
252 fmr2_write = true;
255 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
256 if it is a write to the mem. Return false if no memory reference is found,
257 true otherwise. */
259 static bool
260 find_mem_reference (rtx insn, rtx *mem, int *write)
262 *mem = NULL_RTX;
263 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
265 if (!*mem)
266 return false;
268 fmr2_write = false;
269 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
270 *write = fmr2_write;
271 return true;
274 /* Find values inside INSN for which we want to measure histograms for
275 speculative prefetching. Add them to the list VALUES.
276 Returns true if we found any such value, false otherwise. */
278 static bool
279 insn_prefetch_values_to_profile (rtx insn, histogram_values* values)
281 rtx mem, address;
282 int write;
283 histogram_value hist;
285 /* It only makes sense to look for memory references in ordinary insns. */
286 if (!NONJUMP_INSN_P (insn))
287 return false;
289 if (!find_mem_reference (insn, &mem, &write))
290 return false;
292 address = XEXP (mem, 0);
293 if (side_effects_p (address))
294 return false;
296 if (CONSTANT_P (address))
297 return false;
299 hist = ggc_alloc (sizeof (*hist));
300 hist->hvalue.rtl.value = address;
301 hist->hvalue.rtl.mode = GET_MODE (address);
302 hist->hvalue.rtl.seq = NULL_RTX;
303 hist->hvalue.rtl.insn = insn;
304 hist->type = HIST_TYPE_CONST_DELTA;
305 VEC_safe_push (histogram_value, heap, *values, hist);
307 return true;
309 #endif
310 /* Find values inside INSN for which we want to measure histograms and add
311 them to the list VALUES. */
312 static void
313 rtl_values_to_profile (rtx insn, histogram_values *values)
315 if (flag_value_profile_transformations)
316 rtl_divmod_values_to_profile (insn, values);
318 #ifdef HAVE_prefetch
319 if (flag_speculative_prefetching)
320 insn_prefetch_values_to_profile (insn, values);
321 #endif
324 /* Find the list of values for which we want to measure histograms. */
325 static void
326 rtl_find_values_to_profile (histogram_values *values)
328 rtx insn;
329 unsigned i, libcall_level;
330 histogram_value hist;
332 life_analysis (NULL, PROP_DEATH_NOTES);
334 *values = NULL;
335 libcall_level = 0;
336 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
337 rtl_values_to_profile (insn, values);
338 static_values = *values;
340 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
342 switch (hist->type)
344 case HIST_TYPE_INTERVAL:
345 if (dump_file)
346 fprintf (dump_file,
347 "Interval counter for insn %d, range %d -- %d.\n",
348 INSN_UID ((rtx)hist->hvalue.rtl.insn),
349 hist->hdata.intvl.int_start,
350 (hist->hdata.intvl.int_start
351 + hist->hdata.intvl.steps - 1));
352 hist->n_counters = hist->hdata.intvl.steps + 2;
353 break;
355 case HIST_TYPE_POW2:
356 if (dump_file)
357 fprintf (dump_file,
358 "Pow2 counter for insn %d.\n",
359 INSN_UID ((rtx)hist->hvalue.rtl.insn));
360 hist->n_counters = 2;
361 break;
363 case HIST_TYPE_SINGLE_VALUE:
364 if (dump_file)
365 fprintf (dump_file,
366 "Single value counter for insn %d.\n",
367 INSN_UID ((rtx)hist->hvalue.rtl.insn));
368 hist->n_counters = 3;
369 break;
371 case HIST_TYPE_CONST_DELTA:
372 if (dump_file)
373 fprintf (dump_file,
374 "Constant delta counter for insn %d.\n",
375 INSN_UID ((rtx)hist->hvalue.rtl.insn));
376 hist->n_counters = 4;
377 break;
379 default:
380 gcc_unreachable ();
383 allocate_reg_info (max_reg_num (), FALSE, FALSE);
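/* For reference, a summary of the counter layouts consumed by the
   transformations below (see the code that reads them in the *_transform
   functions):
     HIST_TYPE_INTERVAL      steps + 2 counters: one per value in
                             [int_start, int_start + steps), then two
                             counters for values outside that range.
     HIST_TYPE_POW2          2 counters: non-power-of-2 hits, power-of-2 hits.
     HIST_TYPE_SINGLE_VALUE  3 counters: the most common value, how many times
                             it occurred, and the total number of executions.
     HIST_TYPE_CONST_DELTA   4 counters: the last value seen, the most common
                             difference, its count, and the total number of
                             executions.  */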
386 /* Main entry point. Finds REG_VALUE_PROFILE notes from the profiler and uses
387 them to identify and exploit properties of values that are hard to analyze
388 statically.
390 We do the following transformations:
394 x = a / b;
396 where b is almost always a constant N is transformed to
398 if (b == N)
399 x = a / N;
400 else
401 x = a / b;
403 Analogously for %
407 x = a % b
409 where b is almost always a power of 2 and the division is unsigned
410 TODO -- handle signed case as well
412 if ((b & (b - 1)) == 0)
413 x = a & (b - 1);
414 else
415 x = a % b;
417 Note that when b = 0, no error will occur and x = a; this is correct,
418 as the result of such an operation is undefined.
422 x = a % b
424 where a is almost always less than b and the division is unsigned
425 TODO -- handle signed case as well
427 x = a;
428 if (x >= b)
429 x %= b;
433 x = a % b
435 where a is almost always less than 2 * b and the division is unsigned
436 TODO -- handle signed case as well
438 x = a;
439 if (x >= b)
440 x -= b;
441 if (x >= b)
442 x %= b;
444 It would be possible to continue analogously for K * b for other small
445 K's, but it is probably not useful.
449 A read or write of mem[address], where the value of address usually changes
450 by a constant C != 0 between consecutive executions of the access; with
451 -fspeculative-prefetching we then add a prefetch of address + C before
452 the insn. This handles prefetching of several interesting cases in addition
453 to simple prefetching for addresses that are induction variables, e.g.
454 linked lists allocated sequentially (even if they are processed
455 recursively).
457 TODO -- we should also check whether the difference from the adjacent memory
458 references is not (usually) small, so that we do
459 not issue overlapping prefetches. Also we should employ some
460 heuristics to eliminate cases where prefetching evidently spoils
461 the code.
462 -- it should somehow cooperate with the prefetching done by the loop optimizer
464 TODO:
466 There are other useful cases that could be handled by a similar mechanism,
467 for example:
469 for (i = 0; i < n; i++)
472 transform to (for constant N):
474 if (n == N)
475 for (i = 0; i < N; i++)
477 else
478 for (i = 0; i < n; i++)
480 making the unroller happy. Since this may grow the code significantly,
481 we would have to be very careful here. */
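/* For illustration only, a source-level sketch of transformation 1 (invented
   example, not code used by the compiler): if profile feedback shows that in

     unsigned int
     scale (unsigned int a, unsigned int b)
     {
       return a / b;
     }

   the divisor b is 8 on the vast majority of executions, the transformation
   effectively turns the division into

     return b == 8 ? a / 8 : a / b;

   so that the common case can later be strength-reduced to a shift.  */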
483 static bool
484 rtl_value_profile_transformations (void)
486 rtx insn, next;
487 int changed = false;
489 for (insn = get_insns (); insn; insn = next)
491 next = NEXT_INSN (insn);
493 if (!INSN_P (insn))
494 continue;
496 /* Scan for insn carrying a histogram. */
497 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
498 continue;
500 /* Ignore cold areas -- we are growing the code. */
501 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
502 continue;
504 if (dump_file)
506 fprintf (dump_file, "Trying transformations on insn %d\n",
507 INSN_UID (insn));
508 print_rtl_single (dump_file, insn);
511 /* Transformations: */
512 if (flag_value_profile_transformations
513 && (rtl_mod_subtract_transform (insn)
514 || rtl_divmod_fixed_value_transform (insn)
515 || rtl_mod_pow2_value_transform (insn)))
516 changed = true;
517 #ifdef HAVE_prefetch
518 if (flag_speculative_prefetching
519 && speculative_prefetching_transform (insn))
520 changed = true;
521 #endif
524 if (changed)
526 commit_edge_insertions ();
527 allocate_reg_info (max_reg_num (), FALSE, FALSE);
530 return changed;
533 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
534 and OP2, where OP2 is expected to have value VALUE, result TARGET and
535 probability of taking the optimal path PROB). */
536 static rtx
537 rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
538 rtx target, rtx op1, rtx op2, gcov_type value,
539 int prob)
541 rtx tmp, tmp1, jump;
542 rtx neq_label = gen_label_rtx ();
543 rtx end_label = gen_label_rtx ();
544 rtx sequence;
546 start_sequence ();
548 if (!REG_P (op2))
550 tmp = gen_reg_rtx (mode);
551 emit_move_insn (tmp, copy_rtx (op2));
553 else
554 tmp = op2;
556 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
557 NULL_RTX, neq_label);
559 /* Add branch probability to jump we just created. */
560 jump = get_last_insn ();
561 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
562 GEN_INT (REG_BR_PROB_BASE - prob),
563 REG_NOTES (jump));
565 tmp1 = simplify_gen_binary (operation, mode,
566 copy_rtx (op1), GEN_INT (value));
567 tmp1 = force_operand (tmp1, target);
568 if (tmp1 != target)
569 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
571 emit_jump_insn (gen_jump (end_label));
572 emit_barrier ();
574 emit_label (neq_label);
575 tmp1 = simplify_gen_binary (operation, mode,
576 copy_rtx (op1), copy_rtx (tmp));
577 tmp1 = force_operand (tmp1, target);
578 if (tmp1 != target)
579 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
581 emit_label (end_label);
583 sequence = get_insns ();
584 end_sequence ();
585 rebuild_jump_labels (sequence);
586 return sequence;
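/* The sequence built above corresponds to this pseudo-code (a sketch for the
   reader, not additional logic):

     tmp = op2;                   // copied into a register if needed
     if (tmp != VALUE) goto neq;  // jump weighted by REG_BR_PROB_BASE - prob
     target = op1 OP VALUE;       // constant operand, can be optimized later
     goto end;
   neq:
     target = op1 OP tmp;         // the general slow path
   end:                           // execution continues here  */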
589 /* Do transform 1) on INSN if applicable. */
590 static bool
591 rtl_divmod_fixed_value_transform (rtx insn)
593 rtx set, set_src, set_dest, op1, op2, value, histogram;
594 enum rtx_code code;
595 enum machine_mode mode;
596 gcov_type val, count, all;
597 edge e;
598 int prob;
600 set = single_set (insn);
601 if (!set)
602 return false;
604 set_src = SET_SRC (set);
605 set_dest = SET_DEST (set);
606 code = GET_CODE (set_src);
607 mode = GET_MODE (set_dest);
609 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
610 return false;
611 op1 = XEXP (set_src, 0);
612 op2 = XEXP (set_src, 1);
614 for (histogram = REG_NOTES (insn);
615 histogram;
616 histogram = XEXP (histogram, 1))
617 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
618 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
619 break;
621 if (!histogram)
622 return false;
624 histogram = XEXP (XEXP (histogram, 0), 1);
625 value = XEXP (histogram, 0);
626 histogram = XEXP (histogram, 1);
627 val = INTVAL (XEXP (histogram, 0));
628 histogram = XEXP (histogram, 1);
629 count = INTVAL (XEXP (histogram, 0));
630 histogram = XEXP (histogram, 1);
631 all = INTVAL (XEXP (histogram, 0));
633 /* We require that count be at least half of all; this means
634 that for the transformation to fire the value must be constant
635 at least 50% of the time (and 75% gives the guarantee of usage). */
636 if (!rtx_equal_p (op2, value) || 2 * count < all)
637 return false;
639 if (dump_file)
640 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
641 INSN_UID (insn));
643 /* Compute probability of taking the optimal path. */
644 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
646 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
647 delete_insn (insn);
649 insert_insn_on_edge (
650 rtl_divmod_fixed_value (mode, code, set_dest,
651 op1, op2, val, prob), e);
653 return true;
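/* Worked example of the test and probability above (numbers invented for
   illustration): with val = 8, count = 900 and all = 1000, the condition
   2 * count >= all holds (the divisor was 8 in 90% of the executions), so
   the transformation fires and
     prob = (900 * REG_BR_PROB_BASE + 500) / 1000,
   i.e. 90% of REG_BR_PROB_BASE, which becomes the probability of the fast
   path generated by rtl_divmod_fixed_value.  */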
656 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
657 and OP2, result TARGET and probability of taking the optimal path PROB). */
658 static rtx
659 rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
660 rtx op1, rtx op2, int prob)
662 rtx tmp, tmp1, tmp2, tmp3, jump;
663 rtx neq_label = gen_label_rtx ();
664 rtx end_label = gen_label_rtx ();
665 rtx sequence;
667 start_sequence ();
669 if (!REG_P (op2))
671 tmp = gen_reg_rtx (mode);
672 emit_move_insn (tmp, copy_rtx (op2));
674 else
675 tmp = op2;
677 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
678 0, OPTAB_WIDEN);
679 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
680 0, OPTAB_WIDEN);
681 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
682 NULL_RTX, neq_label);
684 /* Add branch probability to jump we just created. */
685 jump = get_last_insn ();
686 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
687 GEN_INT (REG_BR_PROB_BASE - prob),
688 REG_NOTES (jump));
690 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
691 0, OPTAB_WIDEN);
692 if (tmp3 != target)
693 emit_move_insn (copy_rtx (target), tmp3);
694 emit_jump_insn (gen_jump (end_label));
695 emit_barrier ();
697 emit_label (neq_label);
698 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
699 tmp1 = force_operand (tmp1, target);
700 if (tmp1 != target)
701 emit_move_insn (target, tmp1);
703 emit_label (end_label);
705 sequence = get_insns ();
706 end_sequence ();
707 rebuild_jump_labels (sequence);
708 return sequence;
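/* The sequence built above corresponds to this pseudo-code (a sketch for the
   reader, not additional logic):

     tmp  = op2;                  // copied into a register if needed
     tmp1 = tmp - 1;
     tmp2 = tmp & tmp1;
     if (tmp2 != 0) goto neq;     // op2 is not a power of 2
     target = op1 & tmp1;         // cheap mask for the power-of-2 case
     goto end;
   neq:
     target = op1 % tmp;          // the general slow path
   end:                           // execution continues here  */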
711 /* Do transform 2) on INSN if applicable. */
712 static bool
713 rtl_mod_pow2_value_transform (rtx insn)
715 rtx set, set_src, set_dest, op1, op2, value, histogram;
716 enum rtx_code code;
717 enum machine_mode mode;
718 gcov_type wrong_values, count;
719 edge e;
720 int all, prob;
722 set = single_set (insn);
723 if (!set)
724 return false;
726 set_src = SET_SRC (set);
727 set_dest = SET_DEST (set);
728 code = GET_CODE (set_src);
729 mode = GET_MODE (set_dest);
731 if (code != UMOD)
732 return false;
733 op1 = XEXP (set_src, 0);
734 op2 = XEXP (set_src, 1);
736 for (histogram = REG_NOTES (insn);
737 histogram;
738 histogram = XEXP (histogram, 1))
739 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
740 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
741 break;
743 if (!histogram)
744 return false;
746 histogram = XEXP (XEXP (histogram, 0), 1);
747 value = XEXP (histogram, 0);
748 histogram = XEXP (histogram, 1);
749 wrong_values = INTVAL (XEXP (histogram, 0));
750 histogram = XEXP (histogram, 1);
751 count = INTVAL (XEXP (histogram, 0));
753 if (!rtx_equal_p (op2, value))
754 return false;
756 /* We require that we hit a power of two in at least half of all evaluations. */
757 if (count < wrong_values)
758 return false;
760 if (dump_file)
761 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
762 INSN_UID (insn));
764 /* Compute probability of taking the optimal path. */
765 all = count + wrong_values;
766 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
768 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
769 delete_insn (insn);
771 insert_insn_on_edge (
772 rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
774 return true;
777 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
778 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
779 probability of taking the optimal path(s) PROB1 and PROB2). */
780 static rtx
781 rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation,
782 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
784 rtx tmp, tmp1, jump;
785 rtx end_label = gen_label_rtx ();
786 rtx sequence;
787 int i;
789 start_sequence ();
791 if (!REG_P (op2))
793 tmp = gen_reg_rtx (mode);
794 emit_move_insn (tmp, copy_rtx (op2));
796 else
797 tmp = op2;
799 emit_move_insn (target, copy_rtx (op1));
800 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
801 NULL_RTX, end_label);
803 /* Add branch probability to jump we just created. */
804 jump = get_last_insn ();
805 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
806 GEN_INT (prob1), REG_NOTES (jump));
808 for (i = 0; i < sub; i++)
810 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
811 0, OPTAB_WIDEN);
812 if (tmp1 != target)
813 emit_move_insn (target, tmp1);
814 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
815 NULL_RTX, end_label);
817 /* Add branch probability to jump we just created. */
818 jump = get_last_insn ();
819 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
820 GEN_INT (prob2), REG_NOTES (jump));
823 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
824 tmp1 = force_operand (tmp1, target);
825 if (tmp1 != target)
826 emit_move_insn (target, tmp1);
828 emit_label (end_label);
830 sequence = get_insns ();
831 end_sequence ();
832 rebuild_jump_labels (sequence);
833 return sequence;
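/* The sequence built above corresponds to this pseudo-code (a sketch for the
   reader; SUB is 0 for transformation 3 and 1 for transformation 4):

     tmp = op2;                   // copied into a register if needed
     target = op1;
     if (target < tmp) goto end;  // mod is a noop, probability PROB1
     // the next two lines are repeated SUB times:
     target = target - tmp;
     if (target < tmp) goto end;  // a subtraction was enough, probability PROB2
     target = target % tmp;       // the general slow path
   end:                           // execution continues here  */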
836 /* Do transforms 3) and 4) on INSN if applicable. */
837 static bool
838 rtl_mod_subtract_transform (rtx insn)
840 rtx set, set_src, set_dest, op1, op2, histogram;
841 enum rtx_code code;
842 enum machine_mode mode;
843 gcov_type wrong_values, counts[2], count, all;
844 edge e;
845 int i, prob1, prob2;
847 set = single_set (insn);
848 if (!set)
849 return false;
851 set_src = SET_SRC (set);
852 set_dest = SET_DEST (set);
853 code = GET_CODE (set_src);
854 mode = GET_MODE (set_dest);
856 if (code != UMOD)
857 return false;
858 op1 = XEXP (set_src, 0);
859 op2 = XEXP (set_src, 1);
861 for (histogram = REG_NOTES (insn);
862 histogram;
863 histogram = XEXP (histogram, 1))
864 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
865 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
866 break;
868 if (!histogram)
869 return false;
871 histogram = XEXP (XEXP (histogram, 0), 1);
872 histogram = XEXP (histogram, 1);
874 all = 0;
875 for (i = 0; i < 2; i++)
877 counts[i] = INTVAL (XEXP (histogram, 0));
878 all += counts[i];
879 histogram = XEXP (histogram, 1);
881 wrong_values = INTVAL (XEXP (histogram, 0));
882 histogram = XEXP (histogram, 1);
883 wrong_values += INTVAL (XEXP (histogram, 0));
884 all += wrong_values;
886 /* We require that we use just subtractions in at least 50% of all
887 evaluations. */
888 count = 0;
889 for (i = 0; i < 2; i++)
891 count += counts[i];
892 if (count * 2 >= all)
893 break;
896 if (i == 2)
897 return false;
899 if (dump_file)
900 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
901 INSN_UID (insn));
903 /* Compute probability of taking the optimal path(s). */
904 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
905 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
907 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
908 delete_insn (insn);
910 insert_insn_on_edge (
911 rtl_mod_subtract (mode, code, set_dest,
912 op1, op2, i, prob1, prob2), e);
914 return true;
917 #ifdef HAVE_prefetch
918 /* Generate code for transformation 5 for mem with ADDRESS and a constant
919 step DELTA. WRITE is true if the reference is a store to mem. */
921 static rtx
922 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
924 rtx tmp;
925 rtx sequence;
927 /* TODO: we do the prefetching for just one iteration ahead, which
928 often is not enough. */
929 start_sequence ();
930 if (offsettable_address_p (0, VOIDmode, address))
931 tmp = plus_constant (copy_rtx (address), delta);
932 else
934 tmp = simplify_gen_binary (PLUS, Pmode,
935 copy_rtx (address), GEN_INT (delta));
936 tmp = force_operand (tmp, NULL);
938 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
939 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
940 tmp = force_reg (Pmode, tmp);
941 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
942 sequence = get_insns ();
943 end_sequence ();
945 return sequence;
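/* Note on the gen_prefetch call above: the standard "prefetch" pattern takes
   the address to prefetch, a flag that is nonzero when prefetching for a
   write, and a locality hint in the range 0..3; the constant 3 used here
   requests maximal temporal locality, i.e. keep the data in the cache.  */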
948 /* Do transform 5) on INSN if applicable. */
950 static bool
951 speculative_prefetching_transform (rtx insn)
953 rtx histogram, value;
954 gcov_type val, count, all;
955 edge e;
956 rtx mem, address;
957 int write;
959 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
960 return false;
962 if (!find_mem_reference (insn, &mem, &write))
963 return false;
965 address = XEXP (mem, 0);
966 if (side_effects_p (address))
967 return false;
969 if (CONSTANT_P (address))
970 return false;
972 for (histogram = REG_NOTES (insn);
973 histogram;
974 histogram = XEXP (histogram, 1))
975 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
976 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
977 break;
979 if (!histogram)
980 return false;
982 histogram = XEXP (XEXP (histogram, 0), 1);
983 value = XEXP (histogram, 0);
984 histogram = XEXP (histogram, 1);
985 /* Skip last value referenced. */
986 histogram = XEXP (histogram, 1);
987 val = INTVAL (XEXP (histogram, 0));
988 histogram = XEXP (histogram, 1);
989 count = INTVAL (XEXP (histogram, 0));
990 histogram = XEXP (histogram, 1);
991 all = INTVAL (XEXP (histogram, 0));
993 /* With so few executions we do not really have a reason to optimize the
994 statement, and more importantly, the data about the differences of addresses
995 are spoiled by the first item, which had no previous value to compare
996 with. */
997 if (all < 4)
998 return false;
1000 /* We require that count be at least half of all; this means
1001 that for the transformation to fire the value must be constant
1002 at least 50% of the time (and 75% gives the guarantee of usage). */
1003 if (!rtx_equal_p (address, value) || 2 * count < all)
1004 return false;
1006 /* If the difference is too small, it does not make much sense to
1007 prefetch, as the memory is probably already in the cache. */
1008 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
1009 return false;
1011 if (dump_file)
1012 fprintf (dump_file, "Speculative prefetching for insn %d\n",
1013 INSN_UID (insn));
1015 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
1017 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
1019 return true;
1021 #endif /* HAVE_prefetch */
1023 /* Tree based transformations. */
1024 static bool
1025 tree_value_profile_transformations (void)
1027 basic_block bb;
1028 block_stmt_iterator bsi;
1029 bool changed = false;
1031 FOR_EACH_BB (bb)
1033 /* Ignore cold areas -- we are enlarging the code. */
1034 if (!maybe_hot_bb_p (bb))
1035 continue;
1037 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1039 tree stmt = bsi_stmt (bsi);
1040 stmt_ann_t ann = get_stmt_ann (stmt);
1041 histogram_value th = ann->histograms;
1042 if (!th)
1043 continue;
1045 if (dump_file)
1047 fprintf (dump_file, "Trying transformations on insn ");
1048 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1051 /* Transformations: */
1052 /* The order of things in this conditional controls which
1053 transformation is used when more than one is applicable. */
1054 /* It is expected that any code added by the transformations
1055 will be added before the current statement, and that the
1056 current statement remain valid (although possibly
1057 modified) upon return. */
1058 if (flag_value_profile_transformations
1059 && (tree_mod_subtract_transform (stmt)
1060 || tree_divmod_fixed_value_transform (stmt)
1061 || tree_mod_pow2_value_transform (stmt)))
1063 changed = true;
1064 /* Original statement may no longer be in the same block. */
1065 bb = bb_for_stmt (stmt);
1068 /* Free extra storage from compute_value_histograms. */
1069 while (th)
1071 free (th->hvalue.tree.counters);
1072 th = th->hvalue.tree.next;
1074 ann->histograms = 0;
1078 if (changed)
1080 counts_to_freqs ();
1083 return changed;
1086 /* Generate code for transformation 1 (with OPERATION, operands OP1
1087 and OP2, where OP2 is expected to have value VALUE, parent modify-expr STMT and
1088 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1089 within roundoff error). This generates the result into a temp and returns
1090 the temp; it does not replace or alter the original STMT. */
1091 static tree
1092 tree_divmod_fixed_value (tree stmt, tree operation,
1093 tree op1, tree op2, tree value, int prob, gcov_type count,
1094 gcov_type all)
1096 tree stmt1, stmt2, stmt3;
1097 tree tmp1, tmp2, tmpv;
1098 tree label_decl1 = create_artificial_label ();
1099 tree label_decl2 = create_artificial_label ();
1100 tree label_decl3 = create_artificial_label ();
1101 tree label1, label2, label3;
1102 tree bb1end, bb2end, bb3end;
1103 basic_block bb, bb2, bb3, bb4;
1104 tree optype = TREE_TYPE (operation);
1105 edge e12, e13, e23, e24, e34;
1106 block_stmt_iterator bsi;
1108 bb = bb_for_stmt (stmt);
1109 bsi = bsi_for_stmt (stmt);
1111 tmpv = create_tmp_var (optype, "PROF");
1112 tmp1 = create_tmp_var (optype, "PROF");
1113 stmt1 = build2 (MODIFY_EXPR, optype, tmpv, fold_convert (optype, value));
1114 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1115 stmt3 = build3 (COND_EXPR, void_type_node,
1116 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
1117 build1 (GOTO_EXPR, void_type_node, label_decl2),
1118 build1 (GOTO_EXPR, void_type_node, label_decl1));
1119 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1120 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1121 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1122 bb1end = stmt3;
1124 tmp2 = create_tmp_var (optype, "PROF");
1125 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1126 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1127 build2 (TREE_CODE (operation), optype, op1, tmpv));
1128 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1129 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1130 bb2end = stmt1;
1132 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1133 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1134 build2 (TREE_CODE (operation), optype, op1, op2));
1135 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1136 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1137 bb3end = stmt1;
1139 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1140 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1142 /* Fix CFG. */
1143 /* Edge e23 connects bb2 to bb3, etc. */
1144 e12 = split_block (bb, bb1end);
1145 bb2 = e12->dest;
1146 bb2->count = count;
1147 e23 = split_block (bb2, bb2end);
1148 bb3 = e23->dest;
1149 bb3->count = all - count;
1150 e34 = split_block (bb3, bb3end);
1151 bb4 = e34->dest;
1152 bb4->count = all;
1154 e12->flags &= ~EDGE_FALLTHRU;
1155 e12->flags |= EDGE_FALSE_VALUE;
1156 e12->probability = prob;
1157 e12->count = count;
1159 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1160 e13->probability = REG_BR_PROB_BASE - prob;
1161 e13->count = all - count;
1163 remove_edge (e23);
1165 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1166 e24->probability = REG_BR_PROB_BASE;
1167 e24->count = count;
1169 e34->probability = REG_BR_PROB_BASE;
1170 e34->count = all - count;
1172 return tmp2;
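/* The statements inserted above correspond to this GIMPLE sketch (labels and
   temporaries are illustrative; the PROF_* names come from create_tmp_var):

     PROF_v = VALUE;
     PROF_1 = op2;
     if (PROF_1 != PROF_v) goto L2; else goto L1;
   L1:
     PROF_2 = op1 OP PROF_v;      // divisor known to be VALUE, fast path
     goto L3;
   L2:
     PROF_2 = op1 OP op2;         // the general slow path
   L3:
     ...                          // PROF_2 is returned to the caller  */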
1175 /* Do transform 1) on STMT if applicable. */
1176 static bool
1177 tree_divmod_fixed_value_transform (tree stmt)
1179 stmt_ann_t ann = get_stmt_ann (stmt);
1180 histogram_value histogram;
1181 enum tree_code code;
1182 gcov_type val, count, all;
1183 tree modify, op, op1, op2, result, value, tree_val;
1184 int prob;
1186 modify = stmt;
1187 if (TREE_CODE (stmt) == RETURN_EXPR
1188 && TREE_OPERAND (stmt, 0)
1189 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1190 modify = TREE_OPERAND (stmt, 0);
1191 if (TREE_CODE (modify) != MODIFY_EXPR)
1192 return false;
1193 op = TREE_OPERAND (modify, 1);
1194 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1195 return false;
1196 code = TREE_CODE (op);
1198 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
1199 return false;
1201 op1 = TREE_OPERAND (op, 0);
1202 op2 = TREE_OPERAND (op, 1);
1203 if (!ann->histograms)
1204 return false;
1206 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1207 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
1208 break;
1210 if (!histogram)
1211 return false;
1213 value = histogram->hvalue.tree.value;
1214 val = histogram->hvalue.tree.counters[0];
1215 count = histogram->hvalue.tree.counters[1];
1216 all = histogram->hvalue.tree.counters[2];
1218 /* We require that count is at least half of all; this means
1219 that for the transformation to fire the value must be constant
1220 at least 50% of the time (and 75% gives the guarantee of usage). */
1221 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
1222 return false;
1224 /* Compute probability of taking the optimal path. */
1225 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1227 tree_val = build_int_cst_wide (get_gcov_type (),
1228 (unsigned HOST_WIDE_INT) val,
1229 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
1230 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
1232 if (dump_file)
1234 fprintf (dump_file, "Div/mod by constant ");
1235 print_generic_expr (dump_file, value, TDF_SLIM);
1236 fprintf (dump_file, "=");
1237 print_generic_expr (dump_file, tree_val, TDF_SLIM);
1238 fprintf (dump_file, " transformation on insn ");
1239 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1242 TREE_OPERAND (modify, 1) = result;
1244 return true;
1247 /* Generate code for transformation 2 (with OPERATION, operands OP1
1248 and OP2, parent modify-expr STMT and probability of taking the optimal
1249 path PROB, which is equivalent to COUNT/ALL within roundoff error).
1250 This generates the result into a temp and returns
1251 the temp; it does not replace or alter the original STMT. */
1252 static tree
1253 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
1254 gcov_type count, gcov_type all)
1256 tree stmt1, stmt2, stmt3, stmt4;
1257 tree tmp1, tmp2, tmp3;
1258 tree label_decl1 = create_artificial_label ();
1259 tree label_decl2 = create_artificial_label ();
1260 tree label_decl3 = create_artificial_label ();
1261 tree label1, label2, label3;
1262 tree bb1end, bb2end, bb3end;
1263 basic_block bb, bb2, bb3, bb4;
1264 tree optype = TREE_TYPE (operation);
1265 edge e12, e13, e23, e24, e34;
1266 block_stmt_iterator bsi;
1267 tree result = create_tmp_var (optype, "PROF");
1269 bb = bb_for_stmt (stmt);
1270 bsi = bsi_for_stmt (stmt);
1272 tmp1 = create_tmp_var (optype, "PROF");
1273 tmp2 = create_tmp_var (optype, "PROF");
1274 tmp3 = create_tmp_var (optype, "PROF");
1275 stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
1276 stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
1277 build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
1278 stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
1279 build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
1280 stmt4 = build3 (COND_EXPR, void_type_node,
1281 build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
1282 build1 (GOTO_EXPR, void_type_node, label_decl2),
1283 build1 (GOTO_EXPR, void_type_node, label_decl1));
1284 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1285 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1286 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1287 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
1288 bb1end = stmt4;
1290 /* tmp2 == op2-1 inherited from previous block */
1291 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1292 stmt1 = build2 (MODIFY_EXPR, optype, result,
1293 build2 (BIT_AND_EXPR, optype, op1, tmp2));
1294 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1295 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1296 bb2end = stmt1;
1298 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1299 stmt1 = build2 (MODIFY_EXPR, optype, result,
1300 build2 (TREE_CODE (operation), optype, op1, op2));
1301 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1302 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1303 bb3end = stmt1;
1305 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1306 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1308 /* Fix CFG. */
1309 /* Edge e23 connects bb2 to bb3, etc. */
1310 e12 = split_block (bb, bb1end);
1311 bb2 = e12->dest;
1312 bb2->count = count;
1313 e23 = split_block (bb2, bb2end);
1314 bb3 = e23->dest;
1315 bb3->count = all - count;
1316 e34 = split_block (bb3, bb3end);
1317 bb4 = e34->dest;
1318 bb4->count = all;
1320 e12->flags &= ~EDGE_FALLTHRU;
1321 e12->flags |= EDGE_FALSE_VALUE;
1322 e12->probability = prob;
1323 e12->count = count;
1325 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1326 e13->probability = REG_BR_PROB_BASE - prob;
1327 e13->count = all - count;
1329 remove_edge (e23);
1331 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1332 e24->probability = REG_BR_PROB_BASE;
1333 e24->count = count;
1335 e34->probability = REG_BR_PROB_BASE;
1336 e34->count = all - count;
1338 return result;
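/* The statements inserted above correspond to this GIMPLE sketch (labels and
   temporaries are illustrative):

     PROF_1 = op2;
     PROF_2 = op2 - 1;
     PROF_3 = PROF_2 & PROF_1;
     if (PROF_3 != 0) goto L2; else goto L1;
   L1:
     result = op1 & PROF_2;       // power-of-2 divisor, cheap mask
     goto L3;
   L2:
     result = op1 % op2;          // the general slow path
   L3:
     ...                          // result replaces the original RHS  */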
1341 /* Do transform 2) on STMT if applicable. */
1342 static bool
1343 tree_mod_pow2_value_transform (tree stmt)
1345 stmt_ann_t ann = get_stmt_ann (stmt);
1346 histogram_value histogram;
1347 enum tree_code code;
1348 gcov_type count, wrong_values, all;
1349 tree modify, op, op1, op2, result, value;
1350 int prob;
1352 modify = stmt;
1353 if (TREE_CODE (stmt) == RETURN_EXPR
1354 && TREE_OPERAND (stmt, 0)
1355 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1356 modify = TREE_OPERAND (stmt, 0);
1357 if (TREE_CODE (modify) != MODIFY_EXPR)
1358 return false;
1359 op = TREE_OPERAND (modify, 1);
1360 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1361 return false;
1362 code = TREE_CODE (op);
1364 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1365 return false;
1367 op1 = TREE_OPERAND (op, 0);
1368 op2 = TREE_OPERAND (op, 1);
1369 if (!ann->histograms)
1370 return false;
1372 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1373 if (histogram->type == HIST_TYPE_POW2)
1374 break;
1376 if (!histogram)
1377 return false;
1379 value = histogram->hvalue.tree.value;
1380 wrong_values = histogram->hvalue.tree.counters[0];
1381 count = histogram->hvalue.tree.counters[1];
1383 /* We require that we hit a power of 2 in at least half of all evaluations. */
1384 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
1385 return false;
1387 if (dump_file)
1389 fprintf (dump_file, "Mod power of 2 transformation on insn ");
1390 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1393 /* Compute probability of taking the optimal path. */
1394 all = count + wrong_values;
1395 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1397 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
1399 TREE_OPERAND (modify, 1) = result;
1401 return true;
1404 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
1405 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
1406 support; currently only NCOUNTS==0 or 1 is supported and this is
1407 built into this interface. The probabilities of taking the optimal
1408 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
1409 COUNT2/ALL respectively within roundoff error). This generates the
1410 result into a temp and returns the temp; it does not replace or alter
1411 the original STMT. */
1412 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
1414 static tree
1415 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
1416 int prob1, int prob2, int ncounts,
1417 gcov_type count1, gcov_type count2, gcov_type all)
1419 tree stmt1, stmt2, stmt3;
1420 tree tmp1;
1421 tree label_decl1 = create_artificial_label ();
1422 tree label_decl2 = create_artificial_label ();
1423 tree label_decl3 = create_artificial_label ();
1424 tree label1, label2, label3;
1425 tree bb1end, bb2end = NULL_TREE, bb3end;
1426 basic_block bb, bb2, bb3, bb4;
1427 tree optype = TREE_TYPE (operation);
1428 edge e12, e23 = 0, e24, e34, e14;
1429 block_stmt_iterator bsi;
1430 tree result = create_tmp_var (optype, "PROF");
1432 bb = bb_for_stmt (stmt);
1433 bsi = bsi_for_stmt (stmt);
1435 tmp1 = create_tmp_var (optype, "PROF");
1436 stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
1437 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1438 stmt3 = build3 (COND_EXPR, void_type_node,
1439 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1440 build1 (GOTO_EXPR, void_type_node, label_decl3),
1441 build1 (GOTO_EXPR, void_type_node,
1442 ncounts ? label_decl1 : label_decl2));
1443 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1444 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1445 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1446 bb1end = stmt3;
1448 if (ncounts) /* Assumed to be 0 or 1 */
1450 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1451 stmt1 = build2 (MODIFY_EXPR, optype, result,
1452 build2 (MINUS_EXPR, optype, result, tmp1));
1453 stmt2 = build3 (COND_EXPR, void_type_node,
1454 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1455 build1 (GOTO_EXPR, void_type_node, label_decl3),
1456 build1 (GOTO_EXPR, void_type_node, label_decl2));
1457 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1458 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1459 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1460 bb2end = stmt2;
1463 /* Fallback case. */
1464 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1465 stmt1 = build2 (MODIFY_EXPR, optype, result,
1466 build2 (TREE_CODE (operation), optype, result, tmp1));
1467 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1468 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1469 bb3end = stmt1;
1471 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1472 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1474 /* Fix CFG. */
1475 /* Edge e23 connects bb2 to bb3, etc. */
1476 /* However block 3 is optional; if it is not there, references
1477 to 3 really refer to block 2. */
1478 e12 = split_block (bb, bb1end);
1479 bb2 = e12->dest;
1480 bb2->count = all - count1;
1482 if (ncounts) /* Assumed to be 0 or 1. */
1484 e23 = split_block (bb2, bb2end);
1485 bb3 = e23->dest;
1486 bb3->count = all - count1 - count2;
1489 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
1490 bb4 = e34->dest;
1491 bb4->count = all;
1493 e12->flags &= ~EDGE_FALLTHRU;
1494 e12->flags |= EDGE_FALSE_VALUE;
1495 e12->probability = REG_BR_PROB_BASE - prob1;
1496 e12->count = all - count1;
1498 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
1499 e14->probability = prob1;
1500 e14->count = count1;
1502 if (ncounts) /* Assumed to be 0 or 1. */
1504 e23->flags &= ~EDGE_FALLTHRU;
1505 e23->flags |= EDGE_FALSE_VALUE;
1506 e23->count = all - count1 - count2;
1507 e23->probability = REG_BR_PROB_BASE - prob2;
1509 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
1510 e24->probability = prob2;
1511 e24->count = count2;
1514 e34->probability = REG_BR_PROB_BASE;
1515 e34->count = all - count1 - count2;
1517 return result;
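/* The statements inserted above correspond to this GIMPLE sketch for
   NCOUNTS == 1 (for NCOUNTS == 0 the middle block is simply omitted):

     result = op1;
     PROF_1 = op2;
     if (result < PROF_1) goto L3; else goto L1;
   L1:
     result = result - PROF_1;    // one subtraction
     if (result < PROF_1) goto L3; else goto L2;
   L2:
     result = result % PROF_1;    // the general slow path
   L3:
     ...                          // result replaces the original RHS  */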
1520 /* Do transforms 3) and 4) on STMT if applicable. */
1521 static bool
1522 tree_mod_subtract_transform (tree stmt)
1524 stmt_ann_t ann = get_stmt_ann (stmt);
1525 histogram_value histogram;
1526 enum tree_code code;
1527 gcov_type count, wrong_values, all;
1528 tree modify, op, op1, op2, result, value;
1529 int prob1, prob2;
1530 unsigned int i;
1532 modify = stmt;
1533 if (TREE_CODE (stmt) == RETURN_EXPR
1534 && TREE_OPERAND (stmt, 0)
1535 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1536 modify = TREE_OPERAND (stmt, 0);
1537 if (TREE_CODE (modify) != MODIFY_EXPR)
1538 return false;
1539 op = TREE_OPERAND (modify, 1);
1540 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1541 return false;
1542 code = TREE_CODE (op);
1544 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1545 return false;
1547 op1 = TREE_OPERAND (op, 0);
1548 op2 = TREE_OPERAND (op, 1);
1549 if (!ann->histograms)
1550 return false;
1552 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1553 if (histogram->type == HIST_TYPE_INTERVAL)
1554 break;
1556 if (!histogram)
1557 return false;
1559 value = histogram->hvalue.tree.value;
1560 all = 0;
1561 wrong_values = 0;
1562 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1563 all += histogram->hvalue.tree.counters[i];
1565 wrong_values += histogram->hvalue.tree.counters[i];
1566 wrong_values += histogram->hvalue.tree.counters[i+1];
1567 all += wrong_values;
1569 /* We require that we use just subtractions in at least 50% of all
1570 evaluations. */
1571 count = 0;
1572 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1574 count += histogram->hvalue.tree.counters[i];
1575 if (count * 2 >= all)
1576 break;
1578 if (i == histogram->hdata.intvl.steps)
1579 return false;
1581 if (dump_file)
1583 fprintf (dump_file, "Mod subtract transformation on insn ");
1584 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1587 /* Compute probability of taking the optimal path(s). */
1588 prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
1589 prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
1591 /* In practice, "steps" is always 2. This interface reflects this,
1592 and will need to be changed if "steps" can change. */
1593 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
1594 histogram->hvalue.tree.counters[0],
1595 histogram->hvalue.tree.counters[1], all);
1597 TREE_OPERAND (modify, 1) = result;
1599 return true;
1602 /* Connection to the outside world. */
1603 /* Struct for IR-dependent hooks. */
1604 struct value_prof_hooks {
1605 /* Find list of values for which we want to measure histograms. */
1606 void (*find_values_to_profile) (histogram_values *);
1608 /* Identify and exploit properties of values that are hard to analyze
1609 statically. See value-prof.c for more detail. */
1610 bool (*value_profile_transformations) (void);
1613 /* Hooks for RTL-based versions (the only ones that currently work). */
1614 static struct value_prof_hooks rtl_value_prof_hooks =
1616 rtl_find_values_to_profile,
1617 rtl_value_profile_transformations
1620 void
1621 rtl_register_value_prof_hooks (void)
1623 value_prof_hooks = &rtl_value_prof_hooks;
1624 gcc_assert (!ir_type ());
1627 /* Find values inside STMT for which we want to measure histograms for the
1628 division/modulo optimization. */
1629 static void
1630 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
1632 tree assign, lhs, rhs, divisor, op0, type;
1633 histogram_value hist;
1635 if (TREE_CODE (stmt) == RETURN_EXPR)
1636 assign = TREE_OPERAND (stmt, 0);
1637 else
1638 assign = stmt;
1640 if (!assign
1641 || TREE_CODE (assign) != MODIFY_EXPR)
1642 return;
1643 lhs = TREE_OPERAND (assign, 0);
1644 type = TREE_TYPE (lhs);
1645 if (!INTEGRAL_TYPE_P (type))
1646 return;
1648 rhs = TREE_OPERAND (assign, 1);
1649 switch (TREE_CODE (rhs))
1651 case TRUNC_DIV_EXPR:
1652 case TRUNC_MOD_EXPR:
1653 divisor = TREE_OPERAND (rhs, 1);
1654 op0 = TREE_OPERAND (rhs, 0);
1656 VEC_reserve (histogram_value, heap, *values, 3);
1658 if (is_gimple_reg (divisor))
1660 /* Check for the case where the divisor is the same value most
1661 of the time. */
1662 hist = ggc_alloc (sizeof (*hist));
1663 hist->hvalue.tree.value = divisor;
1664 hist->hvalue.tree.stmt = stmt;
1665 hist->type = HIST_TYPE_SINGLE_VALUE;
1666 VEC_quick_push (histogram_value, *values, hist);
1669 /* For mod, check whether it is often a noop (or replaceable by
1670 a few subtractions). */
1671 if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
1672 && TYPE_UNSIGNED (type))
1674 /* Check for the special case where the divisor is a power of 2. */
1675 hist = ggc_alloc (sizeof (*hist));
1676 hist->hvalue.tree.value = divisor;
1677 hist->hvalue.tree.stmt = stmt;
1678 hist->type = HIST_TYPE_POW2;
1679 VEC_quick_push (histogram_value, *values, hist);
1681 hist = ggc_alloc (sizeof (*hist));
1682 hist->hvalue.tree.stmt = stmt;
1683 hist->hvalue.tree.value
1684 = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
1685 hist->type = HIST_TYPE_INTERVAL;
1686 hist->hdata.intvl.int_start = 0;
1687 hist->hdata.intvl.steps = 2;
1688 VEC_quick_push (histogram_value, *values, hist);
1690 return;
1692 default:
1693 return;
1697 /* Find values inside STMT for which we want to measure histograms and add
1698 them to the list VALUES. */
1700 static void
1701 tree_values_to_profile (tree stmt, histogram_values *values)
1703 if (flag_value_profile_transformations)
1704 tree_divmod_values_to_profile (stmt, values);
1707 static void
1708 tree_find_values_to_profile (histogram_values *values)
1710 basic_block bb;
1711 block_stmt_iterator bsi;
1712 unsigned i;
1713 histogram_value hist;
1715 *values = NULL;
1716 FOR_EACH_BB (bb)
1717 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1718 tree_values_to_profile (bsi_stmt (bsi), values);
1719 static_values = *values;
1721 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1723 switch (hist->type)
1725 case HIST_TYPE_INTERVAL:
1726 if (dump_file)
1728 fprintf (dump_file, "Interval counter for tree ");
1729 print_generic_expr (dump_file, hist->hvalue.tree.stmt,
1730 TDF_SLIM);
1731 fprintf (dump_file, ", range %d -- %d.\n",
1732 hist->hdata.intvl.int_start,
1733 (hist->hdata.intvl.int_start
1734 + hist->hdata.intvl.steps - 1));
1736 hist->n_counters = hist->hdata.intvl.steps + 2;
1737 break;
1739 case HIST_TYPE_POW2:
1740 if (dump_file)
1742 fprintf (dump_file, "Pow2 counter for tree ");
1743 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1744 fprintf (dump_file, ".\n");
1746 hist->n_counters = 2;
1747 break;
1749 case HIST_TYPE_SINGLE_VALUE:
1750 if (dump_file)
1752 fprintf (dump_file, "Single value counter for tree ");
1753 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1754 fprintf (dump_file, ".\n");
1756 hist->n_counters = 3;
1757 break;
1759 case HIST_TYPE_CONST_DELTA:
1760 if (dump_file)
1762 fprintf (dump_file, "Constant delta counter for tree ");
1763 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1764 fprintf (dump_file, ".\n");
1766 hist->n_counters = 4;
1767 break;
1769 default:
1770 gcc_unreachable ();
1775 static struct value_prof_hooks tree_value_prof_hooks = {
1776 tree_find_values_to_profile,
1777 tree_value_profile_transformations
1780 void
1781 tree_register_value_prof_hooks (void)
1783 value_prof_hooks = &tree_value_prof_hooks;
1784 gcc_assert (ir_type ());
1787 /* IR-independent entry points. */
1788 void
1789 find_values_to_profile (histogram_values *values)
1791 (value_prof_hooks->find_values_to_profile) (values);
1794 bool
1795 value_profile_transformations (void)
1797 bool retval = (value_prof_hooks->value_profile_transformations) ();
1798 VEC_free (histogram_value, heap, static_values);
1799 return retval;