1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
43 #include "timevar.h"
44 #include "tree-pass.h"
46 static struct value_prof_hooks *value_prof_hooks;
48 /* This is the vector of histograms. Created in find_values_to_profile.
49 During profile generation, freed by instrument_values.
50 During profile use, freed by value_profile_transformations. */
52 static histogram_values static_values = NULL;
54 /* This file contains the value profile based optimizations. Currently the
55 following optimizations are implemented (for more detailed descriptions
56 see comments at value_profile_transformations):
58 1) Division/modulo specialization. Provided that we can determine that the
59 operands of the division have some special properties, we may use it to
60 produce more effective code.
61 2) Speculative prefetching. If we are able to determine that the difference
62 between addresses accessed by a memory reference is usually constant, we
63 may add the prefetch instructions.
65 Every such optimization should add its requirements for profiled values to
66 insn_values_to_profile function. This function is called from branch_prob
67 in profile.c and the requested values are instrumented by it in the first
68 compilation with -fprofile-arcs. The optimization may then read the
69 gathered data in the second compilation with -fbranch-probabilities.
71 There are currently two versions, RTL-based and tree-based. Over time
72 the RTL-based version may go away.
74 In the RTL-based version, the measured data is appended as a REG_VALUE_PROFILE
75 note to the instrumented insn. The argument to the note consists of an
76 EXPR_LIST whose members have the following meaning (from the first to
77 the last):
79 -- type of information gathered (HIST_TYPE*)
80 -- the expression that is profiled
81 -- list of counters starting from the first one.
83 In the tree-based version, the measured data is pointed to from the histograms
84 field of the statement annotation of the instrumented statements. It is
85 kept as a linked list of struct histogram_value_t's, which contain the
86 same information as above. */
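/* As an illustration (layout inferred from the readers below, e.g.
   rtl_divmod_fixed_value_transform), a REG_VALUE_PROFILE note attached to a
   division

       (set (reg:SI 60) (udiv:SI (reg:SI 61) (reg:SI 62)))

   carrying a single-value histogram would look roughly like

       (expr_list (const_int HIST_TYPE_SINGLE_VALUE)
         (expr_list (reg:SI 62)              ; the profiled expression
           (expr_list (const_int 16)         ; most common value seen
             (expr_list (const_int 750)      ; times that value was seen
               (expr_list (const_int 1000)   ; total executions
                 (nil))))))

   The register numbers and counter values here are made up for the
   example only.  */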
88 /* For speculative prefetching, the range in which we do not prefetch (because
89 we assume that the data will be in the cache anyway). The asymmetry between the
90 min and max range tries to reflect the fact that sequential prefetching
91 of data is commonly done directly by the hardware. Nevertheless, these
92 values are just a guess and should of course be target-specific.
94 FIXME: There is no tree form of speculative prefetching as yet.
96 FIXME: A better approach to instrumentation in the profile-generation
97 pass is to generate calls to magic library functions (to be added to
98 libgcc) rather than inline code. This approach will probably be
99 necessary to get tree-based speculative prefetching working in a useful
100 fashion, as inline code bloats things so much the rest of the compiler has
101 serious problems dealing with it (judging from the rtl behavior). */
103 #ifndef NOPREFETCH_RANGE_MIN
104 #define NOPREFETCH_RANGE_MIN (-16)
105 #endif
106 #ifndef NOPREFETCH_RANGE_MAX
107 #define NOPREFETCH_RANGE_MAX 32
108 #endif
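/* For example, with the default values above a measured stride of +24 bytes
   is skipped (24 lies inside [-16, 32] and is assumed to be covered by the
   hardware prefetcher or to be in cache already), whereas a stride of +128
   or -64 bytes passes the range check in speculative_prefetching_transform
   and a prefetch gets emitted.  The particular strides are illustrative
   only.  */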
110 static void rtl_divmod_values_to_profile (rtx, histogram_values *);
111 #ifdef HAVE_prefetch
112 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
113 static int find_mem_reference_1 (rtx *, void *);
114 static void find_mem_reference_2 (rtx, rtx, void *);
115 static bool find_mem_reference (rtx, rtx *, int *);
116 #endif
118 static void rtl_values_to_profile (rtx, histogram_values *);
119 static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
120 rtx, gcov_type, int);
121 static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
122 static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
123 int, int, int);
124 #ifdef HAVE_prefetch
125 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
126 #endif
127 static bool rtl_divmod_fixed_value_transform (rtx);
128 static bool rtl_mod_pow2_value_transform (rtx);
129 static bool rtl_mod_subtract_transform (rtx);
130 #ifdef HAVE_prefetch
131 static bool speculative_prefetching_transform (rtx);
132 #endif
133 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
134 tree, int, gcov_type, gcov_type);
135 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
136 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
137 gcov_type, gcov_type, gcov_type);
138 static bool tree_divmod_fixed_value_transform (tree);
139 static bool tree_mod_pow2_value_transform (tree);
140 static bool tree_mod_subtract_transform (tree);
143 /* Find values inside INSN for which we want to measure histograms for the
144 division/modulo optimization, and store them to VALUES. */
145 static void
146 rtl_divmod_values_to_profile (rtx insn, histogram_values *values)
148 rtx set, set_src, op1, op2;
149 enum machine_mode mode;
150 histogram_value hist;
152 if (!INSN_P (insn))
153 return;
155 set = single_set (insn);
156 if (!set)
157 return;
159 mode = GET_MODE (SET_DEST (set));
160 if (!INTEGRAL_MODE_P (mode))
161 return;
163 set_src = SET_SRC (set);
164 switch (GET_CODE (set_src))
166 case DIV:
167 case MOD:
168 case UDIV:
169 case UMOD:
170 op1 = XEXP (set_src, 0);
171 op2 = XEXP (set_src, 1);
172 if (side_effects_p (op2))
173 return;
175 /* Check for a special case where the divisor is a power of 2. */
176 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
178 hist = ggc_alloc (sizeof (*hist));
179 hist->hvalue.rtl.value = op2;
180 hist->hvalue.rtl.seq = NULL_RTX;
181 hist->hvalue.rtl.mode = mode;
182 hist->hvalue.rtl.insn = insn;
183 hist->type = HIST_TYPE_POW2;
184 VEC_safe_push (histogram_value, heap, *values, hist);
187 /* Check whether the divisor is not in fact a constant. */
188 if (!CONSTANT_P (op2))
190 hist = ggc_alloc (sizeof (*hist));
191 hist->hvalue.rtl.value = op2;
192 hist->hvalue.rtl.mode = mode;
193 hist->hvalue.rtl.seq = NULL_RTX;
194 hist->hvalue.rtl.insn = insn;
195 hist->type = HIST_TYPE_SINGLE_VALUE;
196 VEC_safe_push (histogram_value, heap, *values, hist);
199 /* For mod, check whether it is often a noop (or replaceable by
200 a few subtractions). */
201 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
203 rtx tmp;
205 hist = ggc_alloc (sizeof (*hist));
206 start_sequence ();
207 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
208 hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX);
209 hist->hvalue.rtl.seq = get_insns ();
210 end_sequence ();
211 hist->hvalue.rtl.mode = mode;
212 hist->hvalue.rtl.insn = insn;
213 hist->type = HIST_TYPE_INTERVAL;
214 hist->hdata.intvl.int_start = 0;
215 hist->hdata.intvl.steps = 2;
216 VEC_safe_push (histogram_value, heap, *values, hist);
218 return;
220 default:
221 return;
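/* To illustrate the above: for a single insn computing an unsigned modulo

       (set (reg:SI 60) (umod:SI (reg:SI 61) (reg:SI 62)))

   with a non-constant, side-effect-free divisor, three histograms are
   requested: a POW2 and a SINGLE_VALUE histogram on the divisor, and an
   INTERVAL histogram (int_start 0, 2 steps) on the quotient
   reg 61 / reg 62, matching the three conditional blocks above.  The
   example insn itself is purely illustrative.  */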
225 #ifdef HAVE_prefetch
227 /* Called from find_mem_reference through for_each_rtx, finds a memory
228 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
229 to *RET and the traversing of the expression is interrupted by returning 1.
230 Otherwise 0 is returned. */
232 static int
233 find_mem_reference_1 (rtx *expr, void *ret)
235 rtx *mem = ret;
237 if (MEM_P (*expr))
239 *mem = *expr;
240 return 1;
242 return 0;
245 /* Called from find_mem_reference through note_stores to find out whether
246 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
247 FMR2_WRITE is set to true. */
249 static int fmr2_write;
250 static void
251 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
253 if (expr == mem)
254 fmr2_write = true;
257 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
258 if it is a write of the mem. Return false if no memory reference is found,
259 true otherwise. */
261 static bool
262 find_mem_reference (rtx insn, rtx *mem, int *write)
264 *mem = NULL_RTX;
265 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
267 if (!*mem)
268 return false;
270 fmr2_write = false;
271 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
272 *write = fmr2_write;
273 return true;
276 /* Find values inside INSN for which we want to measure histograms for
277 speculative prefetching. Add them to the list VALUES.
278 Returns true if we found any such value, false otherwise. */
280 static bool
281 insn_prefetch_values_to_profile (rtx insn, histogram_values* values)
283 rtx mem, address;
284 int write;
285 histogram_value hist;
287 /* It only makes sense to look for memory references in ordinary insns. */
288 if (!NONJUMP_INSN_P (insn))
289 return false;
291 if (!find_mem_reference (insn, &mem, &write))
292 return false;
294 address = XEXP (mem, 0);
295 if (side_effects_p (address))
296 return false;
298 if (CONSTANT_P (address))
299 return false;
301 hist = ggc_alloc (sizeof (*hist));
302 hist->hvalue.rtl.value = address;
303 hist->hvalue.rtl.mode = GET_MODE (address);
304 hist->hvalue.rtl.seq = NULL_RTX;
305 hist->hvalue.rtl.insn = insn;
306 hist->type = HIST_TYPE_CONST_DELTA;
307 VEC_safe_push (histogram_value, heap, *values, hist);
309 return true;
311 #endif
312 /* Find values inside INSN for which we want to measure histograms and add
313 them to the list VALUES. */
314 static void
315 rtl_values_to_profile (rtx insn, histogram_values *values)
317 if (flag_value_profile_transformations)
318 rtl_divmod_values_to_profile (insn, values);
320 #ifdef HAVE_prefetch
321 if (flag_speculative_prefetching)
322 insn_prefetch_values_to_profile (insn, values);
323 #endif
326 /* Find the list of values for which we want to measure histograms. */
327 static void
328 rtl_find_values_to_profile (histogram_values *values)
330 rtx insn;
331 unsigned i, libcall_level;
332 histogram_value hist;
334 life_analysis (NULL, PROP_DEATH_NOTES);
336 *values = NULL;
337 libcall_level = 0;
338 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
339 rtl_values_to_profile (insn, values);
340 static_values = *values;
342 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
344 switch (hist->type)
346 case HIST_TYPE_INTERVAL:
347 if (dump_file)
348 fprintf (dump_file,
349 "Interval counter for insn %d, range %d -- %d.\n",
350 INSN_UID ((rtx)hist->hvalue.rtl.insn),
351 hist->hdata.intvl.int_start,
352 (hist->hdata.intvl.int_start
353 + hist->hdata.intvl.steps - 1));
354 hist->n_counters = hist->hdata.intvl.steps + 2;
355 break;
357 case HIST_TYPE_POW2:
358 if (dump_file)
359 fprintf (dump_file,
360 "Pow2 counter for insn %d.\n",
361 INSN_UID ((rtx)hist->hvalue.rtl.insn));
362 hist->n_counters = 2;
363 break;
365 case HIST_TYPE_SINGLE_VALUE:
366 if (dump_file)
367 fprintf (dump_file,
368 "Single value counter for insn %d.\n",
369 INSN_UID ((rtx)hist->hvalue.rtl.insn));
370 hist->n_counters = 3;
371 break;
373 case HIST_TYPE_CONST_DELTA:
374 if (dump_file)
375 fprintf (dump_file,
376 "Constant delta counter for insn %d.\n",
377 INSN_UID ((rtx)hist->hvalue.rtl.insn));
378 hist->n_counters = 4;
379 break;
381 default:
382 gcc_unreachable ();
385 allocate_reg_info (max_reg_num (), FALSE, FALSE);
388 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
389 them to identify and exploit properties of values that are hard to analyze
390 statically.
392 We do the following transformations:
396 x = a / b;
398 where b is almost always a constant N is transformed to
400 if (b == N)
401 x = a / N;
402 else
403 x = a / b;
405 Analogously with %
409 x = a % b
411 where b is almost always a power of 2 and the division is unsigned
412 TODO -- handle signed case as well
414 if ((b & (b - 1)) == 0)
415 x = a & (b - 1);
416 else
417 x = x % b;
419 Note that when b = 0, no error will occur and x = a; this is correct,
420 as the result of such an operation is undefined.
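   As a purely illustrative piece of arithmetic, for a = 23 and b = 8 we
   have (b & (b - 1)) == (8 & 7) == 0, so the fast path computes
   a & (b - 1) == 23 & 7 == 7, which indeed equals 23 % 8.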
424 x = a % b
426 where a is almost always less than b and the division is unsigned
427 TODO -- handle signed case as well
429 x = a;
430 if (x >= b)
431 x %= b;
435 x = a % b
437 where a is almost always less than 2 * b and the division is unsigned
438 TODO -- handle signed case as well
440 x = a;
441 if (x >= b)
442 x -= b;
443 if (x >= b)
444 x %= b;
446 It would be possible to continue analogously for K * b for other small
447 K's, but it is probably not useful.
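   For illustration, with a = 21 and b = 16 (so b <= a < 2 * b) the second
   sequence executes x -= b once, giving 21 - 16 = 5 < 16, and the expensive
   modulo is skipped entirely; the interval histogram of a / b gathered at
   instrumentation time tells us how often zero or one subtraction suffices
   like this.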
451 Read or write of mem[address], where the value of address usually changes
452 by a constant C != 0 between consecutive executions of the access; with
453 -fspeculative-prefetching we then add a prefetch of address + C before
454 the insn. This handles prefetching of several interesting cases in addition
455 to simple prefetching for addresses that are induction variables, e.g.
456 linked lists allocated sequentially (even in case they are processed
457 recursively).
459 TODO -- we should also check whether the difference from adjacent memory
460 references is not (usually) small, so that we do
461 not issue overlapping prefetches. Also we should employ some
462 heuristics to eliminate cases where prefetching evidently spoils
463 the code.
464 -- it should somehow cooperate with the loop optimizer prefetching
466 TODO:
468 There are other useful cases that could be handled by a similar mechanism,
469 for example:
471 for (i = 0; i < n; i++)
474 transform to (for constant N):
476 if (n == N)
477 for (i = 0; i < N; i++)
479 else
480 for (i = 0; i < n; i++)
482 making the unroller happy. Since this may grow the code significantly,
483 we would have to be very careful here. */
485 static bool
486 rtl_value_profile_transformations (void)
488 rtx insn, next;
489 int changed = false;
491 for (insn = get_insns (); insn; insn = next)
493 next = NEXT_INSN (insn);
495 if (!INSN_P (insn))
496 continue;
498 /* Scan for insn carrying a histogram. */
499 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
500 continue;
502 /* Ignore cold areas -- we are growing the code. */
503 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
504 continue;
506 if (dump_file)
508 fprintf (dump_file, "Trying transformations on insn %d\n",
509 INSN_UID (insn));
510 print_rtl_single (dump_file, insn);
513 /* Transformations: */
514 if (flag_value_profile_transformations
515 && (rtl_mod_subtract_transform (insn)
516 || rtl_divmod_fixed_value_transform (insn)
517 || rtl_mod_pow2_value_transform (insn)))
518 changed = true;
519 #ifdef HAVE_prefetch
520 if (flag_speculative_prefetching
521 && speculative_prefetching_transform (insn))
522 changed = true;
523 #endif
526 if (changed)
528 commit_edge_insertions ();
529 allocate_reg_info (max_reg_num (), FALSE, FALSE);
532 return changed;
535 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
536 and OP2, whose value is expected to be VALUE, result TARGET and
537 probability of taking the optimal path PROB). */
538 static rtx
539 rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
540 rtx target, rtx op1, rtx op2, gcov_type value,
541 int prob)
543 rtx tmp, tmp1, jump;
544 rtx neq_label = gen_label_rtx ();
545 rtx end_label = gen_label_rtx ();
546 rtx sequence;
548 start_sequence ();
550 if (!REG_P (op2))
552 tmp = gen_reg_rtx (mode);
553 emit_move_insn (tmp, copy_rtx (op2));
555 else
556 tmp = op2;
558 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
559 NULL_RTX, neq_label);
561 /* Add branch probability to jump we just created. */
562 jump = get_last_insn ();
563 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
564 GEN_INT (REG_BR_PROB_BASE - prob),
565 REG_NOTES (jump));
567 tmp1 = simplify_gen_binary (operation, mode,
568 copy_rtx (op1), GEN_INT (value));
569 tmp1 = force_operand (tmp1, target);
570 if (tmp1 != target)
571 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
573 emit_jump_insn (gen_jump (end_label));
574 emit_barrier ();
576 emit_label (neq_label);
577 tmp1 = simplify_gen_binary (operation, mode,
578 copy_rtx (op1), copy_rtx (tmp));
579 tmp1 = force_operand (tmp1, target);
580 if (tmp1 != target)
581 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
583 emit_label (end_label);
585 sequence = get_insns ();
586 end_sequence ();
587 rebuild_jump_labels (sequence);
588 return sequence;
591 /* Do transform 1) on INSN if applicable. */
592 static bool
593 rtl_divmod_fixed_value_transform (rtx insn)
595 rtx set, set_src, set_dest, op1, op2, value, histogram;
596 enum rtx_code code;
597 enum machine_mode mode;
598 gcov_type val, count, all;
599 edge e;
600 int prob;
602 set = single_set (insn);
603 if (!set)
604 return false;
606 set_src = SET_SRC (set);
607 set_dest = SET_DEST (set);
608 code = GET_CODE (set_src);
609 mode = GET_MODE (set_dest);
611 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
612 return false;
613 op1 = XEXP (set_src, 0);
614 op2 = XEXP (set_src, 1);
616 for (histogram = REG_NOTES (insn);
617 histogram;
618 histogram = XEXP (histogram, 1))
619 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
620 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
621 break;
623 if (!histogram)
624 return false;
626 histogram = XEXP (XEXP (histogram, 0), 1);
627 value = XEXP (histogram, 0);
628 histogram = XEXP (histogram, 1);
629 val = INTVAL (XEXP (histogram, 0));
630 histogram = XEXP (histogram, 1);
631 count = INTVAL (XEXP (histogram, 0));
632 histogram = XEXP (histogram, 1);
633 all = INTVAL (XEXP (histogram, 0));
635 /* We require that count be at least half of all; this means
636 that for the transformation to fire the value must be constant
637 at least 50% of the time (and 75% gives the guarantee of usage). */
638 if (!rtx_equal_p (op2, value) || 2 * count < all)
639 return false;
641 if (dump_file)
642 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
643 INSN_UID (insn));
645 /* Compute probability of taking the optimal path. */
646 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
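  /* For example (numbers purely illustrative), count = 750 and all = 1000
     give prob = 3 * REG_BR_PROB_BASE / 4; the "+ all / 2" term makes the
     integer division round to nearest rather than truncate.  */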
648 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
649 delete_insn (insn);
651 insert_insn_on_edge (
652 rtl_divmod_fixed_value (mode, code, set_dest,
653 op1, op2, val, prob), e);
655 return true;
658 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
659 and OP2, result TARGET and probability of taking the optimal path PROB). */
660 static rtx
661 rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
662 rtx op1, rtx op2, int prob)
664 rtx tmp, tmp1, tmp2, tmp3, jump;
665 rtx neq_label = gen_label_rtx ();
666 rtx end_label = gen_label_rtx ();
667 rtx sequence;
669 start_sequence ();
671 if (!REG_P (op2))
673 tmp = gen_reg_rtx (mode);
674 emit_move_insn (tmp, copy_rtx (op2));
676 else
677 tmp = op2;
679 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
680 0, OPTAB_WIDEN);
681 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
682 0, OPTAB_WIDEN);
683 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
684 NULL_RTX, neq_label);
686 /* Add branch probability to jump we just created. */
687 jump = get_last_insn ();
688 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
689 GEN_INT (REG_BR_PROB_BASE - prob),
690 REG_NOTES (jump));
692 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
693 0, OPTAB_WIDEN);
694 if (tmp3 != target)
695 emit_move_insn (copy_rtx (target), tmp3);
696 emit_jump_insn (gen_jump (end_label));
697 emit_barrier ();
699 emit_label (neq_label);
700 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
701 tmp1 = force_operand (tmp1, target);
702 if (tmp1 != target)
703 emit_move_insn (target, tmp1);
705 emit_label (end_label);
707 sequence = get_insns ();
708 end_sequence ();
709 rebuild_jump_labels (sequence);
710 return sequence;
713 /* Do transform 2) on INSN if applicable. */
714 static bool
715 rtl_mod_pow2_value_transform (rtx insn)
717 rtx set, set_src, set_dest, op1, op2, value, histogram;
718 enum rtx_code code;
719 enum machine_mode mode;
720 gcov_type wrong_values, count;
721 edge e;
722 int all, prob;
724 set = single_set (insn);
725 if (!set)
726 return false;
728 set_src = SET_SRC (set);
729 set_dest = SET_DEST (set);
730 code = GET_CODE (set_src);
731 mode = GET_MODE (set_dest);
733 if (code != UMOD)
734 return false;
735 op1 = XEXP (set_src, 0);
736 op2 = XEXP (set_src, 1);
738 for (histogram = REG_NOTES (insn);
739 histogram;
740 histogram = XEXP (histogram, 1))
741 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
742 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
743 break;
745 if (!histogram)
746 return false;
748 histogram = XEXP (XEXP (histogram, 0), 1);
749 value = XEXP (histogram, 0);
750 histogram = XEXP (histogram, 1);
751 wrong_values = INTVAL (XEXP (histogram, 0));
752 histogram = XEXP (histogram, 1);
753 count = INTVAL (XEXP (histogram, 0));
755 if (!rtx_equal_p (op2, value))
756 return false;
758 /* We require that we hit a power of two in at least half of all evaluations. */
759 if (count < wrong_values)
760 return false;
762 if (dump_file)
763 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
764 INSN_UID (insn));
766 /* Compute probability of taking the optimal path. */
767 all = count + wrong_values;
768 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
770 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
771 delete_insn (insn);
773 insert_insn_on_edge (
774 rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
776 return true;
779 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
780 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
781 probability of taking the optimal path(s) PROB1 and PROB2). */
782 static rtx
783 rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation,
784 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
786 rtx tmp, tmp1, jump;
787 rtx end_label = gen_label_rtx ();
788 rtx sequence;
789 int i;
791 start_sequence ();
793 if (!REG_P (op2))
795 tmp = gen_reg_rtx (mode);
796 emit_move_insn (tmp, copy_rtx (op2));
798 else
799 tmp = op2;
801 emit_move_insn (target, copy_rtx (op1));
802 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
803 NULL_RTX, end_label);
805 /* Add branch probability to jump we just created. */
806 jump = get_last_insn ();
807 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
808 GEN_INT (prob1), REG_NOTES (jump));
810 for (i = 0; i < sub; i++)
812 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
813 0, OPTAB_WIDEN);
814 if (tmp1 != target)
815 emit_move_insn (target, tmp1);
816 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
817 NULL_RTX, end_label);
819 /* Add branch probability to jump we just created. */
820 jump = get_last_insn ();
821 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
822 GEN_INT (prob2), REG_NOTES (jump));
825 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
826 tmp1 = force_operand (tmp1, target);
827 if (tmp1 != target)
828 emit_move_insn (target, tmp1);
830 emit_label (end_label);
832 sequence = get_insns ();
833 end_sequence ();
834 rebuild_jump_labels (sequence);
835 return sequence;
838 /* Do transforms 3) and 4) on INSN if applicable. */
839 static bool
840 rtl_mod_subtract_transform (rtx insn)
842 rtx set, set_src, set_dest, op1, op2, histogram;
843 enum rtx_code code;
844 enum machine_mode mode;
845 gcov_type wrong_values, counts[2], count, all;
846 edge e;
847 int i, prob1, prob2;
849 set = single_set (insn);
850 if (!set)
851 return false;
853 set_src = SET_SRC (set);
854 set_dest = SET_DEST (set);
855 code = GET_CODE (set_src);
856 mode = GET_MODE (set_dest);
858 if (code != UMOD)
859 return false;
860 op1 = XEXP (set_src, 0);
861 op2 = XEXP (set_src, 1);
863 for (histogram = REG_NOTES (insn);
864 histogram;
865 histogram = XEXP (histogram, 1))
866 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
867 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
868 break;
870 if (!histogram)
871 return false;
873 histogram = XEXP (XEXP (histogram, 0), 1);
874 histogram = XEXP (histogram, 1);
876 all = 0;
877 for (i = 0; i < 2; i++)
879 counts[i] = INTVAL (XEXP (histogram, 0));
880 all += counts[i];
881 histogram = XEXP (histogram, 1);
883 wrong_values = INTVAL (XEXP (histogram, 0));
884 histogram = XEXP (histogram, 1);
885 wrong_values += INTVAL (XEXP (histogram, 0));
886 all += wrong_values;
888 /* We require that we use just subtractions in at least 50% of all
889 evaluations. */
890 count = 0;
891 for (i = 0; i < 2; i++)
893 count += counts[i];
894 if (count * 2 >= all)
895 break;
898 if (i == 2)
899 return false;
901 if (dump_file)
902 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
903 INSN_UID (insn));
905 /* Compute probability of taking the optimal path(s). */
906 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
907 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
909 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
910 delete_insn (insn);
912 insert_insn_on_edge (
913 rtl_mod_subtract (mode, code, set_dest,
914 op1, op2, i, prob1, prob2), e);
916 return true;
919 #ifdef HAVE_prefetch
920 /* Generate code for transformation 5 for mem with ADDRESS and a constant
921 step DELTA. WRITE is true if the reference is a store to mem. */
923 static rtx
924 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
926 rtx tmp;
927 rtx sequence;
929 /* TODO: we do the prefetching for just one iteration ahead, which
930 often is not enough. */
931 start_sequence ();
932 if (offsettable_address_p (0, VOIDmode, address))
933 tmp = plus_constant (copy_rtx (address), delta);
934 else
936 tmp = simplify_gen_binary (PLUS, Pmode,
937 copy_rtx (address), GEN_INT (delta));
938 tmp = force_operand (tmp, NULL);
940 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
941 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
942 tmp = force_reg (Pmode, tmp);
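  /* The second operand of the prefetch pattern says whether the reference
     is a write, and the third is the degree of temporal locality; 3 is the
     strongest hint, roughly "keep the data in all cache levels" (the same
     convention as __builtin_prefetch).  */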
943 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
944 sequence = get_insns ();
945 end_sequence ();
947 return sequence;
950 /* Do transform 5) on INSN if applicable. */
952 static bool
953 speculative_prefetching_transform (rtx insn)
955 rtx histogram, value;
956 gcov_type val, count, all;
957 edge e;
958 rtx mem, address;
959 int write;
961 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
962 return false;
964 if (!find_mem_reference (insn, &mem, &write))
965 return false;
967 address = XEXP (mem, 0);
968 if (side_effects_p (address))
969 return false;
971 if (CONSTANT_P (address))
972 return false;
974 for (histogram = REG_NOTES (insn);
975 histogram;
976 histogram = XEXP (histogram, 1))
977 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
978 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
979 break;
981 if (!histogram)
982 return false;
984 histogram = XEXP (XEXP (histogram, 0), 1);
985 value = XEXP (histogram, 0);
986 histogram = XEXP (histogram, 1);
987 /* Skip last value referenced. */
988 histogram = XEXP (histogram, 1);
989 val = INTVAL (XEXP (histogram, 0));
990 histogram = XEXP (histogram, 1);
991 count = INTVAL (XEXP (histogram, 0));
992 histogram = XEXP (histogram, 1);
993 all = INTVAL (XEXP (histogram, 0));
995 /* With so few executions we do not really have a reason to optimize the
996 statement; more importantly, the data about address differences are
997 spoiled by the first item, which had no previous value to compare
998 with. */
999 if (all < 4)
1000 return false;
1002 /* We require that count be at least half of all; this means
1003 that for the transformation to fire the value must be constant
1004 at least 50% of the time (and 75% gives the guarantee of usage). */
1005 if (!rtx_equal_p (address, value) || 2 * count < all)
1006 return false;
1008 /* If the difference is too small, it does not make much sense to
1009 prefetch, as the memory is probably already in the cache. */
1010 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
1011 return false;
1013 if (dump_file)
1014 fprintf (dump_file, "Speculative prefetching for insn %d\n",
1015 INSN_UID (insn));
1017 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
1019 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
1021 return true;
1023 #endif /* HAVE_prefetch */
1025 /* Tree based transformations. */
1026 static bool
1027 tree_value_profile_transformations (void)
1029 basic_block bb;
1030 block_stmt_iterator bsi;
1031 bool changed = false;
1033 FOR_EACH_BB (bb)
1035 /* Ignore cold areas -- we are enlarging the code. */
1036 if (!maybe_hot_bb_p (bb))
1037 continue;
1039 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1041 tree stmt = bsi_stmt (bsi);
1042 stmt_ann_t ann = get_stmt_ann (stmt);
1043 histogram_value th = ann->histograms;
1044 if (!th)
1045 continue;
1047 if (dump_file)
1049 fprintf (dump_file, "Trying transformations on insn ");
1050 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1053 /* Transformations: */
1054 /* The order of things in this conditional controls which
1055 transformation is used when more than one is applicable. */
1056 /* It is expected that any code added by the transformations
1057 will be added before the current statement, and that the
1058 current statement remain valid (although possibly
1059 modified) upon return. */
1060 if (flag_value_profile_transformations
1061 && (tree_mod_subtract_transform (stmt)
1062 || tree_divmod_fixed_value_transform (stmt)
1063 || tree_mod_pow2_value_transform (stmt)))
1065 changed = true;
1066 /* Original statement may no longer be in the same block. */
1067 bb = bb_for_stmt (stmt);
1070 /* Free extra storage from compute_value_histograms. */
1071 while (th)
1073 free (th->hvalue.tree.counters);
1074 th = th->hvalue.tree.next;
1076 ann->histograms = 0;
1080 if (changed)
1082 counts_to_freqs ();
1085 return changed;
1088 /* Generate code for transformation 1 (with OPERATION, operands OP1
1089 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
1090 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1091 within roundoff error). This generates the result into a temp and returns
1092 the temp; it does not replace or alter the original STMT. */
1093 static tree
1094 tree_divmod_fixed_value (tree stmt, tree operation,
1095 tree op1, tree op2, tree value, int prob, gcov_type count,
1096 gcov_type all)
1098 tree stmt1, stmt2, stmt3;
1099 tree tmp1, tmp2, tmpv;
1100 tree label_decl1 = create_artificial_label ();
1101 tree label_decl2 = create_artificial_label ();
1102 tree label_decl3 = create_artificial_label ();
1103 tree label1, label2, label3;
1104 tree bb1end, bb2end, bb3end;
1105 basic_block bb, bb2, bb3, bb4;
1106 tree optype = TREE_TYPE (operation);
1107 edge e12, e13, e23, e24, e34;
1108 block_stmt_iterator bsi;
1110 bb = bb_for_stmt (stmt);
1111 bsi = bsi_for_stmt (stmt);
1113 tmpv = create_tmp_var (optype, "PROF");
1114 tmp1 = create_tmp_var (optype, "PROF");
1115 stmt1 = build2 (MODIFY_EXPR, optype, tmpv, fold_convert (optype, value));
1116 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1117 stmt3 = build3 (COND_EXPR, void_type_node,
1118 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
1119 build1 (GOTO_EXPR, void_type_node, label_decl2),
1120 build1 (GOTO_EXPR, void_type_node, label_decl1));
1121 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1122 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1123 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1124 bb1end = stmt3;
1126 tmp2 = create_tmp_var (optype, "PROF");
1127 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1128 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1129 build2 (TREE_CODE (operation), optype, op1, tmpv));
1130 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1131 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1132 bb2end = stmt1;
1134 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1135 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1136 build2 (TREE_CODE (operation), optype, op1, op2));
1137 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1138 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1139 bb3end = stmt1;
1141 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1142 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1144 /* Fix CFG. */
1145 /* Edge e23 connects bb2 to bb3, etc. */
1146 e12 = split_block (bb, bb1end);
1147 bb2 = e12->dest;
1148 bb2->count = count;
1149 e23 = split_block (bb2, bb2end);
1150 bb3 = e23->dest;
1151 bb3->count = all - count;
1152 e34 = split_block (bb3, bb3end);
1153 bb4 = e34->dest;
1154 bb4->count = all;
1156 e12->flags &= ~EDGE_FALLTHRU;
1157 e12->flags |= EDGE_FALSE_VALUE;
1158 e12->probability = prob;
1159 e12->count = count;
1161 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1162 e13->probability = REG_BR_PROB_BASE - prob;
1163 e13->count = all - count;
1165 remove_edge (e23);
1167 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1168 e24->probability = REG_BR_PROB_BASE;
1169 e24->count = count;
1171 e34->probability = REG_BR_PROB_BASE;
1172 e34->count = all - count;
1174 return tmp2;
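/* Roughly, the statements built above correspond to the following GIMPLE
   shape (names illustrative; the PROF_* temporaries come from
   create_tmp_var):

       PROF_v = (optype) value;
       PROF_1 = op2;
       if (PROF_1 != PROF_v) goto L2; else goto L1;
     L1:
       PROF_2 = op1 OP PROF_v;    <-- fast path, divisor known constant
       goto L3;
     L2:
       PROF_2 = op1 OP op2;       <-- original operation
     L3:
       ...                        <-- PROF_2 is returned to the caller

   split into four basic blocks with counts and probabilities taken from
   the profile.  */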
1177 /* Do transform 1) on STMT if applicable. */
1178 static bool
1179 tree_divmod_fixed_value_transform (tree stmt)
1181 stmt_ann_t ann = get_stmt_ann (stmt);
1182 histogram_value histogram;
1183 enum tree_code code;
1184 gcov_type val, count, all;
1185 tree modify, op, op1, op2, result, value, tree_val;
1186 int prob;
1188 modify = stmt;
1189 if (TREE_CODE (stmt) == RETURN_EXPR
1190 && TREE_OPERAND (stmt, 0)
1191 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1192 modify = TREE_OPERAND (stmt, 0);
1193 if (TREE_CODE (modify) != MODIFY_EXPR)
1194 return false;
1195 op = TREE_OPERAND (modify, 1);
1196 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1197 return false;
1198 code = TREE_CODE (op);
1200 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
1201 return false;
1203 op1 = TREE_OPERAND (op, 0);
1204 op2 = TREE_OPERAND (op, 1);
1205 if (!ann->histograms)
1206 return false;
1208 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1209 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
1210 break;
1212 if (!histogram)
1213 return false;
1215 value = histogram->hvalue.tree.value;
1216 val = histogram->hvalue.tree.counters[0];
1217 count = histogram->hvalue.tree.counters[1];
1218 all = histogram->hvalue.tree.counters[2];
1220 /* We require that count is at least half of all; this means
1221 that for the transformation to fire the value must be constant
1222 at least 50% of the time (and 75% gives the guarantee of usage). */
1223 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
1224 return false;
1226 /* Compute probability of taking the optimal path. */
1227 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
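  /* Build VAL as a constant of the gcov type.  The double shift below
     extracts the high half of VAL without ever shifting by the full width
     of HOST_WIDE_INT, which would be undefined behavior.  */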
1229 tree_val = build_int_cst_wide (get_gcov_type (),
1230 (unsigned HOST_WIDE_INT) val,
1231 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
1232 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
1234 if (dump_file)
1236 fprintf (dump_file, "Div/mod by constant ");
1237 print_generic_expr (dump_file, value, TDF_SLIM);
1238 fprintf (dump_file, "=");
1239 print_generic_expr (dump_file, tree_val, TDF_SLIM);
1240 fprintf (dump_file, " transformation on insn ");
1241 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1244 TREE_OPERAND (modify, 1) = result;
1246 return true;
1249 /* Generate code for transformation 2 (with OPERATION, operands OP1
1250 and OP2, parent modify-expr STMT and probability of taking the optimal
1251 path PROB, which is equivalent to COUNT/ALL within roundoff error).
1252 This generates the result into a temp and returns
1253 the temp; it does not replace or alter the original STMT. */
1254 static tree
1255 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
1256 gcov_type count, gcov_type all)
1258 tree stmt1, stmt2, stmt3, stmt4;
1259 tree tmp1, tmp2, tmp3;
1260 tree label_decl1 = create_artificial_label ();
1261 tree label_decl2 = create_artificial_label ();
1262 tree label_decl3 = create_artificial_label ();
1263 tree label1, label2, label3;
1264 tree bb1end, bb2end, bb3end;
1265 basic_block bb, bb2, bb3, bb4;
1266 tree optype = TREE_TYPE (operation);
1267 edge e12, e13, e23, e24, e34;
1268 block_stmt_iterator bsi;
1269 tree result = create_tmp_var (optype, "PROF");
1271 bb = bb_for_stmt (stmt);
1272 bsi = bsi_for_stmt (stmt);
1274 tmp1 = create_tmp_var (optype, "PROF");
1275 tmp2 = create_tmp_var (optype, "PROF");
1276 tmp3 = create_tmp_var (optype, "PROF");
1277 stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
1278 stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
1279 build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
1280 stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
1281 build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
1282 stmt4 = build3 (COND_EXPR, void_type_node,
1283 build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
1284 build1 (GOTO_EXPR, void_type_node, label_decl2),
1285 build1 (GOTO_EXPR, void_type_node, label_decl1));
1286 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1287 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1288 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1289 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
1290 bb1end = stmt4;
1292 /* tmp2 == op2-1 inherited from previous block */
1293 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1294 stmt1 = build2 (MODIFY_EXPR, optype, result,
1295 build2 (BIT_AND_EXPR, optype, op1, tmp2));
1296 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1297 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1298 bb2end = stmt1;
1300 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1301 stmt1 = build2 (MODIFY_EXPR, optype, result,
1302 build2 (TREE_CODE (operation), optype, op1, op2));
1303 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1304 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1305 bb3end = stmt1;
1307 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1308 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1310 /* Fix CFG. */
1311 /* Edge e23 connects bb2 to bb3, etc. */
1312 e12 = split_block (bb, bb1end);
1313 bb2 = e12->dest;
1314 bb2->count = count;
1315 e23 = split_block (bb2, bb2end);
1316 bb3 = e23->dest;
1317 bb3->count = all - count;
1318 e34 = split_block (bb3, bb3end);
1319 bb4 = e34->dest;
1320 bb4->count = all;
1322 e12->flags &= ~EDGE_FALLTHRU;
1323 e12->flags |= EDGE_FALSE_VALUE;
1324 e12->probability = prob;
1325 e12->count = count;
1327 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1328 e13->probability = REG_BR_PROB_BASE - prob;
1329 e13->count = all - count;
1331 remove_edge (e23);
1333 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1334 e24->probability = REG_BR_PROB_BASE;
1335 e24->count = count;
1337 e34->probability = REG_BR_PROB_BASE;
1338 e34->count = all - count;
1340 return result;
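/* The blocks built above roughly correspond to (names illustrative only):

       PROF_1 = (optype) op2;
       PROF_2 = op2 - 1;
       PROF_3 = PROF_2 & PROF_1;
       if (PROF_3 != 0) goto L2; else goto L1;
     L1:
       result = op1 & PROF_2;     <-- op2 was a power of 2
       goto L3;
     L2:
       result = op1 % op2;        <-- general case
     L3:
       ...  */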
1343 /* Do transform 2) on STMT if applicable. */
1344 static bool
1345 tree_mod_pow2_value_transform (tree stmt)
1347 stmt_ann_t ann = get_stmt_ann (stmt);
1348 histogram_value histogram;
1349 enum tree_code code;
1350 gcov_type count, wrong_values, all;
1351 tree modify, op, op1, op2, result, value;
1352 int prob;
1354 modify = stmt;
1355 if (TREE_CODE (stmt) == RETURN_EXPR
1356 && TREE_OPERAND (stmt, 0)
1357 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1358 modify = TREE_OPERAND (stmt, 0);
1359 if (TREE_CODE (modify) != MODIFY_EXPR)
1360 return false;
1361 op = TREE_OPERAND (modify, 1);
1362 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1363 return false;
1364 code = TREE_CODE (op);
1366 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1367 return false;
1369 op1 = TREE_OPERAND (op, 0);
1370 op2 = TREE_OPERAND (op, 1);
1371 if (!ann->histograms)
1372 return false;
1374 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1375 if (histogram->type == HIST_TYPE_POW2)
1376 break;
1378 if (!histogram)
1379 return false;
1381 value = histogram->hvalue.tree.value;
1382 wrong_values = histogram->hvalue.tree.counters[0];
1383 count = histogram->hvalue.tree.counters[1];
1385 /* We require that we hit a power of 2 in at least half of all evaluations. */
1386 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
1387 return false;
1389 if (dump_file)
1391 fprintf (dump_file, "Mod power of 2 transformation on insn ");
1392 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1395 /* Compute probability of taking the optimal path. */
1396 all = count + wrong_values;
1397 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1399 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
1401 TREE_OPERAND (modify, 1) = result;
1403 return true;
1406 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
1407 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
1408 support. Currently only NCOUNTS==0 or 1 is supported and this is
1409 built into this interface. The probabilities of taking the optimal
1410 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
1411 COUNT2/ALL respectively within roundoff error). This generates the
1412 result into a temp and returns the temp; it does not replace or alter
1413 the original STMT. */
1414 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
1416 static tree
1417 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
1418 int prob1, int prob2, int ncounts,
1419 gcov_type count1, gcov_type count2, gcov_type all)
1421 tree stmt1, stmt2, stmt3;
1422 tree tmp1;
1423 tree label_decl1 = create_artificial_label ();
1424 tree label_decl2 = create_artificial_label ();
1425 tree label_decl3 = create_artificial_label ();
1426 tree label1, label2, label3;
1427 tree bb1end, bb2end = NULL_TREE, bb3end;
1428 basic_block bb, bb2, bb3, bb4;
1429 tree optype = TREE_TYPE (operation);
1430 edge e12, e23 = 0, e24, e34, e14;
1431 block_stmt_iterator bsi;
1432 tree result = create_tmp_var (optype, "PROF");
1434 bb = bb_for_stmt (stmt);
1435 bsi = bsi_for_stmt (stmt);
1437 tmp1 = create_tmp_var (optype, "PROF");
1438 stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
1439 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1440 stmt3 = build3 (COND_EXPR, void_type_node,
1441 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1442 build1 (GOTO_EXPR, void_type_node, label_decl3),
1443 build1 (GOTO_EXPR, void_type_node,
1444 ncounts ? label_decl1 : label_decl2));
1445 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1446 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1447 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1448 bb1end = stmt3;
1450 if (ncounts) /* Assumed to be 0 or 1 */
1452 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1453 stmt1 = build2 (MODIFY_EXPR, optype, result,
1454 build2 (MINUS_EXPR, optype, result, tmp1));
1455 stmt2 = build3 (COND_EXPR, void_type_node,
1456 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1457 build1 (GOTO_EXPR, void_type_node, label_decl3),
1458 build1 (GOTO_EXPR, void_type_node, label_decl2));
1459 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1460 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1461 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1462 bb2end = stmt2;
1465 /* Fallback case. */
1466 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1467 stmt1 = build2 (MODIFY_EXPR, optype, result,
1468 build2 (TREE_CODE (operation), optype, result, tmp1));
1469 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1470 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1471 bb3end = stmt1;
1473 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1474 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1476 /* Fix CFG. */
1477 /* Edge e23 connects bb2 to bb3, etc. */
1478 /* However block 3 is optional; if it is not there, references
1479 to 3 really refer to block 2. */
1480 e12 = split_block (bb, bb1end);
1481 bb2 = e12->dest;
1482 bb2->count = all - count1;
1484 if (ncounts) /* Assumed to be 0 or 1. */
1486 e23 = split_block (bb2, bb2end);
1487 bb3 = e23->dest;
1488 bb3->count = all - count1 - count2;
1491 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
1492 bb4 = e34->dest;
1493 bb4->count = all;
1495 e12->flags &= ~EDGE_FALLTHRU;
1496 e12->flags |= EDGE_FALSE_VALUE;
1497 e12->probability = REG_BR_PROB_BASE - prob1;
1498 e12->count = all - count1;
1500 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
1501 e14->probability = prob1;
1502 e14->count = count1;
1504 if (ncounts) /* Assumed to be 0 or 1. */
1506 e23->flags &= ~EDGE_FALLTHRU;
1507 e23->flags |= EDGE_FALSE_VALUE;
1508 e23->count = all - count1 - count2;
1509 e23->probability = REG_BR_PROB_BASE - prob2;
1511 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
1512 e24->probability = prob2;
1513 e24->count = count2;
1516 e34->probability = REG_BR_PROB_BASE;
1517 e34->count = all - count1 - count2;
1519 return result;
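/* For ncounts == 1 the generated code is roughly (names illustrative only):

       result = op1;
       PROF_1 = op2;
       if (result < PROF_1) goto L3; else goto L1;
     L1:
       result = result - PROF_1;
       if (result < PROF_1) goto L3; else goto L2;
     L2:
       result = result % PROF_1;  <-- fallback
     L3:
       ...

   For ncounts == 0 the L1 block is omitted and the first branch falls
   through directly to the fallback.  */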
1522 /* Do transforms 3) and 4) on STMT if applicable. */
1523 static bool
1524 tree_mod_subtract_transform (tree stmt)
1526 stmt_ann_t ann = get_stmt_ann (stmt);
1527 histogram_value histogram;
1528 enum tree_code code;
1529 gcov_type count, wrong_values, all;
1530 tree modify, op, op1, op2, result, value;
1531 int prob1, prob2;
1532 unsigned int i;
1534 modify = stmt;
1535 if (TREE_CODE (stmt) == RETURN_EXPR
1536 && TREE_OPERAND (stmt, 0)
1537 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1538 modify = TREE_OPERAND (stmt, 0);
1539 if (TREE_CODE (modify) != MODIFY_EXPR)
1540 return false;
1541 op = TREE_OPERAND (modify, 1);
1542 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1543 return false;
1544 code = TREE_CODE (op);
1546 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1547 return false;
1549 op1 = TREE_OPERAND (op, 0);
1550 op2 = TREE_OPERAND (op, 1);
1551 if (!ann->histograms)
1552 return false;
1554 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1555 if (histogram->type == HIST_TYPE_INTERVAL)
1556 break;
1558 if (!histogram)
1559 return false;
1561 value = histogram->hvalue.tree.value;
1562 all = 0;
1563 wrong_values = 0;
1564 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1565 all += histogram->hvalue.tree.counters[i];
1567 wrong_values += histogram->hvalue.tree.counters[i];
1568 wrong_values += histogram->hvalue.tree.counters[i+1];
1569 all += wrong_values;
1571 /* We require that we use just subtractions in at least 50% of all
1572 evaluations. */
1573 count = 0;
1574 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1576 count += histogram->hvalue.tree.counters[i];
1577 if (count * 2 >= all)
1578 break;
1580 if (i == histogram->hdata.intvl.steps)
1581 return false;
1583 if (dump_file)
1585 fprintf (dump_file, "Mod subtract transformation on insn ");
1586 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1589 /* Compute probability of taking the optimal path(s). */
1590 prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
1591 prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
1593 /* In practice, "steps" is always 2. This interface reflects this,
1594 and will need to be changed if "steps" can change. */
1595 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
1596 histogram->hvalue.tree.counters[0],
1597 histogram->hvalue.tree.counters[1], all);
1599 TREE_OPERAND (modify, 1) = result;
1601 return true;
1604 /* Connection to the outside world. */
1605 /* Struct for IR-dependent hooks. */
1606 struct value_prof_hooks {
1607 /* Find list of values for which we want to measure histograms. */
1608 void (*find_values_to_profile) (histogram_values *);
1610 /* Identify and exploit properties of values that are hard to analyze
1611 statically. See value-prof.c for more detail. */
1612 bool (*value_profile_transformations) (void);
1615 /* Hooks for RTL-based versions (the only ones that currently work). */
1616 static struct value_prof_hooks rtl_value_prof_hooks =
1618 rtl_find_values_to_profile,
1619 rtl_value_profile_transformations
1622 void
1623 rtl_register_value_prof_hooks (void)
1625 value_prof_hooks = &rtl_value_prof_hooks;
1626 gcc_assert (!ir_type ());
1629 /* Find values inside STMT for which we want to measure histograms for the
1630 division/modulo optimization. */
1631 static void
1632 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
1634 tree assign, lhs, rhs, divisor, op0, type;
1635 histogram_value hist;
1637 if (TREE_CODE (stmt) == RETURN_EXPR)
1638 assign = TREE_OPERAND (stmt, 0);
1639 else
1640 assign = stmt;
1642 if (!assign
1643 || TREE_CODE (assign) != MODIFY_EXPR)
1644 return;
1645 lhs = TREE_OPERAND (assign, 0);
1646 type = TREE_TYPE (lhs);
1647 if (!INTEGRAL_TYPE_P (type))
1648 return;
1650 rhs = TREE_OPERAND (assign, 1);
1651 switch (TREE_CODE (rhs))
1653 case TRUNC_DIV_EXPR:
1654 case TRUNC_MOD_EXPR:
1655 divisor = TREE_OPERAND (rhs, 1);
1656 op0 = TREE_OPERAND (rhs, 0);
1658 VEC_reserve (histogram_value, heap, *values, 3);
1660 if (is_gimple_reg (divisor))
1662 /* Check for the case where the divisor is the same value most
1663 of the time. */
1664 hist = ggc_alloc (sizeof (*hist));
1665 hist->hvalue.tree.value = divisor;
1666 hist->hvalue.tree.stmt = stmt;
1667 hist->type = HIST_TYPE_SINGLE_VALUE;
1668 VEC_quick_push (histogram_value, *values, hist);
1671 /* For mod, check whether it is often a noop (or replaceable by
1672 a few subtractions). */
1673 if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
1674 && TYPE_UNSIGNED (type))
1676 /* Check for a special case where the divisor is a power of 2. */
1677 hist = ggc_alloc (sizeof (*hist));
1678 hist->hvalue.tree.value = divisor;
1679 hist->hvalue.tree.stmt = stmt;
1680 hist->type = HIST_TYPE_POW2;
1681 VEC_quick_push (histogram_value, *values, hist);
1683 hist = ggc_alloc (sizeof (*hist));
1684 hist->hvalue.tree.stmt = stmt;
1685 hist->hvalue.tree.value
1686 = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
1687 hist->type = HIST_TYPE_INTERVAL;
1688 hist->hdata.intvl.int_start = 0;
1689 hist->hdata.intvl.steps = 2;
1690 VEC_quick_push (histogram_value, *values, hist);
1692 return;
1694 default:
1695 return;
1699 /* Find values inside STMT for which we want to measure histograms and add
1700 them to the list VALUES. */
1702 static void
1703 tree_values_to_profile (tree stmt, histogram_values *values)
1705 if (flag_value_profile_transformations)
1706 tree_divmod_values_to_profile (stmt, values);
1709 static void
1710 tree_find_values_to_profile (histogram_values *values)
1712 basic_block bb;
1713 block_stmt_iterator bsi;
1714 unsigned i;
1715 histogram_value hist;
1717 *values = NULL;
1718 FOR_EACH_BB (bb)
1719 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1720 tree_values_to_profile (bsi_stmt (bsi), values);
1721 static_values = *values;
1723 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1725 switch (hist->type)
1727 case HIST_TYPE_INTERVAL:
1728 if (dump_file)
1730 fprintf (dump_file, "Interval counter for tree ");
1731 print_generic_expr (dump_file, hist->hvalue.tree.stmt,
1732 TDF_SLIM);
1733 fprintf (dump_file, ", range %d -- %d.\n",
1734 hist->hdata.intvl.int_start,
1735 (hist->hdata.intvl.int_start
1736 + hist->hdata.intvl.steps - 1));
1738 hist->n_counters = hist->hdata.intvl.steps + 2;
1739 break;
1741 case HIST_TYPE_POW2:
1742 if (dump_file)
1744 fprintf (dump_file, "Pow2 counter for tree ");
1745 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1746 fprintf (dump_file, ".\n");
1748 hist->n_counters = 2;
1749 break;
1751 case HIST_TYPE_SINGLE_VALUE:
1752 if (dump_file)
1754 fprintf (dump_file, "Single value counter for tree ");
1755 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1756 fprintf (dump_file, ".\n");
1758 hist->n_counters = 3;
1759 break;
1761 case HIST_TYPE_CONST_DELTA:
1762 if (dump_file)
1764 fprintf (dump_file, "Constant delta counter for tree ");
1765 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1766 fprintf (dump_file, ".\n");
1768 hist->n_counters = 4;
1769 break;
1771 default:
1772 gcc_unreachable ();
1777 static struct value_prof_hooks tree_value_prof_hooks = {
1778 tree_find_values_to_profile,
1779 tree_value_profile_transformations
1782 void
1783 tree_register_value_prof_hooks (void)
1785 value_prof_hooks = &tree_value_prof_hooks;
1786 gcc_assert (ir_type ());
1789 /* IR-independent entry points. */
1790 void
1791 find_values_to_profile (histogram_values *values)
1793 (value_prof_hooks->find_values_to_profile) (values);
1796 bool
1797 value_profile_transformations (void)
1799 bool retval = (value_prof_hooks->value_profile_transformations) ();
1800 VEC_free (histogram_value, heap, static_values);
1801 return retval;
1804 static bool
1805 gate_handle_value_profile_transformations (void)
1807 return flag_branch_probabilities
1808 && flag_profile_values
1809 && !flag_tree_based_profiling
1810 && (flag_value_profile_transformations
1811 || flag_speculative_prefetching);
1815 /* Do optimizations based on expression value profiles. */
1816 static void
1817 rest_of_handle_value_profile_transformations (void)
1819 if (value_profile_transformations ())
1820 cleanup_cfg (CLEANUP_EXPENSIVE);
1823 struct tree_opt_pass pass_value_profile_transformations =
1825 "vpt", /* name */
1826 gate_handle_value_profile_transformations, /* gate */
1827 rest_of_handle_value_profile_transformations, /* execute */
1828 NULL, /* sub */
1829 NULL, /* next */
1830 0, /* static_pass_number */
1831 TV_VPT, /* tv_id */
1832 0, /* properties_required */
1833 0, /* properties_provided */
1834 0, /* properties_destroyed */
1835 0, /* todo_flags_start */
1836 TODO_dump_func, /* todo_flags_finish */
1837 'V' /* letter */