fix spaces vs. tabs for scripts.
[official-gcc.git] / gcc / value-prof.c
blob6c6eed88daee11c01e51dd6ab1dff0d5a374cfe9
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
38 static struct value_prof_hooks *value_prof_hooks;
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
64 /* For speculative prefetching, the range in that we do not prefetch (because
65 we assume that it will be in cache anyway). The asymmetry between min and
66 max range is trying to reflect the fact that the sequential prefetching
67 of the data is commonly done directly by hardware. Nevertheless, these
68 values are just a guess and should of course be target-specific. */
70 #ifndef NOPREFETCH_RANGE_MIN
71 #define NOPREFETCH_RANGE_MIN (-16)
72 #endif
73 #ifndef NOPREFETCH_RANGE_MAX
74 #define NOPREFETCH_RANGE_MAX 32
75 #endif
77 static void insn_divmod_values_to_profile (rtx, histogram_values *);
78 #ifdef HAVE_prefetch
79 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
80 static int find_mem_reference_1 (rtx *, void *);
81 static void find_mem_reference_2 (rtx, rtx, void *);
82 static bool find_mem_reference (rtx, rtx *, int *);
83 #endif
85 static void insn_values_to_profile (rtx, histogram_values *);
86 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
87 rtx, gcov_type, int);
88 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
89 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
90 int, int, int);
91 #ifdef HAVE_prefetch
92 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
93 #endif
94 static bool divmod_fixed_value_transform (rtx insn);
95 static bool mod_pow2_value_transform (rtx);
96 static bool mod_subtract_transform (rtx);
97 #ifdef HAVE_prefetch
98 static bool speculative_prefetching_transform (rtx);
99 #endif
101 /* Find values inside INSN for that we want to measure histograms for
102 division/modulo optimization and stores them to VALUES. */
103 static void
104 insn_divmod_values_to_profile (rtx insn, histogram_values *values)
106 rtx set, set_src, op1, op2;
107 enum machine_mode mode;
108 histogram_value hist;
110 if (!INSN_P (insn))
111 return;
113 set = single_set (insn);
114 if (!set)
115 return;
117 mode = GET_MODE (SET_DEST (set));
118 if (!INTEGRAL_MODE_P (mode))
119 return;
121 set_src = SET_SRC (set);
122 switch (GET_CODE (set_src))
124 case DIV:
125 case MOD:
126 case UDIV:
127 case UMOD:
128 op1 = XEXP (set_src, 0);
129 op2 = XEXP (set_src, 1);
130 if (side_effects_p (op2))
131 return;
133 /* Check for a special case where the divisor is power of 2. */
134 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
136 hist = ggc_alloc (sizeof (*hist));
137 hist->value = op2;
138 hist->seq = NULL_RTX;
139 hist->mode = mode;
140 hist->insn = insn;
141 hist->type = HIST_TYPE_POW2;
142 hist->hdata.pow2.may_be_other = 1;
143 VEC_safe_push (histogram_value, *values, hist);
146 /* Check whether the divisor is not in fact a constant. */
147 if (!CONSTANT_P (op2))
149 hist = ggc_alloc (sizeof (*hist));
150 hist->value = op2;
151 hist->mode = mode;
152 hist->seq = NULL_RTX;
153 hist->insn = insn;
154 hist->type = HIST_TYPE_SINGLE_VALUE;
155 VEC_safe_push (histogram_value, *values, hist);
158 /* For mod, check whether it is not often a noop (or replaceable by
159 a few subtractions). */
160 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
162 rtx tmp;
164 hist = ggc_alloc (sizeof (*hist));
165 start_sequence ();
166 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
167 hist->value = force_operand (tmp, NULL_RTX);
168 hist->seq = get_insns ();
169 end_sequence ();
170 hist->mode = mode;
171 hist->insn = insn;
172 hist->type = HIST_TYPE_INTERVAL;
173 hist->hdata.intvl.int_start = 0;
174 hist->hdata.intvl.steps = 2;
175 hist->hdata.intvl.may_be_less = 1;
176 hist->hdata.intvl.may_be_more = 1;
177 VEC_safe_push (histogram_value, *values, hist);
179 return;
181 default:
182 return;
186 #ifdef HAVE_prefetch
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
193 static int
194 find_mem_reference_1 (rtx *expr, void *ret)
196 rtx *mem = ret;
198 if (GET_CODE (*expr) == MEM)
200 *mem = *expr;
201 return 1;
203 return 0;
206 /* Called form find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
210 static int fmr2_write;
211 static void
212 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
214 if (expr == mem)
215 fmr2_write = true;
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
220 true otherwise. */
222 static bool
223 find_mem_reference (rtx insn, rtx *mem, int *write)
225 *mem = NULL_RTX;
226 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
228 if (!*mem)
229 return false;
231 fmr2_write = false;
232 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
233 *write = fmr2_write;
234 return true;
237 /* Find values inside INSN for that we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if such we found any such value, false otherwise. */
241 static bool
242 insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
244 rtx mem, address;
245 int write;
246 histogram_value hist;
248 if (!INSN_P (insn))
249 return false;
251 if (!find_mem_reference (insn, &mem, &write))
252 return false;
254 address = XEXP (mem, 0);
255 if (side_effects_p (address))
256 return false;
258 if (CONSTANT_P (address))
259 return false;
261 hist = ggc_alloc (sizeof (*hist));
262 hist->value = address;
263 hist->mode = GET_MODE (address);
264 hist->seq = NULL_RTX;
265 hist->insn = insn;
266 hist->type = HIST_TYPE_CONST_DELTA;
267 VEC_safe_push (histogram_value, *values, hist);
269 return true;
271 #endif
272 /* Find values inside INSN for that we want to measure histograms and adds
273 them to list VALUES (increasing the record of its length in N_VALUES). */
274 static void
275 insn_values_to_profile (rtx insn, histogram_values *values)
277 if (flag_value_profile_transformations)
278 insn_divmod_values_to_profile (insn, values);
280 #ifdef HAVE_prefetch
281 if (flag_speculative_prefetching)
282 insn_prefetch_values_to_profile (insn, values);
283 #endif
286 /* Find list of values for that we want to measure histograms. */
287 static void
288 rtl_find_values_to_profile (histogram_values *values)
290 rtx insn;
291 unsigned i;
293 life_analysis (NULL, PROP_DEATH_NOTES);
295 *values = VEC_alloc (histogram_value, 0);
296 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
297 insn_values_to_profile (insn, values);
299 for (i = 0; i < VEC_length (histogram_value, *values); i++)
301 histogram_value hist = VEC_index (histogram_value, *values, i);
303 switch (hist->type)
305 case HIST_TYPE_INTERVAL:
306 if (dump_file)
307 fprintf (dump_file,
308 "Interval counter for insn %d, range %d -- %d.\n",
309 INSN_UID ((rtx)hist->insn),
310 hist->hdata.intvl.int_start,
311 (hist->hdata.intvl.int_start
312 + hist->hdata.intvl.steps - 1));
313 hist->n_counters = hist->hdata.intvl.steps +
314 (hist->hdata.intvl.may_be_less ? 1 : 0) +
315 (hist->hdata.intvl.may_be_more ? 1 : 0);
316 break;
318 case HIST_TYPE_POW2:
319 if (dump_file)
320 fprintf (dump_file,
321 "Pow2 counter for insn %d.\n",
322 INSN_UID ((rtx)hist->insn));
323 hist->n_counters
324 = GET_MODE_BITSIZE (hist->mode)
325 + (hist->hdata.pow2.may_be_other ? 1 : 0);
326 break;
328 case HIST_TYPE_SINGLE_VALUE:
329 if (dump_file)
330 fprintf (dump_file,
331 "Single value counter for insn %d.\n",
332 INSN_UID ((rtx)hist->insn));
333 hist->n_counters = 3;
334 break;
336 case HIST_TYPE_CONST_DELTA:
337 if (dump_file)
338 fprintf (dump_file,
339 "Constant delta counter for insn %d.\n",
340 INSN_UID ((rtx)hist->insn));
341 hist->n_counters = 4;
342 break;
344 default:
345 abort ();
348 allocate_reg_info (max_reg_num (), FALSE, FALSE);
351 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
352 them to identify and exploit properties of values that are hard to analyze
353 statically.
355 We do following transformations:
359 x = a / b;
361 where b is almost always a constant N is transformed to
363 if (b == N)
364 x = a / N;
365 else
366 x = a / b;
368 Analogically with %
372 x = a % b
374 where b is almost always a power of 2 and the division is unsigned
375 TODO -- handle signed case as well
377 if ((b & (b - 1)) == 0)
378 x = a & (b - 1);
379 else
380 x = x % b;
382 Note that when b = 0, no error will occur and x = a; this is correct,
383 as result of such operation is undefined.
387 x = a % b
389 where a is almost always less then b and the division is unsigned
390 TODO -- handle signed case as well
392 x = a;
393 if (x >= b)
394 x %= b;
398 x = a % b
400 where a is almost always less then 2 * b and the division is unsigned
401 TODO -- handle signed case as well
403 x = a;
404 if (x >= b)
405 x -= b;
406 if (x >= b)
407 x %= b;
409 It would be possible to continue analogically for K * b for other small
410 K's, but it is probably not useful.
414 Read or write of mem[address], where the value of address changes usually
415 by a constant C != 0 between the following accesses to the computation; with
416 -fspeculative-prefetching we then add a prefetch of address + C before
417 the insn. This handles prefetching of several interesting cases in addition
418 to a simple prefetching for addresses that are induction variables, e. g.
419 linked lists allocated sequentially (even in case they are processed
420 recursively).
422 TODO -- we should also check whether there is not (usually) a small
423 difference with the adjacent memory references, so that we do
424 not issue overlapping prefetches. Also we should employ some
425 heuristics to eliminate cases where prefetching evidently spoils
426 the code.
427 -- it should somehow cooperate with the loop optimizer prefetching
429 TODO:
431 There are other useful cases that could be handled by a similar mechanism,
432 for example:
434 for (i = 0; i < n; i++)
437 transform to (for constant N):
439 if (n == N)
440 for (i = 0; i < N; i++)
442 else
443 for (i = 0; i < n; i++)
445 making unroller happy. Since this may grow the code significantly,
446 we would have to be very careful here. */
448 static bool
449 rtl_value_profile_transformations (void)
451 rtx insn, next;
452 int changed = false;
454 for (insn = get_insns (); insn; insn = next)
456 next = NEXT_INSN (insn);
458 if (!INSN_P (insn))
459 continue;
461 /* Scan for insn carrying a histogram. */
462 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
463 continue;
465 /* Ignore cold areas -- we are growing a code. */
466 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
467 continue;
469 if (dump_file)
471 fprintf (dump_file, "Trying transformations on insn %d\n",
472 INSN_UID (insn));
473 print_rtl_single (dump_file, insn);
476 /* Transformations: */
477 if (flag_value_profile_transformations
478 && (mod_subtract_transform (insn)
479 || divmod_fixed_value_transform (insn)
480 || mod_pow2_value_transform (insn)))
481 changed = true;
482 #ifdef HAVE_prefetch
483 if (flag_speculative_prefetching
484 && speculative_prefetching_transform (insn))
485 changed = true;
486 #endif
489 if (changed)
491 commit_edge_insertions ();
492 allocate_reg_info (max_reg_num (), FALSE, FALSE);
495 return changed;
498 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
499 and OP2, whose value is expected to be VALUE, result TARGET and
500 probability of taking the optimal path PROB). */
501 static rtx
502 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
503 rtx target, rtx op1, rtx op2, gcov_type value,
504 int prob)
506 rtx tmp, tmp1, jump;
507 rtx neq_label = gen_label_rtx ();
508 rtx end_label = gen_label_rtx ();
509 rtx sequence;
511 start_sequence ();
513 if (!REG_P (op2))
515 tmp = gen_reg_rtx (mode);
516 emit_move_insn (tmp, copy_rtx (op2));
518 else
519 tmp = op2;
521 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
522 NULL_RTX, neq_label);
524 /* Add branch probability to jump we just created. */
525 jump = get_last_insn ();
526 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
527 GEN_INT (REG_BR_PROB_BASE - prob),
528 REG_NOTES (jump));
530 tmp1 = simplify_gen_binary (operation, mode,
531 copy_rtx (op1), GEN_INT (value));
532 tmp1 = force_operand (tmp1, target);
533 if (tmp1 != target)
534 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
536 emit_jump_insn (gen_jump (end_label));
537 emit_barrier ();
539 emit_label (neq_label);
540 tmp1 = simplify_gen_binary (operation, mode,
541 copy_rtx (op1), copy_rtx (tmp));
542 tmp1 = force_operand (tmp1, target);
543 if (tmp1 != target)
544 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
546 emit_label (end_label);
548 sequence = get_insns ();
549 end_sequence ();
550 rebuild_jump_labels (sequence);
551 return sequence;
554 /* Do transform 1) on INSN if applicable. */
555 static bool
556 divmod_fixed_value_transform (rtx insn)
558 rtx set, set_src, set_dest, op1, op2, value, histogram;
559 enum rtx_code code;
560 enum machine_mode mode;
561 gcov_type val, count, all;
562 edge e;
563 int prob;
565 set = single_set (insn);
566 if (!set)
567 return false;
569 set_src = SET_SRC (set);
570 set_dest = SET_DEST (set);
571 code = GET_CODE (set_src);
572 mode = GET_MODE (set_dest);
574 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
575 return false;
576 op1 = XEXP (set_src, false);
577 op2 = XEXP (set_src, 1);
579 for (histogram = REG_NOTES (insn);
580 histogram;
581 histogram = XEXP (histogram, 1))
582 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
583 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
584 break;
586 if (!histogram)
587 return false;
589 histogram = XEXP (XEXP (histogram, 0), 1);
590 value = XEXP (histogram, 0);
591 histogram = XEXP (histogram, 1);
592 val = INTVAL (XEXP (histogram, 0));
593 histogram = XEXP (histogram, 1);
594 count = INTVAL (XEXP (histogram, 0));
595 histogram = XEXP (histogram, 1);
596 all = INTVAL (XEXP (histogram, 0));
598 /* We require that count is at least half of all; this means
599 that for the transformation to fire the value must be constant
600 at least 50% of time (and 75% gives the guarantee of usage). */
601 if (!rtx_equal_p (op2, value) || 2 * count < all)
602 return false;
604 if (dump_file)
605 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
606 INSN_UID (insn));
608 /* Compute probability of taking the optimal path. */
609 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
611 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
612 delete_insn (insn);
614 insert_insn_on_edge (
615 gen_divmod_fixed_value (mode, code, set_dest,
616 op1, op2, val, prob), e);
618 return true;
621 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
622 and OP2, result TARGET and probability of taking the optimal path PROB). */
623 static rtx
624 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
625 rtx op1, rtx op2, int prob)
627 rtx tmp, tmp1, tmp2, tmp3, jump;
628 rtx neq_label = gen_label_rtx ();
629 rtx end_label = gen_label_rtx ();
630 rtx sequence;
632 start_sequence ();
634 if (!REG_P (op2))
636 tmp = gen_reg_rtx (mode);
637 emit_move_insn (tmp, copy_rtx (op2));
639 else
640 tmp = op2;
642 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
643 0, OPTAB_WIDEN);
644 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
645 0, OPTAB_WIDEN);
646 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
647 NULL_RTX, neq_label);
649 /* Add branch probability to jump we just created. */
650 jump = get_last_insn ();
651 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
652 GEN_INT (REG_BR_PROB_BASE - prob),
653 REG_NOTES (jump));
655 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
656 0, OPTAB_WIDEN);
657 if (tmp3 != target)
658 emit_move_insn (copy_rtx (target), tmp3);
659 emit_jump_insn (gen_jump (end_label));
660 emit_barrier ();
662 emit_label (neq_label);
663 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
664 tmp1 = force_operand (tmp1, target);
665 if (tmp1 != target)
666 emit_move_insn (target, tmp1);
668 emit_label (end_label);
670 sequence = get_insns ();
671 end_sequence ();
672 rebuild_jump_labels (sequence);
673 return sequence;
676 /* Do transform 2) on INSN if applicable. */
677 static bool
678 mod_pow2_value_transform (rtx insn)
680 rtx set, set_src, set_dest, op1, op2, value, histogram;
681 enum rtx_code code;
682 enum machine_mode mode;
683 gcov_type wrong_values, count;
684 edge e;
685 int i, all, prob;
687 set = single_set (insn);
688 if (!set)
689 return false;
691 set_src = SET_SRC (set);
692 set_dest = SET_DEST (set);
693 code = GET_CODE (set_src);
694 mode = GET_MODE (set_dest);
696 if (code != UMOD)
697 return false;
698 op1 = XEXP (set_src, 0);
699 op2 = XEXP (set_src, 1);
701 for (histogram = REG_NOTES (insn);
702 histogram;
703 histogram = XEXP (histogram, 1))
704 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
705 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
706 break;
708 if (!histogram)
709 return false;
711 histogram = XEXP (XEXP (histogram, 0), 1);
712 value = XEXP (histogram, 0);
713 histogram = XEXP (histogram, 1);
714 wrong_values =INTVAL (XEXP (histogram, 0));
715 histogram = XEXP (histogram, 1);
717 count = 0;
718 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
720 count += INTVAL (XEXP (histogram, 0));
721 histogram = XEXP (histogram, 1);
724 if (!rtx_equal_p (op2, value))
725 return false;
727 /* We require that we hit a power of two at least half of all evaluations. */
728 if (count < wrong_values)
729 return false;
731 if (dump_file)
732 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
733 INSN_UID (insn));
735 /* Compute probability of taking the optimal path. */
736 all = count + wrong_values;
737 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
739 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
740 delete_insn (insn);
742 insert_insn_on_edge (
743 gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
745 return true;
748 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
749 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
750 probability of taking the optimal path(s) PROB1 and PROB2). */
751 static rtx
752 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
753 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
755 rtx tmp, tmp1, jump;
756 rtx end_label = gen_label_rtx ();
757 rtx sequence;
758 int i;
760 start_sequence ();
762 if (!REG_P (op2))
764 tmp = gen_reg_rtx (mode);
765 emit_move_insn (tmp, copy_rtx (op2));
767 else
768 tmp = op2;
770 emit_move_insn (target, copy_rtx (op1));
771 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
772 NULL_RTX, end_label);
774 /* Add branch probability to jump we just created. */
775 jump = get_last_insn ();
776 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
777 GEN_INT (prob1), REG_NOTES (jump));
779 for (i = 0; i < sub; i++)
781 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
782 0, OPTAB_WIDEN);
783 if (tmp1 != target)
784 emit_move_insn (target, tmp1);
785 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
786 NULL_RTX, end_label);
788 /* Add branch probability to jump we just created. */
789 jump = get_last_insn ();
790 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
791 GEN_INT (prob2), REG_NOTES (jump));
794 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
795 tmp1 = force_operand (tmp1, target);
796 if (tmp1 != target)
797 emit_move_insn (target, tmp1);
799 emit_label (end_label);
801 sequence = get_insns ();
802 end_sequence ();
803 rebuild_jump_labels (sequence);
804 return sequence;
807 /* Do transforms 3) and 4) on INSN if applicable. */
808 static bool
809 mod_subtract_transform (rtx insn)
811 rtx set, set_src, set_dest, op1, op2, value, histogram;
812 enum rtx_code code;
813 enum machine_mode mode;
814 gcov_type wrong_values, counts[2], count, all;
815 edge e;
816 int i, prob1, prob2;
818 set = single_set (insn);
819 if (!set)
820 return false;
822 set_src = SET_SRC (set);
823 set_dest = SET_DEST (set);
824 code = GET_CODE (set_src);
825 mode = GET_MODE (set_dest);
827 if (code != UMOD)
828 return false;
829 op1 = XEXP (set_src, 0);
830 op2 = XEXP (set_src, 1);
832 for (histogram = REG_NOTES (insn);
833 histogram;
834 histogram = XEXP (histogram, 1))
835 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
836 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
837 break;
839 if (!histogram)
840 return false;
842 histogram = XEXP (XEXP (histogram, 0), 1);
843 value = XEXP (histogram, 0);
844 histogram = XEXP (histogram, 1);
846 all = 0;
847 for (i = 0; i < 2; i++)
849 counts[i] = INTVAL (XEXP (histogram, 0));
850 all += counts[i];
851 histogram = XEXP (histogram, 1);
853 wrong_values = INTVAL (XEXP (histogram, 0));
854 histogram = XEXP (histogram, 1);
855 wrong_values += INTVAL (XEXP (histogram, 0));
856 all += wrong_values;
858 /* We require that we use just subtractions in at least 50% of all
859 evaluations. */
860 count = 0;
861 for (i = 0; i < 2; i++)
863 count += counts[i];
864 if (count * 2 >= all)
865 break;
868 if (i == 2)
869 return false;
871 if (dump_file)
872 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
873 INSN_UID (insn));
875 /* Compute probability of taking the optimal path(s). */
876 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
877 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
879 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
880 delete_insn (insn);
882 insert_insn_on_edge (
883 gen_mod_subtract (mode, code, set_dest,
884 op1, op2, i, prob1, prob2), e);
886 return true;
889 #ifdef HAVE_prefetch
890 /* Generate code for transformation 5 for mem with ADDRESS and a constant
891 step DELTA. WRITE is true if the reference is a store to mem. */
893 static rtx
894 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
896 rtx tmp;
897 rtx sequence;
899 /* TODO: we do the prefetching for just one iteration ahead, which
900 often is not enough. */
901 start_sequence ();
902 if (offsettable_address_p (0, VOIDmode, address))
903 tmp = plus_constant (copy_rtx (address), delta);
904 else
906 tmp = simplify_gen_binary (PLUS, Pmode,
907 copy_rtx (address), GEN_INT (delta));
908 tmp = force_operand (tmp, NULL);
910 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
911 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
912 tmp = force_reg (Pmode, tmp);
913 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
914 sequence = get_insns ();
915 end_sequence ();
917 return sequence;
920 /* Do transform 5) on INSN if applicable. */
922 static bool
923 speculative_prefetching_transform (rtx insn)
925 rtx histogram, value;
926 gcov_type val, count, all;
927 edge e;
928 rtx mem, address;
929 int write;
931 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
932 return false;
934 if (!find_mem_reference (insn, &mem, &write))
935 return false;
937 address = XEXP (mem, 0);
938 if (side_effects_p (address))
939 return false;
941 if (CONSTANT_P (address))
942 return false;
944 for (histogram = REG_NOTES (insn);
945 histogram;
946 histogram = XEXP (histogram, 1))
947 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
948 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
949 break;
951 if (!histogram)
952 return false;
954 histogram = XEXP (XEXP (histogram, 0), 1);
955 value = XEXP (histogram, 0);
956 histogram = XEXP (histogram, 1);
957 /* Skip last value referenced. */
958 histogram = XEXP (histogram, 1);
959 val = INTVAL (XEXP (histogram, 0));
960 histogram = XEXP (histogram, 1);
961 count = INTVAL (XEXP (histogram, 0));
962 histogram = XEXP (histogram, 1);
963 all = INTVAL (XEXP (histogram, 0));
965 /* With that few executions we do not really have a reason to optimize the
966 statement, and more importantly, the data about differences of addresses
967 are spoiled by the first item that had no previous value to compare
968 with. */
969 if (all < 4)
970 return false;
972 /* We require that count is at least half of all; this means
973 that for the transformation to fire the value must be constant
974 at least 50% of time (and 75% gives the guarantee of usage). */
975 if (!rtx_equal_p (address, value) || 2 * count < all)
976 return false;
978 /* If the difference is too small, it does not make too much sense to
979 prefetch, as the memory is probably already in cache. */
980 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
981 return false;
983 if (dump_file)
984 fprintf (dump_file, "Speculative prefetching for insn %d\n",
985 INSN_UID (insn));
987 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
989 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
991 return true;
993 #endif /* HAVE_prefetch */
995 /* Connection to the outside world. */
996 /* Struct for IR-dependent hooks. */
997 struct value_prof_hooks {
998 /* Find list of values for which we want to measure histograms. */
999 void (*find_values_to_profile) (histogram_values *);
1001 /* Identify and exploit properties of values that are hard to analyze
1002 statically. See value-prof.c for more detail. */
1003 bool (*value_profile_transformations) (void);
1006 /* Hooks for RTL-based versions (the only ones that currently work). */
1007 static struct value_prof_hooks rtl_value_prof_hooks =
1009 rtl_find_values_to_profile,
1010 rtl_value_profile_transformations
1013 void
1014 rtl_register_value_prof_hooks (void)
1016 value_prof_hooks = &rtl_value_prof_hooks;
1017 if (ir_type ())
1018 abort ();
1021 /* Tree-based versions are stubs for now. */
1022 static void
1023 tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
1025 abort ();
1028 static bool
1029 tree_value_profile_transformations (void)
1031 abort ();
1034 static struct value_prof_hooks tree_value_prof_hooks = {
1035 tree_find_values_to_profile,
1036 tree_value_profile_transformations
1039 void
1040 tree_register_value_prof_hooks (void)
1042 value_prof_hooks = &tree_value_prof_hooks;
1043 if (!ir_type ())
1044 abort ();
1047 /* IR-independent entry points. */
1048 void
1049 find_values_to_profile (histogram_values *values)
1051 (value_prof_hooks->find_values_to_profile) (values);
1054 bool
1055 value_profile_transformations (void)
1057 return (value_prof_hooks->value_profile_transformations) ();