* optabs.c (expand_binop): Make sure the first subword's result
[official-gcc.git] / gcc / value-prof.c
blobc3a70f9e391aabcd9f4b7da3ddca67ffcf6bb16c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
/* The set of IR-dependent hooks (RTL or tree based) currently in use;
   selected by rtl_register_value_prof_hooks / tree_register_value_prof_hooks
   below.  */
static struct value_prof_hooks *value_prof_hooks;
40 /* In this file value profile based optimizations are placed. Currently the
41 following optimizations are implemented (for more detailed descriptions
42 see comments at value_profile_transformations):
44 1) Division/modulo specialization. Provided that we can determine that the
45 operands of the division have some special properties, we may use it to
46 produce more effective code.
47 2) Speculative prefetching. If we are able to determine that the difference
48 between addresses accessed by a memory reference is usually constant, we
49 may add the prefetch instructions.
51 Every such optimization should add its requirements for profiled values to
52 insn_values_to_profile function. This function is called from branch_prob
53 in profile.c and the requested values are instrumented by it in the first
54 compilation with -fprofile-arcs. The optimization may then read the
55 gathered data in the second compilation with -fbranch-probabilities.
56 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
57 insn. The argument to the note consists of an EXPR_LIST where its
58 members have the following meaning (from the first to the last):
60 -- type of information gathered (HIST_TYPE*)
61 -- the expression that is profiled
62 -- list of counters starting from the first one. */
/* For speculative prefetching, the range in that we do not prefetch (because
   we assume that it will be in cache anyway).  The asymmetry between min and
   max range is trying to reflect the fact that the sequential prefetching
   of the data is commonly done directly by hardware.  Nevertheless, these
   values are just a guess and should of course be target-specific.  */

#ifndef NOPREFETCH_RANGE_MIN
#define NOPREFETCH_RANGE_MIN (-16)
#endif
#ifndef NOPREFETCH_RANGE_MAX
#define NOPREFETCH_RANGE_MAX 32
#endif
77 static void insn_divmod_values_to_profile (rtx, histogram_values *);
78 #ifdef HAVE_prefetch
79 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
80 static int find_mem_reference_1 (rtx *, void *);
81 static void find_mem_reference_2 (rtx, rtx, void *);
82 static bool find_mem_reference (rtx, rtx *, int *);
83 #endif
85 static void insn_values_to_profile (rtx, histogram_values *);
86 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
87 rtx, gcov_type, int);
88 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
89 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
90 int, int, int);
91 #ifdef HAVE_prefetch
92 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
93 #endif
94 static bool divmod_fixed_value_transform (rtx insn);
95 static bool mod_pow2_value_transform (rtx);
96 static bool mod_subtract_transform (rtx);
97 #ifdef HAVE_prefetch
98 static bool speculative_prefetching_transform (rtx);
99 #endif
101 /* Find values inside INSN for that we want to measure histograms for
102 division/modulo optimization and stores them to VALUES. */
103 static void
104 insn_divmod_values_to_profile (rtx insn, histogram_values *values)
106 rtx set, set_src, op1, op2;
107 enum machine_mode mode;
108 histogram_value hist;
110 if (!INSN_P (insn))
111 return;
113 set = single_set (insn);
114 if (!set)
115 return;
117 mode = GET_MODE (SET_DEST (set));
118 if (!INTEGRAL_MODE_P (mode))
119 return;
121 set_src = SET_SRC (set);
122 switch (GET_CODE (set_src))
124 case DIV:
125 case MOD:
126 case UDIV:
127 case UMOD:
128 op1 = XEXP (set_src, 0);
129 op2 = XEXP (set_src, 1);
130 if (side_effects_p (op2))
131 return;
133 /* Check for a special case where the divisor is power of 2. */
134 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
136 hist = ggc_alloc (sizeof (*hist));
137 hist->value = op2;
138 hist->seq = NULL_RTX;
139 hist->mode = mode;
140 hist->insn = insn;
141 hist->type = HIST_TYPE_POW2;
142 hist->hdata.pow2.may_be_other = 1;
143 VEC_safe_push (histogram_value, *values, hist);
146 /* Check whether the divisor is not in fact a constant. */
147 if (!CONSTANT_P (op2))
149 hist = ggc_alloc (sizeof (*hist));
150 hist->value = op2;
151 hist->mode = mode;
152 hist->seq = NULL_RTX;
153 hist->insn = insn;
154 hist->type = HIST_TYPE_SINGLE_VALUE;
155 VEC_safe_push (histogram_value, *values, hist);
158 /* For mod, check whether it is not often a noop (or replaceable by
159 a few subtractions). */
160 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
162 rtx tmp;
164 hist = ggc_alloc (sizeof (*hist));
165 start_sequence ();
166 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
167 hist->value = force_operand (tmp, NULL_RTX);
168 hist->seq = get_insns ();
169 end_sequence ();
170 hist->mode = mode;
171 hist->insn = insn;
172 hist->type = HIST_TYPE_INTERVAL;
173 hist->hdata.intvl.int_start = 0;
174 hist->hdata.intvl.steps = 2;
175 hist->hdata.intvl.may_be_less = 1;
176 hist->hdata.intvl.may_be_more = 1;
177 VEC_safe_push (histogram_value, *values, hist);
179 return;
181 default:
182 return;
186 #ifdef HAVE_prefetch
188 /* Called from find_mem_reference through for_each_rtx, finds a memory
189 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
190 to *RET and the traversing of the expression is interrupted by returning 1.
191 Otherwise 0 is returned. */
193 static int
194 find_mem_reference_1 (rtx *expr, void *ret)
196 rtx *mem = ret;
198 if (GET_CODE (*expr) == MEM)
200 *mem = *expr;
201 return 1;
203 return 0;
206 /* Called form find_mem_reference through note_stores to find out whether
207 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
208 FMR2_WRITE is set to true. */
210 static int fmr2_write;
211 static void
212 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
214 if (expr == mem)
215 fmr2_write = true;
218 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
219 if it is a write of the mem. Return false if no memory reference is found,
220 true otherwise. */
222 static bool
223 find_mem_reference (rtx insn, rtx *mem, int *write)
225 *mem = NULL_RTX;
226 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
228 if (!*mem)
229 return false;
231 fmr2_write = false;
232 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
233 *write = fmr2_write;
234 return true;
237 /* Find values inside INSN for that we want to measure histograms for
238 a speculative prefetching. Add them to the list VALUES.
239 Returns true if such we found any such value, false otherwise. */
241 static bool
242 insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
244 rtx mem, address;
245 int write;
246 histogram_value hist;
248 /* It only makes sense to look for memory references in ordinary insns. */
249 if (GET_CODE (insn) != INSN)
250 return false;
252 if (!find_mem_reference (insn, &mem, &write))
253 return false;
255 address = XEXP (mem, 0);
256 if (side_effects_p (address))
257 return false;
259 if (CONSTANT_P (address))
260 return false;
262 hist = ggc_alloc (sizeof (*hist));
263 hist->value = address;
264 hist->mode = GET_MODE (address);
265 hist->seq = NULL_RTX;
266 hist->insn = insn;
267 hist->type = HIST_TYPE_CONST_DELTA;
268 VEC_safe_push (histogram_value, *values, hist);
270 return true;
272 #endif
273 /* Find values inside INSN for that we want to measure histograms and adds
274 them to list VALUES (increasing the record of its length in N_VALUES). */
275 static void
276 insn_values_to_profile (rtx insn, histogram_values *values)
278 if (flag_value_profile_transformations)
279 insn_divmod_values_to_profile (insn, values);
281 #ifdef HAVE_prefetch
282 if (flag_speculative_prefetching)
283 insn_prefetch_values_to_profile (insn, values);
284 #endif
287 /* Find list of values for that we want to measure histograms. */
288 static void
289 rtl_find_values_to_profile (histogram_values *values)
291 rtx insn;
292 unsigned i, libcall_level;
294 life_analysis (NULL, PROP_DEATH_NOTES);
296 *values = VEC_alloc (histogram_value, 0);
297 libcall_level = 0;
298 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
300 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
301 libcall_level++;
303 /* Do not instrument values inside libcalls (we are going to split block
304 due to instrumentation, and libcall blocks should be local to a single
305 basic block). */
306 if (!libcall_level)
307 insn_values_to_profile (insn, values);
309 if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
311 gcc_assert (libcall_level > 0);
312 libcall_level--;
315 gcc_assert (libcall_level == 0);
317 for (i = 0; i < VEC_length (histogram_value, *values); i++)
319 histogram_value hist = VEC_index (histogram_value, *values, i);
321 switch (hist->type)
323 case HIST_TYPE_INTERVAL:
324 if (dump_file)
325 fprintf (dump_file,
326 "Interval counter for insn %d, range %d -- %d.\n",
327 INSN_UID ((rtx)hist->insn),
328 hist->hdata.intvl.int_start,
329 (hist->hdata.intvl.int_start
330 + hist->hdata.intvl.steps - 1));
331 hist->n_counters = hist->hdata.intvl.steps +
332 (hist->hdata.intvl.may_be_less ? 1 : 0) +
333 (hist->hdata.intvl.may_be_more ? 1 : 0);
334 break;
336 case HIST_TYPE_POW2:
337 if (dump_file)
338 fprintf (dump_file,
339 "Pow2 counter for insn %d.\n",
340 INSN_UID ((rtx)hist->insn));
341 hist->n_counters
342 = GET_MODE_BITSIZE (hist->mode)
343 + (hist->hdata.pow2.may_be_other ? 1 : 0);
344 break;
346 case HIST_TYPE_SINGLE_VALUE:
347 if (dump_file)
348 fprintf (dump_file,
349 "Single value counter for insn %d.\n",
350 INSN_UID ((rtx)hist->insn));
351 hist->n_counters = 3;
352 break;
354 case HIST_TYPE_CONST_DELTA:
355 if (dump_file)
356 fprintf (dump_file,
357 "Constant delta counter for insn %d.\n",
358 INSN_UID ((rtx)hist->insn));
359 hist->n_counters = 4;
360 break;
362 default:
363 gcc_unreachable ();
366 allocate_reg_info (max_reg_num (), FALSE, FALSE);
369 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
370 them to identify and exploit properties of values that are hard to analyze
371 statically.
373 We do following transformations:
377 x = a / b;
379 where b is almost always a constant N is transformed to
381 if (b == N)
382 x = a / N;
383 else
384 x = a / b;
386 Analogically with %
390 x = a % b
392 where b is almost always a power of 2 and the division is unsigned
393 TODO -- handle signed case as well
395 if ((b & (b - 1)) == 0)
396 x = a & (b - 1);
397 else
398 x = x % b;
400 Note that when b = 0, no error will occur and x = a; this is correct,
401 as result of such operation is undefined.
405 x = a % b
407 where a is almost always less then b and the division is unsigned
408 TODO -- handle signed case as well
410 x = a;
411 if (x >= b)
412 x %= b;
416 x = a % b
418 where a is almost always less then 2 * b and the division is unsigned
419 TODO -- handle signed case as well
421 x = a;
422 if (x >= b)
423 x -= b;
424 if (x >= b)
425 x %= b;
427 It would be possible to continue analogically for K * b for other small
428 K's, but it is probably not useful.
432 Read or write of mem[address], where the value of address changes usually
433 by a constant C != 0 between the following accesses to the computation; with
434 -fspeculative-prefetching we then add a prefetch of address + C before
435 the insn. This handles prefetching of several interesting cases in addition
436 to a simple prefetching for addresses that are induction variables, e. g.
437 linked lists allocated sequentially (even in case they are processed
438 recursively).
440 TODO -- we should also check whether there is not (usually) a small
441 difference with the adjacent memory references, so that we do
442 not issue overlapping prefetches. Also we should employ some
443 heuristics to eliminate cases where prefetching evidently spoils
444 the code.
445 -- it should somehow cooperate with the loop optimizer prefetching
447 TODO:
449 There are other useful cases that could be handled by a similar mechanism,
450 for example:
452 for (i = 0; i < n; i++)
455 transform to (for constant N):
457 if (n == N)
458 for (i = 0; i < N; i++)
460 else
461 for (i = 0; i < n; i++)
463 making unroller happy. Since this may grow the code significantly,
464 we would have to be very careful here. */
466 static bool
467 rtl_value_profile_transformations (void)
469 rtx insn, next;
470 int changed = false;
472 for (insn = get_insns (); insn; insn = next)
474 next = NEXT_INSN (insn);
476 if (!INSN_P (insn))
477 continue;
479 /* Scan for insn carrying a histogram. */
480 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
481 continue;
483 /* Ignore cold areas -- we are growing a code. */
484 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
485 continue;
487 if (dump_file)
489 fprintf (dump_file, "Trying transformations on insn %d\n",
490 INSN_UID (insn));
491 print_rtl_single (dump_file, insn);
494 /* Transformations: */
495 if (flag_value_profile_transformations
496 && (mod_subtract_transform (insn)
497 || divmod_fixed_value_transform (insn)
498 || mod_pow2_value_transform (insn)))
499 changed = true;
500 #ifdef HAVE_prefetch
501 if (flag_speculative_prefetching
502 && speculative_prefetching_transform (insn))
503 changed = true;
504 #endif
507 if (changed)
509 commit_edge_insertions ();
510 allocate_reg_info (max_reg_num (), FALSE, FALSE);
513 return changed;
516 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
517 and OP2, whose value is expected to be VALUE, result TARGET and
518 probability of taking the optimal path PROB). */
519 static rtx
520 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
521 rtx target, rtx op1, rtx op2, gcov_type value,
522 int prob)
524 rtx tmp, tmp1, jump;
525 rtx neq_label = gen_label_rtx ();
526 rtx end_label = gen_label_rtx ();
527 rtx sequence;
529 start_sequence ();
531 if (!REG_P (op2))
533 tmp = gen_reg_rtx (mode);
534 emit_move_insn (tmp, copy_rtx (op2));
536 else
537 tmp = op2;
539 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
540 NULL_RTX, neq_label);
542 /* Add branch probability to jump we just created. */
543 jump = get_last_insn ();
544 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
545 GEN_INT (REG_BR_PROB_BASE - prob),
546 REG_NOTES (jump));
548 tmp1 = simplify_gen_binary (operation, mode,
549 copy_rtx (op1), GEN_INT (value));
550 tmp1 = force_operand (tmp1, target);
551 if (tmp1 != target)
552 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
554 emit_jump_insn (gen_jump (end_label));
555 emit_barrier ();
557 emit_label (neq_label);
558 tmp1 = simplify_gen_binary (operation, mode,
559 copy_rtx (op1), copy_rtx (tmp));
560 tmp1 = force_operand (tmp1, target);
561 if (tmp1 != target)
562 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
564 emit_label (end_label);
566 sequence = get_insns ();
567 end_sequence ();
568 rebuild_jump_labels (sequence);
569 return sequence;
572 /* Do transform 1) on INSN if applicable. */
573 static bool
574 divmod_fixed_value_transform (rtx insn)
576 rtx set, set_src, set_dest, op1, op2, value, histogram;
577 enum rtx_code code;
578 enum machine_mode mode;
579 gcov_type val, count, all;
580 edge e;
581 int prob;
583 set = single_set (insn);
584 if (!set)
585 return false;
587 set_src = SET_SRC (set);
588 set_dest = SET_DEST (set);
589 code = GET_CODE (set_src);
590 mode = GET_MODE (set_dest);
592 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
593 return false;
594 op1 = XEXP (set_src, false);
595 op2 = XEXP (set_src, 1);
597 for (histogram = REG_NOTES (insn);
598 histogram;
599 histogram = XEXP (histogram, 1))
600 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
601 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
602 break;
604 if (!histogram)
605 return false;
607 histogram = XEXP (XEXP (histogram, 0), 1);
608 value = XEXP (histogram, 0);
609 histogram = XEXP (histogram, 1);
610 val = INTVAL (XEXP (histogram, 0));
611 histogram = XEXP (histogram, 1);
612 count = INTVAL (XEXP (histogram, 0));
613 histogram = XEXP (histogram, 1);
614 all = INTVAL (XEXP (histogram, 0));
616 /* We require that count be at least half of all; this means
617 that for the transformation to fire the value must be constant
618 at least 50% of time (and 75% gives the guarantee of usage). */
619 if (!rtx_equal_p (op2, value) || 2 * count < all)
620 return false;
622 if (dump_file)
623 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
624 INSN_UID (insn));
626 /* Compute probability of taking the optimal path. */
627 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
629 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
630 delete_insn (insn);
632 insert_insn_on_edge (
633 gen_divmod_fixed_value (mode, code, set_dest,
634 op1, op2, val, prob), e);
636 return true;
639 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
640 and OP2, result TARGET and probability of taking the optimal path PROB). */
641 static rtx
642 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
643 rtx op1, rtx op2, int prob)
645 rtx tmp, tmp1, tmp2, tmp3, jump;
646 rtx neq_label = gen_label_rtx ();
647 rtx end_label = gen_label_rtx ();
648 rtx sequence;
650 start_sequence ();
652 if (!REG_P (op2))
654 tmp = gen_reg_rtx (mode);
655 emit_move_insn (tmp, copy_rtx (op2));
657 else
658 tmp = op2;
660 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
661 0, OPTAB_WIDEN);
662 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
663 0, OPTAB_WIDEN);
664 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
665 NULL_RTX, neq_label);
667 /* Add branch probability to jump we just created. */
668 jump = get_last_insn ();
669 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
670 GEN_INT (REG_BR_PROB_BASE - prob),
671 REG_NOTES (jump));
673 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
674 0, OPTAB_WIDEN);
675 if (tmp3 != target)
676 emit_move_insn (copy_rtx (target), tmp3);
677 emit_jump_insn (gen_jump (end_label));
678 emit_barrier ();
680 emit_label (neq_label);
681 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
682 tmp1 = force_operand (tmp1, target);
683 if (tmp1 != target)
684 emit_move_insn (target, tmp1);
686 emit_label (end_label);
688 sequence = get_insns ();
689 end_sequence ();
690 rebuild_jump_labels (sequence);
691 return sequence;
694 /* Do transform 2) on INSN if applicable. */
695 static bool
696 mod_pow2_value_transform (rtx insn)
698 rtx set, set_src, set_dest, op1, op2, value, histogram;
699 enum rtx_code code;
700 enum machine_mode mode;
701 gcov_type wrong_values, count;
702 edge e;
703 int i, all, prob;
705 set = single_set (insn);
706 if (!set)
707 return false;
709 set_src = SET_SRC (set);
710 set_dest = SET_DEST (set);
711 code = GET_CODE (set_src);
712 mode = GET_MODE (set_dest);
714 if (code != UMOD)
715 return false;
716 op1 = XEXP (set_src, 0);
717 op2 = XEXP (set_src, 1);
719 for (histogram = REG_NOTES (insn);
720 histogram;
721 histogram = XEXP (histogram, 1))
722 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
723 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
724 break;
726 if (!histogram)
727 return false;
729 histogram = XEXP (XEXP (histogram, 0), 1);
730 value = XEXP (histogram, 0);
731 histogram = XEXP (histogram, 1);
732 wrong_values =INTVAL (XEXP (histogram, 0));
733 histogram = XEXP (histogram, 1);
735 count = 0;
736 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
738 count += INTVAL (XEXP (histogram, 0));
739 histogram = XEXP (histogram, 1);
742 if (!rtx_equal_p (op2, value))
743 return false;
745 /* We require that we hit a power of two at least half of all evaluations. */
746 if (count < wrong_values)
747 return false;
749 if (dump_file)
750 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
751 INSN_UID (insn));
753 /* Compute probability of taking the optimal path. */
754 all = count + wrong_values;
755 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
757 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
758 delete_insn (insn);
760 insert_insn_on_edge (
761 gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
763 return true;
766 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
767 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
768 probability of taking the optimal path(s) PROB1 and PROB2). */
769 static rtx
770 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
771 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
773 rtx tmp, tmp1, jump;
774 rtx end_label = gen_label_rtx ();
775 rtx sequence;
776 int i;
778 start_sequence ();
780 if (!REG_P (op2))
782 tmp = gen_reg_rtx (mode);
783 emit_move_insn (tmp, copy_rtx (op2));
785 else
786 tmp = op2;
788 emit_move_insn (target, copy_rtx (op1));
789 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
790 NULL_RTX, end_label);
792 /* Add branch probability to jump we just created. */
793 jump = get_last_insn ();
794 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
795 GEN_INT (prob1), REG_NOTES (jump));
797 for (i = 0; i < sub; i++)
799 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
800 0, OPTAB_WIDEN);
801 if (tmp1 != target)
802 emit_move_insn (target, tmp1);
803 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
804 NULL_RTX, end_label);
806 /* Add branch probability to jump we just created. */
807 jump = get_last_insn ();
808 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
809 GEN_INT (prob2), REG_NOTES (jump));
812 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
813 tmp1 = force_operand (tmp1, target);
814 if (tmp1 != target)
815 emit_move_insn (target, tmp1);
817 emit_label (end_label);
819 sequence = get_insns ();
820 end_sequence ();
821 rebuild_jump_labels (sequence);
822 return sequence;
825 /* Do transforms 3) and 4) on INSN if applicable. */
826 static bool
827 mod_subtract_transform (rtx insn)
829 rtx set, set_src, set_dest, op1, op2, histogram;
830 enum rtx_code code;
831 enum machine_mode mode;
832 gcov_type wrong_values, counts[2], count, all;
833 edge e;
834 int i, prob1, prob2;
836 set = single_set (insn);
837 if (!set)
838 return false;
840 set_src = SET_SRC (set);
841 set_dest = SET_DEST (set);
842 code = GET_CODE (set_src);
843 mode = GET_MODE (set_dest);
845 if (code != UMOD)
846 return false;
847 op1 = XEXP (set_src, 0);
848 op2 = XEXP (set_src, 1);
850 for (histogram = REG_NOTES (insn);
851 histogram;
852 histogram = XEXP (histogram, 1))
853 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
854 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
855 break;
857 if (!histogram)
858 return false;
860 histogram = XEXP (XEXP (histogram, 0), 1);
861 histogram = XEXP (histogram, 1);
863 all = 0;
864 for (i = 0; i < 2; i++)
866 counts[i] = INTVAL (XEXP (histogram, 0));
867 all += counts[i];
868 histogram = XEXP (histogram, 1);
870 wrong_values = INTVAL (XEXP (histogram, 0));
871 histogram = XEXP (histogram, 1);
872 wrong_values += INTVAL (XEXP (histogram, 0));
873 all += wrong_values;
875 /* We require that we use just subtractions in at least 50% of all
876 evaluations. */
877 count = 0;
878 for (i = 0; i < 2; i++)
880 count += counts[i];
881 if (count * 2 >= all)
882 break;
885 if (i == 2)
886 return false;
888 if (dump_file)
889 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
890 INSN_UID (insn));
892 /* Compute probability of taking the optimal path(s). */
893 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
894 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
896 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
897 delete_insn (insn);
899 insert_insn_on_edge (
900 gen_mod_subtract (mode, code, set_dest,
901 op1, op2, i, prob1, prob2), e);
903 return true;
906 #ifdef HAVE_prefetch
907 /* Generate code for transformation 5 for mem with ADDRESS and a constant
908 step DELTA. WRITE is true if the reference is a store to mem. */
910 static rtx
911 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
913 rtx tmp;
914 rtx sequence;
916 /* TODO: we do the prefetching for just one iteration ahead, which
917 often is not enough. */
918 start_sequence ();
919 if (offsettable_address_p (0, VOIDmode, address))
920 tmp = plus_constant (copy_rtx (address), delta);
921 else
923 tmp = simplify_gen_binary (PLUS, Pmode,
924 copy_rtx (address), GEN_INT (delta));
925 tmp = force_operand (tmp, NULL);
927 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
928 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
929 tmp = force_reg (Pmode, tmp);
930 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
931 sequence = get_insns ();
932 end_sequence ();
934 return sequence;
937 /* Do transform 5) on INSN if applicable. */
939 static bool
940 speculative_prefetching_transform (rtx insn)
942 rtx histogram, value;
943 gcov_type val, count, all;
944 edge e;
945 rtx mem, address;
946 int write;
948 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
949 return false;
951 if (!find_mem_reference (insn, &mem, &write))
952 return false;
954 address = XEXP (mem, 0);
955 if (side_effects_p (address))
956 return false;
958 if (CONSTANT_P (address))
959 return false;
961 for (histogram = REG_NOTES (insn);
962 histogram;
963 histogram = XEXP (histogram, 1))
964 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
965 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
966 break;
968 if (!histogram)
969 return false;
971 histogram = XEXP (XEXP (histogram, 0), 1);
972 value = XEXP (histogram, 0);
973 histogram = XEXP (histogram, 1);
974 /* Skip last value referenced. */
975 histogram = XEXP (histogram, 1);
976 val = INTVAL (XEXP (histogram, 0));
977 histogram = XEXP (histogram, 1);
978 count = INTVAL (XEXP (histogram, 0));
979 histogram = XEXP (histogram, 1);
980 all = INTVAL (XEXP (histogram, 0));
982 /* With that few executions we do not really have a reason to optimize the
983 statement, and more importantly, the data about differences of addresses
984 are spoiled by the first item that had no previous value to compare
985 with. */
986 if (all < 4)
987 return false;
989 /* We require that count be at least half of all; this means
990 that for the transformation to fire the value must be constant
991 at least 50% of time (and 75% gives the guarantee of usage). */
992 if (!rtx_equal_p (address, value) || 2 * count < all)
993 return false;
995 /* If the difference is too small, it does not make too much sense to
996 prefetch, as the memory is probably already in cache. */
997 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
998 return false;
1000 if (dump_file)
1001 fprintf (dump_file, "Speculative prefetching for insn %d\n",
1002 INSN_UID (insn));
1004 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
1006 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
1008 return true;
1010 #endif /* HAVE_prefetch */
1012 /* Connection to the outside world. */
1013 /* Struct for IR-dependent hooks. */
1014 struct value_prof_hooks {
1015 /* Find list of values for which we want to measure histograms. */
1016 void (*find_values_to_profile) (histogram_values *);
1018 /* Identify and exploit properties of values that are hard to analyze
1019 statically. See value-prof.c for more detail. */
1020 bool (*value_profile_transformations) (void);
1023 /* Hooks for RTL-based versions (the only ones that currently work). */
1024 static struct value_prof_hooks rtl_value_prof_hooks =
1026 rtl_find_values_to_profile,
1027 rtl_value_profile_transformations
1030 void
1031 rtl_register_value_prof_hooks (void)
1033 value_prof_hooks = &rtl_value_prof_hooks;
1034 gcc_assert (!ir_type ());
1037 /* Tree-based versions are stubs for now. */
1038 static void
1039 tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
1041 gcc_unreachable ();
1044 static bool
1045 tree_value_profile_transformations (void)
1047 gcc_unreachable ();
1050 static struct value_prof_hooks tree_value_prof_hooks = {
1051 tree_find_values_to_profile,
1052 tree_value_profile_transformations
1055 void
1056 tree_register_value_prof_hooks (void)
1058 value_prof_hooks = &tree_value_prof_hooks;
1059 gcc_assert (ir_type ());
1062 /* IR-independent entry points. */
1063 void
1064 find_values_to_profile (histogram_values *values)
1066 (value_prof_hooks->find_values_to_profile) (values);
1069 bool
1070 value_profile_transformations (void)
1072 return (value_prof_hooks->value_profile_transformations) ();