2003-12-26 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / gcc / value-prof.c
blob4f6f3da599a90b1c834b01ef4ddd8a24f224f484
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
37 /* In this file value profile based optimizations will be placed (none are
38 here just now, but they are hopefully coming soon).
40 Every such optimization should add its requirements for profiled values to
41 insn_values_to_profile function. This function is called from branch_prob
42 in profile.c and the requested values are instrumented by it in the first
43 compilation with -fprofile-arcs. The optimization may then read the
44 gathered data in the second compilation with -fbranch-probabilities.
45 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
46 insn. The argument to the note consists of an EXPR_LIST where its
47 members have the following meaning (from the first to the last):
49 -- type of information gathered (HIST_TYPE*)
50 -- the expression that is profiled
51 -- list of counters starting from the first one. */
53 static void insn_divmod_values_to_profile (rtx, unsigned *,
54 struct histogram_value **);
55 static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
56 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
57 rtx, gcov_type);
58 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
59 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
60 int);
61 static bool divmod_fixed_value_transform (rtx insn);
62 static bool mod_pow2_value_transform (rtx);
63 static bool mod_subtract_transform (rtx);
65 /* Release the list of VALUES of length N_VALUES for that we want to measure
66 histograms. */
67 void
68 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
69 struct histogram_value *values)
71 free (values);
74 /* Find values inside INSN for that we want to measure histograms for
75 division/modulo optimization. */
76 static void
77 insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
78 struct histogram_value **values)
80 rtx set, set_src, op1, op2;
81 enum machine_mode mode;
83 if (!INSN_P (insn))
84 return;
86 set = single_set (insn);
87 if (!set)
88 return;
90 mode = GET_MODE (SET_DEST (set));
91 if (!INTEGRAL_MODE_P (mode))
92 return;
94 set_src = SET_SRC (set);
95 switch (GET_CODE (set_src))
97 case DIV:
98 case MOD:
99 case UDIV:
100 case UMOD:
101 op1 = XEXP (set_src, 0);
102 op2 = XEXP (set_src, 1);
103 if (side_effects_p (op2))
104 return;
106 /* Check for a special case where the divisor is power of 2. */
107 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
109 *values = xrealloc (*values,
110 (*n_values + 1)
111 * sizeof (struct histogram_value));
112 (*values)[*n_values].value = op2;
113 (*values)[*n_values].seq = NULL_RTX;
114 (*values)[*n_values].mode = mode;
115 (*values)[*n_values].insn = insn;
116 (*values)[*n_values].type = HIST_TYPE_POW2;
117 (*values)[*n_values].hdata.pow2.may_be_other = 1;
118 (*n_values)++;
121 /* Check whether the divisor is not in fact a constant. */
122 if (!CONSTANT_P (op2))
124 *values = xrealloc (*values,
125 (*n_values + 1)
126 * sizeof (struct histogram_value));
127 (*values)[*n_values].value = op2;
128 (*values)[*n_values].mode = mode;
129 (*values)[*n_values].seq = NULL_RTX;
130 (*values)[*n_values].insn = insn;
131 (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
132 (*n_values)++;
135 /* For mod, check whether it is not often a noop (or replaceable by
136 a few subtractions). */
137 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
139 rtx tmp;
141 *values = xrealloc (*values,
142 (*n_values + 1)
143 * sizeof (struct histogram_value));
144 start_sequence ();
145 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
146 (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
147 (*values)[*n_values].seq = get_insns ();
148 end_sequence ();
149 (*values)[*n_values].mode = mode;
150 (*values)[*n_values].insn = insn;
151 (*values)[*n_values].type = HIST_TYPE_INTERVAL;
152 (*values)[*n_values].hdata.intvl.int_start = 0;
153 (*values)[*n_values].hdata.intvl.steps = 2;
154 (*values)[*n_values].hdata.intvl.may_be_less = 1;
155 (*values)[*n_values].hdata.intvl.may_be_more = 1;
156 (*n_values)++;
158 return;
160 default:
161 return;
165 /* Find values inside INSN for that we want to measure histograms and adds
166 them to list VALUES (increasing the record of its length in N_VALUES). */
167 static void
168 insn_values_to_profile (rtx insn,
169 unsigned *n_values,
170 struct histogram_value **values)
172 if (flag_value_profile_transformations)
173 insn_divmod_values_to_profile (insn, n_values, values);
176 /* Find list of values for that we want to measure histograms. */
177 void
178 find_values_to_profile (unsigned *n_values, struct histogram_value **values)
180 rtx insn;
181 unsigned i;
183 *n_values = 0;
184 *values = NULL;
185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
186 insn_values_to_profile (insn, n_values, values);
188 for (i = 0; i < *n_values; i++)
190 switch ((*values)[i].type)
192 case HIST_TYPE_INTERVAL:
193 if (rtl_dump_file)
194 fprintf (rtl_dump_file,
195 "Interval counter for insn %d, range %d -- %d.\n",
196 INSN_UID ((*values)[i].insn),
197 (*values)[i].hdata.intvl.int_start,
198 ((*values)[i].hdata.intvl.int_start
199 + (*values)[i].hdata.intvl.steps - 1));
200 (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
201 ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
202 ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
203 break;
205 case HIST_TYPE_POW2:
206 if (rtl_dump_file)
207 fprintf (rtl_dump_file,
208 "Pow2 counter for insn %d.\n",
209 INSN_UID ((*values)[i].insn));
210 (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) +
211 ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
212 break;
214 case HIST_TYPE_SINGLE_VALUE:
215 if (rtl_dump_file)
216 fprintf (rtl_dump_file,
217 "Single value counter for insn %d.\n",
218 INSN_UID ((*values)[i].insn));
219 (*values)[i].n_counters = 3;
220 break;
222 case HIST_TYPE_CONST_DELTA:
223 if (rtl_dump_file)
224 fprintf (rtl_dump_file,
225 "Constant delta counter for insn %d.\n",
226 INSN_UID ((*values)[i].insn));
227 (*values)[i].n_counters = 4;
228 break;
230 default:
231 abort ();
236 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
237 them to identify and exploit properties of values that are hard to analyze
238 statically.
240 We do following transformations:
244 x = a / b;
246 where b is almost always a constant N is transformed to
248 if (b == N)
249 x = a / N;
250 else
251 x = a / b;
253 Analogically with %
257 x = a % b
259 where b is almost always a power of 2 and the division is unsigned
260 TODO -- handle signed case as well
262 if ((b & (b - 1)) == 0)
263 x = a & (b - 1);
264 else
265 x = x % b;
267 Note that when b = 0, no error will occur and x = a; this is correct,
268 as result of such operation is undefined.
272 x = a % b
274 where a is almost always less then b and the division is unsigned
275 TODO -- handle signed case as well
277 x = a;
278 if (x >= b)
279 x %= b;
283 x = a % b
285 where a is almost always less then 2 * b and the division is unsigned
286 TODO -- handle signed case as well
288 x = a;
289 if (x >= b)
290 x -= b;
291 if (x >= b)
292 x %= b;
294 It would be possible to continue analogically for K * b for other small
295 K's, but it is probably not useful.
297 TODO:
299 There are other useful cases that could be handled by a similar mechanism,
300 for example:
302 for (i = 0; i < n; i++)
305 transform to (for constant N):
307 if (n == N)
308 for (i = 0; i < N; i++)
310 else
311 for (i = 0; i < n; i++)
313 making unroller happy. Since this may grow the code significantly,
314 we would have to be very careful here. */
316 bool
317 value_profile_transformations (void)
319 rtx insn, next;
320 int changed = false;
322 for (insn = get_insns (); insn; insn = next)
324 next = NEXT_INSN (insn);
326 if (!INSN_P (insn))
327 continue;
329 /* Scan for insn carrying a histogram. */
330 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
331 continue;
333 /* Ignore cold areas -- we are growing a code. */
334 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
335 continue;
337 if (rtl_dump_file)
339 fprintf (rtl_dump_file, "Trying transformations on insn %d\n",
340 INSN_UID (insn));
341 print_rtl_single (rtl_dump_file, insn);
344 /* Transformations: */
345 if (flag_value_profile_transformations
346 && (mod_subtract_transform (insn)
347 || divmod_fixed_value_transform (insn)
348 || mod_pow2_value_transform (insn)))
349 changed = true;
352 if (changed)
354 commit_edge_insertions ();
355 allocate_reg_info (max_reg_num (), FALSE, FALSE);
358 return changed;
361 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
362 and OP2 whose value is expected to be VALUE and result TARGET). */
363 static rtx
364 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
365 rtx target, rtx op1, rtx op2, gcov_type value)
367 rtx tmp, tmp1;
368 rtx neq_label = gen_label_rtx ();
369 rtx end_label = gen_label_rtx ();
370 rtx sequence;
372 start_sequence ();
374 if (!REG_P (op2))
376 tmp = gen_reg_rtx (mode);
377 emit_move_insn (tmp, copy_rtx (op2));
379 else
380 tmp = op2;
382 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
383 NULL_RTX, neq_label);
384 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
385 tmp1 = force_operand (tmp1, target);
386 if (tmp1 != target)
387 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
389 emit_jump_insn (gen_jump (end_label));
390 emit_barrier ();
392 emit_label (neq_label);
393 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
394 tmp1 = force_operand (tmp1, target);
395 if (tmp1 != target)
396 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
398 emit_label (end_label);
400 sequence = get_insns ();
401 end_sequence ();
402 rebuild_jump_labels (sequence);
403 return sequence;
406 /* Do transform 1) on INSN if applicable. */
407 static bool
408 divmod_fixed_value_transform (rtx insn)
410 rtx set, set_src, set_dest, op1, op2, value, histogram;
411 enum rtx_code code;
412 enum machine_mode mode;
413 gcov_type val, count, all;
414 edge e;
416 set = single_set (insn);
417 if (!set)
418 return false;
420 set_src = SET_SRC (set);
421 set_dest = SET_DEST (set);
422 code = GET_CODE (set_src);
423 mode = GET_MODE (set_dest);
425 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
426 return false;
427 op1 = XEXP (set_src, false);
428 op2 = XEXP (set_src, 1);
430 for (histogram = REG_NOTES (insn);
431 histogram;
432 histogram = XEXP (histogram, 1))
433 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
434 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
435 break;
437 if (!histogram)
438 return false;
440 histogram = XEXP (XEXP (histogram, 0), 1);
441 value = XEXP (histogram, 0);
442 histogram = XEXP (histogram, 1);
443 val = INTVAL (XEXP (histogram, 0));
444 histogram = XEXP (histogram, 1);
445 count = INTVAL (XEXP (histogram, 0));
446 histogram = XEXP (histogram, 1);
447 all = INTVAL (XEXP (histogram, 0));
449 /* We require that count is at least half of all; this means
450 that for the transformation to fire the value must be constant
451 at least 50% of time (and 75% gives the guarantee of usage). */
452 if (!rtx_equal_p (op2, value) || 2 * count < all)
453 return false;
455 if (rtl_dump_file)
456 fprintf (rtl_dump_file, "Div/mod by constant transformation on insn %d\n",
457 INSN_UID (insn));
459 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
460 delete_insn (insn);
462 insert_insn_on_edge (
463 gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
465 return true;
468 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
469 and OP2 and result TARGET). */
470 static rtx
471 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
472 rtx op1, rtx op2)
474 rtx tmp, tmp1, tmp2, tmp3;
475 rtx neq_label = gen_label_rtx ();
476 rtx end_label = gen_label_rtx ();
477 rtx sequence;
479 start_sequence ();
481 if (!REG_P (op2))
483 tmp = gen_reg_rtx (mode);
484 emit_move_insn (tmp, copy_rtx (op2));
486 else
487 tmp = op2;
489 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
490 0, OPTAB_WIDEN);
491 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
492 0, OPTAB_WIDEN);
493 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
494 NULL_RTX, neq_label);
495 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
496 0, OPTAB_WIDEN);
497 if (tmp3 != target)
498 emit_move_insn (copy_rtx (target), tmp3);
499 emit_jump_insn (gen_jump (end_label));
500 emit_barrier ();
502 emit_label (neq_label);
503 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
504 tmp1 = force_operand (tmp1, target);
505 if (tmp1 != target)
506 emit_move_insn (target, tmp1);
508 emit_label (end_label);
510 sequence = get_insns ();
511 end_sequence ();
512 rebuild_jump_labels (sequence);
513 return sequence;
516 /* Do transform 2) on INSN if applicable. */
517 static bool
518 mod_pow2_value_transform (rtx insn)
520 rtx set, set_src, set_dest, op1, op2, value, histogram;
521 enum rtx_code code;
522 enum machine_mode mode;
523 gcov_type wrong_values, count;
524 edge e;
525 int i;
527 set = single_set (insn);
528 if (!set)
529 return false;
531 set_src = SET_SRC (set);
532 set_dest = SET_DEST (set);
533 code = GET_CODE (set_src);
534 mode = GET_MODE (set_dest);
536 if (code != UMOD)
537 return false;
538 op1 = XEXP (set_src, 0);
539 op2 = XEXP (set_src, 1);
541 for (histogram = REG_NOTES (insn);
542 histogram;
543 histogram = XEXP (histogram, 1))
544 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
545 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
546 break;
548 if (!histogram)
549 return false;
551 histogram = XEXP (XEXP (histogram, 0), 1);
552 value = XEXP (histogram, 0);
553 histogram = XEXP (histogram, 1);
554 wrong_values =INTVAL (XEXP (histogram, 0));
555 histogram = XEXP (histogram, 1);
557 count = 0;
558 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
560 count += INTVAL (XEXP (histogram, 0));
561 histogram = XEXP (histogram, 1);
564 if (!rtx_equal_p (op2, value))
565 return false;
567 /* We require that we hit a power of two at least half of all evaluations. */
568 if (count < wrong_values)
569 return false;
571 if (rtl_dump_file)
572 fprintf (rtl_dump_file, "Mod power of 2 transformation on insn %d\n",
573 INSN_UID (insn));
575 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
576 delete_insn (insn);
578 insert_insn_on_edge (
579 gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
581 return true;
584 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
585 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
586 static rtx
587 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
588 rtx target, rtx op1, rtx op2, int sub)
590 rtx tmp, tmp1;
591 rtx end_label = gen_label_rtx ();
592 rtx sequence;
593 int i;
595 start_sequence ();
597 if (!REG_P (op2))
599 tmp = gen_reg_rtx (mode);
600 emit_move_insn (tmp, copy_rtx (op2));
602 else
603 tmp = op2;
605 emit_move_insn (target, copy_rtx (op1));
606 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
607 NULL_RTX, end_label);
610 for (i = 0; i < sub; i++)
612 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
613 0, OPTAB_WIDEN);
614 if (tmp1 != target)
615 emit_move_insn (target, tmp1);
616 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
617 NULL_RTX, end_label);
620 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
621 tmp1 = force_operand (tmp1, target);
622 if (tmp1 != target)
623 emit_move_insn (target, tmp1);
625 emit_label (end_label);
627 sequence = get_insns ();
628 end_sequence ();
629 rebuild_jump_labels (sequence);
630 return sequence;
633 /* Do transforms 3) and 4) on INSN if applicable. */
634 static bool
635 mod_subtract_transform (rtx insn)
637 rtx set, set_src, set_dest, op1, op2, value, histogram;
638 enum rtx_code code;
639 enum machine_mode mode;
640 gcov_type wrong_values, counts[2], count, all;
641 edge e;
642 int i;
644 set = single_set (insn);
645 if (!set)
646 return false;
648 set_src = SET_SRC (set);
649 set_dest = SET_DEST (set);
650 code = GET_CODE (set_src);
651 mode = GET_MODE (set_dest);
653 if (code != UMOD)
654 return false;
655 op1 = XEXP (set_src, 0);
656 op2 = XEXP (set_src, 1);
658 for (histogram = REG_NOTES (insn);
659 histogram;
660 histogram = XEXP (histogram, 1))
661 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
662 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
663 break;
665 if (!histogram)
666 return false;
668 histogram = XEXP (XEXP (histogram, 0), 1);
669 value = XEXP (histogram, 0);
670 histogram = XEXP (histogram, 1);
672 all = 0;
673 for (i = 0; i < 2; i++)
675 counts[i] = INTVAL (XEXP (histogram, 0));
676 all += counts[i];
677 histogram = XEXP (histogram, 1);
679 wrong_values = INTVAL (XEXP (histogram, 0));
680 histogram = XEXP (histogram, 1);
681 wrong_values += INTVAL (XEXP (histogram, 0));
682 all += wrong_values;
684 /* We require that we use just subtractions in at least 50% of all
685 evaluations. */
686 count = 0;
687 for (i = 0; i < 2; i++)
689 count += counts[i];
690 if (count * 2 >= all)
691 break;
694 if (i == 2)
695 return false;
697 if (rtl_dump_file)
698 fprintf (rtl_dump_file, "Mod subtract transformation on insn %d\n",
699 INSN_UID (insn));
701 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
702 delete_insn (insn);
704 insert_insn_on_edge (
705 gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);
707 return true;