* reload1.c (eliminate_regs_in_insn): Merge the plus_src "else" and
[official-gcc.git] / gcc / value-prof.c
blob61786b814c5e4bd6a66c68675f73962f0ae949db
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
43 #include "timevar.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
47 static struct value_prof_hooks *value_prof_hooks;
49 /* In this file value profile based optimizations are placed. Currently the
50 following optimizations are implemented (for more detailed descriptions
51 see comments at value_profile_transformations):
53 1) Division/modulo specialization. Provided that we can determine that the
54 operands of the division have some special properties, we may use it to
55 produce more effective code.
56 2) Speculative prefetching. If we are able to determine that the difference
57 between addresses accessed by a memory reference is usually constant, we
58 may add the prefetch instructions.
59 FIXME: This transformation was removed together with RTL based value
60 profiling.
62 Every such optimization should add its requirements for profiled values to
63 insn_values_to_profile function. This function is called from branch_prob
64 in profile.c and the requested values are instrumented by it in the first
65 compilation with -fprofile-arcs. The optimization may then read the
66 gathered data in the second compilation with -fbranch-probabilities.
68 The measured data is pointed to from the histograms
69 field of the statement annotation of the instrumented insns. It is
70 kept as a linked list of struct histogram_value_t's, which contain the
71 same information as above. */
74 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
75 tree, int, gcov_type, gcov_type);
76 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
77 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
78 gcov_type, gcov_type, gcov_type);
79 static bool tree_divmod_fixed_value_transform (tree);
80 static bool tree_mod_pow2_value_transform (tree);
81 static bool tree_mod_subtract_transform (tree);
82 static bool tree_stringops_transform (block_stmt_iterator *);
84 /* The overall number of invocations of the counter should match execution count
85 of basic block. Report it as error rather than internal error as it might
86 mean that user has misused the profile somehow. */
87 static bool
88 check_counter (tree stmt, const char * name, gcov_type all, gcov_type bb_count)
90 if (all != bb_count)
92 location_t * locus;
93 locus = (stmt != NULL && EXPR_HAS_LOCATION (stmt)
94 ? EXPR_LOCUS (stmt)
95 : &DECL_SOURCE_LOCATION (current_function_decl));
96 error ("%HCorrupted value profile: %s profiler overall count (%d) does not match BB count (%d)",
97 locus, name, (int)all, (int)bb_count);
98 return true;
100 return false;
103 /* Tree based transformations. */
104 static bool
105 tree_value_profile_transformations (void)
107 basic_block bb;
108 block_stmt_iterator bsi;
109 bool changed = false;
111 FOR_EACH_BB (bb)
113 /* Ignore cold areas -- we are enlarging the code. */
114 if (!bb->count || !maybe_hot_bb_p (bb))
115 continue;
117 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
119 tree stmt = bsi_stmt (bsi);
120 stmt_ann_t ann = get_stmt_ann (stmt);
121 histogram_value th = ann->histograms;
122 if (!th)
123 continue;
125 if (dump_file)
127 fprintf (dump_file, "Trying transformations on insn ");
128 print_generic_stmt (dump_file, stmt, TDF_SLIM);
131 /* Transformations: */
132 /* The order of things in this conditional controls which
133 transformation is used when more than one is applicable. */
134 /* It is expected that any code added by the transformations
135 will be added before the current statement, and that the
136 current statement remain valid (although possibly
137 modified) upon return. */
138 if (flag_value_profile_transformations
139 && (tree_mod_subtract_transform (stmt)
140 || tree_divmod_fixed_value_transform (stmt)
141 || tree_mod_pow2_value_transform (stmt)
142 || tree_stringops_transform (&bsi)))
144 stmt = bsi_stmt (bsi);
145 changed = true;
146 /* Original statement may no longer be in the same block. */
147 if (bb != bb_for_stmt (stmt))
149 bb = bb_for_stmt (stmt);
150 bsi = bsi_for_stmt (stmt);
154 /* Free extra storage from compute_value_histograms. */
155 while (th)
157 free (th->hvalue.counters);
158 th = th->hvalue.next;
160 ann->histograms = 0;
164 if (changed)
166 counts_to_freqs ();
169 return changed;
172 /* Generate code for transformation 1 (with OPERATION, operands OP1
173 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
174 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
175 within roundoff error). This generates the result into a temp and returns
176 the temp; it does not replace or alter the original STMT. */
177 static tree
178 tree_divmod_fixed_value (tree stmt, tree operation,
179 tree op1, tree op2, tree value, int prob, gcov_type count,
180 gcov_type all)
182 tree stmt1, stmt2, stmt3;
183 tree tmp1, tmp2, tmpv;
184 tree label_decl1 = create_artificial_label ();
185 tree label_decl2 = create_artificial_label ();
186 tree label1, label2;
187 tree bb1end, bb2end, bb3end;
188 basic_block bb, bb2, bb3, bb4;
189 tree optype = TREE_TYPE (operation);
190 edge e12, e13, e23, e24, e34;
191 block_stmt_iterator bsi;
193 bb = bb_for_stmt (stmt);
194 bsi = bsi_for_stmt (stmt);
196 tmpv = create_tmp_var (optype, "PROF");
197 tmp1 = create_tmp_var (optype, "PROF");
198 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv,
199 fold_convert (optype, value));
200 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, op2);
201 stmt3 = build3 (COND_EXPR, void_type_node,
202 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
203 build1 (GOTO_EXPR, void_type_node, label_decl2),
204 build1 (GOTO_EXPR, void_type_node, label_decl1));
205 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
206 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
207 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
208 bb1end = stmt3;
210 tmp2 = create_tmp_var (optype, "PROF");
211 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
212 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
213 build2 (TREE_CODE (operation), optype, op1, tmpv));
214 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
215 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
216 bb2end = stmt1;
218 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
219 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
220 build2 (TREE_CODE (operation), optype, op1, op2));
221 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
222 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
223 bb3end = stmt1;
225 /* Fix CFG. */
226 /* Edge e23 connects bb2 to bb3, etc. */
227 e12 = split_block (bb, bb1end);
228 bb2 = e12->dest;
229 bb2->count = count;
230 e23 = split_block (bb2, bb2end);
231 bb3 = e23->dest;
232 bb3->count = all - count;
233 e34 = split_block (bb3, bb3end);
234 bb4 = e34->dest;
235 bb4->count = all;
237 e12->flags &= ~EDGE_FALLTHRU;
238 e12->flags |= EDGE_FALSE_VALUE;
239 e12->probability = prob;
240 e12->count = count;
242 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
243 e13->probability = REG_BR_PROB_BASE - prob;
244 e13->count = all - count;
246 remove_edge (e23);
248 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
249 e24->probability = REG_BR_PROB_BASE;
250 e24->count = count;
252 e34->probability = REG_BR_PROB_BASE;
253 e34->count = all - count;
255 return tmp2;
258 /* Do transform 1) on INSN if applicable. */
259 static bool
260 tree_divmod_fixed_value_transform (tree stmt)
262 stmt_ann_t ann = get_stmt_ann (stmt);
263 histogram_value histogram;
264 enum tree_code code;
265 gcov_type val, count, all;
266 tree modify, op, op1, op2, result, value, tree_val;
267 int prob;
269 modify = stmt;
270 if (TREE_CODE (stmt) == RETURN_EXPR
271 && TREE_OPERAND (stmt, 0)
272 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
273 modify = TREE_OPERAND (stmt, 0);
274 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
275 return false;
276 op = GIMPLE_STMT_OPERAND (modify, 1);
277 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
278 return false;
279 code = TREE_CODE (op);
281 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
282 return false;
284 op1 = TREE_OPERAND (op, 0);
285 op2 = TREE_OPERAND (op, 1);
286 if (!ann->histograms)
287 return false;
289 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
290 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
291 break;
293 if (!histogram)
294 return false;
296 value = histogram->hvalue.value;
297 val = histogram->hvalue.counters[0];
298 count = histogram->hvalue.counters[1];
299 all = histogram->hvalue.counters[2];
301 /* We require that count is at least half of all; this means
302 that for the transformation to fire the value must be constant
303 at least 50% of time (and 75% gives the guarantee of usage). */
304 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
305 return false;
307 if (check_counter (stmt, "value", all, bb_for_stmt (stmt)->count))
308 return false;
310 /* Compute probability of taking the optimal path. */
311 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
313 tree_val = build_int_cst_wide (get_gcov_type (),
314 (unsigned HOST_WIDE_INT) val,
315 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
316 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
318 if (dump_file)
320 fprintf (dump_file, "Div/mod by constant ");
321 print_generic_expr (dump_file, value, TDF_SLIM);
322 fprintf (dump_file, "=");
323 print_generic_expr (dump_file, tree_val, TDF_SLIM);
324 fprintf (dump_file, " transformation on insn ");
325 print_generic_stmt (dump_file, stmt, TDF_SLIM);
328 GIMPLE_STMT_OPERAND (modify, 1) = result;
330 return true;
333 /* Generate code for transformation 2 (with OPERATION, operands OP1
334 and OP2, parent modify-expr STMT and probability of taking the optimal
335 path PROB, which is equivalent to COUNT/ALL within roundoff error).
336 This generates the result into a temp and returns
337 the temp; it does not replace or alter the original STMT. */
338 static tree
339 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
340 gcov_type count, gcov_type all)
342 tree stmt1, stmt2, stmt3, stmt4;
343 tree tmp2, tmp3;
344 tree label_decl1 = create_artificial_label ();
345 tree label_decl2 = create_artificial_label ();
346 tree label1, label2;
347 tree bb1end, bb2end, bb3end;
348 basic_block bb, bb2, bb3, bb4;
349 tree optype = TREE_TYPE (operation);
350 edge e12, e13, e23, e24, e34;
351 block_stmt_iterator bsi;
352 tree result = create_tmp_var (optype, "PROF");
354 bb = bb_for_stmt (stmt);
355 bsi = bsi_for_stmt (stmt);
357 tmp2 = create_tmp_var (optype, "PROF");
358 tmp3 = create_tmp_var (optype, "PROF");
359 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp2,
360 build2 (PLUS_EXPR, optype, op2, build_int_cst (optype, -1)));
361 stmt3 = build2 (GIMPLE_MODIFY_STMT, optype, tmp3,
362 build2 (BIT_AND_EXPR, optype, tmp2, op2));
363 stmt4 = build3 (COND_EXPR, void_type_node,
364 build2 (NE_EXPR, boolean_type_node,
365 tmp3, build_int_cst (optype, 0)),
366 build1 (GOTO_EXPR, void_type_node, label_decl2),
367 build1 (GOTO_EXPR, void_type_node, label_decl1));
368 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
369 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
370 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
371 bb1end = stmt4;
373 /* tmp2 == op2-1 inherited from previous block */
374 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
375 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
376 build2 (BIT_AND_EXPR, optype, op1, tmp2));
377 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
378 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
379 bb2end = stmt1;
381 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
382 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
383 build2 (TREE_CODE (operation), optype, op1, op2));
384 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
385 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
386 bb3end = stmt1;
388 /* Fix CFG. */
389 /* Edge e23 connects bb2 to bb3, etc. */
390 e12 = split_block (bb, bb1end);
391 bb2 = e12->dest;
392 bb2->count = count;
393 e23 = split_block (bb2, bb2end);
394 bb3 = e23->dest;
395 bb3->count = all - count;
396 e34 = split_block (bb3, bb3end);
397 bb4 = e34->dest;
398 bb4->count = all;
400 e12->flags &= ~EDGE_FALLTHRU;
401 e12->flags |= EDGE_FALSE_VALUE;
402 e12->probability = prob;
403 e12->count = count;
405 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
406 e13->probability = REG_BR_PROB_BASE - prob;
407 e13->count = all - count;
409 remove_edge (e23);
411 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
412 e24->probability = REG_BR_PROB_BASE;
413 e24->count = count;
415 e34->probability = REG_BR_PROB_BASE;
416 e34->count = all - count;
418 return result;
421 /* Do transform 2) on INSN if applicable. */
422 static bool
423 tree_mod_pow2_value_transform (tree stmt)
425 stmt_ann_t ann = get_stmt_ann (stmt);
426 histogram_value histogram;
427 enum tree_code code;
428 gcov_type count, wrong_values, all;
429 tree modify, op, op1, op2, result, value;
430 int prob;
432 modify = stmt;
433 if (TREE_CODE (stmt) == RETURN_EXPR
434 && TREE_OPERAND (stmt, 0)
435 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
436 modify = TREE_OPERAND (stmt, 0);
437 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
438 return false;
439 op = GIMPLE_STMT_OPERAND (modify, 1);
440 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
441 return false;
442 code = TREE_CODE (op);
444 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
445 return false;
447 op1 = TREE_OPERAND (op, 0);
448 op2 = TREE_OPERAND (op, 1);
449 if (!ann->histograms)
450 return false;
452 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
453 if (histogram->type == HIST_TYPE_POW2)
454 break;
456 if (!histogram)
457 return false;
459 value = histogram->hvalue.value;
460 wrong_values = histogram->hvalue.counters[0];
461 count = histogram->hvalue.counters[1];
463 /* We require that we hit a power of 2 at least half of all evaluations. */
464 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
465 return false;
467 if (dump_file)
469 fprintf (dump_file, "Mod power of 2 transformation on insn ");
470 print_generic_stmt (dump_file, stmt, TDF_SLIM);
473 /* Compute probability of taking the optimal path. */
474 all = count + wrong_values;
475 if (check_counter (stmt, "pow2", all, bb_for_stmt (stmt)->count))
476 return false;
478 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
480 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
482 GIMPLE_STMT_OPERAND (modify, 1) = result;
484 return true;
487 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
488 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
489 support. Currently only NCOUNTS==0 or 1 is supported and this is
490 built into this interface. The probabilities of taking the optimal
491 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
492 COUNT2/ALL respectively within roundoff error). This generates the
493 result into a temp and returns the temp; it does not replace or alter
494 the original STMT. */
495 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
497 static tree
498 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
499 int prob1, int prob2, int ncounts,
500 gcov_type count1, gcov_type count2, gcov_type all)
502 tree stmt1, stmt2, stmt3;
503 tree tmp1;
504 tree label_decl1 = create_artificial_label ();
505 tree label_decl2 = create_artificial_label ();
506 tree label_decl3 = create_artificial_label ();
507 tree label1, label2, label3;
508 tree bb1end, bb2end = NULL_TREE, bb3end;
509 basic_block bb, bb2, bb3, bb4;
510 tree optype = TREE_TYPE (operation);
511 edge e12, e23 = 0, e24, e34, e14;
512 block_stmt_iterator bsi;
513 tree result = create_tmp_var (optype, "PROF");
515 bb = bb_for_stmt (stmt);
516 bsi = bsi_for_stmt (stmt);
518 tmp1 = create_tmp_var (optype, "PROF");
519 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result, op1);
520 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, op2);
521 stmt3 = build3 (COND_EXPR, void_type_node,
522 build2 (LT_EXPR, boolean_type_node, result, tmp1),
523 build1 (GOTO_EXPR, void_type_node, label_decl3),
524 build1 (GOTO_EXPR, void_type_node,
525 ncounts ? label_decl1 : label_decl2));
526 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
527 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
528 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
529 bb1end = stmt3;
531 if (ncounts) /* Assumed to be 0 or 1 */
533 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
534 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
535 build2 (MINUS_EXPR, optype, result, tmp1));
536 stmt2 = build3 (COND_EXPR, void_type_node,
537 build2 (LT_EXPR, boolean_type_node, result, tmp1),
538 build1 (GOTO_EXPR, void_type_node, label_decl3),
539 build1 (GOTO_EXPR, void_type_node, label_decl2));
540 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
541 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
542 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
543 bb2end = stmt2;
546 /* Fallback case. */
547 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
548 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, result,
549 build2 (TREE_CODE (operation), optype, result, tmp1));
550 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
551 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
552 bb3end = stmt1;
554 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
555 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
557 /* Fix CFG. */
558 /* Edge e23 connects bb2 to bb3, etc. */
559 /* However block 3 is optional; if it is not there, references
560 to 3 really refer to block 2. */
561 e12 = split_block (bb, bb1end);
562 bb2 = e12->dest;
563 bb2->count = all - count1;
565 if (ncounts) /* Assumed to be 0 or 1. */
567 e23 = split_block (bb2, bb2end);
568 bb3 = e23->dest;
569 bb3->count = all - count1 - count2;
572 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
573 bb4 = e34->dest;
574 bb4->count = all;
576 e12->flags &= ~EDGE_FALLTHRU;
577 e12->flags |= EDGE_FALSE_VALUE;
578 e12->probability = REG_BR_PROB_BASE - prob1;
579 e12->count = all - count1;
581 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
582 e14->probability = prob1;
583 e14->count = count1;
585 if (ncounts) /* Assumed to be 0 or 1. */
587 e23->flags &= ~EDGE_FALLTHRU;
588 e23->flags |= EDGE_FALSE_VALUE;
589 e23->count = all - count1 - count2;
590 e23->probability = REG_BR_PROB_BASE - prob2;
592 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
593 e24->probability = prob2;
594 e24->count = count2;
597 e34->probability = REG_BR_PROB_BASE;
598 e34->count = all - count1 - count2;
600 return result;
603 /* Do transforms 3) and 4) on INSN if applicable. */
604 static bool
605 tree_mod_subtract_transform (tree stmt)
607 stmt_ann_t ann = get_stmt_ann (stmt);
608 histogram_value histogram;
609 enum tree_code code;
610 gcov_type count, wrong_values, all;
611 tree modify, op, op1, op2, result, value;
612 int prob1, prob2;
613 unsigned int i;
615 modify = stmt;
616 if (TREE_CODE (stmt) == RETURN_EXPR
617 && TREE_OPERAND (stmt, 0)
618 && TREE_CODE (TREE_OPERAND (stmt, 0)) == GIMPLE_MODIFY_STMT)
619 modify = TREE_OPERAND (stmt, 0);
620 if (TREE_CODE (modify) != GIMPLE_MODIFY_STMT)
621 return false;
622 op = GIMPLE_STMT_OPERAND (modify, 1);
623 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
624 return false;
625 code = TREE_CODE (op);
627 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
628 return false;
630 op1 = TREE_OPERAND (op, 0);
631 op2 = TREE_OPERAND (op, 1);
632 if (!ann->histograms)
633 return false;
635 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next)
636 if (histogram->type == HIST_TYPE_INTERVAL)
637 break;
639 if (!histogram)
640 return false;
642 value = histogram->hvalue.value;
643 all = 0;
644 wrong_values = 0;
645 for (i = 0; i < histogram->hdata.intvl.steps; i++)
646 all += histogram->hvalue.counters[i];
648 wrong_values += histogram->hvalue.counters[i];
649 wrong_values += histogram->hvalue.counters[i+1];
650 all += wrong_values;
652 /* Compute probability of taking the optimal path. */
653 if (check_counter (stmt, "interval", all, bb_for_stmt (stmt)->count))
654 return false;
656 /* We require that we use just subtractions in at least 50% of all
657 evaluations. */
658 count = 0;
659 for (i = 0; i < histogram->hdata.intvl.steps; i++)
661 count += histogram->hvalue.counters[i];
662 if (count * 2 >= all)
663 break;
665 if (i == histogram->hdata.intvl.steps)
666 return false;
668 if (dump_file)
670 fprintf (dump_file, "Mod subtract transformation on insn ");
671 print_generic_stmt (dump_file, stmt, TDF_SLIM);
674 /* Compute probability of taking the optimal path(s). */
675 prob1 = (histogram->hvalue.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
676 prob2 = (histogram->hvalue.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
678 /* In practice, "steps" is always 2. This interface reflects this,
679 and will need to be changed if "steps" can change. */
680 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
681 histogram->hvalue.counters[0],
682 histogram->hvalue.counters[1], all);
684 GIMPLE_STMT_OPERAND (modify, 1) = result;
686 return true;
689 /* Return true if the stringop FNDECL with ARGLIST shall be profiled. */
690 static bool
691 interesting_stringop_to_profile_p (tree fndecl, tree arglist)
693 enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
695 if (fcode != BUILT_IN_MEMSET && fcode != BUILT_IN_MEMCPY
696 && fcode != BUILT_IN_BZERO)
697 return false;
699 switch (fcode)
701 case BUILT_IN_MEMCPY:
702 case BUILT_IN_MEMPCPY:
703 return validate_arglist (arglist,
704 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE,
705 VOID_TYPE);
706 case BUILT_IN_MEMSET:
707 return validate_arglist (arglist,
708 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE,
709 VOID_TYPE);
710 case BUILT_IN_BZERO:
711 return validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE,
712 VOID_TYPE);
713 default:
714 gcc_unreachable ();
718 /* Convert stringop (..., size)
719 into
720 if (size == VALUE)
721 stringop (...., VALUE);
722 else
723 stringop (...., size);
724 assuming constant propagation of VALUE will happen later.
726 static void
727 tree_stringop_fixed_value (tree stmt, tree value, int prob, gcov_type count,
728 gcov_type all)
730 tree stmt1, stmt2, stmt3;
731 tree tmp1, tmpv;
732 tree label_decl1 = create_artificial_label ();
733 tree label_decl2 = create_artificial_label ();
734 tree label1, label2;
735 tree bb1end, bb2end;
736 basic_block bb, bb2, bb3, bb4;
737 edge e12, e13, e23, e24, e34;
738 block_stmt_iterator bsi;
739 tree call = get_call_expr_in (stmt);
740 tree arglist = TREE_OPERAND (call, 1);
741 tree blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
742 tree optype = TREE_TYPE (blck_size);
743 int region;
745 bb = bb_for_stmt (stmt);
746 bsi = bsi_for_stmt (stmt);
748 if (bsi_end_p (bsi))
750 edge_iterator ei;
751 for (ei = ei_start (bb->succs); (e34 = ei_safe_edge (ei)); )
752 if (!e34->flags & EDGE_ABNORMAL)
753 break;
755 else
757 e34 = split_block (bb, stmt);
758 bsi = bsi_for_stmt (stmt);
760 bb4 = e34->dest;
762 tmpv = create_tmp_var (optype, "PROF");
763 tmp1 = create_tmp_var (optype, "PROF");
764 stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv,
765 fold_convert (optype, value));
766 stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, blck_size);
767 stmt3 = build3 (COND_EXPR, void_type_node,
768 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
769 build1 (GOTO_EXPR, void_type_node, label_decl2),
770 build1 (GOTO_EXPR, void_type_node, label_decl1));
771 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
772 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
773 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
774 bb1end = stmt3;
776 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
777 stmt1 = unshare_expr (stmt);
778 call = get_call_expr_in (stmt1);
779 arglist = TREE_OPERAND (call, 1);
780 TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))) = value;
781 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
782 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
783 region = lookup_stmt_eh_region (stmt);
784 if (region >= 0)
785 add_stmt_to_eh_region (stmt1, region);
786 bb2end = stmt1;
787 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
788 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
790 /* Fix CFG. */
791 /* Edge e23 connects bb2 to bb3, etc. */
792 e12 = split_block (bb, bb1end);
793 bb2 = e12->dest;
794 bb2->count = count;
795 e23 = split_block (bb2, bb2end);
796 bb3 = e23->dest;
797 bb3->count = all - count;
799 e12->flags &= ~EDGE_FALLTHRU;
800 e12->flags |= EDGE_FALSE_VALUE;
801 e12->probability = prob;
802 e12->count = count;
804 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
805 e13->probability = REG_BR_PROB_BASE - prob;
806 e13->count = all - count;
808 remove_edge (e23);
810 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
811 e24->probability = REG_BR_PROB_BASE;
812 e24->count = count;
814 e34->probability = REG_BR_PROB_BASE;
815 e34->count = all - count;
818 /* Find values inside STMT for that we want to measure histograms for
819 division/modulo optimization. */
820 static bool
821 tree_stringops_transform (block_stmt_iterator *bsi)
823 tree stmt = bsi_stmt (*bsi);
824 tree call = get_call_expr_in (stmt);
825 tree fndecl;
826 tree arglist;
827 tree blck_size;
828 enum built_in_function fcode;
829 stmt_ann_t ann = get_stmt_ann (stmt);
830 histogram_value histogram;
831 gcov_type count, all, val;
832 tree value;
833 tree dest, src;
834 unsigned int dest_align, src_align;
835 int prob;
836 tree tree_val;
838 if (!call)
839 return false;
840 fndecl = get_callee_fndecl (call);
841 if (!fndecl)
842 return false;
843 fcode = DECL_FUNCTION_CODE (fndecl);
844 arglist = TREE_OPERAND (call, 1);
845 if (!interesting_stringop_to_profile_p (fndecl, arglist))
846 return false;
848 if (fcode == BUILT_IN_BZERO)
849 blck_size = TREE_VALUE (TREE_CHAIN (arglist));
850 else
851 blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
852 if (TREE_CODE (blck_size) == INTEGER_CST)
853 return false;
855 if (!ann->histograms)
856 return false;
858 all = bb_for_stmt (stmt)->count;
859 if (!all)
860 return false;
861 for (histogram = ann->histograms; histogram;
862 histogram = histogram->hvalue.next)
863 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
864 break;
865 if (!histogram)
866 return false;
867 value = histogram->hvalue.value;
868 val = histogram->hvalue.counters[0];
869 count = histogram->hvalue.counters[1];
870 all = histogram->hvalue.counters[2];
871 /* We require that count is at least half of all; this means
872 that for the transformation to fire the value must be constant
873 at least 80% of time. */
874 if ((6 * count / 5) < all)
875 return false;
876 if (check_counter (stmt, "value", all, bb_for_stmt (stmt)->count))
877 return false;
878 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
879 dest = TREE_VALUE (arglist);
880 dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
881 switch (fcode)
883 case BUILT_IN_MEMCPY:
884 case BUILT_IN_MEMPCPY:
885 src = TREE_VALUE (TREE_CHAIN (arglist));
886 src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
887 if (!can_move_by_pieces (val, MIN (dest_align, src_align)))
888 return false;
889 break;
890 case BUILT_IN_MEMSET:
891 if (!can_store_by_pieces (val, builtin_memset_read_str,
892 TREE_VALUE (TREE_CHAIN (arglist)),
893 dest_align))
894 return false;
895 break;
896 case BUILT_IN_BZERO:
897 if (!can_store_by_pieces (val, builtin_memset_read_str,
898 integer_zero_node,
899 dest_align))
900 return false;
901 break;
902 default:
903 gcc_unreachable ();
905 tree_val = build_int_cst_wide (get_gcov_type (),
906 (unsigned HOST_WIDE_INT) val,
907 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
908 if (dump_file)
910 fprintf (dump_file, "Single value %i stringop transformation on ",
911 (int)val);
912 print_generic_stmt (dump_file, stmt, TDF_SLIM);
914 tree_stringop_fixed_value (stmt, tree_val, prob, count, all);
916 return true;
919 struct value_prof_hooks {
920 /* Find list of values for which we want to measure histograms. */
921 void (*find_values_to_profile) (histogram_values *);
923 /* Identify and exploit properties of values that are hard to analyze
924 statically. See value-prof.c for more detail. */
925 bool (*value_profile_transformations) (void);
928 /* Find values inside STMT for that we want to measure histograms for
929 division/modulo optimization. */
930 static void
931 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
933 tree assign, lhs, rhs, divisor, op0, type;
934 histogram_value hist;
936 if (TREE_CODE (stmt) == RETURN_EXPR)
937 assign = TREE_OPERAND (stmt, 0);
938 else
939 assign = stmt;
941 if (!assign
942 || TREE_CODE (assign) != GIMPLE_MODIFY_STMT)
943 return;
944 lhs = GIMPLE_STMT_OPERAND (assign, 0);
945 type = TREE_TYPE (lhs);
946 if (!INTEGRAL_TYPE_P (type))
947 return;
949 rhs = GIMPLE_STMT_OPERAND (assign, 1);
950 switch (TREE_CODE (rhs))
952 case TRUNC_DIV_EXPR:
953 case TRUNC_MOD_EXPR:
954 divisor = TREE_OPERAND (rhs, 1);
955 op0 = TREE_OPERAND (rhs, 0);
957 VEC_reserve (histogram_value, heap, *values, 3);
959 if (is_gimple_reg (divisor))
961 /* Check for the case where the divisor is the same value most
962 of the time. */
963 hist = ggc_alloc (sizeof (*hist));
964 hist->hvalue.value = divisor;
965 hist->hvalue.stmt = stmt;
966 hist->type = HIST_TYPE_SINGLE_VALUE;
967 VEC_quick_push (histogram_value, *values, hist);
970 /* For mod, check whether it is not often a noop (or replaceable by
971 a few subtractions). */
972 if (TREE_CODE (rhs) == TRUNC_MOD_EXPR
973 && TYPE_UNSIGNED (type))
975 /* Check for a special case where the divisor is power of 2. */
976 hist = ggc_alloc (sizeof (*hist));
977 hist->hvalue.value = divisor;
978 hist->hvalue.stmt = stmt;
979 hist->type = HIST_TYPE_POW2;
980 VEC_quick_push (histogram_value, *values, hist);
982 hist = ggc_alloc (sizeof (*hist));
983 hist->hvalue.stmt = stmt;
984 hist->hvalue.value
985 = build2 (TRUNC_DIV_EXPR, type, op0, divisor);
986 hist->type = HIST_TYPE_INTERVAL;
987 hist->hdata.intvl.int_start = 0;
988 hist->hdata.intvl.steps = 2;
989 VEC_quick_push (histogram_value, *values, hist);
991 return;
993 default:
994 return;
998 /* Find values inside STMT for that we want to measure histograms for
999 division/modulo optimization. */
1000 static void
1001 tree_stringops_values_to_profile (tree stmt, histogram_values *values)
1003 tree call = get_call_expr_in (stmt);
1004 tree fndecl;
1005 tree arglist;
1006 tree blck_size;
1007 enum built_in_function fcode;
1008 histogram_value hist;
1010 if (!call)
1011 return;
1012 fndecl = get_callee_fndecl (call);
1013 if (!fndecl)
1014 return;
1015 fcode = DECL_FUNCTION_CODE (fndecl);
1016 arglist = TREE_OPERAND (call, 1);
1018 if (!interesting_stringop_to_profile_p (fndecl, arglist))
1019 return;
1021 if (fcode == BUILT_IN_BZERO)
1022 blck_size = TREE_VALUE (TREE_CHAIN (arglist));
1023 else
1024 blck_size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
1026 if (TREE_CODE (blck_size) != INTEGER_CST)
1028 VEC_reserve (histogram_value, heap, *values, 3);
1029 hist = ggc_alloc (sizeof (*hist));
1030 hist->hvalue.value = blck_size;
1031 hist->hvalue.stmt = stmt;
1032 hist->type = HIST_TYPE_SINGLE_VALUE;
1033 VEC_quick_push (histogram_value, *values, hist);
1037 /* Find values inside STMT for that we want to measure histograms and adds
1038 them to list VALUES. */
1040 static void
1041 tree_values_to_profile (tree stmt, histogram_values *values)
1043 if (flag_value_profile_transformations)
1045 tree_divmod_values_to_profile (stmt, values);
1046 tree_stringops_values_to_profile (stmt, values);
1050 static void
1051 tree_find_values_to_profile (histogram_values *values)
1053 basic_block bb;
1054 block_stmt_iterator bsi;
1055 unsigned i;
1056 histogram_value hist;
1058 *values = NULL;
1059 FOR_EACH_BB (bb)
1060 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1061 tree_values_to_profile (bsi_stmt (bsi), values);
1063 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1065 switch (hist->type)
1067 case HIST_TYPE_INTERVAL:
1068 if (dump_file)
1070 fprintf (dump_file, "Interval counter for tree ");
1071 print_generic_expr (dump_file, hist->hvalue.stmt,
1072 TDF_SLIM);
1073 fprintf (dump_file, ", range %d -- %d.\n",
1074 hist->hdata.intvl.int_start,
1075 (hist->hdata.intvl.int_start
1076 + hist->hdata.intvl.steps - 1));
1078 hist->n_counters = hist->hdata.intvl.steps + 2;
1079 break;
1081 case HIST_TYPE_POW2:
1082 if (dump_file)
1084 fprintf (dump_file, "Pow2 counter for tree ");
1085 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1086 fprintf (dump_file, ".\n");
1088 hist->n_counters = 2;
1089 break;
1091 case HIST_TYPE_SINGLE_VALUE:
1092 if (dump_file)
1094 fprintf (dump_file, "Single value counter for tree ");
1095 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1096 fprintf (dump_file, ".\n");
1098 hist->n_counters = 3;
1099 break;
1101 case HIST_TYPE_CONST_DELTA:
1102 if (dump_file)
1104 fprintf (dump_file, "Constant delta counter for tree ");
1105 print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM);
1106 fprintf (dump_file, ".\n");
1108 hist->n_counters = 4;
1109 break;
1111 default:
1112 gcc_unreachable ();
1117 static struct value_prof_hooks tree_value_prof_hooks = {
1118 tree_find_values_to_profile,
1119 tree_value_profile_transformations
1122 void
1123 tree_register_value_prof_hooks (void)
1125 gcc_assert (current_ir_type () == IR_GIMPLE);
1126 value_prof_hooks = &tree_value_prof_hooks;
1129 /* IR-independent entry points. */
1130 void
1131 find_values_to_profile (histogram_values *values)
1133 (value_prof_hooks->find_values_to_profile) (values);
1136 bool
1137 value_profile_transformations (void)
1139 return (value_prof_hooks->value_profile_transformations) ();