2014-09-18 Vladimir Makarov <vmakarov@redhat.com>
[official-gcc.git] / gcc / ipa-profile.c
blobac461e24d178fc225cde97f3ef6567e2909420a6
1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
21 inter-procedurally.
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
42 of inliner.
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
47 #include "config.h"
48 #include "system.h"
49 #include "coretypes.h"
50 #include "tm.h"
51 #include "tree.h"
52 #include "cgraph.h"
53 #include "tree-pass.h"
54 #include "tree-ssa-alias.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
57 #include "gimple.h"
58 #include "gimple-iterator.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "tree-iterator.h"
62 #include "ipa-utils.h"
63 #include "profile.h"
64 #include "params.h"
65 #include "value-prof.h"
66 #include "alloc-pool.h"
67 #include "tree-inline.h"
68 #include "lto-streamer.h"
69 #include "data-streamer.h"
70 #include "ipa-inline.h"
72 /* Entry in the histogram. */
74 struct histogram_entry
76 gcov_type count;
77 int time;
78 int size;
81 /* Histogram of profile values.
82 The histogram is represented as an ordered vector of entries allocated via
83 histogram_pool. During construction a separate hashtable is kept to lookup
84 duplicate entries. */
86 vec<histogram_entry *> histogram;
87 static alloc_pool histogram_pool;
89 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
91 struct histogram_hash : typed_noop_remove <histogram_entry>
93 typedef histogram_entry value_type;
94 typedef histogram_entry compare_type;
95 static inline hashval_t hash (const value_type *);
96 static inline int equal (const value_type *, const compare_type *);
99 inline hashval_t
100 histogram_hash::hash (const histogram_entry *val)
102 return val->count;
105 inline int
106 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
108 return val->count == val2->count;
111 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
112 HASHTABLE is the on-side hash kept to avoid duplicates. */
114 static void
115 account_time_size (hash_table<histogram_hash> *hashtable,
116 vec<histogram_entry *> &histogram,
117 gcov_type count, int time, int size)
119 histogram_entry key = {count, 0, 0};
120 histogram_entry **val = hashtable->find_slot (&key, INSERT);
122 if (!*val)
124 *val = (histogram_entry *) pool_alloc (histogram_pool);
125 **val = key;
126 histogram.safe_push (*val);
128 (*val)->time += time;
129 (*val)->size += size;
133 cmp_counts (const void *v1, const void *v2)
135 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
136 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
137 if (h1->count < h2->count)
138 return 1;
139 if (h1->count > h2->count)
140 return -1;
141 return 0;
144 /* Dump HISTOGRAM to FILE. */
146 static void
147 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
149 unsigned int i;
150 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
152 fprintf (dump_file, "Histogram:\n");
153 for (i = 0; i < histogram.length (); i++)
155 overall_time += histogram[i]->count * histogram[i]->time;
156 overall_size += histogram[i]->size;
158 if (!overall_time)
159 overall_time = 1;
160 if (!overall_size)
161 overall_size = 1;
162 for (i = 0; i < histogram.length (); i++)
164 cumulated_time += histogram[i]->count * histogram[i]->time;
165 cumulated_size += histogram[i]->size;
166 fprintf (file, " %"PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
167 (int64_t) histogram[i]->count,
168 histogram[i]->time,
169 cumulated_time * 100.0 / overall_time,
170 histogram[i]->size,
171 cumulated_size * 100.0 / overall_size);
175 /* Collect histogram from CFG profiles. */
177 static void
178 ipa_profile_generate_summary (void)
180 struct cgraph_node *node;
181 gimple_stmt_iterator gsi;
182 basic_block bb;
184 hash_table<histogram_hash> hashtable (10);
185 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
186 10);
188 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
189 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
191 int time = 0;
192 int size = 0;
193 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
195 gimple stmt = gsi_stmt (gsi);
196 if (gimple_code (stmt) == GIMPLE_CALL
197 && !gimple_call_fndecl (stmt))
199 histogram_value h;
200 h = gimple_histogram_value_of_type
201 (DECL_STRUCT_FUNCTION (node->decl),
202 stmt, HIST_TYPE_INDIR_CALL);
203 /* No need to do sanity check: gimple_ic_transform already
204 takes away bad histograms. */
205 if (h)
207 /* counter 0 is target, counter 1 is number of execution we called target,
208 counter 2 is total number of executions. */
209 if (h->hvalue.counters[2])
211 struct cgraph_edge * e = node->get_edge (stmt);
212 if (e && !e->indirect_unknown_callee)
213 continue;
214 e->indirect_info->common_target_id
215 = h->hvalue.counters [0];
216 e->indirect_info->common_target_probability
217 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
218 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
220 if (dump_file)
221 fprintf (dump_file, "Probability capped to 1\n");
222 e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
225 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
226 stmt, h);
229 time += estimate_num_insns (stmt, &eni_time_weights);
230 size += estimate_num_insns (stmt, &eni_size_weights);
232 account_time_size (&hashtable, histogram, bb->count, time, size);
234 histogram.qsort (cmp_counts);
237 /* Serialize the ipa info for lto. */
239 static void
240 ipa_profile_write_summary (void)
242 struct lto_simple_output_block *ob
243 = lto_create_simple_output_block (LTO_section_ipa_profile);
244 unsigned int i;
246 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
247 for (i = 0; i < histogram.length (); i++)
249 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
250 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
251 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
253 lto_destroy_simple_output_block (ob);
256 /* Deserialize the ipa info for lto. */
258 static void
259 ipa_profile_read_summary (void)
261 struct lto_file_decl_data ** file_data_vec
262 = lto_get_file_decl_data ();
263 struct lto_file_decl_data * file_data;
264 int j = 0;
266 hash_table<histogram_hash> hashtable (10);
267 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
268 10);
270 while ((file_data = file_data_vec[j++]))
272 const char *data;
273 size_t len;
274 struct lto_input_block *ib
275 = lto_create_simple_input_block (file_data,
276 LTO_section_ipa_profile,
277 &data, &len);
278 if (ib)
280 unsigned int num = streamer_read_uhwi (ib);
281 unsigned int n;
282 for (n = 0; n < num; n++)
284 gcov_type count = streamer_read_gcov_count (ib);
285 int time = streamer_read_uhwi (ib);
286 int size = streamer_read_uhwi (ib);
287 account_time_size (&hashtable, histogram,
288 count, time, size);
290 lto_destroy_simple_input_block (file_data,
291 LTO_section_ipa_profile,
292 ib, data, len);
295 histogram.qsort (cmp_counts);
298 /* Data used by ipa_propagate_frequency. */
300 struct ipa_propagate_frequency_data
302 bool maybe_unlikely_executed;
303 bool maybe_executed_once;
304 bool only_called_at_startup;
305 bool only_called_at_exit;
308 /* Worker for ipa_propagate_frequency_1. */
310 static bool
311 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
313 struct ipa_propagate_frequency_data *d;
314 struct cgraph_edge *edge;
316 d = (struct ipa_propagate_frequency_data *)data;
317 for (edge = node->callers;
318 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
319 || d->only_called_at_startup || d->only_called_at_exit);
320 edge = edge->next_caller)
322 if (edge->caller != node)
324 d->only_called_at_startup &= edge->caller->only_called_at_startup;
325 /* It makes sense to put main() together with the static constructors.
326 It will be executed for sure, but rest of functions called from
327 main are definitely not at startup only. */
328 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
329 d->only_called_at_startup = 0;
330 d->only_called_at_exit &= edge->caller->only_called_at_exit;
333 /* When profile feedback is available, do not try to propagate too hard;
334 counts are already good guide on function frequencies and roundoff
335 errors can make us to push function into unlikely section even when
336 it is executed by the train run. Transfer the function only if all
337 callers are unlikely executed. */
338 if (profile_info && flag_branch_probabilities
339 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
340 || (edge->caller->global.inlined_to
341 && edge->caller->global.inlined_to->frequency
342 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
343 d->maybe_unlikely_executed = false;
344 if (!edge->frequency)
345 continue;
346 switch (edge->caller->frequency)
348 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
349 break;
350 case NODE_FREQUENCY_EXECUTED_ONCE:
351 if (dump_file && (dump_flags & TDF_DETAILS))
352 fprintf (dump_file, " Called by %s that is executed once\n",
353 edge->caller->name ());
354 d->maybe_unlikely_executed = false;
355 if (inline_edge_summary (edge)->loop_depth)
357 d->maybe_executed_once = false;
358 if (dump_file && (dump_flags & TDF_DETAILS))
359 fprintf (dump_file, " Called in loop\n");
361 break;
362 case NODE_FREQUENCY_HOT:
363 case NODE_FREQUENCY_NORMAL:
364 if (dump_file && (dump_flags & TDF_DETAILS))
365 fprintf (dump_file, " Called by %s that is normal or hot\n",
366 edge->caller->name ());
367 d->maybe_unlikely_executed = false;
368 d->maybe_executed_once = false;
369 break;
372 return edge != NULL;
375 /* Return ture if NODE contains hot calls. */
377 bool
378 contains_hot_call_p (struct cgraph_node *node)
380 struct cgraph_edge *e;
381 for (e = node->callees; e; e = e->next_callee)
382 if (e->maybe_hot_p ())
383 return true;
384 else if (!e->inline_failed
385 && contains_hot_call_p (e->callee))
386 return true;
387 for (e = node->indirect_calls; e; e = e->next_callee)
388 if (e->maybe_hot_p ())
389 return true;
390 return false;
393 /* See if the frequency of NODE can be updated based on frequencies of its
394 callers. */
395 bool
396 ipa_propagate_frequency (struct cgraph_node *node)
398 struct ipa_propagate_frequency_data d = {true, true, true, true};
399 bool changed = false;
401 /* We can not propagate anything useful about externally visible functions
402 nor about virtuals. */
403 if (!node->local.local
404 || node->alias
405 || (flag_devirtualize && DECL_VIRTUAL_P (node->decl)))
406 return false;
407 gcc_assert (node->analyzed);
408 if (dump_file && (dump_flags & TDF_DETAILS))
409 fprintf (dump_file, "Processing frequency %s\n", node->name ());
411 node->call_for_symbol_thunks_and_aliases (ipa_propagate_frequency_1, &d,
412 true);
414 if ((d.only_called_at_startup && !d.only_called_at_exit)
415 && !node->only_called_at_startup)
417 node->only_called_at_startup = true;
418 if (dump_file)
419 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
420 node->name ());
421 changed = true;
423 if ((d.only_called_at_exit && !d.only_called_at_startup)
424 && !node->only_called_at_exit)
426 node->only_called_at_exit = true;
427 if (dump_file)
428 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
429 node->name ());
430 changed = true;
433 /* With profile we can decide on hot/normal based on count. */
434 if (node->count)
436 bool hot = false;
437 if (node->count >= get_hot_bb_threshold ())
438 hot = true;
439 if (!hot)
440 hot |= contains_hot_call_p (node);
441 if (hot)
443 if (node->frequency != NODE_FREQUENCY_HOT)
445 if (dump_file)
446 fprintf (dump_file, "Node %s promoted to hot.\n",
447 node->name ());
448 node->frequency = NODE_FREQUENCY_HOT;
449 return true;
451 return false;
453 else if (node->frequency == NODE_FREQUENCY_HOT)
455 if (dump_file)
456 fprintf (dump_file, "Node %s reduced to normal.\n",
457 node->name ());
458 node->frequency = NODE_FREQUENCY_NORMAL;
459 changed = true;
462 /* These come either from profile or user hints; never update them. */
463 if (node->frequency == NODE_FREQUENCY_HOT
464 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
465 return changed;
466 if (d.maybe_unlikely_executed)
468 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
469 if (dump_file)
470 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
471 node->name ());
472 changed = true;
474 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
476 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
477 if (dump_file)
478 fprintf (dump_file, "Node %s promoted to executed once.\n",
479 node->name ());
480 changed = true;
482 return changed;
485 /* Simple ipa profile pass propagating frequencies across the callgraph. */
487 static unsigned int
488 ipa_profile (void)
490 struct cgraph_node **order;
491 struct cgraph_edge *e;
492 int order_pos;
493 bool something_changed = false;
494 int i;
495 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
496 struct cgraph_node *n,*n2;
497 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
498 bool node_map_initialized = false;
500 if (dump_file)
501 dump_histogram (dump_file, histogram);
502 for (i = 0; i < (int)histogram.length (); i++)
504 overall_time += histogram[i]->count * histogram[i]->time;
505 overall_size += histogram[i]->size;
507 if (overall_time)
509 gcov_type threshold;
511 gcc_assert (overall_size);
512 if (dump_file)
514 gcov_type min, cumulated_time = 0, cumulated_size = 0;
516 fprintf (dump_file, "Overall time: %"PRId64"\n",
517 (int64_t)overall_time);
518 min = get_hot_bb_threshold ();
519 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
520 i++)
522 cumulated_time += histogram[i]->count * histogram[i]->time;
523 cumulated_size += histogram[i]->size;
525 fprintf (dump_file, "GCOV min count: %"PRId64
526 " Time:%3.2f%% Size:%3.2f%%\n",
527 (int64_t)min,
528 cumulated_time * 100.0 / overall_time,
529 cumulated_size * 100.0 / overall_size);
531 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
532 threshold = 0;
533 for (i = 0; cumulated < cutoff; i++)
535 cumulated += histogram[i]->count * histogram[i]->time;
536 threshold = histogram[i]->count;
538 if (!threshold)
539 threshold = 1;
540 if (dump_file)
542 gcov_type cumulated_time = 0, cumulated_size = 0;
544 for (i = 0;
545 i < (int)histogram.length () && histogram[i]->count >= threshold;
546 i++)
548 cumulated_time += histogram[i]->count * histogram[i]->time;
549 cumulated_size += histogram[i]->size;
551 fprintf (dump_file, "Determined min count: %"PRId64
552 " Time:%3.2f%% Size:%3.2f%%\n",
553 (int64_t)threshold,
554 cumulated_time * 100.0 / overall_time,
555 cumulated_size * 100.0 / overall_size);
557 if (threshold > get_hot_bb_threshold ()
558 || in_lto_p)
560 if (dump_file)
561 fprintf (dump_file, "Threshold updated.\n");
562 set_hot_bb_threshold (threshold);
565 histogram.release ();
566 free_alloc_pool (histogram_pool);
568 /* Produce speculative calls: we saved common traget from porfiling into
569 e->common_target_id. Now, at link time, we can look up corresponding
570 function node and produce speculative call. */
572 FOR_EACH_DEFINED_FUNCTION (n)
574 bool update = false;
576 for (e = n->indirect_calls; e; e = e->next_callee)
578 if (n->count)
579 nindirect++;
580 if (e->indirect_info->common_target_id)
582 if (!node_map_initialized)
583 init_node_map (false);
584 node_map_initialized = true;
585 ncommon++;
586 n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
587 if (n2)
589 if (dump_file)
591 fprintf (dump_file, "Indirect call -> direct call from"
592 " other module %s/%i => %s/%i, prob %3.2f\n",
593 xstrdup (n->name ()), n->order,
594 xstrdup (n2->name ()), n2->order,
595 e->indirect_info->common_target_probability
596 / (float)REG_BR_PROB_BASE);
598 if (e->indirect_info->common_target_probability
599 < REG_BR_PROB_BASE / 2)
601 nuseless++;
602 if (dump_file)
603 fprintf (dump_file,
604 "Not speculating: probability is too low.\n");
606 else if (!e->maybe_hot_p ())
608 nuseless++;
609 if (dump_file)
610 fprintf (dump_file,
611 "Not speculating: call is cold.\n");
613 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
614 && n2->can_be_discarded_p ())
616 nuseless++;
617 if (dump_file)
618 fprintf (dump_file,
619 "Not speculating: target is overwritable "
620 "and can be discarded.\n");
622 else
624 /* Target may be overwritable, but profile says that
625 control flow goes to this particular implementation
626 of N2. Speculate on the local alias to allow inlining.
628 if (!n2->can_be_discarded_p ())
630 cgraph_node *alias;
631 alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
632 if (alias)
633 n2 = alias;
635 nconverted++;
636 e->make_speculative
637 (n2,
638 apply_scale (e->count,
639 e->indirect_info->common_target_probability),
640 apply_scale (e->frequency,
641 e->indirect_info->common_target_probability));
642 update = true;
645 else
647 if (dump_file)
648 fprintf (dump_file, "Function with profile-id %i not found.\n",
649 e->indirect_info->common_target_id);
650 nunknown++;
654 if (update)
655 inline_update_overall_summary (n);
657 if (node_map_initialized)
658 del_node_map ();
659 if (dump_file && nindirect)
660 fprintf (dump_file,
661 "%i indirect calls trained.\n"
662 "%i (%3.2f%%) have common target.\n"
663 "%i (%3.2f%%) targets was not found.\n"
664 "%i (%3.2f%%) speculations seems useless.\n"
665 "%i (%3.2f%%) speculations produced.\n",
666 nindirect,
667 ncommon, ncommon * 100.0 / nindirect,
668 nunknown, nunknown * 100.0 / nindirect,
669 nuseless, nuseless * 100.0 / nindirect,
670 nconverted, nconverted * 100.0 / nindirect);
672 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
673 order_pos = ipa_reverse_postorder (order);
674 for (i = order_pos - 1; i >= 0; i--)
676 if (order[i]->local.local && ipa_propagate_frequency (order[i]))
678 for (e = order[i]->callees; e; e = e->next_callee)
679 if (e->callee->local.local && !e->callee->aux)
681 something_changed = true;
682 e->callee->aux = (void *)1;
685 order[i]->aux = NULL;
688 while (something_changed)
690 something_changed = false;
691 for (i = order_pos - 1; i >= 0; i--)
693 if (order[i]->aux && ipa_propagate_frequency (order[i]))
695 for (e = order[i]->callees; e; e = e->next_callee)
696 if (e->callee->local.local && !e->callee->aux)
698 something_changed = true;
699 e->callee->aux = (void *)1;
702 order[i]->aux = NULL;
705 free (order);
706 return 0;
709 namespace {
711 const pass_data pass_data_ipa_profile =
713 IPA_PASS, /* type */
714 "profile_estimate", /* name */
715 OPTGROUP_NONE, /* optinfo_flags */
716 TV_IPA_PROFILE, /* tv_id */
717 0, /* properties_required */
718 0, /* properties_provided */
719 0, /* properties_destroyed */
720 0, /* todo_flags_start */
721 0, /* todo_flags_finish */
724 class pass_ipa_profile : public ipa_opt_pass_d
726 public:
727 pass_ipa_profile (gcc::context *ctxt)
728 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
729 ipa_profile_generate_summary, /* generate_summary */
730 ipa_profile_write_summary, /* write_summary */
731 ipa_profile_read_summary, /* read_summary */
732 NULL, /* write_optimization_summary */
733 NULL, /* read_optimization_summary */
734 NULL, /* stmt_fixup */
735 0, /* function_transform_todo_flags_start */
736 NULL, /* function_transform */
737 NULL) /* variable_transform */
740 /* opt_pass methods: */
741 virtual bool gate (function *) { return flag_ipa_profile; }
742 virtual unsigned int execute (function *) { return ipa_profile (); }
744 }; // class pass_ipa_profile
746 } // anon namespace
748 ipa_opt_pass_d *
749 make_pass_ipa_profile (gcc::context *ctxt)
751 return new pass_ipa_profile (ctxt);