* rtl.h (insn_location): Declare.
[official-gcc.git] / gcc / ipa-profile.c
blob339f38ea0dba2cfd82cbacc4efc306011bd2397e
1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
21 inter-procedurally.
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
42 of inliner.
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
47 #include "config.h"
48 #include "system.h"
49 #include "coretypes.h"
50 #include "tm.h"
51 #include "tree.h"
52 #include "cgraph.h"
53 #include "tree-pass.h"
54 #include "tree-ssa-alias.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
57 #include "gimple.h"
58 #include "gimple-iterator.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "tree-iterator.h"
62 #include "ipa-utils.h"
63 #include "profile.h"
64 #include "params.h"
65 #include "value-prof.h"
66 #include "alloc-pool.h"
67 #include "tree-inline.h"
68 #include "lto-streamer.h"
69 #include "data-streamer.h"
70 #include "ipa-inline.h"
72 /* Entry in the histogram. */
74 struct histogram_entry
76 gcov_type count;
77 int time;
78 int size;
81 /* Histogram of profile values.
82 The histogram is represented as an ordered vector of entries allocated via
83 histogram_pool. During construction a separate hashtable is kept to lookup
84 duplicate entries. */
86 vec<histogram_entry *> histogram;
87 static alloc_pool histogram_pool;
89 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
91 struct histogram_hash : typed_noop_remove <histogram_entry>
93 typedef histogram_entry value_type;
94 typedef histogram_entry compare_type;
95 static inline hashval_t hash (const value_type *);
96 static inline int equal (const value_type *, const compare_type *);
99 inline hashval_t
100 histogram_hash::hash (const histogram_entry *val)
102 return val->count;
105 inline int
106 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
108 return val->count == val2->count;
111 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
112 HASHTABLE is the on-side hash kept to avoid duplicates. */
114 static void
115 account_time_size (hash_table <histogram_hash> hashtable,
116 vec<histogram_entry *> &histogram,
117 gcov_type count, int time, int size)
119 histogram_entry key = {count, 0, 0};
120 histogram_entry **val = hashtable.find_slot (&key, INSERT);
122 if (!*val)
124 *val = (histogram_entry *) pool_alloc (histogram_pool);
125 **val = key;
126 histogram.safe_push (*val);
128 (*val)->time += time;
129 (*val)->size += size;
133 cmp_counts (const void *v1, const void *v2)
135 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
136 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
137 if (h1->count < h2->count)
138 return 1;
139 if (h1->count > h2->count)
140 return -1;
141 return 0;
144 /* Dump HISTOGRAM to FILE. */
146 static void
147 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
149 unsigned int i;
150 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
152 fprintf (dump_file, "Histogram:\n");
153 for (i = 0; i < histogram.length (); i++)
155 overall_time += histogram[i]->count * histogram[i]->time;
156 overall_size += histogram[i]->size;
158 if (!overall_time)
159 overall_time = 1;
160 if (!overall_size)
161 overall_size = 1;
162 for (i = 0; i < histogram.length (); i++)
164 cumulated_time += histogram[i]->count * histogram[i]->time;
165 cumulated_size += histogram[i]->size;
166 fprintf (file, " %"PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
167 (int64_t) histogram[i]->count,
168 histogram[i]->time,
169 cumulated_time * 100.0 / overall_time,
170 histogram[i]->size,
171 cumulated_size * 100.0 / overall_size);
175 /* Collect histogram from CFG profiles. */
177 static void
178 ipa_profile_generate_summary (void)
180 struct cgraph_node *node;
181 gimple_stmt_iterator gsi;
182 hash_table <histogram_hash> hashtable;
183 basic_block bb;
185 hashtable.create (10);
186 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
187 10);
189 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
190 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
192 int time = 0;
193 int size = 0;
194 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
196 gimple stmt = gsi_stmt (gsi);
197 if (gimple_code (stmt) == GIMPLE_CALL
198 && !gimple_call_fndecl (stmt))
200 histogram_value h;
201 h = gimple_histogram_value_of_type
202 (DECL_STRUCT_FUNCTION (node->decl),
203 stmt, HIST_TYPE_INDIR_CALL);
204 /* No need to do sanity check: gimple_ic_transform already
205 takes away bad histograms. */
206 if (h)
208 /* counter 0 is target, counter 1 is number of execution we called target,
209 counter 2 is total number of executions. */
210 if (h->hvalue.counters[2])
212 struct cgraph_edge * e = cgraph_edge (node, stmt);
213 if (e && !e->indirect_unknown_callee)
214 continue;
215 e->indirect_info->common_target_id
216 = h->hvalue.counters [0];
217 e->indirect_info->common_target_probability
218 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
219 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
221 if (dump_file)
222 fprintf (dump_file, "Probability capped to 1\n");
223 e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
226 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
227 stmt, h);
230 time += estimate_num_insns (stmt, &eni_time_weights);
231 size += estimate_num_insns (stmt, &eni_size_weights);
233 account_time_size (hashtable, histogram, bb->count, time, size);
235 hashtable.dispose ();
236 histogram.qsort (cmp_counts);
239 /* Serialize the ipa info for lto. */
241 static void
242 ipa_profile_write_summary (void)
244 struct lto_simple_output_block *ob
245 = lto_create_simple_output_block (LTO_section_ipa_profile);
246 unsigned int i;
248 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
249 for (i = 0; i < histogram.length (); i++)
251 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
252 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
253 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
255 lto_destroy_simple_output_block (ob);
258 /* Deserialize the ipa info for lto. */
260 static void
261 ipa_profile_read_summary (void)
263 struct lto_file_decl_data ** file_data_vec
264 = lto_get_file_decl_data ();
265 struct lto_file_decl_data * file_data;
266 hash_table <histogram_hash> hashtable;
267 int j = 0;
269 hashtable.create (10);
270 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
271 10);
273 while ((file_data = file_data_vec[j++]))
275 const char *data;
276 size_t len;
277 struct lto_input_block *ib
278 = lto_create_simple_input_block (file_data,
279 LTO_section_ipa_profile,
280 &data, &len);
281 if (ib)
283 unsigned int num = streamer_read_uhwi (ib);
284 unsigned int n;
285 for (n = 0; n < num; n++)
287 gcov_type count = streamer_read_gcov_count (ib);
288 int time = streamer_read_uhwi (ib);
289 int size = streamer_read_uhwi (ib);
290 account_time_size (hashtable, histogram,
291 count, time, size);
293 lto_destroy_simple_input_block (file_data,
294 LTO_section_ipa_profile,
295 ib, data, len);
298 hashtable.dispose ();
299 histogram.qsort (cmp_counts);
/* State shared between ipa_propagate_frequency and its per-node worker.
   Every flag starts out true and is cleared as soon as some caller
   contradicts it.  */

struct ipa_propagate_frequency_data
{
  /* No caller seen so far rules out the unlikely-executed frequency.  */
  bool maybe_unlikely_executed;
  /* No caller seen so far rules out the executed-once frequency.  */
  bool maybe_executed_once;
  /* All callers seen so far are themselves only called at startup.  */
  bool only_called_at_startup;
  /* All callers seen so far are themselves only called at exit.  */
  bool only_called_at_exit;
};
312 /* Worker for ipa_propagate_frequency_1. */
314 static bool
315 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
317 struct ipa_propagate_frequency_data *d;
318 struct cgraph_edge *edge;
320 d = (struct ipa_propagate_frequency_data *)data;
321 for (edge = node->callers;
322 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
323 || d->only_called_at_startup || d->only_called_at_exit);
324 edge = edge->next_caller)
326 if (edge->caller != node)
328 d->only_called_at_startup &= edge->caller->only_called_at_startup;
329 /* It makes sense to put main() together with the static constructors.
330 It will be executed for sure, but rest of functions called from
331 main are definitely not at startup only. */
332 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
333 d->only_called_at_startup = 0;
334 d->only_called_at_exit &= edge->caller->only_called_at_exit;
337 /* When profile feedback is available, do not try to propagate too hard;
338 counts are already good guide on function frequencies and roundoff
339 errors can make us to push function into unlikely section even when
340 it is executed by the train run. Transfer the function only if all
341 callers are unlikely executed. */
342 if (profile_info && flag_branch_probabilities
343 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
344 || (edge->caller->global.inlined_to
345 && edge->caller->global.inlined_to->frequency
346 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
347 d->maybe_unlikely_executed = false;
348 if (!edge->frequency)
349 continue;
350 switch (edge->caller->frequency)
352 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
353 break;
354 case NODE_FREQUENCY_EXECUTED_ONCE:
355 if (dump_file && (dump_flags & TDF_DETAILS))
356 fprintf (dump_file, " Called by %s that is executed once\n",
357 edge->caller->name ());
358 d->maybe_unlikely_executed = false;
359 if (inline_edge_summary (edge)->loop_depth)
361 d->maybe_executed_once = false;
362 if (dump_file && (dump_flags & TDF_DETAILS))
363 fprintf (dump_file, " Called in loop\n");
365 break;
366 case NODE_FREQUENCY_HOT:
367 case NODE_FREQUENCY_NORMAL:
368 if (dump_file && (dump_flags & TDF_DETAILS))
369 fprintf (dump_file, " Called by %s that is normal or hot\n",
370 edge->caller->name ());
371 d->maybe_unlikely_executed = false;
372 d->maybe_executed_once = false;
373 break;
376 return edge != NULL;
379 /* Return ture if NODE contains hot calls. */
381 bool
382 contains_hot_call_p (struct cgraph_node *node)
384 struct cgraph_edge *e;
385 for (e = node->callees; e; e = e->next_callee)
386 if (cgraph_maybe_hot_edge_p (e))
387 return true;
388 else if (!e->inline_failed
389 && contains_hot_call_p (e->callee))
390 return true;
391 for (e = node->indirect_calls; e; e = e->next_callee)
392 if (cgraph_maybe_hot_edge_p (e))
393 return true;
394 return false;
397 /* See if the frequency of NODE can be updated based on frequencies of its
398 callers. */
399 bool
400 ipa_propagate_frequency (struct cgraph_node *node)
402 struct ipa_propagate_frequency_data d = {true, true, true, true};
403 bool changed = false;
405 /* We can not propagate anything useful about externally visible functions
406 nor about virtuals. */
407 if (!node->local.local
408 || node->alias
409 || (flag_devirtualize && DECL_VIRTUAL_P (node->decl)))
410 return false;
411 gcc_assert (node->analyzed);
412 if (dump_file && (dump_flags & TDF_DETAILS))
413 fprintf (dump_file, "Processing frequency %s\n", node->name ());
415 cgraph_for_node_and_aliases (node, ipa_propagate_frequency_1, &d, true);
417 if ((d.only_called_at_startup && !d.only_called_at_exit)
418 && !node->only_called_at_startup)
420 node->only_called_at_startup = true;
421 if (dump_file)
422 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
423 node->name ());
424 changed = true;
426 if ((d.only_called_at_exit && !d.only_called_at_startup)
427 && !node->only_called_at_exit)
429 node->only_called_at_exit = true;
430 if (dump_file)
431 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
432 node->name ());
433 changed = true;
436 /* With profile we can decide on hot/normal based on count. */
437 if (node->count)
439 bool hot = false;
440 if (node->count >= get_hot_bb_threshold ())
441 hot = true;
442 if (!hot)
443 hot |= contains_hot_call_p (node);
444 if (hot)
446 if (node->frequency != NODE_FREQUENCY_HOT)
448 if (dump_file)
449 fprintf (dump_file, "Node %s promoted to hot.\n",
450 node->name ());
451 node->frequency = NODE_FREQUENCY_HOT;
452 return true;
454 return false;
456 else if (node->frequency == NODE_FREQUENCY_HOT)
458 if (dump_file)
459 fprintf (dump_file, "Node %s reduced to normal.\n",
460 node->name ());
461 node->frequency = NODE_FREQUENCY_NORMAL;
462 changed = true;
465 /* These come either from profile or user hints; never update them. */
466 if (node->frequency == NODE_FREQUENCY_HOT
467 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
468 return changed;
469 if (d.maybe_unlikely_executed)
471 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
472 if (dump_file)
473 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
474 node->name ());
475 changed = true;
477 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
479 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
480 if (dump_file)
481 fprintf (dump_file, "Node %s promoted to executed once.\n",
482 node->name ());
483 changed = true;
485 return changed;
488 /* Simple ipa profile pass propagating frequencies across the callgraph. */
490 static unsigned int
491 ipa_profile (void)
493 struct cgraph_node **order;
494 struct cgraph_edge *e;
495 int order_pos;
496 bool something_changed = false;
497 int i;
498 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
499 struct cgraph_node *n,*n2;
500 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
501 bool node_map_initialized = false;
503 if (dump_file)
504 dump_histogram (dump_file, histogram);
505 for (i = 0; i < (int)histogram.length (); i++)
507 overall_time += histogram[i]->count * histogram[i]->time;
508 overall_size += histogram[i]->size;
510 if (overall_time)
512 gcov_type threshold;
514 gcc_assert (overall_size);
515 if (dump_file)
517 gcov_type min, cumulated_time = 0, cumulated_size = 0;
519 fprintf (dump_file, "Overall time: %"PRId64"\n",
520 (int64_t)overall_time);
521 min = get_hot_bb_threshold ();
522 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
523 i++)
525 cumulated_time += histogram[i]->count * histogram[i]->time;
526 cumulated_size += histogram[i]->size;
528 fprintf (dump_file, "GCOV min count: %"PRId64
529 " Time:%3.2f%% Size:%3.2f%%\n",
530 (int64_t)min,
531 cumulated_time * 100.0 / overall_time,
532 cumulated_size * 100.0 / overall_size);
534 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
535 threshold = 0;
536 for (i = 0; cumulated < cutoff; i++)
538 cumulated += histogram[i]->count * histogram[i]->time;
539 threshold = histogram[i]->count;
541 if (!threshold)
542 threshold = 1;
543 if (dump_file)
545 gcov_type cumulated_time = 0, cumulated_size = 0;
547 for (i = 0;
548 i < (int)histogram.length () && histogram[i]->count >= threshold;
549 i++)
551 cumulated_time += histogram[i]->count * histogram[i]->time;
552 cumulated_size += histogram[i]->size;
554 fprintf (dump_file, "Determined min count: %"PRId64
555 " Time:%3.2f%% Size:%3.2f%%\n",
556 (int64_t)threshold,
557 cumulated_time * 100.0 / overall_time,
558 cumulated_size * 100.0 / overall_size);
560 if (threshold > get_hot_bb_threshold ()
561 || in_lto_p)
563 if (dump_file)
564 fprintf (dump_file, "Threshold updated.\n");
565 set_hot_bb_threshold (threshold);
568 histogram.release ();
569 free_alloc_pool (histogram_pool);
571 /* Produce speculative calls: we saved common traget from porfiling into
572 e->common_target_id. Now, at link time, we can look up corresponding
573 function node and produce speculative call. */
575 FOR_EACH_DEFINED_FUNCTION (n)
577 bool update = false;
579 for (e = n->indirect_calls; e; e = e->next_callee)
581 if (n->count)
582 nindirect++;
583 if (e->indirect_info->common_target_id)
585 if (!node_map_initialized)
586 init_node_map (false);
587 node_map_initialized = true;
588 ncommon++;
589 n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
590 if (n2)
592 if (dump_file)
594 fprintf (dump_file, "Indirect call -> direct call from"
595 " other module %s/%i => %s/%i, prob %3.2f\n",
596 xstrdup (n->name ()), n->order,
597 xstrdup (n2->name ()), n2->order,
598 e->indirect_info->common_target_probability
599 / (float)REG_BR_PROB_BASE);
601 if (e->indirect_info->common_target_probability
602 < REG_BR_PROB_BASE / 2)
604 nuseless++;
605 if (dump_file)
606 fprintf (dump_file,
607 "Not speculating: probability is too low.\n");
609 else if (!cgraph_maybe_hot_edge_p (e))
611 nuseless++;
612 if (dump_file)
613 fprintf (dump_file,
614 "Not speculating: call is cold.\n");
616 else if (cgraph_function_body_availability (n2)
617 <= AVAIL_OVERWRITABLE
618 && symtab_can_be_discarded (n2))
620 nuseless++;
621 if (dump_file)
622 fprintf (dump_file,
623 "Not speculating: target is overwritable "
624 "and can be discarded.\n");
626 else
628 /* Target may be overwritable, but profile says that
629 control flow goes to this particular implementation
630 of N2. Speculate on the local alias to allow inlining.
632 if (!symtab_can_be_discarded (n2))
634 cgraph_node *alias;
635 alias = cgraph (symtab_nonoverwritable_alias
636 (n2));
637 if (alias)
638 n2 = alias;
640 nconverted++;
641 cgraph_turn_edge_to_speculative
642 (e, n2,
643 apply_scale (e->count,
644 e->indirect_info->common_target_probability),
645 apply_scale (e->frequency,
646 e->indirect_info->common_target_probability));
647 update = true;
650 else
652 if (dump_file)
653 fprintf (dump_file, "Function with profile-id %i not found.\n",
654 e->indirect_info->common_target_id);
655 nunknown++;
659 if (update)
660 inline_update_overall_summary (n);
662 if (node_map_initialized)
663 del_node_map ();
664 if (dump_file && nindirect)
665 fprintf (dump_file,
666 "%i indirect calls trained.\n"
667 "%i (%3.2f%%) have common target.\n"
668 "%i (%3.2f%%) targets was not found.\n"
669 "%i (%3.2f%%) speculations seems useless.\n"
670 "%i (%3.2f%%) speculations produced.\n",
671 nindirect,
672 ncommon, ncommon * 100.0 / nindirect,
673 nunknown, nunknown * 100.0 / nindirect,
674 nuseless, nuseless * 100.0 / nindirect,
675 nconverted, nconverted * 100.0 / nindirect);
677 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
678 order_pos = ipa_reverse_postorder (order);
679 for (i = order_pos - 1; i >= 0; i--)
681 if (order[i]->local.local && ipa_propagate_frequency (order[i]))
683 for (e = order[i]->callees; e; e = e->next_callee)
684 if (e->callee->local.local && !e->callee->aux)
686 something_changed = true;
687 e->callee->aux = (void *)1;
690 order[i]->aux = NULL;
693 while (something_changed)
695 something_changed = false;
696 for (i = order_pos - 1; i >= 0; i--)
698 if (order[i]->aux && ipa_propagate_frequency (order[i]))
700 for (e = order[i]->callees; e; e = e->next_callee)
701 if (e->callee->local.local && !e->callee->aux)
703 something_changed = true;
704 e->callee->aux = (void *)1;
707 order[i]->aux = NULL;
710 free (order);
711 return 0;
714 namespace {
716 const pass_data pass_data_ipa_profile =
718 IPA_PASS, /* type */
719 "profile_estimate", /* name */
720 OPTGROUP_NONE, /* optinfo_flags */
721 true, /* has_execute */
722 TV_IPA_PROFILE, /* tv_id */
723 0, /* properties_required */
724 0, /* properties_provided */
725 0, /* properties_destroyed */
726 0, /* todo_flags_start */
727 0, /* todo_flags_finish */
730 class pass_ipa_profile : public ipa_opt_pass_d
732 public:
733 pass_ipa_profile (gcc::context *ctxt)
734 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
735 ipa_profile_generate_summary, /* generate_summary */
736 ipa_profile_write_summary, /* write_summary */
737 ipa_profile_read_summary, /* read_summary */
738 NULL, /* write_optimization_summary */
739 NULL, /* read_optimization_summary */
740 NULL, /* stmt_fixup */
741 0, /* function_transform_todo_flags_start */
742 NULL, /* function_transform */
743 NULL) /* variable_transform */
746 /* opt_pass methods: */
747 virtual bool gate (function *) { return flag_ipa_profile; }
748 virtual unsigned int execute (function *) { return ipa_profile (); }
750 }; // class pass_ipa_profile
752 } // anon namespace
754 ipa_opt_pass_d *
755 make_pass_ipa_profile (gcc::context *ctxt)
757 return new pass_ipa_profile (ctxt);