Daily bump.
[official-gcc.git] / gcc / ipa-profile.c
blobfc231c916b78e2455cbdc1bbccb01f3cc3c3c2ac
1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
21 inter-procedurally.
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 The information is used to set hot/cold thresholds.
29 - Next speculative indirect call resolution is performed: the local
30 profile pass assigns profile-id to each function and provides us with a
31 histogram specifying the most common target. We look up the callgraph
32 node corresponding to the target and produce a speculative call.
34 This call may or may not survive through IPA optimization based on decision
35 of inliner.
36 - Finally we propagate the following flags: unlikely executed, executed
37 once, executed at startup and executed at exit. These flags are used to
38 control code size/performance threshold and code placement (by producing
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
40 #include "config.h"
41 #include "system.h"
42 #include "coretypes.h"
43 #include "backend.h"
44 #include "tree.h"
45 #include "gimple.h"
46 #include "predict.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
49 #include "cgraph.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
53 #include "profile.h"
54 #include "value-prof.h"
55 #include "tree-inline.h"
56 #include "symbol-summary.h"
57 #include "tree-vrp.h"
58 #include "ipa-prop.h"
59 #include "ipa-fnsummary.h"
61 /* Entry in the histogram. */
63 struct histogram_entry
65 gcov_type count;
66 int time;
67 int size;
70 /* Histogram of profile values.
71 The histogram is represented as an ordered vector of entries allocated via
72 histogram_pool. During construction a separate hashtable is kept to lookup
73 duplicate entries. */
75 vec<histogram_entry *> histogram;
76 static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
80 struct histogram_hash : nofree_ptr_hash <histogram_entry>
82 static inline hashval_t hash (const histogram_entry *);
83 static inline int equal (const histogram_entry *, const histogram_entry *);
86 inline hashval_t
87 histogram_hash::hash (const histogram_entry *val)
89 return val->count;
92 inline int
93 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
95 return val->count == val2->count;
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99 HASHTABLE is the on-side hash kept to avoid duplicates. */
101 static void
102 account_time_size (hash_table<histogram_hash> *hashtable,
103 vec<histogram_entry *> &histogram,
104 gcov_type count, int time, int size)
106 histogram_entry key = {count, 0, 0};
107 histogram_entry **val = hashtable->find_slot (&key, INSERT);
109 if (!*val)
111 *val = histogram_pool.allocate ();
112 **val = key;
113 histogram.safe_push (*val);
115 (*val)->time += time;
116 (*val)->size += size;
120 cmp_counts (const void *v1, const void *v2)
122 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
123 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
124 if (h1->count < h2->count)
125 return 1;
126 if (h1->count > h2->count)
127 return -1;
128 return 0;
131 /* Dump HISTOGRAM to FILE. */
133 static void
134 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
136 unsigned int i;
137 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
139 fprintf (dump_file, "Histogram:\n");
140 for (i = 0; i < histogram.length (); i++)
142 overall_time += histogram[i]->count * histogram[i]->time;
143 overall_size += histogram[i]->size;
145 if (!overall_time)
146 overall_time = 1;
147 if (!overall_size)
148 overall_size = 1;
149 for (i = 0; i < histogram.length (); i++)
151 cumulated_time += histogram[i]->count * histogram[i]->time;
152 cumulated_size += histogram[i]->size;
153 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
154 (int64_t) histogram[i]->count,
155 histogram[i]->time,
156 cumulated_time * 100.0 / overall_time,
157 histogram[i]->size,
158 cumulated_size * 100.0 / overall_size);
/* A single speculative call target taken from the profile: the profile id
   of the likely callee together with the probability of hitting it.  */

struct speculative_call_target
{
  /* Build a target with profile id ID and probability PROB; both default
     to zero.  */
  speculative_call_target (unsigned int id = 0, int prob = 0)
    : target_id (id), target_probability (prob)
  {
  }

  /* Profile_id of target obtained from profile.  */
  unsigned int target_id;

  /* Probability that call will land in function with target_id.  */
  unsigned int target_probability;
};
177 class speculative_call_summary
179 public:
180 speculative_call_summary () : speculative_call_targets ()
183 auto_vec<speculative_call_target> speculative_call_targets;
185 void dump (FILE *f);
189 /* Class to manage call summaries. */
191 class ipa_profile_call_summaries
192 : public call_summary<speculative_call_summary *>
194 public:
195 ipa_profile_call_summaries (symbol_table *table)
196 : call_summary<speculative_call_summary *> (table)
199 /* Duplicate info when an edge is cloned. */
200 virtual void duplicate (cgraph_edge *, cgraph_edge *,
201 speculative_call_summary *old_sum,
202 speculative_call_summary *new_sum);
205 static ipa_profile_call_summaries *call_sums = NULL;
207 /* Dump all information in speculative call summary to F. */
209 void
210 speculative_call_summary::dump (FILE *f)
212 cgraph_node *n2;
214 unsigned spec_count = speculative_call_targets.length ();
215 for (unsigned i = 0; i < spec_count; i++)
217 speculative_call_target item = speculative_call_targets[i];
218 n2 = find_func_by_profile_id (item.target_id);
219 if (n2)
220 fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i,
221 n2->dump_name (),
222 item.target_probability / (float) REG_BR_PROB_BASE);
223 else
224 fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i,
225 item.target_id,
226 item.target_probability / (float) REG_BR_PROB_BASE);
230 /* Duplicate info when an edge is cloned. */
232 void
233 ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
234 speculative_call_summary *old_sum,
235 speculative_call_summary *new_sum)
237 if (!old_sum)
238 return;
240 unsigned old_count = old_sum->speculative_call_targets.length ();
241 if (!old_count)
242 return;
244 new_sum->speculative_call_targets.reserve_exact (old_count);
245 new_sum->speculative_call_targets.quick_grow_cleared (old_count);
247 for (unsigned i = 0; i < old_count; i++)
249 new_sum->speculative_call_targets[i]
250 = old_sum->speculative_call_targets[i];
254 /* Collect histogram and speculative target summaries from CFG profiles. */
/* Creates CALL_SUMS (it must not exist yet) and then visits every function
   with a gimple body whose entry block count satisfies ipa_p ().  For each
   basic block of such a function the time and size estimates of its
   statements are summed; blocks whose count is an initialized IPA count
   have these sums accounted into the global HISTOGRAM under that count.
   For call statements without a known fndecl that carry a
   HIST_TYPE_INDIR_CALL histogram, up to GCOV_TOPN_VALUES most common
   targets are stored in the summary of the statement's edge and the
   histogram is removed from the statement afterwards.  At the end
   HISTOGRAM is sorted with cmp_counts.  */
256 static void
257 ipa_profile_generate_summary (void)
259 struct cgraph_node *node;
260 gimple_stmt_iterator gsi;
261 basic_block bb;
/* Side table used by account_time_size to find the entry for a count.  */
263 hash_table<histogram_hash> hashtable (10);
265 gcc_checking_assert (!call_sums);
266 call_sums = new ipa_profile_call_summaries (symtab);
268 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
269 if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
270 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
/* Per-block accumulators of the statement estimates.  */
272 int time = 0;
273 int size = 0;
274 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
276 gimple *stmt = gsi_stmt (gsi);
/* Only calls without a known function declaration are examined for
   indirect call histograms.  */
277 if (gimple_code (stmt) == GIMPLE_CALL
278 && !gimple_call_fndecl (stmt))
280 histogram_value h;
281 h = gimple_histogram_value_of_type
282 (DECL_STRUCT_FUNCTION (node->decl),
283 stmt, HIST_TYPE_INDIR_CALL);
284 /* No need to do sanity check: gimple_ic_transform already
285 takes away bad histograms. */
286 if (h)
288 gcov_type val, count, all;
289 struct cgraph_edge *e = node->get_edge (stmt);
/* An edge whose callee is already known is skipped.  Note that this
   continue leaves the statement loop iteration, so the time/size
   accounting and the histogram removal below are bypassed for this
   statement as well.
   NOTE(review): when get_edge returns NULL, E is still handed to
   get_create below; presumably every indirect call statement has an
   edge -- confirm.  */
290 if (e && !e->indirect_unknown_callee)
291 continue;
293 speculative_call_summary *csum
294 = call_sums->get_create (e);
296 for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++)
298 if (!get_nth_most_common_value (NULL, "indirect call",
299 h, &val, &count, &all,
301 continue;
/* A value of zero is never recorded as a target (the LTO writer in this
   file asserts that every stored target id is nonzero).  */
303 if (val == 0)
304 continue;
/* The probability is COUNT scaled against ALL; it is clamped to
   REG_BR_PROB_BASE below.  */
306 speculative_call_target item (
307 val, GCOV_COMPUTE_SCALE (count, all));
308 if (item.target_probability > REG_BR_PROB_BASE)
310 if (dump_file)
311 fprintf (dump_file,
312 "Probability capped to 1\n");
313 item.target_probability = REG_BR_PROB_BASE;
315 csum->speculative_call_targets.safe_push (item);
/* The targets now live in the edge summary; drop the histogram from the
   statement.  */
318 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
319 stmt, h);
322 time += estimate_num_insns (stmt, &eni_time_weights);
323 size += estimate_num_insns (stmt, &eni_size_weights);
/* Only blocks with an initialized IPA count contribute to the histogram.  */
325 if (bb->count.ipa_p () && bb->count.initialized_p ())
326 account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (),
327 time, size);
/* Order the entries by decreasing count (see cmp_counts).  */
329 histogram.qsort (cmp_counts);
332 /* Serialize the speculative summary info for LTO. */
334 static void
335 ipa_profile_write_edge_summary (lto_simple_output_block *ob,
336 speculative_call_summary *csum)
338 unsigned len = 0;
340 len = csum->speculative_call_targets.length ();
342 gcc_assert (len <= GCOV_TOPN_VALUES);
344 streamer_write_hwi_stream (ob->main_stream, len);
346 if (len)
348 unsigned spec_count = csum->speculative_call_targets.length ();
349 for (unsigned i = 0; i < spec_count; i++)
351 speculative_call_target item = csum->speculative_call_targets[i];
352 gcc_assert (item.target_id);
353 streamer_write_hwi_stream (ob->main_stream, item.target_id);
354 streamer_write_hwi_stream (ob->main_stream, item.target_probability);
359 /* Serialize the ipa info for lto. */
361 static void
362 ipa_profile_write_summary (void)
364 struct lto_simple_output_block *ob
365 = lto_create_simple_output_block (LTO_section_ipa_profile);
366 unsigned int i;
368 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
369 for (i = 0; i < histogram.length (); i++)
371 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
372 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
373 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
376 if (!call_sums)
377 return;
379 /* Serialize speculative targets information. */
380 unsigned int count = 0;
381 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
382 lto_symtab_encoder_iterator lsei;
383 cgraph_node *node;
385 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
386 lsei_next_function_in_partition (&lsei))
388 node = lsei_cgraph_node (lsei);
389 if (node->definition && node->has_gimple_body_p ()
390 && node->indirect_calls)
391 count++;
394 streamer_write_uhwi_stream (ob->main_stream, count);
396 /* Process all of the functions. */
397 for (lsei = lsei_start_function_in_partition (encoder);
398 !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
400 cgraph_node *node = lsei_cgraph_node (lsei);
401 if (node->definition && node->has_gimple_body_p ()
402 && node->indirect_calls)
404 int node_ref = lto_symtab_encoder_encode (encoder, node);
405 streamer_write_uhwi_stream (ob->main_stream, node_ref);
407 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
409 speculative_call_summary *csum = call_sums->get_create (e);
410 ipa_profile_write_edge_summary (ob, csum);
415 lto_destroy_simple_output_block (ob);
418 /* Dump all profile summary data for all cgraph nodes and edges to file F. */
420 static void
421 ipa_profile_dump_all_summaries (FILE *f)
423 fprintf (dump_file,
424 "\n========== IPA-profile speculative targets: ==========\n");
425 cgraph_node *node;
426 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
428 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
429 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
431 fprintf (f, " Summary for %s of indirect edge %d:\n",
432 e->caller->dump_name (), e->lto_stmt_uid);
433 speculative_call_summary *csum = call_sums->get_create (e);
434 csum->dump (f);
437 fprintf (f, "\n\n");
440 /* Read speculative targets information about edge for LTO WPA. */
442 static void
443 ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
445 unsigned i, len;
447 len = streamer_read_hwi (ib);
448 gcc_assert (len <= GCOV_TOPN_VALUES);
450 speculative_call_summary *csum = call_sums->get_create (edge);
452 for (i = 0; i < len; i++)
454 speculative_call_target item (streamer_read_hwi (ib),
455 streamer_read_hwi (ib));
456 csum->speculative_call_targets.safe_push (item);
460 /* Read profile speculative targets section information for LTO WPA. */
462 static void
463 ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
464 class lto_input_block *ib)
466 if (!ib)
467 return;
469 lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
471 unsigned int count = streamer_read_uhwi (ib);
473 unsigned int i;
474 unsigned int index;
475 cgraph_node * node;
477 for (i = 0; i < count; i++)
479 index = streamer_read_uhwi (ib);
480 encoder = file_data->symtab_node_encoder;
481 node
482 = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
484 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
485 ipa_profile_read_edge_summary (ib, e);
489 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
492 static void
493 ipa_profile_read_summary (void)
495 struct lto_file_decl_data ** file_data_vec
496 = lto_get_file_decl_data ();
497 struct lto_file_decl_data * file_data;
498 int j = 0;
500 hash_table<histogram_hash> hashtable (10);
502 gcc_checking_assert (!call_sums);
503 call_sums = new ipa_profile_call_summaries (symtab);
505 while ((file_data = file_data_vec[j++]))
507 const char *data;
508 size_t len;
509 class lto_input_block *ib
510 = lto_create_simple_input_block (file_data,
511 LTO_section_ipa_profile,
512 &data, &len);
513 if (ib)
515 unsigned int num = streamer_read_uhwi (ib);
516 unsigned int n;
517 for (n = 0; n < num; n++)
519 gcov_type count = streamer_read_gcov_count (ib);
520 int time = streamer_read_uhwi (ib);
521 int size = streamer_read_uhwi (ib);
522 account_time_size (&hashtable, histogram,
523 count, time, size);
526 ipa_profile_read_summary_section (file_data, ib);
528 lto_destroy_simple_input_block (file_data,
529 LTO_section_ipa_profile,
530 ib, data, len);
533 histogram.qsort (cmp_counts);
536 /* Data used by ipa_propagate_frequency. */
538 struct ipa_propagate_frequency_data
540 cgraph_node *function_symbol;
541 bool maybe_unlikely_executed;
542 bool maybe_executed_once;
543 bool only_called_at_startup;
544 bool only_called_at_exit;
547 /* Worker for ipa_propagate_frequency.  Walks the callers of NODE and
   clears those flags of DATA (a struct ipa_propagate_frequency_data) that
   the callers rule out.  The walk stops as soon as all four flags are
   false.  Returns true iff the walk stopped before the caller list was
   exhausted, i.e. when nothing is left to be learnt. */
549 static bool
550 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
552 struct ipa_propagate_frequency_data *d;
553 struct cgraph_edge *edge;
555 d = (struct ipa_propagate_frequency_data *)data;
556 for (edge = node->callers;
557 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
558 || d->only_called_at_startup || d->only_called_at_exit);
559 edge = edge->next_caller)
/* Calls made by the analyzed function itself do not influence its
   startup/exit flags.  */
561 if (edge->caller != d->function_symbol)
563 d->only_called_at_startup &= edge->caller->only_called_at_startup;
564 /* It makes sense to put main() together with the static constructors.
565 It will be executed for sure, but rest of functions called from
566 main are definitely not at startup only. */
567 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
568 d->only_called_at_startup = 0;
569 d->only_called_at_exit &= edge->caller->only_called_at_exit;
572 /* When profile feedback is available, do not try to propagate too hard;
573 counts are already good guide on function frequencies and roundoff
574 errors can make us to push function into unlikely section even when
575 it is executed by the train run. Transfer the function only if all
576 callers are unlikely executed. */
577 if (profile_info
578 && !(edge->callee->count.ipa () == profile_count::zero ())
579 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
580 || (edge->caller->inlined_to
581 && edge->caller->inlined_to->frequency
582 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
583 d->maybe_unlikely_executed = false;
/* Edges whose IPA count is initialized but not nonzero are ignored by the
   classification below.  */
584 if (edge->count.ipa ().initialized_p ()
585 && !edge->count.ipa ().nonzero_p ())
586 continue;
/* Classify by the frequency of the caller: an unlikely executed caller
   clears nothing; a caller executed once rules out "unlikely executed"
   and, when the call summary reports a nonzero loop depth, "executed
   once" as well; a normal or hot caller rules out both.  */
587 switch (edge->caller->frequency)
589 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
590 break;
591 case NODE_FREQUENCY_EXECUTED_ONCE:
593 if (dump_file && (dump_flags & TDF_DETAILS))
594 fprintf (dump_file, " Called by %s that is executed once\n",
595 edge->caller->dump_name ());
596 d->maybe_unlikely_executed = false;
597 ipa_call_summary *s = ipa_call_summaries->get (edge);
598 if (s != NULL && s->loop_depth)
600 d->maybe_executed_once = false;
601 if (dump_file && (dump_flags & TDF_DETAILS))
602 fprintf (dump_file, " Called in loop\n");
604 break;
606 case NODE_FREQUENCY_HOT:
607 case NODE_FREQUENCY_NORMAL:
608 if (dump_file && (dump_flags & TDF_DETAILS))
609 fprintf (dump_file, " Called by %s that is normal or hot\n",
610 edge->caller->dump_name ());
611 d->maybe_unlikely_executed = false;
612 d->maybe_executed_once = false;
613 break;
/* EDGE is non-NULL here only when the loop ended because every flag had
   been cleared.  */
616 return edge != NULL;
619 /* Return ture if NODE contains hot calls. */
621 bool
622 contains_hot_call_p (struct cgraph_node *node)
624 struct cgraph_edge *e;
625 for (e = node->callees; e; e = e->next_callee)
626 if (e->maybe_hot_p ())
627 return true;
628 else if (!e->inline_failed
629 && contains_hot_call_p (e->callee))
630 return true;
631 for (e = node->indirect_calls; e; e = e->next_callee)
632 if (e->maybe_hot_p ())
633 return true;
634 return false;
637 /* See if the frequency of NODE can be updated based on frequencies of its
638 callers. */
/* The callers of NODE and of its aliases are inspected by
   ipa_propagate_frequency_1.  Depending on the outcome NODE may be marked
   as only called at startup or only called at exit, and its frequency may
   become hot, normal, unlikely executed or executed once.  Returns true
   when something about NODE was changed, with the exception noted below
   for nodes that are already hot.  Nothing is done, and false is
   returned, for nodes that are not local, for aliases and for virtual
   functions when devirtualization is enabled for them.  */
639 bool
640 ipa_propagate_frequency (struct cgraph_node *node)
/* All four flags start out true; the caller walk below clears them.  */
642 struct ipa_propagate_frequency_data d = {node, true, true, true, true};
643 bool changed = false;
645 /* We cannot propagate anything useful about externally visible functions
646 nor about virtuals. */
647 if (!node->local
648 || node->alias
649 || (opt_for_fn (node->decl, flag_devirtualize)
650 && DECL_VIRTUAL_P (node->decl)))
651 return false;
652 gcc_assert (node->analyzed);
653 if (dump_file && (dump_flags & TDF_DETAILS))
654 fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
656 node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
657 true);
/* The startup and exit flags are set only when exactly one of the two
   survived the caller walk and NODE does not carry the flag yet.  */
659 if ((d.only_called_at_startup && !d.only_called_at_exit)
660 && !node->only_called_at_startup)
662 node->only_called_at_startup = true;
663 if (dump_file)
664 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
665 node->dump_name ());
666 changed = true;
668 if ((d.only_called_at_exit && !d.only_called_at_startup)
669 && !node->only_called_at_exit)
671 node->only_called_at_exit = true;
672 if (dump_file)
673 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
674 node->dump_name ());
675 changed = true;
678 /* With profile we can decide on hot/normal based on count. */
679 if (node->count. ipa().initialized_p ())
/* NODE is hot when its IPA count is not zero and reaches the hot BB
   threshold, or when it contains a hot call.  */
681 bool hot = false;
682 if (!(node->count. ipa() == profile_count::zero ())
683 && node->count. ipa() >= get_hot_bb_threshold ())
684 hot = true;
685 if (!hot)
686 hot |= contains_hot_call_p (node);
687 if (hot)
689 if (node->frequency != NODE_FREQUENCY_HOT)
691 if (dump_file)
692 fprintf (dump_file, "Node %s promoted to hot.\n",
693 node->dump_name ());
694 node->frequency = NODE_FREQUENCY_HOT;
695 return true;
/* NOTE(review): for a node that is already hot this returns false even
   when CHANGED was set by the startup/exit handling above -- confirm that
   this is intended.  */
697 return false;
699 else if (node->frequency == NODE_FREQUENCY_HOT)
701 if (dump_file)
702 fprintf (dump_file, "Node %s reduced to normal.\n",
703 node->dump_name ());
704 node->frequency = NODE_FREQUENCY_NORMAL;
705 changed = true;
708 /* These come either from profile or user hints; never update them. */
709 if (node->frequency == NODE_FREQUENCY_HOT
710 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
711 return changed;
/* "Unlikely executed" takes precedence over "executed once" when both
   survived the caller walk.  */
712 if (d.maybe_unlikely_executed)
714 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
715 if (dump_file)
716 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
717 node->dump_name ());
718 changed = true;
720 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
722 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
723 if (dump_file)
724 fprintf (dump_file, "Node %s promoted to executed once.\n",
725 node->dump_name ());
726 changed = true;
728 return changed;
731 /* Check that number of arguments of N agrees with E.
732 Be conservative when summaries are not present. */
734 static bool
735 check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
737 if (!ipa_node_params_sum || !ipa_edge_args_sum)
738 return true;
739 class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ());
740 if (!info)
741 return true;
742 ipa_edge_args *e_info = IPA_EDGE_REF (e);
743 if (!e_info)
744 return true;
745 if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
746 && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
747 || !stdarg_p (TREE_TYPE (n->decl))))
748 return false;
749 return true;
752 /* Simple ipa profile pass propagating frequencies across the callgraph. */
/* The function works in four steps:
   1. When the histogram has a nonzero overall time, a minimal hot count
      THRESHOLD is derived from it; in LTO mode it is installed with
      set_hot_bb_threshold.
   2. The histogram and its allocation pool are released.
   3. For every indirect call of every defined function compiled with
      flag_ipa_profile, the targets recorded in its speculative call
      summary are looked up by profile id and, unless one of the checks
      rejects them, turned into speculative direct calls.  Statistics about
      the outcome are dumped.
   4. ipa_propagate_frequency is run on the local functions in the order
      given by ipa_reverse_postorder; functions whose caller changed are
      marked through their aux field and revisited until nothing changes.
   Always returns 0.  */
754 static unsigned int
755 ipa_profile (void)
757 struct cgraph_node **order;
758 struct cgraph_edge *e;
759 int order_pos;
760 bool something_changed = false;
761 int i;
762 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
763 struct cgraph_node *n,*n2;
/* Statistics about the indirect calls, reported in the dump file.  */
764 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
765 int nmismatch = 0, nimpossible = 0;
766 bool node_map_initialized = false;
767 gcov_type threshold;
769 if (dump_file)
770 dump_histogram (dump_file, histogram);
771 for (i = 0; i < (int)histogram.length (); i++)
773 overall_time += histogram[i]->count * histogram[i]->time;
774 overall_size += histogram[i]->size;
776 threshold = 0;
777 if (overall_time)
779 gcc_assert (overall_size);
/* CUTOFF is the share of the overall time given by
   param_hot_bb_count_ws_permille, rounded to the nearest unit.  */
781 cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
/* Walk the entries until their cumulated time reaches CUTOFF; THRESHOLD
   ends up as the count of the last entry visited.
   NOTE(review): the loop has no check against histogram.length (); it
   relies on the cumulated time reaching CUTOFF before the histogram is
   exhausted -- confirm that the parameter cannot exceed 1000.  */
782 for (i = 0; cumulated < cutoff; i++)
784 cumulated += histogram[i]->count * histogram[i]->time;
785 threshold = histogram[i]->count;
/* Never use a threshold of zero.  */
787 if (!threshold)
788 threshold = 1;
789 if (dump_file)
791 gcov_type cumulated_time = 0, cumulated_size = 0;
793 for (i = 0;
794 i < (int)histogram.length () && histogram[i]->count >= threshold;
795 i++)
797 cumulated_time += histogram[i]->count * histogram[i]->time;
798 cumulated_size += histogram[i]->size;
800 fprintf (dump_file, "Determined min count: %" PRId64
801 " Time:%3.2f%% Size:%3.2f%%\n",
802 (int64_t)threshold,
803 cumulated_time * 100.0 / overall_time,
804 cumulated_size * 100.0 / overall_size);
807 if (in_lto_p)
809 if (dump_file)
810 fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
811 set_hot_bb_threshold (threshold);
/* The histogram is not needed past this point.  */
814 histogram.release ();
815 histogram_pool.release ();
817 /* Produce speculative calls: we saved common target from profiling into
818 e->target_id. Now, at link time, we can look up corresponding
819 function node and produce speculative call. */
821 gcc_checking_assert (call_sums);
/* Dumping the summaries looks functions up by profile id, so the node map
   has to be initialized first.  */
823 if (dump_file)
825 if (!node_map_initialized)
826 init_node_map (false);
827 node_map_initialized = true;
829 ipa_profile_dump_all_summaries (dump_file);
832 FOR_EACH_DEFINED_FUNCTION (n)
/* Set when a speculative call was created in N, so that its function
   summary gets updated afterwards.  */
834 bool update = false;
836 if (!opt_for_fn (n->decl, flag_ipa_profile))
837 continue;
839 for (e = n->indirect_calls; e; e = e->next_callee)
841 if (n->count.initialized_p ())
842 nindirect++;
844 speculative_call_summary *csum = call_sums->get_create (e);
845 unsigned spec_count = csum->speculative_call_targets.length ();
846 if (spec_count)
848 if (!node_map_initialized)
849 init_node_map (false);
850 node_map_initialized = true;
851 ncommon++;
/* In LTO mode the hot threshold is replaced by THRESHOLD divided by the
   number of targets recorded for this edge.  */
853 if (in_lto_p)
855 if (dump_file)
857 fprintf (dump_file,
858 "Updating hotness threshold in LTO mode.\n");
859 fprintf (dump_file, "Updated min count: %" PRId64 "\n",
860 (int64_t) threshold / spec_count);
862 set_hot_bb_threshold (threshold / spec_count);
/* SPECULATIVE_ID numbers the speculative calls actually created for this
   edge.  */
865 unsigned speculative_id = 0;
866 bool speculative_found = false;
867 for (unsigned i = 0; i < spec_count; i++)
869 speculative_call_target item
870 = csum->speculative_call_targets[i];
871 n2 = find_func_by_profile_id (item.target_id);
872 if (n2)
874 if (dump_file)
876 fprintf (dump_file, "Indirect call -> direct call from"
877 " other module %s => %s, prob %3.2f\n",
878 n->dump_name (),
879 n2->dump_name (),
880 item.target_probability
881 / (float) REG_BR_PROB_BASE);
/* A target is rejected when its probability is below one half, when the
   call is cold, when the target may be interposed and discarded, when the
   argument count does not match, or when a polymorphic call cannot reach
   it; otherwise the call is made speculative.  */
883 if (item.target_probability < REG_BR_PROB_BASE / 2)
885 nuseless++;
886 if (dump_file)
887 fprintf (dump_file,
888 "Not speculating: probability is too low.\n");
890 else if (!e->maybe_hot_p ())
892 nuseless++;
893 if (dump_file)
894 fprintf (dump_file,
895 "Not speculating: call is cold.\n");
897 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
898 && n2->can_be_discarded_p ())
900 nuseless++;
901 if (dump_file)
902 fprintf (dump_file,
903 "Not speculating: target is overwritable "
904 "and can be discarded.\n");
906 else if (!check_argument_count (n2, e))
908 nmismatch++;
909 if (dump_file)
910 fprintf (dump_file,
911 "Not speculating: "
912 "parameter count mismatch\n");
914 else if (e->indirect_info->polymorphic
915 && !opt_for_fn (n->decl, flag_devirtualize)
916 && !possible_polymorphic_call_target_p (e, n2))
918 nimpossible++;
919 if (dump_file)
920 fprintf (dump_file,
921 "Not speculating: "
922 "function is not in the polymorphic "
923 "call target list\n");
925 else
927 /* Target may be overwritable, but profile says that
928 control flow goes to this particular implementation
929 of N2. Speculate on the local alias to allow inlining.
931 if (!n2->can_be_discarded_p ())
933 cgraph_node *alias;
934 alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
935 if (alias)
936 n2 = alias;
938 nconverted++;
939 e->make_speculative (n2,
940 e->count.apply_probability (
941 item.target_probability),
942 speculative_id,
943 item.target_probability);
944 update = true;
945 speculative_id++;
946 speculative_found = true;
949 else
951 if (dump_file)
952 fprintf (dump_file, "Function with profile-id %i not found.\n",
953 item.target_id);
954 nunknown++;
957 if (speculative_found)
958 e->indirect_info->num_speculative_call_targets = speculative_id;
961 if (update)
962 ipa_update_overall_fn_summary (n);
964 if (node_map_initialized)
965 del_node_map ();
966 if (dump_file && nindirect)
967 fprintf (dump_file,
968 "%i indirect calls trained.\n"
969 "%i (%3.2f%%) have common target.\n"
970 "%i (%3.2f%%) targets was not found.\n"
971 "%i (%3.2f%%) targets had parameter count mismatch.\n"
972 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
973 "%i (%3.2f%%) speculations seems useless.\n"
974 "%i (%3.2f%%) speculations produced.\n",
975 nindirect,
976 ncommon, ncommon * 100.0 / nindirect,
977 nunknown, nunknown * 100.0 / nindirect,
978 nmismatch, nmismatch * 100.0 / nindirect,
979 nimpossible, nimpossible * 100.0 / nindirect,
980 nuseless, nuseless * 100.0 / nindirect,
981 nconverted, nconverted * 100.0 / nindirect);
983 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
984 order_pos = ipa_reverse_postorder (order);
985 for (i = order_pos - 1; i >= 0; i--)
987 if (order[i]->local
988 && opt_for_fn (order[i]->decl, flag_ipa_profile)
989 && ipa_propagate_frequency (order[i]))
991 for (e = order[i]->callees; e; e = e->next_callee)
992 if (e->callee->local && !e->callee->aux)
994 something_changed = true;
995 e->callee->aux = (void *)1;
998 order[i]->aux = NULL;
1001 while (something_changed)
1003 something_changed = false;
1004 for (i = order_pos - 1; i >= 0; i--)
1006 if (order[i]->aux
1007 && opt_for_fn (order[i]->decl, flag_ipa_profile)
1008 && ipa_propagate_frequency (order[i]))
1010 for (e = order[i]->callees; e; e = e->next_callee)
1011 if (e->callee->local && !e->callee->aux)
1013 something_changed = true;
1014 e->callee->aux = (void *)1;
1017 order[i]->aux = NULL;
1020 free (order);
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 symtab->dump (dump_file);
1025 return 0;
1028 namespace {
1030 const pass_data pass_data_ipa_profile =
1032 IPA_PASS, /* type */
1033 "profile_estimate", /* name */
1034 OPTGROUP_NONE, /* optinfo_flags */
1035 TV_IPA_PROFILE, /* tv_id */
1036 0, /* properties_required */
1037 0, /* properties_provided */
1038 0, /* properties_destroyed */
1039 0, /* todo_flags_start */
1040 0, /* todo_flags_finish */
1043 class pass_ipa_profile : public ipa_opt_pass_d
1045 public:
1046 pass_ipa_profile (gcc::context *ctxt)
1047 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1048 ipa_profile_generate_summary, /* generate_summary */
1049 ipa_profile_write_summary, /* write_summary */
1050 ipa_profile_read_summary, /* read_summary */
1051 NULL, /* write_optimization_summary */
1052 NULL, /* read_optimization_summary */
1053 NULL, /* stmt_fixup */
1054 0, /* function_transform_todo_flags_start */
1055 NULL, /* function_transform */
1056 NULL) /* variable_transform */
1059 /* opt_pass methods: */
1060 virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
1061 virtual unsigned int execute (function *) { return ipa_profile (); }
1063 }; // class pass_ipa_profile
1065 } // anon namespace
1067 ipa_opt_pass_d *
1068 make_pass_ipa_profile (gcc::context *ctxt)
1070 return new pass_ipa_profile (ctxt);