1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects: first, the
30 gcov runtime does not weight individual basic blocks by estimated execution
31 time, and second, the merging of multiple runs assumes that the
32 histogram distribution did not change. Consequently, the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next, speculative indirect call resolution is performed: the local
37 profile pass assigns a profile-id to each function and provides us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
55 #include "fold-const.h"
57 #include "dominance.h"
59 #include "basic-block.h"
61 #include "plugin-api.h"
62 #include "hard-reg-set.h"
67 #include "tree-pass.h"
68 #include "tree-ssa-alias.h"
69 #include "internal-fn.h"
70 #include "gimple-expr.h"
72 #include "gimple-iterator.h"
75 #include "tree-iterator.h"
76 #include "ipa-utils.h"
79 #include "value-prof.h"
80 #include "alloc-pool.h"
81 #include "tree-inline.h"
82 #include "lto-streamer.h"
83 #include "data-streamer.h"
84 #include "symbol-summary.h"
86 #include "ipa-inline.h"
88 /* Entry in the histogram. */
90 struct histogram_entry
97 /* Histogram of profile values.
98 The histogram is represented as an ordered vector of entries allocated via
99 histogram_pool. During construction a separate hashtable is kept to look up
100 duplicate entries. */
102 vec
<histogram_entry
*> histogram
;
103 static pool_allocator
<histogram_entry
> histogram_pool
104 ("IPA histogram", 10);
106 /* Hashtable support for looking up histogram entries by their execution count. */
108 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
110 typedef histogram_entry
*value_type
;
111 typedef histogram_entry
*compare_type
;
112 static inline hashval_t
hash (const histogram_entry
*);
113 static inline int equal (const histogram_entry
*, const histogram_entry
*);
117 histogram_hash::hash (const histogram_entry
*val
)
123 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
125 return val
->count
== val2
->count
;
128 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
129 HASHTABLE is the on-the-side hash table kept to avoid duplicates. */
132 account_time_size (hash_table
<histogram_hash
> *hashtable
,
133 vec
<histogram_entry
*> &histogram
,
134 gcov_type count
, int time
, int size
)
136 histogram_entry key
= {count
, 0, 0};
137 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
141 *val
= histogram_pool
.allocate ();
143 histogram
.safe_push (*val
);
145 (*val
)->time
+= time
;
146 (*val
)->size
+= size
;
150 cmp_counts (const void *v1
, const void *v2
)
152 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
153 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
154 if (h1
->count
< h2
->count
)
156 if (h1
->count
> h2
->count
)
161 /* Dump HISTOGRAM to FILE. */
164 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
167 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
169 fprintf (dump_file
, "Histogram:\n");
170 for (i
= 0; i
< histogram
.length (); i
++)
172 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
173 overall_size
+= histogram
[i
]->size
;
179 for (i
= 0; i
< histogram
.length (); i
++)
181 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
182 cumulated_size
+= histogram
[i
]->size
;
183 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
184 (int64_t) histogram
[i
]->count
,
186 cumulated_time
* 100.0 / overall_time
,
188 cumulated_size
* 100.0 / overall_size
);
192 /* Collect histogram from CFG profiles. */
195 ipa_profile_generate_summary (void)
197 struct cgraph_node
*node
;
198 gimple_stmt_iterator gsi
;
201 hash_table
<histogram_hash
> hashtable (10);
203 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
204 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
208 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
210 gimple stmt
= gsi_stmt (gsi
);
211 if (gimple_code (stmt
) == GIMPLE_CALL
212 && !gimple_call_fndecl (stmt
))
215 h
= gimple_histogram_value_of_type
216 (DECL_STRUCT_FUNCTION (node
->decl
),
217 stmt
, HIST_TYPE_INDIR_CALL
);
218 /* No need to do sanity check: gimple_ic_transform already
219 takes away bad histograms. */
222 /* Counter 0 is the target, counter 1 is the number of executions in which
223 the target was called, counter 2 is the total number of executions. */
224 if (h
->hvalue
.counters
[2])
226 struct cgraph_edge
* e
= node
->get_edge (stmt
);
227 if (e
&& !e
->indirect_unknown_callee
)
229 e
->indirect_info
->common_target_id
230 = h
->hvalue
.counters
[0];
231 e
->indirect_info
->common_target_probability
232 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
233 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
236 fprintf (dump_file
, "Probability capped to 1\n");
237 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
240 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
244 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
245 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
247 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
249 histogram
.qsort (cmp_counts
);
252 /* Serialize the ipa info for lto. */
255 ipa_profile_write_summary (void)
257 struct lto_simple_output_block
*ob
258 = lto_create_simple_output_block (LTO_section_ipa_profile
);
261 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
262 for (i
= 0; i
< histogram
.length (); i
++)
264 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
265 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
266 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
268 lto_destroy_simple_output_block (ob
);
271 /* Deserialize the ipa info for lto. */
274 ipa_profile_read_summary (void)
276 struct lto_file_decl_data
** file_data_vec
277 = lto_get_file_decl_data ();
278 struct lto_file_decl_data
* file_data
;
281 hash_table
<histogram_hash
> hashtable (10);
283 while ((file_data
= file_data_vec
[j
++]))
287 struct lto_input_block
*ib
288 = lto_create_simple_input_block (file_data
,
289 LTO_section_ipa_profile
,
293 unsigned int num
= streamer_read_uhwi (ib
);
295 for (n
= 0; n
< num
; n
++)
297 gcov_type count
= streamer_read_gcov_count (ib
);
298 int time
= streamer_read_uhwi (ib
);
299 int size
= streamer_read_uhwi (ib
);
300 account_time_size (&hashtable
, histogram
,
303 lto_destroy_simple_input_block (file_data
,
304 LTO_section_ipa_profile
,
308 histogram
.qsort (cmp_counts
);
311 /* Data used by ipa_propagate_frequency. */
313 struct ipa_propagate_frequency_data
315 cgraph_node
*function_symbol
;
316 bool maybe_unlikely_executed
;
317 bool maybe_executed_once
;
318 bool only_called_at_startup
;
319 bool only_called_at_exit
;
322 /* Worker for ipa_propagate_frequency. */
325 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
327 struct ipa_propagate_frequency_data
*d
;
328 struct cgraph_edge
*edge
;
330 d
= (struct ipa_propagate_frequency_data
*)data
;
331 for (edge
= node
->callers
;
332 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
333 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
334 edge
= edge
->next_caller
)
336 if (edge
->caller
!= d
->function_symbol
)
338 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
339 /* It makes sense to put main() together with the static constructors.
340 It will be executed for sure, but the rest of the functions called from
341 main are definitely not executed at startup only. */
342 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
343 d
->only_called_at_startup
= 0;
344 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
347 /* When profile feedback is available, do not try to propagate too hard;
348 counts are already a good guide to function frequencies, and roundoff
349 errors can make us push a function into the unlikely section even when
350 it is executed by the train run. Transfer the function only if all
351 callers are unlikely executed. */
353 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
354 /* Thunks are not profiled. This is more or less implementation ... [rest of this comment is missing from this copy]. */
356 && !d
->function_symbol
->thunk
.thunk_p
357 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
358 || (edge
->caller
->global
.inlined_to
359 && edge
->caller
->global
.inlined_to
->frequency
360 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
361 d
->maybe_unlikely_executed
= false;
362 if (!edge
->frequency
)
364 switch (edge
->caller
->frequency
)
366 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
368 case NODE_FREQUENCY_EXECUTED_ONCE
:
369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
370 fprintf (dump_file
, " Called by %s that is executed once\n",
371 edge
->caller
->name ());
372 d
->maybe_unlikely_executed
= false;
373 if (inline_edge_summary (edge
)->loop_depth
)
375 d
->maybe_executed_once
= false;
376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
377 fprintf (dump_file
, " Called in loop\n");
380 case NODE_FREQUENCY_HOT
:
381 case NODE_FREQUENCY_NORMAL
:
382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
383 fprintf (dump_file
, " Called by %s that is normal or hot\n",
384 edge
->caller
->name ());
385 d
->maybe_unlikely_executed
= false;
386 d
->maybe_executed_once
= false;
393 /* Return true if NODE contains hot calls. */
396 contains_hot_call_p (struct cgraph_node
*node
)
398 struct cgraph_edge
*e
;
399 for (e
= node
->callees
; e
; e
= e
->next_callee
)
400 if (e
->maybe_hot_p ())
402 else if (!e
->inline_failed
403 && contains_hot_call_p (e
->callee
))
405 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
406 if (e
->maybe_hot_p ())
411 /* See if the frequency of NODE can be updated based on frequencies of its callers. */
414 ipa_propagate_frequency (struct cgraph_node
*node
)
416 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
417 bool changed
= false;
419 /* We can not propagate anything useful about externally visible functions
420 nor about virtuals. */
421 if (!node
->local
.local
423 || (opt_for_fn (node
->decl
, flag_devirtualize
)
424 && DECL_VIRTUAL_P (node
->decl
)))
426 gcc_assert (node
->analyzed
);
427 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
428 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
430 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
433 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
434 && !node
->only_called_at_startup
)
436 node
->only_called_at_startup
= true;
438 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
442 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
443 && !node
->only_called_at_exit
)
445 node
->only_called_at_exit
= true;
447 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
452 /* With profile we can decide on hot/normal based on count. */
456 if (node
->count
>= get_hot_bb_threshold ())
459 hot
|= contains_hot_call_p (node
);
462 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
465 fprintf (dump_file
, "Node %s promoted to hot.\n",
467 node
->frequency
= NODE_FREQUENCY_HOT
;
472 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
475 fprintf (dump_file
, "Node %s reduced to normal.\n",
477 node
->frequency
= NODE_FREQUENCY_NORMAL
;
481 /* These come either from profile or user hints; never update them. */
482 if (node
->frequency
== NODE_FREQUENCY_HOT
483 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
485 if (d
.maybe_unlikely_executed
)
487 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
489 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
493 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
495 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
497 fprintf (dump_file
, "Node %s promoted to executed once.\n",
504 /* Simple ipa profile pass propagating frequencies across the callgraph. */
509 struct cgraph_node
**order
;
510 struct cgraph_edge
*e
;
512 bool something_changed
= false;
514 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
515 struct cgraph_node
*n
,*n2
;
516 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
517 int nmismatch
= 0, nimpossible
= 0;
518 bool node_map_initialized
= false;
521 dump_histogram (dump_file
, histogram
);
522 for (i
= 0; i
< (int)histogram
.length (); i
++)
524 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
525 overall_size
+= histogram
[i
]->size
;
531 gcc_assert (overall_size
);
534 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
536 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
537 (int64_t)overall_time
);
538 min
= get_hot_bb_threshold ();
539 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
542 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
543 cumulated_size
+= histogram
[i
]->size
;
545 fprintf (dump_file
, "GCOV min count: %" PRId64
546 " Time:%3.2f%% Size:%3.2f%%\n",
548 cumulated_time
* 100.0 / overall_time
,
549 cumulated_size
* 100.0 / overall_size
);
551 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
553 for (i
= 0; cumulated
< cutoff
; i
++)
555 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
556 threshold
= histogram
[i
]->count
;
562 gcov_type cumulated_time
= 0, cumulated_size
= 0;
565 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
568 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
569 cumulated_size
+= histogram
[i
]->size
;
571 fprintf (dump_file
, "Determined min count: %" PRId64
572 " Time:%3.2f%% Size:%3.2f%%\n",
574 cumulated_time
* 100.0 / overall_time
,
575 cumulated_size
* 100.0 / overall_size
);
577 if (threshold
> get_hot_bb_threshold ()
581 fprintf (dump_file
, "Threshold updated.\n");
582 set_hot_bb_threshold (threshold
);
585 histogram
.release ();
586 histogram_pool
.release ();
588 /* Produce speculative calls: we saved the common target from profiling into
589 e->indirect_info->common_target_id. Now, at link time, we can look up the
590 corresponding function node and produce a speculative call. */
592 FOR_EACH_DEFINED_FUNCTION (n
)
596 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
599 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
603 if (e
->indirect_info
->common_target_id
)
605 if (!node_map_initialized
)
606 init_node_map (false);
607 node_map_initialized
= true;
609 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
614 fprintf (dump_file
, "Indirect call -> direct call from"
615 " other module %s/%i => %s/%i, prob %3.2f\n",
616 xstrdup_for_dump (n
->name ()), n
->order
,
617 xstrdup_for_dump (n2
->name ()), n2
->order
,
618 e
->indirect_info
->common_target_probability
619 / (float)REG_BR_PROB_BASE
);
621 if (e
->indirect_info
->common_target_probability
622 < REG_BR_PROB_BASE
/ 2)
627 "Not speculating: probability is too low.\n");
629 else if (!e
->maybe_hot_p ())
634 "Not speculating: call is cold.\n");
636 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
637 && n2
->can_be_discarded_p ())
642 "Not speculating: target is overwritable "
643 "and can be discarded.\n");
645 else if (ipa_node_params_sum
&& ipa_edge_args_vector
646 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
647 && ipa_get_param_count (IPA_NODE_REF (n2
))
648 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
649 && (ipa_get_param_count (IPA_NODE_REF (n2
))
650 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
651 || !stdarg_p (TREE_TYPE (n2
->decl
))))
657 "parameter count mistmatch\n");
659 else if (e
->indirect_info
->polymorphic
660 && !opt_for_fn (n
->decl
, flag_devirtualize
)
661 && !possible_polymorphic_call_target_p (e
, n2
))
667 "function is not in the polymorphic "
668 "call target list\n");
672 /* Target may be overwritable, but profile says that
673 control flow goes to this particular implementation
674 of N2. Speculate on the local alias to allow inlining. */
676 if (!n2
->can_be_discarded_p ())
679 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
686 apply_scale (e
->count
,
687 e
->indirect_info
->common_target_probability
),
688 apply_scale (e
->frequency
,
689 e
->indirect_info
->common_target_probability
));
696 fprintf (dump_file
, "Function with profile-id %i not found.\n",
697 e
->indirect_info
->common_target_id
);
703 inline_update_overall_summary (n
);
705 if (node_map_initialized
)
707 if (dump_file
&& nindirect
)
709 "%i indirect calls trained.\n"
710 "%i (%3.2f%%) have common target.\n"
711 "%i (%3.2f%%) targets was not found.\n"
712 "%i (%3.2f%%) targets had parameter count mismatch.\n"
713 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
714 "%i (%3.2f%%) speculations seems useless.\n"
715 "%i (%3.2f%%) speculations produced.\n",
717 ncommon
, ncommon
* 100.0 / nindirect
,
718 nunknown
, nunknown
* 100.0 / nindirect
,
719 nmismatch
, nmismatch
* 100.0 / nindirect
,
720 nimpossible
, nimpossible
* 100.0 / nindirect
,
721 nuseless
, nuseless
* 100.0 / nindirect
,
722 nconverted
, nconverted
* 100.0 / nindirect
);
724 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
725 order_pos
= ipa_reverse_postorder (order
);
726 for (i
= order_pos
- 1; i
>= 0; i
--)
728 if (order
[i
]->local
.local
729 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
730 && ipa_propagate_frequency (order
[i
]))
732 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
733 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
735 something_changed
= true;
736 e
->callee
->aux
= (void *)1;
739 order
[i
]->aux
= NULL
;
742 while (something_changed
)
744 something_changed
= false;
745 for (i
= order_pos
- 1; i
>= 0; i
--)
748 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
749 && ipa_propagate_frequency (order
[i
]))
751 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
752 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
754 something_changed
= true;
755 e
->callee
->aux
= (void *)1;
758 order
[i
]->aux
= NULL
;
767 const pass_data pass_data_ipa_profile
=
770 "profile_estimate", /* name */
771 OPTGROUP_NONE
, /* optinfo_flags */
772 TV_IPA_PROFILE
, /* tv_id */
773 0, /* properties_required */
774 0, /* properties_provided */
775 0, /* properties_destroyed */
776 0, /* todo_flags_start */
777 0, /* todo_flags_finish */
780 class pass_ipa_profile
: public ipa_opt_pass_d
783 pass_ipa_profile (gcc::context
*ctxt
)
784 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
785 ipa_profile_generate_summary
, /* generate_summary */
786 ipa_profile_write_summary
, /* write_summary */
787 ipa_profile_read_summary
, /* read_summary */
788 NULL
, /* write_optimization_summary */
789 NULL
, /* read_optimization_summary */
790 NULL
, /* stmt_fixup */
791 0, /* function_transform_todo_flags_start */
792 NULL
, /* function_transform */
793 NULL
) /* variable_transform */
796 /* opt_pass methods: */
797 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
798 virtual unsigned int execute (function
*) { return ipa_profile (); }
800 }; // class pass_ipa_profile
805 make_pass_ipa_profile (gcc::context
*ctxt
)
807 return new pass_ipa_profile (ctxt
);