1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profille
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequentely histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "hard-reg-set.h"
56 #include "fold-const.h"
58 #include "tree-pass.h"
59 #include "internal-fn.h"
60 #include "gimple-iterator.h"
63 #include "tree-iterator.h"
64 #include "ipa-utils.h"
67 #include "value-prof.h"
68 #include "alloc-pool.h"
69 #include "tree-inline.h"
70 #include "data-streamer.h"
71 #include "symbol-summary.h"
73 #include "ipa-inline.h"
75 /* Entry in the histogram. */
77 struct histogram_entry
84 /* Histogram of profile values.
85 The histogram is represented as an ordered vector of entries allocated via
86 histogram_pool. During construction a separate hashtable is kept to lookup
89 vec
<histogram_entry
*> histogram
;
90 static object_allocator
<histogram_entry
> histogram_pool
91 ("IPA histogram", 10);
93 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
95 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
97 static inline hashval_t
hash (const histogram_entry
*);
98 static inline int equal (const histogram_entry
*, const histogram_entry
*);
102 histogram_hash::hash (const histogram_entry
*val
)
108 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
110 return val
->count
== val2
->count
;
113 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
114 HASHTABLE is the on-side hash kept to avoid duplicates. */
117 account_time_size (hash_table
<histogram_hash
> *hashtable
,
118 vec
<histogram_entry
*> &histogram
,
119 gcov_type count
, int time
, int size
)
121 histogram_entry key
= {count
, 0, 0};
122 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
126 *val
= histogram_pool
.allocate ();
128 histogram
.safe_push (*val
);
130 (*val
)->time
+= time
;
131 (*val
)->size
+= size
;
135 cmp_counts (const void *v1
, const void *v2
)
137 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
138 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
139 if (h1
->count
< h2
->count
)
141 if (h1
->count
> h2
->count
)
146 /* Dump HISTOGRAM to FILE. */
149 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
152 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
154 fprintf (dump_file
, "Histogram:\n");
155 for (i
= 0; i
< histogram
.length (); i
++)
157 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
158 overall_size
+= histogram
[i
]->size
;
164 for (i
= 0; i
< histogram
.length (); i
++)
166 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
167 cumulated_size
+= histogram
[i
]->size
;
168 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
169 (int64_t) histogram
[i
]->count
,
171 cumulated_time
* 100.0 / overall_time
,
173 cumulated_size
* 100.0 / overall_size
);
177 /* Collect histogram from CFG profiles. */
180 ipa_profile_generate_summary (void)
182 struct cgraph_node
*node
;
183 gimple_stmt_iterator gsi
;
186 hash_table
<histogram_hash
> hashtable (10);
188 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
189 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
193 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
195 gimple stmt
= gsi_stmt (gsi
);
196 if (gimple_code (stmt
) == GIMPLE_CALL
197 && !gimple_call_fndecl (stmt
))
200 h
= gimple_histogram_value_of_type
201 (DECL_STRUCT_FUNCTION (node
->decl
),
202 stmt
, HIST_TYPE_INDIR_CALL
);
203 /* No need to do sanity check: gimple_ic_transform already
204 takes away bad histograms. */
207 /* counter 0 is target, counter 1 is number of execution we called target,
208 counter 2 is total number of executions. */
209 if (h
->hvalue
.counters
[2])
211 struct cgraph_edge
* e
= node
->get_edge (stmt
);
212 if (e
&& !e
->indirect_unknown_callee
)
214 e
->indirect_info
->common_target_id
215 = h
->hvalue
.counters
[0];
216 e
->indirect_info
->common_target_probability
217 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
218 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
221 fprintf (dump_file
, "Probability capped to 1\n");
222 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
225 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
229 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
230 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
232 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
234 histogram
.qsort (cmp_counts
);
237 /* Serialize the ipa info for lto. */
240 ipa_profile_write_summary (void)
242 struct lto_simple_output_block
*ob
243 = lto_create_simple_output_block (LTO_section_ipa_profile
);
246 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
247 for (i
= 0; i
< histogram
.length (); i
++)
249 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
250 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
251 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
253 lto_destroy_simple_output_block (ob
);
256 /* Deserialize the ipa info for lto. */
259 ipa_profile_read_summary (void)
261 struct lto_file_decl_data
** file_data_vec
262 = lto_get_file_decl_data ();
263 struct lto_file_decl_data
* file_data
;
266 hash_table
<histogram_hash
> hashtable (10);
268 while ((file_data
= file_data_vec
[j
++]))
272 struct lto_input_block
*ib
273 = lto_create_simple_input_block (file_data
,
274 LTO_section_ipa_profile
,
278 unsigned int num
= streamer_read_uhwi (ib
);
280 for (n
= 0; n
< num
; n
++)
282 gcov_type count
= streamer_read_gcov_count (ib
);
283 int time
= streamer_read_uhwi (ib
);
284 int size
= streamer_read_uhwi (ib
);
285 account_time_size (&hashtable
, histogram
,
288 lto_destroy_simple_input_block (file_data
,
289 LTO_section_ipa_profile
,
293 histogram
.qsort (cmp_counts
);
296 /* Data used by ipa_propagate_frequency. */
298 struct ipa_propagate_frequency_data
300 cgraph_node
*function_symbol
;
301 bool maybe_unlikely_executed
;
302 bool maybe_executed_once
;
303 bool only_called_at_startup
;
304 bool only_called_at_exit
;
307 /* Worker for ipa_propagate_frequency_1. */
310 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
312 struct ipa_propagate_frequency_data
*d
;
313 struct cgraph_edge
*edge
;
315 d
= (struct ipa_propagate_frequency_data
*)data
;
316 for (edge
= node
->callers
;
317 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
318 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
319 edge
= edge
->next_caller
)
321 if (edge
->caller
!= d
->function_symbol
)
323 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
324 /* It makes sense to put main() together with the static constructors.
325 It will be executed for sure, but rest of functions called from
326 main are definitely not at startup only. */
327 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
328 d
->only_called_at_startup
= 0;
329 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
332 /* When profile feedback is available, do not try to propagate too hard;
333 counts are already good guide on function frequencies and roundoff
334 errors can make us to push function into unlikely section even when
335 it is executed by the train run. Transfer the function only if all
336 callers are unlikely executed. */
338 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
339 /* Thunks are not profiled. This is more or less implementation
341 && !d
->function_symbol
->thunk
.thunk_p
342 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
343 || (edge
->caller
->global
.inlined_to
344 && edge
->caller
->global
.inlined_to
->frequency
345 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
346 d
->maybe_unlikely_executed
= false;
347 if (!edge
->frequency
)
349 switch (edge
->caller
->frequency
)
351 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
353 case NODE_FREQUENCY_EXECUTED_ONCE
:
354 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
355 fprintf (dump_file
, " Called by %s that is executed once\n",
356 edge
->caller
->name ());
357 d
->maybe_unlikely_executed
= false;
358 if (inline_edge_summary (edge
)->loop_depth
)
360 d
->maybe_executed_once
= false;
361 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
362 fprintf (dump_file
, " Called in loop\n");
365 case NODE_FREQUENCY_HOT
:
366 case NODE_FREQUENCY_NORMAL
:
367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
368 fprintf (dump_file
, " Called by %s that is normal or hot\n",
369 edge
->caller
->name ());
370 d
->maybe_unlikely_executed
= false;
371 d
->maybe_executed_once
= false;
378 /* Return ture if NODE contains hot calls. */
381 contains_hot_call_p (struct cgraph_node
*node
)
383 struct cgraph_edge
*e
;
384 for (e
= node
->callees
; e
; e
= e
->next_callee
)
385 if (e
->maybe_hot_p ())
387 else if (!e
->inline_failed
388 && contains_hot_call_p (e
->callee
))
390 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
391 if (e
->maybe_hot_p ())
396 /* See if the frequency of NODE can be updated based on frequencies of its
399 ipa_propagate_frequency (struct cgraph_node
*node
)
401 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
402 bool changed
= false;
404 /* We can not propagate anything useful about externally visible functions
405 nor about virtuals. */
406 if (!node
->local
.local
408 || (opt_for_fn (node
->decl
, flag_devirtualize
)
409 && DECL_VIRTUAL_P (node
->decl
)))
411 gcc_assert (node
->analyzed
);
412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
413 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
415 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
418 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
419 && !node
->only_called_at_startup
)
421 node
->only_called_at_startup
= true;
423 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
427 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
428 && !node
->only_called_at_exit
)
430 node
->only_called_at_exit
= true;
432 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
437 /* With profile we can decide on hot/normal based on count. */
441 if (node
->count
>= get_hot_bb_threshold ())
444 hot
|= contains_hot_call_p (node
);
447 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
450 fprintf (dump_file
, "Node %s promoted to hot.\n",
452 node
->frequency
= NODE_FREQUENCY_HOT
;
457 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
460 fprintf (dump_file
, "Node %s reduced to normal.\n",
462 node
->frequency
= NODE_FREQUENCY_NORMAL
;
466 /* These come either from profile or user hints; never update them. */
467 if (node
->frequency
== NODE_FREQUENCY_HOT
468 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
470 if (d
.maybe_unlikely_executed
)
472 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
474 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
478 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
480 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
482 fprintf (dump_file
, "Node %s promoted to executed once.\n",
489 /* Simple ipa profile pass propagating frequencies across the callgraph. */
494 struct cgraph_node
**order
;
495 struct cgraph_edge
*e
;
497 bool something_changed
= false;
499 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
500 struct cgraph_node
*n
,*n2
;
501 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
502 int nmismatch
= 0, nimpossible
= 0;
503 bool node_map_initialized
= false;
506 dump_histogram (dump_file
, histogram
);
507 for (i
= 0; i
< (int)histogram
.length (); i
++)
509 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
510 overall_size
+= histogram
[i
]->size
;
516 gcc_assert (overall_size
);
519 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
521 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
522 (int64_t)overall_time
);
523 min
= get_hot_bb_threshold ();
524 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
527 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
528 cumulated_size
+= histogram
[i
]->size
;
530 fprintf (dump_file
, "GCOV min count: %" PRId64
531 " Time:%3.2f%% Size:%3.2f%%\n",
533 cumulated_time
* 100.0 / overall_time
,
534 cumulated_size
* 100.0 / overall_size
);
536 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
538 for (i
= 0; cumulated
< cutoff
; i
++)
540 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
541 threshold
= histogram
[i
]->count
;
547 gcov_type cumulated_time
= 0, cumulated_size
= 0;
550 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
553 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
554 cumulated_size
+= histogram
[i
]->size
;
556 fprintf (dump_file
, "Determined min count: %" PRId64
557 " Time:%3.2f%% Size:%3.2f%%\n",
559 cumulated_time
* 100.0 / overall_time
,
560 cumulated_size
* 100.0 / overall_size
);
562 if (threshold
> get_hot_bb_threshold ()
566 fprintf (dump_file
, "Threshold updated.\n");
567 set_hot_bb_threshold (threshold
);
570 histogram
.release ();
571 histogram_pool
.release ();
573 /* Produce speculative calls: we saved common traget from porfiling into
574 e->common_target_id. Now, at link time, we can look up corresponding
575 function node and produce speculative call. */
577 FOR_EACH_DEFINED_FUNCTION (n
)
581 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
584 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
588 if (e
->indirect_info
->common_target_id
)
590 if (!node_map_initialized
)
591 init_node_map (false);
592 node_map_initialized
= true;
594 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
599 fprintf (dump_file
, "Indirect call -> direct call from"
600 " other module %s/%i => %s/%i, prob %3.2f\n",
601 xstrdup_for_dump (n
->name ()), n
->order
,
602 xstrdup_for_dump (n2
->name ()), n2
->order
,
603 e
->indirect_info
->common_target_probability
604 / (float)REG_BR_PROB_BASE
);
606 if (e
->indirect_info
->common_target_probability
607 < REG_BR_PROB_BASE
/ 2)
612 "Not speculating: probability is too low.\n");
614 else if (!e
->maybe_hot_p ())
619 "Not speculating: call is cold.\n");
621 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
622 && n2
->can_be_discarded_p ())
627 "Not speculating: target is overwritable "
628 "and can be discarded.\n");
630 else if (ipa_node_params_sum
&& ipa_edge_args_vector
631 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
632 && ipa_get_param_count (IPA_NODE_REF (n2
))
633 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
634 && (ipa_get_param_count (IPA_NODE_REF (n2
))
635 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
636 || !stdarg_p (TREE_TYPE (n2
->decl
))))
642 "parameter count mistmatch\n");
644 else if (e
->indirect_info
->polymorphic
645 && !opt_for_fn (n
->decl
, flag_devirtualize
)
646 && !possible_polymorphic_call_target_p (e
, n2
))
652 "function is not in the polymorphic "
653 "call target list\n");
657 /* Target may be overwritable, but profile says that
658 control flow goes to this particular implementation
659 of N2. Speculate on the local alias to allow inlining.
661 if (!n2
->can_be_discarded_p ())
664 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
671 apply_scale (e
->count
,
672 e
->indirect_info
->common_target_probability
),
673 apply_scale (e
->frequency
,
674 e
->indirect_info
->common_target_probability
));
681 fprintf (dump_file
, "Function with profile-id %i not found.\n",
682 e
->indirect_info
->common_target_id
);
688 inline_update_overall_summary (n
);
690 if (node_map_initialized
)
692 if (dump_file
&& nindirect
)
694 "%i indirect calls trained.\n"
695 "%i (%3.2f%%) have common target.\n"
696 "%i (%3.2f%%) targets was not found.\n"
697 "%i (%3.2f%%) targets had parameter count mismatch.\n"
698 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
699 "%i (%3.2f%%) speculations seems useless.\n"
700 "%i (%3.2f%%) speculations produced.\n",
702 ncommon
, ncommon
* 100.0 / nindirect
,
703 nunknown
, nunknown
* 100.0 / nindirect
,
704 nmismatch
, nmismatch
* 100.0 / nindirect
,
705 nimpossible
, nimpossible
* 100.0 / nindirect
,
706 nuseless
, nuseless
* 100.0 / nindirect
,
707 nconverted
, nconverted
* 100.0 / nindirect
);
709 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
710 order_pos
= ipa_reverse_postorder (order
);
711 for (i
= order_pos
- 1; i
>= 0; i
--)
713 if (order
[i
]->local
.local
714 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
715 && ipa_propagate_frequency (order
[i
]))
717 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
718 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
720 something_changed
= true;
721 e
->callee
->aux
= (void *)1;
724 order
[i
]->aux
= NULL
;
727 while (something_changed
)
729 something_changed
= false;
730 for (i
= order_pos
- 1; i
>= 0; i
--)
733 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
734 && ipa_propagate_frequency (order
[i
]))
736 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
737 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
739 something_changed
= true;
740 e
->callee
->aux
= (void *)1;
743 order
[i
]->aux
= NULL
;
752 const pass_data pass_data_ipa_profile
=
755 "profile_estimate", /* name */
756 OPTGROUP_NONE
, /* optinfo_flags */
757 TV_IPA_PROFILE
, /* tv_id */
758 0, /* properties_required */
759 0, /* properties_provided */
760 0, /* properties_destroyed */
761 0, /* todo_flags_start */
762 0, /* todo_flags_finish */
765 class pass_ipa_profile
: public ipa_opt_pass_d
768 pass_ipa_profile (gcc::context
*ctxt
)
769 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
770 ipa_profile_generate_summary
, /* generate_summary */
771 ipa_profile_write_summary
, /* write_summary */
772 ipa_profile_read_summary
, /* read_summary */
773 NULL
, /* write_optimization_summary */
774 NULL
, /* read_optimization_summary */
775 NULL
, /* stmt_fixup */
776 0, /* function_transform_todo_flags_start */
777 NULL
, /* function_transform */
778 NULL
) /* variable_transform */
781 /* opt_pass methods: */
782 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
783 virtual unsigned int execute (function
*) { return ipa_profile (); }
785 }; // class pass_ipa_profile
790 make_pass_ipa_profile (gcc::context
*ctxt
)
792 return new pass_ipa_profile (ctxt
);