/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The ipa-profile pass implements the following analyses propagating profile
   information across the callgraph.

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  This histogram is complete only with LTO;
     otherwise it contains information only about the current unit.

     A similar histogram is also estimated by the coverage runtime.  That
     histogram does not depend on LTO, but it suffers from various defects:
     first, the gcov runtime does not weight individual basic blocks by
     estimated execution time, and second, merging multiple runs assumes
     that the histogram distribution did not change.  Consequently the
     histogram constructed here may be more precise.

     The information is used to set hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with
     a histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive IPA optimization, based on the decision
     of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
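/* Illustrative sketch (hypothetical user code, not part of this file): with
   the flag propagation described above, a local helper whose only caller is
   a static constructor is a candidate for the "executed at startup" flag and
   may be placed into the .text.startup subsection:

     static int table[16];

     static void
     init_table (void)	// reachable only from the constructor below
     {
       for (int i = 0; i < 16; i++)
	 table[i] = i * i;
     }

     __attribute__ ((constructor)) static void
     ctor (void)
     {
       init_table ();
     }  */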
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "data-streamer.h"
#include "gimple-iterator.h"
#include "ipa-utils.h"
#include "profile.h"
#include "params.h"
#include "value-prof.h"
#include "tree-inline.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};
/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look
   up duplicate entries.  */

vec<histogram_entry *> histogram;
static object_allocator<histogram_entry> histogram_pool ("IPA histogram");

/* Hashtable support for storing histogram entries hashed by their
   execution count.  */

struct histogram_hash : nofree_ptr_hash <histogram_entry>
{
  static inline hashval_t hash (const histogram_entry *);
  static inline int equal (const histogram_entry *, const histogram_entry *);
};

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}
/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the on-the-side hash kept to avoid duplicates.  */

static void
account_time_size (hash_table<histogram_hash> *hashtable,
		   vec<histogram_entry *> &histogram,
		   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable->find_slot (&key, INSERT);

  if (!*val)
    {
      *val = histogram_pool.allocate ();
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}
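/* Usage sketch (illustrative only): basic blocks sharing an execution count
   are merged into a single histogram entry, so on an empty histogram

     account_time_size (&hashtable, histogram, 1000, 2, 3);
     account_time_size (&hashtable, histogram, 1000, 5, 1);

   leaves one entry with count == 1000, time == 7 and size == 4, while a call
   with a different count pushes a new entry.  */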
/* qsort comparator: order histogram entries by count, largest first.  */

static int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *) v1;
  const histogram_entry *h2 = *(const histogram_entry * const *) v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}
/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
	    overall_size = 0;

  fprintf (file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, "  %" PRId64 ": time:%i (%2.2f) size:%i (%2.2f)\n",
	       (int64_t) histogram[i]->count,
	       histogram[i]->time,
	       cumulated_time * 100.0 / overall_time,
	       histogram[i]->size,
	       cumulated_size * 100.0 / overall_size);
    }
}
/* Collect histogram from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
	int time = 0;
	int size = 0;
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    gimple *stmt = gsi_stmt (gsi);
	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_fndecl (stmt))
	      {
		histogram_value h;
		h = gimple_histogram_value_of_type
		      (DECL_STRUCT_FUNCTION (node->decl),
		       stmt, HIST_TYPE_INDIR_CALL);
		/* No need to do sanity check: gimple_ic_transform already
		   takes away bad histograms.  */
		if (h)
		  {
		    /* Counter 0 is the target, counter 1 is the number of
		       executions that hit the target, counter 2 is the total
		       number of executions.  */
		    if (h->hvalue.counters[2])
		      {
			struct cgraph_edge *e = node->get_edge (stmt);
			if (e && !e->indirect_unknown_callee)
			  continue;
			e->indirect_info->common_target_id
			  = h->hvalue.counters[0];
			e->indirect_info->common_target_probability
			  = GCOV_COMPUTE_SCALE (h->hvalue.counters[1],
						h->hvalue.counters[2]);
			if (e->indirect_info->common_target_probability
			    > REG_BR_PROB_BASE)
			  {
			    if (dump_file)
			      fprintf (dump_file, "Probability capped to 1\n");
			    e->indirect_info->common_target_probability
			      = REG_BR_PROB_BASE;
			  }
		      }
		    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
						   stmt, h);
		  }
	      }
	    time += estimate_num_insns (stmt, &eni_time_weights);
	    size += estimate_num_insns (stmt, &eni_size_weights);
	  }
	account_time_size (&hashtable, histogram, bb->count, time, size);
      }
  histogram.qsort (cmp_counts);
}
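/* Worked example for the probability computation above (illustrative;
   assumes the usual REG_BR_PROB_BASE of 10000): if the indirect-call
   histogram recorded counters {target-id, 75, 100}, i.e. the most common
   target was taken in 75 of 100 executions, then
   common_target_probability = GCOV_COMPUTE_SCALE (75, 100) == 7500,
   i.e. 75% on the REG_BR_PROB_BASE scale.  Values above the base are capped
   by the code above.  */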
/* Serialize the ipa info for lto.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }
  lto_destroy_simple_output_block (ob);
}
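/* The resulting section layout is simply the histogram length followed by
   one (count, time, size) triple per entry, e.g. (illustrative):

     [N] [count 0] [time 0] [size 0] ... [count N-1] [time N-1] [size N-1]

   which is what ipa_profile_read_summary below expects.  */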
/* Deserialize the ipa info for lto.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  int j = 0;

  hash_table<histogram_hash> hashtable (10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
	= lto_create_simple_input_block (file_data,
					 LTO_section_ipa_profile,
					 &data, &len);
      if (ib)
	{
	  unsigned int num = streamer_read_uhwi (ib);
	  unsigned int n;
	  for (n = 0; n < num; n++)
	    {
	      gcov_type count = streamer_read_gcov_count (ib);
	      int time = streamer_read_uhwi (ib);
	      int size = streamer_read_uhwi (ib);
	      account_time_size (&hashtable, histogram,
				 count, time, size);
	    }
	  lto_destroy_simple_input_block (file_data,
					  LTO_section_ipa_profile,
					  ib, data, len);
	}
    }
  histogram.qsort (cmp_counts);
}
/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  cgraph_node *function_symbol;
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
/* Worker for ipa_propagate_frequency.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *) data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
		|| d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != d->function_symbol)
	{
	  d->only_called_at_startup &= edge->caller->only_called_at_startup;
	  /* It makes sense to put main() together with the static
	     constructors.  It will be executed for sure, but the rest of the
	     functions called from main are definitely not startup only.  */
	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
	    d->only_called_at_startup = 0;
	  d->only_called_at_exit &= edge->caller->only_called_at_exit;
	}

      /* When profile feedback is available, do not try to propagate too hard;
	 counts are already a good guide on function frequencies and roundoff
	 errors can push a function into the unlikely section even when it is
	 executed by the train run.  Transfer the function only if all callers
	 are unlikely executed.  */
      if (profile_info
	  && opt_for_fn (d->function_symbol->decl, flag_branch_probabilities)
	  /* Thunks are not profiled.  This is more or less an implementation
	     bug.  */
	  && !d->function_symbol->thunk.thunk_p
	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
	      || (edge->caller->global.inlined_to
		  && edge->caller->global.inlined_to->frequency
		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
	d->maybe_unlikely_executed = false;
      if (!edge->frequency)
	continue;
      switch (edge->caller->frequency)
	{
	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
	  break;
	case NODE_FREQUENCY_EXECUTED_ONCE:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is executed once\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  if (inline_edge_summary (edge)->loop_depth)
	    {
	      d->maybe_executed_once = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  Called in loop\n");
	    }
	  break;
	case NODE_FREQUENCY_HOT:
	case NODE_FREQUENCY_NORMAL:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is normal or hot\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  d->maybe_executed_once = false;
	  break;
	}
    }
  return edge != NULL;
}
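/* Example of the propagation above (illustrative): if every caller of NODE
   has frequency NODE_FREQUENCY_EXECUTED_ONCE and none of the call sites is
   inside a loop, maybe_unlikely_executed is cleared but maybe_executed_once
   survives, so ipa_propagate_frequency below can mark NODE as executed once.
   A single hot or normal caller clears both flags.  */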
/* Return true if NODE contains hot calls.  */

bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
    else if (!e->inline_failed
	     && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
  return false;
}
/* See if the frequency of NODE can be updated based on frequencies of its
   callers.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {node, true, true, true, true};
  bool changed = false;

  /* We cannot propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || (opt_for_fn (node->decl, flag_devirtualize)
	  && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
				     true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at startup.\n",
		 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at exit.\n",
		 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count)
    {
      bool hot = false;
      if (node->count >= get_hot_bb_threshold ())
	hot = true;
      if (!hot)
	hot |= contains_hot_call_p (node);
      if (hot)
	{
	  if (node->frequency != NODE_FREQUENCY_HOT)
	    {
	      if (dump_file)
		fprintf (dump_file, "Node %s promoted to hot.\n",
			 node->name ());
	      node->frequency = NODE_FREQUENCY_HOT;
	      return true;
	    }
	  return false;
	}
      else if (node->frequency == NODE_FREQUENCY_HOT)
	{
	  if (dump_file)
	    fprintf (dump_file, "Node %s reduced to normal.\n",
		     node->name ());
	  node->frequency = NODE_FREQUENCY_NORMAL;
	  changed = true;
	}
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
		 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once
	   && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to executed once.\n",
		 node->name ());
      changed = true;
    }
  return changed;
}
/* Simple ipa profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n, *n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  int nmismatch = 0, nimpossible = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
	{
	  gcov_type min, cumulated_time = 0, cumulated_size = 0;

	  fprintf (dump_file, "Overall time: %" PRId64 "\n",
		   (int64_t)overall_time);
	  min = get_hot_bb_threshold ();
	  for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "GCOV min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)min,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
	{
	  cumulated += histogram[i]->count * histogram[i]->time;
	  threshold = histogram[i]->count;
	}
      if (!threshold)
	threshold = 1;
      if (dump_file)
	{
	  gcov_type cumulated_time = 0, cumulated_size = 0;

	  for (i = 0;
	       i < (int)histogram.length () && histogram[i]->count >= threshold;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "Determined min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)threshold,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      if (threshold > get_hot_bb_threshold ()
	  || in_lto_p)
	{
	  if (dump_file)
	    fprintf (dump_file, "Threshold updated.\n");
	  set_hot_bb_threshold (threshold);
	}
    }
  histogram.release ();
  histogram_pool.release ();
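  /* Worked example for the cutoff computation above (illustrative; assumes
     the default --param hot-bb-count-ws-permille of 999): with overall_time
     of 1000000, cutoff = (1000000 * 999 + 500) / 1000 = 999000, so the loop
     walks the histogram from the largest counts down and the resulting
     threshold is the smallest execution count still needed to cover 99.9%
     of the measured time.  */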
  /* Produce speculative calls: we saved the common target from profiling into
     e->common_target_id.  Now, at link time, we can look up the corresponding
     function node and produce a speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      if (!opt_for_fn (n->decl, flag_ipa_profile))
	continue;

      for (e = n->indirect_calls; e; e = e->next_callee)
	{
	  if (n->count)
	    nindirect++;
	  if (e->indirect_info->common_target_id)
	    {
	      if (!node_map_initialized)
		init_node_map (false);
	      node_map_initialized = true;
	      ncommon++;
	      n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
	      if (n2)
		{
		  if (dump_file)
		    fprintf (dump_file, "Indirect call -> direct call from"
			     " other module %s/%i => %s/%i, prob %3.2f\n",
			     xstrdup_for_dump (n->name ()), n->order,
			     xstrdup_for_dump (n2->name ()), n2->order,
			     e->indirect_info->common_target_probability
			     / (float)REG_BR_PROB_BASE);
		  if (e->indirect_info->common_target_probability
		      < REG_BR_PROB_BASE / 2)
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: probability is too low.\n");
		    }
		  else if (!e->maybe_hot_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: call is cold.\n");
		    }
		  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
			   && n2->can_be_discarded_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: target is overwritable "
				 "and can be discarded.\n");
		    }
		  else if (ipa_node_params_sum && ipa_edge_args_vector
			   && !IPA_NODE_REF (n2)->descriptors.is_empty ()
			   && ipa_get_param_count (IPA_NODE_REF (n2))
			      != ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			   && (ipa_get_param_count (IPA_NODE_REF (n2))
			       >= ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			       || !stdarg_p (TREE_TYPE (n2->decl))))
		    {
		      nmismatch++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "parameter count mismatch\n");
		    }
		  else if (e->indirect_info->polymorphic
			   && !opt_for_fn (n->decl, flag_devirtualize)
			   && !possible_polymorphic_call_target_p (e, n2))
		    {
		      nimpossible++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "function is not in the polymorphic "
				 "call target list\n");
		    }
		  else
		    {
		      /* The target may be overwritable, but the profile says
			 that control flow goes to this particular
			 implementation of N2.  Speculate on the local alias
			 to allow inlining.  */
		      if (!n2->can_be_discarded_p ())
			{
			  cgraph_node *alias;
			  alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
			  if (alias)
			    n2 = alias;
			}
		      nconverted++;
		      e->make_speculative
			(n2,
			 apply_scale (e->count,
				      e->indirect_info->common_target_probability),
			 apply_scale (e->frequency,
				      e->indirect_info->common_target_probability));
		      update = true;
		    }
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file, "Function with profile-id %i not found.\n",
			     e->indirect_info->common_target_id);
		  nunknown++;
		}
	    }
	}
      if (update)
	inline_update_overall_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
	     "%i indirect calls trained.\n"
	     "%i (%3.2f%%) have common target.\n"
	     "%i (%3.2f%%) targets were not found.\n"
	     "%i (%3.2f%%) targets had parameter count mismatch.\n"
	     "%i (%3.2f%%) targets were not in the polymorphic call target list.\n"
	     "%i (%3.2f%%) speculations seem useless.\n"
	     "%i (%3.2f%%) speculations produced.\n",
	     nindirect,
	     ncommon, ncommon * 100.0 / nindirect,
	     nunknown, nunknown * 100.0 / nindirect,
	     nmismatch, nmismatch * 100.0 / nindirect,
	     nimpossible, nimpossible * 100.0 / nindirect,
	     nuseless, nuseless * 100.0 / nindirect,
	     nconverted, nconverted * 100.0 / nindirect);
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local
	  && opt_for_fn (order[i]->decl, flag_ipa_profile)
	  && ipa_propagate_frequency (order[i]))
	{
	  for (e = order[i]->callees; e; e = e->next_callee)
	    if (e->callee->local.local && !e->callee->aux)
	      {
		something_changed = true;
		e->callee->aux = (void *)1;
	      }
	}
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
	{
	  if (order[i]->aux
	      && opt_for_fn (order[i]->decl, flag_ipa_profile)
	      && ipa_propagate_frequency (order[i]))
	    {
	      for (e = order[i]->callees; e; e = e->next_callee)
		if (e->callee->local.local && !e->callee->aux)
		  {
		    something_changed = true;
		    e->callee->aux = (void *)1;
		  }
	    }
	  order[i]->aux = NULL;
	}
    }
  free (order);
  return 0;
}
namespace {

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
		      ipa_profile_generate_summary, /* generate_summary */
		      ipa_profile_write_summary, /* write_summary */
		      ipa_profile_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}