1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently, the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns a profile-id to each function and provides us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "hard-reg-set.h"
56 #include "fold-const.h"
58 #include "tree-pass.h"
59 #include "internal-fn.h"
60 #include "gimple-iterator.h"
63 #include "tree-iterator.h"
64 #include "ipa-utils.h"
67 #include "value-prof.h"
68 #include "alloc-pool.h"
69 #include "tree-inline.h"
70 #include "lto-streamer.h"
71 #include "data-streamer.h"
72 #include "symbol-summary.h"
74 #include "ipa-inline.h"
76 /* Entry in the histogram. */
78 struct histogram_entry
85 /* Histogram of profile values.
86 The histogram is represented as an ordered vector of entries allocated via
87 histogram_pool. During construction a separate hashtable is kept to lookup
90 vec
<histogram_entry
*> histogram
;
91 static pool_allocator
<histogram_entry
> histogram_pool
92 ("IPA histogram", 10);
94 /* Hashtable support for histogram entries, hashed and compared by their execution count. */
96 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
98 static inline hashval_t
hash (const histogram_entry
*);
99 static inline int equal (const histogram_entry
*, const histogram_entry
*);
103 histogram_hash::hash (const histogram_entry
*val
)
109 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
111 return val
->count
== val2
->count
;
114 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
115 HASHTABLE is the on-side hash kept to avoid duplicates. */
118 account_time_size (hash_table
<histogram_hash
> *hashtable
,
119 vec
<histogram_entry
*> &histogram
,
120 gcov_type count
, int time
, int size
)
122 histogram_entry key
= {count
, 0, 0};
123 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
127 *val
= histogram_pool
.allocate ();
129 histogram
.safe_push (*val
);
131 (*val
)->time
+= time
;
132 (*val
)->size
+= size
;
136 cmp_counts (const void *v1
, const void *v2
)
138 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
139 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
140 if (h1
->count
< h2
->count
)
142 if (h1
->count
> h2
->count
)
147 /* Dump HISTOGRAM to FILE. */
150 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
153 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
155 fprintf (dump_file
, "Histogram:\n");
156 for (i
= 0; i
< histogram
.length (); i
++)
158 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
159 overall_size
+= histogram
[i
]->size
;
165 for (i
= 0; i
< histogram
.length (); i
++)
167 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
168 cumulated_size
+= histogram
[i
]->size
;
169 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
170 (int64_t) histogram
[i
]->count
,
172 cumulated_time
* 100.0 / overall_time
,
174 cumulated_size
* 100.0 / overall_size
);
178 /* Collect histogram from CFG profiles. */
181 ipa_profile_generate_summary (void)
183 struct cgraph_node
*node
;
184 gimple_stmt_iterator gsi
;
187 hash_table
<histogram_hash
> hashtable (10);
189 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
190 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
194 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
196 gimple stmt
= gsi_stmt (gsi
);
197 if (gimple_code (stmt
) == GIMPLE_CALL
198 && !gimple_call_fndecl (stmt
))
201 h
= gimple_histogram_value_of_type
202 (DECL_STRUCT_FUNCTION (node
->decl
),
203 stmt
, HIST_TYPE_INDIR_CALL
);
204 /* No need to do sanity check: gimple_ic_transform already
205 takes away bad histograms. */
208 /* counter 0 is target, counter 1 is number of execution we called target,
209 counter 2 is total number of executions. */
210 if (h
->hvalue
.counters
[2])
212 struct cgraph_edge
* e
= node
->get_edge (stmt
);
213 if (e
&& !e
->indirect_unknown_callee
)
215 e
->indirect_info
->common_target_id
216 = h
->hvalue
.counters
[0];
217 e
->indirect_info
->common_target_probability
218 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
219 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
222 fprintf (dump_file
, "Probability capped to 1\n");
223 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
226 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
230 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
231 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
233 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
235 histogram
.qsort (cmp_counts
);
238 /* Serialize the ipa info for lto. */
241 ipa_profile_write_summary (void)
243 struct lto_simple_output_block
*ob
244 = lto_create_simple_output_block (LTO_section_ipa_profile
);
247 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
248 for (i
= 0; i
< histogram
.length (); i
++)
250 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
251 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
252 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
254 lto_destroy_simple_output_block (ob
);
257 /* Deserialize the ipa info for lto. */
260 ipa_profile_read_summary (void)
262 struct lto_file_decl_data
** file_data_vec
263 = lto_get_file_decl_data ();
264 struct lto_file_decl_data
* file_data
;
267 hash_table
<histogram_hash
> hashtable (10);
269 while ((file_data
= file_data_vec
[j
++]))
273 struct lto_input_block
*ib
274 = lto_create_simple_input_block (file_data
,
275 LTO_section_ipa_profile
,
279 unsigned int num
= streamer_read_uhwi (ib
);
281 for (n
= 0; n
< num
; n
++)
283 gcov_type count
= streamer_read_gcov_count (ib
);
284 int time
= streamer_read_uhwi (ib
);
285 int size
= streamer_read_uhwi (ib
);
286 account_time_size (&hashtable
, histogram
,
289 lto_destroy_simple_input_block (file_data
,
290 LTO_section_ipa_profile
,
294 histogram
.qsort (cmp_counts
);
297 /* Data used by ipa_propagate_frequency. */
299 struct ipa_propagate_frequency_data
301 cgraph_node
*function_symbol
;
302 bool maybe_unlikely_executed
;
303 bool maybe_executed_once
;
304 bool only_called_at_startup
;
305 bool only_called_at_exit
;
308 /* Worker for ipa_propagate_frequency_1. */
311 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
313 struct ipa_propagate_frequency_data
*d
;
314 struct cgraph_edge
*edge
;
316 d
= (struct ipa_propagate_frequency_data
*)data
;
317 for (edge
= node
->callers
;
318 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
319 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
320 edge
= edge
->next_caller
)
322 if (edge
->caller
!= d
->function_symbol
)
324 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
325 /* It makes sense to put main() together with the static constructors.
326 It will be executed for sure, but rest of functions called from
327 main are definitely not at startup only. */
328 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
329 d
->only_called_at_startup
= 0;
330 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
333 /* When profile feedback is available, do not try to propagate too hard;
334 counts are already good guide on function frequencies and roundoff
335 errors can make us to push function into unlikely section even when
336 it is executed by the train run. Transfer the function only if all
337 callers are unlikely executed. */
339 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
340 /* Thunks are not profiled. This is more or less implementation
342 && !d
->function_symbol
->thunk
.thunk_p
343 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
344 || (edge
->caller
->global
.inlined_to
345 && edge
->caller
->global
.inlined_to
->frequency
346 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
347 d
->maybe_unlikely_executed
= false;
348 if (!edge
->frequency
)
350 switch (edge
->caller
->frequency
)
352 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
354 case NODE_FREQUENCY_EXECUTED_ONCE
:
355 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
356 fprintf (dump_file
, " Called by %s that is executed once\n",
357 edge
->caller
->name ());
358 d
->maybe_unlikely_executed
= false;
359 if (inline_edge_summary (edge
)->loop_depth
)
361 d
->maybe_executed_once
= false;
362 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
363 fprintf (dump_file
, " Called in loop\n");
366 case NODE_FREQUENCY_HOT
:
367 case NODE_FREQUENCY_NORMAL
:
368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
369 fprintf (dump_file
, " Called by %s that is normal or hot\n",
370 edge
->caller
->name ());
371 d
->maybe_unlikely_executed
= false;
372 d
->maybe_executed_once
= false;
379 /* Return ture if NODE contains hot calls. */
382 contains_hot_call_p (struct cgraph_node
*node
)
384 struct cgraph_edge
*e
;
385 for (e
= node
->callees
; e
; e
= e
->next_callee
)
386 if (e
->maybe_hot_p ())
388 else if (!e
->inline_failed
389 && contains_hot_call_p (e
->callee
))
391 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
392 if (e
->maybe_hot_p ())
397 /* See if the frequency of NODE can be updated based on frequencies of its
400 ipa_propagate_frequency (struct cgraph_node
*node
)
402 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
403 bool changed
= false;
405 /* We can not propagate anything useful about externally visible functions
406 nor about virtuals. */
407 if (!node
->local
.local
409 || (opt_for_fn (node
->decl
, flag_devirtualize
)
410 && DECL_VIRTUAL_P (node
->decl
)))
412 gcc_assert (node
->analyzed
);
413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
414 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
416 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
419 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
420 && !node
->only_called_at_startup
)
422 node
->only_called_at_startup
= true;
424 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
428 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
429 && !node
->only_called_at_exit
)
431 node
->only_called_at_exit
= true;
433 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
438 /* With profile we can decide on hot/normal based on count. */
442 if (node
->count
>= get_hot_bb_threshold ())
445 hot
|= contains_hot_call_p (node
);
448 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
451 fprintf (dump_file
, "Node %s promoted to hot.\n",
453 node
->frequency
= NODE_FREQUENCY_HOT
;
458 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
461 fprintf (dump_file
, "Node %s reduced to normal.\n",
463 node
->frequency
= NODE_FREQUENCY_NORMAL
;
467 /* These come either from profile or user hints; never update them. */
468 if (node
->frequency
== NODE_FREQUENCY_HOT
469 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
471 if (d
.maybe_unlikely_executed
)
473 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
475 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
479 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
481 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
483 fprintf (dump_file
, "Node %s promoted to executed once.\n",
490 /* Simple ipa profile pass propagating frequencies across the callgraph. */
495 struct cgraph_node
**order
;
496 struct cgraph_edge
*e
;
498 bool something_changed
= false;
500 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
501 struct cgraph_node
*n
,*n2
;
502 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
503 int nmismatch
= 0, nimpossible
= 0;
504 bool node_map_initialized
= false;
507 dump_histogram (dump_file
, histogram
);
508 for (i
= 0; i
< (int)histogram
.length (); i
++)
510 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
511 overall_size
+= histogram
[i
]->size
;
517 gcc_assert (overall_size
);
520 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
522 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
523 (int64_t)overall_time
);
524 min
= get_hot_bb_threshold ();
525 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
528 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
529 cumulated_size
+= histogram
[i
]->size
;
531 fprintf (dump_file
, "GCOV min count: %" PRId64
532 " Time:%3.2f%% Size:%3.2f%%\n",
534 cumulated_time
* 100.0 / overall_time
,
535 cumulated_size
* 100.0 / overall_size
);
537 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
539 for (i
= 0; cumulated
< cutoff
; i
++)
541 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
542 threshold
= histogram
[i
]->count
;
548 gcov_type cumulated_time
= 0, cumulated_size
= 0;
551 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
554 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
555 cumulated_size
+= histogram
[i
]->size
;
557 fprintf (dump_file
, "Determined min count: %" PRId64
558 " Time:%3.2f%% Size:%3.2f%%\n",
560 cumulated_time
* 100.0 / overall_time
,
561 cumulated_size
* 100.0 / overall_size
);
563 if (threshold
> get_hot_bb_threshold ()
567 fprintf (dump_file
, "Threshold updated.\n");
568 set_hot_bb_threshold (threshold
);
571 histogram
.release ();
572 histogram_pool
.release ();
574 /* Produce speculative calls: we saved common traget from porfiling into
575 e->common_target_id. Now, at link time, we can look up corresponding
576 function node and produce speculative call. */
578 FOR_EACH_DEFINED_FUNCTION (n
)
582 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
585 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
589 if (e
->indirect_info
->common_target_id
)
591 if (!node_map_initialized
)
592 init_node_map (false);
593 node_map_initialized
= true;
595 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
600 fprintf (dump_file
, "Indirect call -> direct call from"
601 " other module %s/%i => %s/%i, prob %3.2f\n",
602 xstrdup_for_dump (n
->name ()), n
->order
,
603 xstrdup_for_dump (n2
->name ()), n2
->order
,
604 e
->indirect_info
->common_target_probability
605 / (float)REG_BR_PROB_BASE
);
607 if (e
->indirect_info
->common_target_probability
608 < REG_BR_PROB_BASE
/ 2)
613 "Not speculating: probability is too low.\n");
615 else if (!e
->maybe_hot_p ())
620 "Not speculating: call is cold.\n");
622 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
623 && n2
->can_be_discarded_p ())
628 "Not speculating: target is overwritable "
629 "and can be discarded.\n");
631 else if (ipa_node_params_sum
&& ipa_edge_args_vector
632 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
633 && ipa_get_param_count (IPA_NODE_REF (n2
))
634 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
635 && (ipa_get_param_count (IPA_NODE_REF (n2
))
636 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
637 || !stdarg_p (TREE_TYPE (n2
->decl
))))
643 "parameter count mistmatch\n");
645 else if (e
->indirect_info
->polymorphic
646 && !opt_for_fn (n
->decl
, flag_devirtualize
)
647 && !possible_polymorphic_call_target_p (e
, n2
))
653 "function is not in the polymorphic "
654 "call target list\n");
658 /* Target may be overwritable, but profile says that
659 control flow goes to this particular implementation
660 of N2. Speculate on the local alias to allow inlining.
662 if (!n2
->can_be_discarded_p ())
665 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
672 apply_scale (e
->count
,
673 e
->indirect_info
->common_target_probability
),
674 apply_scale (e
->frequency
,
675 e
->indirect_info
->common_target_probability
));
682 fprintf (dump_file
, "Function with profile-id %i not found.\n",
683 e
->indirect_info
->common_target_id
);
689 inline_update_overall_summary (n
);
691 if (node_map_initialized
)
693 if (dump_file
&& nindirect
)
695 "%i indirect calls trained.\n"
696 "%i (%3.2f%%) have common target.\n"
697 "%i (%3.2f%%) targets was not found.\n"
698 "%i (%3.2f%%) targets had parameter count mismatch.\n"
699 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
700 "%i (%3.2f%%) speculations seems useless.\n"
701 "%i (%3.2f%%) speculations produced.\n",
703 ncommon
, ncommon
* 100.0 / nindirect
,
704 nunknown
, nunknown
* 100.0 / nindirect
,
705 nmismatch
, nmismatch
* 100.0 / nindirect
,
706 nimpossible
, nimpossible
* 100.0 / nindirect
,
707 nuseless
, nuseless
* 100.0 / nindirect
,
708 nconverted
, nconverted
* 100.0 / nindirect
);
710 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
711 order_pos
= ipa_reverse_postorder (order
);
712 for (i
= order_pos
- 1; i
>= 0; i
--)
714 if (order
[i
]->local
.local
715 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
716 && ipa_propagate_frequency (order
[i
]))
718 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
719 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
721 something_changed
= true;
722 e
->callee
->aux
= (void *)1;
725 order
[i
]->aux
= NULL
;
728 while (something_changed
)
730 something_changed
= false;
731 for (i
= order_pos
- 1; i
>= 0; i
--)
734 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
735 && ipa_propagate_frequency (order
[i
]))
737 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
738 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
740 something_changed
= true;
741 e
->callee
->aux
= (void *)1;
744 order
[i
]->aux
= NULL
;
753 const pass_data pass_data_ipa_profile
=
756 "profile_estimate", /* name */
757 OPTGROUP_NONE
, /* optinfo_flags */
758 TV_IPA_PROFILE
, /* tv_id */
759 0, /* properties_required */
760 0, /* properties_provided */
761 0, /* properties_destroyed */
762 0, /* todo_flags_start */
763 0, /* todo_flags_finish */
766 class pass_ipa_profile
: public ipa_opt_pass_d
769 pass_ipa_profile (gcc::context
*ctxt
)
770 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
771 ipa_profile_generate_summary
, /* generate_summary */
772 ipa_profile_write_summary
, /* write_summary */
773 ipa_profile_read_summary
, /* read_summary */
774 NULL
, /* write_optimization_summary */
775 NULL
, /* read_optimization_summary */
776 NULL
, /* stmt_fixup */
777 0, /* function_transform_todo_flags_start */
778 NULL
, /* function_transform */
779 NULL
) /* variable_transform */
782 /* opt_pass methods: */
783 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
784 virtual unsigned int execute (function
*) { return ipa_profile (); }
786 }; // class pass_ipa_profile
791 make_pass_ipa_profile (gcc::context
*ctxt
)
793 return new pass_ipa_profile (ctxt
);