1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profille
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequentely histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "alloc-pool.h"
55 #include "tree-pass.h"
57 #include "data-streamer.h"
58 #include "gimple-iterator.h"
59 #include "ipa-utils.h"
62 #include "value-prof.h"
63 #include "tree-inline.h"
64 #include "symbol-summary.h"
67 #include "ipa-fnsummary.h"
69 /* Entry in the histogram. */
71 struct histogram_entry
78 /* Histogram of profile values.
79 The histogram is represented as an ordered vector of entries allocated via
80 histogram_pool. During construction a separate hashtable is kept to lookup
83 vec
<histogram_entry
*> histogram
;
84 static object_allocator
<histogram_entry
> histogram_pool ("IPA histogram");
86 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
88 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
90 static inline hashval_t
hash (const histogram_entry
*);
91 static inline int equal (const histogram_entry
*, const histogram_entry
*);
95 histogram_hash::hash (const histogram_entry
*val
)
101 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
103 return val
->count
== val2
->count
;
106 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
107 HASHTABLE is the on-side hash kept to avoid duplicates. */
110 account_time_size (hash_table
<histogram_hash
> *hashtable
,
111 vec
<histogram_entry
*> &histogram
,
112 gcov_type count
, int time
, int size
)
114 histogram_entry key
= {count
, 0, 0};
115 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
119 *val
= histogram_pool
.allocate ();
121 histogram
.safe_push (*val
);
123 (*val
)->time
+= time
;
124 (*val
)->size
+= size
;
128 cmp_counts (const void *v1
, const void *v2
)
130 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
131 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
132 if (h1
->count
< h2
->count
)
134 if (h1
->count
> h2
->count
)
139 /* Dump HISTOGRAM to FILE. */
142 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
145 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
147 fprintf (dump_file
, "Histogram:\n");
148 for (i
= 0; i
< histogram
.length (); i
++)
150 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
151 overall_size
+= histogram
[i
]->size
;
157 for (i
= 0; i
< histogram
.length (); i
++)
159 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
160 cumulated_size
+= histogram
[i
]->size
;
161 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
162 (int64_t) histogram
[i
]->count
,
164 cumulated_time
* 100.0 / overall_time
,
166 cumulated_size
* 100.0 / overall_size
);
170 /* Collect histogram from CFG profiles. */
173 ipa_profile_generate_summary (void)
175 struct cgraph_node
*node
;
176 gimple_stmt_iterator gsi
;
179 hash_table
<histogram_hash
> hashtable (10);
181 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
182 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
186 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
188 gimple
*stmt
= gsi_stmt (gsi
);
189 if (gimple_code (stmt
) == GIMPLE_CALL
190 && !gimple_call_fndecl (stmt
))
193 h
= gimple_histogram_value_of_type
194 (DECL_STRUCT_FUNCTION (node
->decl
),
195 stmt
, HIST_TYPE_INDIR_CALL
);
196 /* No need to do sanity check: gimple_ic_transform already
197 takes away bad histograms. */
200 /* counter 0 is target, counter 1 is number of execution we called target,
201 counter 2 is total number of executions. */
202 if (h
->hvalue
.counters
[2])
204 struct cgraph_edge
* e
= node
->get_edge (stmt
);
205 if (e
&& !e
->indirect_unknown_callee
)
207 e
->indirect_info
->common_target_id
208 = h
->hvalue
.counters
[0];
209 e
->indirect_info
->common_target_probability
210 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
211 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
214 fprintf (dump_file
, "Probability capped to 1\n");
215 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
218 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
222 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
223 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
225 if (bb
->count
.initialized_p ())
226 account_time_size (&hashtable
, histogram
, bb
->count
.to_gcov_type (),
229 histogram
.qsort (cmp_counts
);
232 /* Serialize the ipa info for lto. */
235 ipa_profile_write_summary (void)
237 struct lto_simple_output_block
*ob
238 = lto_create_simple_output_block (LTO_section_ipa_profile
);
241 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
242 for (i
= 0; i
< histogram
.length (); i
++)
244 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
245 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
246 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
248 lto_destroy_simple_output_block (ob
);
251 /* Deserialize the ipa info for lto. */
254 ipa_profile_read_summary (void)
256 struct lto_file_decl_data
** file_data_vec
257 = lto_get_file_decl_data ();
258 struct lto_file_decl_data
* file_data
;
261 hash_table
<histogram_hash
> hashtable (10);
263 while ((file_data
= file_data_vec
[j
++]))
267 struct lto_input_block
*ib
268 = lto_create_simple_input_block (file_data
,
269 LTO_section_ipa_profile
,
273 unsigned int num
= streamer_read_uhwi (ib
);
275 for (n
= 0; n
< num
; n
++)
277 gcov_type count
= streamer_read_gcov_count (ib
);
278 int time
= streamer_read_uhwi (ib
);
279 int size
= streamer_read_uhwi (ib
);
280 account_time_size (&hashtable
, histogram
,
283 lto_destroy_simple_input_block (file_data
,
284 LTO_section_ipa_profile
,
288 histogram
.qsort (cmp_counts
);
291 /* Data used by ipa_propagate_frequency. */
293 struct ipa_propagate_frequency_data
295 cgraph_node
*function_symbol
;
296 bool maybe_unlikely_executed
;
297 bool maybe_executed_once
;
298 bool only_called_at_startup
;
299 bool only_called_at_exit
;
302 /* Worker for ipa_propagate_frequency_1. */
305 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
307 struct ipa_propagate_frequency_data
*d
;
308 struct cgraph_edge
*edge
;
310 d
= (struct ipa_propagate_frequency_data
*)data
;
311 for (edge
= node
->callers
;
312 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
313 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
314 edge
= edge
->next_caller
)
316 if (edge
->caller
!= d
->function_symbol
)
318 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
319 /* It makes sense to put main() together with the static constructors.
320 It will be executed for sure, but rest of functions called from
321 main are definitely not at startup only. */
322 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
323 d
->only_called_at_startup
= 0;
324 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
327 /* When profile feedback is available, do not try to propagate too hard;
328 counts are already good guide on function frequencies and roundoff
329 errors can make us to push function into unlikely section even when
330 it is executed by the train run. Transfer the function only if all
331 callers are unlikely executed. */
333 && edge
->callee
->count
.initialized_p ()
334 /* Thunks are not profiled. This is more or less implementation
336 && !d
->function_symbol
->thunk
.thunk_p
337 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
338 || (edge
->caller
->global
.inlined_to
339 && edge
->caller
->global
.inlined_to
->frequency
340 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
341 d
->maybe_unlikely_executed
= false;
342 if (!edge
->frequency
)
344 switch (edge
->caller
->frequency
)
346 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
348 case NODE_FREQUENCY_EXECUTED_ONCE
:
349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
350 fprintf (dump_file
, " Called by %s that is executed once\n",
351 edge
->caller
->name ());
352 d
->maybe_unlikely_executed
= false;
353 if (ipa_call_summaries
->get (edge
)->loop_depth
)
355 d
->maybe_executed_once
= false;
356 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
357 fprintf (dump_file
, " Called in loop\n");
360 case NODE_FREQUENCY_HOT
:
361 case NODE_FREQUENCY_NORMAL
:
362 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
363 fprintf (dump_file
, " Called by %s that is normal or hot\n",
364 edge
->caller
->name ());
365 d
->maybe_unlikely_executed
= false;
366 d
->maybe_executed_once
= false;
373 /* Return ture if NODE contains hot calls. */
376 contains_hot_call_p (struct cgraph_node
*node
)
378 struct cgraph_edge
*e
;
379 for (e
= node
->callees
; e
; e
= e
->next_callee
)
380 if (e
->maybe_hot_p ())
382 else if (!e
->inline_failed
383 && contains_hot_call_p (e
->callee
))
385 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
386 if (e
->maybe_hot_p ())
391 /* See if the frequency of NODE can be updated based on frequencies of its
394 ipa_propagate_frequency (struct cgraph_node
*node
)
396 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
397 bool changed
= false;
399 /* We can not propagate anything useful about externally visible functions
400 nor about virtuals. */
401 if (!node
->local
.local
403 || (opt_for_fn (node
->decl
, flag_devirtualize
)
404 && DECL_VIRTUAL_P (node
->decl
)))
406 gcc_assert (node
->analyzed
);
407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
408 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
410 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
413 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
414 && !node
->only_called_at_startup
)
416 node
->only_called_at_startup
= true;
418 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
422 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
423 && !node
->only_called_at_exit
)
425 node
->only_called_at_exit
= true;
427 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
432 /* With profile we can decide on hot/normal based on count. */
433 if (node
->count
.initialized_p ())
436 if (!(node
->count
== profile_count::zero ())
437 && node
->count
>= get_hot_bb_threshold ())
440 hot
|= contains_hot_call_p (node
);
443 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
446 fprintf (dump_file
, "Node %s promoted to hot.\n",
448 node
->frequency
= NODE_FREQUENCY_HOT
;
453 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
456 fprintf (dump_file
, "Node %s reduced to normal.\n",
458 node
->frequency
= NODE_FREQUENCY_NORMAL
;
462 /* These come either from profile or user hints; never update them. */
463 if (node
->frequency
== NODE_FREQUENCY_HOT
464 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
466 if (d
.maybe_unlikely_executed
)
468 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
470 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
474 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
476 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
478 fprintf (dump_file
, "Node %s promoted to executed once.\n",
485 /* Simple ipa profile pass propagating frequencies across the callgraph. */
490 struct cgraph_node
**order
;
491 struct cgraph_edge
*e
;
493 bool something_changed
= false;
495 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
496 struct cgraph_node
*n
,*n2
;
497 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
498 int nmismatch
= 0, nimpossible
= 0;
499 bool node_map_initialized
= false;
502 dump_histogram (dump_file
, histogram
);
503 for (i
= 0; i
< (int)histogram
.length (); i
++)
505 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
506 overall_size
+= histogram
[i
]->size
;
512 gcc_assert (overall_size
);
515 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
517 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
518 (int64_t)overall_time
);
519 min
= get_hot_bb_threshold ();
520 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
523 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
524 cumulated_size
+= histogram
[i
]->size
;
526 fprintf (dump_file
, "GCOV min count: %" PRId64
527 " Time:%3.2f%% Size:%3.2f%%\n",
529 cumulated_time
* 100.0 / overall_time
,
530 cumulated_size
* 100.0 / overall_size
);
532 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
534 for (i
= 0; cumulated
< cutoff
; i
++)
536 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
537 threshold
= histogram
[i
]->count
;
543 gcov_type cumulated_time
= 0, cumulated_size
= 0;
546 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
549 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
550 cumulated_size
+= histogram
[i
]->size
;
552 fprintf (dump_file
, "Determined min count: %" PRId64
553 " Time:%3.2f%% Size:%3.2f%%\n",
555 cumulated_time
* 100.0 / overall_time
,
556 cumulated_size
* 100.0 / overall_size
);
558 if (threshold
> get_hot_bb_threshold ()
562 fprintf (dump_file
, "Threshold updated.\n");
563 set_hot_bb_threshold (threshold
);
566 histogram
.release ();
567 histogram_pool
.release ();
569 /* Produce speculative calls: we saved common traget from porfiling into
570 e->common_target_id. Now, at link time, we can look up corresponding
571 function node and produce speculative call. */
573 FOR_EACH_DEFINED_FUNCTION (n
)
577 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
580 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
582 if (n
->count
.initialized_p ())
584 if (e
->indirect_info
->common_target_id
)
586 if (!node_map_initialized
)
587 init_node_map (false);
588 node_map_initialized
= true;
590 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
595 fprintf (dump_file
, "Indirect call -> direct call from"
596 " other module %s => %s, prob %3.2f\n",
599 e
->indirect_info
->common_target_probability
600 / (float)REG_BR_PROB_BASE
);
602 if (e
->indirect_info
->common_target_probability
603 < REG_BR_PROB_BASE
/ 2)
608 "Not speculating: probability is too low.\n");
610 else if (!e
->maybe_hot_p ())
615 "Not speculating: call is cold.\n");
617 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
618 && n2
->can_be_discarded_p ())
623 "Not speculating: target is overwritable "
624 "and can be discarded.\n");
626 else if (ipa_node_params_sum
&& ipa_edge_args_sum
627 && (!vec_safe_is_empty
628 (IPA_NODE_REF (n2
)->descriptors
))
629 && ipa_get_param_count (IPA_NODE_REF (n2
))
630 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
631 && (ipa_get_param_count (IPA_NODE_REF (n2
))
632 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
633 || !stdarg_p (TREE_TYPE (n2
->decl
))))
639 "parameter count mistmatch\n");
641 else if (e
->indirect_info
->polymorphic
642 && !opt_for_fn (n
->decl
, flag_devirtualize
)
643 && !possible_polymorphic_call_target_p (e
, n2
))
649 "function is not in the polymorphic "
650 "call target list\n");
654 /* Target may be overwritable, but profile says that
655 control flow goes to this particular implementation
656 of N2. Speculate on the local alias to allow inlining.
658 if (!n2
->can_be_discarded_p ())
661 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
668 e
->count
.apply_probability
669 (e
->indirect_info
->common_target_probability
),
670 apply_scale (e
->frequency
,
671 e
->indirect_info
->common_target_probability
));
678 fprintf (dump_file
, "Function with profile-id %i not found.\n",
679 e
->indirect_info
->common_target_id
);
685 ipa_update_overall_fn_summary (n
);
687 if (node_map_initialized
)
689 if (dump_file
&& nindirect
)
691 "%i indirect calls trained.\n"
692 "%i (%3.2f%%) have common target.\n"
693 "%i (%3.2f%%) targets was not found.\n"
694 "%i (%3.2f%%) targets had parameter count mismatch.\n"
695 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
696 "%i (%3.2f%%) speculations seems useless.\n"
697 "%i (%3.2f%%) speculations produced.\n",
699 ncommon
, ncommon
* 100.0 / nindirect
,
700 nunknown
, nunknown
* 100.0 / nindirect
,
701 nmismatch
, nmismatch
* 100.0 / nindirect
,
702 nimpossible
, nimpossible
* 100.0 / nindirect
,
703 nuseless
, nuseless
* 100.0 / nindirect
,
704 nconverted
, nconverted
* 100.0 / nindirect
);
706 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
707 order_pos
= ipa_reverse_postorder (order
);
708 for (i
= order_pos
- 1; i
>= 0; i
--)
710 if (order
[i
]->local
.local
711 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
712 && ipa_propagate_frequency (order
[i
]))
714 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
715 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
717 something_changed
= true;
718 e
->callee
->aux
= (void *)1;
721 order
[i
]->aux
= NULL
;
724 while (something_changed
)
726 something_changed
= false;
727 for (i
= order_pos
- 1; i
>= 0; i
--)
730 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
731 && ipa_propagate_frequency (order
[i
]))
733 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
734 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
736 something_changed
= true;
737 e
->callee
->aux
= (void *)1;
740 order
[i
]->aux
= NULL
;
749 const pass_data pass_data_ipa_profile
=
752 "profile_estimate", /* name */
753 OPTGROUP_NONE
, /* optinfo_flags */
754 TV_IPA_PROFILE
, /* tv_id */
755 0, /* properties_required */
756 0, /* properties_provided */
757 0, /* properties_destroyed */
758 0, /* todo_flags_start */
759 0, /* todo_flags_finish */
762 class pass_ipa_profile
: public ipa_opt_pass_d
765 pass_ipa_profile (gcc::context
*ctxt
)
766 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
767 ipa_profile_generate_summary
, /* generate_summary */
768 ipa_profile_write_summary
, /* write_summary */
769 ipa_profile_read_summary
, /* read_summary */
770 NULL
, /* write_optimization_summary */
771 NULL
, /* read_optimization_summary */
772 NULL
, /* stmt_fixup */
773 0, /* function_transform_todo_flags_start */
774 NULL
, /* function_transform */
775 NULL
) /* variable_transform */
778 /* opt_pass methods: */
779 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
780 virtual unsigned int execute (function
*) { return ipa_profile (); }
782 }; // class pass_ipa_profile
787 make_pass_ipa_profile (gcc::context
*ctxt
)
789 return new pass_ipa_profile (ctxt
);