1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profille
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 The information is used to set hot/cold thresholds.
29 - Next speculative indirect call resolution is performed: the local
30 profile pass assigns profile-id to each function and provide us with a
31 histogram specifying the most common target. We look up the callgraph
32 node corresponding to the target and produce a speculative call.
34 This call may or may not survive through IPA optimization based on decision
36 - Finally we propagate the following flags: unlikely executed, executed
37 once, executed at startup and executed at exit. These flags are used to
38 control code size/performance threshold and code placement (by producing
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
42 #include "coretypes.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
55 #include "value-prof.h"
56 #include "tree-inline.h"
57 #include "symbol-summary.h"
60 #include "ipa-fnsummary.h"
62 /* Entry in the histogram. */
64 struct histogram_entry
71 /* Histogram of profile values.
72 The histogram is represented as an ordered vector of entries allocated via
73 histogram_pool. During construction a separate hashtable is kept to lookup
76 vec
<histogram_entry
*> histogram
;
77 static object_allocator
<histogram_entry
> histogram_pool ("IPA histogram");
79 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
81 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
83 static inline hashval_t
hash (const histogram_entry
*);
84 static inline int equal (const histogram_entry
*, const histogram_entry
*);
88 histogram_hash::hash (const histogram_entry
*val
)
94 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
96 return val
->count
== val2
->count
;
99 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
100 HASHTABLE is the on-side hash kept to avoid duplicates. */
103 account_time_size (hash_table
<histogram_hash
> *hashtable
,
104 vec
<histogram_entry
*> &histogram
,
105 gcov_type count
, int time
, int size
)
107 histogram_entry key
= {count
, 0, 0};
108 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
112 *val
= histogram_pool
.allocate ();
114 histogram
.safe_push (*val
);
116 (*val
)->time
+= time
;
117 (*val
)->size
+= size
;
121 cmp_counts (const void *v1
, const void *v2
)
123 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
124 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
125 if (h1
->count
< h2
->count
)
127 if (h1
->count
> h2
->count
)
132 /* Dump HISTOGRAM to FILE. */
135 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
138 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
140 fprintf (dump_file
, "Histogram:\n");
141 for (i
= 0; i
< histogram
.length (); i
++)
143 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
144 overall_size
+= histogram
[i
]->size
;
150 for (i
= 0; i
< histogram
.length (); i
++)
152 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
153 cumulated_size
+= histogram
[i
]->size
;
154 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
155 (int64_t) histogram
[i
]->count
,
157 cumulated_time
* 100.0 / overall_time
,
159 cumulated_size
* 100.0 / overall_size
);
163 /* Collect histogram from CFG profiles. */
166 ipa_profile_generate_summary (void)
168 struct cgraph_node
*node
;
169 gimple_stmt_iterator gsi
;
172 hash_table
<histogram_hash
> hashtable (10);
174 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
175 if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node
->decl
))->count
.ipa_p ())
176 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
180 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
182 gimple
*stmt
= gsi_stmt (gsi
);
183 if (gimple_code (stmt
) == GIMPLE_CALL
184 && !gimple_call_fndecl (stmt
))
187 h
= gimple_histogram_value_of_type
188 (DECL_STRUCT_FUNCTION (node
->decl
),
189 stmt
, HIST_TYPE_INDIR_CALL
);
190 /* No need to do sanity check: gimple_ic_transform already
191 takes away bad histograms. */
194 /* counter 0 is target, counter 1 is number of execution we called target,
195 counter 2 is total number of executions. */
196 if (h
->hvalue
.counters
[2])
198 struct cgraph_edge
* e
= node
->get_edge (stmt
);
199 if (e
&& !e
->indirect_unknown_callee
)
201 e
->indirect_info
->common_target_id
202 = h
->hvalue
.counters
[0];
203 e
->indirect_info
->common_target_probability
204 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
205 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
208 fprintf (dump_file
, "Probability capped to 1\n");
209 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
212 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
216 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
217 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
219 if (bb
->count
.ipa_p () && bb
->count
.initialized_p ())
220 account_time_size (&hashtable
, histogram
, bb
->count
.ipa ().to_gcov_type (),
223 histogram
.qsort (cmp_counts
);
226 /* Serialize the ipa info for lto. */
229 ipa_profile_write_summary (void)
231 struct lto_simple_output_block
*ob
232 = lto_create_simple_output_block (LTO_section_ipa_profile
);
235 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
236 for (i
= 0; i
< histogram
.length (); i
++)
238 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
239 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
240 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
242 lto_destroy_simple_output_block (ob
);
245 /* Deserialize the ipa info for lto. */
248 ipa_profile_read_summary (void)
250 struct lto_file_decl_data
** file_data_vec
251 = lto_get_file_decl_data ();
252 struct lto_file_decl_data
* file_data
;
255 hash_table
<histogram_hash
> hashtable (10);
257 while ((file_data
= file_data_vec
[j
++]))
261 struct lto_input_block
*ib
262 = lto_create_simple_input_block (file_data
,
263 LTO_section_ipa_profile
,
267 unsigned int num
= streamer_read_uhwi (ib
);
269 for (n
= 0; n
< num
; n
++)
271 gcov_type count
= streamer_read_gcov_count (ib
);
272 int time
= streamer_read_uhwi (ib
);
273 int size
= streamer_read_uhwi (ib
);
274 account_time_size (&hashtable
, histogram
,
277 lto_destroy_simple_input_block (file_data
,
278 LTO_section_ipa_profile
,
282 histogram
.qsort (cmp_counts
);
285 /* Data used by ipa_propagate_frequency. */
287 struct ipa_propagate_frequency_data
289 cgraph_node
*function_symbol
;
290 bool maybe_unlikely_executed
;
291 bool maybe_executed_once
;
292 bool only_called_at_startup
;
293 bool only_called_at_exit
;
296 /* Worker for ipa_propagate_frequency_1. */
299 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
301 struct ipa_propagate_frequency_data
*d
;
302 struct cgraph_edge
*edge
;
304 d
= (struct ipa_propagate_frequency_data
*)data
;
305 for (edge
= node
->callers
;
306 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
307 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
308 edge
= edge
->next_caller
)
310 if (edge
->caller
!= d
->function_symbol
)
312 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
313 /* It makes sense to put main() together with the static constructors.
314 It will be executed for sure, but rest of functions called from
315 main are definitely not at startup only. */
316 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
317 d
->only_called_at_startup
= 0;
318 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
321 /* When profile feedback is available, do not try to propagate too hard;
322 counts are already good guide on function frequencies and roundoff
323 errors can make us to push function into unlikely section even when
324 it is executed by the train run. Transfer the function only if all
325 callers are unlikely executed. */
327 && !(edge
->callee
->count
.ipa () == profile_count::zero ())
328 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
329 || (edge
->caller
->global
.inlined_to
330 && edge
->caller
->global
.inlined_to
->frequency
331 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
332 d
->maybe_unlikely_executed
= false;
333 if (edge
->count
.ipa ().initialized_p ()
334 && !edge
->count
.ipa ().nonzero_p ())
336 switch (edge
->caller
->frequency
)
338 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
340 case NODE_FREQUENCY_EXECUTED_ONCE
:
342 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
343 fprintf (dump_file
, " Called by %s that is executed once\n",
344 edge
->caller
->name ());
345 d
->maybe_unlikely_executed
= false;
346 ipa_call_summary
*s
= ipa_call_summaries
->get (edge
);
347 if (s
!= NULL
&& s
->loop_depth
)
349 d
->maybe_executed_once
= false;
350 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
351 fprintf (dump_file
, " Called in loop\n");
355 case NODE_FREQUENCY_HOT
:
356 case NODE_FREQUENCY_NORMAL
:
357 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
358 fprintf (dump_file
, " Called by %s that is normal or hot\n",
359 edge
->caller
->name ());
360 d
->maybe_unlikely_executed
= false;
361 d
->maybe_executed_once
= false;
368 /* Return ture if NODE contains hot calls. */
371 contains_hot_call_p (struct cgraph_node
*node
)
373 struct cgraph_edge
*e
;
374 for (e
= node
->callees
; e
; e
= e
->next_callee
)
375 if (e
->maybe_hot_p ())
377 else if (!e
->inline_failed
378 && contains_hot_call_p (e
->callee
))
380 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
381 if (e
->maybe_hot_p ())
386 /* See if the frequency of NODE can be updated based on frequencies of its
389 ipa_propagate_frequency (struct cgraph_node
*node
)
391 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
392 bool changed
= false;
394 /* We cannot propagate anything useful about externally visible functions
395 nor about virtuals. */
396 if (!node
->local
.local
398 || (opt_for_fn (node
->decl
, flag_devirtualize
)
399 && DECL_VIRTUAL_P (node
->decl
)))
401 gcc_assert (node
->analyzed
);
402 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
403 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
405 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
408 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
409 && !node
->only_called_at_startup
)
411 node
->only_called_at_startup
= true;
413 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
417 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
418 && !node
->only_called_at_exit
)
420 node
->only_called_at_exit
= true;
422 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
427 /* With profile we can decide on hot/normal based on count. */
428 if (node
->count
. ipa().initialized_p ())
431 if (!(node
->count
. ipa() == profile_count::zero ())
432 && node
->count
. ipa() >= get_hot_bb_threshold ())
435 hot
|= contains_hot_call_p (node
);
438 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
441 fprintf (dump_file
, "Node %s promoted to hot.\n",
443 node
->frequency
= NODE_FREQUENCY_HOT
;
448 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
451 fprintf (dump_file
, "Node %s reduced to normal.\n",
453 node
->frequency
= NODE_FREQUENCY_NORMAL
;
457 /* These come either from profile or user hints; never update them. */
458 if (node
->frequency
== NODE_FREQUENCY_HOT
459 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
461 if (d
.maybe_unlikely_executed
)
463 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
465 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
469 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
471 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
473 fprintf (dump_file
, "Node %s promoted to executed once.\n",
480 /* Simple ipa profile pass propagating frequencies across the callgraph. */
485 struct cgraph_node
**order
;
486 struct cgraph_edge
*e
;
488 bool something_changed
= false;
490 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
491 struct cgraph_node
*n
,*n2
;
492 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
493 int nmismatch
= 0, nimpossible
= 0;
494 bool node_map_initialized
= false;
497 dump_histogram (dump_file
, histogram
);
498 for (i
= 0; i
< (int)histogram
.length (); i
++)
500 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
501 overall_size
+= histogram
[i
]->size
;
507 gcc_assert (overall_size
);
509 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
511 for (i
= 0; cumulated
< cutoff
; i
++)
513 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
514 threshold
= histogram
[i
]->count
;
520 gcov_type cumulated_time
= 0, cumulated_size
= 0;
523 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
526 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
527 cumulated_size
+= histogram
[i
]->size
;
529 fprintf (dump_file
, "Determined min count: %" PRId64
530 " Time:%3.2f%% Size:%3.2f%%\n",
532 cumulated_time
* 100.0 / overall_time
,
533 cumulated_size
* 100.0 / overall_size
);
539 fprintf (dump_file
, "Setting hotness threshold in LTO mode.\n");
540 set_hot_bb_threshold (threshold
);
543 histogram
.release ();
544 histogram_pool
.release ();
546 /* Produce speculative calls: we saved common traget from porfiling into
547 e->common_target_id. Now, at link time, we can look up corresponding
548 function node and produce speculative call. */
550 FOR_EACH_DEFINED_FUNCTION (n
)
554 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
557 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
559 if (n
->count
.initialized_p ())
561 if (e
->indirect_info
->common_target_id
)
563 if (!node_map_initialized
)
564 init_node_map (false);
565 node_map_initialized
= true;
567 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
572 fprintf (dump_file
, "Indirect call -> direct call from"
573 " other module %s => %s, prob %3.2f\n",
576 e
->indirect_info
->common_target_probability
577 / (float)REG_BR_PROB_BASE
);
579 if (e
->indirect_info
->common_target_probability
580 < REG_BR_PROB_BASE
/ 2)
585 "Not speculating: probability is too low.\n");
587 else if (!e
->maybe_hot_p ())
592 "Not speculating: call is cold.\n");
594 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
595 && n2
->can_be_discarded_p ())
600 "Not speculating: target is overwritable "
601 "and can be discarded.\n");
603 else if (ipa_node_params_sum
&& ipa_edge_args_sum
604 && (!vec_safe_is_empty
605 (IPA_NODE_REF (n2
)->descriptors
))
606 && ipa_get_param_count (IPA_NODE_REF (n2
))
607 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
608 && (ipa_get_param_count (IPA_NODE_REF (n2
))
609 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
610 || !stdarg_p (TREE_TYPE (n2
->decl
))))
616 "parameter count mistmatch\n");
618 else if (e
->indirect_info
->polymorphic
619 && !opt_for_fn (n
->decl
, flag_devirtualize
)
620 && !possible_polymorphic_call_target_p (e
, n2
))
626 "function is not in the polymorphic "
627 "call target list\n");
631 /* Target may be overwritable, but profile says that
632 control flow goes to this particular implementation
633 of N2. Speculate on the local alias to allow inlining.
635 if (!n2
->can_be_discarded_p ())
638 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
645 e
->count
.apply_probability
646 (e
->indirect_info
->common_target_probability
));
653 fprintf (dump_file
, "Function with profile-id %i not found.\n",
654 e
->indirect_info
->common_target_id
);
660 ipa_update_overall_fn_summary (n
);
662 if (node_map_initialized
)
664 if (dump_file
&& nindirect
)
666 "%i indirect calls trained.\n"
667 "%i (%3.2f%%) have common target.\n"
668 "%i (%3.2f%%) targets was not found.\n"
669 "%i (%3.2f%%) targets had parameter count mismatch.\n"
670 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
671 "%i (%3.2f%%) speculations seems useless.\n"
672 "%i (%3.2f%%) speculations produced.\n",
674 ncommon
, ncommon
* 100.0 / nindirect
,
675 nunknown
, nunknown
* 100.0 / nindirect
,
676 nmismatch
, nmismatch
* 100.0 / nindirect
,
677 nimpossible
, nimpossible
* 100.0 / nindirect
,
678 nuseless
, nuseless
* 100.0 / nindirect
,
679 nconverted
, nconverted
* 100.0 / nindirect
);
681 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
682 order_pos
= ipa_reverse_postorder (order
);
683 for (i
= order_pos
- 1; i
>= 0; i
--)
685 if (order
[i
]->local
.local
686 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
687 && ipa_propagate_frequency (order
[i
]))
689 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
690 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
692 something_changed
= true;
693 e
->callee
->aux
= (void *)1;
696 order
[i
]->aux
= NULL
;
699 while (something_changed
)
701 something_changed
= false;
702 for (i
= order_pos
- 1; i
>= 0; i
--)
705 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
706 && ipa_propagate_frequency (order
[i
]))
708 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
709 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
711 something_changed
= true;
712 e
->callee
->aux
= (void *)1;
715 order
[i
]->aux
= NULL
;
724 const pass_data pass_data_ipa_profile
=
727 "profile_estimate", /* name */
728 OPTGROUP_NONE
, /* optinfo_flags */
729 TV_IPA_PROFILE
, /* tv_id */
730 0, /* properties_required */
731 0, /* properties_provided */
732 0, /* properties_destroyed */
733 0, /* todo_flags_start */
734 0, /* todo_flags_finish */
737 class pass_ipa_profile
: public ipa_opt_pass_d
740 pass_ipa_profile (gcc::context
*ctxt
)
741 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
742 ipa_profile_generate_summary
, /* generate_summary */
743 ipa_profile_write_summary
, /* write_summary */
744 ipa_profile_read_summary
, /* read_summary */
745 NULL
, /* write_optimization_summary */
746 NULL
, /* read_optimization_summary */
747 NULL
, /* stmt_fixup */
748 0, /* function_transform_todo_flags_start */
749 NULL
, /* function_transform */
750 NULL
) /* variable_transform */
753 /* opt_pass methods: */
754 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
755 virtual unsigned int execute (function
*) { return ipa_profile (); }
757 }; // class pass_ipa_profile
762 make_pass_ipa_profile (gcc::context
*ctxt
)
764 return new pass_ipa_profile (ctxt
);