1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "fold-const.h"
56 #include "dominance.h"
58 #include "basic-block.h"
59 #include "plugin-api.h"
60 #include "hard-reg-set.h"
64 #include "tree-pass.h"
65 #include "tree-ssa-alias.h"
66 #include "internal-fn.h"
67 #include "gimple-expr.h"
69 #include "gimple-iterator.h"
72 #include "tree-iterator.h"
73 #include "ipa-utils.h"
76 #include "value-prof.h"
77 #include "alloc-pool.h"
78 #include "tree-inline.h"
79 #include "lto-streamer.h"
80 #include "data-streamer.h"
81 #include "symbol-summary.h"
83 #include "ipa-inline.h"
85 /* Entry in the histogram. */
87 struct histogram_entry
94 /* Histogram of profile values.
95 The histogram is represented as an ordered vector of entries allocated via
96 histogram_pool. During construction a separate hashtable is kept to lookup
99 vec
<histogram_entry
*> histogram
;
100 static pool_allocator
<histogram_entry
> histogram_pool
101 ("IPA histogram", 10);
103 /* Hashtable support for storing histogram entries keyed by their count. */
105 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
107 typedef histogram_entry
*value_type
;
108 typedef histogram_entry
*compare_type
;
109 static inline hashval_t
hash (const histogram_entry
*);
110 static inline int equal (const histogram_entry
*, const histogram_entry
*);
114 histogram_hash::hash (const histogram_entry
*val
)
120 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
122 return val
->count
== val2
->count
;
125 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
126 HASHTABLE is the on-side hash kept to avoid duplicates. */
129 account_time_size (hash_table
<histogram_hash
> *hashtable
,
130 vec
<histogram_entry
*> &histogram
,
131 gcov_type count
, int time
, int size
)
133 histogram_entry key
= {count
, 0, 0};
134 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
138 *val
= histogram_pool
.allocate ();
140 histogram
.safe_push (*val
);
142 (*val
)->time
+= time
;
143 (*val
)->size
+= size
;
147 cmp_counts (const void *v1
, const void *v2
)
149 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
150 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
151 if (h1
->count
< h2
->count
)
153 if (h1
->count
> h2
->count
)
158 /* Dump HISTOGRAM to FILE. */
161 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
164 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
166 fprintf (dump_file
, "Histogram:\n");
167 for (i
= 0; i
< histogram
.length (); i
++)
169 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
170 overall_size
+= histogram
[i
]->size
;
176 for (i
= 0; i
< histogram
.length (); i
++)
178 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
179 cumulated_size
+= histogram
[i
]->size
;
180 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
181 (int64_t) histogram
[i
]->count
,
183 cumulated_time
* 100.0 / overall_time
,
185 cumulated_size
* 100.0 / overall_size
);
189 /* Collect histogram from CFG profiles. */
192 ipa_profile_generate_summary (void)
194 struct cgraph_node
*node
;
195 gimple_stmt_iterator gsi
;
198 hash_table
<histogram_hash
> hashtable (10);
200 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
201 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
205 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
207 gimple stmt
= gsi_stmt (gsi
);
208 if (gimple_code (stmt
) == GIMPLE_CALL
209 && !gimple_call_fndecl (stmt
))
212 h
= gimple_histogram_value_of_type
213 (DECL_STRUCT_FUNCTION (node
->decl
),
214 stmt
, HIST_TYPE_INDIR_CALL
);
215 /* No need to do sanity check: gimple_ic_transform already
216 takes away bad histograms. */
219 /* counter 0 is target, counter 1 is number of execution we called target,
220 counter 2 is total number of executions. */
221 if (h
->hvalue
.counters
[2])
223 struct cgraph_edge
* e
= node
->get_edge (stmt
);
224 if (e
&& !e
->indirect_unknown_callee
)
226 e
->indirect_info
->common_target_id
227 = h
->hvalue
.counters
[0];
228 e
->indirect_info
->common_target_probability
229 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
230 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
233 fprintf (dump_file
, "Probability capped to 1\n");
234 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
237 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
241 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
242 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
244 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
246 histogram
.qsort (cmp_counts
);
249 /* Serialize the ipa info for lto. */
252 ipa_profile_write_summary (void)
254 struct lto_simple_output_block
*ob
255 = lto_create_simple_output_block (LTO_section_ipa_profile
);
258 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
259 for (i
= 0; i
< histogram
.length (); i
++)
261 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
262 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
263 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
265 lto_destroy_simple_output_block (ob
);
268 /* Deserialize the ipa info for lto. */
271 ipa_profile_read_summary (void)
273 struct lto_file_decl_data
** file_data_vec
274 = lto_get_file_decl_data ();
275 struct lto_file_decl_data
* file_data
;
278 hash_table
<histogram_hash
> hashtable (10);
280 while ((file_data
= file_data_vec
[j
++]))
284 struct lto_input_block
*ib
285 = lto_create_simple_input_block (file_data
,
286 LTO_section_ipa_profile
,
290 unsigned int num
= streamer_read_uhwi (ib
);
292 for (n
= 0; n
< num
; n
++)
294 gcov_type count
= streamer_read_gcov_count (ib
);
295 int time
= streamer_read_uhwi (ib
);
296 int size
= streamer_read_uhwi (ib
);
297 account_time_size (&hashtable
, histogram
,
300 lto_destroy_simple_input_block (file_data
,
301 LTO_section_ipa_profile
,
305 histogram
.qsort (cmp_counts
);
308 /* Data used by ipa_propagate_frequency. */
310 struct ipa_propagate_frequency_data
312 cgraph_node
*function_symbol
;
313 bool maybe_unlikely_executed
;
314 bool maybe_executed_once
;
315 bool only_called_at_startup
;
316 bool only_called_at_exit
;
319 /* Worker for ipa_propagate_frequency_1. */
322 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
324 struct ipa_propagate_frequency_data
*d
;
325 struct cgraph_edge
*edge
;
327 d
= (struct ipa_propagate_frequency_data
*)data
;
328 for (edge
= node
->callers
;
329 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
330 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
331 edge
= edge
->next_caller
)
333 if (edge
->caller
!= d
->function_symbol
)
335 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
336 /* It makes sense to put main() together with the static constructors.
337 It will be executed for sure, but rest of functions called from
338 main are definitely not at startup only. */
339 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
340 d
->only_called_at_startup
= 0;
341 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
344 /* When profile feedback is available, do not try to propagate too hard;
345 counts are already a good guide on function frequencies and roundoff
346 errors can make us push a function into the unlikely section even when
347 it is executed by the train run. Transfer the function only if all
348 callers are unlikely executed. */
350 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
351 /* Thunks are not profiled. This is more or less implementation
353 && !d
->function_symbol
->thunk
.thunk_p
354 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
355 || (edge
->caller
->global
.inlined_to
356 && edge
->caller
->global
.inlined_to
->frequency
357 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
358 d
->maybe_unlikely_executed
= false;
359 if (!edge
->frequency
)
361 switch (edge
->caller
->frequency
)
363 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
365 case NODE_FREQUENCY_EXECUTED_ONCE
:
366 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
367 fprintf (dump_file
, " Called by %s that is executed once\n",
368 edge
->caller
->name ());
369 d
->maybe_unlikely_executed
= false;
370 if (inline_edge_summary (edge
)->loop_depth
)
372 d
->maybe_executed_once
= false;
373 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
374 fprintf (dump_file
, " Called in loop\n");
377 case NODE_FREQUENCY_HOT
:
378 case NODE_FREQUENCY_NORMAL
:
379 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
380 fprintf (dump_file
, " Called by %s that is normal or hot\n",
381 edge
->caller
->name ());
382 d
->maybe_unlikely_executed
= false;
383 d
->maybe_executed_once
= false;
390 /* Return ture if NODE contains hot calls. */
393 contains_hot_call_p (struct cgraph_node
*node
)
395 struct cgraph_edge
*e
;
396 for (e
= node
->callees
; e
; e
= e
->next_callee
)
397 if (e
->maybe_hot_p ())
399 else if (!e
->inline_failed
400 && contains_hot_call_p (e
->callee
))
402 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
403 if (e
->maybe_hot_p ())
408 /* See if the frequency of NODE can be updated based on frequencies of its
411 ipa_propagate_frequency (struct cgraph_node
*node
)
413 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
414 bool changed
= false;
416 /* We can not propagate anything useful about externally visible functions
417 nor about virtuals. */
418 if (!node
->local
.local
420 || (opt_for_fn (node
->decl
, flag_devirtualize
)
421 && DECL_VIRTUAL_P (node
->decl
)))
423 gcc_assert (node
->analyzed
);
424 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
425 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
427 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
430 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
431 && !node
->only_called_at_startup
)
433 node
->only_called_at_startup
= true;
435 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
439 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
440 && !node
->only_called_at_exit
)
442 node
->only_called_at_exit
= true;
444 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
449 /* With profile we can decide on hot/normal based on count. */
453 if (node
->count
>= get_hot_bb_threshold ())
456 hot
|= contains_hot_call_p (node
);
459 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
462 fprintf (dump_file
, "Node %s promoted to hot.\n",
464 node
->frequency
= NODE_FREQUENCY_HOT
;
469 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
472 fprintf (dump_file
, "Node %s reduced to normal.\n",
474 node
->frequency
= NODE_FREQUENCY_NORMAL
;
478 /* These come either from profile or user hints; never update them. */
479 if (node
->frequency
== NODE_FREQUENCY_HOT
480 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
482 if (d
.maybe_unlikely_executed
)
484 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
486 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
490 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
492 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
494 fprintf (dump_file
, "Node %s promoted to executed once.\n",
501 /* Simple ipa profile pass propagating frequencies across the callgraph. */
506 struct cgraph_node
**order
;
507 struct cgraph_edge
*e
;
509 bool something_changed
= false;
511 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
512 struct cgraph_node
*n
,*n2
;
513 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
514 int nmismatch
= 0, nimpossible
= 0;
515 bool node_map_initialized
= false;
518 dump_histogram (dump_file
, histogram
);
519 for (i
= 0; i
< (int)histogram
.length (); i
++)
521 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
522 overall_size
+= histogram
[i
]->size
;
528 gcc_assert (overall_size
);
531 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
533 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
534 (int64_t)overall_time
);
535 min
= get_hot_bb_threshold ();
536 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
539 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
540 cumulated_size
+= histogram
[i
]->size
;
542 fprintf (dump_file
, "GCOV min count: %" PRId64
543 " Time:%3.2f%% Size:%3.2f%%\n",
545 cumulated_time
* 100.0 / overall_time
,
546 cumulated_size
* 100.0 / overall_size
);
548 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
550 for (i
= 0; cumulated
< cutoff
; i
++)
552 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
553 threshold
= histogram
[i
]->count
;
559 gcov_type cumulated_time
= 0, cumulated_size
= 0;
562 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
565 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
566 cumulated_size
+= histogram
[i
]->size
;
568 fprintf (dump_file
, "Determined min count: %" PRId64
569 " Time:%3.2f%% Size:%3.2f%%\n",
571 cumulated_time
* 100.0 / overall_time
,
572 cumulated_size
* 100.0 / overall_size
);
574 if (threshold
> get_hot_bb_threshold ()
578 fprintf (dump_file
, "Threshold updated.\n");
579 set_hot_bb_threshold (threshold
);
582 histogram
.release ();
583 histogram_pool
.release ();
585 /* Produce speculative calls: we saved common target from profiling into
586 e->common_target_id. Now, at link time, we can look up corresponding
587 function node and produce speculative call. */
589 FOR_EACH_DEFINED_FUNCTION (n
)
593 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
596 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
600 if (e
->indirect_info
->common_target_id
)
602 if (!node_map_initialized
)
603 init_node_map (false);
604 node_map_initialized
= true;
606 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
611 fprintf (dump_file
, "Indirect call -> direct call from"
612 " other module %s/%i => %s/%i, prob %3.2f\n",
613 xstrdup_for_dump (n
->name ()), n
->order
,
614 xstrdup_for_dump (n2
->name ()), n2
->order
,
615 e
->indirect_info
->common_target_probability
616 / (float)REG_BR_PROB_BASE
);
618 if (e
->indirect_info
->common_target_probability
619 < REG_BR_PROB_BASE
/ 2)
624 "Not speculating: probability is too low.\n");
626 else if (!e
->maybe_hot_p ())
631 "Not speculating: call is cold.\n");
633 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
634 && n2
->can_be_discarded_p ())
639 "Not speculating: target is overwritable "
640 "and can be discarded.\n");
642 else if (ipa_node_params_sum
&& ipa_edge_args_vector
643 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
644 && ipa_get_param_count (IPA_NODE_REF (n2
))
645 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
646 && (ipa_get_param_count (IPA_NODE_REF (n2
))
647 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
648 || !stdarg_p (TREE_TYPE (n2
->decl
))))
654 "parameter count mistmatch\n");
656 else if (e
->indirect_info
->polymorphic
657 && !opt_for_fn (n
->decl
, flag_devirtualize
)
658 && !possible_polymorphic_call_target_p (e
, n2
))
664 "function is not in the polymorphic "
665 "call target list\n");
669 /* Target may be overwritable, but profile says that
670 control flow goes to this particular implementation
671 of N2. Speculate on the local alias to allow inlining.
673 if (!n2
->can_be_discarded_p ())
676 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
683 apply_scale (e
->count
,
684 e
->indirect_info
->common_target_probability
),
685 apply_scale (e
->frequency
,
686 e
->indirect_info
->common_target_probability
));
693 fprintf (dump_file
, "Function with profile-id %i not found.\n",
694 e
->indirect_info
->common_target_id
);
700 inline_update_overall_summary (n
);
702 if (node_map_initialized
)
704 if (dump_file
&& nindirect
)
706 "%i indirect calls trained.\n"
707 "%i (%3.2f%%) have common target.\n"
708 "%i (%3.2f%%) targets was not found.\n"
709 "%i (%3.2f%%) targets had parameter count mismatch.\n"
710 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
711 "%i (%3.2f%%) speculations seems useless.\n"
712 "%i (%3.2f%%) speculations produced.\n",
714 ncommon
, ncommon
* 100.0 / nindirect
,
715 nunknown
, nunknown
* 100.0 / nindirect
,
716 nmismatch
, nmismatch
* 100.0 / nindirect
,
717 nimpossible
, nimpossible
* 100.0 / nindirect
,
718 nuseless
, nuseless
* 100.0 / nindirect
,
719 nconverted
, nconverted
* 100.0 / nindirect
);
721 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
722 order_pos
= ipa_reverse_postorder (order
);
723 for (i
= order_pos
- 1; i
>= 0; i
--)
725 if (order
[i
]->local
.local
726 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
727 && ipa_propagate_frequency (order
[i
]))
729 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
730 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
732 something_changed
= true;
733 e
->callee
->aux
= (void *)1;
736 order
[i
]->aux
= NULL
;
739 while (something_changed
)
741 something_changed
= false;
742 for (i
= order_pos
- 1; i
>= 0; i
--)
745 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
746 && ipa_propagate_frequency (order
[i
]))
748 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
749 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
751 something_changed
= true;
752 e
->callee
->aux
= (void *)1;
755 order
[i
]->aux
= NULL
;
764 const pass_data pass_data_ipa_profile
=
767 "profile_estimate", /* name */
768 OPTGROUP_NONE
, /* optinfo_flags */
769 TV_IPA_PROFILE
, /* tv_id */
770 0, /* properties_required */
771 0, /* properties_provided */
772 0, /* properties_destroyed */
773 0, /* todo_flags_start */
774 0, /* todo_flags_finish */
777 class pass_ipa_profile
: public ipa_opt_pass_d
780 pass_ipa_profile (gcc::context
*ctxt
)
781 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
782 ipa_profile_generate_summary
, /* generate_summary */
783 ipa_profile_write_summary
, /* write_summary */
784 ipa_profile_read_summary
, /* read_summary */
785 NULL
, /* write_optimization_summary */
786 NULL
, /* read_optimization_summary */
787 NULL
, /* stmt_fixup */
788 0, /* function_transform_todo_flags_start */
789 NULL
, /* function_transform */
790 NULL
) /* variable_transform */
793 /* opt_pass methods: */
794 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
795 virtual unsigned int execute (function
*) { return ipa_profile (); }
797 }; // class pass_ipa_profile
802 make_pass_ipa_profile (gcc::context
*ctxt
)
804 return new pass_ipa_profile (ctxt
);