1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profille
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequentely histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "fold-const.h"
56 #include "dominance.h"
58 #include "basic-block.h"
59 #include "plugin-api.h"
60 #include "hard-reg-set.h"
64 #include "tree-pass.h"
65 #include "tree-ssa-alias.h"
66 #include "internal-fn.h"
67 #include "gimple-expr.h"
69 #include "gimple-iterator.h"
72 #include "tree-iterator.h"
73 #include "ipa-utils.h"
76 #include "value-prof.h"
77 #include "alloc-pool.h"
78 #include "tree-inline.h"
79 #include "lto-streamer.h"
80 #include "data-streamer.h"
81 #include "symbol-summary.h"
83 #include "ipa-inline.h"
85 /* Entry in the histogram. */
87 struct histogram_entry
94 /* Histogram of profile values.
95 The histogram is represented as an ordered vector of entries allocated via
96 histogram_pool. During construction a separate hashtable is kept to lookup
99 vec
<histogram_entry
*> histogram
;
100 static pool_allocator
<histogram_entry
> histogram_pool
101 ("IPA histogram", 10);
103 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
105 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
107 static inline hashval_t
hash (const histogram_entry
*);
108 static inline int equal (const histogram_entry
*, const histogram_entry
*);
112 histogram_hash::hash (const histogram_entry
*val
)
118 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
120 return val
->count
== val2
->count
;
123 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
124 HASHTABLE is the on-side hash kept to avoid duplicates. */
127 account_time_size (hash_table
<histogram_hash
> *hashtable
,
128 vec
<histogram_entry
*> &histogram
,
129 gcov_type count
, int time
, int size
)
131 histogram_entry key
= {count
, 0, 0};
132 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
136 *val
= histogram_pool
.allocate ();
138 histogram
.safe_push (*val
);
140 (*val
)->time
+= time
;
141 (*val
)->size
+= size
;
145 cmp_counts (const void *v1
, const void *v2
)
147 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
148 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
149 if (h1
->count
< h2
->count
)
151 if (h1
->count
> h2
->count
)
156 /* Dump HISTOGRAM to FILE. */
159 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
162 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
164 fprintf (dump_file
, "Histogram:\n");
165 for (i
= 0; i
< histogram
.length (); i
++)
167 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
168 overall_size
+= histogram
[i
]->size
;
174 for (i
= 0; i
< histogram
.length (); i
++)
176 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
177 cumulated_size
+= histogram
[i
]->size
;
178 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
179 (int64_t) histogram
[i
]->count
,
181 cumulated_time
* 100.0 / overall_time
,
183 cumulated_size
* 100.0 / overall_size
);
187 /* Collect histogram from CFG profiles. */
190 ipa_profile_generate_summary (void)
192 struct cgraph_node
*node
;
193 gimple_stmt_iterator gsi
;
196 hash_table
<histogram_hash
> hashtable (10);
198 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
199 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
203 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
205 gimple stmt
= gsi_stmt (gsi
);
206 if (gimple_code (stmt
) == GIMPLE_CALL
207 && !gimple_call_fndecl (stmt
))
210 h
= gimple_histogram_value_of_type
211 (DECL_STRUCT_FUNCTION (node
->decl
),
212 stmt
, HIST_TYPE_INDIR_CALL
);
213 /* No need to do sanity check: gimple_ic_transform already
214 takes away bad histograms. */
217 /* counter 0 is target, counter 1 is number of execution we called target,
218 counter 2 is total number of executions. */
219 if (h
->hvalue
.counters
[2])
221 struct cgraph_edge
* e
= node
->get_edge (stmt
);
222 if (e
&& !e
->indirect_unknown_callee
)
224 e
->indirect_info
->common_target_id
225 = h
->hvalue
.counters
[0];
226 e
->indirect_info
->common_target_probability
227 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
228 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
231 fprintf (dump_file
, "Probability capped to 1\n");
232 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
235 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
239 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
240 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
242 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
244 histogram
.qsort (cmp_counts
);
247 /* Serialize the ipa info for lto. */
250 ipa_profile_write_summary (void)
252 struct lto_simple_output_block
*ob
253 = lto_create_simple_output_block (LTO_section_ipa_profile
);
256 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
257 for (i
= 0; i
< histogram
.length (); i
++)
259 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
260 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
261 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
263 lto_destroy_simple_output_block (ob
);
266 /* Deserialize the ipa info for lto. */
269 ipa_profile_read_summary (void)
271 struct lto_file_decl_data
** file_data_vec
272 = lto_get_file_decl_data ();
273 struct lto_file_decl_data
* file_data
;
276 hash_table
<histogram_hash
> hashtable (10);
278 while ((file_data
= file_data_vec
[j
++]))
282 struct lto_input_block
*ib
283 = lto_create_simple_input_block (file_data
,
284 LTO_section_ipa_profile
,
288 unsigned int num
= streamer_read_uhwi (ib
);
290 for (n
= 0; n
< num
; n
++)
292 gcov_type count
= streamer_read_gcov_count (ib
);
293 int time
= streamer_read_uhwi (ib
);
294 int size
= streamer_read_uhwi (ib
);
295 account_time_size (&hashtable
, histogram
,
298 lto_destroy_simple_input_block (file_data
,
299 LTO_section_ipa_profile
,
303 histogram
.qsort (cmp_counts
);
306 /* Data used by ipa_propagate_frequency. */
308 struct ipa_propagate_frequency_data
310 cgraph_node
*function_symbol
;
311 bool maybe_unlikely_executed
;
312 bool maybe_executed_once
;
313 bool only_called_at_startup
;
314 bool only_called_at_exit
;
317 /* Worker for ipa_propagate_frequency_1. */
320 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
322 struct ipa_propagate_frequency_data
*d
;
323 struct cgraph_edge
*edge
;
325 d
= (struct ipa_propagate_frequency_data
*)data
;
326 for (edge
= node
->callers
;
327 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
328 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
329 edge
= edge
->next_caller
)
331 if (edge
->caller
!= d
->function_symbol
)
333 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
334 /* It makes sense to put main() together with the static constructors.
335 It will be executed for sure, but rest of functions called from
336 main are definitely not at startup only. */
337 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
338 d
->only_called_at_startup
= 0;
339 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
342 /* When profile feedback is available, do not try to propagate too hard;
343 counts are already good guide on function frequencies and roundoff
344 errors can make us to push function into unlikely section even when
345 it is executed by the train run. Transfer the function only if all
346 callers are unlikely executed. */
348 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
349 /* Thunks are not profiled. This is more or less implementation
351 && !d
->function_symbol
->thunk
.thunk_p
352 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
353 || (edge
->caller
->global
.inlined_to
354 && edge
->caller
->global
.inlined_to
->frequency
355 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
356 d
->maybe_unlikely_executed
= false;
357 if (!edge
->frequency
)
359 switch (edge
->caller
->frequency
)
361 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
363 case NODE_FREQUENCY_EXECUTED_ONCE
:
364 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
365 fprintf (dump_file
, " Called by %s that is executed once\n",
366 edge
->caller
->name ());
367 d
->maybe_unlikely_executed
= false;
368 if (inline_edge_summary (edge
)->loop_depth
)
370 d
->maybe_executed_once
= false;
371 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
372 fprintf (dump_file
, " Called in loop\n");
375 case NODE_FREQUENCY_HOT
:
376 case NODE_FREQUENCY_NORMAL
:
377 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
378 fprintf (dump_file
, " Called by %s that is normal or hot\n",
379 edge
->caller
->name ());
380 d
->maybe_unlikely_executed
= false;
381 d
->maybe_executed_once
= false;
388 /* Return ture if NODE contains hot calls. */
391 contains_hot_call_p (struct cgraph_node
*node
)
393 struct cgraph_edge
*e
;
394 for (e
= node
->callees
; e
; e
= e
->next_callee
)
395 if (e
->maybe_hot_p ())
397 else if (!e
->inline_failed
398 && contains_hot_call_p (e
->callee
))
400 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
401 if (e
->maybe_hot_p ())
406 /* See if the frequency of NODE can be updated based on frequencies of its
409 ipa_propagate_frequency (struct cgraph_node
*node
)
411 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
412 bool changed
= false;
414 /* We can not propagate anything useful about externally visible functions
415 nor about virtuals. */
416 if (!node
->local
.local
418 || (opt_for_fn (node
->decl
, flag_devirtualize
)
419 && DECL_VIRTUAL_P (node
->decl
)))
421 gcc_assert (node
->analyzed
);
422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
423 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
425 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
428 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
429 && !node
->only_called_at_startup
)
431 node
->only_called_at_startup
= true;
433 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
437 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
438 && !node
->only_called_at_exit
)
440 node
->only_called_at_exit
= true;
442 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
447 /* With profile we can decide on hot/normal based on count. */
451 if (node
->count
>= get_hot_bb_threshold ())
454 hot
|= contains_hot_call_p (node
);
457 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
460 fprintf (dump_file
, "Node %s promoted to hot.\n",
462 node
->frequency
= NODE_FREQUENCY_HOT
;
467 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
470 fprintf (dump_file
, "Node %s reduced to normal.\n",
472 node
->frequency
= NODE_FREQUENCY_NORMAL
;
476 /* These come either from profile or user hints; never update them. */
477 if (node
->frequency
== NODE_FREQUENCY_HOT
478 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
480 if (d
.maybe_unlikely_executed
)
482 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
484 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
488 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
490 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
492 fprintf (dump_file
, "Node %s promoted to executed once.\n",
499 /* Simple ipa profile pass propagating frequencies across the callgraph. */
504 struct cgraph_node
**order
;
505 struct cgraph_edge
*e
;
507 bool something_changed
= false;
509 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
510 struct cgraph_node
*n
,*n2
;
511 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
512 int nmismatch
= 0, nimpossible
= 0;
513 bool node_map_initialized
= false;
516 dump_histogram (dump_file
, histogram
);
517 for (i
= 0; i
< (int)histogram
.length (); i
++)
519 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
520 overall_size
+= histogram
[i
]->size
;
526 gcc_assert (overall_size
);
529 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
531 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
532 (int64_t)overall_time
);
533 min
= get_hot_bb_threshold ();
534 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
537 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
538 cumulated_size
+= histogram
[i
]->size
;
540 fprintf (dump_file
, "GCOV min count: %" PRId64
541 " Time:%3.2f%% Size:%3.2f%%\n",
543 cumulated_time
* 100.0 / overall_time
,
544 cumulated_size
* 100.0 / overall_size
);
546 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
548 for (i
= 0; cumulated
< cutoff
; i
++)
550 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
551 threshold
= histogram
[i
]->count
;
557 gcov_type cumulated_time
= 0, cumulated_size
= 0;
560 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
563 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
564 cumulated_size
+= histogram
[i
]->size
;
566 fprintf (dump_file
, "Determined min count: %" PRId64
567 " Time:%3.2f%% Size:%3.2f%%\n",
569 cumulated_time
* 100.0 / overall_time
,
570 cumulated_size
* 100.0 / overall_size
);
572 if (threshold
> get_hot_bb_threshold ()
576 fprintf (dump_file
, "Threshold updated.\n");
577 set_hot_bb_threshold (threshold
);
580 histogram
.release ();
581 histogram_pool
.release ();
583 /* Produce speculative calls: we saved common traget from porfiling into
584 e->common_target_id. Now, at link time, we can look up corresponding
585 function node and produce speculative call. */
587 FOR_EACH_DEFINED_FUNCTION (n
)
591 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
594 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
598 if (e
->indirect_info
->common_target_id
)
600 if (!node_map_initialized
)
601 init_node_map (false);
602 node_map_initialized
= true;
604 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
609 fprintf (dump_file
, "Indirect call -> direct call from"
610 " other module %s/%i => %s/%i, prob %3.2f\n",
611 xstrdup_for_dump (n
->name ()), n
->order
,
612 xstrdup_for_dump (n2
->name ()), n2
->order
,
613 e
->indirect_info
->common_target_probability
614 / (float)REG_BR_PROB_BASE
);
616 if (e
->indirect_info
->common_target_probability
617 < REG_BR_PROB_BASE
/ 2)
622 "Not speculating: probability is too low.\n");
624 else if (!e
->maybe_hot_p ())
629 "Not speculating: call is cold.\n");
631 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
632 && n2
->can_be_discarded_p ())
637 "Not speculating: target is overwritable "
638 "and can be discarded.\n");
640 else if (ipa_node_params_sum
&& ipa_edge_args_vector
641 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
642 && ipa_get_param_count (IPA_NODE_REF (n2
))
643 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
644 && (ipa_get_param_count (IPA_NODE_REF (n2
))
645 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
646 || !stdarg_p (TREE_TYPE (n2
->decl
))))
652 "parameter count mistmatch\n");
654 else if (e
->indirect_info
->polymorphic
655 && !opt_for_fn (n
->decl
, flag_devirtualize
)
656 && !possible_polymorphic_call_target_p (e
, n2
))
662 "function is not in the polymorphic "
663 "call target list\n");
667 /* Target may be overwritable, but profile says that
668 control flow goes to this particular implementation
669 of N2. Speculate on the local alias to allow inlining.
671 if (!n2
->can_be_discarded_p ())
674 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
681 apply_scale (e
->count
,
682 e
->indirect_info
->common_target_probability
),
683 apply_scale (e
->frequency
,
684 e
->indirect_info
->common_target_probability
));
691 fprintf (dump_file
, "Function with profile-id %i not found.\n",
692 e
->indirect_info
->common_target_id
);
698 inline_update_overall_summary (n
);
700 if (node_map_initialized
)
702 if (dump_file
&& nindirect
)
704 "%i indirect calls trained.\n"
705 "%i (%3.2f%%) have common target.\n"
706 "%i (%3.2f%%) targets was not found.\n"
707 "%i (%3.2f%%) targets had parameter count mismatch.\n"
708 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
709 "%i (%3.2f%%) speculations seems useless.\n"
710 "%i (%3.2f%%) speculations produced.\n",
712 ncommon
, ncommon
* 100.0 / nindirect
,
713 nunknown
, nunknown
* 100.0 / nindirect
,
714 nmismatch
, nmismatch
* 100.0 / nindirect
,
715 nimpossible
, nimpossible
* 100.0 / nindirect
,
716 nuseless
, nuseless
* 100.0 / nindirect
,
717 nconverted
, nconverted
* 100.0 / nindirect
);
719 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
720 order_pos
= ipa_reverse_postorder (order
);
721 for (i
= order_pos
- 1; i
>= 0; i
--)
723 if (order
[i
]->local
.local
724 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
725 && ipa_propagate_frequency (order
[i
]))
727 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
728 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
730 something_changed
= true;
731 e
->callee
->aux
= (void *)1;
734 order
[i
]->aux
= NULL
;
737 while (something_changed
)
739 something_changed
= false;
740 for (i
= order_pos
- 1; i
>= 0; i
--)
743 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
744 && ipa_propagate_frequency (order
[i
]))
746 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
747 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
749 something_changed
= true;
750 e
->callee
->aux
= (void *)1;
753 order
[i
]->aux
= NULL
;
762 const pass_data pass_data_ipa_profile
=
765 "profile_estimate", /* name */
766 OPTGROUP_NONE
, /* optinfo_flags */
767 TV_IPA_PROFILE
, /* tv_id */
768 0, /* properties_required */
769 0, /* properties_provided */
770 0, /* properties_destroyed */
771 0, /* todo_flags_start */
772 0, /* todo_flags_finish */
775 class pass_ipa_profile
: public ipa_opt_pass_d
778 pass_ipa_profile (gcc::context
*ctxt
)
779 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
780 ipa_profile_generate_summary
, /* generate_summary */
781 ipa_profile_write_summary
, /* write_summary */
782 ipa_profile_read_summary
, /* read_summary */
783 NULL
, /* write_optimization_summary */
784 NULL
, /* read_optimization_summary */
785 NULL
, /* stmt_fixup */
786 0, /* function_transform_todo_flags_start */
787 NULL
, /* function_transform */
788 NULL
) /* variable_transform */
791 /* opt_pass methods: */
792 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
793 virtual unsigned int execute (function
*) { return ipa_profile (); }
795 }; // class pass_ipa_profile
800 make_pass_ipa_profile (gcc::context
*ctxt
)
802 return new pass_ipa_profile (ctxt
);