/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The ipa-profile pass implements the following analyses, propagating profile
   information interprocedurally.

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  This histogram is complete only with LTO,
     otherwise it contains information only about the current unit.

     A similar histogram is also estimated by the coverage runtime.  That
     histogram does not depend on LTO, but it suffers from various defects:
     first, the gcov runtime does not weight individual basic blocks by
     estimated execution time, and second, the merging of multiple runs
     assumes that the histogram distribution did not change.  Consequently
     the histogram constructed here may be more precise.

     The information is used to set hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization, based on the
     decision of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
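/* Illustrative example (not part of the original sources; the numbers below
   are made up):  with profile feedback the count histogram might contain
   entries such as

       count=1000000  time=20    (hot inner loops)
       count=1000     time=300
       count=1        time=5000  (startup/cleanup code)

   sorted by descending count.  Walking it until a permille fraction of the
   overall time (count * time summed over all entries) is covered yields the
   count of the last visited entry, which ipa_profile () below installs as the
   new hot-count threshold via set_hot_bb_threshold.  */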
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "double-int.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "hard-reg-set.h"
#include "function.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-pass.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "flags.h"
#include "target.h"
#include "tree-iterator.h"
#include "ipa-utils.h"
#include "profile.h"
#include "params.h"
#include "value-prof.h"
#include "alloc-pool.h"
#include "tree-inline.h"
#include "lto-streamer.h"
#include "data-streamer.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};

/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look up
   duplicate entries.  */

vec<histogram_entry *> histogram;
static alloc_pool histogram_pool;
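/* Note: the vector above is shared by summary generation, LTO streaming and
   the ipa_profile pass itself; it is sorted with cmp_counts (descending
   count) before it is consumed.  */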
/* Hashtable support for storing histogram entries hashed by their
   execution count.  */

struct histogram_hash : typed_noop_remove <histogram_entry>
{
  typedef histogram_entry value_type;
  typedef histogram_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}
/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the on-the-side hash kept to avoid duplicates.  */

static void
account_time_size (hash_table <histogram_hash> *hashtable,
                   vec<histogram_entry *> &histogram,
                   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable->find_slot (&key, INSERT);

  if (!*val)
    {
      *val = (histogram_entry *) pool_alloc (histogram_pool);
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}
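/* qsort comparator: order histogram entries by decreasing execution count so
   that the hottest entries come first.  */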
int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *)v1;
  const histogram_entry *h2 = *(const histogram_entry * const *)v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}
/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
            overall_size = 0;

  fprintf (file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, "  %" PRId64 ": time:%i (%2.2f) size:%i (%2.2f)\n",
               (int64_t) histogram[i]->count,
               histogram[i]->time,
               cumulated_time * 100.0 / overall_time,
               histogram[i]->size,
               cumulated_size * 100.0 / overall_size);
    }
}
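/* Illustrative sketch only; this helper is not part of the pass and the name
   example_threshold_for_permille is hypothetical.  It shows, in isolation,
   the threshold computation that ipa_profile () performs inline below: given
   a histogram already sorted by descending count, accumulate count * time
   until a permille fraction of OVERALL_TIME is covered and return the count
   of the last entry visited.  */

static inline gcov_type
example_threshold_for_permille (vec<histogram_entry *> &hist,
                                gcov_type overall_time, int permille)
{
  /* Round the cutoff to the nearest unit of accumulated time.  */
  gcov_type cutoff = (overall_time * permille + 500) / 1000;
  gcov_type cumulated = 0, threshold = 1;

  for (unsigned int i = 0; i < hist.length () && cumulated < cutoff; i++)
    {
      cumulated += hist[i]->count * hist[i]->time;
      threshold = hist[i]->count;
    }
  return threshold;
}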
/* Collect histogram from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
        int time = 0;
        int size = 0;
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          {
            gimple stmt = gsi_stmt (gsi);
            if (gimple_code (stmt) == GIMPLE_CALL
                && !gimple_call_fndecl (stmt))
              {
                histogram_value h;
                h = gimple_histogram_value_of_type
                      (DECL_STRUCT_FUNCTION (node->decl),
                       stmt, HIST_TYPE_INDIR_CALL);
                /* No need to do sanity check: gimple_ic_transform already
                   takes away bad histograms.  */
                if (h)
                  {
                    /* Counter 0 is the target, counter 1 is the number of
                       executions that called the target, counter 2 is the
                       total number of executions.  */
                    if (h->hvalue.counters[2])
                      {
                        struct cgraph_edge * e = node->get_edge (stmt);
                        if (e && !e->indirect_unknown_callee)
                          continue;
                        e->indirect_info->common_target_id
                          = h->hvalue.counters[0];
                        e->indirect_info->common_target_probability
                          = GCOV_COMPUTE_SCALE (h->hvalue.counters[1],
                                                h->hvalue.counters[2]);
                        if (e->indirect_info->common_target_probability
                            > REG_BR_PROB_BASE)
                          {
                            if (dump_file)
                              fprintf (dump_file, "Probability capped to 1\n");
                            e->indirect_info->common_target_probability
                              = REG_BR_PROB_BASE;
                          }
                      }
                    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
                                                   stmt, h);
                  }
              }
            time += estimate_num_insns (stmt, &eni_time_weights);
            size += estimate_num_insns (stmt, &eni_size_weights);
          }
        account_time_size (&hashtable, histogram, bb->count, time, size);
      }
  histogram.qsort (cmp_counts);
}
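/* Note on the speculative-call data collected above: common_target_probability
   is counters[1] / counters[2] scaled to REG_BR_PROB_BASE by
   GCOV_COMPUTE_SCALE, so e.g. 900 of 1000 profiled calls hitting the common
   target give roughly 0.9 * REG_BR_PROB_BASE (illustrative numbers only).  */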
/* Serialize the ipa info for lto.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }
  lto_destroy_simple_output_block (ob);
}
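/* The section written above thus consists of the number of histogram entries
   followed, for each entry, by its count, time and size; the reader below
   consumes exactly this layout.  */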
/* Deserialize the ipa info for lto.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data ** file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data * file_data;
  int j = 0;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
        = lto_create_simple_input_block (file_data,
                                         LTO_section_ipa_profile,
                                         &data, &len);
      if (ib)
        {
          unsigned int n;
          unsigned int num = streamer_read_uhwi (ib);
          for (n = 0; n < num; n++)
            {
              gcov_type count = streamer_read_gcov_count (ib);
              int time = streamer_read_uhwi (ib);
              int size = streamer_read_uhwi (ib);
              account_time_size (&hashtable, histogram,
                                 count, time, size);
            }
          lto_destroy_simple_input_block (file_data,
                                          LTO_section_ipa_profile,
                                          ib, data, len);
        }
    }
  histogram.qsort (cmp_counts);
}
/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
/* Worker for ipa_propagate_frequency.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
                || d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != node)
        {
          d->only_called_at_startup &= edge->caller->only_called_at_startup;
          /* It makes sense to put main() together with the static constructors.
             It will be executed for sure, but the rest of the functions called
             from main are definitely not startup-only.  */
          if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
            d->only_called_at_startup = 0;
          d->only_called_at_exit &= edge->caller->only_called_at_exit;
        }

      /* When profile feedback is available, do not try to propagate too hard;
         counts are already a good guide on function frequencies and roundoff
         errors can push a function into the unlikely section even when it is
         executed by the train run.  Transfer the function only if all callers
         are unlikely executed.  */
      if (profile_info && flag_branch_probabilities
          && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
              || (edge->caller->global.inlined_to
                  && edge->caller->global.inlined_to->frequency
                     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
        d->maybe_unlikely_executed = false;
      if (!edge->frequency)
        continue;
      switch (edge->caller->frequency)
        {
        case NODE_FREQUENCY_UNLIKELY_EXECUTED:
          break;
        case NODE_FREQUENCY_EXECUTED_ONCE:
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Called by %s that is executed once\n",
                     edge->caller->name ());
          d->maybe_unlikely_executed = false;
          if (inline_edge_summary (edge)->loop_depth)
            {
              d->maybe_executed_once = false;
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  Called in loop\n");
            }
          break;
        case NODE_FREQUENCY_HOT:
        case NODE_FREQUENCY_NORMAL:
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Called by %s that is normal or hot\n",
                     edge->caller->name ());
          d->maybe_unlikely_executed = false;
          d->maybe_executed_once = false;
          break;
        }
    }
  return edge != NULL;
}
/* Return true if NODE contains hot calls.  */

bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
    else if (!e->inline_failed
             && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
  return false;
}
/* See if the frequency of NODE can be updated based on frequencies of its
   callers.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {true, true, true, true};
  bool changed = false;

  /* We cannot propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || node->alias
      || (opt_for_fn (node->decl, flag_devirtualize)
          && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  node->call_for_symbol_thunks_and_aliases (ipa_propagate_frequency_1, &d,
                                            true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at startup.\n",
                 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at exit.\n",
                 node->name ());
      changed = true;
    }

  /* With profile feedback we can decide on hot/normal based on the count.  */
  if (node->count)
    {
      bool hot = false;
      if (node->count >= get_hot_bb_threshold ())
        hot = true;
      if (!hot)
        hot |= contains_hot_call_p (node);
      if (hot)
        {
          if (node->frequency != NODE_FREQUENCY_HOT)
            {
              if (dump_file)
                fprintf (dump_file, "Node %s promoted to hot.\n",
                         node->name ());
              node->frequency = NODE_FREQUENCY_HOT;
              return true;
            }
          return false;
        }
      else if (node->frequency == NODE_FREQUENCY_HOT)
        {
          if (dump_file)
            fprintf (dump_file, "Node %s reduced to normal.\n",
                     node->name ());
          node->frequency = NODE_FREQUENCY_NORMAL;
          changed = true;
        }
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
                 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once
           && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to executed once.\n",
                 node->name ());
      changed = true;
    }
  return changed;
}
/* Simple ipa profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n, *n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
        {
          gcov_type min, cumulated_time = 0, cumulated_size = 0;

          fprintf (dump_file, "Overall time: %" PRId64 "\n",
                   (int64_t)overall_time);
          min = get_hot_bb_threshold ();
          for (i = 0;
               i < (int)histogram.length () && histogram[i]->count >= min;
               i++)
            {
              cumulated_time += histogram[i]->count * histogram[i]->time;
              cumulated_size += histogram[i]->size;
            }
          fprintf (dump_file, "GCOV min count: %" PRId64
                   " Time:%3.2f%% Size:%3.2f%%\n",
                   (int64_t)min,
                   cumulated_time * 100.0 / overall_time,
                   cumulated_size * 100.0 / overall_size);
        }
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
        {
          cumulated += histogram[i]->count * histogram[i]->time;
          threshold = histogram[i]->count;
        }
      if (!threshold)
        threshold = 1;
      if (dump_file)
        {
          gcov_type cumulated_time = 0, cumulated_size = 0;

          for (i = 0;
               i < (int)histogram.length () && histogram[i]->count >= threshold;
               i++)
            {
              cumulated_time += histogram[i]->count * histogram[i]->time;
              cumulated_size += histogram[i]->size;
            }
          fprintf (dump_file, "Determined min count: %" PRId64
                   " Time:%3.2f%% Size:%3.2f%%\n",
                   (int64_t)threshold,
                   cumulated_time * 100.0 / overall_time,
                   cumulated_size * 100.0 / overall_size);
        }
      if (threshold > get_hot_bb_threshold ()
          || in_lto_p)
        {
          if (dump_file)
            fprintf (dump_file, "Threshold updated.\n");
          set_hot_bb_threshold (threshold);
        }
    }
  histogram.release ();
  free_alloc_pool (histogram_pool);
  /* Produce speculative calls: we saved the common target from profiling into
     e->common_target_id.  Now, at link time, we can look up the corresponding
     function node and produce a speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      for (e = n->indirect_calls; e; e = e->next_callee)
        {
          if (n->count)
            nindirect++;
          if (e->indirect_info->common_target_id)
            {
              if (!node_map_initialized)
                init_node_map (false);
              node_map_initialized = true;
              ncommon++;
              n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
              if (n2)
                {
                  if (dump_file)
                    fprintf (dump_file, "Indirect call -> direct call from"
                             " other module %s/%i => %s/%i, prob %3.2f\n",
                             xstrdup_for_dump (n->name ()), n->order,
                             xstrdup_for_dump (n2->name ()), n2->order,
                             e->indirect_info->common_target_probability
                             / (float)REG_BR_PROB_BASE);
                  if (e->indirect_info->common_target_probability
                      < REG_BR_PROB_BASE / 2)
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: probability is too low.\n");
                    }
                  else if (!e->maybe_hot_p ())
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: call is cold.\n");
                    }
                  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
                           && n2->can_be_discarded_p ())
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: target is overwritable "
                                 "and can be discarded.\n");
                    }
                  else
                    {
                      /* Target may be overwritable, but profile says that
                         control flow goes to this particular implementation
                         of N2.  Speculate on the local alias to allow
                         inlining.  */
                      if (!n2->can_be_discarded_p ())
                        {
                          cgraph_node *alias;
                          alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
                          if (alias)
                            n2 = alias;
                        }
                      nconverted++;
                      e->make_speculative
                        (n2,
                         apply_scale (e->count,
                                      e->indirect_info->common_target_probability),
                         apply_scale (e->frequency,
                                      e->indirect_info->common_target_probability));
                      update = true;
                    }
                }
              else
                {
                  if (dump_file)
                    fprintf (dump_file, "Function with profile-id %i not found.\n",
                             e->indirect_info->common_target_id);
                  nunknown++;
                }
            }
        }
      if (update)
        inline_update_overall_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
             "%i indirect calls trained.\n"
             "%i (%3.2f%%) have common target.\n"
             "%i (%3.2f%%) targets were not found.\n"
             "%i (%3.2f%%) speculations seem useless.\n"
             "%i (%3.2f%%) speculations produced.\n",
             nindirect,
             ncommon, ncommon * 100.0 / nindirect,
             nunknown, nunknown * 100.0 / nindirect,
             nuseless, nuseless * 100.0 / nindirect,
             nconverted, nconverted * 100.0 / nindirect);
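  /* Propagate the frequency flags: visit nodes in reverse postorder and, when
     a node changes, mark its local callees (via their aux pointer) so that
     they are revisited; iterate until no further change.  */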
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local && ipa_propagate_frequency (order[i]))
        {
          for (e = order[i]->callees; e; e = e->next_callee)
            if (e->callee->local.local && !e->callee->aux)
              {
                something_changed = true;
                e->callee->aux = (void *)1;
              }
        }
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
        {
          if (order[i]->aux && ipa_propagate_frequency (order[i]))
            {
              for (e = order[i]->callees; e; e = e->next_callee)
                if (e->callee->local.local && !e->callee->aux)
                  {
                    something_changed = true;
                    e->callee->aux = (void *)1;
                  }
            }
          order[i]->aux = NULL;
        }
    }
  free (order);
  return 0;
}
namespace {

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
                      ipa_profile_generate_summary, /* generate_summary */
                      ipa_profile_write_summary, /* write_summary */
                      ipa_profile_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace
ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}