/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The ipa-profile pass implements the following analyses propagating profile
   information inter-procedurally:

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  The histogram is complete only with LTO;
     otherwise it contains information only about the current unit.

     A similar histogram is also estimated by the coverage runtime.  That
     histogram does not depend on LTO, but it suffers from various defects:
     first, the gcov runtime does not weight individual basic blocks by their
     estimated execution time, and second, merging multiple runs assumes that
     the histogram distribution did not change.  Consequently the histogram
     constructed here may be more precise.

     The information is used to set the hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization, based on the
     decision of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
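/* Illustrative sketch only (not part of the pass; all names below are local
   to this example): how the sorted count histogram described above turns
   into a hot count threshold.  The real logic, operating on histogram_entry
   and PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE), lives in ipa_profile below.  */
#if 0
struct example_entry { long long count; int time; };

/* Return the smallest execution count whose entries cover at least
   PERMILLE/1000 of the overall estimated time; ENTRIES must already be
   sorted by decreasing count, as cmp_counts arranges below.  */
static long long
example_hot_threshold (const struct example_entry *entries, unsigned len,
		       int permille)
{
  long long overall_time = 0, cumulated = 0, threshold = 1;
  unsigned i;
  for (i = 0; i < len; i++)
    overall_time += entries[i].count * entries[i].time;
  long long cutoff = (overall_time * permille + 500) / 1000;
  for (i = 0; i < len && cumulated < cutoff; i++)
    {
      /* Entries are visited hottest first; the last entry needed to reach
	 the cutoff defines the hot count threshold.  */
      cumulated += entries[i].count * entries[i].time;
      threshold = entries[i].count;
    }
  return threshold;
}
#endif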
#include "coretypes.h"
#include "dominance.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "hard-reg-set.h"
#include "tree-pass.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "tree-iterator.h"
#include "ipa-utils.h"
#include "value-prof.h"
#include "alloc-pool.h"
#include "tree-inline.h"
#include "lto-streamer.h"
#include "data-streamer.h"
#include "symbol-summary.h"
#include "ipa-inline.h"
/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};

/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look up
   duplicate entries.  */

vec<histogram_entry *> histogram;
static alloc_pool histogram_pool;
/* Hashtable support for histogram entries, hashed by their execution count.  */

struct histogram_hash : typed_noop_remove <histogram_entry>
{
  typedef histogram_entry value_type;
  typedef histogram_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}
/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the on-side hash kept to avoid duplicates.  */

static void
account_time_size (hash_table <histogram_hash> *hashtable,
		   vec<histogram_entry *> &histogram,
		   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable->find_slot (&key, INSERT);

  if (!*val)
    {
      *val = (histogram_entry *) pool_alloc (histogram_pool);
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}
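/* Usage sketch (illustrative only): accounting two basic blocks that share
   the same execution count, e.g.
     account_time_size (&hashtable, histogram, 1000, 4, 2);
     account_time_size (&hashtable, histogram, 1000, 7, 3);
   leaves a single histogram entry {count = 1000, time = 11, size = 5};
   differing counts would instead create separate entries.  */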
/* qsort comparator ordering histogram entries by decreasing count, so that
   the hottest entries come first.  */

static int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *)v1;
  const histogram_entry *h2 = *(const histogram_entry * const *)v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}
/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
	    overall_size = 0;

  fprintf (file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, "  %" PRId64 ": time:%i (%2.2f) size:%i (%2.2f)\n",
	       (int64_t) histogram[i]->count,
	       histogram[i]->time,
	       cumulated_time * 100.0 / overall_time,
	       histogram[i]->size,
	       cumulated_size * 100.0 / overall_size);
    }
}
/* Collect histogram from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
				      sizeof (struct histogram_entry), 10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
	int time = 0;
	int size = 0;
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    gimple stmt = gsi_stmt (gsi);
	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_fndecl (stmt))
	      {
		histogram_value h;
		h = gimple_histogram_value_of_type
		      (DECL_STRUCT_FUNCTION (node->decl),
		       stmt, HIST_TYPE_INDIR_CALL);
		/* No need to do sanity check: gimple_ic_transform already
		   takes away bad histograms.  */
		if (h)
		  {
		    /* Counter 0 is the target, counter 1 is the number of
		       executions that called the target, counter 2 is the
		       total number of executions.  */
		    if (h->hvalue.counters[2])
		      {
			struct cgraph_edge * e = node->get_edge (stmt);
			if (e && !e->indirect_unknown_callee)
			  continue;
			e->indirect_info->common_target_id
			  = h->hvalue.counters[0];
			e->indirect_info->common_target_probability
			  = GCOV_COMPUTE_SCALE (h->hvalue.counters[1],
						h->hvalue.counters[2]);
			if (e->indirect_info->common_target_probability
			    > REG_BR_PROB_BASE)
			  {
			    if (dump_file)
			      fprintf (dump_file, "Probability capped to 1\n");
			    e->indirect_info->common_target_probability
			      = REG_BR_PROB_BASE;
			  }
		      }
		    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
						   stmt, h);
		  }
	      }
	    time += estimate_num_insns (stmt, &eni_time_weights);
	    size += estimate_num_insns (stmt, &eni_size_weights);
	  }
	account_time_size (&hashtable, histogram, bb->count, time, size);
      }
  histogram.qsort (cmp_counts);
}
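/* Worked example (illustrative only): for an indirect call site whose
   histogram reads counters[1] = 900 calls to the common target out of
   counters[2] = 1000 total executions, the code above records
   common_target_probability = GCOV_COMPUTE_SCALE (900, 1000), i.e. 90% of
   REG_BR_PROB_BASE, which is later compared against REG_BR_PROB_BASE / 2
   when ipa_profile decides whether to speculate.  */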
/* Serialize the ipa info for lto.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }
  lto_destroy_simple_output_block (ob);
}
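/* The resulting LTO section therefore holds the number of histogram entries
   followed by one (count, time, size) triple per entry;
   ipa_profile_read_summary below reads it back in exactly this order.  */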
/* Deserialize the ipa info for lto.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data ** file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data * file_data;
  int j = 0;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
				      sizeof (struct histogram_entry), 10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
	= lto_create_simple_input_block (file_data,
					 LTO_section_ipa_profile,
					 &data, &len);
      if (ib)
	{
	  unsigned int num = streamer_read_uhwi (ib);
	  unsigned int n;
	  for (n = 0; n < num; n++)
	    {
	      gcov_type count = streamer_read_gcov_count (ib);
	      int time = streamer_read_uhwi (ib);
	      int size = streamer_read_uhwi (ib);
	      account_time_size (&hashtable, histogram,
				 count, time, size);
	    }
	  lto_destroy_simple_input_block (file_data,
					  LTO_section_ipa_profile,
					  ib, data, len);
	}
    }
  histogram.qsort (cmp_counts);
}
/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
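/* All four flags start out as true for a node and are cleared by
   ipa_propagate_frequency_1 as soon as a caller provides contrary evidence;
   whatever remains set afterwards is what ipa_propagate_frequency applies
   to the node.  */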
/* Worker for ipa_propagate_frequency.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
		|| d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != node)
	{
	  d->only_called_at_startup &= edge->caller->only_called_at_startup;
	  /* It makes sense to put main() together with the static constructors.
	     It will be executed for sure, but the rest of the functions called
	     from main are definitely not at startup only.  */
	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
	    d->only_called_at_startup = 0;
	  d->only_called_at_exit &= edge->caller->only_called_at_exit;
	}

      /* When profile feedback is available, do not try to propagate too hard;
	 counts are already a good guide on function frequencies and roundoff
	 errors can push a function into the unlikely section even when it is
	 executed by the train run.  Transfer the function only if all callers
	 are unlikely executed.  */
      if (profile_info && flag_branch_probabilities
	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
	      || (edge->caller->global.inlined_to
		  && edge->caller->global.inlined_to->frequency
		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
	d->maybe_unlikely_executed = false;
      if (!edge->frequency)
	continue;
      switch (edge->caller->frequency)
	{
	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
	  break;
	case NODE_FREQUENCY_EXECUTED_ONCE:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is executed once\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  if (inline_edge_summary (edge)->loop_depth)
	    {
	      d->maybe_executed_once = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  Called in loop\n");
	    }
	  break;
	case NODE_FREQUENCY_HOT:
	case NODE_FREQUENCY_NORMAL:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is normal or hot\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  d->maybe_executed_once = false;
	  break;
	}
    }
  return edge != NULL;
}
/* Return true if NODE contains hot calls.  */

static bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
    else if (!e->inline_failed
	     && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
  return false;
}
/* See if the frequency of NODE can be updated based on the frequencies of its
   callers.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {true, true, true, true};
  bool changed = false;

  /* We can not propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || node->alias
      || (opt_for_fn (node->decl, flag_devirtualize)
	  && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  node->call_for_symbol_thunks_and_aliases (ipa_propagate_frequency_1, &d,
					    true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at startup.\n",
		 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at exit.\n",
		 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count)
    {
      bool hot = false;
      if (node->count >= get_hot_bb_threshold ())
	hot = true;
      if (!hot)
	hot |= contains_hot_call_p (node);
      if (hot)
	{
	  if (node->frequency != NODE_FREQUENCY_HOT)
	    {
	      if (dump_file)
		fprintf (dump_file, "Node %s promoted to hot.\n",
			 node->name ());
	      node->frequency = NODE_FREQUENCY_HOT;
	      return true;
	    }
	  return false;
	}
      else if (node->frequency == NODE_FREQUENCY_HOT)
	{
	  if (dump_file)
	    fprintf (dump_file, "Node %s reduced to normal.\n",
		     node->name ());
	  node->frequency = NODE_FREQUENCY_NORMAL;
	  changed = true;
	}
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
		 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once
	   && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to executed once.\n",
		 node->name ());
      changed = true;
    }
  return changed;
}
/* Simple ipa profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n,*n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
	{
	  gcov_type min, cumulated_time = 0, cumulated_size = 0;

	  fprintf (dump_file, "Overall time: %" PRId64 "\n",
		   (int64_t)overall_time);
	  min = get_hot_bb_threshold ();
	  for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "GCOV min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)min,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
	{
	  cumulated += histogram[i]->count * histogram[i]->time;
	  threshold = histogram[i]->count;
	}
      if (!threshold)
	threshold = 1;
      if (dump_file)
	{
	  gcov_type cumulated_time = 0, cumulated_size = 0;

	  for (i = 0;
	       i < (int)histogram.length () && histogram[i]->count >= threshold;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "Determined min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)threshold,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      if (threshold > get_hot_bb_threshold ()
	  || in_lto_p)
	{
	  if (dump_file)
	    fprintf (dump_file, "Threshold updated.\n");
	  set_hot_bb_threshold (threshold);
	}
    }
  histogram.release ();
  free_alloc_pool (histogram_pool);
  /* Produce speculative calls: we saved the common target from profiling into
     e->common_target_id.  Now, at link time, we can look up the corresponding
     function node and produce a speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      for (e = n->indirect_calls; e; e = e->next_callee)
	{
	  if (n->count)
	    nindirect++;
	  if (e->indirect_info->common_target_id)
	    {
	      if (!node_map_initialized)
		init_node_map (false);
	      node_map_initialized = true;
	      ncommon++;
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
609 fprintf (dump_file
, "Indirect call -> direct call from"
610 " other module %s/%i => %s/%i, prob %3.2f\n",
611 xstrdup_for_dump (n
->name ()), n
->order
,
612 xstrdup_for_dump (n2
->name ()), n2
->order
,
613 e
->indirect_info
->common_target_probability
614 / (float)REG_BR_PROB_BASE
);
616 if (e
->indirect_info
->common_target_probability
617 < REG_BR_PROB_BASE
/ 2)
622 "Not speculating: probability is too low.\n");
		  else if (!e->maybe_hot_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: call is cold.\n");
		    }
		  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
			   && n2->can_be_discarded_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: target is overwritable "
				 "and can be discarded.\n");
		    }
		  else
		    {
		      /* Target may be overwritable, but profile says that
			 control flow goes to this particular implementation
			 of N2.  Speculate on the local alias to allow
			 inlining.  */
		      if (!n2->can_be_discarded_p ())
			{
			  cgraph_node *alias;
			  alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
			  if (alias)
			    n2 = alias;
			}
		      nconverted++;
		      e->make_speculative
			(n2,
			 apply_scale (e->count,
				      e->indirect_info->common_target_probability),
			 apply_scale (e->frequency,
				      e->indirect_info->common_target_probability));
		      update = true;
		    }
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file, "Function with profile-id %i not found.\n",
			     e->indirect_info->common_target_id);
		  nunknown++;
		}
	    }
	}
      if (update)
	inline_update_overall_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
	     "%i indirect calls trained.\n"
	     "%i (%3.2f%%) have common target.\n"
	     "%i (%3.2f%%) targets were not found.\n"
	     "%i (%3.2f%%) speculations seem useless.\n"
	     "%i (%3.2f%%) speculations produced.\n",
	     nindirect,
	     ncommon, ncommon * 100.0 / nindirect,
	     nunknown, nunknown * 100.0 / nindirect,
	     nuseless, nuseless * 100.0 / nindirect,
	     nconverted, nconverted * 100.0 / nindirect);
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local && ipa_propagate_frequency (order[i]))
	{
	  for (e = order[i]->callees; e; e = e->next_callee)
	    if (e->callee->local.local && !e->callee->aux)
	      {
		something_changed = true;
		e->callee->aux = (void *)1;
	      }
	}
      order[i]->aux = NULL;
    }
  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
	{
	  if (order[i]->aux && ipa_propagate_frequency (order[i]))
	    {
	      for (e = order[i]->callees; e; e = e->next_callee)
		if (e->callee->local.local && !e->callee->aux)
		  {
		    something_changed = true;
		    e->callee->aux = (void *)1;
		  }
	    }
	  order[i]->aux = NULL;
	}
    }
  free (order);
  return 0;
}
namespace {

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
		      ipa_profile_generate_summary, /* generate_summary */
		      ipa_profile_write_summary, /* write_summary */
		      ipa_profile_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace
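/* Note: the gate above admits the pass whenever -fipa-profile is enabled or
   when compiling in LTO mode (in_lto_p); at LTO link time the per-unit
   histogram summaries streamed by the hooks above are read back and merged
   before ipa_profile runs.  */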
ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}