1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provides us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "hard-reg-set.h"
56 #include "fold-const.h"
58 #include "tree-pass.h"
59 #include "internal-fn.h"
60 #include "gimple-iterator.h"
63 #include "tree-iterator.h"
64 #include "ipa-utils.h"
67 #include "value-prof.h"
68 #include "alloc-pool.h"
69 #include "tree-inline.h"
70 #include "data-streamer.h"
71 #include "symbol-summary.h"
73 #include "ipa-inline.h"
75 /* Entry in the histogram. */
77 struct histogram_entry
84 /* Histogram of profile values.
85 The histogram is represented as an ordered vector of entries allocated via
86 histogram_pool. During construction a separate hashtable is kept to lookup
89 vec
<histogram_entry
*> histogram
;
90 static object_allocator
<histogram_entry
> histogram_pool ("IPA histogram");
92 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
94 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
96 static inline hashval_t
hash (const histogram_entry
*);
97 static inline int equal (const histogram_entry
*, const histogram_entry
*);
101 histogram_hash::hash (const histogram_entry
*val
)
107 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
109 return val
->count
== val2
->count
;
112 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
113 HASHTABLE is the on-side hash kept to avoid duplicates. */
116 account_time_size (hash_table
<histogram_hash
> *hashtable
,
117 vec
<histogram_entry
*> &histogram
,
118 gcov_type count
, int time
, int size
)
120 histogram_entry key
= {count
, 0, 0};
121 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
125 *val
= histogram_pool
.allocate ();
127 histogram
.safe_push (*val
);
129 (*val
)->time
+= time
;
130 (*val
)->size
+= size
;
134 cmp_counts (const void *v1
, const void *v2
)
136 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
137 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
138 if (h1
->count
< h2
->count
)
140 if (h1
->count
> h2
->count
)
145 /* Dump HISTOGRAM to FILE. */
148 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
151 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
153 fprintf (dump_file
, "Histogram:\n");
154 for (i
= 0; i
< histogram
.length (); i
++)
156 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
157 overall_size
+= histogram
[i
]->size
;
163 for (i
= 0; i
< histogram
.length (); i
++)
165 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
166 cumulated_size
+= histogram
[i
]->size
;
167 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
168 (int64_t) histogram
[i
]->count
,
170 cumulated_time
* 100.0 / overall_time
,
172 cumulated_size
* 100.0 / overall_size
);
176 /* Collect histogram from CFG profiles. */
179 ipa_profile_generate_summary (void)
181 struct cgraph_node
*node
;
182 gimple_stmt_iterator gsi
;
185 hash_table
<histogram_hash
> hashtable (10);
187 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
188 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
192 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
194 gimple
*stmt
= gsi_stmt (gsi
);
195 if (gimple_code (stmt
) == GIMPLE_CALL
196 && !gimple_call_fndecl (stmt
))
199 h
= gimple_histogram_value_of_type
200 (DECL_STRUCT_FUNCTION (node
->decl
),
201 stmt
, HIST_TYPE_INDIR_CALL
);
202 /* No need to do sanity check: gimple_ic_transform already
203 takes away bad histograms. */
206 /* counter 0 is target, counter 1 is number of execution we called target,
207 counter 2 is total number of executions. */
208 if (h
->hvalue
.counters
[2])
210 struct cgraph_edge
* e
= node
->get_edge (stmt
);
211 if (e
&& !e
->indirect_unknown_callee
)
213 e
->indirect_info
->common_target_id
214 = h
->hvalue
.counters
[0];
215 e
->indirect_info
->common_target_probability
216 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
217 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
220 fprintf (dump_file
, "Probability capped to 1\n");
221 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
224 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
228 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
229 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
231 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
233 histogram
.qsort (cmp_counts
);
236 /* Serialize the ipa info for lto. */
239 ipa_profile_write_summary (void)
241 struct lto_simple_output_block
*ob
242 = lto_create_simple_output_block (LTO_section_ipa_profile
);
245 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
246 for (i
= 0; i
< histogram
.length (); i
++)
248 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
249 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
250 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
252 lto_destroy_simple_output_block (ob
);
255 /* Deserialize the ipa info for lto. */
258 ipa_profile_read_summary (void)
260 struct lto_file_decl_data
** file_data_vec
261 = lto_get_file_decl_data ();
262 struct lto_file_decl_data
* file_data
;
265 hash_table
<histogram_hash
> hashtable (10);
267 while ((file_data
= file_data_vec
[j
++]))
271 struct lto_input_block
*ib
272 = lto_create_simple_input_block (file_data
,
273 LTO_section_ipa_profile
,
277 unsigned int num
= streamer_read_uhwi (ib
);
279 for (n
= 0; n
< num
; n
++)
281 gcov_type count
= streamer_read_gcov_count (ib
);
282 int time
= streamer_read_uhwi (ib
);
283 int size
= streamer_read_uhwi (ib
);
284 account_time_size (&hashtable
, histogram
,
287 lto_destroy_simple_input_block (file_data
,
288 LTO_section_ipa_profile
,
292 histogram
.qsort (cmp_counts
);
295 /* Data used by ipa_propagate_frequency. */
297 struct ipa_propagate_frequency_data
299 cgraph_node
*function_symbol
;
300 bool maybe_unlikely_executed
;
301 bool maybe_executed_once
;
302 bool only_called_at_startup
;
303 bool only_called_at_exit
;
306 /* Worker for ipa_propagate_frequency. */
309 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
311 struct ipa_propagate_frequency_data
*d
;
312 struct cgraph_edge
*edge
;
314 d
= (struct ipa_propagate_frequency_data
*)data
;
315 for (edge
= node
->callers
;
316 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
317 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
318 edge
= edge
->next_caller
)
320 if (edge
->caller
!= d
->function_symbol
)
322 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
323 /* It makes sense to put main() together with the static constructors.
324 It will be executed for sure, but rest of functions called from
325 main are definitely not at startup only. */
326 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
327 d
->only_called_at_startup
= 0;
328 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
331 /* When profile feedback is available, do not try to propagate too hard;
332 counts are already good guide on function frequencies and roundoff
333 errors can make us push a function into the unlikely section even when
334 it is executed by the train run. Transfer the function only if all
335 callers are unlikely executed. */
337 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
338 /* Thunks are not profiled. This is more or less implementation
340 && !d
->function_symbol
->thunk
.thunk_p
341 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
342 || (edge
->caller
->global
.inlined_to
343 && edge
->caller
->global
.inlined_to
->frequency
344 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
345 d
->maybe_unlikely_executed
= false;
346 if (!edge
->frequency
)
348 switch (edge
->caller
->frequency
)
350 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
352 case NODE_FREQUENCY_EXECUTED_ONCE
:
353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
354 fprintf (dump_file
, " Called by %s that is executed once\n",
355 edge
->caller
->name ());
356 d
->maybe_unlikely_executed
= false;
357 if (inline_edge_summary (edge
)->loop_depth
)
359 d
->maybe_executed_once
= false;
360 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
361 fprintf (dump_file
, " Called in loop\n");
364 case NODE_FREQUENCY_HOT
:
365 case NODE_FREQUENCY_NORMAL
:
366 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
367 fprintf (dump_file
, " Called by %s that is normal or hot\n",
368 edge
->caller
->name ());
369 d
->maybe_unlikely_executed
= false;
370 d
->maybe_executed_once
= false;
377 /* Return ture if NODE contains hot calls. */
380 contains_hot_call_p (struct cgraph_node
*node
)
382 struct cgraph_edge
*e
;
383 for (e
= node
->callees
; e
; e
= e
->next_callee
)
384 if (e
->maybe_hot_p ())
386 else if (!e
->inline_failed
387 && contains_hot_call_p (e
->callee
))
389 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
390 if (e
->maybe_hot_p ())
395 /* See if the frequency of NODE can be updated based on frequencies of its
398 ipa_propagate_frequency (struct cgraph_node
*node
)
400 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
401 bool changed
= false;
403 /* We can not propagate anything useful about externally visible functions
404 nor about virtuals. */
405 if (!node
->local
.local
407 || (opt_for_fn (node
->decl
, flag_devirtualize
)
408 && DECL_VIRTUAL_P (node
->decl
)))
410 gcc_assert (node
->analyzed
);
411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
412 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
414 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
417 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
418 && !node
->only_called_at_startup
)
420 node
->only_called_at_startup
= true;
422 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
426 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
427 && !node
->only_called_at_exit
)
429 node
->only_called_at_exit
= true;
431 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
436 /* With profile we can decide on hot/normal based on count. */
440 if (node
->count
>= get_hot_bb_threshold ())
443 hot
|= contains_hot_call_p (node
);
446 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
449 fprintf (dump_file
, "Node %s promoted to hot.\n",
451 node
->frequency
= NODE_FREQUENCY_HOT
;
456 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
459 fprintf (dump_file
, "Node %s reduced to normal.\n",
461 node
->frequency
= NODE_FREQUENCY_NORMAL
;
465 /* These come either from profile or user hints; never update them. */
466 if (node
->frequency
== NODE_FREQUENCY_HOT
467 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
469 if (d
.maybe_unlikely_executed
)
471 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
473 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
477 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
479 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
481 fprintf (dump_file
, "Node %s promoted to executed once.\n",
488 /* Simple ipa profile pass propagating frequencies across the callgraph. */
493 struct cgraph_node
**order
;
494 struct cgraph_edge
*e
;
496 bool something_changed
= false;
498 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
499 struct cgraph_node
*n
,*n2
;
500 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
501 int nmismatch
= 0, nimpossible
= 0;
502 bool node_map_initialized
= false;
505 dump_histogram (dump_file
, histogram
);
506 for (i
= 0; i
< (int)histogram
.length (); i
++)
508 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
509 overall_size
+= histogram
[i
]->size
;
515 gcc_assert (overall_size
);
518 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
520 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
521 (int64_t)overall_time
);
522 min
= get_hot_bb_threshold ();
523 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
526 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
527 cumulated_size
+= histogram
[i
]->size
;
529 fprintf (dump_file
, "GCOV min count: %" PRId64
530 " Time:%3.2f%% Size:%3.2f%%\n",
532 cumulated_time
* 100.0 / overall_time
,
533 cumulated_size
* 100.0 / overall_size
);
535 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
537 for (i
= 0; cumulated
< cutoff
; i
++)
539 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
540 threshold
= histogram
[i
]->count
;
546 gcov_type cumulated_time
= 0, cumulated_size
= 0;
549 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
552 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
553 cumulated_size
+= histogram
[i
]->size
;
555 fprintf (dump_file
, "Determined min count: %" PRId64
556 " Time:%3.2f%% Size:%3.2f%%\n",
558 cumulated_time
* 100.0 / overall_time
,
559 cumulated_size
* 100.0 / overall_size
);
561 if (threshold
> get_hot_bb_threshold ()
565 fprintf (dump_file
, "Threshold updated.\n");
566 set_hot_bb_threshold (threshold
);
569 histogram
.release ();
570 histogram_pool
.release ();
572 /* Produce speculative calls: we saved the common target from profiling into
573 e->common_target_id. Now, at link time, we can look up corresponding
574 function node and produce speculative call. */
576 FOR_EACH_DEFINED_FUNCTION (n
)
580 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
583 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
587 if (e
->indirect_info
->common_target_id
)
589 if (!node_map_initialized
)
590 init_node_map (false);
591 node_map_initialized
= true;
593 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
598 fprintf (dump_file
, "Indirect call -> direct call from"
599 " other module %s/%i => %s/%i, prob %3.2f\n",
600 xstrdup_for_dump (n
->name ()), n
->order
,
601 xstrdup_for_dump (n2
->name ()), n2
->order
,
602 e
->indirect_info
->common_target_probability
603 / (float)REG_BR_PROB_BASE
);
605 if (e
->indirect_info
->common_target_probability
606 < REG_BR_PROB_BASE
/ 2)
611 "Not speculating: probability is too low.\n");
613 else if (!e
->maybe_hot_p ())
618 "Not speculating: call is cold.\n");
620 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
621 && n2
->can_be_discarded_p ())
626 "Not speculating: target is overwritable "
627 "and can be discarded.\n");
629 else if (ipa_node_params_sum
&& ipa_edge_args_vector
630 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
631 && ipa_get_param_count (IPA_NODE_REF (n2
))
632 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
633 && (ipa_get_param_count (IPA_NODE_REF (n2
))
634 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
635 || !stdarg_p (TREE_TYPE (n2
->decl
))))
641 "parameter count mistmatch\n");
643 else if (e
->indirect_info
->polymorphic
644 && !opt_for_fn (n
->decl
, flag_devirtualize
)
645 && !possible_polymorphic_call_target_p (e
, n2
))
651 "function is not in the polymorphic "
652 "call target list\n");
656 /* Target may be overwritable, but profile says that
657 control flow goes to this particular implementation
658 of N2. Speculate on the local alias to allow inlining.
660 if (!n2
->can_be_discarded_p ())
663 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
670 apply_scale (e
->count
,
671 e
->indirect_info
->common_target_probability
),
672 apply_scale (e
->frequency
,
673 e
->indirect_info
->common_target_probability
));
680 fprintf (dump_file
, "Function with profile-id %i not found.\n",
681 e
->indirect_info
->common_target_id
);
687 inline_update_overall_summary (n
);
689 if (node_map_initialized
)
691 if (dump_file
&& nindirect
)
693 "%i indirect calls trained.\n"
694 "%i (%3.2f%%) have common target.\n"
695 "%i (%3.2f%%) targets was not found.\n"
696 "%i (%3.2f%%) targets had parameter count mismatch.\n"
697 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
698 "%i (%3.2f%%) speculations seems useless.\n"
699 "%i (%3.2f%%) speculations produced.\n",
701 ncommon
, ncommon
* 100.0 / nindirect
,
702 nunknown
, nunknown
* 100.0 / nindirect
,
703 nmismatch
, nmismatch
* 100.0 / nindirect
,
704 nimpossible
, nimpossible
* 100.0 / nindirect
,
705 nuseless
, nuseless
* 100.0 / nindirect
,
706 nconverted
, nconverted
* 100.0 / nindirect
);
708 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
709 order_pos
= ipa_reverse_postorder (order
);
710 for (i
= order_pos
- 1; i
>= 0; i
--)
712 if (order
[i
]->local
.local
713 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
714 && ipa_propagate_frequency (order
[i
]))
716 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
717 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
719 something_changed
= true;
720 e
->callee
->aux
= (void *)1;
723 order
[i
]->aux
= NULL
;
726 while (something_changed
)
728 something_changed
= false;
729 for (i
= order_pos
- 1; i
>= 0; i
--)
732 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
733 && ipa_propagate_frequency (order
[i
]))
735 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
736 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
738 something_changed
= true;
739 e
->callee
->aux
= (void *)1;
742 order
[i
]->aux
= NULL
;
751 const pass_data pass_data_ipa_profile
=
754 "profile_estimate", /* name */
755 OPTGROUP_NONE
, /* optinfo_flags */
756 TV_IPA_PROFILE
, /* tv_id */
757 0, /* properties_required */
758 0, /* properties_provided */
759 0, /* properties_destroyed */
760 0, /* todo_flags_start */
761 0, /* todo_flags_finish */
764 class pass_ipa_profile
: public ipa_opt_pass_d
767 pass_ipa_profile (gcc::context
*ctxt
)
768 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
769 ipa_profile_generate_summary
, /* generate_summary */
770 ipa_profile_write_summary
, /* write_summary */
771 ipa_profile_read_summary
, /* read_summary */
772 NULL
, /* write_optimization_summary */
773 NULL
, /* read_optimization_summary */
774 NULL
, /* stmt_fixup */
775 0, /* function_transform_todo_flags_start */
776 NULL
, /* function_transform */
777 NULL
) /* variable_transform */
780 /* opt_pass methods: */
781 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
782 virtual unsigned int execute (function
*) { return ipa_profile (); }
784 }; // class pass_ipa_profile
789 make_pass_ipa_profile (gcc::context
*ctxt
)
791 return new pass_ipa_profile (ctxt
);