/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The ipa-profile pass implements the following analyses, propagating profile
   information inter-procedurally:

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  The histogram is complete only with LTO;
     otherwise it contains information only about the current unit.

     A similar histogram is also estimated by the coverage runtime.  That
     histogram does not depend on LTO, but it suffers from various defects:
     first, the gcov runtime does not weight individual basic blocks by
     estimated execution time, and second, the merging of multiple runs
     assumes that the histogram distribution did not change.  Consequently,
     the histogram constructed here may be more precise.

     The information is used to set hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization, based on the
     decision of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
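/* As a purely illustrative example of the first analysis: if the histogram
   shows that statements executed at least 1000000 times account for 99% of
   the cumulated estimated time while covering only 5% of the code size, then
   1000000 is a natural candidate for the hot-count threshold and everything
   below it can be treated as cold for placement purposes.  The exact cutoff
   is controlled by the HOT_BB_COUNT_WS_PERMILLE parameter used later in
   ipa_profile.  */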
#include "coretypes.h"
#include "dominance.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "hard-reg-set.h"
#include "tree-pass.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "tree-iterator.h"
#include "ipa-utils.h"
#include "value-prof.h"
#include "alloc-pool.h"
#include "tree-inline.h"
#include "lto-streamer.h"
#include "data-streamer.h"
#include "ipa-inline.h"
/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};

/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look up
   duplicate entries.  */

vec<histogram_entry *> histogram;
static alloc_pool histogram_pool;
/* Hashtable support for storing histogram entries hashed by their COUNT.  */

struct histogram_hash : typed_noop_remove <histogram_entry>
{
  typedef histogram_entry value_type;
  typedef histogram_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};
inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}
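/* Entries are hashed and compared purely by their execution COUNT, so all
   statements observed with the same count accumulate into a single
   histogram_entry; TIME and SIZE act as per-count accumulators.  */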
/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the side hashtable kept to avoid duplicate entries.  */

static void
account_time_size (hash_table <histogram_hash> *hashtable,
		   vec<histogram_entry *> &histogram,
		   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable->find_slot (&key, INSERT);

  if (!*val)
    {
      *val = (histogram_entry *) pool_alloc (histogram_pool);
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}
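/* For example (illustrative numbers only): a basic block with profile count
   1000 whose statements have estimated time 5 and size 3 results in
   account_time_size (&hashtable, histogram, 1000, 5, 3); a later block with
   the same count merges into the same entry, yielding
   {count = 1000, time = 10, size = 6}.  */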
/* qsort comparator sorting histogram entries by decreasing COUNT.  */

static int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *)v1;
  const histogram_entry *h2 = *(const histogram_entry * const *)v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}
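/* Because the histogram is sorted with the hottest counts first, prefix sums
   over it (as done in dump_histogram and ipa_profile below) directly describe
   the working set: how much estimated time and size the top counts cover.  */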
/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
	    overall_size = 0;

  fprintf (dump_file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, "  %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
	       (int64_t) histogram[i]->count,
	       histogram[i]->time,
	       cumulated_time * 100.0 / overall_time,
	       histogram[i]->size,
	       cumulated_size * 100.0 / overall_size);
    }
}
/* Collect histogram from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
				      sizeof (struct histogram_entry), 10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
	int time = 0;
	int size = 0;
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    gimple stmt = gsi_stmt (gsi);
	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_fndecl (stmt))
	      {
		histogram_value h;
		h = gimple_histogram_value_of_type
		      (DECL_STRUCT_FUNCTION (node->decl),
		       stmt, HIST_TYPE_INDIR_CALL);
		/* No need to do sanity check: gimple_ic_transform already
		   takes away bad histograms.  */
		if (h)
		  {
		    /* Counter 0 is the target, counter 1 is the number of
		       executions we called the target and counter 2 is the
		       total number of executions.  */
		    if (h->hvalue.counters[2])
		      {
			struct cgraph_edge * e = node->get_edge (stmt);
			if (e && !e->indirect_unknown_callee)
			  {
			    e->indirect_info->common_target_id
			      = h->hvalue.counters[0];
			    e->indirect_info->common_target_probability
			      = GCOV_COMPUTE_SCALE (h->hvalue.counters[1],
						    h->hvalue.counters[2]);
			    if (e->indirect_info->common_target_probability
				> REG_BR_PROB_BASE)
			      {
				if (dump_file)
				  fprintf (dump_file,
					   "Probability capped to 1\n");
				e->indirect_info->common_target_probability
				  = REG_BR_PROB_BASE;
			      }
			  }
		      }
		    gimple_remove_histogram_value
		      (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
		  }
	      }
	    time += estimate_num_insns (stmt, &eni_time_weights);
	    size += estimate_num_insns (stmt, &eni_size_weights);
	  }
	account_time_size (&hashtable, histogram, bb->count, time, size);
      }
  histogram.qsort (cmp_counts);
}
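/* Sketch of the counter layout consumed above (produced by the local
   value-profile pass): for each indirect call site, counters[0] holds the
   profile-id of the most common target, counters[1] how many times that
   target was called and counters[2] the total number of executions.  With
   illustrative values counters = {42, 30, 100}, common_target_probability
   becomes GCOV_COMPUTE_SCALE (30, 100), i.e. 30% of REG_BR_PROB_BASE.  */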
/* Serialize the ipa info for lto.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }
  lto_destroy_simple_output_block (ob);
}
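/* The on-disk LTO record is thus simply the number of entries followed by
   (count, time, size) triples, written in the sorted order of the in-memory
   histogram and re-merged entry by entry on the reader side.  */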
/* Deserialize the ipa info for lto.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  int j = 0;

  hash_table<histogram_hash> hashtable (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
				      sizeof (struct histogram_entry), 10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
	= lto_create_simple_input_block (file_data,
					 LTO_section_ipa_profile,
					 &data, &len);
      if (ib)
	{
	  unsigned int num = streamer_read_uhwi (ib);
	  unsigned int n;
	  for (n = 0; n < num; n++)
	    {
	      gcov_type count = streamer_read_gcov_count (ib);
	      int time = streamer_read_uhwi (ib);
	      int size = streamer_read_uhwi (ib);
	      account_time_size (&hashtable, histogram,
				 count, time, size);
	    }
	  lto_destroy_simple_input_block (file_data,
					  LTO_section_ipa_profile,
					  ib, data, len);
	}
    }
  histogram.qsort (cmp_counts);
}
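/* Because account_time_size is reused here, identical counts coming from
   different translation units collapse into a single histogram entry, so the
   LTO-wide histogram stays as compact as the per-unit ones.  */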
/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
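/* All four flags start out optimistically true in ipa_propagate_frequency and
   are only ever cleared by the worker below as evidence from callers arrives;
   whatever remains true after walking all callers can be asserted about the
   callee.  */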
/* Worker for ipa_propagate_frequency_1.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
		|| d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != node)
	{
	  d->only_called_at_startup &= edge->caller->only_called_at_startup;
	  /* It makes sense to put main() together with the static
	     constructors.  It will be executed for sure, but the rest of the
	     functions called from main are definitely not at startup only.  */
	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
	    d->only_called_at_startup = 0;
	  d->only_called_at_exit &= edge->caller->only_called_at_exit;
	}

      /* When profile feedback is available, do not try to propagate too hard;
	 counts are already a good guide on function frequencies and roundoff
	 errors can push a function into the unlikely section even when it is
	 executed by the train run.  Transfer the function only if all callers
	 are unlikely executed.  */
      if (profile_info && flag_branch_probabilities
	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
	      || (edge->caller->global.inlined_to
		  && edge->caller->global.inlined_to->frequency
		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
	d->maybe_unlikely_executed = false;
      if (!edge->frequency)
	continue;
      switch (edge->caller->frequency)
	{
	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
	  break;
	case NODE_FREQUENCY_EXECUTED_ONCE:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is executed once\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  if (inline_edge_summary (edge)->loop_depth)
	    {
	      d->maybe_executed_once = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  Called in loop\n");
	    }
	  break;
	case NODE_FREQUENCY_HOT:
	case NODE_FREQUENCY_NORMAL:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Called by %s that is normal or hot\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  d->maybe_executed_once = false;
	  break;
	}
    }
  return edge != NULL;
}
/* Return true if NODE contains hot calls.  */

bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
    else if (!e->inline_failed
	     && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
  return false;
}
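/* Note the recursion descends only through edges with !inline_failed, i.e.
   calls that were already inlined into NODE, so a hot call anywhere in the
   inlined body makes NODE itself count as containing a hot call.  */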
/* See if the frequency of NODE can be updated based on frequencies of its
   callers.  */

bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {true, true, true, true};
  bool changed = false;

  /* We can not propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || (opt_for_fn (node->decl, flag_devirtualize)
	  && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  node->call_for_symbol_thunks_and_aliases (ipa_propagate_frequency_1, &d,
					    true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at startup.\n",
		 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at exit.\n",
		 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count)
    {
      bool hot = false;
      if (node->count >= get_hot_bb_threshold ())
	hot = true;
      if (!hot)
	hot |= contains_hot_call_p (node);
      if (hot)
	{
	  if (node->frequency != NODE_FREQUENCY_HOT)
	    {
	      if (dump_file)
		fprintf (dump_file, "Node %s promoted to hot.\n",
			 node->name ());
	      node->frequency = NODE_FREQUENCY_HOT;
	      return true;
	    }
	  return false;
	}
      else if (node->frequency == NODE_FREQUENCY_HOT)
	{
	  if (dump_file)
	    fprintf (dump_file, "Node %s reduced to normal.\n",
		     node->name ());
	  node->frequency = NODE_FREQUENCY_NORMAL;
	  changed = true;
	}
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
		 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once
	   && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to executed once.\n",
		 node->name ());
      changed = true;
    }
  return changed;
}
/* Simple ipa profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n,*n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
	{
	  gcov_type min, cumulated_time = 0, cumulated_size = 0;

	  fprintf (dump_file, "Overall time: %" PRId64"\n",
		   (int64_t)overall_time);
	  min = get_hot_bb_threshold ();
	  for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "GCOV min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)min,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
	{
	  cumulated += histogram[i]->count * histogram[i]->time;
	  threshold = histogram[i]->count;
	}
      if (!threshold)
	threshold = 1;
      if (dump_file)
	{
	  gcov_type cumulated_time = 0, cumulated_size = 0;

	  for (i = 0;
	       i < (int)histogram.length () && histogram[i]->count >= threshold;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "Determined min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)threshold,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      if (threshold > get_hot_bb_threshold ()
	  || in_lto_p)
	{
	  if (dump_file)
	    fprintf (dump_file, "Threshold updated.\n");
	  set_hot_bb_threshold (threshold);
	}
    }
  histogram.release ();
  free_alloc_pool (histogram_pool);
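  /* Worked example of the cutoff above (numbers are illustrative): with a
     permille value of 999 and overall_time of 1000000, cutoff is
     (1000000 * 999 + 500) / 1000 = 999000, so the loop walks the histogram
     from the hottest counts down until 99.9% of the estimated time is
     covered and takes the count of the last entry needed as the new
     hot-count threshold.  */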
  /* Produce speculative calls: we saved the common target from profiling into
     e->common_target_id.  Now, at link time, we can look up the corresponding
     function node and produce a speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      for (e = n->indirect_calls; e; e = e->next_callee)
	{
	  if (n->count)
	    nindirect++;
	  if (e->indirect_info->common_target_id)
	    {
	      if (!node_map_initialized)
		init_node_map (false);
	      node_map_initialized = true;
	      ncommon++;
	      n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
	      if (n2)
		{
		  if (dump_file)
		    fprintf (dump_file, "Indirect call -> direct call from"
			     " other module %s/%i => %s/%i, prob %3.2f\n",
			     xstrdup_for_dump (n->name ()), n->order,
			     xstrdup_for_dump (n2->name ()), n2->order,
			     e->indirect_info->common_target_probability
			     / (float)REG_BR_PROB_BASE);
		  if (e->indirect_info->common_target_probability
		      < REG_BR_PROB_BASE / 2)
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: probability is too low.\n");
		    }
		  else if (!e->maybe_hot_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: call is cold.\n");
		    }
		  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
			   && n2->can_be_discarded_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: target is overwritable "
				 "and can be discarded.\n");
		    }
		  else
		    {
		      /* Target may be overwritable, but profile says that
			 control flow goes to this particular implementation
			 of N2.  Speculate on the local alias to allow
			 inlining.  */
		      if (!n2->can_be_discarded_p ())
			{
			  cgraph_node *alias;
			  alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
			  if (alias)
			    n2 = alias;
			}
		      nconverted++;
		      e->make_speculative
			(n2,
			 apply_scale (e->count,
				      e->indirect_info->common_target_probability),
			 apply_scale (e->frequency,
				      e->indirect_info->common_target_probability));
		      update = true;
		    }
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file, "Function with profile-id %i not found.\n",
			     e->indirect_info->common_target_id);
		  nunknown++;
		}
	    }
	}
      if (update)
	inline_update_overall_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
	     "%i indirect calls trained.\n"
	     "%i (%3.2f%%) have common target.\n"
	     "%i (%3.2f%%) targets were not found.\n"
	     "%i (%3.2f%%) speculations seem useless.\n"
	     "%i (%3.2f%%) speculations produced.\n",
	     nindirect,
	     ncommon, ncommon * 100.0 / nindirect,
	     nunknown, nunknown * 100.0 / nindirect,
	     nuseless, nuseless * 100.0 / nindirect,
	     nconverted, nconverted * 100.0 / nindirect);
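  /* A produced speculation replaces the single indirect edge with a guarded
     direct edge to N2 plus a remaining indirect edge, with count and
     frequency scaled by common_target_probability; the inliner may later
     inline the direct part, or the speculation may be undone if it does not
     pay off.  */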
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local && ipa_propagate_frequency (order[i]))
	{
	  for (e = order[i]->callees; e; e = e->next_callee)
	    if (e->callee->local.local && !e->callee->aux)
	      {
		something_changed = true;
		e->callee->aux = (void *)1;
	      }
	}
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
	{
	  if (order[i]->aux && ipa_propagate_frequency (order[i]))
	    {
	      for (e = order[i]->callees; e; e = e->next_callee)
		if (e->callee->local.local && !e->callee->aux)
		  {
		    something_changed = true;
		    e->callee->aux = (void *)1;
		  }
	    }
	  order[i]->aux = NULL;
	}
    }
  free (order);
  return 0;
}
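/* Propagation scheme used above: one sweep in reverse postorder seeds the
   worklist (a local callee whose caller changed is marked via its aux
   pointer), then the while loop re-runs ipa_propagate_frequency only on
   marked nodes until a fixed point is reached.  */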
const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
		      ipa_profile_generate_summary, /* generate_summary */
		      ipa_profile_write_summary, /* write_summary */
		      ipa_profile_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile
ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}