1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* The ipa-profile pass implements the following analyses, which propagate profile information:
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance thresholds and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
53 #include "tree-pass.h"
54 #include "tree-ssa-alias.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
58 #include "gimple-iterator.h"
61 #include "tree-iterator.h"
62 #include "ipa-utils.h"
65 #include "value-prof.h"
66 #include "alloc-pool.h"
67 #include "tree-inline.h"
68 #include "lto-streamer.h"
69 #include "data-streamer.h"
70 #include "ipa-inline.h"
72 /* Entry in the histogram. */
74 struct histogram_entry
81 /* Histogram of profile values.
82 The histogram is represented as an ordered vector of entries allocated via
83 histogram_pool. During construction a separate hashtable is kept to look up
entries that already exist, so that no count is recorded twice. */
86 vec
<histogram_entry
*> histogram
;
87 static alloc_pool histogram_pool
;
89 /* Hash traits for the table of histogram entries that is kept while the
   histogram is being built.  Lookups use a histogram_entry as the key, and
   equal () treats two entries as the same when their COUNT fields match.  */
91 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
93 typedef histogram_entry value_type
;
94 typedef histogram_entry compare_type
;
95 static inline hashval_t
hash (const value_type
*);
96 static inline int equal (const value_type
*, const compare_type
*);
100 histogram_hash::hash (const histogram_entry
*val
)
/* Equality callback of histogram_hash: VAL and VAL2 match when they record
   the same execution count.  TIME and SIZE take no part in the comparison.  */
106 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
108 return val
->count
== val2
->count
;
111 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
112 HASHTABLE is the on-side hash kept to avoid duplicates. */
/* HISTOGRAM is taken by reference and may grow; TIME and SIZE are added to
   the entry that belongs to COUNT.  */
115 account_time_size (hash_table
<histogram_hash
> hashtable
,
116 vec
<histogram_entry
*> &histogram
,
117 gcov_type count
, int time
, int size
)
/* The probe key carries only COUNT; its remaining fields are zero.  */
119 histogram_entry key
= {count
, 0, 0};
120 histogram_entry
**val
= hashtable
.find_slot (&key
, INSERT
);
/* A new entry is taken from histogram_pool and appended to HISTOGRAM.
   NOTE(review): the guard restricting this to a previously empty slot, and
   the initialization of the new entry, are not visible in this listing --
   confirm against the full file.  */
124 *val
= (histogram_entry
*) pool_alloc (histogram_pool
);
126 histogram
.safe_push (*val
);
/* Accumulate into the entry stored in the slot.  */
128 (*val
)->time
+= time
;
129 (*val
)->size
+= size
;
/* Comparator passed to histogram.qsort: orders histogram entries by their
   COUNT field.  V1 and V2 each point at a histogram_entry pointer.
   NOTE(review): the values returned for the two comparisons are not visible
   in this listing.  Users of the sorted vector walk it from index 0 while
   count >= threshold, which suggests largest-count-first -- confirm.  */
133 cmp_counts (const void *v1
, const void *v2
)
135 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
136 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
137 if (h1
->count
< h2
->count
)
139 if (h1
->count
> h2
->count
)
144 /* Dump HISTOGRAM to FILE.
   A first pass totals the time (count * time) and size of all entries; a
   second pass prints one line per entry together with the running share of
   those totals.  All output, including the heading, is written to FILE.  */
147 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
150 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
/* The heading goes to FILE like every other line of the dump, rather than
   to the global dump_file, which need not be the stream the caller chose.  */
152 fprintf (file
, "Histogram:\n");
/* Pass 1: totals used as the denominators of the percentages below.  */
153 for (i
= 0; i
< histogram
.length (); i
++)
155 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
156 overall_size
+= histogram
[i
]->size
;
/* Pass 2: print each entry with the cumulated time and size so far.  */
162 for (i
= 0; i
< histogram
.length (); i
++)
164 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
165 cumulated_size
+= histogram
[i
]->size
;
166 fprintf (file
, " "HOST_WIDEST_INT_PRINT_DEC
": time:%i (%2.2f) size:%i (%2.2f)\n",
167 (HOST_WIDEST_INT
) histogram
[i
]->count
,
169 cumulated_time
* 100.0 / overall_time
,
171 cumulated_size
* 100.0 / overall_size
);
175 /* Collect histogram from CFG profiles. */
/* Besides filling the histogram, this records on each indirect call edge the
   common target id and its probability taken from the indirect-call value
   histogram of the call statement.  */
178 ipa_profile_generate_summary (void)
180 struct cgraph_node
*node
;
181 gimple_stmt_iterator gsi
;
182 hash_table
<histogram_hash
> hashtable
;
/* The hash table and the allocation pool back account_time_size.  */
185 hashtable
.create (10);
186 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
/* Visit every statement of every basic block of every function that has a
   gimple body.  */
189 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
190 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
194 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
196 gimple stmt
= gsi_stmt (gsi
);
/* Only calls without a known fndecl are looked up for an indirect-call
   value histogram.  */
197 if (gimple_code (stmt
) == GIMPLE_CALL
198 && !gimple_call_fndecl (stmt
))
201 h
= gimple_histogram_value_of_type
202 (DECL_STRUCT_FUNCTION (node
->decl
),
203 stmt
, HIST_TYPE_INDIR_CALL
);
204 /* No need to do sanity check: gimple_ic_transform already
205 takes away bad histograms. */
208 /* counter 0 is target, counter 1 is number of execution we called target,
209 counter 2 is total number of executions. */
210 if (h
->hvalue
.counters
[2])
212 struct cgraph_edge
* e
= cgraph_edge (node
, stmt
);
213 e
->indirect_info
->common_target_id
214 = h
->hvalue
.counters
[0];
215 e
->indirect_info
->common_target_probability
216 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
/* A probability above REG_BR_PROB_BASE is clamped to REG_BR_PROB_BASE.  */
217 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
220 fprintf (dump_file
, "Probability capped to 1\n");
221 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
224 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
/* Add the statement's estimated time and size to the running totals.
   NOTE(review): the declarations and resets of TIME and SIZE are not
   visible in this listing.  */
228 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
229 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
/* Record the accumulated time and size under the block's execution count.  */
231 account_time_size (hashtable
, histogram
, bb
->count
, time
, size
);
/* The hash table is only needed while entries are being added; afterwards
   the vector is sorted with cmp_counts.  */
233 hashtable
.dispose ();
234 histogram
.qsort (cmp_counts
);
237 /* Serialize the ipa info for lto. */
/* The stream written to section LTO_section_ipa_profile holds the number of
   histogram entries followed by (count, time, size) for each entry.  */
240 ipa_profile_write_summary (void)
242 struct lto_simple_output_block
*ob
243 = lto_create_simple_output_block (LTO_section_ipa_profile
);
/* Entry count first, so the reader knows how many records follow.  */
246 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
247 for (i
= 0; i
< histogram
.length (); i
++)
249 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
250 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
251 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
253 lto_destroy_simple_output_block (ob
);
256 /* Deserialize the ipa info for lto. */
/* Entries read from every input file are merged into the single global
   histogram through account_time_size, and the result is sorted with
   cmp_counts.  */
259 ipa_profile_read_summary (void)
261 struct lto_file_decl_data
** file_data_vec
262 = lto_get_file_decl_data ();
263 struct lto_file_decl_data
* file_data
;
264 hash_table
<histogram_hash
> hashtable
;
267 hashtable
.create (10);
268 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
/* FILE_DATA_VEC is walked until a null element is reached.  */
271 while ((file_data
= file_data_vec
[j
++]))
275 struct lto_input_block
*ib
276 = lto_create_simple_input_block (file_data
,
277 LTO_section_ipa_profile
,
281 unsigned int num
= streamer_read_uhwi (ib
);
/* Each record is read in the order ipa_profile_write_summary emits it:
   count, time, size.  */
283 for (n
= 0; n
< num
; n
++)
285 gcov_type count
= streamer_read_gcov_count (ib
);
286 int time
= streamer_read_uhwi (ib
);
287 int size
= streamer_read_uhwi (ib
);
288 account_time_size (hashtable
, histogram
,
291 lto_destroy_simple_input_block (file_data
,
292 LTO_section_ipa_profile
,
296 hashtable
.dispose ();
297 histogram
.qsort (cmp_counts
);
300 /* Data used by ipa_propagate_frequency. */
/* ipa_propagate_frequency starts with all four flags set, and
   ipa_propagate_frequency_1 clears them while it walks the callers.  */
302 struct ipa_propagate_frequency_data
/* Still possible that the function is unlikely executed.  */
304 bool maybe_unlikely_executed
;
/* Still possible that the function is executed only once.  */
305 bool maybe_executed_once
;
/* AND of the callers' only_called_at_startup flags; also cleared when a
   caller is named as the main function.  */
306 bool only_called_at_startup
;
/* AND of the callers' only_called_at_exit flags.  */
307 bool only_called_at_exit
;
310 /* Worker for ipa_propagate_frequency.  It is handed to
   cgraph_for_node_and_aliases there; DATA points to the
   ipa_propagate_frequency_data being filled in.  Each caller edge of NODE
   can only clear flags in that structure, never set them.  */
313 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
315 struct ipa_propagate_frequency_data
*d
;
316 struct cgraph_edge
*edge
;
318 d
= (struct ipa_propagate_frequency_data
*)data
;
/* The walk over the callers stops early once all four flags are clear,
   since no further caller could change the outcome.  */
319 for (edge
= node
->callers
;
320 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
321 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
322 edge
= edge
->next_caller
)
/* Edges from NODE to itself are tested for here.  NOTE(review): how much of
   the loop body this test guards is not visible in this listing.  */
324 if (edge
->caller
!= node
)
326 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
327 /* It makes sense to put main() together with the static constructors.
328 It will be executed for sure, but rest of functions called from
329 main are definitely not at startup only. */
330 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
331 d
->only_called_at_startup
= 0;
332 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
335 /* When profile feedback is available, do not try to propagate too hard;
336 counts are already good guide on function frequencies and roundoff
337 errors can make us to push function into unlikely section even when
338 it is executed by the train run. Transfer the function only if all
339 callers are unlikely executed. */
340 if (profile_info
&& flag_branch_probabilities
341 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
342 || (edge
->caller
->global
.inlined_to
343 && edge
->caller
->global
.inlined_to
->frequency
344 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
345 d
->maybe_unlikely_executed
= false;
346 if (!edge
->frequency
)
/* The remaining flags depend on the frequency class of the caller.  */
348 switch (edge
->caller
->frequency
)
350 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
352 case NODE_FREQUENCY_EXECUTED_ONCE
:
353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
354 fprintf (dump_file
, " Called by %s that is executed once\n",
355 edge
->caller
->name ());
356 d
->maybe_unlikely_executed
= false;
/* A call with nonzero loop depth rules out "executed once".  */
357 if (inline_edge_summary (edge
)->loop_depth
)
359 d
->maybe_executed_once
= false;
360 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
361 fprintf (dump_file
, " Called in loop\n");
364 case NODE_FREQUENCY_HOT
:
365 case NODE_FREQUENCY_NORMAL
:
366 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
367 fprintf (dump_file
, " Called by %s that is normal or hot\n",
368 edge
->caller
->name ());
369 d
->maybe_unlikely_executed
= false;
370 d
->maybe_executed_once
= false;
377 /* Return true if NODE contains hot calls.
   Both the direct callee edges and the indirect call edges of NODE are
   tested with cgraph_maybe_hot_edge_p.  A direct edge whose inline_failed is
   unset (presumably a callee that was inlined -- confirm) is searched
   recursively through its callee.  */
380 contains_hot_call_p (struct cgraph_node
*node
)
382 struct cgraph_edge
*e
;
383 for (e
= node
->callees
; e
; e
= e
->next_callee
)
384 if (cgraph_maybe_hot_edge_p (e
))
386 else if (!e
->inline_failed
387 && contains_hot_call_p (e
->callee
))
389 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
390 if (cgraph_maybe_hot_edge_p (e
))
395 /* See if the frequency of NODE can be updated based on frequencies of its
callers. */
/* Recomputes NODE's only_called_at_startup / only_called_at_exit flags and
   its frequency class from the data gathered over its callers.
   NOTE(review): the return statements are not visible in this listing; the
   result is tested as a boolean by the code that drives the propagation,
   presumably meaning "NODE was changed" -- confirm.  */
398 ipa_propagate_frequency (struct cgraph_node
*node
)
/* All four properties start out true; the caller walk below clears them.  */
400 struct ipa_propagate_frequency_data d
= {true, true, true, true};
401 bool changed
= false;
403 /* We can not propagate anything useful about externally visible functions
404 nor about virtuals. */
405 if (!node
->local
.local
407 || (flag_devirtualize
&& DECL_VIRTUAL_P (node
->decl
)))
409 gcc_assert (node
->analyzed
);
410 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
411 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
/* Fill D by running ipa_propagate_frequency_1 over NODE and its aliases.  */
413 cgraph_for_node_and_aliases (node
, ipa_propagate_frequency_1
, &d
, true);
/* The startup flag is set only when the exit flag did not survive as well,
   and vice versa below; a node for which both survived gets neither.  */
415 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
416 && !node
->only_called_at_startup
)
418 node
->only_called_at_startup
= true;
420 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
424 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
425 && !node
->only_called_at_exit
)
427 node
->only_called_at_exit
= true;
429 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
434 /* With profile we can decide on hot/normal based on count. */
438 if (node
->count
>= get_hot_bb_threshold ())
/* A node also counts as hot when it contains a hot call.  */
441 hot
|= contains_hot_call_p (node
);
444 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
447 fprintf (dump_file
, "Node %s promoted to hot.\n",
449 node
->frequency
= NODE_FREQUENCY_HOT
;
454 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
457 fprintf (dump_file
, "Node %s reduced to normal.\n",
459 node
->frequency
= NODE_FREQUENCY_NORMAL
;
463 /* These come either from profile or user hints; never update them. */
464 if (node
->frequency
== NODE_FREQUENCY_HOT
465 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
/* "Unlikely executed" takes precedence over "executed once".  */
467 if (d
.maybe_unlikely_executed
)
469 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
471 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
475 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
477 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
479 fprintf (dump_file
, "Node %s promoted to executed once.\n",
486 /* Simple ipa profile pass propagating frequencies across the callgraph. */
491 struct cgraph_node
**order
;
492 struct cgraph_edge
*e
;
494 bool something_changed
= false;
496 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
497 struct cgraph_node
*n
,*n2
;
498 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
499 bool node_map_initialized
= false;
502 dump_histogram (dump_file
, histogram
);
503 for (i
= 0; i
< (int)histogram
.length (); i
++)
505 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
506 overall_size
+= histogram
[i
]->size
;
512 gcc_assert (overall_size
);
515 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
517 fprintf (dump_file
, "Overall time: "HOST_WIDEST_INT_PRINT_DEC
"\n",
518 (HOST_WIDEST_INT
)overall_time
);
519 min
= get_hot_bb_threshold ();
520 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
523 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
524 cumulated_size
+= histogram
[i
]->size
;
526 fprintf (dump_file
, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
527 " Time:%3.2f%% Size:%3.2f%%\n",
528 (HOST_WIDEST_INT
)min
,
529 cumulated_time
* 100.0 / overall_time
,
530 cumulated_size
* 100.0 / overall_size
);
532 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
534 for (i
= 0; cumulated
< cutoff
; i
++)
536 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
537 threshold
= histogram
[i
]->count
;
543 gcov_type cumulated_time
= 0, cumulated_size
= 0;
546 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
549 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
550 cumulated_size
+= histogram
[i
]->size
;
552 fprintf (dump_file
, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
553 " Time:%3.2f%% Size:%3.2f%%\n",
554 (HOST_WIDEST_INT
)threshold
,
555 cumulated_time
* 100.0 / overall_time
,
556 cumulated_size
* 100.0 / overall_size
);
558 if (threshold
> get_hot_bb_threshold ()
562 fprintf (dump_file
, "Threshold updated.\n");
563 set_hot_bb_threshold (threshold
);
566 histogram
.release ();
567 free_alloc_pool (histogram_pool
);
569 /* Produce speculative calls: we saved the common target from profiling into
570 e->indirect_info->common_target_id. Now, at link time, we can look up the
571 corresponding function node and produce a speculative call. */
573 FOR_EACH_DEFINED_FUNCTION (n
)
577 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
581 if (e
->indirect_info
->common_target_id
)
583 if (!node_map_initialized
)
584 init_node_map (false);
585 node_map_initialized
= true;
587 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
592 fprintf (dump_file
, "Indirect call -> direct call from"
593 " other module %s/%i => %s/%i, prob %3.2f\n",
594 xstrdup (n
->name ()), n
->order
,
595 xstrdup (n2
->name ()), n2
->order
,
596 e
->indirect_info
->common_target_probability
597 / (float)REG_BR_PROB_BASE
);
599 if (e
->indirect_info
->common_target_probability
600 < REG_BR_PROB_BASE
/ 2)
605 "Not speculating: probability is too low.\n");
607 else if (!cgraph_maybe_hot_edge_p (e
))
612 "Not speculating: call is cold.\n");
614 else if (cgraph_function_body_availability (n2
)
615 <= AVAIL_OVERWRITABLE
616 && symtab_can_be_discarded (n2
))
621 "Not speculating: target is overwritable "
622 "and can be discarded.\n");
626 /* Target may be overwritable, but profile says that
627 control flow goes to this particular implementation
628 of N2. Speculate on the local alias to allow inlining. */
630 if (!symtab_can_be_discarded (n2
))
633 alias
= cgraph (symtab_nonoverwritable_alias
639 cgraph_turn_edge_to_speculative
641 apply_scale (e
->count
,
642 e
->indirect_info
->common_target_probability
),
643 apply_scale (e
->frequency
,
644 e
->indirect_info
->common_target_probability
));
651 fprintf (dump_file
, "Function with profile-id %i not found.\n",
652 e
->indirect_info
->common_target_id
);
658 inline_update_overall_summary (n
);
660 if (node_map_initialized
)
662 if (dump_file
&& nindirect
)
664 "%i indirect calls trained.\n"
665 "%i (%3.2f%%) have common target.\n"
666 "%i (%3.2f%%) targets was not found.\n"
667 "%i (%3.2f%%) speculations seems useless.\n"
668 "%i (%3.2f%%) speculations produced.\n",
670 ncommon
, ncommon
* 100.0 / nindirect
,
671 nunknown
, nunknown
* 100.0 / nindirect
,
672 nuseless
, nuseless
* 100.0 / nindirect
,
673 nconverted
, nconverted
* 100.0 / nindirect
);
675 order
= XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
676 order_pos
= ipa_reverse_postorder (order
);
677 for (i
= order_pos
- 1; i
>= 0; i
--)
679 if (order
[i
]->local
.local
&& ipa_propagate_frequency (order
[i
]))
681 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
682 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
684 something_changed
= true;
685 e
->callee
->aux
= (void *)1;
688 order
[i
]->aux
= NULL
;
691 while (something_changed
)
693 something_changed
= false;
694 for (i
= order_pos
- 1; i
>= 0; i
--)
696 if (order
[i
]->aux
&& ipa_propagate_frequency (order
[i
]))
698 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
699 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
701 something_changed
= true;
702 e
->callee
->aux
= (void *)1;
705 order
[i
]->aux
= NULL
;
/* Gate of the pass: returns the value of flag_ipa_profile.  */
713 gate_ipa_profile (void)
715 return flag_ipa_profile
;
/* Static description of the pass: it is named "profile_estimate", is timed
   under TV_IPA_PROFILE, and requires, provides and destroys no properties
   and sets no TODO flags.  */
720 const pass_data pass_data_ipa_profile
=
723 "profile_estimate", /* name */
724 OPTGROUP_NONE
, /* optinfo_flags */
726 true, /* has_execute */
727 TV_IPA_PROFILE
, /* tv_id */
728 0, /* properties_required */
729 0, /* properties_provided */
730 0, /* properties_destroyed */
731 0, /* todo_flags_start */
732 0, /* todo_flags_finish */
/* Pass object.  The constructor registers the three summary hooks defined in
   this file (generate, write, read) with ipa_opt_pass_d and leaves the
   optimization-summary, statement-fixup and transform hooks empty.  */
735 class pass_ipa_profile
: public ipa_opt_pass_d
738 pass_ipa_profile (gcc::context
*ctxt
)
739 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
740 ipa_profile_generate_summary
, /* generate_summary */
741 ipa_profile_write_summary
, /* write_summary */
742 ipa_profile_read_summary
, /* read_summary */
743 NULL
, /* write_optimization_summary */
744 NULL
, /* read_optimization_summary */
745 NULL
, /* stmt_fixup */
746 0, /* function_transform_todo_flags_start */
747 NULL
, /* function_transform */
748 NULL
) /* variable_transform */
751 /* opt_pass methods: */
/* gate () forwards to gate_ipa_profile and execute () to ipa_profile.  */
752 bool gate () { return gate_ipa_profile (); }
753 unsigned int execute () { return ipa_profile (); }
755 }; // class pass_ipa_profile
/* Factory for the pass: returns a newly allocated pass_ipa_profile
   constructed with CTXT.  */
760 make_pass_ipa_profile (gcc::context
*ctxt
)
762 return new pass_ipa_profile (ctxt
);