/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The ipa-profile pass implements the following analyses, propagating profile
   information inter-procedurally.

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  This histogram is complete only with LTO,
     otherwise it contains information only about the current unit.

     A similar histogram is also estimated by the coverage runtime.  That
     histogram does not depend on LTO, but it suffers from various defects:
     first, the gcov runtime does not weight individual basic blocks by their
     estimated execution time, and second, merging multiple runs assumes that
     the histogram distribution did not change.  Consequently, the histogram
     constructed here may be more precise.

     The information is used to set the hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization, based on the
     decision of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
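
/* For example (hypothetical numbers): if the histogram entries with count
   >= 1000 cover 99.9% of the estimated execution time, the hot count
   threshold computed in ipa_profile ends up near 1000 when the working-set
   cutoff HOT_BB_COUNT_WS_PERMILLE is 999 permille.  */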

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-pass.h"
#include "gimple-iterator.h"
#include "tree-iterator.h"
#include "ipa-utils.h"
#include "hash-table.h"
#include "value-prof.h"
#include "alloc-pool.h"
#include "tree-inline.h"
#include "lto-streamer.h"
#include "data-streamer.h"
#include "ipa-inline.h"

/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};

/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look up
   duplicate entries.  */

vec<histogram_entry *> histogram;
static alloc_pool histogram_pool;

/* Hashtable support for storing histogram entries, hashed and compared by
   their execution count.  */

struct histogram_hash : typed_noop_remove <histogram_entry>
{
  typedef histogram_entry value_type;
  typedef histogram_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}

/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the hash table kept on the side to avoid duplicate entries.  */

static void
account_time_size (hash_table <histogram_hash> hashtable,
                   vec<histogram_entry *> &histogram,
                   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable.find_slot (&key, INSERT);

  if (!*val)
    {
      *val = (histogram_entry *) pool_alloc (histogram_pool);
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}
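
/* qsort comparator ordering histogram entries by decreasing execution
   count.  */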
static int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *)v1;
  const histogram_entry *h2 = *(const histogram_entry * const *)v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}

/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
            overall_size = 0;

  fprintf (file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, "  "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n",
               (HOST_WIDEST_INT) histogram[i]->count,
               histogram[i]->time,
               cumulated_time * 100.0 / overall_time,
               histogram[i]->size,
               cumulated_size * 100.0 / overall_size);
    }
}

/* Collect histogram from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  hash_table <histogram_hash> hashtable;
  basic_block bb;

  hashtable.create (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
        int time = 0;
        int size = 0;
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          {
            gimple stmt = gsi_stmt (gsi);
            if (gimple_code (stmt) == GIMPLE_CALL
                && !gimple_call_fndecl (stmt))
              {
                histogram_value h;
                h = gimple_histogram_value_of_type
                      (DECL_STRUCT_FUNCTION (node->decl),
                       stmt, HIST_TYPE_INDIR_CALL);
                /* No need to do sanity check: gimple_ic_transform already
                   takes away bad histograms.  */
                if (h)
                  {
                    /* Counter 0 is the target, counter 1 is the number of
                       executions that called that target, counter 2 is the
                       total number of executions.  */
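                    /* For example, if counters[1] == 90 and
                       counters[2] == 100, the computation below sets
                       common_target_probability to
                       GCOV_COMPUTE_SCALE (90, 100), i.e. roughly
                       0.9 * REG_BR_PROB_BASE.  */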
                    if (h->hvalue.counters[2])
                      {
                        struct cgraph_edge * e = cgraph_edge (node, stmt);
                        e->indirect_info->common_target_id
                          = h->hvalue.counters [0];
                        e->indirect_info->common_target_probability
                          = GCOV_COMPUTE_SCALE (h->hvalue.counters [1],
                                                h->hvalue.counters [2]);
                        if (e->indirect_info->common_target_probability
                            > REG_BR_PROB_BASE)
                          {
                            if (dump_file)
                              fprintf (dump_file, "Probability capped to 1\n");
                            e->indirect_info->common_target_probability
                              = REG_BR_PROB_BASE;
                          }
                      }
                    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
                                                   stmt, h);
                  }
              }
            time += estimate_num_insns (stmt, &eni_time_weights);
            size += estimate_num_insns (stmt, &eni_size_weights);
          }
        account_time_size (hashtable, histogram, bb->count, time, size);
      }
  hashtable.dispose ();
  histogram.qsort (cmp_counts);
}

/* Serialize the ipa info for lto.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }
  lto_destroy_simple_output_block (ob);
}

/* Deserialize the ipa info for lto.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data ** file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data * file_data;
  hash_table <histogram_hash> hashtable;
  int j = 0;

  hashtable.create (10);
  histogram_pool = create_alloc_pool ("IPA histogram",
                                      sizeof (struct histogram_entry), 10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
        = lto_create_simple_input_block (file_data,
                                         LTO_section_ipa_profile,
                                         &data, &len);
      if (ib)
        {
          unsigned int num = streamer_read_uhwi (ib);
          unsigned int n;
          for (n = 0; n < num; n++)
            {
              gcov_type count = streamer_read_gcov_count (ib);
              int time = streamer_read_uhwi (ib);
              int size = streamer_read_uhwi (ib);
              account_time_size (hashtable, histogram,
                                 count, time, size);
            }
          lto_destroy_simple_input_block (file_data,
                                          LTO_section_ipa_profile,
                                          ib, data, len);
        }
    }
  hashtable.dispose ();
  histogram.qsort (cmp_counts);
}

/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
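
/* All four flags start out true in ipa_propagate_frequency and are cleared by
   ipa_propagate_frequency_1 as soon as some caller contradicts them, so each
   flag is effectively a conjunction over all callers of the node and its
   aliases.  */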

/* Worker for ipa_propagate_frequency.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
                || d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != node)
        {
          d->only_called_at_startup &= edge->caller->only_called_at_startup;
          /* It makes sense to put main() together with the static
             constructors.  It will be executed for sure, but the rest of the
             functions called from main are definitely not startup only.  */
          if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
            d->only_called_at_startup = 0;
          d->only_called_at_exit &= edge->caller->only_called_at_exit;
        }

      /* When profile feedback is available, do not try to propagate too hard;
         counts are already a good guide on function frequencies, and roundoff
         errors can push a function into the unlikely section even when it is
         executed by the train run.  Transfer the function only if all callers
         are unlikely executed.  */
      if (profile_info && flag_branch_probabilities
          && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
              || (edge->caller->global.inlined_to
                  && edge->caller->global.inlined_to->frequency
                     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
        d->maybe_unlikely_executed = false;
      if (!edge->frequency)
        continue;
      switch (edge->caller->frequency)
        {
        case NODE_FREQUENCY_UNLIKELY_EXECUTED:
          break;
        case NODE_FREQUENCY_EXECUTED_ONCE:
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Called by %s that is executed once\n",
                     edge->caller->name ());
          d->maybe_unlikely_executed = false;
          if (inline_edge_summary (edge)->loop_depth)
            {
              d->maybe_executed_once = false;
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  Called in loop\n");
            }
          break;
        case NODE_FREQUENCY_HOT:
        case NODE_FREQUENCY_NORMAL:
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Called by %s that is normal or hot\n",
                     edge->caller->name ());
          d->maybe_unlikely_executed = false;
          d->maybe_executed_once = false;
          break;
        }
    }
  return edge != NULL;
}

/* Return true if NODE contains hot calls.  */

static bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (cgraph_maybe_hot_edge_p (e))
      return true;
    else if (!e->inline_failed
             && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (cgraph_maybe_hot_edge_p (e))
      return true;
  return false;
}

/* See if the frequency of NODE can be updated based on the frequencies of its
   callers.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {true, true, true, true};
  bool changed = false;

  /* We can not propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || node->alias
      || (flag_devirtualize && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  cgraph_for_node_and_aliases (node, ipa_propagate_frequency_1, &d, true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at startup.\n",
                 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at exit.\n",
                 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count)
    {
      bool hot = false;
      if (node->count >= get_hot_bb_threshold ())
        hot = true;
      if (!hot)
        hot |= contains_hot_call_p (node);
      if (hot)
        {
          if (node->frequency != NODE_FREQUENCY_HOT)
            {
              if (dump_file)
                fprintf (dump_file, "Node %s promoted to hot.\n",
                         node->name ());
              node->frequency = NODE_FREQUENCY_HOT;
              return true;
            }
          return changed;
        }
      else if (node->frequency == NODE_FREQUENCY_HOT)
        {
          if (dump_file)
            fprintf (dump_file, "Node %s reduced to normal.\n",
                     node->name ());
          node->frequency = NODE_FREQUENCY_NORMAL;
          changed = true;
        }
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
                 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once
           && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to executed once.\n",
                 node->name ());
      changed = true;
    }
  return changed;
}

/* Simple ipa profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n,*n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
        {
          gcov_type min, cumulated_time = 0, cumulated_size = 0;

          fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n",
                   (HOST_WIDEST_INT)overall_time);
          min = get_hot_bb_threshold ();
          for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
               i++)
            {
              cumulated_time += histogram[i]->count * histogram[i]->time;
              cumulated_size += histogram[i]->size;
            }
          fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
                   " Time:%3.2f%% Size:%3.2f%%\n",
                   (HOST_WIDEST_INT)min,
                   cumulated_time * 100.0 / overall_time,
                   cumulated_size * 100.0 / overall_size);
        }
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
        {
          cumulated += histogram[i]->count * histogram[i]->time;
          threshold = histogram[i]->count;
        }
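
      /* For example, with overall_time == 1000000 and
         HOT_BB_COUNT_WS_PERMILLE == 999, cutoff is 999000, so the loop above
         walks the histogram (sorted by decreasing count) until the
         accumulated count * time reaches 999000; the count of the last entry
         visited becomes the candidate hot count threshold.  */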
      if (dump_file)
        {
          gcov_type cumulated_time = 0, cumulated_size = 0;

          for (i = 0;
               i < (int)histogram.length () && histogram[i]->count >= threshold;
               i++)
            {
              cumulated_time += histogram[i]->count * histogram[i]->time;
              cumulated_size += histogram[i]->size;
            }
          fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
                   " Time:%3.2f%% Size:%3.2f%%\n",
                   (HOST_WIDEST_INT)threshold,
                   cumulated_time * 100.0 / overall_time,
                   cumulated_size * 100.0 / overall_size);
        }
      if (threshold > get_hot_bb_threshold ()
          || in_lto_p)
        {
          if (dump_file)
            fprintf (dump_file, "Threshold updated.\n");
          set_hot_bb_threshold (threshold);
        }
    }
  histogram.release ();
  free_alloc_pool (histogram_pool);

  /* Produce speculative calls: we saved the common target from profiling into
     e->common_target_id.  Now, at link time, we can look up the corresponding
     function node and produce a speculative call.  */
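
  /* A speculative edge is produced below only when the recorded probability
     is at least REG_BR_PROB_BASE / 2, the indirect edge itself is maybe hot,
     and the target is not an overwritable symbol that could be discarded at
     link time; for a target that cannot be discarded, we speculate on its
     local alias instead so that inlining remains possible.  */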
  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      for (e = n->indirect_calls; e; e = e->next_callee)
        {
          if (n->count)
            nindirect++;
          if (e->indirect_info->common_target_id)
            {
              if (!node_map_initialized)
                init_node_map (false);
              node_map_initialized = true;
              ncommon++;
              n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
              if (n2)
                {
                  if (dump_file)
                    {
                      fprintf (dump_file, "Indirect call -> direct call from"
                               " other module %s/%i => %s/%i, prob %3.2f\n",
                               xstrdup (n->name ()), n->order,
                               xstrdup (n2->name ()), n2->order,
                               e->indirect_info->common_target_probability
                               / (float)REG_BR_PROB_BASE);
                    }
                  if (e->indirect_info->common_target_probability
                      < REG_BR_PROB_BASE / 2)
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: probability is too low.\n");
                    }
                  else if (!cgraph_maybe_hot_edge_p (e))
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: call is cold.\n");
                    }
                  else if (cgraph_function_body_availability (n2)
                           <= AVAIL_OVERWRITABLE
                           && symtab_can_be_discarded (n2))
                    {
                      nuseless++;
                      if (dump_file)
                        fprintf (dump_file,
                                 "Not speculating: target is overwritable "
                                 "and can be discarded.\n");
                    }
                  else
                    {
                      /* Target may be overwritable, but profile says that
                         control flow goes to this particular implementation
                         of N2.  Speculate on the local alias to allow
                         inlining.  */
                      if (!symtab_can_be_discarded (n2))
                        {
                          struct cgraph_node *alias;
                          alias = cgraph (symtab_nonoverwritable_alias (n2));
                          if (alias)
                            n2 = alias;
                        }
                      nconverted++;
                      cgraph_turn_edge_to_speculative
                        (e, n2,
                         apply_scale (e->count,
                                      e->indirect_info->common_target_probability),
                         apply_scale (e->frequency,
                                      e->indirect_info->common_target_probability));
                      update = true;
                    }
                }
              else
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Function with profile-id %i not found.\n",
                             e->indirect_info->common_target_id);
                  nunknown++;
                }
            }
        }
      if (update)
        inline_update_overall_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
             "%i indirect calls trained.\n"
             "%i (%3.2f%%) have common target.\n"
             "%i (%3.2f%%) targets were not found.\n"
             "%i (%3.2f%%) speculations seem useless.\n"
             "%i (%3.2f%%) speculations produced.\n",
             nindirect,
             ncommon, ncommon * 100.0 / nindirect,
             nunknown, nunknown * 100.0 / nindirect,
             nuseless, nuseless * 100.0 / nindirect,
             nconverted, nconverted * 100.0 / nindirect);
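
  /* Now propagate the frequencies: walk the nodes in the order computed by
     ipa_reverse_postorder; whenever ipa_propagate_frequency changes a node,
     mark its local callees through their aux pointer so they are
     reconsidered, and iterate until nothing changes any more.  */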
  order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local && ipa_propagate_frequency (order[i]))
        {
          for (e = order[i]->callees; e; e = e->next_callee)
            if (e->callee->local.local && !e->callee->aux)
              {
                something_changed = true;
                e->callee->aux = (void *)1;
              }
        }
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
        {
          if (order[i]->aux && ipa_propagate_frequency (order[i]))
            {
              for (e = order[i]->callees; e; e = e->next_callee)
                if (e->callee->local.local && !e->callee->aux)
                  {
                    something_changed = true;
                    e->callee->aux = (void *)1;
                  }
            }
          order[i]->aux = NULL;
        }
    }
  free (order);
  return 0;
}

static bool
gate_ipa_profile (void)
{
  return flag_ipa_profile;
}

namespace {

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
                      ipa_profile_generate_summary, /* generate_summary */
                      ipa_profile_write_summary, /* write_summary */
                      ipa_profile_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate () { return gate_ipa_profile (); }
  unsigned int execute () { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}