1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
32 histogram distribution did not change. Consequently histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
53 #include "tree-pass.h"
58 #include "tree-iterator.h"
59 #include "ipa-utils.h"
60 #include "hash-table.h"
63 #include "value-prof.h"
64 #include "alloc-pool.h"
65 #include "tree-inline.h"
66 #include "lto-streamer.h"
67 #include "data-streamer.h"
68 #include "ipa-inline.h"
70 /* Entry in the histogram. */
72 struct histogram_entry
79 /* Histogram of profile values.
80 The histogram is represented as an ordered vector of entries allocated via
81 histogram_pool. During construction a separate hashtable is kept to lookup
84 vec
<histogram_entry
*> histogram
;
85 static alloc_pool histogram_pool
;
87 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
89 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
91 typedef histogram_entry value_type
;
92 typedef histogram_entry compare_type
;
93 static inline hashval_t
hash (const value_type
*);
94 static inline int equal (const value_type
*, const compare_type
*);
98 histogram_hash::hash (const histogram_entry
*val
)
104 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
106 return val
->count
== val2
->count
;
109 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
110 HASHTABLE is the on-side hash kept to avoid duplicates. */
113 account_time_size (hash_table
<histogram_hash
> hashtable
,
114 vec
<histogram_entry
*> &histogram
,
115 gcov_type count
, int time
, int size
)
117 histogram_entry key
= {count
, 0, 0};
118 histogram_entry
**val
= hashtable
.find_slot (&key
, INSERT
);
122 *val
= (histogram_entry
*) pool_alloc (histogram_pool
);
124 histogram
.safe_push (*val
);
126 (*val
)->time
+= time
;
127 (*val
)->size
+= size
;
131 cmp_counts (const void *v1
, const void *v2
)
133 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
134 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
135 if (h1
->count
< h2
->count
)
137 if (h1
->count
> h2
->count
)
142 /* Dump HISTOGRAM to FILE. */
145 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
148 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
150 fprintf (dump_file
, "Histogram:\n");
151 for (i
= 0; i
< histogram
.length (); i
++)
153 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
154 overall_size
+= histogram
[i
]->size
;
160 for (i
= 0; i
< histogram
.length (); i
++)
162 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
163 cumulated_size
+= histogram
[i
]->size
;
164 fprintf (file
, " "HOST_WIDEST_INT_PRINT_DEC
": time:%i (%2.2f) size:%i (%2.2f)\n",
165 (HOST_WIDEST_INT
) histogram
[i
]->count
,
167 cumulated_time
* 100.0 / overall_time
,
169 cumulated_size
* 100.0 / overall_size
);
173 /* Collect histogram from CFG profiles. */
176 ipa_profile_generate_summary (void)
178 struct cgraph_node
*node
;
179 gimple_stmt_iterator gsi
;
180 hash_table
<histogram_hash
> hashtable
;
183 hashtable
.create (10);
184 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
187 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
188 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
192 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
194 gimple stmt
= gsi_stmt (gsi
);
195 if (gimple_code (stmt
) == GIMPLE_CALL
196 && !gimple_call_fndecl (stmt
))
199 h
= gimple_histogram_value_of_type
200 (DECL_STRUCT_FUNCTION (node
->decl
),
201 stmt
, HIST_TYPE_INDIR_CALL
);
202 /* No need to do sanity check: gimple_ic_transform already
203 takes away bad histograms. */
206 /* counter 0 is target, counter 1 is number of execution we called target,
207 counter 2 is total number of executions. */
208 if (h
->hvalue
.counters
[2])
210 struct cgraph_edge
* e
= cgraph_edge (node
, stmt
);
211 e
->indirect_info
->common_target_id
212 = h
->hvalue
.counters
[0];
213 e
->indirect_info
->common_target_probability
214 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
215 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
218 fprintf (dump_file
, "Probability capped to 1\n");
219 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
222 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
226 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
227 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
229 account_time_size (hashtable
, histogram
, bb
->count
, time
, size
);
231 hashtable
.dispose ();
232 histogram
.qsort (cmp_counts
);
235 /* Serialize the ipa info for lto. */
238 ipa_profile_write_summary (void)
240 struct lto_simple_output_block
*ob
241 = lto_create_simple_output_block (LTO_section_ipa_profile
);
244 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
245 for (i
= 0; i
< histogram
.length (); i
++)
247 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
248 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
249 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
251 lto_destroy_simple_output_block (ob
);
254 /* Deserialize the ipa info for lto. */
257 ipa_profile_read_summary (void)
259 struct lto_file_decl_data
** file_data_vec
260 = lto_get_file_decl_data ();
261 struct lto_file_decl_data
* file_data
;
262 hash_table
<histogram_hash
> hashtable
;
265 hashtable
.create (10);
266 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
269 while ((file_data
= file_data_vec
[j
++]))
273 struct lto_input_block
*ib
274 = lto_create_simple_input_block (file_data
,
275 LTO_section_ipa_profile
,
279 unsigned int num
= streamer_read_uhwi (ib
);
281 for (n
= 0; n
< num
; n
++)
283 gcov_type count
= streamer_read_gcov_count (ib
);
284 int time
= streamer_read_uhwi (ib
);
285 int size
= streamer_read_uhwi (ib
);
286 account_time_size (hashtable
, histogram
,
289 lto_destroy_simple_input_block (file_data
,
290 LTO_section_ipa_profile
,
294 hashtable
.dispose ();
295 histogram
.qsort (cmp_counts
);
/* Data used by ipa_propagate_frequency.  Each flag starts true and is
   cleared by ipa_propagate_frequency_1 as callers disprove it.  */

struct ipa_propagate_frequency_data
{
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
308 /* Worker for ipa_propagate_frequency_1. */
311 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
313 struct ipa_propagate_frequency_data
*d
;
314 struct cgraph_edge
*edge
;
316 d
= (struct ipa_propagate_frequency_data
*)data
;
317 for (edge
= node
->callers
;
318 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
319 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
320 edge
= edge
->next_caller
)
322 if (edge
->caller
!= node
)
324 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
325 /* It makes sense to put main() together with the static constructors.
326 It will be executed for sure, but rest of functions called from
327 main are definitely not at startup only. */
328 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
329 d
->only_called_at_startup
= 0;
330 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
333 /* When profile feedback is available, do not try to propagate too hard;
334 counts are already good guide on function frequencies and roundoff
335 errors can make us to push function into unlikely section even when
336 it is executed by the train run. Transfer the function only if all
337 callers are unlikely executed. */
338 if (profile_info
&& flag_branch_probabilities
339 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
340 || (edge
->caller
->global
.inlined_to
341 && edge
->caller
->global
.inlined_to
->frequency
342 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
343 d
->maybe_unlikely_executed
= false;
344 if (!edge
->frequency
)
346 switch (edge
->caller
->frequency
)
348 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
350 case NODE_FREQUENCY_EXECUTED_ONCE
:
351 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
352 fprintf (dump_file
, " Called by %s that is executed once\n",
353 cgraph_node_name (edge
->caller
));
354 d
->maybe_unlikely_executed
= false;
355 if (inline_edge_summary (edge
)->loop_depth
)
357 d
->maybe_executed_once
= false;
358 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
359 fprintf (dump_file
, " Called in loop\n");
362 case NODE_FREQUENCY_HOT
:
363 case NODE_FREQUENCY_NORMAL
:
364 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
365 fprintf (dump_file
, " Called by %s that is normal or hot\n",
366 cgraph_node_name (edge
->caller
));
367 d
->maybe_unlikely_executed
= false;
368 d
->maybe_executed_once
= false;
375 /* Return ture if NODE contains hot calls. */
378 contains_hot_call_p (struct cgraph_node
*node
)
380 struct cgraph_edge
*e
;
381 for (e
= node
->callees
; e
; e
= e
->next_callee
)
382 if (cgraph_maybe_hot_edge_p (e
))
384 else if (!e
->inline_failed
385 && contains_hot_call_p (e
->callee
))
387 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
388 if (cgraph_maybe_hot_edge_p (e
))
393 /* See if the frequency of NODE can be updated based on frequencies of its
396 ipa_propagate_frequency (struct cgraph_node
*node
)
398 struct ipa_propagate_frequency_data d
= {true, true, true, true};
399 bool changed
= false;
401 /* We can not propagate anything useful about externally visible functions
402 nor about virtuals. */
403 if (!node
->local
.local
405 || (flag_devirtualize
&& DECL_VIRTUAL_P (node
->decl
)))
407 gcc_assert (node
->analyzed
);
408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
409 fprintf (dump_file
, "Processing frequency %s\n", cgraph_node_name (node
));
411 cgraph_for_node_and_aliases (node
, ipa_propagate_frequency_1
, &d
, true);
413 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
414 && !node
->only_called_at_startup
)
416 node
->only_called_at_startup
= true;
418 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
419 cgraph_node_name (node
));
422 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
423 && !node
->only_called_at_exit
)
425 node
->only_called_at_exit
= true;
427 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
428 cgraph_node_name (node
));
432 /* With profile we can decide on hot/normal based on count. */
436 if (node
->count
>= get_hot_bb_threshold ())
439 hot
|= contains_hot_call_p (node
);
442 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
445 fprintf (dump_file
, "Node %s promoted to hot.\n",
446 cgraph_node_name (node
));
447 node
->frequency
= NODE_FREQUENCY_HOT
;
452 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
455 fprintf (dump_file
, "Node %s reduced to normal.\n",
456 cgraph_node_name (node
));
457 node
->frequency
= NODE_FREQUENCY_NORMAL
;
461 /* These come either from profile or user hints; never update them. */
462 if (node
->frequency
== NODE_FREQUENCY_HOT
463 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
465 if (d
.maybe_unlikely_executed
)
467 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
469 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
470 cgraph_node_name (node
));
473 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
475 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
477 fprintf (dump_file
, "Node %s promoted to executed once.\n",
478 cgraph_node_name (node
));
484 /* Simple ipa profile pass propagating frequencies across the callgraph. */
489 struct cgraph_node
**order
;
490 struct cgraph_edge
*e
;
492 bool something_changed
= false;
494 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
495 struct cgraph_node
*n
,*n2
;
496 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
497 bool node_map_initialized
= false;
500 dump_histogram (dump_file
, histogram
);
501 for (i
= 0; i
< (int)histogram
.length (); i
++)
503 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
504 overall_size
+= histogram
[i
]->size
;
510 gcc_assert (overall_size
);
513 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
515 fprintf (dump_file
, "Overall time: "HOST_WIDEST_INT_PRINT_DEC
"\n",
516 (HOST_WIDEST_INT
)overall_time
);
517 min
= get_hot_bb_threshold ();
518 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
521 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
522 cumulated_size
+= histogram
[i
]->size
;
524 fprintf (dump_file
, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
525 " Time:%3.2f%% Size:%3.2f%%\n",
526 (HOST_WIDEST_INT
)min
,
527 cumulated_time
* 100.0 / overall_time
,
528 cumulated_size
* 100.0 / overall_size
);
530 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
532 for (i
= 0; cumulated
< cutoff
; i
++)
534 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
535 threshold
= histogram
[i
]->count
;
541 gcov_type cumulated_time
= 0, cumulated_size
= 0;
544 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
547 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
548 cumulated_size
+= histogram
[i
]->size
;
550 fprintf (dump_file
, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
551 " Time:%3.2f%% Size:%3.2f%%\n",
552 (HOST_WIDEST_INT
)threshold
,
553 cumulated_time
* 100.0 / overall_time
,
554 cumulated_size
* 100.0 / overall_size
);
556 if (threshold
> get_hot_bb_threshold ()
560 fprintf (dump_file
, "Threshold updated.\n");
561 set_hot_bb_threshold (threshold
);
564 histogram
.release ();
565 free_alloc_pool (histogram_pool
);
567 /* Produce speculative calls: we saved common traget from porfiling into
568 e->common_target_id. Now, at link time, we can look up corresponding
569 function node and produce speculative call. */
571 FOR_EACH_DEFINED_FUNCTION (n
)
575 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
579 if (e
->indirect_info
->common_target_id
)
581 if (!node_map_initialized
)
582 init_node_map (false);
583 node_map_initialized
= true;
585 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
590 fprintf (dump_file
, "Indirect call -> direct call from"
591 " other module %s/%i => %s/%i, prob %3.2f\n",
592 xstrdup (cgraph_node_name (n
)), n
->order
,
593 xstrdup (cgraph_node_name (n2
)), n2
->order
,
594 e
->indirect_info
->common_target_probability
595 / (float)REG_BR_PROB_BASE
);
597 if (e
->indirect_info
->common_target_probability
598 < REG_BR_PROB_BASE
/ 2)
603 "Not speculating: probability is too low.\n");
605 else if (!cgraph_maybe_hot_edge_p (e
))
610 "Not speculating: call is cold.\n");
612 else if (cgraph_function_body_availability (n2
)
613 <= AVAIL_OVERWRITABLE
614 && symtab_can_be_discarded (n2
))
619 "Not speculating: target is overwritable "
620 "and can be discarded.\n");
624 /* Target may be overwritable, but profile says that
625 control flow goes to this particular implementation
626 of N2. Speculate on the local alias to allow inlining.
628 if (!symtab_can_be_discarded (n2
))
631 alias
= cgraph (symtab_nonoverwritable_alias
637 cgraph_turn_edge_to_speculative
639 apply_scale (e
->count
,
640 e
->indirect_info
->common_target_probability
),
641 apply_scale (e
->frequency
,
642 e
->indirect_info
->common_target_probability
));
649 fprintf (dump_file
, "Function with profile-id %i not found.\n",
650 e
->indirect_info
->common_target_id
);
656 inline_update_overall_summary (n
);
658 if (node_map_initialized
)
660 if (dump_file
&& nindirect
)
662 "%i indirect calls trained.\n"
663 "%i (%3.2f%%) have common target.\n"
664 "%i (%3.2f%%) targets was not found.\n"
665 "%i (%3.2f%%) speculations seems useless.\n"
666 "%i (%3.2f%%) speculations produced.\n",
668 ncommon
, ncommon
* 100.0 / nindirect
,
669 nunknown
, nunknown
* 100.0 / nindirect
,
670 nuseless
, nuseless
* 100.0 / nindirect
,
671 nconverted
, nconverted
* 100.0 / nindirect
);
673 order
= XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
674 order_pos
= ipa_reverse_postorder (order
);
675 for (i
= order_pos
- 1; i
>= 0; i
--)
677 if (order
[i
]->local
.local
&& ipa_propagate_frequency (order
[i
]))
679 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
680 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
682 something_changed
= true;
683 e
->callee
->aux
= (void *)1;
686 order
[i
]->aux
= NULL
;
689 while (something_changed
)
691 something_changed
= false;
692 for (i
= order_pos
- 1; i
>= 0; i
--)
694 if (order
[i
]->aux
&& ipa_propagate_frequency (order
[i
]))
696 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
697 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
699 something_changed
= true;
700 e
->callee
->aux
= (void *)1;
703 order
[i
]->aux
= NULL
;
711 gate_ipa_profile (void)
713 return flag_ipa_profile
;
718 const pass_data pass_data_ipa_profile
=
721 "profile_estimate", /* name */
722 OPTGROUP_NONE
, /* optinfo_flags */
724 true, /* has_execute */
725 TV_IPA_PROFILE
, /* tv_id */
726 0, /* properties_required */
727 0, /* properties_provided */
728 0, /* properties_destroyed */
729 0, /* todo_flags_start */
730 0, /* todo_flags_finish */
733 class pass_ipa_profile
: public ipa_opt_pass_d
736 pass_ipa_profile (gcc::context
*ctxt
)
737 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
738 ipa_profile_generate_summary
, /* generate_summary */
739 ipa_profile_write_summary
, /* write_summary */
740 ipa_profile_read_summary
, /* read_summary */
741 NULL
, /* write_optimization_summary */
742 NULL
, /* read_optimization_summary */
743 NULL
, /* stmt_fixup */
744 0, /* function_transform_todo_flags_start */
745 NULL
, /* function_transform */
746 NULL
) /* variable_transform */
749 /* opt_pass methods: */
750 bool gate () { return gate_ipa_profile (); }
751 unsigned int execute () { return ipa_profile (); }
753 }; // class pass_ipa_profile
758 make_pass_ipa_profile (gcc::context
*ctxt
)
760 return new pass_ipa_profile (ctxt
);