1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 A similar histogram is also estimated by the coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first, the
30 gcov runtime does not weight individual basic blocks by estimated execution
31 time, and second, the merging of multiple runs assumes that the
32 histogram distribution did not change. Consequently, the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next, speculative indirect call resolution is performed: the local
37 profile pass assigns a profile-id to each function and provides us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
45 control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
52 #include "tree-pass.h"
57 #include "tree-iterator.h"
58 #include "ipa-utils.h"
59 #include "hash-table.h"
62 #include "value-prof.h"
63 #include "alloc-pool.h"
64 #include "tree-inline.h"
65 #include "lto-streamer.h"
66 #include "data-streamer.h"
67 #include "ipa-inline.h"
69 /* Entry in the histogram. */
71 struct histogram_entry
78 /* Histogram of profile values.
79 The histogram is represented as an ordered vector of entries allocated via
80 histogram_pool. During construction a separate hashtable is kept to lookup
83 vec
<histogram_entry
*> histogram
;
84 static alloc_pool histogram_pool
;
86 /* Hashtable support for looking up histogram entries; two entries are equal when their execution counts match. */
88 struct histogram_hash
: typed_noop_remove
<histogram_entry
>
90 typedef histogram_entry value_type
;
91 typedef histogram_entry compare_type
;
92 static inline hashval_t
hash (const value_type
*);
93 static inline int equal (const value_type
*, const compare_type
*);
97 histogram_hash::hash (const histogram_entry
*val
)
103 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
105 return val
->count
== val2
->count
;
108 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
109 HASHTABLE is the auxiliary hash table kept to avoid duplicate entries for the same COUNT. */
112 account_time_size (hash_table
<histogram_hash
> hashtable
,
113 vec
<histogram_entry
*> &histogram
,
114 gcov_type count
, int time
, int size
)
116 histogram_entry key
= {count
, 0, 0};
117 histogram_entry
**val
= hashtable
.find_slot (&key
, INSERT
);
121 *val
= (histogram_entry
*) pool_alloc (histogram_pool
);
123 histogram
.safe_push (*val
);
125 (*val
)->time
+= time
;
126 (*val
)->size
+= size
;
130 cmp_counts (const void *v1
, const void *v2
)
132 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
133 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
134 if (h1
->count
< h2
->count
)
136 if (h1
->count
> h2
->count
)
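141 /* Dump HISTOGRAM to FILE.
NOTE(review): the "Histogram:" header line is written to dump_file rather
than FILE, unlike the per-entry lines -- confirm whether that is intended. */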
144 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
147 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
149 fprintf (dump_file
, "Histogram:\n");
150 for (i
= 0; i
< histogram
.length (); i
++)
152 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
153 overall_size
+= histogram
[i
]->size
;
159 for (i
= 0; i
< histogram
.length (); i
++)
161 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
162 cumulated_size
+= histogram
[i
]->size
;
163 fprintf (file
, " "HOST_WIDEST_INT_PRINT_DEC
": time:%i (%2.2f) size:%i (%2.2f)\n",
164 (HOST_WIDEST_INT
) histogram
[i
]->count
,
166 cumulated_time
* 100.0 / overall_time
,
168 cumulated_size
* 100.0 / overall_size
);
172 /* Collect histogram from CFG profiles. */
175 ipa_profile_generate_summary (void)
177 struct cgraph_node
*node
;
178 gimple_stmt_iterator gsi
;
179 hash_table
<histogram_hash
> hashtable
;
182 hashtable
.create (10);
183 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
186 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
187 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->symbol
.decl
))
191 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
193 gimple stmt
= gsi_stmt (gsi
);
194 if (gimple_code (stmt
) == GIMPLE_CALL
195 && !gimple_call_fndecl (stmt
))
198 h
= gimple_histogram_value_of_type
199 (DECL_STRUCT_FUNCTION (node
->symbol
.decl
),
200 stmt
, HIST_TYPE_INDIR_CALL
);
201 /* No need to do sanity check: gimple_ic_transform already
202 takes away bad histograms. */
205 /* Counter 0 is the target, counter 1 is the number of executions that called the target,
206 counter 2 is the total number of executions. */
207 if (h
->hvalue
.counters
[2])
209 struct cgraph_edge
* e
= cgraph_edge (node
, stmt
);
210 e
->indirect_info
->common_target_id
211 = h
->hvalue
.counters
[0];
212 e
->indirect_info
->common_target_probability
213 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
214 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
217 fprintf (dump_file
, "Probability capped to 1\n");
218 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
221 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->symbol
.decl
),
225 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
226 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
228 account_time_size (hashtable
, histogram
, bb
->count
, time
, size
);
230 hashtable
.dispose ();
231 histogram
.qsort (cmp_counts
);
234 /* Serialize the ipa info for lto. */
237 ipa_profile_write_summary (void)
239 struct lto_simple_output_block
*ob
240 = lto_create_simple_output_block (LTO_section_ipa_profile
);
243 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
244 for (i
= 0; i
< histogram
.length (); i
++)
246 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
247 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
248 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
250 lto_destroy_simple_output_block (ob
);
253 /* Deserialize the ipa info for lto. */
256 ipa_profile_read_summary (void)
258 struct lto_file_decl_data
** file_data_vec
259 = lto_get_file_decl_data ();
260 struct lto_file_decl_data
* file_data
;
261 hash_table
<histogram_hash
> hashtable
;
264 hashtable
.create (10);
265 histogram_pool
= create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry
),
268 while ((file_data
= file_data_vec
[j
++]))
272 struct lto_input_block
*ib
273 = lto_create_simple_input_block (file_data
,
274 LTO_section_ipa_profile
,
278 unsigned int num
= streamer_read_uhwi (ib
);
280 for (n
= 0; n
< num
; n
++)
282 gcov_type count
= streamer_read_gcov_count (ib
);
283 int time
= streamer_read_uhwi (ib
);
284 int size
= streamer_read_uhwi (ib
);
285 account_time_size (hashtable
, histogram
,
288 lto_destroy_simple_input_block (file_data
,
289 LTO_section_ipa_profile
,
293 hashtable
.dispose ();
294 histogram
.qsort (cmp_counts
);
297 /* Data used by ipa_propagate_frequency. */
299 struct ipa_propagate_frequency_data
301 bool maybe_unlikely_executed
;
302 bool maybe_executed_once
;
303 bool only_called_at_startup
;
304 bool only_called_at_exit
;
307 /* Worker for ipa_propagate_frequency. */
310 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
312 struct ipa_propagate_frequency_data
*d
;
313 struct cgraph_edge
*edge
;
315 d
= (struct ipa_propagate_frequency_data
*)data
;
316 for (edge
= node
->callers
;
317 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
318 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
319 edge
= edge
->next_caller
)
321 if (edge
->caller
!= node
)
323 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
324 /* It makes sense to put main() together with the static constructors.
325 It will be executed for sure, but the rest of the functions called from
326 main are definitely not at startup only. */
327 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->symbol
.decl
)))
328 d
->only_called_at_startup
= 0;
329 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
332 /* When profile feedback is available, do not try to propagate too hard;
333 counts are already a good guide to function frequencies, and roundoff
334 errors can make us push a function into the unlikely section even when
335 it is executed by the train run. Transfer the function only if all
336 callers are unlikely executed. */
337 if (profile_info
&& flag_branch_probabilities
338 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
339 || (edge
->caller
->global
.inlined_to
340 && edge
->caller
->global
.inlined_to
->frequency
341 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
342 d
->maybe_unlikely_executed
= false;
343 if (!edge
->frequency
)
345 switch (edge
->caller
->frequency
)
347 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
349 case NODE_FREQUENCY_EXECUTED_ONCE
:
350 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
351 fprintf (dump_file
, " Called by %s that is executed once\n",
352 cgraph_node_name (edge
->caller
));
353 d
->maybe_unlikely_executed
= false;
354 if (inline_edge_summary (edge
)->loop_depth
)
356 d
->maybe_executed_once
= false;
357 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
358 fprintf (dump_file
, " Called in loop\n");
361 case NODE_FREQUENCY_HOT
:
362 case NODE_FREQUENCY_NORMAL
:
363 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
364 fprintf (dump_file
, " Called by %s that is normal or hot\n",
365 cgraph_node_name (edge
->caller
));
366 d
->maybe_unlikely_executed
= false;
367 d
->maybe_executed_once
= false;
374 /* Return true if NODE contains hot calls. */
377 contains_hot_call_p (struct cgraph_node
*node
)
379 struct cgraph_edge
*e
;
380 for (e
= node
->callees
; e
; e
= e
->next_callee
)
381 if (cgraph_maybe_hot_edge_p (e
))
383 else if (!e
->inline_failed
384 && contains_hot_call_p (e
->callee
))
386 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
387 if (cgraph_maybe_hot_edge_p (e
))
392 /* See if the frequency of NODE can be updated based on frequencies of its
395 ipa_propagate_frequency (struct cgraph_node
*node
)
397 struct ipa_propagate_frequency_data d
= {true, true, true, true};
398 bool changed
= false;
400 /* We can not propagate anything useful about externally visible functions
401 nor about virtuals. */
402 if (!node
->local
.local
403 || node
->symbol
.alias
404 || (flag_devirtualize
&& DECL_VIRTUAL_P (node
->symbol
.decl
)))
406 gcc_assert (node
->symbol
.analyzed
);
407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
408 fprintf (dump_file
, "Processing frequency %s\n", cgraph_node_name (node
));
410 cgraph_for_node_and_aliases (node
, ipa_propagate_frequency_1
, &d
, true);
412 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
413 && !node
->only_called_at_startup
)
415 node
->only_called_at_startup
= true;
417 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
418 cgraph_node_name (node
));
421 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
422 && !node
->only_called_at_exit
)
424 node
->only_called_at_exit
= true;
426 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
427 cgraph_node_name (node
));
431 /* With profile we can decide on hot/normal based on count. */
435 if (node
->count
>= get_hot_bb_threshold ())
438 hot
|= contains_hot_call_p (node
);
441 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
444 fprintf (dump_file
, "Node %s promoted to hot.\n",
445 cgraph_node_name (node
));
446 node
->frequency
= NODE_FREQUENCY_HOT
;
451 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
454 fprintf (dump_file
, "Node %s reduced to normal.\n",
455 cgraph_node_name (node
));
456 node
->frequency
= NODE_FREQUENCY_NORMAL
;
460 /* These come either from profile or user hints; never update them. */
461 if (node
->frequency
== NODE_FREQUENCY_HOT
462 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
464 if (d
.maybe_unlikely_executed
)
466 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
468 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
469 cgraph_node_name (node
));
472 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
474 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
476 fprintf (dump_file
, "Node %s promoted to executed once.\n",
477 cgraph_node_name (node
));
483 /* Simple ipa profile pass propagating frequencies across the callgraph. */
488 struct cgraph_node
**order
;
489 struct cgraph_edge
*e
;
491 bool something_changed
= false;
493 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
494 struct cgraph_node
*n
,*n2
;
495 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
496 bool node_map_initialized
= false;
499 dump_histogram (dump_file
, histogram
);
500 for (i
= 0; i
< (int)histogram
.length (); i
++)
502 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
503 overall_size
+= histogram
[i
]->size
;
509 gcc_assert (overall_size
);
512 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
514 fprintf (dump_file
, "Overall time: "HOST_WIDEST_INT_PRINT_DEC
"\n",
515 (HOST_WIDEST_INT
)overall_time
);
516 min
= get_hot_bb_threshold ();
517 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
520 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
521 cumulated_size
+= histogram
[i
]->size
;
523 fprintf (dump_file
, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
524 " Time:%3.2f%% Size:%3.2f%%\n",
525 (HOST_WIDEST_INT
)min
,
526 cumulated_time
* 100.0 / overall_time
,
527 cumulated_size
* 100.0 / overall_size
);
529 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
531 for (i
= 0; cumulated
< cutoff
; i
++)
533 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
534 threshold
= histogram
[i
]->count
;
540 gcov_type cumulated_time
= 0, cumulated_size
= 0;
543 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
546 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
547 cumulated_size
+= histogram
[i
]->size
;
549 fprintf (dump_file
, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
550 " Time:%3.2f%% Size:%3.2f%%\n",
551 (HOST_WIDEST_INT
)threshold
,
552 cumulated_time
* 100.0 / overall_time
,
553 cumulated_size
* 100.0 / overall_size
);
555 if (threshold
> get_hot_bb_threshold ()
559 fprintf (dump_file
, "Threshold updated.\n");
560 set_hot_bb_threshold (threshold
);
563 histogram
.release ();
564 free_alloc_pool (histogram_pool
);
566 /* Produce speculative calls: we saved the common target from profiling into
567 e->common_target_id. Now, at link time, we can look up the corresponding
568 function node and produce a speculative call. */
570 FOR_EACH_DEFINED_FUNCTION (n
)
574 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
578 if (e
->indirect_info
->common_target_id
)
580 if (!node_map_initialized
)
581 init_node_map (false);
582 node_map_initialized
= true;
584 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
589 fprintf (dump_file
, "Indirect call -> direct call from"
590 " other module %s/%i => %s/%i, prob %3.2f\n",
591 xstrdup (cgraph_node_name (n
)), n
->symbol
.order
,
592 xstrdup (cgraph_node_name (n2
)), n2
->symbol
.order
,
593 e
->indirect_info
->common_target_probability
594 / (float)REG_BR_PROB_BASE
);
596 if (e
->indirect_info
->common_target_probability
597 < REG_BR_PROB_BASE
/ 2)
602 "Not speculating: probability is too low.\n");
604 else if (!cgraph_maybe_hot_edge_p (e
))
609 "Not speculating: call is cold.\n");
611 else if (cgraph_function_body_availability (n2
)
612 <= AVAIL_OVERWRITABLE
613 && symtab_can_be_discarded ((symtab_node
) n2
))
618 "Not speculating: target is overwritable "
619 "and can be discarded.\n");
623 /* Target may be overwritable, but profile says that
624 control flow goes to this particular implementation
625 of N2. Speculate on the local alias to allow inlining.
627 if (!symtab_can_be_discarded ((symtab_node
) n2
))
630 alias
= cgraph (symtab_nonoverwritable_alias
636 cgraph_turn_edge_to_speculative
638 apply_scale (e
->count
,
639 e
->indirect_info
->common_target_probability
),
640 apply_scale (e
->frequency
,
641 e
->indirect_info
->common_target_probability
));
648 fprintf (dump_file
, "Function with profile-id %i not found.\n",
649 e
->indirect_info
->common_target_id
);
655 inline_update_overall_summary (n
);
657 if (node_map_initialized
)
659 if (dump_file
&& nindirect
)
661 "%i indirect calls trained.\n"
662 "%i (%3.2f%%) have common target.\n"
663 "%i (%3.2f%%) targets was not found.\n"
664 "%i (%3.2f%%) speculations seems useless.\n"
665 "%i (%3.2f%%) speculations produced.\n",
667 ncommon
, ncommon
* 100.0 / nindirect
,
668 nunknown
, nunknown
* 100.0 / nindirect
,
669 nuseless
, nuseless
* 100.0 / nindirect
,
670 nconverted
, nconverted
* 100.0 / nindirect
);
672 order
= XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
673 order_pos
= ipa_reverse_postorder (order
);
674 for (i
= order_pos
- 1; i
>= 0; i
--)
676 if (order
[i
]->local
.local
&& ipa_propagate_frequency (order
[i
]))
678 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
679 if (e
->callee
->local
.local
&& !e
->callee
->symbol
.aux
)
681 something_changed
= true;
682 e
->callee
->symbol
.aux
= (void *)1;
685 order
[i
]->symbol
.aux
= NULL
;
688 while (something_changed
)
690 something_changed
= false;
691 for (i
= order_pos
- 1; i
>= 0; i
--)
693 if (order
[i
]->symbol
.aux
&& ipa_propagate_frequency (order
[i
]))
695 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
696 if (e
->callee
->local
.local
&& !e
->callee
->symbol
.aux
)
698 something_changed
= true;
699 e
->callee
->symbol
.aux
= (void *)1;
702 order
[i
]->symbol
.aux
= NULL
;
710 gate_ipa_profile (void)
712 return flag_ipa_profile
;
717 const pass_data pass_data_ipa_profile
=
720 "profile_estimate", /* name */
721 OPTGROUP_NONE
, /* optinfo_flags */
723 true, /* has_execute */
724 TV_IPA_PROFILE
, /* tv_id */
725 0, /* properties_required */
726 0, /* properties_provided */
727 0, /* properties_destroyed */
728 0, /* todo_flags_start */
729 0, /* todo_flags_finish */
732 class pass_ipa_profile
: public ipa_opt_pass_d
735 pass_ipa_profile (gcc::context
*ctxt
)
736 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
737 ipa_profile_generate_summary
, /* generate_summary */
738 ipa_profile_write_summary
, /* write_summary */
739 ipa_profile_read_summary
, /* read_summary */
740 NULL
, /* write_optimization_summary */
741 NULL
, /* read_optimization_summary */
742 NULL
, /* stmt_fixup */
743 0, /* function_transform_todo_flags_start */
744 NULL
, /* function_transform */
745 NULL
) /* variable_transform */
748 /* opt_pass methods: */
749 bool gate () { return gate_ipa_profile (); }
750 unsigned int execute () { return ipa_profile (); }
752 }; // class pass_ipa_profile
757 make_pass_ipa_profile (gcc::context
*ctxt
)
759 return new pass_ipa_profile (ctxt
);