1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* ipa-profile pass implements the following analysis propagating profille
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 The information is used to set hot/cold thresholds.
29 - Next speculative indirect call resolution is performed: the local
30 profile pass assigns profile-id to each function and provide us with a
31 histogram specifying the most common target. We look up the callgraph
32 node corresponding to the target and produce a speculative call.
34 This call may or may not survive through IPA optimization based on decision
36 - Finally we propagate the following flags: unlikely executed, executed
37 once, executed at startup and executed at exit. These flags are used to
38 control code size/performance threshold and code placement (by producing
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
42 #include "coretypes.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
54 #include "value-prof.h"
55 #include "tree-inline.h"
56 #include "symbol-summary.h"
59 #include "ipa-fnsummary.h"
61 /* Entry in the histogram. */
63 struct histogram_entry
70 /* Histogram of profile values.
71 The histogram is represented as an ordered vector of entries allocated via
72 histogram_pool. During construction a separate hashtable is kept to lookup
75 vec
<histogram_entry
*> histogram
;
76 static object_allocator
<histogram_entry
> histogram_pool ("IPA histogram");
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
80 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
82 static inline hashval_t
hash (const histogram_entry
*);
83 static inline int equal (const histogram_entry
*, const histogram_entry
*);
87 histogram_hash::hash (const histogram_entry
*val
)
93 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
95 return val
->count
== val2
->count
;
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99 HASHTABLE is the on-side hash kept to avoid duplicates. */
102 account_time_size (hash_table
<histogram_hash
> *hashtable
,
103 vec
<histogram_entry
*> &histogram
,
104 gcov_type count
, int time
, int size
)
106 histogram_entry key
= {count
, 0, 0};
107 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
111 *val
= histogram_pool
.allocate ();
113 histogram
.safe_push (*val
);
115 (*val
)->time
+= time
;
116 (*val
)->size
+= size
;
120 cmp_counts (const void *v1
, const void *v2
)
122 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
123 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
124 if (h1
->count
< h2
->count
)
126 if (h1
->count
> h2
->count
)
131 /* Dump HISTOGRAM to FILE. */
134 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
137 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0,
140 fprintf (dump_file
, "Histogram:\n");
141 for (i
= 0; i
< histogram
.length (); i
++)
143 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
144 overall_size
+= histogram
[i
]->size
;
150 for (i
= 0; i
< histogram
.length (); i
++)
152 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
153 cumulated_size
+= histogram
[i
]->size
;
154 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
155 (int64_t) histogram
[i
]->count
,
157 cumulated_time
* 100.0 / overall_time
,
159 cumulated_size
* 100.0 / overall_size
);
163 /* Structure containing speculative target information from profile. */
165 struct speculative_call_target
167 speculative_call_target (unsigned int id
= 0, int prob
= 0)
168 : target_id (id
), target_probability (prob
)
172 /* Profile_id of target obtained from profile. */
173 unsigned int target_id
;
174 /* Probability that call will land in function with target_id. */
175 unsigned int target_probability
;
178 class speculative_call_summary
181 speculative_call_summary () : speculative_call_targets ()
184 auto_vec
<speculative_call_target
> speculative_call_targets
;
190 /* Class to manage call summaries. */
192 class ipa_profile_call_summaries
193 : public call_summary
<speculative_call_summary
*>
196 ipa_profile_call_summaries (symbol_table
*table
)
197 : call_summary
<speculative_call_summary
*> (table
)
200 /* Duplicate info when an edge is cloned. */
201 virtual void duplicate (cgraph_edge
*, cgraph_edge
*,
202 speculative_call_summary
*old_sum
,
203 speculative_call_summary
*new_sum
);
206 static ipa_profile_call_summaries
*call_sums
= NULL
;
208 /* Dump all information in speculative call summary to F. */
211 speculative_call_summary::dump (FILE *f
)
215 unsigned spec_count
= speculative_call_targets
.length ();
216 for (unsigned i
= 0; i
< spec_count
; i
++)
218 speculative_call_target item
= speculative_call_targets
[i
];
219 n2
= find_func_by_profile_id (item
.target_id
);
221 fprintf (f
, " The %i speculative target is %s with prob %3.2f\n", i
,
223 item
.target_probability
/ (float) REG_BR_PROB_BASE
);
225 fprintf (f
, " The %i speculative target is %u with prob %3.2f\n", i
,
227 item
.target_probability
/ (float) REG_BR_PROB_BASE
);
231 /* Duplicate info when an edge is cloned. */
234 ipa_profile_call_summaries::duplicate (cgraph_edge
*, cgraph_edge
*,
235 speculative_call_summary
*old_sum
,
236 speculative_call_summary
*new_sum
)
241 unsigned old_count
= old_sum
->speculative_call_targets
.length ();
245 new_sum
->speculative_call_targets
.reserve_exact (old_count
);
246 new_sum
->speculative_call_targets
.quick_grow_cleared (old_count
);
248 for (unsigned i
= 0; i
< old_count
; i
++)
250 new_sum
->speculative_call_targets
[i
]
251 = old_sum
->speculative_call_targets
[i
];
255 /* Collect histogram and speculative target summaries from CFG profiles. */
258 ipa_profile_generate_summary (void)
260 struct cgraph_node
*node
;
261 gimple_stmt_iterator gsi
;
264 hash_table
<histogram_hash
> hashtable (10);
266 gcc_checking_assert (!call_sums
);
267 call_sums
= new ipa_profile_call_summaries (symtab
);
269 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
270 if (ENTRY_BLOCK_PTR_FOR_FN
271 (DECL_STRUCT_FUNCTION (node
->decl
))->count
.ipa_p ())
272 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
276 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
278 gimple
*stmt
= gsi_stmt (gsi
);
279 if (gimple_code (stmt
) == GIMPLE_CALL
280 && !gimple_call_fndecl (stmt
))
283 h
= gimple_histogram_value_of_type
284 (DECL_STRUCT_FUNCTION (node
->decl
),
285 stmt
, HIST_TYPE_INDIR_CALL
);
286 /* No need to do sanity check: gimple_ic_transform already
287 takes away bad histograms. */
290 gcov_type val
, count
, all
;
291 struct cgraph_edge
*e
= node
->get_edge (stmt
);
292 if (e
&& !e
->indirect_unknown_callee
)
295 speculative_call_summary
*csum
296 = call_sums
->get_create (e
);
298 for (unsigned j
= 0; j
< GCOV_TOPN_MAXIMUM_TRACKED_VALUES
;
301 if (!get_nth_most_common_value (NULL
, "indirect call",
302 h
, &val
, &count
, &all
,
306 if (val
== 0 || count
== 0)
313 "Probability capped to 1\n");
316 speculative_call_target
item (
317 val
, GCOV_COMPUTE_SCALE (count
, all
));
318 csum
->speculative_call_targets
.safe_push (item
);
321 gimple_remove_histogram_value
322 (DECL_STRUCT_FUNCTION (node
->decl
), stmt
, h
);
325 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
326 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
328 if (bb
->count
.ipa_p () && bb
->count
.initialized_p ())
329 account_time_size (&hashtable
, histogram
,
330 bb
->count
.ipa ().to_gcov_type (),
333 histogram
.qsort (cmp_counts
);
336 /* Serialize the speculative summary info for LTO. */
339 ipa_profile_write_edge_summary (lto_simple_output_block
*ob
,
340 speculative_call_summary
*csum
)
344 len
= csum
->speculative_call_targets
.length ();
346 gcc_assert (len
<= GCOV_TOPN_MAXIMUM_TRACKED_VALUES
);
348 streamer_write_hwi_stream (ob
->main_stream
, len
);
352 unsigned spec_count
= csum
->speculative_call_targets
.length ();
353 for (unsigned i
= 0; i
< spec_count
; i
++)
355 speculative_call_target item
= csum
->speculative_call_targets
[i
];
356 gcc_assert (item
.target_id
);
357 streamer_write_hwi_stream (ob
->main_stream
, item
.target_id
);
358 streamer_write_hwi_stream (ob
->main_stream
, item
.target_probability
);
363 /* Serialize the ipa info for lto. */
366 ipa_profile_write_summary (void)
368 struct lto_simple_output_block
*ob
369 = lto_create_simple_output_block (LTO_section_ipa_profile
);
372 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
373 for (i
= 0; i
< histogram
.length (); i
++)
375 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
376 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
377 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
383 /* Serialize speculative targets information. */
384 unsigned int count
= 0;
385 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
386 lto_symtab_encoder_iterator lsei
;
389 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
390 lsei_next_function_in_partition (&lsei
))
392 node
= lsei_cgraph_node (lsei
);
393 if (node
->definition
&& node
->has_gimple_body_p ()
394 && node
->indirect_calls
)
398 streamer_write_uhwi_stream (ob
->main_stream
, count
);
400 /* Process all of the functions. */
401 for (lsei
= lsei_start_function_in_partition (encoder
);
402 !lsei_end_p (lsei
) && count
; lsei_next_function_in_partition (&lsei
))
404 cgraph_node
*node
= lsei_cgraph_node (lsei
);
405 if (node
->definition
&& node
->has_gimple_body_p ()
406 && node
->indirect_calls
)
408 int node_ref
= lto_symtab_encoder_encode (encoder
, node
);
409 streamer_write_uhwi_stream (ob
->main_stream
, node_ref
);
411 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
413 speculative_call_summary
*csum
= call_sums
->get_create (e
);
414 ipa_profile_write_edge_summary (ob
, csum
);
419 lto_destroy_simple_output_block (ob
);
422 /* Dump all profile summary data for all cgraph nodes and edges to file F. */
425 ipa_profile_dump_all_summaries (FILE *f
)
428 "\n========== IPA-profile speculative targets: ==========\n");
430 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
432 fprintf (f
, "\nSummary for node %s:\n", node
->dump_name ());
433 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
435 fprintf (f
, " Summary for %s of indirect edge %d:\n",
436 e
->caller
->dump_name (), e
->lto_stmt_uid
);
437 speculative_call_summary
*csum
= call_sums
->get_create (e
);
444 /* Read speculative targets information about edge for LTO WPA. */
447 ipa_profile_read_edge_summary (class lto_input_block
*ib
, cgraph_edge
*edge
)
451 len
= streamer_read_hwi (ib
);
452 gcc_assert (len
<= GCOV_TOPN_MAXIMUM_TRACKED_VALUES
);
453 speculative_call_summary
*csum
= call_sums
->get_create (edge
);
455 for (i
= 0; i
< len
; i
++)
457 unsigned int target_id
= streamer_read_hwi (ib
);
458 int target_probability
= streamer_read_hwi (ib
);
459 speculative_call_target
item (target_id
, target_probability
);
460 csum
->speculative_call_targets
.safe_push (item
);
464 /* Read profile speculative targets section information for LTO WPA. */
467 ipa_profile_read_summary_section (struct lto_file_decl_data
*file_data
,
468 class lto_input_block
*ib
)
473 lto_symtab_encoder_t encoder
= file_data
->symtab_node_encoder
;
475 unsigned int count
= streamer_read_uhwi (ib
);
481 for (i
= 0; i
< count
; i
++)
483 index
= streamer_read_uhwi (ib
);
484 encoder
= file_data
->symtab_node_encoder
;
486 = dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
, index
));
488 for (cgraph_edge
*e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
489 ipa_profile_read_edge_summary (ib
, e
);
493 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
497 ipa_profile_read_summary (void)
499 struct lto_file_decl_data
** file_data_vec
500 = lto_get_file_decl_data ();
501 struct lto_file_decl_data
* file_data
;
504 hash_table
<histogram_hash
> hashtable (10);
506 gcc_checking_assert (!call_sums
);
507 call_sums
= new ipa_profile_call_summaries (symtab
);
509 while ((file_data
= file_data_vec
[j
++]))
513 class lto_input_block
*ib
514 = lto_create_simple_input_block (file_data
,
515 LTO_section_ipa_profile
,
519 unsigned int num
= streamer_read_uhwi (ib
);
521 for (n
= 0; n
< num
; n
++)
523 gcov_type count
= streamer_read_gcov_count (ib
);
524 int time
= streamer_read_uhwi (ib
);
525 int size
= streamer_read_uhwi (ib
);
526 account_time_size (&hashtable
, histogram
,
530 ipa_profile_read_summary_section (file_data
, ib
);
532 lto_destroy_simple_input_block (file_data
,
533 LTO_section_ipa_profile
,
537 histogram
.qsort (cmp_counts
);
540 /* Data used by ipa_propagate_frequency. */
542 struct ipa_propagate_frequency_data
544 cgraph_node
*function_symbol
;
545 bool maybe_unlikely_executed
;
546 bool maybe_executed_once
;
547 bool only_called_at_startup
;
548 bool only_called_at_exit
;
551 /* Worker for ipa_propagate_frequency_1. */
554 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
556 struct ipa_propagate_frequency_data
*d
;
557 struct cgraph_edge
*edge
;
559 d
= (struct ipa_propagate_frequency_data
*)data
;
560 for (edge
= node
->callers
;
561 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
562 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
563 edge
= edge
->next_caller
)
565 if (edge
->caller
!= d
->function_symbol
)
567 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
568 /* It makes sense to put main() together with the static constructors.
569 It will be executed for sure, but rest of functions called from
570 main are definitely not at startup only. */
571 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
572 d
->only_called_at_startup
= 0;
573 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
576 /* When profile feedback is available, do not try to propagate too hard;
577 counts are already good guide on function frequencies and roundoff
578 errors can make us to push function into unlikely section even when
579 it is executed by the train run. Transfer the function only if all
580 callers are unlikely executed. */
582 && !(edge
->callee
->count
.ipa () == profile_count::zero ())
583 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
584 || (edge
->caller
->inlined_to
585 && edge
->caller
->inlined_to
->frequency
586 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
587 d
->maybe_unlikely_executed
= false;
588 if (edge
->count
.ipa ().initialized_p ()
589 && !edge
->count
.ipa ().nonzero_p ())
591 switch (edge
->caller
->frequency
)
593 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
595 case NODE_FREQUENCY_EXECUTED_ONCE
:
597 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
598 fprintf (dump_file
, " Called by %s that is executed once\n",
599 edge
->caller
->dump_name ());
600 d
->maybe_unlikely_executed
= false;
601 ipa_call_summary
*s
= ipa_call_summaries
->get (edge
);
602 if (s
!= NULL
&& s
->loop_depth
)
604 d
->maybe_executed_once
= false;
605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
606 fprintf (dump_file
, " Called in loop\n");
610 case NODE_FREQUENCY_HOT
:
611 case NODE_FREQUENCY_NORMAL
:
612 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
613 fprintf (dump_file
, " Called by %s that is normal or hot\n",
614 edge
->caller
->dump_name ());
615 d
->maybe_unlikely_executed
= false;
616 d
->maybe_executed_once
= false;
623 /* Return ture if NODE contains hot calls. */
626 contains_hot_call_p (struct cgraph_node
*node
)
628 struct cgraph_edge
*e
;
629 for (e
= node
->callees
; e
; e
= e
->next_callee
)
630 if (e
->maybe_hot_p ())
632 else if (!e
->inline_failed
633 && contains_hot_call_p (e
->callee
))
635 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
636 if (e
->maybe_hot_p ())
641 /* See if the frequency of NODE can be updated based on frequencies of its
644 ipa_propagate_frequency (struct cgraph_node
*node
)
646 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
647 bool changed
= false;
649 /* We cannot propagate anything useful about externally visible functions
650 nor about virtuals. */
653 || (opt_for_fn (node
->decl
, flag_devirtualize
)
654 && DECL_VIRTUAL_P (node
->decl
)))
656 gcc_assert (node
->analyzed
);
657 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
658 fprintf (dump_file
, "Processing frequency %s\n", node
->dump_name ());
660 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
663 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
664 && !node
->only_called_at_startup
)
666 node
->only_called_at_startup
= true;
668 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
672 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
673 && !node
->only_called_at_exit
)
675 node
->only_called_at_exit
= true;
677 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
682 /* With profile we can decide on hot/normal based on count. */
683 if (node
->count
. ipa().initialized_p ())
686 if (!(node
->count
. ipa() == profile_count::zero ())
687 && node
->count
. ipa() >= get_hot_bb_threshold ())
690 hot
|= contains_hot_call_p (node
);
693 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
696 fprintf (dump_file
, "Node %s promoted to hot.\n",
698 node
->frequency
= NODE_FREQUENCY_HOT
;
703 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
706 fprintf (dump_file
, "Node %s reduced to normal.\n",
708 node
->frequency
= NODE_FREQUENCY_NORMAL
;
712 /* These come either from profile or user hints; never update them. */
713 if (node
->frequency
== NODE_FREQUENCY_HOT
714 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
716 if (d
.maybe_unlikely_executed
)
718 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
720 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
724 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
726 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
728 fprintf (dump_file
, "Node %s promoted to executed once.\n",
735 /* Check that number of arguments of N agrees with E.
736 Be conservative when summaries are not present. */
739 check_argument_count (struct cgraph_node
*n
, struct cgraph_edge
*e
)
741 if (!ipa_node_params_sum
|| !ipa_edge_args_sum
)
743 ipa_node_params
*info
= ipa_node_params_sum
->get (n
->function_symbol ());
746 ipa_edge_args
*e_info
= ipa_edge_args_sum
->get (e
);
749 if (ipa_get_param_count (info
) != ipa_get_cs_argument_count (e_info
)
750 && (ipa_get_param_count (info
) >= ipa_get_cs_argument_count (e_info
)
751 || !stdarg_p (TREE_TYPE (n
->decl
))))
756 /* Simple ipa profile pass propagating frequencies across the callgraph. */
761 struct cgraph_node
**order
;
762 struct cgraph_edge
*e
;
764 bool something_changed
= false;
766 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
767 struct cgraph_node
*n
,*n2
;
768 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
769 int nmismatch
= 0, nimpossible
= 0;
770 bool node_map_initialized
= false;
774 dump_histogram (dump_file
, histogram
);
775 for (i
= 0; i
< (int)histogram
.length (); i
++)
777 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
778 overall_size
+= histogram
[i
]->size
;
783 gcc_assert (overall_size
);
785 cutoff
= (overall_time
* param_hot_bb_count_ws_permille
+ 500) / 1000;
786 for (i
= 0; cumulated
< cutoff
; i
++)
788 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
789 threshold
= histogram
[i
]->count
;
795 gcov_type cumulated_time
= 0, cumulated_size
= 0;
798 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
801 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
802 cumulated_size
+= histogram
[i
]->size
;
804 fprintf (dump_file
, "Determined min count: %" PRId64
805 " Time:%3.2f%% Size:%3.2f%%\n",
807 cumulated_time
* 100.0 / overall_time
,
808 cumulated_size
* 100.0 / overall_size
);
814 fprintf (dump_file
, "Setting hotness threshold in LTO mode.\n");
815 set_hot_bb_threshold (threshold
);
818 histogram
.release ();
819 histogram_pool
.release ();
821 /* Produce speculative calls: we saved common target from profiling into
822 e->target_id. Now, at link time, we can look up corresponding
823 function node and produce speculative call. */
825 gcc_checking_assert (call_sums
);
829 if (!node_map_initialized
)
830 init_node_map (false);
831 node_map_initialized
= true;
833 ipa_profile_dump_all_summaries (dump_file
);
836 FOR_EACH_DEFINED_FUNCTION (n
)
840 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
843 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
845 if (n
->count
.initialized_p ())
848 speculative_call_summary
*csum
= call_sums
->get_create (e
);
849 unsigned spec_count
= csum
->speculative_call_targets
.length ();
852 if (!node_map_initialized
)
853 init_node_map (false);
854 node_map_initialized
= true;
862 "Updating hotness threshold in LTO mode.\n");
863 fprintf (dump_file
, "Updated min count: %" PRId64
"\n",
864 (int64_t) threshold
/ spec_count
);
866 set_hot_bb_threshold (threshold
/ spec_count
);
869 unsigned speculative_id
= 0;
870 profile_count orig
= e
->count
;
871 for (unsigned i
= 0; i
< spec_count
; i
++)
873 speculative_call_target item
874 = csum
->speculative_call_targets
[i
];
875 n2
= find_func_by_profile_id (item
.target_id
);
881 "Indirect call -> direct call from"
882 " other module %s => %s, prob %3.2f\n",
885 item
.target_probability
886 / (float) REG_BR_PROB_BASE
);
888 if (item
.target_probability
< REG_BR_PROB_BASE
/ 2)
894 "probability is too low.\n");
896 else if (!e
->maybe_hot_p ())
901 "Not speculating: call is cold.\n");
903 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
904 && n2
->can_be_discarded_p ())
909 "Not speculating: target is overwritable "
910 "and can be discarded.\n");
912 else if (!check_argument_count (n2
, e
))
918 "parameter count mismatch\n");
920 else if (e
->indirect_info
->polymorphic
921 && !opt_for_fn (n
->decl
, flag_devirtualize
)
922 && !possible_polymorphic_call_target_p (e
, n2
))
928 "function is not in the polymorphic "
929 "call target list\n");
933 /* Target may be overwritable, but profile says that
934 control flow goes to this particular implementation
935 of N2. Speculate on the local alias to allow
937 if (!n2
->can_be_discarded_p ())
940 alias
= dyn_cast
<cgraph_node
*>
941 (n2
->noninterposable_alias ());
946 profile_probability prob
947 = profile_probability::from_reg_br_prob_base
948 (item
.target_probability
).adjusted ();
949 e
->make_speculative (n2
,
950 orig
.apply_probability (prob
),
960 "Function with profile-id %i not found.\n",
968 ipa_update_overall_fn_summary (n
);
970 if (node_map_initialized
)
972 if (dump_file
&& nindirect
)
974 "%i indirect calls trained.\n"
975 "%i (%3.2f%%) have common target.\n"
976 "%i (%3.2f%%) targets was not found.\n"
977 "%i (%3.2f%%) targets had parameter count mismatch.\n"
978 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
979 "%i (%3.2f%%) speculations seems useless.\n"
980 "%i (%3.2f%%) speculations produced.\n",
982 ncommon
, ncommon
* 100.0 / nindirect
,
983 nunknown
, nunknown
* 100.0 / nindirect
,
984 nmismatch
, nmismatch
* 100.0 / nindirect
,
985 nimpossible
, nimpossible
* 100.0 / nindirect
,
986 nuseless
, nuseless
* 100.0 / nindirect
,
987 nconverted
, nconverted
* 100.0 / nindirect
);
989 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
990 order_pos
= ipa_reverse_postorder (order
);
991 for (i
= order_pos
- 1; i
>= 0; i
--)
994 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
995 && ipa_propagate_frequency (order
[i
]))
997 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
998 if (e
->callee
->local
&& !e
->callee
->aux
)
1000 something_changed
= true;
1001 e
->callee
->aux
= (void *)1;
1004 order
[i
]->aux
= NULL
;
1007 while (something_changed
)
1009 something_changed
= false;
1010 for (i
= order_pos
- 1; i
>= 0; i
--)
1013 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
1014 && ipa_propagate_frequency (order
[i
]))
1016 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
1017 if (e
->callee
->local
&& !e
->callee
->aux
)
1019 something_changed
= true;
1020 e
->callee
->aux
= (void *)1;
1023 order
[i
]->aux
= NULL
;
1028 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1029 symtab
->dump (dump_file
);
1039 const pass_data pass_data_ipa_profile
=
1041 IPA_PASS
, /* type */
1042 "profile_estimate", /* name */
1043 OPTGROUP_NONE
, /* optinfo_flags */
1044 TV_IPA_PROFILE
, /* tv_id */
1045 0, /* properties_required */
1046 0, /* properties_provided */
1047 0, /* properties_destroyed */
1048 0, /* todo_flags_start */
1049 0, /* todo_flags_finish */
1052 class pass_ipa_profile
: public ipa_opt_pass_d
1055 pass_ipa_profile (gcc::context
*ctxt
)
1056 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
1057 ipa_profile_generate_summary
, /* generate_summary */
1058 ipa_profile_write_summary
, /* write_summary */
1059 ipa_profile_read_summary
, /* read_summary */
1060 NULL
, /* write_optimization_summary */
1061 NULL
, /* read_optimization_summary */
1062 NULL
, /* stmt_fixup */
1063 0, /* function_transform_todo_flags_start */
1064 NULL
, /* function_transform */
1065 NULL
) /* variable_transform */
1068 /* opt_pass methods: */
1069 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
1070 virtual unsigned int execute (function
*) { return ipa_profile (); }
1072 }; // class pass_ipa_profile
1077 make_pass_ipa_profile (gcc::context
*ctxt
)
1079 return new pass_ipa_profile (ctxt
);