PR middle-end/80422
[official-gcc.git] / gcc / auto-profile.c
blob4b21340c6f0c1411aca593aff00f0cec1369bd4c
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2017 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "params.h"
45 #include "symbol-summary.h"
46 #include "ipa-prop.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50 #include "tree-pretty-print.h"
51 #include "gimple-pretty-print.h"
53 /* The following routines implements AutoFDO optimization.
55 This optimization uses sampling profiles to annotate basic block counts
56 and uses heuristics to estimate branch probabilities.
58 There are three phases in AutoFDO:
60 Phase 1: Read profile from the profile data file.
61 The following info is read from the profile datafile:
62 * string_table: a map between function name and its index.
63 * autofdo_source_profile: a map from function_instance name to
64 function_instance. This is represented as a forest of
65 function_instances.
66 * WorkingSet: a histogram of how many instructions are covered for a
67 given percentage of total cycles. This is describing the binary
68 level information (not source level). This info is used to help
69 decide if we want aggressive optimizations that could increase
70 code footprint (e.g. loop unroll etc.)
71 A function instance is an instance of function that could either be a
72 standalone symbol, or a clone of a function that is inlined into another
73 function.
75 Phase 2: Early inline + value profile transformation.
76 Early inline uses autofdo_source_profile to find if a callsite is:
77 * inlined in the profiled binary.
78 * callee body is hot in the profiling run.
79 If both condition satisfies, early inline will inline the callsite
80 regardless of the code growth.
81 Phase 2 is an iterative process. During each iteration, we also check
82 if an indirect callsite is promoted and inlined in the profiling run.
83 If yes, vpt will happen to force promote it and in the next iteration,
84 einline will inline the promoted callsite in the next iteration.
86 Phase 3: Annotate control flow graph.
87 AutoFDO uses a separate pass to:
88 * Annotate basic block count
89 * Estimate branch probability
91 After the above 3 phases, all profile is readily annotated on the GCC IR.
92 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
93 use of the profile. E.g. it uses existing mechanism to calculate the basic
94 block/edge frequency, as well as the cgraph node/edge count.
97 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
98 #define AUTO_PROFILE_VERSION 1
100 namespace autofdo
103 /* Represent a source location: (function_decl, lineno). */
104 typedef std::pair<tree, unsigned> decl_lineno;
106 /* Represent an inline stack. vector[0] is the leaf node. */
107 typedef auto_vec<decl_lineno> inline_stack;
109 /* String array that stores function names. */
110 typedef auto_vec<char *> string_vector;
112 /* Map from function name's index in string_table to target's
113 execution count. */
114 typedef std::map<unsigned, gcov_type> icall_target_map;
116 /* Set of gimple stmts. Used to track if the stmt has already been promoted
117 to direct call. */
118 typedef std::set<gimple *> stmt_set;
120 /* Represent count info of an inline stack. */
121 struct count_info
123 /* Sampled count of the inline stack. */
124 gcov_type count;
126 /* Map from indirect call target to its sample count. */
127 icall_target_map targets;
129 /* Whether this inline stack is already used in annotation.
131 Each inline stack should only be used to annotate IR once.
132 This will be enforced when instruction-level discriminator
133 is supported. */
134 bool annotated;
137 /* operator< for "const char *". */
138 struct string_compare
140 bool operator()(const char *a, const char *b) const
142 return strcmp (a, b) < 0;
146 /* Store a string array, indexed by string position in the array. */
147 class string_table
149 public:
150 string_table ()
153 ~string_table ();
155 /* For a given string, returns its index. */
156 int get_index (const char *name) const;
158 /* For a given decl, returns the index of the decl name. */
159 int get_index_by_decl (tree decl) const;
161 /* For a given index, returns the string. */
162 const char *get_name (int index) const;
164 /* Read profile, return TRUE on success. */
165 bool read ();
167 private:
168 typedef std::map<const char *, unsigned, string_compare> string_index_map;
169 string_vector vector_;
170 string_index_map map_;
173 /* Profile of a function instance:
174 1. total_count of the function.
175 2. head_count (entry basic block count) of the function (only valid when
176 function is a top-level function_instance, i.e. it is the original copy
177 instead of the inlined copy).
178 3. map from source location (decl_lineno) to profile (count_info).
179 4. map from callsite to callee function_instance. */
180 class function_instance
182 public:
183 typedef auto_vec<function_instance *> function_instance_stack;
185 /* Read the profile and return a function_instance with head count as
186 HEAD_COUNT. Recursively read callsites to create nested function_instances
187 too. STACK is used to track the recursive creation process. */
188 static function_instance *
189 read_function_instance (function_instance_stack *stack,
190 gcov_type head_count);
192 /* Recursively deallocate all callsites (nested function_instances). */
193 ~function_instance ();
195 /* Accessors. */
197 name () const
199 return name_;
201 gcov_type
202 total_count () const
204 return total_count_;
206 gcov_type
207 head_count () const
209 return head_count_;
212 /* Traverse callsites of the current function_instance to find one at the
213 location of LINENO and callee name represented in DECL. */
214 function_instance *get_function_instance_by_decl (unsigned lineno,
215 tree decl) const;
217 /* Store the profile info for LOC in INFO. Return TRUE if profile info
218 is found. */
219 bool get_count_info (location_t loc, count_info *info) const;
221 /* Read the inlined indirect call target profile for STMT and store it in
222 MAP, return the total count for all inlined indirect calls. */
223 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
225 /* Sum of counts that is used during annotation. */
226 gcov_type total_annotated_count () const;
228 /* Mark LOC as annotated. */
229 void mark_annotated (location_t loc);
231 private:
232 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
233 typedef std::pair<unsigned, unsigned> callsite;
235 /* Map from callsite to callee function_instance. */
236 typedef std::map<callsite, function_instance *> callsite_map;
238 function_instance (unsigned name, gcov_type head_count)
239 : name_ (name), total_count_ (0), head_count_ (head_count)
243 /* Map from source location (decl_lineno) to profile (count_info). */
244 typedef std::map<unsigned, count_info> position_count_map;
246 /* function_instance name index in the string_table. */
247 unsigned name_;
249 /* Total sample count. */
250 gcov_type total_count_;
252 /* Entry BB's sample count. */
253 gcov_type head_count_;
255 /* Map from callsite location to callee function_instance. */
256 callsite_map callsites;
258 /* Map from source location to count_info. */
259 position_count_map pos_counts;
262 /* Profile for all functions. */
263 class autofdo_source_profile
265 public:
266 static autofdo_source_profile *
267 create ()
269 autofdo_source_profile *map = new autofdo_source_profile ();
271 if (map->read ())
272 return map;
273 delete map;
274 return NULL;
277 ~autofdo_source_profile ();
279 /* For a given DECL, returns the top-level function_instance. */
280 function_instance *get_function_instance_by_decl (tree decl) const;
282 /* Find count_info for a given gimple STMT. If found, store the count_info
283 in INFO and return true; otherwise return false. */
284 bool get_count_info (gimple *stmt, count_info *info) const;
286 /* Find total count of the callee of EDGE. */
287 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
289 /* Update value profile INFO for STMT from the inlined indirect callsite.
290 Return true if INFO is updated. */
291 bool update_inlined_ind_target (gcall *stmt, count_info *info);
293 /* Mark LOC as annotated. */
294 void mark_annotated (location_t loc);
296 private:
297 /* Map from function_instance name index (in string_table) to
298 function_instance. */
299 typedef std::map<unsigned, function_instance *> name_function_instance_map;
301 autofdo_source_profile () {}
303 /* Read AutoFDO profile and returns TRUE on success. */
304 bool read ();
306 /* Return the function_instance in the profile that correspond to the
307 inline STACK. */
308 function_instance *
309 get_function_instance_by_inline_stack (const inline_stack &stack) const;
311 name_function_instance_map map_;
314 /* Store the strings read from the profile data file. */
315 static string_table *afdo_string_table;
317 /* Store the AutoFDO source profile. */
318 static autofdo_source_profile *afdo_source_profile;
320 /* gcov_ctr_summary structure to store the profile_info. */
321 static struct gcov_ctr_summary *afdo_profile_info;
323 /* Helper functions. */
325 /* Return the original name of NAME: strip the suffix that starts
326 with '.' Caller is responsible for freeing RET. */
328 static char *
329 get_original_name (const char *name)
331 char *ret = xstrdup (name);
332 char *find = strchr (ret, '.');
333 if (find != NULL)
334 *find = 0;
335 return ret;
338 /* Return the combined location, which is a 32bit integer in which
339 higher 16 bits stores the line offset of LOC to the start lineno
340 of DECL, The lower 16 bits stores the discriminator. */
342 static unsigned
343 get_combined_location (location_t loc, tree decl)
345 /* TODO: allow more bits for line and less bits for discriminator. */
346 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
347 warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
348 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
351 /* Return the function decl of a given lexical BLOCK. */
353 static tree
354 get_function_decl_from_block (tree block)
356 tree decl;
358 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
359 return NULL_TREE;
361 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
362 decl && (TREE_CODE (decl) == BLOCK);
363 decl = BLOCK_ABSTRACT_ORIGIN (decl))
364 if (TREE_CODE (decl) == FUNCTION_DECL)
365 break;
366 return decl;
369 /* Store inline stack for STMT in STACK. */
371 static void
372 get_inline_stack (location_t locus, inline_stack *stack)
374 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
375 return;
377 tree block = LOCATION_BLOCK (locus);
378 if (block && TREE_CODE (block) == BLOCK)
380 int level = 0;
381 for (block = BLOCK_SUPERCONTEXT (block);
382 block && (TREE_CODE (block) == BLOCK);
383 block = BLOCK_SUPERCONTEXT (block))
385 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
386 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
387 continue;
389 tree decl = get_function_decl_from_block (block);
390 stack->safe_push (
391 std::make_pair (decl, get_combined_location (locus, decl)));
392 locus = tmp_locus;
393 level++;
396 stack->safe_push (
397 std::make_pair (current_function_decl,
398 get_combined_location (locus, current_function_decl)));
401 /* Return STMT's combined location, which is a 32bit integer in which
402 higher 16 bits stores the line offset of LOC to the start lineno
403 of DECL, The lower 16 bits stores the discriminator. */
405 static unsigned
406 get_relative_location_for_stmt (gimple *stmt)
408 location_t locus = gimple_location (stmt);
409 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
410 return UNKNOWN_LOCATION;
412 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
413 block = BLOCK_SUPERCONTEXT (block))
414 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
415 return get_combined_location (locus,
416 get_function_decl_from_block (block));
417 return get_combined_location (locus, current_function_decl);
420 /* Return true if BB contains indirect call. */
422 static bool
423 has_indirect_call (basic_block bb)
425 gimple_stmt_iterator gsi;
427 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
429 gimple *stmt = gsi_stmt (gsi);
430 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
431 && (gimple_call_fn (stmt) == NULL
432 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
433 return true;
435 return false;
438 /* Member functions for string_table. */
440 /* Deconstructor. */
442 string_table::~string_table ()
444 for (unsigned i = 0; i < vector_.length (); i++)
445 free (vector_[i]);
449 /* Return the index of a given function NAME. Return -1 if NAME is not
450 found in string table. */
453 string_table::get_index (const char *name) const
455 if (name == NULL)
456 return -1;
457 string_index_map::const_iterator iter = map_.find (name);
458 if (iter == map_.end ())
459 return -1;
461 return iter->second;
464 /* Return the index of a given function DECL. Return -1 if DECL is not
465 found in string table. */
468 string_table::get_index_by_decl (tree decl) const
470 char *name
471 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
472 int ret = get_index (name);
473 free (name);
474 if (ret != -1)
475 return ret;
476 ret = get_index (lang_hooks.dwarf_name (decl, 0));
477 if (ret != -1)
478 return ret;
479 if (DECL_ABSTRACT_ORIGIN (decl))
480 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
482 return -1;
485 /* Return the function name of a given INDEX. */
487 const char *
488 string_table::get_name (int index) const
490 gcc_assert (index > 0 && index < (int)vector_.length ());
491 return vector_[index];
494 /* Read the string table. Return TRUE if reading is successful. */
496 bool
497 string_table::read ()
499 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
500 return false;
501 /* Skip the length of the section. */
502 gcov_read_unsigned ();
503 /* Read in the file name table. */
504 unsigned string_num = gcov_read_unsigned ();
505 for (unsigned i = 0; i < string_num; i++)
507 vector_.safe_push (get_original_name (gcov_read_string ()));
508 map_[vector_.last ()] = i;
510 return true;
513 /* Member functions for function_instance. */
515 function_instance::~function_instance ()
517 for (callsite_map::iterator iter = callsites.begin ();
518 iter != callsites.end (); ++iter)
519 delete iter->second;
522 /* Traverse callsites of the current function_instance to find one at the
523 location of LINENO and callee name represented in DECL. */
525 function_instance *
526 function_instance::get_function_instance_by_decl (unsigned lineno,
527 tree decl) const
529 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
530 if (func_name_idx != -1)
532 callsite_map::const_iterator ret
533 = callsites.find (std::make_pair (lineno, func_name_idx));
534 if (ret != callsites.end ())
535 return ret->second;
537 func_name_idx
538 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
539 if (func_name_idx != -1)
541 callsite_map::const_iterator ret
542 = callsites.find (std::make_pair (lineno, func_name_idx));
543 if (ret != callsites.end ())
544 return ret->second;
546 if (DECL_ABSTRACT_ORIGIN (decl))
547 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
549 return NULL;
552 /* Store the profile info for LOC in INFO. Return TRUE if profile info
553 is found. */
555 bool
556 function_instance::get_count_info (location_t loc, count_info *info) const
558 position_count_map::const_iterator iter = pos_counts.find (loc);
559 if (iter == pos_counts.end ())
560 return false;
561 *info = iter->second;
562 return true;
565 /* Mark LOC as annotated. */
567 void
568 function_instance::mark_annotated (location_t loc)
570 position_count_map::iterator iter = pos_counts.find (loc);
571 if (iter == pos_counts.end ())
572 return;
573 iter->second.annotated = true;
576 /* Read the inlined indirect call target profile for STMT and store it in
577 MAP, return the total count for all inlined indirect calls. */
579 gcov_type
580 function_instance::find_icall_target_map (gcall *stmt,
581 icall_target_map *map) const
583 gcov_type ret = 0;
584 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
586 for (callsite_map::const_iterator iter = callsites.begin ();
587 iter != callsites.end (); ++iter)
589 unsigned callee = iter->second->name ();
590 /* Check if callsite location match the stmt. */
591 if (iter->first.first != stmt_offset)
592 continue;
593 struct cgraph_node *node = cgraph_node::get_for_asmname (
594 get_identifier (afdo_string_table->get_name (callee)));
595 if (node == NULL)
596 continue;
597 if (!check_ic_target (stmt, node))
598 continue;
599 (*map)[callee] = iter->second->total_count ();
600 ret += iter->second->total_count ();
602 return ret;
605 /* Read the profile and create a function_instance with head count as
606 HEAD_COUNT. Recursively read callsites to create nested function_instances
607 too. STACK is used to track the recursive creation process. */
609 /* function instance profile format:
611 ENTRY_COUNT: 8 bytes
612 NAME_INDEX: 4 bytes
613 NUM_POS_COUNTS: 4 bytes
614 NUM_CALLSITES: 4 byte
615 POS_COUNT_1:
616 POS_1_OFFSET: 4 bytes
617 NUM_TARGETS: 4 bytes
618 COUNT: 8 bytes
619 TARGET_1:
620 VALUE_PROFILE_TYPE: 4 bytes
621 TARGET_IDX: 8 bytes
622 COUNT: 8 bytes
623 TARGET_2
625 TARGET_n
626 POS_COUNT_2
628 POS_COUNT_N
629 CALLSITE_1:
630 CALLSITE_1_OFFSET: 4 bytes
631 FUNCTION_INSTANCE_PROFILE (nested)
632 CALLSITE_2
634 CALLSITE_n. */
636 function_instance *
637 function_instance::read_function_instance (function_instance_stack *stack,
638 gcov_type head_count)
640 unsigned name = gcov_read_unsigned ();
641 unsigned num_pos_counts = gcov_read_unsigned ();
642 unsigned num_callsites = gcov_read_unsigned ();
643 function_instance *s = new function_instance (name, head_count);
644 stack->safe_push (s);
646 for (unsigned i = 0; i < num_pos_counts; i++)
648 unsigned offset = gcov_read_unsigned () & 0xffff0000;
649 unsigned num_targets = gcov_read_unsigned ();
650 gcov_type count = gcov_read_counter ();
651 s->pos_counts[offset].count = count;
652 for (unsigned j = 0; j < stack->length (); j++)
653 (*stack)[j]->total_count_ += count;
654 for (unsigned j = 0; j < num_targets; j++)
656 /* Only indirect call target histogram is supported now. */
657 gcov_read_unsigned ();
658 gcov_type target_idx = gcov_read_counter ();
659 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
662 for (unsigned i = 0; i < num_callsites; i++)
664 unsigned offset = gcov_read_unsigned ();
665 function_instance *callee_function_instance
666 = read_function_instance (stack, 0);
667 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
668 = callee_function_instance;
670 stack->pop ();
671 return s;
674 /* Sum of counts that is used during annotation. */
676 gcov_type
677 function_instance::total_annotated_count () const
679 gcov_type ret = 0;
680 for (callsite_map::const_iterator iter = callsites.begin ();
681 iter != callsites.end (); ++iter)
682 ret += iter->second->total_annotated_count ();
683 for (position_count_map::const_iterator iter = pos_counts.begin ();
684 iter != pos_counts.end (); ++iter)
685 if (iter->second.annotated)
686 ret += iter->second.count;
687 return ret;
690 /* Member functions for autofdo_source_profile. */
692 autofdo_source_profile::~autofdo_source_profile ()
694 for (name_function_instance_map::const_iterator iter = map_.begin ();
695 iter != map_.end (); ++iter)
696 delete iter->second;
699 /* For a given DECL, returns the top-level function_instance. */
701 function_instance *
702 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
704 int index = afdo_string_table->get_index_by_decl (decl);
705 if (index == -1)
706 return NULL;
707 name_function_instance_map::const_iterator ret = map_.find (index);
708 return ret == map_.end () ? NULL : ret->second;
711 /* Find count_info for a given gimple STMT. If found, store the count_info
712 in INFO and return true; otherwise return false. */
714 bool
715 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
717 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
718 return false;
720 inline_stack stack;
721 get_inline_stack (gimple_location (stmt), &stack);
722 if (stack.length () == 0)
723 return false;
724 function_instance *s = get_function_instance_by_inline_stack (stack);
725 if (s == NULL)
726 return false;
727 return s->get_count_info (stack[0].second, info);
730 /* Mark LOC as annotated. */
732 void
733 autofdo_source_profile::mark_annotated (location_t loc)
735 inline_stack stack;
736 get_inline_stack (loc, &stack);
737 if (stack.length () == 0)
738 return;
739 function_instance *s = get_function_instance_by_inline_stack (stack);
740 if (s == NULL)
741 return;
742 s->mark_annotated (stack[0].second);
745 /* Update value profile INFO for STMT from the inlined indirect callsite.
746 Return true if INFO is updated. */
748 bool
749 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
750 count_info *info)
752 if (dump_file)
754 fprintf (dump_file, "Checking indirect call -> direct call ");
755 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
758 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
760 if (dump_file)
761 fprintf (dump_file, " good locus\n");
762 return false;
765 count_info old_info;
766 get_count_info (stmt, &old_info);
767 gcov_type total = 0;
768 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
769 iter != old_info.targets.end (); ++iter)
770 total += iter->second;
772 /* Program behavior changed, original promoted (and inlined) target is not
773 hot any more. Will avoid promote the original target.
775 To check if original promoted target is still hot, we check the total
776 count of the unpromoted targets (stored in old_info). If it is no less
777 than half of the callsite count (stored in INFO), the original promoted
778 target is considered not hot any more. */
779 if (total >= info->count / 2)
781 if (dump_file)
782 fprintf (dump_file, " not hot anymore %ld >= %ld",
783 (long)total,
784 (long)info->count /2);
785 return false;
788 inline_stack stack;
789 get_inline_stack (gimple_location (stmt), &stack);
790 if (stack.length () == 0)
792 if (dump_file)
793 fprintf (dump_file, " no inline stack\n");
794 return false;
796 function_instance *s = get_function_instance_by_inline_stack (stack);
797 if (s == NULL)
799 if (dump_file)
800 fprintf (dump_file, " function not found in inline stack\n");
801 return false;
803 icall_target_map map;
804 if (s->find_icall_target_map (stmt, &map) == 0)
806 if (dump_file)
807 fprintf (dump_file, " no target map\n");
808 return false;
810 for (icall_target_map::const_iterator iter = map.begin ();
811 iter != map.end (); ++iter)
812 info->targets[iter->first] = iter->second;
813 if (dump_file)
814 fprintf (dump_file, " looks good\n");
815 return true;
818 /* Find total count of the callee of EDGE. */
820 gcov_type
821 autofdo_source_profile::get_callsite_total_count (
822 struct cgraph_edge *edge) const
824 inline_stack stack;
825 stack.safe_push (std::make_pair (edge->callee->decl, 0));
826 get_inline_stack (gimple_location (edge->call_stmt), &stack);
828 function_instance *s = get_function_instance_by_inline_stack (stack);
829 if (s == NULL
830 || afdo_string_table->get_index (IDENTIFIER_POINTER (
831 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
832 return 0;
834 return s->total_count ();
837 /* Read AutoFDO profile and returns TRUE on success. */
839 /* source profile format:
841 GCOV_TAG_AFDO_FUNCTION: 4 bytes
842 LENGTH: 4 bytes
843 NUM_FUNCTIONS: 4 bytes
844 FUNCTION_INSTANCE_1
845 FUNCTION_INSTANCE_2
847 FUNCTION_INSTANCE_N. */
849 bool
850 autofdo_source_profile::read ()
852 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
854 inform (0, "Not expected TAG.");
855 return false;
858 /* Skip the length of the section. */
859 gcov_read_unsigned ();
861 /* Read in the function/callsite profile, and store it in local
862 data structure. */
863 unsigned function_num = gcov_read_unsigned ();
864 for (unsigned i = 0; i < function_num; i++)
866 function_instance::function_instance_stack stack;
867 function_instance *s = function_instance::read_function_instance (
868 &stack, gcov_read_counter ());
869 afdo_profile_info->sum_all += s->total_count ();
870 map_[s->name ()] = s;
872 return true;
875 /* Return the function_instance in the profile that correspond to the
876 inline STACK. */
878 function_instance *
879 autofdo_source_profile::get_function_instance_by_inline_stack (
880 const inline_stack &stack) const
882 name_function_instance_map::const_iterator iter = map_.find (
883 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
884 if (iter == map_.end())
885 return NULL;
886 function_instance *s = iter->second;
887 for (unsigned i = stack.length() - 1; i > 0; i--)
889 s = s->get_function_instance_by_decl (
890 stack[i].second, stack[i - 1].first);
891 if (s == NULL)
892 return NULL;
894 return s;
897 /* Module profile is only used by LIPO. Here we simply ignore it. */
899 static void
900 fake_read_autofdo_module_profile ()
902 /* Read in the module info. */
903 gcov_read_unsigned ();
905 /* Skip the length of the section. */
906 gcov_read_unsigned ();
908 /* Read in the file name table. */
909 unsigned total_module_num = gcov_read_unsigned ();
910 gcc_assert (total_module_num == 0);
913 /* Read data from profile data file. */
915 static void
916 read_profile (void)
918 if (gcov_open (auto_profile_file, 1) == 0)
920 error ("cannot open profile file %s", auto_profile_file);
921 return;
924 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
926 error ("AutoFDO profile magic number does not match");
927 return;
930 /* Skip the version number. */
931 unsigned version = gcov_read_unsigned ();
932 if (version != AUTO_PROFILE_VERSION)
934 error ("AutoFDO profile version %u does match %u",
935 version, AUTO_PROFILE_VERSION);
936 return;
939 /* Skip the empty integer. */
940 gcov_read_unsigned ();
942 /* string_table. */
943 afdo_string_table = new string_table ();
944 if (!afdo_string_table->read())
946 error ("cannot read string table from %s", auto_profile_file);
947 return;
950 /* autofdo_source_profile. */
951 afdo_source_profile = autofdo_source_profile::create ();
952 if (afdo_source_profile == NULL)
954 error ("cannot read function profile from %s", auto_profile_file);
955 return;
958 /* autofdo_module_profile. */
959 fake_read_autofdo_module_profile ();
961 /* Read in the working set. */
962 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
964 error ("cannot read working set from %s", auto_profile_file);
965 return;
968 /* Skip the length of the section. */
969 gcov_read_unsigned ();
970 gcov_working_set_t set[128];
971 for (unsigned i = 0; i < 128; i++)
973 set[i].num_counters = gcov_read_unsigned ();
974 set[i].min_counter = gcov_read_counter ();
976 add_working_set (set);
979 /* From AutoFDO profiles, find values inside STMT for that we want to measure
980 histograms for indirect-call optimization.
982 This function is actually served for 2 purposes:
983 * before annotation, we need to mark histogram, promote and inline
984 * after annotation, we just need to mark, and let follow-up logic to
985 decide if it needs to promote and inline. */
987 static void
988 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
989 bool transform)
991 gimple *gs = gsi_stmt (*gsi);
992 tree callee;
994 if (map.size () == 0)
995 return;
996 gcall *stmt = dyn_cast <gcall *> (gs);
997 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
998 return;
1000 callee = gimple_call_fn (stmt);
1002 histogram_value hist = gimple_alloc_histogram_value (
1003 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1004 hist->n_counters = 3;
1005 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1006 gimple_add_histogram_value (cfun, stmt, hist);
1008 gcov_type total = 0;
1009 icall_target_map::const_iterator max_iter = map.end ();
1011 for (icall_target_map::const_iterator iter = map.begin ();
1012 iter != map.end (); ++iter)
1014 total += iter->second;
1015 if (max_iter == map.end () || max_iter->second < iter->second)
1016 max_iter = iter;
1019 hist->hvalue.counters[0]
1020 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1021 hist->hvalue.counters[1] = max_iter->second;
1022 hist->hvalue.counters[2] = total;
1024 if (!transform)
1025 return;
1027 struct cgraph_edge *indirect_edge
1028 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1029 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1030 get_identifier ((const char *) hist->hvalue.counters[0]));
1032 if (dump_file)
1034 fprintf (dump_file, "Indirect call -> direct call ");
1035 print_generic_expr (dump_file, callee, TDF_SLIM);
1036 fprintf (dump_file, " => ");
1037 print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1040 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1042 if (dump_file)
1043 fprintf (dump_file, " not transforming\n");
1044 return;
1046 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1048 if (dump_file)
1049 fprintf (dump_file, " no declaration\n");
1050 return;
1053 if (dump_file)
1055 fprintf (dump_file, " transformation on insn ");
1056 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1057 fprintf (dump_file, "\n");
1060 struct cgraph_edge *new_edge
1061 = indirect_edge->make_speculative (direct_call, 0, 0);
1062 new_edge->redirect_call_stmt_to_callee ();
1063 gimple_remove_histogram_value (cfun, stmt, hist);
1064 inline_call (new_edge, true, NULL, NULL, false);
1067 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1068 histograms and adds them to list VALUES. */
1070 static void
1071 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1072 bool transform)
1074 afdo_indirect_call (gsi, map, transform);
1077 typedef std::set<basic_block> bb_set;
1078 typedef std::set<edge> edge_set;
1080 static bool
1081 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1083 return annotated.find (bb) != annotated.end ();
1086 static void
1087 set_bb_annotated (basic_block bb, bb_set *annotated)
1089 annotated->insert (bb);
1092 static bool
1093 is_edge_annotated (const edge e, const edge_set &annotated)
1095 return annotated.find (e) != annotated.end ();
1098 static void
1099 set_edge_annotated (edge e, edge_set *annotated)
1101 annotated->insert (e);
1104 /* For a given BB, set its execution count. Attach value profile if a stmt
1105 is not in PROMOTED, because we only want to promote an indirect call once.
1106 Return TRUE if BB is annotated. */
1108 static bool
1109 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1111 gimple_stmt_iterator gsi;
1112 edge e;
1113 edge_iterator ei;
1114 gcov_type max_count = 0;
1115 bool has_annotated = false;
1117 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1119 count_info info;
1120 gimple *stmt = gsi_stmt (gsi);
1121 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1122 continue;
1123 if (afdo_source_profile->get_count_info (stmt, &info))
1125 if (info.count > max_count)
1126 max_count = info.count;
1127 has_annotated = true;
1128 if (info.targets.size () > 0
1129 && promoted.find (stmt) == promoted.end ())
1130 afdo_vpt (&gsi, info.targets, false);
1134 if (!has_annotated)
1135 return false;
1137 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1138 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1139 for (gphi_iterator gpi = gsi_start_phis (bb);
1140 !gsi_end_p (gpi);
1141 gsi_next (&gpi))
1143 gphi *phi = gpi.phi ();
1144 size_t i;
1145 for (i = 0; i < gimple_phi_num_args (phi); i++)
1146 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1148 FOR_EACH_EDGE (e, ei, bb->succs)
1149 afdo_source_profile->mark_annotated (e->goto_locus);
1151 bb->count = max_count;
1152 return true;
1155 /* BB1 and BB2 are in an equivalent class iff:
1156 1. BB1 dominates BB2.
1157 2. BB2 post-dominates BB1.
1158 3. BB1 and BB2 are in the same loop nest.
1159 This function finds the equivalent class for each basic block, and
1160 stores a pointer to the first BB in its equivalent class. Meanwhile,
1161 set bb counts for the same equivalent class to be idenical. Update
1162 ANNOTATED_BB for the first BB in its equivalent class. */
1164 static void
1165 afdo_find_equiv_class (bb_set *annotated_bb)
1167 basic_block bb;
1169 FOR_ALL_BB_FN (bb, cfun)
1170 bb->aux = NULL;
1172 FOR_ALL_BB_FN (bb, cfun)
1174 vec<basic_block> dom_bbs;
1175 basic_block bb1;
1176 int i;
1178 if (bb->aux != NULL)
1179 continue;
1180 bb->aux = bb;
1181 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1182 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1183 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1184 && bb1->loop_father == bb->loop_father)
1186 bb1->aux = bb;
1187 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1189 bb->count = bb1->count;
1190 set_bb_annotated (bb, annotated_bb);
1193 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1194 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1195 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1196 && bb1->loop_father == bb->loop_father)
1198 bb1->aux = bb;
1199 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1201 bb->count = bb1->count;
1202 set_bb_annotated (bb, annotated_bb);
1208 /* If a basic block's count is known, and only one of its in/out edges' count
1209 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1210 edges' counts are known, then the basic block's unknown count can also be
1211 calculated.
1212 IS_SUCC is true if out edges of a basic blocks are examined.
1213 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1214 Return TRUE if any basic block/edge count is changed. */
1216 static bool
1217 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1218 edge_set *annotated_edge)
1220 basic_block bb;
1221 bool changed = false;
1223 FOR_EACH_BB_FN (bb, cfun)
1225 edge e, unknown_edge = NULL;
1226 edge_iterator ei;
1227 int num_unknown_edge = 0;
1228 gcov_type total_known_count = 0;
1230 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1231 if (!is_edge_annotated (e, *annotated_edge))
1232 num_unknown_edge++, unknown_edge = e;
1233 else
1234 total_known_count += e->count;
1236 if (num_unknown_edge == 0)
1238 if (total_known_count > bb->count)
1240 bb->count = total_known_count;
1241 changed = true;
1243 if (!is_bb_annotated (bb, *annotated_bb))
1245 set_bb_annotated (bb, annotated_bb);
1246 changed = true;
1249 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1251 if (bb->count >= total_known_count)
1252 unknown_edge->count = bb->count - total_known_count;
1253 else
1254 unknown_edge->count = 0;
1255 set_edge_annotated (unknown_edge, annotated_edge);
1256 changed = true;
1259 return changed;
1262 /* Special propagation for circuit expressions. Because GCC translates
1263 control flow into data flow for circuit expressions. E.g.
1264 BB1:
1265 if (a && b)
1267 else
1270 will be translated into:
1272 BB1:
1273 if (a)
1274 goto BB.t1
1275 else
1276 goto BB.t3
1277 BB.t1:
1278 if (b)
1279 goto BB.t2
1280 else
1281 goto BB.t3
1282 BB.t2:
1283 goto BB.t3
1284 BB.t3:
1285 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1286 if (tmp)
1287 goto BB2
1288 else
1289 goto BB3
1291 In this case, we need to propagate through PHI to determine the edge
1292 count of BB1->BB.t1, BB.t1->BB.t2.
1293 Update ANNOTATED_EDGE accordingly. */
1295 static void
1296 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1298 basic_block bb;
1299 FOR_ALL_BB_FN (bb, cfun)
1301 gimple *def_stmt;
1302 tree cmp_rhs, cmp_lhs;
1303 gimple *cmp_stmt = last_stmt (bb);
1304 edge e;
1305 edge_iterator ei;
1307 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1308 continue;
1309 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1310 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1311 if (!TREE_CONSTANT (cmp_rhs)
1312 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1313 continue;
1314 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1315 continue;
1316 if (!is_bb_annotated (bb, annotated_bb))
1317 continue;
1318 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1319 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1320 && gimple_assign_single_p (def_stmt)
1321 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1322 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1323 if (!def_stmt)
1324 continue;
1325 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1326 if (!phi_stmt)
1327 continue;
1328 FOR_EACH_EDGE (e, ei, bb->succs)
1330 unsigned i, total = 0;
1331 edge only_one;
1332 bool check_value_one = (((integer_onep (cmp_rhs))
1333 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1334 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1335 if (!is_edge_annotated (e, *annotated_edge))
1336 continue;
1337 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1339 tree val = gimple_phi_arg_def (phi_stmt, i);
1340 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1342 if (!TREE_CONSTANT (val)
1343 || !(integer_zerop (val) || integer_onep (val)))
1344 continue;
1345 if (check_value_one ^ integer_onep (val))
1346 continue;
1347 total++;
1348 only_one = ep;
1349 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1351 ep->probability = 0;
1352 ep->count = 0;
1353 set_edge_annotated (ep, annotated_edge);
1356 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1358 only_one->probability = e->probability;
1359 only_one->count = e->count;
1360 set_edge_annotated (only_one, annotated_edge);
1366 /* Propagate the basic block count and edge count on the control flow
1367 graph. We do the propagation iteratively until stablize. */
1369 static void
1370 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1372 basic_block bb;
1373 bool changed = true;
1374 int i = 0;
1376 FOR_ALL_BB_FN (bb, cfun)
1378 bb->count = ((basic_block)bb->aux)->count;
1379 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1380 set_bb_annotated (bb, annotated_bb);
1383 while (changed && i++ < 10)
1385 changed = false;
1387 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1388 changed = true;
1389 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1390 changed = true;
1391 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1395 /* Propagate counts on control flow graph and calculate branch
1396 probabilities. */
1398 static void
1399 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1401 basic_block bb;
1402 bool has_sample = false;
1404 FOR_EACH_BB_FN (bb, cfun)
1406 if (bb->count > 0)
1408 has_sample = true;
1409 break;
1413 if (!has_sample)
1414 return;
1416 calculate_dominance_info (CDI_POST_DOMINATORS);
1417 calculate_dominance_info (CDI_DOMINATORS);
1418 loop_optimizer_init (0);
1420 afdo_find_equiv_class (annotated_bb);
1421 afdo_propagate (annotated_bb, annotated_edge);
1423 FOR_EACH_BB_FN (bb, cfun)
1425 edge e;
1426 edge_iterator ei;
1427 int num_unknown_succ = 0;
1428 gcov_type total_count = 0;
1430 FOR_EACH_EDGE (e, ei, bb->succs)
1432 if (!is_edge_annotated (e, *annotated_edge))
1433 num_unknown_succ++;
1434 else
1435 total_count += e->count;
1437 if (num_unknown_succ == 0 && total_count > 0)
1439 FOR_EACH_EDGE (e, ei, bb->succs)
1440 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1443 FOR_ALL_BB_FN (bb, cfun)
1445 edge e;
1446 edge_iterator ei;
1448 FOR_EACH_EDGE (e, ei, bb->succs)
1449 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1450 bb->aux = NULL;
1453 loop_optimizer_finalize ();
1454 free_dominance_info (CDI_DOMINATORS);
1455 free_dominance_info (CDI_POST_DOMINATORS);
1458 /* Perform value profile transformation using AutoFDO profile. Add the
1459 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1460 indirect call promoted. */
1462 static bool
1463 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1465 basic_block bb;
1466 if (afdo_source_profile->get_function_instance_by_decl (
1467 current_function_decl) == NULL)
1468 return false;
1470 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1472 bool has_vpt = false;
1473 FOR_EACH_BB_FN (bb, cfun)
1475 if (!has_indirect_call (bb))
1476 continue;
1477 gimple_stmt_iterator gsi;
1479 gcov_type bb_count = 0;
1480 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1482 count_info info;
1483 gimple *stmt = gsi_stmt (gsi);
1484 if (afdo_source_profile->get_count_info (stmt, &info))
1485 bb_count = MAX (bb_count, info.count);
1488 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1490 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1491 /* IC_promotion and early_inline_2 is done in multiple iterations.
1492 No need to promoted the stmt if its in promoted_stmts (means
1493 it is already been promoted in the previous iterations). */
1494 if ((!stmt) || gimple_call_fn (stmt) == NULL
1495 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1496 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1497 continue;
1499 count_info info;
1500 afdo_source_profile->get_count_info (stmt, &info);
1501 info.count = bb_count;
1502 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1504 /* Promote the indirect call and update the promoted_stmts. */
1505 promoted_stmts->insert (stmt);
1506 afdo_vpt (&gsi, info.targets, true);
1507 has_vpt = true;
1512 if (has_vpt)
1514 optimize_inline_calls (current_function_decl);
1515 return true;
1518 return false;
1521 /* Annotate auto profile to the control flow graph. Do not annotate value
1522 profile for stmts in PROMOTED_STMTS. */
1524 static void
1525 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1527 basic_block bb;
1528 bb_set annotated_bb;
1529 edge_set annotated_edge;
1530 const function_instance *s
1531 = afdo_source_profile->get_function_instance_by_decl (
1532 current_function_decl);
1534 if (s == NULL)
1535 return;
1536 cgraph_node::get (current_function_decl)->count = s->head_count ();
1537 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1538 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1540 FOR_EACH_BB_FN (bb, cfun)
1542 edge e;
1543 edge_iterator ei;
1545 bb->count = 0;
1546 FOR_EACH_EDGE (e, ei, bb->succs)
1547 e->count = 0;
1549 if (afdo_set_bb_count (bb, promoted_stmts))
1550 set_bb_annotated (bb, &annotated_bb);
1551 if (bb->count > max_count)
1552 max_count = bb->count;
1554 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1555 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1557 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1558 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1559 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1561 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1562 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1564 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1565 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1566 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1568 afdo_source_profile->mark_annotated (
1569 DECL_SOURCE_LOCATION (current_function_decl));
1570 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1571 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1572 if (max_count > 0)
1574 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1575 counts_to_freqs ();
1576 profile_status_for_fn (cfun) = PROFILE_READ;
1578 if (flag_value_profile_transformations)
1580 gimple_value_profile_transformations ();
1581 free_dominance_info (CDI_DOMINATORS);
1582 free_dominance_info (CDI_POST_DOMINATORS);
1583 update_ssa (TODO_update_ssa);
1587 /* Wrapper function to invoke early inliner. */
1589 static void
1590 early_inline ()
1592 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1593 unsigned todo = early_inliner (cfun);
1594 if (todo & TODO_update_ssa_any)
1595 update_ssa (TODO_update_ssa);
1598 /* Use AutoFDO profile to annoate the control flow graph.
1599 Return the todo flag. */
1601 static unsigned int
1602 auto_profile (void)
1604 struct cgraph_node *node;
1606 if (symtab->state == FINISHED)
1607 return 0;
1609 init_node_map (true);
1610 profile_info = autofdo::afdo_profile_info;
1612 FOR_EACH_FUNCTION (node)
1614 if (!gimple_has_body_p (node->decl))
1615 continue;
1617 /* Don't profile functions produced for builtin stuff. */
1618 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1619 continue;
1621 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1623 /* First do indirect call promotion and early inline to make the
1624 IR match the profiled binary before actual annotation.
1626 This is needed because an indirect call might have been promoted
1627 and inlined in the profiled binary. If we do not promote and
1628 inline these indirect calls before annotation, the profile for
1629 these promoted functions will be lost.
1631 e.g. foo() --indirect_call--> bar()
1632 In profiled binary, the callsite is promoted and inlined, making
1633 the profile look like:
1635 foo: {
1636 loc_foo_1: count_1
1637 bar@loc_foo_2: {
1638 loc_bar_1: count_2
1639 loc_bar_2: count_3
1643 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1644 If we perform annotation on it, the profile inside bar@loc_foo2
1645 will be wasted.
1647 To avoid this, we promote loc_foo_2 and inline the promoted bar
1648 function before annotation, so the profile inside bar@loc_foo2
1649 will be useful. */
1650 autofdo::stmt_set promoted_stmts;
1651 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1653 if (!flag_value_profile_transformations
1654 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1655 break;
1656 early_inline ();
1659 early_inline ();
1660 autofdo::afdo_annotate_cfg (promoted_stmts);
1661 compute_function_frequency ();
1663 /* Local pure-const may imply need to fixup the cfg. */
1664 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1665 cleanup_tree_cfg ();
1667 free_dominance_info (CDI_DOMINATORS);
1668 free_dominance_info (CDI_POST_DOMINATORS);
1669 cgraph_edge::rebuild_edges ();
1670 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1671 pop_cfun ();
1674 return TODO_rebuild_cgraph_edges;
1676 } /* namespace autofdo. */
1678 /* Read the profile from the profile data file. */
1680 void
1681 read_autofdo_file (void)
1683 if (auto_profile_file == NULL)
1684 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1686 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1687 1, sizeof (struct gcov_ctr_summary));
1688 autofdo::afdo_profile_info->runs = 1;
1689 autofdo::afdo_profile_info->sum_max = 0;
1690 autofdo::afdo_profile_info->sum_all = 0;
1692 /* Read the profile from the profile file. */
1693 autofdo::read_profile ();
1696 /* Free the resources. */
1698 void
1699 end_auto_profile (void)
1701 delete autofdo::afdo_source_profile;
1702 delete autofdo::afdo_string_table;
1703 profile_info = NULL;
1706 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1708 bool
1709 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1711 gcov_type count
1712 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1714 if (count > 0)
1716 bool is_hot;
1717 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1718 /* At early inline stage, profile_info is not set yet. We need to
1719 temporarily set it to afdo_profile_info to calculate hotness. */
1720 profile_info = autofdo::afdo_profile_info;
1721 is_hot = maybe_hot_count_p (NULL, count);
1722 profile_info = saved_profile_info;
1723 return is_hot;
1726 return false;
1729 namespace
1732 const pass_data pass_data_ipa_auto_profile = {
1733 SIMPLE_IPA_PASS, "afdo", /* name */
1734 OPTGROUP_NONE, /* optinfo_flags */
1735 TV_IPA_AUTOFDO, /* tv_id */
1736 0, /* properties_required */
1737 0, /* properties_provided */
1738 0, /* properties_destroyed */
1739 0, /* todo_flags_start */
1740 0, /* todo_flags_finish */
1743 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1745 public:
1746 pass_ipa_auto_profile (gcc::context *ctxt)
1747 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1751 /* opt_pass methods: */
1752 virtual bool
1753 gate (function *)
1755 return flag_auto_profile;
1757 virtual unsigned int
1758 execute (function *)
1760 return autofdo::auto_profile ();
1762 }; // class pass_ipa_auto_profile
1764 } // anon namespace
1766 simple_ipa_opt_pass *
1767 make_pass_ipa_auto_profile (gcc::context *ctxt)
1769 return new pass_ipa_auto_profile (ctxt);