2015-03-04 Robert Dewar <dewar@adacore.com>
[official-gcc.git] / gcc / auto-profile.c
blobba2d5ab654e633ac63f88d565d19c6c29851df3a
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
24 #include <string.h>
25 #include <map>
26 #include <set>
28 #include "coretypes.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "options.h"
37 #include "wide-int.h"
38 #include "inchash.h"
39 #include "tree.h"
40 #include "fold-const.h"
41 #include "tree-pass.h"
42 #include "flags.h"
43 #include "predict.h"
44 #include "vec.h"
45 #include "hashtab.h"
46 #include "hash-set.h"
47 #include "machmode.h"
48 #include "tm.h"
49 #include "hard-reg-set.h"
50 #include "input.h"
51 #include "function.h"
52 #include "dominance.h"
53 #include "cfg.h"
54 #include "basic-block.h"
55 #include "diagnostic-core.h"
56 #include "gcov-io.h"
57 #include "profile.h"
58 #include "langhooks.h"
59 #include "opts.h"
60 #include "tree-pass.h"
61 #include "cfgloop.h"
62 #include "tree-ssa-alias.h"
63 #include "tree-cfg.h"
64 #include "tree-cfgcleanup.h"
65 #include "tree-ssa-operands.h"
66 #include "tree-into-ssa.h"
67 #include "internal-fn.h"
68 #include "is-a.h"
69 #include "gimple-expr.h"
70 #include "gimple.h"
71 #include "gimple-iterator.h"
72 #include "gimple-ssa.h"
73 #include "hash-map.h"
74 #include "plugin-api.h"
75 #include "ipa-ref.h"
76 #include "cgraph.h"
77 #include "value-prof.h"
78 #include "coverage.h"
79 #include "params.h"
80 #include "alloc-pool.h"
81 #include "symbol-summary.h"
82 #include "ipa-prop.h"
83 #include "ipa-inline.h"
84 #include "tree-inline.h"
85 #include "stringpool.h"
86 #include "auto-profile.h"
88 /* The following routines implements AutoFDO optimization.
90 This optimization uses sampling profiles to annotate basic block counts
91 and uses heuristics to estimate branch probabilities.
93 There are three phases in AutoFDO:
95 Phase 1: Read profile from the profile data file.
96 The following info is read from the profile datafile:
97 * string_table: a map between function name and its index.
98 * autofdo_source_profile: a map from function_instance name to
99 function_instance. This is represented as a forest of
100 function_instances.
101 * WorkingSet: a histogram of how many instructions are covered for a
102 given percentage of total cycles. This is describing the binary
103 level information (not source level). This info is used to help
104 decide if we want aggressive optimizations that could increase
105 code footprint (e.g. loop unroll etc.)
106 A function instance is an instance of function that could either be a
107 standalone symbol, or a clone of a function that is inlined into another
108 function.
110 Phase 2: Early inline + value profile transformation.
111 Early inline uses autofdo_source_profile to find if a callsite is:
112 * inlined in the profiled binary.
113 * callee body is hot in the profiling run.
114 If both condition satisfies, early inline will inline the callsite
115 regardless of the code growth.
116 Phase 2 is an iterative process. During each iteration, we also check
117 if an indirect callsite is promoted and inlined in the profiling run.
118 If yes, vpt will happen to force promote it and in the next iteration,
119 einline will inline the promoted callsite in the next iteration.
121 Phase 3: Annotate control flow graph.
122 AutoFDO uses a separate pass to:
123 * Annotate basic block count
124 * Estimate branch probability
126 After the above 3 phases, all profile is readily annotated on the GCC IR.
127 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
128 use of the profile. E.g. it uses existing mechanism to calculate the basic
129 block/edge frequency, as well as the cgraph node/edge count.
132 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
133 #define AUTO_PROFILE_VERSION 1
135 namespace autofdo
138 /* Represent a source location: (function_decl, lineno). */
139 typedef std::pair<tree, unsigned> decl_lineno;
141 /* Represent an inline stack. vector[0] is the leaf node. */
142 typedef auto_vec<decl_lineno> inline_stack;
144 /* String array that stores function names. */
145 typedef auto_vec<char *> string_vector;
147 /* Map from function name's index in string_table to target's
148 execution count. */
149 typedef std::map<unsigned, gcov_type> icall_target_map;
151 /* Set of gimple stmts. Used to track if the stmt has already been promoted
152 to direct call. */
153 typedef std::set<gimple> stmt_set;
155 /* Represent count info of an inline stack. */
156 struct count_info
158 /* Sampled count of the inline stack. */
159 gcov_type count;
161 /* Map from indirect call target to its sample count. */
162 icall_target_map targets;
164 /* Whether this inline stack is already used in annotation.
166 Each inline stack should only be used to annotate IR once.
167 This will be enforced when instruction-level discriminator
168 is supported. */
169 bool annotated;
172 /* operator< for "const char *". */
173 struct string_compare
175 bool operator()(const char *a, const char *b) const
177 return strcmp (a, b) < 0;
181 /* Store a string array, indexed by string position in the array. */
182 class string_table
184 public:
185 string_table ()
188 ~string_table ();
190 /* For a given string, returns its index. */
191 int get_index (const char *name) const;
193 /* For a given decl, returns the index of the decl name. */
194 int get_index_by_decl (tree decl) const;
196 /* For a given index, returns the string. */
197 const char *get_name (int index) const;
199 /* Read profile, return TRUE on success. */
200 bool read ();
202 private:
203 typedef std::map<const char *, unsigned, string_compare> string_index_map;
204 string_vector vector_;
205 string_index_map map_;
208 /* Profile of a function instance:
209 1. total_count of the function.
210 2. head_count (entry basic block count) of the function (only valid when
211 function is a top-level function_instance, i.e. it is the original copy
212 instead of the inlined copy).
213 3. map from source location (decl_lineno) to profile (count_info).
214 4. map from callsite to callee function_instance. */
215 class function_instance
217 public:
218 typedef auto_vec<function_instance *> function_instance_stack;
220 /* Read the profile and return a function_instance with head count as
221 HEAD_COUNT. Recursively read callsites to create nested function_instances
222 too. STACK is used to track the recursive creation process. */
223 static function_instance *
224 read_function_instance (function_instance_stack *stack,
225 gcov_type head_count);
227 /* Recursively deallocate all callsites (nested function_instances). */
228 ~function_instance ();
230 /* Accessors. */
232 name () const
234 return name_;
236 gcov_type
237 total_count () const
239 return total_count_;
241 gcov_type
242 head_count () const
244 return head_count_;
247 /* Traverse callsites of the current function_instance to find one at the
248 location of LINENO and callee name represented in DECL. */
249 function_instance *get_function_instance_by_decl (unsigned lineno,
250 tree decl) const;
252 /* Store the profile info for LOC in INFO. Return TRUE if profile info
253 is found. */
254 bool get_count_info (location_t loc, count_info *info) const;
256 /* Read the inlined indirect call target profile for STMT and store it in
257 MAP, return the total count for all inlined indirect calls. */
258 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
260 /* Sum of counts that is used during annotation. */
261 gcov_type total_annotated_count () const;
263 /* Mark LOC as annotated. */
264 void mark_annotated (location_t loc);
266 private:
267 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
268 typedef std::pair<unsigned, unsigned> callsite;
270 /* Map from callsite to callee function_instance. */
271 typedef std::map<callsite, function_instance *> callsite_map;
273 function_instance (unsigned name, gcov_type head_count)
274 : name_ (name), total_count_ (0), head_count_ (head_count)
278 /* Map from source location (decl_lineno) to profile (count_info). */
279 typedef std::map<unsigned, count_info> position_count_map;
281 /* function_instance name index in the string_table. */
282 unsigned name_;
284 /* Total sample count. */
285 gcov_type total_count_;
287 /* Entry BB's sample count. */
288 gcov_type head_count_;
290 /* Map from callsite location to callee function_instance. */
291 callsite_map callsites;
293 /* Map from source location to count_info. */
294 position_count_map pos_counts;
297 /* Profile for all functions. */
298 class autofdo_source_profile
300 public:
301 static autofdo_source_profile *
302 create ()
304 autofdo_source_profile *map = new autofdo_source_profile ();
306 if (map->read ())
307 return map;
308 delete map;
309 return NULL;
312 ~autofdo_source_profile ();
314 /* For a given DECL, returns the top-level function_instance. */
315 function_instance *get_function_instance_by_decl (tree decl) const;
317 /* Find count_info for a given gimple STMT. If found, store the count_info
318 in INFO and return true; otherwise return false. */
319 bool get_count_info (gimple stmt, count_info *info) const;
321 /* Find total count of the callee of EDGE. */
322 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
324 /* Update value profile INFO for STMT from the inlined indirect callsite.
325 Return true if INFO is updated. */
326 bool update_inlined_ind_target (gcall *stmt, count_info *info);
328 /* Mark LOC as annotated. */
329 void mark_annotated (location_t loc);
331 private:
332 /* Map from function_instance name index (in string_table) to
333 function_instance. */
334 typedef std::map<unsigned, function_instance *> name_function_instance_map;
336 autofdo_source_profile () {}
338 /* Read AutoFDO profile and returns TRUE on success. */
339 bool read ();
341 /* Return the function_instance in the profile that correspond to the
342 inline STACK. */
343 function_instance *
344 get_function_instance_by_inline_stack (const inline_stack &stack) const;
346 name_function_instance_map map_;
349 /* Store the strings read from the profile data file. */
350 static string_table *afdo_string_table;
352 /* Store the AutoFDO source profile. */
353 static autofdo_source_profile *afdo_source_profile;
355 /* gcov_ctr_summary structure to store the profile_info. */
356 static struct gcov_ctr_summary *afdo_profile_info;
358 /* Helper functions. */
360 /* Return the original name of NAME: strip the suffix that starts
361 with '.' Caller is responsible for freeing RET. */
363 static char *
364 get_original_name (const char *name)
366 char *ret = xstrdup (name);
367 char *find = strchr (ret, '.');
368 if (find != NULL)
369 *find = 0;
370 return ret;
373 /* Return the combined location, which is a 32bit integer in which
374 higher 16 bits stores the line offset of LOC to the start lineno
375 of DECL, The lower 16 bits stores the discriminator. */
377 static unsigned
378 get_combined_location (location_t loc, tree decl)
380 /* TODO: allow more bits for line and less bits for discriminator. */
381 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
382 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
383 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
386 /* Return the function decl of a given lexical BLOCK. */
388 static tree
389 get_function_decl_from_block (tree block)
391 tree decl;
393 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
394 return NULL_TREE;
396 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
397 decl && (TREE_CODE (decl) == BLOCK);
398 decl = BLOCK_ABSTRACT_ORIGIN (decl))
399 if (TREE_CODE (decl) == FUNCTION_DECL)
400 break;
401 return decl;
404 /* Store inline stack for STMT in STACK. */
406 static void
407 get_inline_stack (location_t locus, inline_stack *stack)
409 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
410 return;
412 tree block = LOCATION_BLOCK (locus);
413 if (block && TREE_CODE (block) == BLOCK)
415 int level = 0;
416 for (block = BLOCK_SUPERCONTEXT (block);
417 block && (TREE_CODE (block) == BLOCK);
418 block = BLOCK_SUPERCONTEXT (block))
420 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
421 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
422 continue;
424 tree decl = get_function_decl_from_block (block);
425 stack->safe_push (
426 std::make_pair (decl, get_combined_location (locus, decl)));
427 locus = tmp_locus;
428 level++;
431 stack->safe_push (
432 std::make_pair (current_function_decl,
433 get_combined_location (locus, current_function_decl)));
436 /* Return STMT's combined location, which is a 32bit integer in which
437 higher 16 bits stores the line offset of LOC to the start lineno
438 of DECL, The lower 16 bits stores the discriminator. */
440 static unsigned
441 get_relative_location_for_stmt (gimple stmt)
443 location_t locus = gimple_location (stmt);
444 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
445 return UNKNOWN_LOCATION;
447 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
448 block = BLOCK_SUPERCONTEXT (block))
449 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
450 return get_combined_location (locus,
451 get_function_decl_from_block (block));
452 return get_combined_location (locus, current_function_decl);
455 /* Return true if BB contains indirect call. */
457 static bool
458 has_indirect_call (basic_block bb)
460 gimple_stmt_iterator gsi;
462 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
464 gimple stmt = gsi_stmt (gsi);
465 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
466 && (gimple_call_fn (stmt) == NULL
467 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
468 return true;
470 return false;
473 /* Member functions for string_table. */
475 /* Deconstructor. */
477 string_table::~string_table ()
479 for (unsigned i = 0; i < vector_.length (); i++)
480 free (vector_[i]);
484 /* Return the index of a given function NAME. Return -1 if NAME is not
485 found in string table. */
488 string_table::get_index (const char *name) const
490 if (name == NULL)
491 return -1;
492 string_index_map::const_iterator iter = map_.find (name);
493 if (iter == map_.end ())
494 return -1;
496 return iter->second;
499 /* Return the index of a given function DECL. Return -1 if DECL is not
500 found in string table. */
503 string_table::get_index_by_decl (tree decl) const
505 char *name
506 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
507 int ret = get_index (name);
508 free (name);
509 if (ret != -1)
510 return ret;
511 ret = get_index (lang_hooks.dwarf_name (decl, 0));
512 if (ret != -1)
513 return ret;
514 if (DECL_ABSTRACT_ORIGIN (decl))
515 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
517 return -1;
520 /* Return the function name of a given INDEX. */
522 const char *
523 string_table::get_name (int index) const
525 gcc_assert (index > 0 && index < (int)vector_.length ());
526 return vector_[index];
529 /* Read the string table. Return TRUE if reading is successful. */
531 bool
532 string_table::read ()
534 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
535 return false;
536 /* Skip the length of the section. */
537 gcov_read_unsigned ();
538 /* Read in the file name table. */
539 unsigned string_num = gcov_read_unsigned ();
540 for (unsigned i = 0; i < string_num; i++)
542 vector_.safe_push (get_original_name (gcov_read_string ()));
543 map_[vector_.last ()] = i;
545 return true;
548 /* Member functions for function_instance. */
550 function_instance::~function_instance ()
552 for (callsite_map::iterator iter = callsites.begin ();
553 iter != callsites.end (); ++iter)
554 delete iter->second;
557 /* Traverse callsites of the current function_instance to find one at the
558 location of LINENO and callee name represented in DECL. */
560 function_instance *
561 function_instance::get_function_instance_by_decl (unsigned lineno,
562 tree decl) const
564 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
565 if (func_name_idx != -1)
567 callsite_map::const_iterator ret
568 = callsites.find (std::make_pair (lineno, func_name_idx));
569 if (ret != callsites.end ())
570 return ret->second;
572 func_name_idx
573 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
574 if (func_name_idx != -1)
576 callsite_map::const_iterator ret
577 = callsites.find (std::make_pair (lineno, func_name_idx));
578 if (ret != callsites.end ())
579 return ret->second;
581 if (DECL_ABSTRACT_ORIGIN (decl))
582 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
584 return NULL;
587 /* Store the profile info for LOC in INFO. Return TRUE if profile info
588 is found. */
590 bool
591 function_instance::get_count_info (location_t loc, count_info *info) const
593 position_count_map::const_iterator iter = pos_counts.find (loc);
594 if (iter == pos_counts.end ())
595 return false;
596 *info = iter->second;
597 return true;
600 /* Mark LOC as annotated. */
602 void
603 function_instance::mark_annotated (location_t loc)
605 position_count_map::iterator iter = pos_counts.find (loc);
606 if (iter == pos_counts.end ())
607 return;
608 iter->second.annotated = true;
611 /* Read the inlined indirect call target profile for STMT and store it in
612 MAP, return the total count for all inlined indirect calls. */
614 gcov_type
615 function_instance::find_icall_target_map (gcall *stmt,
616 icall_target_map *map) const
618 gcov_type ret = 0;
619 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
621 for (callsite_map::const_iterator iter = callsites.begin ();
622 iter != callsites.end (); ++iter)
624 unsigned callee = iter->second->name ();
625 /* Check if callsite location match the stmt. */
626 if (iter->first.first != stmt_offset)
627 continue;
628 struct cgraph_node *node = cgraph_node::get_for_asmname (
629 get_identifier (afdo_string_table->get_name (callee)));
630 if (node == NULL)
631 continue;
632 if (!check_ic_target (stmt, node))
633 continue;
634 (*map)[callee] = iter->second->total_count ();
635 ret += iter->second->total_count ();
637 return ret;
640 /* Read the profile and create a function_instance with head count as
641 HEAD_COUNT. Recursively read callsites to create nested function_instances
642 too. STACK is used to track the recursive creation process. */
644 /* function instance profile format:
646 ENTRY_COUNT: 8 bytes
647 NAME_INDEX: 4 bytes
648 NUM_POS_COUNTS: 4 bytes
649 NUM_CALLSITES: 4 byte
650 POS_COUNT_1:
651 POS_1_OFFSET: 4 bytes
652 NUM_TARGETS: 4 bytes
653 COUNT: 8 bytes
654 TARGET_1:
655 VALUE_PROFILE_TYPE: 4 bytes
656 TARGET_IDX: 8 bytes
657 COUNT: 8 bytes
658 TARGET_2
660 TARGET_n
661 POS_COUNT_2
663 POS_COUNT_N
664 CALLSITE_1:
665 CALLSITE_1_OFFSET: 4 bytes
666 FUNCTION_INSTANCE_PROFILE (nested)
667 CALLSITE_2
669 CALLSITE_n. */
671 function_instance *
672 function_instance::read_function_instance (function_instance_stack *stack,
673 gcov_type head_count)
675 unsigned name = gcov_read_unsigned ();
676 unsigned num_pos_counts = gcov_read_unsigned ();
677 unsigned num_callsites = gcov_read_unsigned ();
678 function_instance *s = new function_instance (name, head_count);
679 stack->safe_push (s);
681 for (unsigned i = 0; i < num_pos_counts; i++)
683 unsigned offset = gcov_read_unsigned () & 0xffff0000;
684 unsigned num_targets = gcov_read_unsigned ();
685 gcov_type count = gcov_read_counter ();
686 s->pos_counts[offset].count = count;
687 for (unsigned j = 0; j < stack->length (); j++)
688 (*stack)[j]->total_count_ += count;
689 for (unsigned j = 0; j < num_targets; j++)
691 /* Only indirect call target histogram is supported now. */
692 gcov_read_unsigned ();
693 gcov_type target_idx = gcov_read_counter ();
694 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
697 for (unsigned i = 0; i < num_callsites; i++)
699 unsigned offset = gcov_read_unsigned ();
700 function_instance *callee_function_instance
701 = read_function_instance (stack, 0);
702 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
703 = callee_function_instance;
705 stack->pop ();
706 return s;
709 /* Sum of counts that is used during annotation. */
711 gcov_type
712 function_instance::total_annotated_count () const
714 gcov_type ret = 0;
715 for (callsite_map::const_iterator iter = callsites.begin ();
716 iter != callsites.end (); ++iter)
717 ret += iter->second->total_annotated_count ();
718 for (position_count_map::const_iterator iter = pos_counts.begin ();
719 iter != pos_counts.end (); ++iter)
720 if (iter->second.annotated)
721 ret += iter->second.count;
722 return ret;
725 /* Member functions for autofdo_source_profile. */
727 autofdo_source_profile::~autofdo_source_profile ()
729 for (name_function_instance_map::const_iterator iter = map_.begin ();
730 iter != map_.end (); ++iter)
731 delete iter->second;
734 /* For a given DECL, returns the top-level function_instance. */
736 function_instance *
737 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
739 int index = afdo_string_table->get_index_by_decl (decl);
740 if (index == -1)
741 return NULL;
742 name_function_instance_map::const_iterator ret = map_.find (index);
743 return ret == map_.end () ? NULL : ret->second;
746 /* Find count_info for a given gimple STMT. If found, store the count_info
747 in INFO and return true; otherwise return false. */
749 bool
750 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
752 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
753 return false;
755 inline_stack stack;
756 get_inline_stack (gimple_location (stmt), &stack);
757 if (stack.length () == 0)
758 return false;
759 function_instance *s = get_function_instance_by_inline_stack (stack);
760 if (s == NULL)
761 return false;
762 return s->get_count_info (stack[0].second, info);
765 /* Mark LOC as annotated. */
767 void
768 autofdo_source_profile::mark_annotated (location_t loc)
770 inline_stack stack;
771 get_inline_stack (loc, &stack);
772 if (stack.length () == 0)
773 return;
774 function_instance *s = get_function_instance_by_inline_stack (stack);
775 if (s == NULL)
776 return;
777 s->mark_annotated (stack[0].second);
780 /* Update value profile INFO for STMT from the inlined indirect callsite.
781 Return true if INFO is updated. */
783 bool
784 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
785 count_info *info)
787 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
788 return false;
790 count_info old_info;
791 get_count_info (stmt, &old_info);
792 gcov_type total = 0;
793 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
794 iter != old_info.targets.end (); ++iter)
795 total += iter->second;
797 /* Program behavior changed, original promoted (and inlined) target is not
798 hot any more. Will avoid promote the original target.
800 To check if original promoted target is still hot, we check the total
801 count of the unpromoted targets (stored in old_info). If it is no less
802 than half of the callsite count (stored in INFO), the original promoted
803 target is considered not hot any more. */
804 if (total >= info->count / 2)
805 return false;
807 inline_stack stack;
808 get_inline_stack (gimple_location (stmt), &stack);
809 if (stack.length () == 0)
810 return false;
811 function_instance *s = get_function_instance_by_inline_stack (stack);
812 if (s == NULL)
813 return false;
814 icall_target_map map;
815 if (s->find_icall_target_map (stmt, &map) == 0)
816 return false;
817 for (icall_target_map::const_iterator iter = map.begin ();
818 iter != map.end (); ++iter)
819 info->targets[iter->first] = iter->second;
820 return true;
823 /* Find total count of the callee of EDGE. */
825 gcov_type
826 autofdo_source_profile::get_callsite_total_count (
827 struct cgraph_edge *edge) const
829 inline_stack stack;
830 stack.safe_push (std::make_pair (edge->callee->decl, 0));
831 get_inline_stack (gimple_location (edge->call_stmt), &stack);
833 function_instance *s = get_function_instance_by_inline_stack (stack);
834 if (s == NULL
835 || afdo_string_table->get_index (IDENTIFIER_POINTER (
836 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
837 return 0;
839 return s->total_count ();
842 /* Read AutoFDO profile and returns TRUE on success. */
844 /* source profile format:
846 GCOV_TAG_AFDO_FUNCTION: 4 bytes
847 LENGTH: 4 bytes
848 NUM_FUNCTIONS: 4 bytes
849 FUNCTION_INSTANCE_1
850 FUNCTION_INSTANCE_2
852 FUNCTION_INSTANCE_N. */
854 bool
855 autofdo_source_profile::read ()
857 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
859 inform (0, "Not expected TAG.");
860 return false;
863 /* Skip the length of the section. */
864 gcov_read_unsigned ();
866 /* Read in the function/callsite profile, and store it in local
867 data structure. */
868 unsigned function_num = gcov_read_unsigned ();
869 for (unsigned i = 0; i < function_num; i++)
871 function_instance::function_instance_stack stack;
872 function_instance *s = function_instance::read_function_instance (
873 &stack, gcov_read_counter ());
874 afdo_profile_info->sum_all += s->total_count ();
875 map_[s->name ()] = s;
877 return true;
880 /* Return the function_instance in the profile that correspond to the
881 inline STACK. */
883 function_instance *
884 autofdo_source_profile::get_function_instance_by_inline_stack (
885 const inline_stack &stack) const
887 name_function_instance_map::const_iterator iter = map_.find (
888 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
889 if (iter == map_.end())
890 return NULL;
891 function_instance *s = iter->second;
892 for (unsigned i = stack.length() - 1; i > 0; i--)
894 s = s->get_function_instance_by_decl (
895 stack[i].second, stack[i - 1].first);
896 if (s == NULL)
897 return NULL;
899 return s;
902 /* Module profile is only used by LIPO. Here we simply ignore it. */
904 static void
905 fake_read_autofdo_module_profile ()
907 /* Read in the module info. */
908 gcov_read_unsigned ();
910 /* Skip the length of the section. */
911 gcov_read_unsigned ();
913 /* Read in the file name table. */
914 unsigned total_module_num = gcov_read_unsigned ();
915 gcc_assert (total_module_num == 0);
918 /* Read data from profile data file. */
920 static void
921 read_profile (void)
923 if (gcov_open (auto_profile_file, 1) == 0)
924 error ("Cannot open profile file %s.", auto_profile_file);
926 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
927 error ("AutoFDO profile magic number does not mathch.");
929 /* Skip the version number. */
930 unsigned version = gcov_read_unsigned ();
931 if (version != AUTO_PROFILE_VERSION)
932 error ("AutoFDO profile version %u does match %u.",
933 version, AUTO_PROFILE_VERSION);
935 /* Skip the empty integer. */
936 gcov_read_unsigned ();
938 /* string_table. */
939 afdo_string_table = new string_table ();
940 if (!afdo_string_table->read())
941 error ("Cannot read string table from %s.", auto_profile_file);
943 /* autofdo_source_profile. */
944 afdo_source_profile = autofdo_source_profile::create ();
945 if (afdo_source_profile == NULL)
946 error ("Cannot read function profile from %s.", auto_profile_file);
948 /* autofdo_module_profile. */
949 fake_read_autofdo_module_profile ();
951 /* Read in the working set. */
952 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
953 error ("Cannot read working set from %s.", auto_profile_file);
955 /* Skip the length of the section. */
956 gcov_read_unsigned ();
957 gcov_working_set_t set[128];
958 for (unsigned i = 0; i < 128; i++)
960 set[i].num_counters = gcov_read_unsigned ();
961 set[i].min_counter = gcov_read_counter ();
963 add_working_set (set);
966 /* From AutoFDO profiles, find values inside STMT for that we want to measure
967 histograms for indirect-call optimization.
969 This function is actually served for 2 purposes:
970 * before annotation, we need to mark histogram, promote and inline
971 * after annotation, we just need to mark, and let follow-up logic to
972 decide if it needs to promote and inline. */
974 static void
975 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
976 bool transform)
978 gimple gs = gsi_stmt (*gsi);
979 tree callee;
981 if (map.size () == 0)
982 return;
983 gcall *stmt = dyn_cast <gcall *> (gs);
984 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
985 return;
987 callee = gimple_call_fn (stmt);
989 histogram_value hist = gimple_alloc_histogram_value (
990 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
991 hist->n_counters = 3;
992 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
993 gimple_add_histogram_value (cfun, stmt, hist);
995 gcov_type total = 0;
996 icall_target_map::const_iterator max_iter = map.end ();
998 for (icall_target_map::const_iterator iter = map.begin ();
999 iter != map.end (); ++iter)
1001 total += iter->second;
1002 if (max_iter == map.end () || max_iter->second < iter->second)
1003 max_iter = iter;
1006 hist->hvalue.counters[0]
1007 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1008 hist->hvalue.counters[1] = max_iter->second;
1009 hist->hvalue.counters[2] = total;
1011 if (!transform)
1012 return;
1014 struct cgraph_edge *indirect_edge
1015 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1016 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1017 get_identifier ((const char *) hist->hvalue.counters[0]));
1019 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1020 return;
1021 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1022 return;
1023 struct cgraph_edge *new_edge
1024 = indirect_edge->make_speculative (direct_call, 0, 0);
1025 new_edge->redirect_call_stmt_to_callee ();
1026 gimple_remove_histogram_value (cfun, stmt, hist);
1027 inline_call (new_edge, true, NULL, NULL, false);
1030 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1031 histograms and adds them to list VALUES. */
1033 static void
1034 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1035 bool transform)
1037 afdo_indirect_call (gsi, map, transform);
1040 typedef std::set<basic_block> bb_set;
1041 typedef std::set<edge> edge_set;
1043 static bool
1044 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1046 return annotated.find (bb) != annotated.end ();
1049 static void
1050 set_bb_annotated (basic_block bb, bb_set *annotated)
1052 annotated->insert (bb);
1055 static bool
1056 is_edge_annotated (const edge e, const edge_set &annotated)
1058 return annotated.find (e) != annotated.end ();
1061 static void
1062 set_edge_annotated (edge e, edge_set *annotated)
1064 annotated->insert (e);
1067 /* For a given BB, set its execution count. Attach value profile if a stmt
1068 is not in PROMOTED, because we only want to promote an indirect call once.
1069 Return TRUE if BB is annotated. */
1071 static bool
1072 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1074 gimple_stmt_iterator gsi;
1075 edge e;
1076 edge_iterator ei;
1077 gcov_type max_count = 0;
1078 bool has_annotated = false;
1080 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1082 count_info info;
1083 gimple stmt = gsi_stmt (gsi);
1084 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1085 continue;
1086 if (afdo_source_profile->get_count_info (stmt, &info))
1088 if (info.count > max_count)
1089 max_count = info.count;
1090 has_annotated = true;
1091 if (info.targets.size () > 0
1092 && promoted.find (stmt) == promoted.end ())
1093 afdo_vpt (&gsi, info.targets, false);
1097 if (!has_annotated)
1098 return false;
1100 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1101 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1102 for (gphi_iterator gpi = gsi_start_phis (bb);
1103 !gsi_end_p (gpi);
1104 gsi_next (&gpi))
1106 gphi *phi = gpi.phi ();
1107 size_t i;
1108 for (i = 0; i < gimple_phi_num_args (phi); i++)
1109 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1111 FOR_EACH_EDGE (e, ei, bb->succs)
1112 afdo_source_profile->mark_annotated (e->goto_locus);
1114 bb->count = max_count;
1115 return true;
1118 /* BB1 and BB2 are in an equivalent class iff:
1119 1. BB1 dominates BB2.
1120 2. BB2 post-dominates BB1.
1121 3. BB1 and BB2 are in the same loop nest.
1122 This function finds the equivalent class for each basic block, and
1123 stores a pointer to the first BB in its equivalent class. Meanwhile,
1124 set bb counts for the same equivalent class to be idenical. Update
1125 ANNOTATED_BB for the first BB in its equivalent class. */
1127 static void
1128 afdo_find_equiv_class (bb_set *annotated_bb)
1130 basic_block bb;
1132 FOR_ALL_BB_FN (bb, cfun)
1133 bb->aux = NULL;
1135 FOR_ALL_BB_FN (bb, cfun)
1137 vec<basic_block> dom_bbs;
1138 basic_block bb1;
1139 int i;
1141 if (bb->aux != NULL)
1142 continue;
1143 bb->aux = bb;
1144 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1145 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1146 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1147 && bb1->loop_father == bb->loop_father)
1149 bb1->aux = bb;
1150 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1152 bb->count = bb1->count;
1153 set_bb_annotated (bb, annotated_bb);
1156 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1157 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1158 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1159 && bb1->loop_father == bb->loop_father)
1161 bb1->aux = bb;
1162 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1164 bb->count = bb1->count;
1165 set_bb_annotated (bb, annotated_bb);
1171 /* If a basic block's count is known, and only one of its in/out edges' count
1172 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1173 edges' counts are known, then the basic block's unknown count can also be
1174 calculated.
1175 IS_SUCC is true if out edges of a basic blocks are examined.
1176 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1177 Return TRUE if any basic block/edge count is changed. */
1179 static bool
1180 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1181 edge_set *annotated_edge)
1183 basic_block bb;
1184 bool changed = false;
1186 FOR_EACH_BB_FN (bb, cfun)
1188 edge e, unknown_edge = NULL;
1189 edge_iterator ei;
1190 int num_unknown_edge = 0;
1191 gcov_type total_known_count = 0;
1193 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1194 if (!is_edge_annotated (e, *annotated_edge))
1195 num_unknown_edge++, unknown_edge = e;
1196 else
1197 total_known_count += e->count;
1199 if (num_unknown_edge == 0)
1201 if (total_known_count > bb->count)
1203 bb->count = total_known_count;
1204 changed = true;
1206 if (!is_bb_annotated (bb, *annotated_bb))
1208 set_bb_annotated (bb, annotated_bb);
1209 changed = true;
1212 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1214 if (bb->count >= total_known_count)
1215 unknown_edge->count = bb->count - total_known_count;
1216 else
1217 unknown_edge->count = 0;
1218 set_edge_annotated (unknown_edge, annotated_edge);
1219 changed = true;
1222 return changed;
1225 /* Special propagation for circuit expressions. Because GCC translates
1226 control flow into data flow for circuit expressions. E.g.
1227 BB1:
1228 if (a && b)
1230 else
1233 will be translated into:
1235 BB1:
1236 if (a)
1237 goto BB.t1
1238 else
1239 goto BB.t3
1240 BB.t1:
1241 if (b)
1242 goto BB.t2
1243 else
1244 goto BB.t3
1245 BB.t2:
1246 goto BB.t3
1247 BB.t3:
1248 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1249 if (tmp)
1250 goto BB2
1251 else
1252 goto BB3
1254 In this case, we need to propagate through PHI to determine the edge
1255 count of BB1->BB.t1, BB.t1->BB.t2.
1256 Update ANNOTATED_EDGE accordingly. */
1258 static void
1259 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1261 basic_block bb;
1262 FOR_ALL_BB_FN (bb, cfun)
1264 gimple def_stmt;
1265 tree cmp_rhs, cmp_lhs;
1266 gimple cmp_stmt = last_stmt (bb);
1267 edge e;
1268 edge_iterator ei;
1270 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1271 continue;
1272 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1273 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1274 if (!TREE_CONSTANT (cmp_rhs)
1275 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1276 continue;
1277 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1278 continue;
1279 if (!is_bb_annotated (bb, annotated_bb))
1280 continue;
1281 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1282 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1283 && gimple_assign_single_p (def_stmt)
1284 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1285 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1286 if (!def_stmt)
1287 continue;
1288 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1289 if (!phi_stmt)
1290 continue;
1291 FOR_EACH_EDGE (e, ei, bb->succs)
1293 unsigned i, total = 0;
1294 edge only_one;
1295 bool check_value_one = (((integer_onep (cmp_rhs))
1296 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1297 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1298 if (!is_edge_annotated (e, *annotated_edge))
1299 continue;
1300 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1302 tree val = gimple_phi_arg_def (phi_stmt, i);
1303 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1305 if (!TREE_CONSTANT (val)
1306 || !(integer_zerop (val) || integer_onep (val)))
1307 continue;
1308 if (check_value_one ^ integer_onep (val))
1309 continue;
1310 total++;
1311 only_one = ep;
1312 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1314 ep->probability = 0;
1315 ep->count = 0;
1316 set_edge_annotated (ep, annotated_edge);
1319 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1321 only_one->probability = e->probability;
1322 only_one->count = e->count;
1323 set_edge_annotated (only_one, annotated_edge);
1329 /* Propagate the basic block count and edge count on the control flow
1330 graph. We do the propagation iteratively until stablize. */
1332 static void
1333 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1335 basic_block bb;
1336 bool changed = true;
1337 int i = 0;
1339 FOR_ALL_BB_FN (bb, cfun)
1341 bb->count = ((basic_block)bb->aux)->count;
1342 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1343 set_bb_annotated (bb, annotated_bb);
1346 while (changed && i++ < 10)
1348 changed = false;
1350 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1351 changed = true;
1352 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1353 changed = true;
1354 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1358 /* Propagate counts on control flow graph and calculate branch
1359 probabilities. */
1361 static void
1362 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1364 basic_block bb;
1365 bool has_sample = false;
1367 FOR_EACH_BB_FN (bb, cfun)
1368 if (bb->count > 0)
1369 has_sample = true;
1371 if (!has_sample)
1372 return;
1374 calculate_dominance_info (CDI_POST_DOMINATORS);
1375 calculate_dominance_info (CDI_DOMINATORS);
1376 loop_optimizer_init (0);
1378 afdo_find_equiv_class (annotated_bb);
1379 afdo_propagate (annotated_bb, annotated_edge);
1381 FOR_EACH_BB_FN (bb, cfun)
1383 edge e;
1384 edge_iterator ei;
1385 int num_unknown_succ = 0;
1386 gcov_type total_count = 0;
1388 FOR_EACH_EDGE (e, ei, bb->succs)
1390 if (!is_edge_annotated (e, *annotated_edge))
1391 num_unknown_succ++;
1392 else
1393 total_count += e->count;
1395 if (num_unknown_succ == 0 && total_count > 0)
1397 FOR_EACH_EDGE (e, ei, bb->succs)
1398 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1401 FOR_ALL_BB_FN (bb, cfun)
1403 edge e;
1404 edge_iterator ei;
1406 FOR_EACH_EDGE (e, ei, bb->succs)
1407 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1408 bb->aux = NULL;
1411 loop_optimizer_finalize ();
1412 free_dominance_info (CDI_DOMINATORS);
1413 free_dominance_info (CDI_POST_DOMINATORS);
1416 /* Perform value profile transformation using AutoFDO profile. Add the
1417 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1418 indirect call promoted. */
1420 static bool
1421 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1423 basic_block bb;
1424 if (afdo_source_profile->get_function_instance_by_decl (
1425 current_function_decl) == NULL)
1426 return false;
1428 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1430 bool has_vpt = false;
1431 FOR_EACH_BB_FN (bb, cfun)
1433 if (!has_indirect_call (bb))
1434 continue;
1435 gimple_stmt_iterator gsi;
1437 gcov_type bb_count = 0;
1438 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1440 count_info info;
1441 gimple stmt = gsi_stmt (gsi);
1442 if (afdo_source_profile->get_count_info (stmt, &info))
1443 bb_count = MAX (bb_count, info.count);
1446 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1448 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1449 /* IC_promotion and early_inline_2 is done in multiple iterations.
1450 No need to promoted the stmt if its in promoted_stmts (means
1451 it is already been promoted in the previous iterations). */
1452 if ((!stmt) || gimple_call_fn (stmt) == NULL
1453 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1454 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1455 continue;
1457 count_info info;
1458 afdo_source_profile->get_count_info (stmt, &info);
1459 info.count = bb_count;
1460 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1462 /* Promote the indirect call and update the promoted_stmts. */
1463 promoted_stmts->insert (stmt);
1464 afdo_vpt (&gsi, info.targets, true);
1465 has_vpt = true;
1470 if (has_vpt)
1472 optimize_inline_calls (current_function_decl);
1473 return true;
1476 return false;
1479 /* Annotate auto profile to the control flow graph. Do not annotate value
1480 profile for stmts in PROMOTED_STMTS. */
1482 static void
1483 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1485 basic_block bb;
1486 bb_set annotated_bb;
1487 edge_set annotated_edge;
1488 const function_instance *s
1489 = afdo_source_profile->get_function_instance_by_decl (
1490 current_function_decl);
1492 if (s == NULL)
1493 return;
1494 cgraph_node::get (current_function_decl)->count = s->head_count ();
1495 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1496 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1498 FOR_EACH_BB_FN (bb, cfun)
1500 edge e;
1501 edge_iterator ei;
1503 bb->count = 0;
1504 FOR_EACH_EDGE (e, ei, bb->succs)
1505 e->count = 0;
1507 if (afdo_set_bb_count (bb, promoted_stmts))
1508 set_bb_annotated (bb, &annotated_bb);
1509 if (bb->count > max_count)
1510 max_count = bb->count;
1512 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1513 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1515 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1516 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1517 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1519 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1520 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1522 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1523 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1524 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1526 afdo_source_profile->mark_annotated (
1527 DECL_SOURCE_LOCATION (current_function_decl));
1528 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1529 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1530 if (max_count > 0)
1532 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1533 counts_to_freqs ();
1534 profile_status_for_fn (cfun) = PROFILE_READ;
1536 if (flag_value_profile_transformations)
1538 gimple_value_profile_transformations ();
1539 free_dominance_info (CDI_DOMINATORS);
1540 free_dominance_info (CDI_POST_DOMINATORS);
1541 update_ssa (TODO_update_ssa);
1545 /* Wrapper function to invoke early inliner. */
1547 static void
1548 early_inline ()
1550 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1551 unsigned todo = early_inliner (cfun);
1552 if (todo & TODO_update_ssa_any)
1553 update_ssa (TODO_update_ssa);
1556 /* Use AutoFDO profile to annoate the control flow graph.
1557 Return the todo flag. */
1559 static unsigned int
1560 auto_profile (void)
1562 struct cgraph_node *node;
1564 if (symtab->state == FINISHED)
1565 return 0;
1567 init_node_map (true);
1568 profile_info = autofdo::afdo_profile_info;
1570 FOR_EACH_FUNCTION (node)
1572 if (!gimple_has_body_p (node->decl))
1573 continue;
1575 /* Don't profile functions produced for builtin stuff. */
1576 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1577 continue;
1579 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1581 /* First do indirect call promotion and early inline to make the
1582 IR match the profiled binary before actual annotation.
1584 This is needed because an indirect call might have been promoted
1585 and inlined in the profiled binary. If we do not promote and
1586 inline these indirect calls before annotation, the profile for
1587 these promoted functions will be lost.
1589 e.g. foo() --indirect_call--> bar()
1590 In profiled binary, the callsite is promoted and inlined, making
1591 the profile look like:
1593 foo: {
1594 loc_foo_1: count_1
1595 bar@loc_foo_2: {
1596 loc_bar_1: count_2
1597 loc_bar_2: count_3
1601 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1602 If we perform annotation on it, the profile inside bar@loc_foo2
1603 will be wasted.
1605 To avoid this, we promote loc_foo_2 and inline the promoted bar
1606 function before annotation, so the profile inside bar@loc_foo2
1607 will be useful. */
1608 autofdo::stmt_set promoted_stmts;
1609 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1611 if (!flag_value_profile_transformations
1612 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1613 break;
1614 early_inline ();
1617 early_inline ();
1618 autofdo::afdo_annotate_cfg (promoted_stmts);
1619 compute_function_frequency ();
1621 /* Local pure-const may imply need to fixup the cfg. */
1622 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1623 cleanup_tree_cfg ();
1625 free_dominance_info (CDI_DOMINATORS);
1626 free_dominance_info (CDI_POST_DOMINATORS);
1627 cgraph_edge::rebuild_edges ();
1628 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1629 pop_cfun ();
1632 return TODO_rebuild_cgraph_edges;
1634 } /* namespace autofdo. */
1636 /* Read the profile from the profile data file. */
1638 void
1639 read_autofdo_file (void)
1641 if (auto_profile_file == NULL)
1642 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1644 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1645 1, sizeof (struct gcov_ctr_summary));
1646 autofdo::afdo_profile_info->runs = 1;
1647 autofdo::afdo_profile_info->sum_max = 0;
1648 autofdo::afdo_profile_info->sum_all = 0;
1650 /* Read the profile from the profile file. */
1651 autofdo::read_profile ();
1654 /* Free the resources. */
1656 void
1657 end_auto_profile (void)
1659 delete autofdo::afdo_source_profile;
1660 delete autofdo::afdo_string_table;
1661 profile_info = NULL;
1664 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1666 bool
1667 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1669 gcov_type count
1670 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1672 if (count > 0)
1674 bool is_hot;
1675 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1676 /* At early inline stage, profile_info is not set yet. We need to
1677 temporarily set it to afdo_profile_info to calculate hotness. */
1678 profile_info = autofdo::afdo_profile_info;
1679 is_hot = maybe_hot_count_p (NULL, count);
1680 profile_info = saved_profile_info;
1681 return is_hot;
1684 return false;
1687 namespace
1690 const pass_data pass_data_ipa_auto_profile = {
1691 SIMPLE_IPA_PASS, "afdo", /* name */
1692 OPTGROUP_NONE, /* optinfo_flags */
1693 TV_IPA_AUTOFDO, /* tv_id */
1694 0, /* properties_required */
1695 0, /* properties_provided */
1696 0, /* properties_destroyed */
1697 0, /* todo_flags_start */
1698 0, /* todo_flags_finish */
1701 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1703 public:
1704 pass_ipa_auto_profile (gcc::context *ctxt)
1705 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1709 /* opt_pass methods: */
1710 virtual bool
1711 gate (function *)
1713 return flag_auto_profile;
1715 virtual unsigned int
1716 execute (function *)
1718 return autofdo::auto_profile ();
1720 }; // class pass_ipa_auto_profile
1722 } // anon namespace
1724 simple_ipa_opt_pass *
1725 make_pass_ipa_auto_profile (gcc::context *ctxt)
1727 return new pass_ipa_auto_profile (ctxt);