* g++.dg/template/using30.C: Move ...
[official-gcc.git] / gcc / auto-profile.c
blob085bbd6ec549d1ca0627b9050e224bf3b127b3e4
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014. Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
24 #include <string.h>
25 #include <map>
26 #include <set>
28 #include "coretypes.h"
29 #include "tree.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "predict.h"
33 #include "vec.h"
34 #include "hashtab.h"
35 #include "hash-set.h"
36 #include "machmode.h"
37 #include "tm.h"
38 #include "hard-reg-set.h"
39 #include "input.h"
40 #include "function.h"
41 #include "dominance.h"
42 #include "cfg.h"
43 #include "basic-block.h"
44 #include "diagnostic-core.h"
45 #include "gcov-io.h"
46 #include "profile.h"
47 #include "langhooks.h"
48 #include "opts.h"
49 #include "tree-pass.h"
50 #include "cfgloop.h"
51 #include "tree-ssa-alias.h"
52 #include "tree-cfg.h"
53 #include "tree-cfgcleanup.h"
54 #include "tree-ssa-operands.h"
55 #include "tree-into-ssa.h"
56 #include "internal-fn.h"
57 #include "is-a.h"
58 #include "gimple-expr.h"
59 #include "gimple.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
62 #include "hash-map.h"
63 #include "plugin-api.h"
64 #include "ipa-ref.h"
65 #include "cgraph.h"
66 #include "value-prof.h"
67 #include "coverage.h"
68 #include "params.h"
69 #include "alloc-pool.h"
70 #include "ipa-prop.h"
71 #include "ipa-inline.h"
72 #include "tree-inline.h"
73 #include "stringpool.h"
74 #include "auto-profile.h"
76 /* The following routines implements AutoFDO optimization.
78 This optimization uses sampling profiles to annotate basic block counts
79 and uses heuristics to estimate branch probabilities.
81 There are three phases in AutoFDO:
83 Phase 1: Read profile from the profile data file.
84 The following info is read from the profile datafile:
85 * string_table: a map between function name and its index.
86 * autofdo_source_profile: a map from function_instance name to
87 function_instance. This is represented as a forest of
88 function_instances.
89 * WorkingSet: a histogram of how many instructions are covered for a
90 given percentage of total cycles. This is describing the binary
91 level information (not source level). This info is used to help
92 decide if we want aggressive optimizations that could increase
93 code footprint (e.g. loop unroll etc.)
94 A function instance is an instance of function that could either be a
95 standalone symbol, or a clone of a function that is inlined into another
96 function.
98 Phase 2: Early inline + valur profile transformation.
99 Early inline uses autofdo_source_profile to find if a callsite is:
100 * inlined in the profiled binary.
101 * callee body is hot in the profiling run.
102 If both condition satisfies, early inline will inline the callsite
103 regardless of the code growth.
104 Phase 2 is an iterative process. During each iteration, we also check
105 if an indirect callsite is promoted and inlined in the profiling run.
106 If yes, vpt will happen to force promote it and in the next iteration,
107 einline will inline the promoted callsite in the next iteration.
109 Phase 3: Annotate control flow graph.
110 AutoFDO uses a separate pass to:
111 * Annotate basic block count
112 * Estimate branch probability
114 After the above 3 phases, all profile is readily annotated on the GCC IR.
115 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
116 use of the profile. E.g. it uses existing mechanism to calculate the basic
117 block/edge frequency, as well as the cgraph node/edge count.
120 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
121 #define AUTO_PROFILE_VERSION 1
123 namespace autofdo
126 /* Represent a source location: (function_decl, lineno). */
127 typedef std::pair<tree, unsigned> decl_lineno;
129 /* Represent an inline stack. vector[0] is the leaf node. */
130 typedef auto_vec<decl_lineno> inline_stack;
132 /* String array that stores function names. */
133 typedef auto_vec<char *> string_vector;
135 /* Map from function name's index in string_table to target's
136 execution count. */
137 typedef std::map<unsigned, gcov_type> icall_target_map;
139 /* Set of gimple stmts. Used to track if the stmt has already been promoted
140 to direct call. */
141 typedef std::set<gimple> stmt_set;
143 /* Represent count info of an inline stack. */
144 struct count_info
146 /* Sampled count of the inline stack. */
147 gcov_type count;
149 /* Map from indirect call target to its sample count. */
150 icall_target_map targets;
152 /* Whether this inline stack is already used in annotation.
154 Each inline stack should only be used to annotate IR once.
155 This will be enforced when instruction-level discriminator
156 is supported. */
157 bool annotated;
160 /* operator< for "const char *". */
161 struct string_compare
163 bool operator()(const char *a, const char *b) const
165 return strcmp (a, b) < 0;
169 /* Store a string array, indexed by string position in the array. */
170 class string_table
172 public:
173 string_table ()
176 ~string_table ();
178 /* For a given string, returns its index. */
179 int get_index (const char *name) const;
181 /* For a given decl, returns the index of the decl name. */
182 int get_index_by_decl (tree decl) const;
184 /* For a given index, returns the string. */
185 const char *get_name (int index) const;
187 /* Read profile, return TRUE on success. */
188 bool read ();
190 private:
191 typedef std::map<const char *, unsigned, string_compare> string_index_map;
192 string_vector vector_;
193 string_index_map map_;
196 /* Profile of a function instance:
197 1. total_count of the function.
198 2. head_count (entry basic block count) of the function (only valid when
199 function is a top-level function_instance, i.e. it is the original copy
200 instead of the inlined copy).
201 3. map from source location (decl_lineno) to profile (count_info).
202 4. map from callsite to callee function_instance. */
203 class function_instance
205 public:
206 typedef auto_vec<function_instance *> function_instance_stack;
208 /* Read the profile and return a function_instance with head count as
209 HEAD_COUNT. Recursively read callsites to create nested function_instances
210 too. STACK is used to track the recursive creation process. */
211 static function_instance *
212 read_function_instance (function_instance_stack *stack,
213 gcov_type head_count);
215 /* Recursively deallocate all callsites (nested function_instances). */
216 ~function_instance ();
218 /* Accessors. */
220 name () const
222 return name_;
224 gcov_type
225 total_count () const
227 return total_count_;
229 gcov_type
230 head_count () const
232 return head_count_;
235 /* Traverse callsites of the current function_instance to find one at the
236 location of LINENO and callee name represented in DECL. */
237 function_instance *get_function_instance_by_decl (unsigned lineno,
238 tree decl) const;
240 /* Store the profile info for LOC in INFO. Return TRUE if profile info
241 is found. */
242 bool get_count_info (location_t loc, count_info *info) const;
244 /* Read the inlined indirect call target profile for STMT and store it in
245 MAP, return the total count for all inlined indirect calls. */
246 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
248 /* Sum of counts that is used during annotation. */
249 gcov_type total_annotated_count () const;
251 /* Mark LOC as annotated. */
252 void mark_annotated (location_t loc);
254 private:
255 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
256 typedef std::pair<unsigned, unsigned> callsite;
258 /* Map from callsite to callee function_instance. */
259 typedef std::map<callsite, function_instance *> callsite_map;
261 function_instance (unsigned name, gcov_type head_count)
262 : name_ (name), total_count_ (0), head_count_ (head_count)
266 /* Map from source location (decl_lineno) to profile (count_info). */
267 typedef std::map<unsigned, count_info> position_count_map;
269 /* function_instance name index in the string_table. */
270 unsigned name_;
272 /* Total sample count. */
273 gcov_type total_count_;
275 /* Entry BB's sample count. */
276 gcov_type head_count_;
278 /* Map from callsite location to callee function_instance. */
279 callsite_map callsites;
281 /* Map from source location to count_info. */
282 position_count_map pos_counts;
285 /* Profile for all functions. */
286 class autofdo_source_profile
288 public:
289 static autofdo_source_profile *
290 create ()
292 autofdo_source_profile *map = new autofdo_source_profile ();
294 if (map->read ())
295 return map;
296 delete map;
297 return NULL;
300 ~autofdo_source_profile ();
302 /* For a given DECL, returns the top-level function_instance. */
303 function_instance *get_function_instance_by_decl (tree decl) const;
305 /* Find count_info for a given gimple STMT. If found, store the count_info
306 in INFO and return true; otherwise return false. */
307 bool get_count_info (gimple stmt, count_info *info) const;
309 /* Find total count of the callee of EDGE. */
310 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
312 /* Update value profile INFO for STMT from the inlined indirect callsite.
313 Return true if INFO is updated. */
314 bool update_inlined_ind_target (gcall *stmt, count_info *info);
316 /* Mark LOC as annotated. */
317 void mark_annotated (location_t loc);
319 private:
320 /* Map from function_instance name index (in string_table) to
321 function_instance. */
322 typedef std::map<unsigned, function_instance *> name_function_instance_map;
324 autofdo_source_profile () {}
326 /* Read AutoFDO profile and returns TRUE on success. */
327 bool read ();
329 /* Return the function_instance in the profile that correspond to the
330 inline STACK. */
331 function_instance *
332 get_function_instance_by_inline_stack (const inline_stack &stack) const;
334 name_function_instance_map map_;
337 /* Store the strings read from the profile data file. */
338 static string_table *afdo_string_table;
340 /* Store the AutoFDO source profile. */
341 static autofdo_source_profile *afdo_source_profile;
343 /* gcov_ctr_summary structure to store the profile_info. */
344 static struct gcov_ctr_summary *afdo_profile_info;
346 /* Helper functions. */
348 /* Return the original name of NAME: strip the suffix that starts
349 with '.' Caller is responsible for freeing RET. */
351 static char *
352 get_original_name (const char *name)
354 char *ret = xstrdup (name);
355 char *find = strchr (ret, '.');
356 if (find != NULL)
357 *find = 0;
358 return ret;
361 /* Return the combined location, which is a 32bit integer in which
362 higher 16 bits stores the line offset of LOC to the start lineno
363 of DECL, The lower 16 bits stores the discrimnator. */
365 static unsigned
366 get_combined_location (location_t loc, tree decl)
368 /* TODO: allow more bits for line and less bits for discriminator. */
369 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
370 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
371 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
374 /* Return the function decl of a given lexical BLOCK. */
376 static tree
377 get_function_decl_from_block (tree block)
379 tree decl;
381 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
382 return NULL_TREE;
384 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
385 decl && (TREE_CODE (decl) == BLOCK);
386 decl = BLOCK_ABSTRACT_ORIGIN (decl))
387 if (TREE_CODE (decl) == FUNCTION_DECL)
388 break;
389 return decl;
392 /* Store inline stack for STMT in STACK. */
394 static void
395 get_inline_stack (location_t locus, inline_stack *stack)
397 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
398 return;
400 tree block = LOCATION_BLOCK (locus);
401 if (block && TREE_CODE (block) == BLOCK)
403 int level = 0;
404 for (block = BLOCK_SUPERCONTEXT (block);
405 block && (TREE_CODE (block) == BLOCK);
406 block = BLOCK_SUPERCONTEXT (block))
408 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
409 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
410 continue;
412 tree decl = get_function_decl_from_block (block);
413 stack->safe_push (
414 std::make_pair (decl, get_combined_location (locus, decl)));
415 locus = tmp_locus;
416 level++;
419 stack->safe_push (
420 std::make_pair (current_function_decl,
421 get_combined_location (locus, current_function_decl)));
424 /* Return STMT's combined location, which is a 32bit integer in which
425 higher 16 bits stores the line offset of LOC to the start lineno
426 of DECL, The lower 16 bits stores the discrimnator. */
428 static unsigned
429 get_relative_location_for_stmt (gimple stmt)
431 location_t locus = gimple_location (stmt);
432 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
433 return UNKNOWN_LOCATION;
435 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
436 block = BLOCK_SUPERCONTEXT (block))
437 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
438 return get_combined_location (locus,
439 get_function_decl_from_block (block));
440 return get_combined_location (locus, current_function_decl);
443 /* Return true if BB contains indirect call. */
445 static bool
446 has_indirect_call (basic_block bb)
448 gimple_stmt_iterator gsi;
450 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
452 gimple stmt = gsi_stmt (gsi);
453 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
454 && (gimple_call_fn (stmt) == NULL
455 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
456 return true;
458 return false;
461 /* Member functions for string_table. */
463 /* Deconstructor. */
465 string_table::~string_table ()
467 for (unsigned i = 0; i < vector_.length (); i++)
468 free (vector_[i]);
472 /* Return the index of a given function NAME. Return -1 if NAME is not
473 found in string table. */
476 string_table::get_index (const char *name) const
478 if (name == NULL)
479 return -1;
480 string_index_map::const_iterator iter = map_.find (name);
481 if (iter == map_.end ())
482 return -1;
483 else
484 return iter->second;
487 /* Return the index of a given function DECL. Return -1 if DECL is not
488 found in string table. */
491 string_table::get_index_by_decl (tree decl) const
493 char *name
494 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
495 int ret = get_index (name);
496 free (name);
497 if (ret != -1)
498 return ret;
499 ret = get_index (lang_hooks.dwarf_name (decl, 0));
500 if (ret != -1)
501 return ret;
502 if (DECL_ABSTRACT_ORIGIN (decl))
503 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
504 else
505 return -1;
508 /* Return the function name of a given INDEX. */
510 const char *
511 string_table::get_name (int index) const
513 gcc_assert (index > 0 && index < (int)vector_.length ());
514 return vector_[index];
517 /* Read the string table. Return TRUE if reading is successful. */
519 bool
520 string_table::read ()
522 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
523 return false;
524 /* Skip the length of the section. */
525 gcov_read_unsigned ();
526 /* Read in the file name table. */
527 unsigned string_num = gcov_read_unsigned ();
528 for (unsigned i = 0; i < string_num; i++)
530 vector_.safe_push (get_original_name (gcov_read_string ()));
531 map_[vector_.last ()] = i;
533 return true;
536 /* Member functions for function_instance. */
538 function_instance::~function_instance ()
540 for (callsite_map::iterator iter = callsites.begin ();
541 iter != callsites.end (); ++iter)
542 delete iter->second;
545 /* Traverse callsites of the current function_instance to find one at the
546 location of LINENO and callee name represented in DECL. */
548 function_instance *
549 function_instance::get_function_instance_by_decl (unsigned lineno,
550 tree decl) const
552 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
553 if (func_name_idx != -1)
555 callsite_map::const_iterator ret
556 = callsites.find (std::make_pair (lineno, func_name_idx));
557 if (ret != callsites.end ())
558 return ret->second;
560 func_name_idx
561 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
562 if (func_name_idx != -1)
564 callsite_map::const_iterator ret
565 = callsites.find (std::make_pair (lineno, func_name_idx));
566 if (ret != callsites.end ())
567 return ret->second;
569 if (DECL_ABSTRACT_ORIGIN (decl))
570 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
571 else
572 return NULL;
575 /* Store the profile info for LOC in INFO. Return TRUE if profile info
576 is found. */
578 bool
579 function_instance::get_count_info (location_t loc, count_info *info) const
581 position_count_map::const_iterator iter = pos_counts.find (loc);
582 if (iter == pos_counts.end ())
583 return false;
584 *info = iter->second;
585 return true;
588 /* Mark LOC as annotated. */
590 void
591 function_instance::mark_annotated (location_t loc)
593 position_count_map::iterator iter = pos_counts.find (loc);
594 if (iter == pos_counts.end ())
595 return;
596 iter->second.annotated = true;
599 /* Read the inlinied indirect call target profile for STMT and store it in
600 MAP, return the total count for all inlined indirect calls. */
602 gcov_type
603 function_instance::find_icall_target_map (gcall *stmt,
604 icall_target_map *map) const
606 gcov_type ret = 0;
607 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
609 for (callsite_map::const_iterator iter = callsites.begin ();
610 iter != callsites.end (); ++iter)
612 unsigned callee = iter->second->name ();
613 /* Check if callsite location match the stmt. */
614 if (iter->first.first != stmt_offset)
615 continue;
616 struct cgraph_node *node = cgraph_node::get_for_asmname (
617 get_identifier (afdo_string_table->get_name (callee)));
618 if (node == NULL)
619 continue;
620 if (!check_ic_target (stmt, node))
621 continue;
622 (*map)[callee] = iter->second->total_count ();
623 ret += iter->second->total_count ();
625 return ret;
628 /* Read the profile and create a function_instance with head count as
629 HEAD_COUNT. Recursively read callsites to create nested function_instances
630 too. STACK is used to track the recursive creation process. */
632 /* function instance profile format:
634 ENTRY_COUNT: 8 bytes
635 NAME_INDEX: 4 bytes
636 NUM_POS_COUNTS: 4 bytes
637 NUM_CALLSITES: 4 byte
638 POS_COUNT_1:
639 POS_1_OFFSET: 4 bytes
640 NUM_TARGETS: 4 bytes
641 COUNT: 8 bytes
642 TARGET_1:
643 VALUE_PROFILE_TYPE: 4 bytes
644 TARGET_IDX: 8 bytes
645 COUNT: 8 bytes
646 TARGET_2
648 TARGET_n
649 POS_COUNT_2
651 POS_COUNT_N
652 CALLSITE_1:
653 CALLSITE_1_OFFSET: 4 bytes
654 FUNCTION_INSTANCE_PROFILE (nested)
655 CALLSITE_2
657 CALLSITE_n. */
659 function_instance *
660 function_instance::read_function_instance (function_instance_stack *stack,
661 gcov_type head_count)
663 unsigned name = gcov_read_unsigned ();
664 unsigned num_pos_counts = gcov_read_unsigned ();
665 unsigned num_callsites = gcov_read_unsigned ();
666 function_instance *s = new function_instance (name, head_count);
667 stack->safe_push (s);
669 for (unsigned i = 0; i < num_pos_counts; i++)
671 unsigned offset = gcov_read_unsigned () & 0xffff0000;
672 unsigned num_targets = gcov_read_unsigned ();
673 gcov_type count = gcov_read_counter ();
674 s->pos_counts[offset].count = count;
675 for (unsigned j = 0; j < stack->length (); j++)
676 (*stack)[j]->total_count_ += count;
677 for (unsigned j = 0; j < num_targets; j++)
679 /* Only indirect call target histogram is supported now. */
680 gcov_read_unsigned ();
681 gcov_type target_idx = gcov_read_counter ();
682 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
685 for (unsigned i = 0; i < num_callsites; i++)
687 unsigned offset = gcov_read_unsigned ();
688 function_instance *callee_function_instance
689 = read_function_instance (stack, 0);
690 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
691 = callee_function_instance;
693 stack->pop ();
694 return s;
697 /* Sum of counts that is used during annotation. */
699 gcov_type
700 function_instance::total_annotated_count () const
702 gcov_type ret = 0;
703 for (callsite_map::const_iterator iter = callsites.begin ();
704 iter != callsites.end (); ++iter)
705 ret += iter->second->total_annotated_count ();
706 for (position_count_map::const_iterator iter = pos_counts.begin ();
707 iter != pos_counts.end (); ++iter)
708 if (iter->second.annotated)
709 ret += iter->second.count;
710 return ret;
713 /* Member functions for autofdo_source_profile. */
715 autofdo_source_profile::~autofdo_source_profile ()
717 for (name_function_instance_map::const_iterator iter = map_.begin ();
718 iter != map_.end (); ++iter)
719 delete iter->second;
722 /* For a given DECL, returns the top-level function_instance. */
724 function_instance *
725 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
727 int index = afdo_string_table->get_index_by_decl (decl);
728 if (index == -1)
729 return NULL;
730 name_function_instance_map::const_iterator ret = map_.find (index);
731 return ret == map_.end () ? NULL : ret->second;
734 /* Find count_info for a given gimple STMT. If found, store the count_info
735 in INFO and return true; otherwise return false. */
737 bool
738 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
740 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
741 return false;
743 inline_stack stack;
744 get_inline_stack (gimple_location (stmt), &stack);
745 if (stack.length () == 0)
746 return false;
747 function_instance *s = get_function_instance_by_inline_stack (stack);
748 if (s == NULL)
749 return false;
750 return s->get_count_info (stack[0].second, info);
753 /* Mark LOC as annotated. */
755 void
756 autofdo_source_profile::mark_annotated (location_t loc)
758 inline_stack stack;
759 get_inline_stack (loc, &stack);
760 if (stack.length () == 0)
761 return;
762 function_instance *s = get_function_instance_by_inline_stack (stack);
763 if (s == NULL)
764 return;
765 s->mark_annotated (stack[0].second);
768 /* Update value profile INFO for STMT from the inlined indirect callsite.
769 Return true if INFO is updated. */
771 bool
772 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
773 count_info *info)
775 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
776 return false;
778 count_info old_info;
779 get_count_info (stmt, &old_info);
780 gcov_type total = 0;
781 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
782 iter != old_info.targets.end (); ++iter)
783 total += iter->second;
785 /* Program behavior changed, original promoted (and inlined) target is not
786 hot any more. Will avoid promote the original target.
788 To check if original promoted target is still hot, we check the total
789 count of the unpromoted targets (stored in old_info). If it is no less
790 than half of the callsite count (stored in INFO), the original promoted
791 target is considered not hot any more. */
792 if (total >= info->count / 2)
793 return false;
795 inline_stack stack;
796 get_inline_stack (gimple_location (stmt), &stack);
797 if (stack.length () == 0)
798 return false;
799 function_instance *s = get_function_instance_by_inline_stack (stack);
800 if (s == NULL)
801 return false;
802 icall_target_map map;
803 if (s->find_icall_target_map (stmt, &map) == 0)
804 return false;
805 for (icall_target_map::const_iterator iter = map.begin ();
806 iter != map.end (); ++iter)
807 info->targets[iter->first] = iter->second;
808 return true;
811 /* Find total count of the callee of EDGE. */
813 gcov_type
814 autofdo_source_profile::get_callsite_total_count (
815 struct cgraph_edge *edge) const
817 inline_stack stack;
818 stack.safe_push (std::make_pair (edge->callee->decl, 0));
819 get_inline_stack (gimple_location (edge->call_stmt), &stack);
821 function_instance *s = get_function_instance_by_inline_stack (stack);
822 if (s == NULL
823 || afdo_string_table->get_index (IDENTIFIER_POINTER (
824 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
825 return 0;
826 else
827 return s->total_count ();
830 /* Read AutoFDO profile and returns TRUE on success. */
832 /* source profile format:
834 GCOV_TAG_AFDO_FUNCTION: 4 bytes
835 LENGTH: 4 bytes
836 NUM_FUNCTIONS: 4 bytes
837 FUNCTION_INSTANCE_1
838 FUNCTION_INSTANCE_2
840 FUNCTION_INSTANCE_N. */
842 bool
843 autofdo_source_profile::read ()
845 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
847 inform (0, "Not expected TAG.");
848 return false;
851 /* Skip the length of the section. */
852 gcov_read_unsigned ();
854 /* Read in the function/callsite profile, and store it in local
855 data structure. */
856 unsigned function_num = gcov_read_unsigned ();
857 for (unsigned i = 0; i < function_num; i++)
859 function_instance::function_instance_stack stack;
860 function_instance *s = function_instance::read_function_instance (
861 &stack, gcov_read_counter ());
862 afdo_profile_info->sum_all += s->total_count ();
863 map_[s->name ()] = s;
865 return true;
868 /* Return the function_instance in the profile that correspond to the
869 inline STACK. */
871 function_instance *
872 autofdo_source_profile::get_function_instance_by_inline_stack (
873 const inline_stack &stack) const
875 name_function_instance_map::const_iterator iter = map_.find (
876 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
877 if (iter == map_.end())
878 return NULL;
879 function_instance *s = iter->second;
880 for (unsigned i = stack.length() - 1; i > 0; i--)
882 s = s->get_function_instance_by_decl (
883 stack[i].second, stack[i - 1].first);
884 if (s == NULL)
885 return NULL;
887 return s;
890 /* Module profile is only used by LIPO. Here we simply ignore it. */
892 static void
893 fake_read_autofdo_module_profile ()
895 /* Read in the module info. */
896 gcov_read_unsigned ();
898 /* Skip the length of the section. */
899 gcov_read_unsigned ();
901 /* Read in the file name table. */
902 unsigned total_module_num = gcov_read_unsigned ();
903 gcc_assert (total_module_num == 0);
906 /* Read data from profile data file. */
908 static void
909 read_profile (void)
911 if (gcov_open (auto_profile_file, 1) == 0)
912 error ("Cannot open profile file %s.", auto_profile_file);
914 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
915 error ("AutoFDO profile magic number does not mathch.");
917 /* Skip the version number. */
918 unsigned version = gcov_read_unsigned ();
919 if (version != AUTO_PROFILE_VERSION)
920 error ("AutoFDO profile version %u does match %u.",
921 version, AUTO_PROFILE_VERSION);
923 /* Skip the empty integer. */
924 gcov_read_unsigned ();
926 /* string_table. */
927 afdo_string_table = new string_table ();
928 if (!afdo_string_table->read())
929 error ("Cannot read string table from %s.", auto_profile_file);
931 /* autofdo_source_profile. */
932 afdo_source_profile = autofdo_source_profile::create ();
933 if (afdo_source_profile == NULL)
934 error ("Cannot read function profile from %s.", auto_profile_file);
936 /* autofdo_module_profile. */
937 fake_read_autofdo_module_profile ();
939 /* Read in the working set. */
940 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
941 error ("Cannot read working set from %s.", auto_profile_file);
943 /* Skip the length of the section. */
944 gcov_read_unsigned ();
945 gcov_working_set_t set[128];
946 for (unsigned i = 0; i < 128; i++)
948 set[i].num_counters = gcov_read_unsigned ();
949 set[i].min_counter = gcov_read_counter ();
951 add_working_set (set);
954 /* From AutoFDO profiles, find values inside STMT for that we want to measure
955 histograms for indirect-call optimization.
957 This function is actually served for 2 purposes:
958     * before annotation, we need to mark histogram, promote and inline
959     * after annotation, we just need to mark, and let follow-up logic to
960       decide if it needs to promote and inline. */
962 static void
963 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
964 bool transform)
966 gimple gs = gsi_stmt (*gsi);
967 tree callee;
969 if (map.size () == 0)
970 return;
971 gcall *stmt = dyn_cast <gcall *> (gs);
972 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
973 return;
975 callee = gimple_call_fn (stmt);
977 histogram_value hist = gimple_alloc_histogram_value (
978 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
979 hist->n_counters = 3;
980 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
981 gimple_add_histogram_value (cfun, stmt, hist);
983 gcov_type total = 0;
984 icall_target_map::const_iterator max_iter = map.end ();
986 for (icall_target_map::const_iterator iter = map.begin ();
987 iter != map.end (); ++iter)
989 total += iter->second;
990 if (max_iter == map.end () || max_iter->second < iter->second)
991 max_iter = iter;
994 hist->hvalue.counters[0]
995 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
996 hist->hvalue.counters[1] = max_iter->second;
997 hist->hvalue.counters[2] = total;
999 if (!transform)
1000 return;
1002 struct cgraph_edge *indirect_edge
1003 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1004 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1005 get_identifier ((const char *) hist->hvalue.counters[0]));
1007 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1008 return;
1009 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1010 return;
1011 struct cgraph_edge *new_edge
1012 = indirect_edge->make_speculative (direct_call, 0, 0);
1013 new_edge->redirect_call_stmt_to_callee ();
1014 gimple_remove_histogram_value (cfun, stmt, hist);
1015 inline_call (new_edge, true, NULL, NULL, false);
1018 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1019 histograms and adds them to list VALUES. */
1021 static void
1022 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1023 bool transform)
1025 afdo_indirect_call (gsi, map, transform);
1028 typedef std::set<basic_block> bb_set;
1029 typedef std::set<edge> edge_set;
1031 static bool
1032 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1034 return annotated.find (bb) != annotated.end ();
1037 static void
1038 set_bb_annotated (basic_block bb, bb_set *annotated)
1040 annotated->insert (bb);
1043 static bool
1044 is_edge_annotated (const edge e, const edge_set &annotated)
1046 return annotated.find (e) != annotated.end ();
1049 static void
1050 set_edge_annotated (edge e, edge_set *annotated)
1052 annotated->insert (e);
1055 /* For a given BB, set its execution count. Attach value profile if a stmt
1056 is not in PROMOTED, because we only want to promot an indirect call once.
1057 Return TRUE if BB is annotated. */
1059 static bool
1060 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1062 gimple_stmt_iterator gsi;
1063 edge e;
1064 edge_iterator ei;
1065 gcov_type max_count = 0;
1066 bool has_annotated = false;
1068 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1070 count_info info;
1071 gimple stmt = gsi_stmt (gsi);
1072 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1073 continue;
1074 if (afdo_source_profile->get_count_info (stmt, &info))
1076 if (info.count > max_count)
1077 max_count = info.count;
1078 has_annotated = true;
1079 if (info.targets.size () > 0
1080 && promoted.find (stmt) == promoted.end ())
1081 afdo_vpt (&gsi, info.targets, false);
1085 if (!has_annotated)
1086 return false;
1088 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1089 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1090 for (gphi_iterator gpi = gsi_start_phis (bb);
1091 !gsi_end_p (gpi);
1092 gsi_next (&gpi))
1094 gphi *phi = gpi.phi ();
1095 size_t i;
1096 for (i = 0; i < gimple_phi_num_args (phi); i++)
1097 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1099 FOR_EACH_EDGE (e, ei, bb->succs)
1100 afdo_source_profile->mark_annotated (e->goto_locus);
1102 bb->count = max_count;
1103 return true;
1106 /* BB1 and BB2 are in an equivalent class iff:
1107 1. BB1 dominates BB2.
1108 2. BB2 post-dominates BB1.
1109 3. BB1 and BB2 are in the same loop nest.
1110 This function finds the equivalent class for each basic block, and
1111 stores a pointer to the first BB in its equivalent class. Meanwhile,
1112 set bb counts for the same equivalent class to be idenical. Update
1113 ANNOTATED_BB for the first BB in its equivalent class. */
1115 static void
1116 afdo_find_equiv_class (bb_set *annotated_bb)
1118 basic_block bb;
1120 FOR_ALL_BB_FN (bb, cfun)
1121 bb->aux = NULL;
1123 FOR_ALL_BB_FN (bb, cfun)
1125 vec<basic_block> dom_bbs;
1126 basic_block bb1;
1127 int i;
1129 if (bb->aux != NULL)
1130 continue;
1131 bb->aux = bb;
1132 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1133 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1134 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1135 && bb1->loop_father == bb->loop_father)
1137 bb1->aux = bb;
1138 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1140 bb->count = MAX (bb->count, bb1->count);
1141 set_bb_annotated (bb, annotated_bb);
1144 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1145 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1146 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1147 && bb1->loop_father == bb->loop_father)
1149 bb1->aux = bb;
1150 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1152 bb->count = MAX (bb->count, bb1->count);
1153 set_bb_annotated (bb, annotated_bb);
1159 /* If a basic block's count is known, and only one of its in/out edges' count
1160 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1161 edges' counts are known, then the basic block's unknown count can also be
1162 calculated.
1163 IS_SUCC is true if out edges of a basic blocks are examined.
1164 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1165 Return TRUE if any basic block/edge count is changed. */
1167 static bool
1168 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1169 edge_set *annotated_edge)
1171 basic_block bb;
1172 bool changed = false;
1174 FOR_EACH_BB_FN (bb, cfun)
1176 edge e, unknown_edge = NULL;
1177 edge_iterator ei;
1178 int num_unknown_edge = 0;
1179 gcov_type total_known_count = 0;
1181 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1182 if (!is_edge_annotated (e, *annotated_edge))
1183 num_unknown_edge++, unknown_edge = e;
1184 else
1185 total_known_count += e->count;
1187 if (num_unknown_edge == 0)
1189 if (total_known_count > bb->count)
1191 bb->count = total_known_count;
1192 changed = true;
1194 if (!is_bb_annotated (bb, *annotated_bb))
1196 set_bb_annotated (bb, annotated_bb);
1197 changed = true;
1200 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1202 if (bb->count >= total_known_count)
1203 unknown_edge->count = bb->count - total_known_count;
1204 else
1205 unknown_edge->count = 0;
1206 set_edge_annotated (unknown_edge, annotated_edge);
1207 changed = true;
1210 return changed;
1213 /* Special propagation for circuit expressions. Because GCC translates
1214 control flow into data flow for circuit expressions. E.g.
1215 BB1:
1216 if (a && b)
1218 else
1221 will be translated into:
1223 BB1:
1224 if (a)
1225 goto BB.t1
1226 else
1227 goto BB.t3
1228 BB.t1:
1229 if (b)
1230 goto BB.t2
1231 else
1232 goto BB.t3
1233 BB.t2:
1234 goto BB.t3
1235 BB.t3:
1236 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1237 if (tmp)
1238 goto BB2
1239 else
1240 goto BB3
1242 In this case, we need to propagate through PHI to determine the edge
1243 count of BB1->BB.t1, BB.t1->BB.t2.
1244 Update ANNOTATED_EDGE accordingly. */
1246 static void
1247 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1249 basic_block bb;
1250 FOR_ALL_BB_FN (bb, cfun)
1252 gimple def_stmt;
1253 tree cmp_rhs, cmp_lhs;
1254 gimple cmp_stmt = last_stmt (bb);
1255 edge e;
1256 edge_iterator ei;
1258 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1259 continue;
1260 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1261 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1262 if (!TREE_CONSTANT (cmp_rhs)
1263 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1264 continue;
1265 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1266 continue;
1267 if (!is_bb_annotated (bb, annotated_bb))
1268 continue;
1269 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1270 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1271 && gimple_assign_single_p (def_stmt)
1272 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1273 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1274 if (!def_stmt)
1275 continue;
1276 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1277 if (!phi_stmt)
1278 continue;
1279 FOR_EACH_EDGE (e, ei, bb->succs)
1281 unsigned i, total = 0;
1282 edge only_one;
1283 bool check_value_one = (((integer_onep (cmp_rhs))
1284 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1285 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1286 if (!is_edge_annotated (e, *annotated_edge))
1287 continue;
1288 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1290 tree val = gimple_phi_arg_def (phi_stmt, i);
1291 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1293 if (!TREE_CONSTANT (val)
1294 || !(integer_zerop (val) || integer_onep (val)))
1295 continue;
1296 if (check_value_one ^ integer_onep (val))
1297 continue;
1298 total++;
1299 only_one = ep;
1300 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1302 ep->probability = 0;
1303 ep->count = 0;
1304 set_edge_annotated (ep, annotated_edge);
1307 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1309 only_one->probability = e->probability;
1310 only_one->count = e->count;
1311 set_edge_annotated (only_one, annotated_edge);
1317 /* Propagate the basic block count and edge count on the control flow
1318 graph. We do the propagation iteratively until stablize. */
1320 static void
1321 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1323 basic_block bb;
1324 bool changed = true;
1325 int i = 0;
1327 FOR_ALL_BB_FN (bb, cfun)
1329 bb->count = ((basic_block)bb->aux)->count;
1330 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1331 set_bb_annotated (bb, annotated_bb);
1334 while (changed && i++ < 10)
1336 changed = false;
1338 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1339 changed = true;
1340 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1341 changed = true;
1342 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1346 /* Propagate counts on control flow graph and calculate branch
1347 probabilities. */
1349 static void
1350 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1352 basic_block bb;
1353 bool has_sample = false;
1355 FOR_EACH_BB_FN (bb, cfun)
1356 if (bb->count > 0)
1357 has_sample = true;
1359 if (!has_sample)
1360 return;
1362 calculate_dominance_info (CDI_POST_DOMINATORS);
1363 calculate_dominance_info (CDI_DOMINATORS);
1364 loop_optimizer_init (0);
1366 afdo_find_equiv_class (annotated_bb);
1367 afdo_propagate (annotated_bb, annotated_edge);
1369 FOR_EACH_BB_FN (bb, cfun)
1371 edge e;
1372 edge_iterator ei;
1373 int num_unknown_succ = 0;
1374 gcov_type total_count = 0;
1376 FOR_EACH_EDGE (e, ei, bb->succs)
1378 if (!is_edge_annotated (e, *annotated_edge))
1379 num_unknown_succ++;
1380 else
1381 total_count += e->count;
1383 if (num_unknown_succ == 0 && total_count > 0)
1385 FOR_EACH_EDGE (e, ei, bb->succs)
1386 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1389 FOR_ALL_BB_FN (bb, cfun)
1391 edge e;
1392 edge_iterator ei;
1394 FOR_EACH_EDGE (e, ei, bb->succs)
1395 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1396 bb->aux = NULL;
1399 loop_optimizer_finalize ();
1400 free_dominance_info (CDI_DOMINATORS);
1401 free_dominance_info (CDI_POST_DOMINATORS);
1404 /* Perform value profile transformation using AutoFDO profile. Add the
1405 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1406 indirect call promoted. */
1408 static bool
1409 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1411 basic_block bb;
1412 if (afdo_source_profile->get_function_instance_by_decl (
1413 current_function_decl) == NULL)
1414 return false;
1416 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1418 bool has_vpt = false;
1419 FOR_EACH_BB_FN (bb, cfun)
1421 if (!has_indirect_call (bb))
1422 continue;
1423 gimple_stmt_iterator gsi;
1425 gcov_type bb_count = 0;
1426 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1428 count_info info;
1429 gimple stmt = gsi_stmt (gsi);
1430 if (afdo_source_profile->get_count_info (stmt, &info))
1431 bb_count = MAX (bb_count, info.count);
1434 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1436 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1437 /* IC_promotion and early_inline_2 is done in multiple iterations.
1438 No need to promoted the stmt if its in promoted_stmts (means
1439 it is already been promoted in the previous iterations). */
1440 if ((!stmt) || gimple_call_fn (stmt) == NULL
1441 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1442 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1443 continue;
1445 count_info info;
1446 afdo_source_profile->get_count_info (stmt, &info);
1447 info.count = bb_count;
1448 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1450 /* Promote the indirect call and update the promoted_stmts. */
1451 promoted_stmts->insert (stmt);
1452 afdo_vpt (&gsi, info.targets, true);
1453 has_vpt = true;
1457 if (has_vpt)
1459 optimize_inline_calls (current_function_decl);
1460 return true;
1462 else
1463 return false;
1466 /* Annotate auto profile to the control flow graph. Do not annotate value
1467 profile for stmts in PROMOTED_STMTS. */
1469 static void
1470 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1472 basic_block bb;
1473 bb_set annotated_bb;
1474 edge_set annotated_edge;
1475 const function_instance *s
1476 = afdo_source_profile->get_function_instance_by_decl (
1477 current_function_decl);
1479 if (s == NULL)
1480 return;
1481 cgraph_node::get (current_function_decl)->count = s->head_count ();
1482 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1483 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1485 FOR_EACH_BB_FN (bb, cfun)
1487 edge e;
1488 edge_iterator ei;
1490 bb->count = 0;
1491 FOR_EACH_EDGE (e, ei, bb->succs)
1492 e->count = 0;
1494 if (afdo_set_bb_count (bb, promoted_stmts))
1495 set_bb_annotated (bb, &annotated_bb);
1496 if (bb->count > max_count)
1497 max_count = bb->count;
1499 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1500 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1502 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1503 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1504 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1506 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1507 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1509 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1510 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1511 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1513 afdo_source_profile->mark_annotated (
1514 DECL_SOURCE_LOCATION (current_function_decl));
1515 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1516 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1517 if (max_count > 0)
1519 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1520 counts_to_freqs ();
1521 profile_status_for_fn (cfun) = PROFILE_READ;
1523 if (flag_value_profile_transformations)
1525 gimple_value_profile_transformations ();
1526 free_dominance_info (CDI_DOMINATORS);
1527 free_dominance_info (CDI_POST_DOMINATORS);
1528 update_ssa (TODO_update_ssa);
1532 /* Wrapper function to invoke early inliner. */
1534 static void
1535 early_inline ()
1537 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1538 unsigned todo = early_inliner (cfun);
1539 if (todo & TODO_update_ssa_any)
1540 update_ssa (TODO_update_ssa);
1543 /* Use AutoFDO profile to annoate the control flow graph.
1544 Return the todo flag. */
1546 static unsigned int
1547 auto_profile (void)
1549 struct cgraph_node *node;
1551 if (symtab->state == FINISHED)
1552 return 0;
1554 init_node_map (true);
1555 profile_info = autofdo::afdo_profile_info;
1557 FOR_EACH_FUNCTION (node)
1559 if (!gimple_has_body_p (node->decl))
1560 continue;
1562 /* Don't profile functions produced for builtin stuff. */
1563 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1564 continue;
1566 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1568 /* First do indirect call promotion and early inline to make the
1569 IR match the profiled binary before actual annotation.
1571 This is needed because an indirect call might have been promoted
1572 and inlined in the profiled binary. If we do not promote and
1573 inline these indirect calls before annotation, the profile for
1574 these promoted functions will be lost.
1576 e.g. foo() --indirect_call--> bar()
1577 In profiled binary, the callsite is promoted and inlined, making
1578 the profile look like:
1580 foo: {
1581 loc_foo_1: count_1
1582 bar@loc_foo_2: {
1583 loc_bar_1: count_2
1584 loc_bar_2: count_3
1588 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1589 If we perform annotation on it, the profile inside bar@loc_foo2
1590 will be wasted.
1592 To avoid this, we promote loc_foo_2 and inline the promoted bar
1593 function before annotation, so the profile inside bar@loc_foo2
1594 will be useful. */
1595 autofdo::stmt_set promoted_stmts;
1596 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1598 if (!flag_value_profile_transformations
1599 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1600 break;
1601 early_inline ();
1604 early_inline ();
1605 autofdo::afdo_annotate_cfg (promoted_stmts);
1606 compute_function_frequency ();
1608 /* Local pure-const may imply need to fixup the cfg. */
1609 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1610 cleanup_tree_cfg ();
1612 free_dominance_info (CDI_DOMINATORS);
1613 free_dominance_info (CDI_POST_DOMINATORS);
1614 cgraph_edge::rebuild_edges ();
1615 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1616 pop_cfun ();
1619 return TODO_rebuild_cgraph_edges;
1621 } /* namespace autofdo. */
1623 /* Read the profile from the profile data file. */
1625 void
1626 read_autofdo_file (void)
1628 if (auto_profile_file == NULL)
1629 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1631 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1632 1, sizeof (struct gcov_ctr_summary));
1633 autofdo::afdo_profile_info->runs = 1;
1634 autofdo::afdo_profile_info->sum_max = 0;
1635 autofdo::afdo_profile_info->sum_all = 0;
1637 /* Read the profile from the profile file. */
1638 autofdo::read_profile ();
1641 /* Free the resources. */
1643 void
1644 end_auto_profile (void)
1646 delete autofdo::afdo_source_profile;
1647 delete autofdo::afdo_string_table;
1648 profile_info = NULL;
1651 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1653 bool
1654 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1656 gcov_type count
1657 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1658 if (count > 0)
1660 bool is_hot;
1661 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1662 /* At earling inline stage, profile_info is not set yet. We need to
1663 temporarily set it to afdo_profile_info to calculate hotness. */
1664 profile_info = autofdo::afdo_profile_info;
1665 is_hot = maybe_hot_count_p (NULL, count);
1666 profile_info = saved_profile_info;
1667 return is_hot;
1669 else
1670 return false;
1673 namespace
1676 const pass_data pass_data_ipa_auto_profile = {
1677 SIMPLE_IPA_PASS, "afdo", /* name */
1678 OPTGROUP_NONE, /* optinfo_flags */
1679 TV_IPA_AUTOFDO, /* tv_id */
1680 0, /* properties_required */
1681 0, /* properties_provided */
1682 0, /* properties_destroyed */
1683 0, /* todo_flags_start */
1684 0, /* todo_flags_finish */
1687 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1689 public:
1690 pass_ipa_auto_profile (gcc::context *ctxt)
1691 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1695 /* opt_pass methods: */
1696 virtual bool
1697 gate (function *)
1699 return flag_auto_profile;
1701 virtual unsigned int
1702 execute (function *)
1704 return autofdo::auto_profile ();
1706 }; // class pass_ipa_auto_profile
1708 } // anon namespace
1710 simple_ipa_opt_pass *
1711 make_pass_ipa_auto_profile (gcc::context *ctxt)
1713 return new pass_ipa_auto_profile (ctxt);