/* Inlining decision heuristics.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth, growth and
     so on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inline

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can make only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result, the
       callees of the function seen by the early inliner have already been
       optimized and the results of early inlining add a lot of optimization
       opportunities for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate the
       abstraction penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by the real
       IPA optimizers.

       Because of the lack of whole unit knowledge, the pass can not really
       make good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining leads
       to devirtualization and other modifications of the callgraph, so
       functions may become unreachable during the process.  Also functions
       declared as extern inline or virtual functions are removed, since after
       inlining we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by
       eliminating the need for an offline copy of the function.  */
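/* Illustrative example (not part of the original sources): for step 3 above,
   a static function with a single call site such as

     static int scale (int x) { return 3 * x + 1; }
     int entry (int v) { return scale (v); }

   is inlined into its only caller and the offline body of scale is then
   discarded, so the unit usually shrinks rather than grows.  */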
#include "coretypes.h"
#include "trans-mem.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "diagnostic.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "coverage.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-ssa.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static gcov_type max_count;
static sreal max_count_real, max_relbenefit_real, half_int_min_real;
static gcov_type spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
  /* Look for the function e->caller is inlined into.  While doing
     so, work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent compiler from exploding".  */
  while (true)
    {
      info = inline_summary (to);
      if (limit < info->self_size)
	limit = info->self_size;
      if (stack_size_limit < info->estimated_self_stack_size)
	stack_size_limit = info->estimated_self_stack_size;
      if (to->global.inlined_to)
	to = to->callers->caller;
      else
	break;
    }

  what_info = inline_summary (what);

  if (limit < what_info->self_size)
    limit = what_info->self_size;
  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
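  /* Illustrative example (not from the original sources): with
     --param large-function-growth=100, which is believed to be the default,
     LIMIT simply doubles.  So if the largest enclosing body has self size
     3000, inlining is rejected below once the estimated size after inlining
     exceeds 6000 (and also exceeds --param large-function-insns).  */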
  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= info->size
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }
  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o data structures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
		       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
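  /* Illustrative example (not from the original sources): with
     --param stack-frame-growth=1000, which is believed to be the default,
     a caller chain whose largest self stack frame is 200 bytes may reach an
     estimated 200 + 200 * 1000 / 100 = 2200 bytes of stack before
     CIF_LARGE_STACK_FRAME_GROWTH_LIMIT is reported below, provided the
     result also exceeds --param large-stack-frame.  */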
  inlined_stack = (outer_info->stack_frame_offset
		   + outer_info->estimated_self_stack_size
		   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If function already has large stack usage from sibling
	 inline call, we can inline, too.
	 This bit overoptimistically assumes that we are good at stack
	 packing.  */
      && inlined_stack > info->estimated_stack_size
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_file)
    {
      fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
	       xstrdup (e->caller->name ()), e->caller->order,
	       xstrdup (e->callee->name ()), e->callee->order,
	       cgraph_inline_failed_string (e->inline_failed));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  /* Don't care if sanitizer is disabled.  */
  if (!(flag_sanitize & SANITIZE_ADDRESS))
    return true;

  if (!caller || !callee)
    return true;

  return !!lookup_attribute ("no_sanitize_address",
			     DECL_ATTRIBUTES (caller)) ==
	 !!lookup_attribute ("no_sanitize_address",
			     DECL_ATTRIBUTES (callee));
}
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
		   bool disregard_limits = false)
{
  bool inlinable = true;
  enum availability avail;
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  struct function *caller_fun = e->caller->get_fun ();
  struct function *callee_fun = callee ? callee->get_fun () : NULL;

  gcc_assert (e->inline_failed);
281 if (!callee
|| !callee
->definition
)
283 e
->inline_failed
= CIF_BODY_NOT_AVAILABLE
;
286 else if (callee
->calls_comdat_local
)
288 e
->inline_failed
= CIF_USES_COMDAT_LOCAL
;
291 else if (!inline_summary (callee
)->inlinable
292 || (caller_fun
&& fn_contains_cilk_spawn_p (caller_fun
)))
294 e
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
297 else if (avail
<= AVAIL_INTERPOSABLE
)
299 e
->inline_failed
= CIF_OVERWRITABLE
;
302 else if (e
->call_stmt_cannot_inline_p
)
304 if (e
->inline_failed
!= CIF_FUNCTION_NOT_OPTIMIZED
)
305 e
->inline_failed
= CIF_MISMATCHED_ARGUMENTS
;
308 /* Don't inline if the functions have different EH personalities. */
309 else if (DECL_FUNCTION_PERSONALITY (e
->caller
->decl
)
310 && DECL_FUNCTION_PERSONALITY (callee
->decl
)
311 && (DECL_FUNCTION_PERSONALITY (e
->caller
->decl
)
312 != DECL_FUNCTION_PERSONALITY (callee
->decl
)))
314 e
->inline_failed
= CIF_EH_PERSONALITY
;
  /* TM pure functions should not be inlined into non-TM_pure
     wrappers.  */
319 else if (is_tm_pure (callee
->decl
)
320 && !is_tm_pure (e
->caller
->decl
))
322 e
->inline_failed
= CIF_UNSPECIFIED
;
  /* Don't inline if the callee can throw non-call exceptions but the
     caller cannot.
     FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
     Move the flag into cgraph node or mirror it in the inline summary.  */
329 else if (callee_fun
&& callee_fun
->can_throw_non_call_exceptions
330 && !(caller_fun
&& caller_fun
->can_throw_non_call_exceptions
))
332 e
->inline_failed
= CIF_NON_CALL_EXCEPTIONS
;
335 /* Check compatibility of target optimization options. */
336 else if (!targetm
.target_option
.can_inline_p (e
->caller
->decl
,
339 e
->inline_failed
= CIF_TARGET_OPTION_MISMATCH
;
342 /* Don't inline a function with mismatched sanitization attributes. */
343 else if (!sanitize_attrs_match_for_inline_p (e
->caller
->decl
, callee
->decl
))
345 e
->inline_failed
= CIF_ATTRIBUTE_MISMATCH
;
348 /* Check if caller growth allows the inlining. */
349 else if (!DECL_DISREGARD_INLINE_LIMITS (callee
->decl
)
351 && !lookup_attribute ("flatten",
353 (e
->caller
->global
.inlined_to
354 ? e
->caller
->global
.inlined_to
->decl
356 && !caller_growth_limits (e
))
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
361 else if (caller_tree
!= callee_tree
)
363 struct cl_optimization
*caller_opt
364 = TREE_OPTIMIZATION ((caller_tree
)
366 : optimization_default_node
);
368 struct cl_optimization
*callee_opt
369 = TREE_OPTIMIZATION ((callee_tree
)
371 : optimization_default_node
);
373 if (((caller_opt
->x_optimize
> callee_opt
->x_optimize
)
374 || (caller_opt
->x_optimize_size
!= callee_opt
->x_optimize_size
))
375 /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */
376 && !DECL_DISREGARD_INLINE_LIMITS (e
->callee
->decl
))
378 e
->inline_failed
= CIF_OPTIMIZATION_MISMATCH
;
383 if (!inlinable
&& report
)
384 report_inline_failed_reason (e
);
389 /* Return true if the edge E is inlinable during early inlining. */
392 can_early_inline_edge_p (struct cgraph_edge
*e
)
394 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
398 if (!gimple_has_body_p (callee
->decl
))
400 e
->inline_failed
= CIF_BODY_NOT_AVAILABLE
;
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee with the early inliner yet).  We don't have a CIF code for
     this case; later we will re-do the decision in the real inliner.  */
407 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->caller
->decl
))
408 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee
->decl
)))
411 fprintf (dump_file
, " edge not inlinable: not in SSA form\n");
414 if (!can_inline_edge_p (e
, true))
/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in inlining a small function.  */
438 want_early_inline_function_p (struct cgraph_edge
*e
)
440 bool want_inline
= true;
441 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
443 if (DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
445 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
446 && !flag_inline_small_functions
)
448 e
->inline_failed
= CIF_FUNCTION_NOT_INLINE_CANDIDATE
;
449 report_inline_failed_reason (e
);
454 int growth
= estimate_edge_growth (e
);
459 else if (!e
->maybe_hot_p ()
463 fprintf (dump_file
, " will not early inline: %s/%i->%s/%i, "
464 "call is cold and code would grow by %i\n",
465 xstrdup (e
->caller
->name ()),
467 xstrdup (callee
->name ()), callee
->order
,
471 else if (growth
> PARAM_VALUE (PARAM_EARLY_INLINING_INSNS
))
474 fprintf (dump_file
, " will not early inline: %s/%i->%s/%i, "
475 "growth %i exceeds --param early-inlining-insns\n",
476 xstrdup (e
->caller
->name ()),
478 xstrdup (callee
->name ()), callee
->order
,
482 else if ((n
= num_calls (callee
)) != 0
483 && growth
* (n
+ 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS
))
486 fprintf (dump_file
, " will not early inline: %s/%i->%s/%i, "
487 "growth %i exceeds --param early-inlining-insns "
488 "divided by number of calls\n",
489 xstrdup (e
->caller
->name ()),
491 xstrdup (callee
->name ()), callee
->order
,
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */
503 compute_uninlined_call_time (struct inline_summary
*callee_info
,
504 struct cgraph_edge
*edge
)
506 gcov_type uninlined_call_time
=
507 RDIV ((gcov_type
)callee_info
->time
* MAX (edge
->frequency
, 1),
509 gcov_type caller_time
= inline_summary (edge
->caller
->global
.inlined_to
510 ? edge
->caller
->global
.inlined_to
511 : edge
->caller
)->time
;
512 return uninlined_call_time
+ caller_time
;
/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */
519 compute_inlined_call_time (struct cgraph_edge
*edge
,
522 gcov_type caller_time
= inline_summary (edge
->caller
->global
.inlined_to
523 ? edge
->caller
->global
.inlined_to
524 : edge
->caller
)->time
;
525 gcov_type time
= (caller_time
526 + RDIV (((gcov_type
) edge_time
527 - inline_edge_summary (edge
)->call_stmt_time
)
528 * MAX (edge
->frequency
, 1), CGRAPH_FREQ_BASE
));
  /* Possibly one roundoff error, but watch for overflows.  */
530 gcc_checking_assert (time
>= INT_MIN
/ 2);
536 /* Return true if the speedup for inlining E is bigger than
537 PARAM_MAX_INLINE_MIN_SPEEDUP. */
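/* Illustrative example (not from the original sources): with
   --param inline-min-speedup=10, believed to be the default, a call whose
   estimated time drops from 500 to 430 units (a 14% saving) counts as a big
   speedup, while a drop from 500 to 460 (8%) does not.  */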
540 big_speedup_p (struct cgraph_edge
*e
)
542 gcov_type time
= compute_uninlined_call_time (inline_summary (e
->callee
),
544 gcov_type inlined_time
= compute_inlined_call_time (e
,
545 estimate_edge_time (e
));
546 if (time
- inlined_time
547 > RDIV (time
* PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP
), 100))
/* Return true if we are interested in inlining a small function.
   When REPORT is true, report the reason to the dump file.  */
556 want_inline_small_function_p (struct cgraph_edge
*e
, bool report
)
558 bool want_inline
= true;
559 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
561 if (DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
563 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
564 && !flag_inline_small_functions
)
566 e
->inline_failed
= CIF_FUNCTION_NOT_INLINE_CANDIDATE
;
  /* Do a fast and conservative check whether the function can be a good
     inline candidate.  At the moment we allow inline hints to
     promote non-inline functions to inline and we increase
     MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
573 else if ((!DECL_DECLARED_INLINE_P (callee
->decl
)
574 && (!e
->count
|| !e
->maybe_hot_p ()))
575 && inline_summary (callee
)->min_size
- inline_edge_summary (e
)->call_stmt_size
576 > MAX (MAX_INLINE_INSNS_SINGLE
, MAX_INLINE_INSNS_AUTO
))
578 e
->inline_failed
= CIF_MAX_INLINE_INSNS_AUTO_LIMIT
;
581 else if ((DECL_DECLARED_INLINE_P (callee
->decl
) || e
->count
)
582 && inline_summary (callee
)->min_size
- inline_edge_summary (e
)->call_stmt_size
583 > 16 * MAX_INLINE_INSNS_SINGLE
)
585 e
->inline_failed
= (DECL_DECLARED_INLINE_P (callee
->decl
)
586 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
587 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT
);
592 int growth
= estimate_edge_growth (e
);
593 inline_hints hints
= estimate_edge_hints (e
);
594 bool big_speedup
= big_speedup_p (e
);
  /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
     hints suggest that inlining the given function is very profitable.  */
600 else if (DECL_DECLARED_INLINE_P (callee
->decl
)
601 && growth
>= MAX_INLINE_INSNS_SINGLE
603 && !(hints
& (INLINE_HINT_indirect_call
604 | INLINE_HINT_known_hot
605 | INLINE_HINT_loop_iterations
606 | INLINE_HINT_array_index
607 | INLINE_HINT_loop_stride
)))
608 || growth
>= MAX_INLINE_INSNS_SINGLE
* 16))
610 e
->inline_failed
= CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
;
613 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
614 && !flag_inline_functions
)
616 /* growth_likely_positive is expensive, always test it last. */
617 if (growth
>= MAX_INLINE_INSNS_SINGLE
618 || growth_likely_positive (callee
, growth
))
620 e
->inline_failed
= CIF_NOT_DECLARED_INLINED
;
  /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
     Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
     inlining the given function is very profitable.  */
627 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
629 && !(hints
& INLINE_HINT_known_hot
)
630 && growth
>= ((hints
& (INLINE_HINT_indirect_call
631 | INLINE_HINT_loop_iterations
632 | INLINE_HINT_array_index
633 | INLINE_HINT_loop_stride
))
634 ? MAX (MAX_INLINE_INSNS_AUTO
,
635 MAX_INLINE_INSNS_SINGLE
)
636 : MAX_INLINE_INSNS_AUTO
))
638 /* growth_likely_positive is expensive, always test it last. */
639 if (growth
>= MAX_INLINE_INSNS_SINGLE
640 || growth_likely_positive (callee
, growth
))
642 e
->inline_failed
= CIF_MAX_INLINE_INSNS_AUTO_LIMIT
;
646 /* If call is cold, do not inline when function body would grow. */
647 else if (!e
->maybe_hot_p ()
648 && (growth
>= MAX_INLINE_INSNS_SINGLE
649 || growth_likely_positive (callee
, growth
)))
651 e
->inline_failed
= CIF_UNLIKELY_CALL
;
655 if (!want_inline
&& report
)
656 report_inline_failed_reason (e
);
/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlined into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */
672 want_inline_self_recursive_call_p (struct cgraph_edge
*edge
,
673 struct cgraph_node
*outer_node
,
677 char const *reason
= NULL
;
678 bool want_inline
= true;
679 int caller_freq
= CGRAPH_FREQ_BASE
;
680 int max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO
);
682 if (DECL_DECLARED_INLINE_P (edge
->caller
->decl
))
683 max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH
);
685 if (!edge
->maybe_hot_p ())
687 reason
= "recursive call is cold";
690 else if (max_count
&& !outer_node
->count
)
692 reason
= "not executed in profile";
695 else if (depth
> max_depth
)
697 reason
= "--param max-inline-recursive-depth exceeded.";
701 if (outer_node
->global
.inlined_to
)
702 caller_freq
= outer_node
->callers
->frequency
;
706 reason
= "function is inlined and unlikely";
  /* Inlining of a self recursive function into a copy of itself within another
     function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self recursive function very small.
     Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
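  /* Put differently: if the probability of taking the recursive edge on each
     invocation is p <= 1 - 1/max_depth, the expected number of consecutive
     recursive calls is bounded by the geometric series
     1 + p + p^2 + ... = 1 / (1 - p) <= max_depth.  */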
723 int max_prob
= CGRAPH_FREQ_BASE
- ((CGRAPH_FREQ_BASE
+ max_depth
- 1)
726 for (i
= 1; i
< depth
; i
++)
727 max_prob
= max_prob
* max_prob
/ CGRAPH_FREQ_BASE
;
729 && (edge
->count
* CGRAPH_FREQ_BASE
/ outer_node
->count
732 reason
= "profile of recursive call is too large";
736 && (edge
->frequency
* CGRAPH_FREQ_BASE
/ caller_freq
739 reason
= "frequency of recursive call is too large";
  /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self recursive call within a loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of the libstdc++ rb tree methods.  */
762 && (edge
->count
* 100 / outer_node
->count
763 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY
)))
765 reason
= "profile of recursive call is too small";
769 && (edge
->frequency
* 100 / caller_freq
770 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY
)))
772 reason
= "frequency of recursive call is too small";
776 if (!want_inline
&& dump_file
)
777 fprintf (dump_file
, " not inlining recursively: %s\n", reason
);
/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!can_inline_edge_p (e, true))
	return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
	*(bool *)has_hot_call = true;
    }
  return false;
}
/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}
809 /* Decide if inlining NODE would reduce unit size by eliminating
810 the offline copy of function.
811 When COLD is true the cold calls are considered, too. */
814 want_inline_function_to_all_callers_p (struct cgraph_node
*node
, bool cold
)
816 struct cgraph_node
*function
= node
->ultimate_alias_target ();
817 bool has_hot_call
= false;
819 /* Does it have callers? */
820 if (!node
->call_for_symbol_thunks_and_aliases (has_caller_p
, NULL
, true))
822 /* Already inlined? */
823 if (function
->global
.inlined_to
)
825 if (node
->ultimate_alias_target () != node
)
827 /* Inlining into all callers would increase size? */
828 if (estimate_growth (node
) > 0)
830 /* All inlines must be possible. */
831 if (node
->call_for_symbol_thunks_and_aliases
832 (check_callers
, &has_hot_call
, true))
834 if (!cold
&& !has_hot_call
)
839 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
841 /* Return relative time improvement for inlining EDGE in range
842 1...RELATIVE_TIME_BENEFIT_RANGE */
845 relative_time_benefit (struct inline_summary
*callee_info
,
846 struct cgraph_edge
*edge
,
849 gcov_type relbenefit
;
850 gcov_type uninlined_call_time
= compute_uninlined_call_time (callee_info
, edge
);
851 gcov_type inlined_call_time
= compute_inlined_call_time (edge
, edge_time
);
  /* Inlining into an extern inline function is not a win.  */
854 if (DECL_EXTERNAL (edge
->caller
->global
.inlined_to
855 ? edge
->caller
->global
.inlined_to
->decl
856 : edge
->caller
->decl
))
859 /* Watch overflows. */
860 gcc_checking_assert (uninlined_call_time
>= 0);
861 gcc_checking_assert (inlined_call_time
>= 0);
862 gcc_checking_assert (uninlined_call_time
>= inlined_call_time
);
  /* Compute relative time benefit, i.e. how much the call becomes faster.
     ??? perhaps computing how much the caller+callee together become faster
     would lead to more realistic results.  */
867 if (!uninlined_call_time
)
868 uninlined_call_time
= 1;
870 RDIV (((gcov_type
)uninlined_call_time
- inlined_call_time
) * RELATIVE_TIME_BENEFIT_RANGE
,
871 uninlined_call_time
);
872 relbenefit
= MIN (relbenefit
, RELATIVE_TIME_BENEFIT_RANGE
);
873 gcc_checking_assert (relbenefit
>= 0);
874 relbenefit
= MAX (relbenefit
, 1);
/* A cost model driving the inlining heuristics so that the edges with the
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of the nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.  */
886 edge_badness (struct cgraph_edge
*edge
, bool dump
)
889 int growth
, edge_time
;
890 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
891 struct inline_summary
*callee_info
= inline_summary (callee
);
894 if (DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
897 growth
= estimate_edge_growth (edge
);
898 edge_time
= estimate_edge_time (edge
);
899 hints
= estimate_edge_hints (edge
);
900 gcc_checking_assert (edge_time
>= 0);
901 gcc_checking_assert (edge_time
<= callee_info
->time
);
902 gcc_checking_assert (growth
<= callee_info
->size
);
906 fprintf (dump_file
, " Badness calculation for %s/%i -> %s/%i\n",
907 xstrdup (edge
->caller
->name ()),
909 xstrdup (callee
->name ()),
910 edge
->callee
->order
);
911 fprintf (dump_file
, " size growth %i, time %i ",
914 dump_inline_hints (dump_file
, hints
);
915 if (big_speedup_p (edge
))
916 fprintf (dump_file
, " big_speedup");
917 fprintf (dump_file
, "\n");
920 /* Always prefer inlining saving code size. */
923 badness
= INT_MIN
/ 2 + growth
;
925 fprintf (dump_file
, " %i: Growth %i <= 0\n", (int) badness
,
      /* When profiling is available, compute badness as:

		    relative_edge_count * relative_time_benefit
	 goodness = -------------------------------------------
		    growth_of_caller

	 badness = -goodness

	 The fraction is upside down, because on edge counts and time benefits
	 the bounds are known.  Edge growth is essentially unlimited.  */
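      /* Illustrative example (numbers made up): an edge executed half as often
	 as the hottest edge (relative count 0.5) with a relative time benefit
	 of 0.25 and a caller growth of 40 gets goodness proportional to
	 0.5 * 0.25 / 40; badness is the negated, fixed-point scaled value of
	 that, so hotter and more beneficial edges sort first.  */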
941 sreal tmp
, relbenefit_real
, growth_real
;
942 int relbenefit
= relative_time_benefit (callee_info
, edge
, edge_time
);
943 /* Capping edge->count to max_count. edge->count can be larger than
944 max_count if an inline adds new edges which increase max_count
945 after max_count is computed. */
946 gcov_type edge_count
= edge
->count
> max_count
? max_count
: edge
->count
;
948 sreal_init (&relbenefit_real
, relbenefit
, 0);
949 sreal_init (&growth_real
, growth
, 0);
951 /* relative_edge_count. */
952 sreal_init (&tmp
, edge_count
, 0);
953 sreal_div (&tmp
, &tmp
, &max_count_real
);
955 /* relative_time_benefit. */
956 sreal_mul (&tmp
, &tmp
, &relbenefit_real
);
957 sreal_div (&tmp
, &tmp
, &max_relbenefit_real
);
959 /* growth_f_caller. */
960 sreal_mul (&tmp
, &tmp
, &half_int_min_real
);
961 sreal_div (&tmp
, &tmp
, &growth_real
);
963 badness
= -1 * sreal_to_int (&tmp
);
968 " %i (relative %f): profile info. Relative count %f%s"
969 " * Relative benefit %f\n",
970 (int) badness
, (double) badness
/ INT_MIN
,
971 (double) edge_count
/ max_count
,
972 edge
->count
> max_count
? " (capped to max_count)" : "",
973 relbenefit
* 100.0 / RELATIVE_TIME_BENEFIT_RANGE
);
  /* When function local profile is available, compute badness as:

		  relative_time_benefit
     goodness =  ---------------------------------
		  growth_of_caller * overall_growth

     badness = - goodness

     compensated by the inline hints.  */
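  /* Illustrative example (numbers made up): with a relative benefit of half
     the range, an edge growth of 20 and an overall callee growth of 10, the
     badness is roughly (INT_MIN / 32) / (20 * 10); larger growth values push
     the result toward zero, i.e. make the edge less attractive.  */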
987 else if (flag_guess_branch_prob
)
989 badness
= (relative_time_benefit (callee_info
, edge
, edge_time
)
990 * (INT_MIN
/ 16 / RELATIVE_TIME_BENEFIT_RANGE
));
991 badness
/= (MIN (65536/2, growth
) * MIN (65536/2, MAX (1, callee_info
->growth
)));
992 gcc_checking_assert (badness
<=0 && badness
>= INT_MIN
/ 16);
993 if ((hints
& (INLINE_HINT_indirect_call
994 | INLINE_HINT_loop_iterations
995 | INLINE_HINT_array_index
996 | INLINE_HINT_loop_stride
))
997 || callee_info
->growth
<= 0)
999 if (hints
& (INLINE_HINT_same_scc
))
1001 else if (hints
& (INLINE_HINT_in_scc
))
1003 else if (hints
& (INLINE_HINT_cross_module
))
1005 gcc_checking_assert (badness
<= 0 && badness
>= INT_MIN
/ 2);
1006 if ((hints
& INLINE_HINT_declared_inline
) && badness
>= INT_MIN
/ 32)
1011 " %i: guessed profile. frequency %f,"
1012 " benefit %f%%, time w/o inlining %i, time w inlining %i"
1013 " overall growth %i (current) %i (original)\n",
1014 (int) badness
, (double)edge
->frequency
/ CGRAPH_FREQ_BASE
,
1015 relative_time_benefit (callee_info
, edge
, edge_time
) * 100.0
1016 / RELATIVE_TIME_BENEFIT_RANGE
,
1017 (int)compute_uninlined_call_time (callee_info
, edge
),
1018 (int)compute_inlined_call_time (edge
, edge_time
),
1019 estimate_growth (callee
),
1020 callee_info
->growth
);
  /* When function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for overall number
     of functions fully inlined in the program.  */
1029 int nest
= MIN (inline_edge_summary (edge
)->loop_depth
, 8);
1030 badness
= growth
* 256;
1032 /* Decrease badness if call is nested. */
1040 fprintf (dump_file
, " %i: no profile. nest %i\n", (int) badness
,
1044 /* Ensure that we did not overflow in all the fixed point math above. */
1045 gcc_assert (badness
>= INT_MIN
);
1046 gcc_assert (badness
<= INT_MAX
- 1);
1047 /* Make recursive inlining happen always after other inlining is done. */
1048 if (edge
->recursive_p ())
1054 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1056 update_edge_key (fibheap_t heap
, struct cgraph_edge
*edge
)
1058 int badness
= edge_badness (edge
, false);
1061 fibnode_t n
= (fibnode_t
) edge
->aux
;
1062 gcc_checking_assert (n
->data
== edge
);
      /* fibheap_replace_key only decreases the keys.
	 When we increase the key we do not update the heap
	 and instead re-insert the element once it becomes
	 a minimum of the heap.  */
1068 if (badness
< n
->key
)
1070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1073 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
1074 xstrdup (edge
->caller
->name ()),
1075 edge
->caller
->order
,
1076 xstrdup (edge
->callee
->name ()),
1077 edge
->callee
->order
,
1081 fibheap_replace_key (heap
, n
, badness
);
1082 gcc_checking_assert (n
->key
== badness
);
1087 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1090 " enqueuing call %s/%i -> %s/%i, badness %i\n",
1091 xstrdup (edge
->caller
->name ()),
1092 edge
->caller
->order
,
1093 xstrdup (edge
->callee
->name ()),
1094 edge
->callee
->order
,
1097 edge
->aux
= fibheap_insert (heap
, badness
, edge
);
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */
1108 reset_edge_caches (struct cgraph_node
*node
)
1110 struct cgraph_edge
*edge
;
1111 struct cgraph_edge
*e
= node
->callees
;
1112 struct cgraph_node
*where
= node
;
1113 struct ipa_ref
*ref
;
1115 if (where
->global
.inlined_to
)
1116 where
= where
->global
.inlined_to
;
1118 /* WHERE body size has changed, the cached growth is invalid. */
1119 reset_node_growth_cache (where
);
1121 for (edge
= where
->callers
; edge
; edge
= edge
->next_caller
)
1122 if (edge
->inline_failed
)
1123 reset_edge_growth_cache (edge
);
1125 FOR_EACH_ALIAS (where
, ref
)
1126 reset_edge_caches (dyn_cast
<cgraph_node
*> (ref
->referring
));
1132 if (!e
->inline_failed
&& e
->callee
->callees
)
1133 e
= e
->callee
->callees
;
1136 if (e
->inline_failed
)
1137 reset_edge_growth_cache (e
);
1144 if (e
->caller
== node
)
1146 e
= e
->caller
->callers
;
1148 while (!e
->next_callee
);
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */
1160 update_caller_keys (fibheap_t heap
, struct cgraph_node
*node
,
1161 bitmap updated_nodes
,
1162 struct cgraph_edge
*check_inlinablity_for
)
1164 struct cgraph_edge
*edge
;
1165 struct ipa_ref
*ref
;
1167 if ((!node
->alias
&& !inline_summary (node
)->inlinable
)
1168 || node
->global
.inlined_to
)
1170 if (!bitmap_set_bit (updated_nodes
, node
->uid
))
1173 FOR_EACH_ALIAS (node
, ref
)
1175 struct cgraph_node
*alias
= dyn_cast
<cgraph_node
*> (ref
->referring
);
1176 update_caller_keys (heap
, alias
, updated_nodes
, check_inlinablity_for
);
1179 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
1180 if (edge
->inline_failed
)
1182 if (!check_inlinablity_for
1183 || check_inlinablity_for
== edge
)
1185 if (can_inline_edge_p (edge
, false)
1186 && want_inline_small_function_p (edge
, false))
1187 update_edge_key (heap
, edge
);
1190 report_inline_failed_reason (edge
);
1191 fibheap_delete_node (heap
, (fibnode_t
) edge
->aux
);
1196 update_edge_key (heap
, edge
);
/* Recompute HEAP nodes for each uninlined call in NODE.
   This is used when we know that edge badnesses are only going to increase
   (we introduced a new call site) and thus all we need is to insert the newly
   created edges into the heap.  */
1206 update_callee_keys (fibheap_t heap
, struct cgraph_node
*node
,
1207 bitmap updated_nodes
)
1209 struct cgraph_edge
*e
= node
->callees
;
1214 if (!e
->inline_failed
&& e
->callee
->callees
)
1215 e
= e
->callee
->callees
;
1218 enum availability avail
;
1219 struct cgraph_node
*callee
;
	  /* We do not reset the callee growth cache here.  Since we added a
	     new call, growth could have just increased and consequently the
	     badness metric doesn't need updating.  */
1223 if (e
->inline_failed
1224 && (callee
= e
->callee
->ultimate_alias_target (&avail
))
1225 && inline_summary (callee
)->inlinable
1226 && avail
>= AVAIL_AVAILABLE
1227 && !bitmap_bit_p (updated_nodes
, callee
->uid
))
1229 if (can_inline_edge_p (e
, false)
1230 && want_inline_small_function_p (e
, false))
1231 update_edge_key (heap
, e
);
1234 report_inline_failed_reason (e
);
1235 fibheap_delete_node (heap
, (fibnode_t
) e
->aux
);
1245 if (e
->caller
== node
)
1247 e
= e
->caller
->callers
;
1249 while (!e
->next_callee
);
1255 /* Enqueue all recursive calls from NODE into priority queue depending on
1256 how likely we want to recursively inline the call. */
1259 lookup_recursive_calls (struct cgraph_node
*node
, struct cgraph_node
*where
,
1262 struct cgraph_edge
*e
;
1263 enum availability avail
;
1265 for (e
= where
->callees
; e
; e
= e
->next_callee
)
1266 if (e
->callee
== node
1267 || (e
->callee
->ultimate_alias_target (&avail
) == node
1268 && avail
> AVAIL_INTERPOSABLE
))
	/* When profile feedback is available, prioritize by expected number
	   of calls.  */
1272 fibheap_insert (heap
,
1273 !max_count
? -e
->frequency
1274 : -(e
->count
/ ((max_count
+ (1<<24) - 1) / (1<<24))),
1277 for (e
= where
->callees
; e
; e
= e
->next_callee
)
1278 if (!e
->inline_failed
)
1279 lookup_recursive_calls (node
, e
->callee
, heap
);
/* Decide on recursive inlining: in the case the function has recursive calls,
   inline until the body size reaches the given limit.  If any new indirect
   edges are discovered in the process, add them to *NEW_EDGES, unless
   NEW_EDGES is NULL.  */
1288 recursive_inlining (struct cgraph_edge
*edge
,
1289 vec
<cgraph_edge
*> *new_edges
)
1291 int limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO
);
1293 struct cgraph_node
*node
;
1294 struct cgraph_edge
*e
;
1295 struct cgraph_node
*master_clone
= NULL
, *next
;
1299 node
= edge
->caller
;
1300 if (node
->global
.inlined_to
)
1301 node
= node
->global
.inlined_to
;
1303 if (DECL_DECLARED_INLINE_P (node
->decl
))
1304 limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE
);
1306 /* Make sure that function is small enough to be considered for inlining. */
1307 if (estimate_size_after_inlining (node
, edge
) >= limit
)
1309 heap
= fibheap_new ();
1310 lookup_recursive_calls (node
, node
, heap
);
1311 if (fibheap_empty (heap
))
1313 fibheap_delete (heap
);
1319 " Performing recursive inlining on %s\n",
1322 /* Do the inlining and update list of recursive call during process. */
1323 while (!fibheap_empty (heap
))
1325 struct cgraph_edge
*curr
1326 = (struct cgraph_edge
*) fibheap_extract_min (heap
);
1327 struct cgraph_node
*cnode
, *dest
= curr
->callee
;
1329 if (!can_inline_edge_p (curr
, true))
      /* MASTER_CLONE is produced in the case we already started modifying
	 the function.  Be sure to redirect the edge to the original body
	 before estimating growths, otherwise we would be seeing growths after
	 inlining the already modified body.  */
1338 curr
->redirect_callee (master_clone
);
1339 reset_edge_growth_cache (curr
);
1342 if (estimate_size_after_inlining (node
, curr
) > limit
)
1344 curr
->redirect_callee (dest
);
1345 reset_edge_growth_cache (curr
);
1350 for (cnode
= curr
->caller
;
1351 cnode
->global
.inlined_to
; cnode
= cnode
->callers
->caller
)
1353 == curr
->callee
->ultimate_alias_target ()->decl
)
1356 if (!want_inline_self_recursive_call_p (curr
, node
, false, depth
))
1358 curr
->redirect_callee (dest
);
1359 reset_edge_growth_cache (curr
);
1366 " Inlining call of depth %i", depth
);
1369 fprintf (dump_file
, " called approx. %.2f times per call",
1370 (double)curr
->count
/ node
->count
);
1372 fprintf (dump_file
, "\n");
1376 /* We need original clone to copy around. */
1377 master_clone
= node
->create_clone (node
->decl
, node
->count
,
1378 CGRAPH_FREQ_BASE
, false, vNULL
,
1380 for (e
= master_clone
->callees
; e
; e
= e
->next_callee
)
1381 if (!e
->inline_failed
)
1382 clone_inlined_nodes (e
, true, false, NULL
, CGRAPH_FREQ_BASE
);
1383 curr
->redirect_callee (master_clone
);
1384 reset_edge_growth_cache (curr
);
1387 inline_call (curr
, false, new_edges
, &overall_size
, true);
1388 lookup_recursive_calls (node
, curr
->callee
, heap
);
1392 if (!fibheap_empty (heap
) && dump_file
)
1393 fprintf (dump_file
, " Recursive inlining growth limit met.\n");
1394 fibheap_delete (heap
);
1401 "\n Inlined %i times, "
1402 "body grown from size %i to %i, time %i to %i\n", n
,
1403 inline_summary (master_clone
)->size
, inline_summary (node
)->size
,
1404 inline_summary (master_clone
)->time
, inline_summary (node
)->time
);
  /* Remove the master clone we used for inlining.  We rely on the fact that
     clones inlined into the master clone get queued just before the master
     clone, so the walk below finds them before reaching the master clone
     itself.  */
1409 for (node
= symtab
->first_function (); node
!= master_clone
;
1412 next
= symtab
->next_function (node
);
1413 if (node
->global
.inlined_to
== master_clone
)
1416 master_clone
->remove ();
/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((int64_t) max_insns
	  * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
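/* Illustrative example (not from the original sources): with
   --param inline-unit-growth=30, believed to be the default at the time, a
   unit of 50000 instructions may grow to 65000, while a small
   4000-instruction unit is first rounded up to --param large-unit-insns
   (10000 by default) and may therefore grow to 13000.  */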
1436 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1439 add_new_edges_to_heap (fibheap_t heap
, vec
<cgraph_edge
*> new_edges
)
1441 while (new_edges
.length () > 0)
1443 struct cgraph_edge
*edge
= new_edges
.pop ();
1445 gcc_assert (!edge
->aux
);
1446 if (edge
->inline_failed
1447 && can_inline_edge_p (edge
, true)
1448 && want_inline_small_function_p (edge
, true))
1449 edge
->aux
= fibheap_insert (heap
, edge_badness (edge
, false), edge
);
1453 /* Remove EDGE from the fibheap. */
1456 heap_edge_removal_hook (struct cgraph_edge
*e
, void *data
)
1459 reset_node_growth_cache (e
->callee
);
1462 fibheap_delete_node ((fibheap_t
)data
, (fibnode_t
)e
->aux
);
/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
   may get inlined.  */
*e
, bool anticipate_inlining
)
1474 enum availability avail
;
1475 struct cgraph_node
*target
= e
->callee
->ultimate_alias_target (&avail
);
1476 struct cgraph_edge
*direct
, *indirect
;
1477 struct ipa_ref
*ref
;
1479 gcc_assert (e
->speculative
&& !e
->indirect_unknown_callee
);
1481 if (!e
->maybe_hot_p ())
1484 /* See if IP optimizations found something potentially useful about the
1485 function. For now we look only for CONST/PURE flags. Almost everything
1486 else we propagate is useless. */
1487 if (avail
>= AVAIL_AVAILABLE
)
1489 int ecf_flags
= flags_from_decl_or_type (target
->decl
);
1490 if (ecf_flags
& ECF_CONST
)
1492 e
->speculative_call_info (direct
, indirect
, ref
);
1493 if (!(indirect
->indirect_info
->ecf_flags
& ECF_CONST
))
1496 else if (ecf_flags
& ECF_PURE
)
1498 e
->speculative_call_info (direct
, indirect
, ref
);
1499 if (!(indirect
->indirect_info
->ecf_flags
& ECF_PURE
))
  /* If we did not manage to inline the function nor redirect
     to an ipa-cp clone (these are seen by having the local flag set),
     it is probably pointless to inline it unless the hardware is missing
     an indirect call predictor.  */
1507 if (!anticipate_inlining
&& e
->inline_failed
&& !target
->local
.local
)
1509 /* For overwritable targets there is not much to do. */
1510 if (e
->inline_failed
&& !can_inline_edge_p (e
, false, true))
1512 /* OK, speculation seems interesting. */
1516 /* We know that EDGE is not going to be inlined.
1517 See if we can remove speculation. */
1520 resolve_noninline_speculation (fibheap_t edge_heap
, struct cgraph_edge
*edge
)
1522 if (edge
->speculative
&& !speculation_useful_p (edge
, false))
1524 struct cgraph_node
*node
= edge
->caller
;
1525 struct cgraph_node
*where
= node
->global
.inlined_to
1526 ? node
->global
.inlined_to
: node
;
1527 bitmap updated_nodes
= BITMAP_ALLOC (NULL
);
1529 spec_rem
+= edge
->count
;
1530 edge
->resolve_speculation ();
1531 reset_edge_caches (where
);
1532 inline_update_overall_summary (where
);
1533 update_caller_keys (edge_heap
, where
,
1534 updated_nodes
, NULL
);
1535 update_callee_keys (edge_heap
, where
,
1537 BITMAP_FREE (updated_nodes
);
/* We use a greedy algorithm for inlining of small functions:
   All inline candidates are put into a prioritized heap ordered in
   increasing badness.

   The inlining of small functions is bounded by unit growth parameters.  */
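/* A rough sketch of the loop below (illustrative only; details such as
   lazy badness updates and the recursive-inlining special case are omitted):

     populate EDGE_HEAP with every edge passing can_inline_edge_p and
       want_inline_small_function_p, keyed by edge_badness;
     while EDGE_HEAP is not empty:
       pop the edge with the smallest (most negative) badness;
       if its cached badness is stale, re-insert it with the fresh key;
       else if unit growth or per-edge limits fail, record the reason;
       else inline_call the edge and update the keys of affected callers
	 and callees.  */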
1548 inline_small_functions (void)
1550 struct cgraph_node
*node
;
1551 struct cgraph_edge
*edge
;
1552 fibheap_t edge_heap
= fibheap_new ();
1553 bitmap updated_nodes
= BITMAP_ALLOC (NULL
);
1554 int min_size
, max_size
;
1555 auto_vec
<cgraph_edge
*> new_indirect_edges
;
1556 int initial_size
= 0;
1557 struct cgraph_node
**order
= XCNEWVEC (cgraph_node
*, symtab
->cgraph_count
);
1558 struct cgraph_edge_hook_list
*edge_removal_hook_holder
;
1559 if (flag_indirect_inlining
)
1560 new_indirect_edges
.create (8);
1562 edge_removal_hook_holder
1563 = symtab
->add_edge_removal_hook (&heap_edge_removal_hook
, edge_heap
);
  /* Compute overall unit size and other global parameters used by badness
     metrics.  */
, true, true, NULL
);
1572 FOR_EACH_DEFINED_FUNCTION (node
)
1573 if (!node
->global
.inlined_to
)
1575 if (node
->has_gimple_body_p ()
1576 || node
->thunk
.thunk_p
)
1578 struct inline_summary
*info
= inline_summary (node
);
1579 struct ipa_dfs_info
*dfs
= (struct ipa_dfs_info
*) node
->aux
;
	/* Do not account external functions, they will be optimized out
	   if not inlined.  Also only count the non-cold portion of the
	   program.  */
1583 if (!DECL_EXTERNAL (node
->decl
)
1584 && node
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
)
1585 initial_size
+= info
->size
;
1586 info
->growth
= estimate_growth (node
);
1587 if (dfs
&& dfs
->next_cycle
)
1589 struct cgraph_node
*n2
;
1590 int id
= dfs
->scc_no
+ 1;
1592 n2
= ((struct ipa_dfs_info
*) node
->aux
)->next_cycle
)
1594 struct inline_summary
*info2
= inline_summary (n2
);
1602 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
1603 if (max_count
< edge
->count
)
1604 max_count
= edge
->count
;
1606 sreal_init (&max_count_real
, max_count
, 0);
1607 sreal_init (&max_relbenefit_real
, RELATIVE_TIME_BENEFIT_RANGE
, 0);
1608 sreal_init (&half_int_min_real
, INT_MAX
/ 2, 0);
1609 ipa_free_postorder_info ();
1610 initialize_growth_caches ();
1614 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1617 overall_size
= initial_size
;
1618 max_size
= compute_max_insns (overall_size
);
1619 min_size
= overall_size
;
1621 /* Populate the heap with all edges we might inline. */
1623 FOR_EACH_DEFINED_FUNCTION (node
)
1625 bool update
= false;
1626 struct cgraph_edge
*next
;
1629 fprintf (dump_file
, "Enqueueing calls in %s/%i.\n",
1630 node
->name (), node
->order
);
1632 for (edge
= node
->callees
; edge
; edge
= next
)
1634 next
= edge
->next_callee
;
1635 if (edge
->inline_failed
1637 && can_inline_edge_p (edge
, true)
1638 && want_inline_small_function_p (edge
, true)
1639 && edge
->inline_failed
)
1641 gcc_assert (!edge
->aux
);
1642 update_edge_key (edge_heap
, edge
);
1644 if (edge
->speculative
&& !speculation_useful_p (edge
, edge
->aux
!= NULL
))
1646 edge
->resolve_speculation ();
1652 struct cgraph_node
*where
= node
->global
.inlined_to
1653 ? node
->global
.inlined_to
: node
;
1654 inline_update_overall_summary (where
);
1655 reset_node_growth_cache (where
);
1656 reset_edge_caches (where
);
1657 update_caller_keys (edge_heap
, where
,
1658 updated_nodes
, NULL
);
1659 bitmap_clear (updated_nodes
);
1663 gcc_assert (in_lto_p
1665 || (profile_info
&& flag_branch_probabilities
));
1667 while (!fibheap_empty (edge_heap
))
1669 int old_size
= overall_size
;
1670 struct cgraph_node
*where
, *callee
;
1671 int badness
= fibheap_min_key (edge_heap
);
1672 int current_badness
;
1676 edge
= (struct cgraph_edge
*) fibheap_extract_min (edge_heap
);
1677 gcc_assert (edge
->aux
);
1679 if (!edge
->inline_failed
|| !edge
->callee
->analyzed
)
      /* Be sure that caches are maintained consistently.
	 We can not make this ENABLE_CHECKING only because it causes different
	 updates of the fibheap queue.  */
1685 cached_badness
= edge_badness (edge
, false);
1686 reset_edge_growth_cache (edge
);
1687 reset_node_growth_cache (edge
->callee
);
      /* When updating the edge costs, we only decrease badness in the keys.
	 Increases of badness are handled lazily; when we see a key with an
	 out-of-date value on it, we re-insert it now.  */
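      /* Put differently: a heap key is only a lower bound on the current
	 badness.  update_edge_key never increases a key in place, so an
	 extracted edge whose recomputed badness is larger than its key is
	 simply pushed back with the correct value and considered again
	 later.  */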
1692 current_badness
= edge_badness (edge
, false);
1693 gcc_assert (cached_badness
== current_badness
);
1694 gcc_assert (current_badness
>= badness
);
1695 if (current_badness
!= badness
)
1697 edge
->aux
= fibheap_insert (edge_heap
, current_badness
, edge
);
1701 if (!can_inline_edge_p (edge
, true))
1703 resolve_noninline_speculation (edge_heap
, edge
);
1707 callee
= edge
->callee
->ultimate_alias_target ();
1708 growth
= estimate_edge_growth (edge
);
1712 "\nConsidering %s/%i with %i size\n",
1713 callee
->name (), callee
->order
,
1714 inline_summary (callee
)->size
);
1716 " to be inlined into %s/%i in %s:%i\n"
1717 " Estimated badness is %i, frequency %.2f.\n",
1718 edge
->caller
->name (), edge
->caller
->order
,
1719 flag_wpa
? "unknown"
1720 : gimple_filename ((const_gimple
) edge
->call_stmt
),
1722 : gimple_lineno ((const_gimple
) edge
->call_stmt
),
1724 edge
->frequency
/ (double)CGRAPH_FREQ_BASE
);
1726 fprintf (dump_file
," Called %"PRId64
"x\n",
1728 if (dump_flags
& TDF_DETAILS
)
1729 edge_badness (edge
, true);
1732 if (overall_size
+ growth
> max_size
1733 && !DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
1735 edge
->inline_failed
= CIF_INLINE_UNIT_GROWTH_LIMIT
;
1736 report_inline_failed_reason (edge
);
1737 resolve_noninline_speculation (edge_heap
, edge
);
1741 if (!want_inline_small_function_p (edge
, true))
1743 resolve_noninline_speculation (edge_heap
, edge
);
      /* Heuristics for inlining small functions work poorly for
	 recursive calls where we achieve an effect similar to loop unrolling.
	 When inlining such an edge seems profitable, leave the decision to
	 the special purpose recursive inliner.  */
1751 if (edge
->recursive_p ())
1753 where
= edge
->caller
;
1754 if (where
->global
.inlined_to
)
1755 where
= where
->global
.inlined_to
;
1756 if (!recursive_inlining (edge
,
1757 flag_indirect_inlining
1758 ? &new_indirect_edges
: NULL
))
1760 edge
->inline_failed
= CIF_RECURSIVE_INLINING
;
1761 resolve_noninline_speculation (edge_heap
, edge
);
1764 reset_edge_caches (where
);
1765 /* Recursive inliner inlines all recursive calls of the function
1766 at once. Consequently we need to update all callee keys. */
1767 if (flag_indirect_inlining
)
1768 add_new_edges_to_heap (edge_heap
, new_indirect_edges
);
1769 update_callee_keys (edge_heap
, where
, updated_nodes
);
1770 bitmap_clear (updated_nodes
);
1774 struct cgraph_node
*outer_node
= NULL
;
	  /* Consider the case where self recursive function A is inlined
	     into B.  This is a desired optimization in some cases, since it
	     leads to an effect similar to loop peeling and we might completely
	     optimize out the recursive call.  However we must be extra
	     selective.  */
1783 where
= edge
->caller
;
1784 while (where
->global
.inlined_to
)
1786 if (where
->decl
== callee
->decl
)
1787 outer_node
= where
, depth
++;
1788 where
= where
->callers
->caller
;
1791 && !want_inline_self_recursive_call_p (edge
, outer_node
,
1795 = (DECL_DISREGARD_INLINE_LIMITS (edge
->callee
->decl
)
1796 ? CIF_RECURSIVE_INLINING
: CIF_UNSPECIFIED
);
1797 resolve_noninline_speculation (edge_heap
, edge
);
1800 else if (depth
&& dump_file
)
1801 fprintf (dump_file
, " Peeling recursion with depth %i\n", depth
);
1803 gcc_checking_assert (!callee
->global
.inlined_to
);
1804 inline_call (edge
, true, &new_indirect_edges
, &overall_size
, true);
1805 if (flag_indirect_inlining
)
1806 add_new_edges_to_heap (edge_heap
, new_indirect_edges
);
1808 reset_edge_caches (edge
->callee
);
1809 reset_node_growth_cache (callee
);
1811 update_callee_keys (edge_heap
, where
, updated_nodes
);
1813 where
= edge
->caller
;
1814 if (where
->global
.inlined_to
)
1815 where
= where
->global
.inlined_to
;
	  /* Our profitability metric can depend on local properties
	     such as the number of inlinable calls and the size of the function
	     body.  After inlining these properties might change for the
	     function we inlined into (since its body size changed) and for the
	     functions called by the function we inlined (since the number of
	     their inlinable callers might change).  */
1823 update_caller_keys (edge_heap
, where
, updated_nodes
, NULL
);
1824 bitmap_clear (updated_nodes
);
1829 " Inlined into %s which now has time %i and size %i,"
1830 "net change of %+i.\n",
1831 edge
->caller
->name (),
1832 inline_summary (edge
->caller
)->time
,
1833 inline_summary (edge
->caller
)->size
,
1834 overall_size
- old_size
);
1836 if (min_size
> overall_size
)
1838 min_size
= overall_size
;
1839 max_size
= compute_max_insns (min_size
);
1842 fprintf (dump_file
, "New minimal size reached: %i\n", min_size
);
1846 free_growth_caches ();
1847 fibheap_delete (edge_heap
);
1850 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1851 initial_size
, overall_size
,
1852 initial_size
? overall_size
* 100 / (initial_size
) - 100: 0);
1853 BITMAP_FREE (updated_nodes
);
1854 symtab
->remove_edge_removal_hook (edge_removal_hook_holder
);
/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */
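/* For instance (illustrative, not from the original sources), a function
   declared as

     __attribute__ ((flatten)) void f (void) { g (); h (); }

   requests that the bodies of g and h (and, recursively, of their callees)
   be inlined into f whenever that is possible.  */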
1861 flatten_function (struct cgraph_node
*node
, bool early
)
1863 struct cgraph_edge
*e
;
1865 /* We shouldn't be called recursively when we are being processed. */
1866 gcc_assert (node
->aux
== NULL
);
1868 node
->aux
= (void *) node
;
1870 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1872 struct cgraph_node
*orig_callee
;
1873 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
	  /* We've hit a cycle?  It is time to give up.  */
1880 "Not inlining %s into %s to avoid cycle.\n",
1881 xstrdup (callee
->name ()),
1882 xstrdup (e
->caller
->name ()));
1883 e
->inline_failed
= CIF_RECURSIVE_INLINING
;
1887 /* When the edge is already inlined, we just need to recurse into
1888 it in order to fully flatten the leaves. */
1889 if (!e
->inline_failed
)
1891 flatten_function (callee
, early
);
      /* The flatten attribute needs to be processed during late inlining.  For
	 extra code quality we however do flattening during early optimization,
	 too.  */
1899 ? !can_inline_edge_p (e
, true)
1900 : !can_early_inline_edge_p (e
))
1903 if (e
->recursive_p ())
1906 fprintf (dump_file
, "Not inlining: recursive call.\n");
1910 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1911 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee
->decl
)))
1914 fprintf (dump_file
, "Not inlining: SSA form does not match.\n");
1918 /* Inline the edge and flatten the inline clone. Avoid
1919 recursing through the original node if the node was cloned. */
1921 fprintf (dump_file
, " Inlining %s into %s.\n",
1922 xstrdup (callee
->name ()),
1923 xstrdup (e
->caller
->name ()));
1924 orig_callee
= callee
;
1925 inline_call (e
, true, NULL
, NULL
, false);
1926 if (e
->callee
!= orig_callee
)
1927 orig_callee
->aux
= (void *) node
;
1928 flatten_function (e
->callee
, early
);
1929 if (e
->callee
!= orig_callee
)
1930 orig_callee
->aux
= NULL
;
1934 if (!node
->global
.inlined_to
)
1935 inline_update_overall_summary (node
);
/* Count number of callers of NODE and store it into DATA (that
   points to an int).  Worker for cgraph_for_node_and_aliases.  */

static bool
sum_callers (struct cgraph_node *node, void *data)
{
  struct cgraph_edge *e;
  int *num_calls = (int *)data;

  for (e = node->callers; e; e = e->next_caller)
    (*num_calls)++;
  return false;
}
/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
   DATA points to the number of calls originally found so we avoid infinite
   recursion.  */
*node
, void *data
)
1959 int *num_calls
= (int *)data
;
1960 bool callee_removed
= false;
1962 while (node
->callers
&& !node
->global
.inlined_to
)
1964 struct cgraph_node
*caller
= node
->callers
->caller
;
1969 "\nInlining %s size %i.\n",
1971 inline_summary (node
)->size
);
1973 " Called once from %s %i insns.\n",
1974 node
->callers
->caller
->name (),
1975 inline_summary (node
->callers
->caller
)->size
);
1978 inline_call (node
->callers
, true, NULL
, NULL
, true, &callee_removed
);
1981 " Inlined into %s which now has %i size\n",
1983 inline_summary (caller
)->size
);
1984 if (!(*num_calls
)--)
1987 fprintf (dump_file
, "New calls found; giving up.\n");
1988 return callee_removed
;
1996 /* Output overall time estimate. */
1998 dump_overall_stats (void)
2000 int64_t sum_weighted
= 0, sum
= 0;
2001 struct cgraph_node
*node
;
2003 FOR_EACH_DEFINED_FUNCTION (node
)
2004 if (!node
->global
.inlined_to
2007 int time
= inline_summary (node
)->time
;
2009 sum_weighted
+= time
* node
->count
;
2011 fprintf (dump_file
, "Overall time estimate: "
2012 "%"PRId64
" weighted by profile: "
2013 "%"PRId64
"\n", sum
, sum_weighted
);
/* Output some useful stats about inlining.  */

static void
dump_inline_stats (void)
{
  int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
  int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
  int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
  int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
  int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
  int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
  int64_t reason[CIF_N_REASONS][3];
  int i;
  struct cgraph_node *node;

  memset (reason, 0, sizeof (reason));
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    struct cgraph_edge *e;
    for (e = node->callees; e; e = e->next_callee)
      {
	if (e->inline_failed)
	  {
	    reason[(int) e->inline_failed][0] += e->count;
	    reason[(int) e->inline_failed][1] += e->frequency;
	    reason[(int) e->inline_failed][2] ++;
	    if (DECL_VIRTUAL_P (e->callee->decl))
	      {
		if (e->indirect_inlining_edge)
		  noninlined_virt_indir_cnt += e->count;
		else
		  noninlined_virt_cnt += e->count;
	      }
	    else
	      {
		if (e->indirect_inlining_edge)
		  noninlined_indir_cnt += e->count;
		else
		  noninlined_cnt += e->count;
	      }
	  }
	else
	  {
	    if (e->speculative)
	      {
		if (DECL_VIRTUAL_P (e->callee->decl))
		  inlined_speculative_ply += e->count;
		else
		  inlined_speculative += e->count;
	      }
	    else if (DECL_VIRTUAL_P (e->callee->decl))
	      {
		if (e->indirect_inlining_edge)
		  inlined_virt_indir_cnt += e->count;
		else
		  inlined_virt_cnt += e->count;
	      }
	    else
	      {
		if (e->indirect_inlining_edge)
		  inlined_indir_cnt += e->count;
		else
		  inlined_cnt += e->count;
	      }
	  }
      }
    for (e = node->indirect_calls; e; e = e->next_callee)
      if (e->indirect_info->polymorphic)
	indirect_poly_cnt += e->count;
      else
	indirect_cnt += e->count;
  }
  if (max_count)
    {
      fprintf (dump_file,
	       "Inlined %"PRId64" + speculative "
	       "%"PRId64" + speculative polymorphic "
	       "%"PRId64" + previously indirect "
	       "%"PRId64" + virtual "
	       "%"PRId64" + virtual and previously indirect "
	       "%"PRId64"\n" "Not inlined "
	       "%"PRId64" + previously indirect "
	       "%"PRId64" + virtual "
	       "%"PRId64" + virtual and previously indirect "
	       "%"PRId64" + still indirect "
	       "%"PRId64" + still indirect polymorphic "
	       "%"PRId64"\n", inlined_cnt,
	       inlined_speculative, inlined_speculative_ply,
	       inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
	       noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
	       noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
      fprintf (dump_file,
	       "Removed speculations %"PRId64"\n",
	       spec_rem);
    }
  dump_overall_stats ();
  fprintf (dump_file, "\nWhy inlining failed?\n");
  for (i = 0; i < CIF_N_REASONS; i++)
    if (reason[i][2])
      fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
	       cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
	       (int) reason[i][2], (int) reason[i][1], reason[i][0]);
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i;
  int cold;
  bool remove_functions = false;

  if (!optimize)
    return 0;

  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  if (in_lto_p && optimize)
    ipa_update_after_lto_read ();

  if (dump_file)
    dump_inline_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);

  FOR_EACH_FUNCTION (node)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];

      /* Handle nodes to be flattened.
	 Ideally when processing callees we stop inlining at the
	 entry of cycles, possibly cloning that entry point and
	 try to flatten itself turning it into a self-recursive
	 function.  */
      if (lookup_attribute ("flatten",
			    DECL_ATTRIBUTES (node->decl)) != NULL)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Flattening %s\n", node->name ());
	  flatten_function (node, false);
	}
    }
  if (dump_file)
    dump_overall_stats ();

  inline_small_functions ();

  /* Do first after-inlining removal.  We want to remove all "stale" extern inline
     functions and virtual functions so we really know what is called once.  */
  symtab->remove_unreachable_nodes (false, dump_file);
  free (order);

  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless of the callee size as long as function growth limits
     are met.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");

  /* Inlining one function called once has a good chance of preventing
     inlining of another function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is a good cut without the extra pain of maintaining the queue.

     ??? This is not really fitting the bill perfectly: inlining a function
     into a callee often leads to better optimization of the callee due to
     increased context for optimization.
     For example if main() calls a function that outputs help
     and then a function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.

     We probably want to implement a new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
     to be hot.  */
  for (cold = 0; cold <= 1; cold++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
	{
	  struct cgraph_edge *edge, *next;
	  bool update = false;

	  for (edge = node->callees; edge; edge = next)
	    {
	      next = edge->next_callee;
	      if (edge->speculative && !speculation_useful_p (edge, false))
		{
		  edge->resolve_speculation ();
		  spec_rem += edge->count;
		  update = true;
		  remove_functions = true;
		}
	    }
	  if (update)
	    {
	      struct cgraph_node *where = node->global.inlined_to
					  ? node->global.inlined_to : node;
	      reset_node_growth_cache (where);
	      reset_edge_caches (where);
	      inline_update_overall_summary (where);
	    }
	  if (flag_inline_functions_called_once
	      && want_inline_function_to_all_callers_p (node, cold))
	    {
	      int num_calls = 0;
	      node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
							true);
	      while (node->call_for_symbol_thunks_and_aliases
		       (inline_to_all_callers, &num_calls, true))
		;
	      remove_functions = true;
	    }
	}
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  if (optimize)
    ipa_free_all_structures_after_iinln ();

  if (dump_file)
    {
      fprintf (dump_file,
	       "\nInlined %i calls, eliminated %i functions\n\n",
	       ncalls_inlined, nfunctions_inlined);
      dump_inline_stats ();
    }

  if (dump_file)
    dump_inline_summaries (dump_file);
  /* In WPA we use inline summaries for partitioning process.  */
  if (!flag_wpa)
    inline_free_summary ();
  return remove_functions ? TODO_remove_functions : 0;
}
/* Inline always-inline function calls in NODE.  */

static bool
inline_always_inline_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	continue;

      if (e->recursive_p ())
	{
	  if (dump_file)
	    fprintf (dump_file, "  Not inlining recursive call to %s.\n",
		     e->callee->name ());
	  e->inline_failed = CIF_RECURSIVE_INLINING;
	  continue;
	}

      if (!can_early_inline_edge_p (e))
	{
	  /* Set inlined to true if the callee is marked "always_inline" but
	     is not inlinable.  This will allow flagging an error later in
	     expand_call_inline in tree-inline.c.  */
	  if (lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (callee->decl)) != NULL)
	    inlined = true;
	  continue;
	}

      if (dump_file)
	fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
		 xstrdup (e->callee->name ()),
		 xstrdup (e->caller->name ()));
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }
  if (inlined)
    inline_update_overall_summary (node);

  return inlined;
}
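/* Illustrative sketch (not part of this file; the helper name is made up):
   inline_always_inline_functions honours DECL_DISREGARD_INLINE_LIMITS, which
   the front ends set for declarations such as

       static inline int sq (int x) __attribute__ ((always_inline));
       static inline int sq (int x) { return x * x; }

   Calls to sq are inlined even when not optimizing; if inlining turns out to
   be impossible, an error is reported later from expand_call_inline in
   tree-inline.c, as the comment above notes.  */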
/* Inline small functions called by NODE during early inlining.  Return
   true if anything was inlined.  */

static bool
early_inline_small_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      if (!inline_summary (callee)->inlinable
	  || !e->inline_failed)
	continue;

      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
	  && !flag_inline_small_functions
	  && !flag_inline_functions)
	continue;

      if (dump_file)
	fprintf (dump_file, "Considering inline candidate %s.\n",
		 callee->name ());

      if (!can_early_inline_edge_p (e))
	continue;

      if (e->recursive_p ())
	{
	  if (dump_file)
	    fprintf (dump_file, "  Not inlining: recursive call.\n");
	  continue;
	}

      if (!want_early_inline_function_p (e))
	continue;

      if (dump_file)
	fprintf (dump_file, " Inlining %s into %s.\n",
		 xstrdup (callee->name ()),
		 xstrdup (e->caller->name ()));
      inline_call (e, true, NULL, NULL, true);
      inlined = true;
    }

  return inlined;
}
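/* Usage note (an assumption about typical tuning, not verified here): the
   early inliner driven by pass_early_inline below can be disabled with
   -fno-early-inlining, and the number of incremental iterations it performs
   is capped by PARAM_EARLY_INLINER_MAX_ITERATIONS, adjustable from the
   command line, e.g.

       gcc -O2 --param early-inliner-max-iterations=3 foo.c
 */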
/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.  */

namespace {

const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_early_inline
unsigned int
pass_early_inline::execute (function *fun)
{
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if datastructures for ipa-inliner are already computed.  This
     happens when some pass decides to construct a new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse us when the early inliner decides to inline a call
     to a function clone, because function clones don't have parameter lists
     in ipa-prop matching their signature.  */
  if (ipa_node_params_vector.exists ())
    return 0;

#ifdef ENABLE_CHECKING
  node->verify ();
#endif
  node->remove_all_references ();

  /* Even when not optimizing or not inlining inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
	 during incremental inlining.  This sucks as functions calling
	 always inline functions will get less optimized, but at the
	 same time inlining of functions calling always inline
	 function into an always inline function might introduce
	 cycles of edges to be always inlined in the callgraph.

	 We might want to be smarter and just avoid this type of inlining.  */
      || DECL_DISREGARD_INLINE_LIMITS (node->decl))
    ;
  else if (lookup_attribute ("flatten",
			     DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
	 all calls in it.  */
      if (dump_file)
	fprintf (dump_file,
		 "Flattening %s\n", node->name ());
      flatten_function (node, true);
      inlined = true;
    }
  else
    {
      /* We iterate incremental inlining to get trivial cases of indirect
	 inlining.  */
      while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
	     && early_inline_small_functions (node))
	{
	  timevar_push (TV_INTEGRATION);
	  todo |= optimize_inline_calls (current_function_decl);

	  /* Technically we ought to recompute inline parameters so the new
	     iteration of early inliner works as expected.  We however have
	     values approximately right and thus we only need to update edge
	     info that might be cleared out for newly discovered edges.  */
	  for (edge = node->callees; edge; edge = edge->next_callee)
	    {
	      struct inline_edge_summary *es = inline_edge_summary (edge);
	      es->call_stmt_size
		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
	      es->call_stmt_time
		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
	      if (edge->callee->decl
		  && !gimple_check_call_matching_types (
		      edge->call_stmt, edge->callee->decl, false))
		edge->call_stmt_cannot_inline_p = true;
	    }
	  if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
	    inline_update_overall_summary (node);
	  timevar_pop (TV_INTEGRATION);
	  iterations++;
	  inlined = false;
	}
      if (dump_file)
	fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  fun->always_inline_functions_inlined = true;

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_early_inline (gcc::context *ctxt)
{
  return new pass_early_inline (ctxt);
}
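/* Note (an assumption about how these factories are consumed, not verified
   here): make_pass_early_inline is instantiated by the pass manager through
   the pass list in passes.def, roughly as

       NEXT_PASS (pass_early_inline);

   inside the early local passes; make_pass_ipa_inline below is referenced
   the same way among the IPA passes.  */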
namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
		      inline_generate_summary, /* generate_summary */
		      inline_write_summary, /* write_summary */
		      inline_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      inline_transform, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_inline (gcc::context *ctxt)
{
  return new pass_ipa_inline (ctxt);
}