/* Inlining decision heuristics.
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

      can_inline_edge_p allows checking that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, growth
      and so on).

      Functions are inlined when it is obvious the result is profitable (such
      as functions called once or when inlining reduces code size).
      In addition to that we perform inlining of small functions and recursive
      inlining.

   inlining heuristics

      The inliner itself is split into two passes:

      pass_early_inline

	Simple local inlining pass inlining callees into the current function.
	This pass makes no use of whole unit analysis and thus it can make only
	very simple decisions based on local properties.

	The strength of the pass is that it is run in topological order
	(reverse postorder) on the callgraph.  Functions are converted into SSA
	form just before this pass and optimized subsequently.  As a result,
	the callees of the function seen by the early inliner were already
	optimized and the results of early inlining add a lot of optimization
	opportunities for the local optimization.

	The pass handles the obvious inlining decisions within the compilation
	unit - inlining auto inline functions, inlining for size and
	flattening.

	The main strength of the pass is the ability to eliminate abstraction
	penalty in C++ code (via a combination of inlining and early
	optimization) and thus improve the quality of analysis done by the
	real IPA optimizers.

	Because of the lack of whole unit knowledge, the pass cannot really
	make good code size/performance tradeoffs.  It however does very
	simple speculative inlining allowing code size to grow by
	EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
	the optimizations performed later are very likely to eliminate the
	cost.

      pass_ipa_inline

	This is the real inliner able to handle inlining with whole program
	knowledge.  It performs the following steps:

	1) inlining of small functions.  This is implemented by a greedy
	algorithm ordering all inlinable cgraph edges by their badness and
	inlining them in this order as long as the inline limits allow
	doing so.

	This heuristic is not very good at inlining recursive calls.
	Recursive calls can be inlined with results similar to loop unrolling.
	To do so, a special purpose recursive inliner is executed on the
	function when a recursive edge is met as a viable candidate.

	2) Unreachable functions are removed from the callgraph.  Inlining
	leads to devirtualization and other modifications of the callgraph,
	so functions may become unreachable during the process.  Also
	functions declared as extern inline or virtual functions are removed,
	since after inlining we no longer need the offline bodies.

	3) Functions called once and not exported from the unit are inlined.
	This should almost always lead to a reduction of code size by
	eliminating the need for an offline copy of the function.  */
94 #include "coretypes.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
107 #include "tree-inline.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
122 #include "ipa-strub.h"
/* Inliner uses greedy algorithm to inline calls in a priority order.
   Badness is used as the key in a Fibonacci heap which roughly corresponds
   to negation of benefit to cost ratios.
   In case multiple calls have the same priority we want to stabilize the
   outcomes, for which we use ids.  */
class inline_badness
{
public:
  sreal badness;
  int uid;
  inline_badness ()
  : badness (sreal::min ()), uid (0)
  {
  }
  inline_badness (cgraph_edge *e, sreal b)
  : badness (b), uid (e->get_uid ())
  {
  }
  bool operator<= (const inline_badness &other)
  {
    if (badness != other.badness)
      return badness <= other.badness;
    return uid <= other.uid;
  }
  bool operator== (const inline_badness &other)
  {
    return badness == other.badness && uid == other.uid;
  }
  bool operator!= (const inline_badness &other)
  {
    return badness != other.badness || uid != other.uid;
  }
  bool operator< (const inline_badness &other)
  {
    if (badness != other.badness)
      return badness < other.badness;
    return uid < other.uid;
  }
  bool operator> (const inline_badness &other)
  {
    if (badness != other.badness)
      return badness > other.badness;
    return uid > other.uid;
  }
};

typedef fibonacci_heap <inline_badness, cgraph_edge> edge_heap_t;
typedef fibonacci_node <inline_badness, cgraph_edge> edge_heap_node_t;
/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the limit on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get inlined.  */
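/* Illustrative example (hypothetical numbers, not taken from any particular
   build): if the largest function body on the inline chain has self_size 400
   and param_large_function_growth is 100, the growth limit below becomes
   400 + 400 * 100 / 100 = 800 size units.  An edge whose estimated size after
   inlining is 900 would then be refused with
   CIF_LARGE_FUNCTION_GROWTH_LIMIT, provided it also exceeds
   param_large_function_insns.  */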
static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for function e->caller is inlined to.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits based on that.  Not on the self size of the
     outer function, not on the self size of inline code
     we immediately inline to.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
	limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
	stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
	to = to->callers->caller;
      else
	break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > opt_for_fn (to->decl, param_large_function_insns)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large I/O data structures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
		       * opt_for_fn (to->decl, param_stack_frame_growth)
		       / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
		   + outer_info->estimated_self_stack_size
		   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If function already has large stack usage from sibling
	 inline call, we can inline, too.
	 This bit overoptimistically assumes that we are good at stack
	 packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
		       "  not inlinable: %C -> %C, %s\n",
		       e->caller, e->callee,
		       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	  && e->caller->lto_file_data
	  && e->callee->ultimate_alias_target ()->lto_file_data)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			   "  LTO objects: %s, %s\n",
			   e->caller->lto_file_data->file_name,
			   e->callee->ultimate_alias_target ()->lto_file_data->file_name);
	}
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
	if (dump_file)
	  cl_target_option_print_diff
	    (dump_file, 2, target_opts_for_fn (e->caller->decl),
	     target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	if (dump_file)
	  cl_optimization_print_diff
	    (dump_file, 2, opts_for_fn (e->caller->decl),
	     opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Follow clang and allow inlining for always_inline functions.  */
  if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  const sanitize_code codes[] =
    {
      SANITIZE_ADDRESS,
      SANITIZE_THREAD,
      SANITIZE_UNDEFINED,
      SANITIZE_UNDEFINED_NONDEFAULT,
      SANITIZE_POINTER_COMPARE,
      SANITIZE_POINTER_SUBTRACT
    };

  for (unsigned i = 0; i < ARRAY_SIZE (codes); i++)
    if (sanitize_flags_p (codes[i], caller)
	!= sanitize_flags_p (codes[i], callee))
      return false;

  if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
    return false;

  return true;
}
/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag		\
    && (!always_inline 					\
	|| opts_for_fn (caller->decl)->x_##flag		\
	   < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag		\
    && (!always_inline 					\
	|| opts_for_fn (caller->decl)->x_##flag		\
	   > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where exact match is needed for correctness.  */
#define check_match(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag)
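/* Illustrative example with hypothetical flag values: for an always_inline
   callee, check_maybe_up (flag_trapping_math) stays false when the caller is
   built with -ftrapping-math (value 1) and the callee with -fno-trapping-math
   (value 0), because the caller's value is greater; any other difference
   makes the macro true and the edge is refused as CIF_OPTIMIZATION_MISMATCH.
   For functions that are not always_inline, any difference at all counts as
   a mismatch.  */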
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
		   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
		 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
	   && DECL_FUNCTION_PERSONALITY (callee->decl)
	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
						callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
	   || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }

  if (inlinable && !strub_inlinable_to_p (callee, caller))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return inlining_insns_single limit for function N.  If HINT or HINT2 is
   true scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint, bool hint2)
{
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
	spd = 1000000;
      return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
    }
  if (hint || hint2)
    return opt_for_fn (n->decl, param_max_inline_insns_single)
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return opt_for_fn (n->decl, param_max_inline_insns_single);
}
/* Return inlining_insns_auto limit for function N.  If HINT or HINT2 is true
   scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
	spd = 1000000;
      return max_inline_insns_auto * spd / 100;
    }
  if (hint || hint2)
    return max_inline_insns_auto
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return max_inline_insns_auto;
}
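/* Worked example with hypothetical parameter values: if
   param_max_inline_insns_auto is 15 and param_inline_heuristics_hint_percent
   is 200, a single matching hint group raises the bound to
   15 * 200 / 100 = 30 instructions; when both hint groups match, the hint
   percentage is applied quadratically (with a cap), raising the bound much
   further.  */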
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
			     bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
			    DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
	     (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	      && lookup_attribute ("always_inline",
				   DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
	 below and inlined across optimization boundaries.
	 Enabling checks below breaks several packages by refusing
	 to inline library always_inline functions.  See PR65873.
	 Disable the check for early inlining for now until a better
	 solution is found.  */
      if (always_inline && early)
	;
      /* There are some options that change IL semantics which means
	 we cannot inline in these cases for correctness reasons.
	 Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
	       || check_match (flag_trapv)
	       || check_match (flag_pcc_struct_return)
	       || check_maybe_down (optimize_debug)
	       /* When caller or callee does FP math, be sure FP codegen flags
		  are compatible.  */
	       || ((caller_info->fp_expressions && callee_info->fp_expressions)
		   && (check_maybe_up (flag_rounding_math)
		       || check_maybe_up (flag_trapping_math)
		       || check_maybe_down (flag_unsafe_math_optimizations)
		       || check_maybe_down (flag_finite_math_only)
		       || check_maybe_up (flag_signaling_nans)
		       || check_maybe_down (flag_cx_limited_range)
		       || check_maybe_up (flag_signed_zeros)
		       || check_maybe_down (flag_associative_math)
		       || check_maybe_down (flag_reciprocal_math)
		       || check_maybe_down (flag_fp_int_builtin_inexact)
		       /* Strictly speaking only when the callee contains
			  function calls that may end up setting errno.  */
		       || check_maybe_up (flag_errno_math)))
	       /* We do not want to make code compiled with exceptions to be
		  brought into a non-EH function unless we know that the
		  callee does not throw.
		  This is tracked by DECL_FUNCTION_PERSONALITY.  */
	       || (check_maybe_up (flag_non_call_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       || (check_maybe_up (flag_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       /* When devirtualization is disabled for callee, it is not safe
		  to inline it as we possibly mangled the type info.
		  Allow early inlining of always inlines.  */
	       || (!early && check_maybe_down (flag_devirtualize)))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
	;
      /* When user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If explicit optimize attributes are not used, the mismatch is caused
	 by different command line options used to build different units.
	 Do not care about COMDAT functions - those are intended to be
	 optimized with the optimization flags of the module they are used in.
	 Also do not care about mixing up size/speed optimization when
	 DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
		&& !lookup_attribute ("optimize",
				      DECL_ATTRIBUTES (caller->decl)))
	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	;
      /* If mismatch is caused by merging two LTO units with different
	 optimization flags we want to be a bit nicer.  However never inline
	 if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
	       || !opt_for_fn (caller->decl, optimize))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If callee is optimized for size and caller is not, allow inlining if
	 code shrinks or we are in param_max_inline_insns_single limit and
	 callee is inline (and thus likely an unified comdat).
	 This will allow caller to run faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       > opt_for_fn (caller->decl, optimize_size))
	{
	  int growth = estimate_edge_growth (e);
	  if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
	      && (!DECL_DECLARED_INLINE_P (callee->decl)
		  && growth >= MAX (inline_insns_single (caller, false, false),
				    inline_insns_auto (caller, false, false))))
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
      /* If callee is more aggressively optimized for performance than caller,
	 we generally want to inline only cheap (runtime wise) functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       < opt_for_fn (caller->decl, optimize_size)
	       || (opt_for_fn (callee->decl, optimize)
		   > opt_for_fn (caller->decl, optimize)))
	{
	  if (estimate_edge_time (e)
	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* Early inliner might get called at WPA stage when IPA pass adds new
     function.  In this case we cannot really do any of early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  gcc_assert (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
	      && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)));
  if (profile_arc_flag
      && ((lookup_attribute ("no_profile_instrument_function",
			     DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
	  != (lookup_attribute ("no_profile_instrument_function",
				DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
    return false;

  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  /* When inlining regular functions into always-inline functions
     during early inlining watch for possible inline cycles.  */
  if (DECL_DISREGARD_INLINE_LIMITS (caller->decl)
      && lookup_attribute ("always_inline", DECL_ATTRIBUTES (caller->decl))
      && (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	  || !lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))))
    {
      /* If there are indirect calls, inlining may produce direct call.
	 TODO: We may lift this restriction if we avoid errors on formerly
	 indirect calls to always_inline functions.  Taking the address
	 of an always_inline function is generally a bad idea and should
	 have been declared as undefined, but sadly we allow this.  */
      if (caller->indirect_calls || e->callee->indirect_calls)
	return false;
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
      if (callee_info->safe_to_inline_to_always_inline)
	return callee_info->safe_to_inline_to_always_inline - 1;
      for (cgraph_edge *e2 = callee->callees; e2; e2 = e2->next_callee)
	{
	  struct cgraph_node *callee2 = e2->callee->ultimate_alias_target ();
	  /* As early inliner runs in RPO order, we will see uninlined
	     always_inline calls only in the case of cyclic graphs.  */
	  if (DECL_DISREGARD_INLINE_LIMITS (callee2->decl)
	      || lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee2->decl)))
	    {
	      callee_info->safe_to_inline_to_always_inline = 1;
	      return false;
	    }
	  /* With LTO watch for case where function is later replaced
	     by always_inline definition.
	     TODO: We may either stop treating noninlined cross-module always
	     inlines as errors, or we can extend decl merging to produce
	     syntactic alias and honor always inline only in units it has
	     been declared as such.  */
	  if (flag_lto && callee2->externally_visible)
	    {
	      callee_info->safe_to_inline_to_always_inline = 1;
	      return false;
	    }
	}
      callee_info->safe_to_inline_to_always_inline = 2;
    }
  return true;
}
/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in inlining small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before profile summary, all
     hot paths' IR look exactly the same as the profiled binary.  As a result,
     in the early inliner, we will disregard the size limit and inline those
     callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      if (min_growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow "
			     "at least by %i\n",
			     e->caller, callee, min_growth);
	  want_inline = false;
	}
      else
	growth = estimate_edge_growth (e);

      if (!want_inline || growth <= param_max_inline_insns_size)
	;
      else if (!e->maybe_hot_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow by %i\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
      else if (growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
      else if ((n = num_calls (callee)) != 0
	       && growth * (n + 1) > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns "
			     "divided by number of calls\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
    }
  return want_inline;
}
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
			     sreal uninlined_call_time,
			     sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
			   sreal time,
			   sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match one in ipa-inline-analysis.cc
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
/* Determine time saved by inlining EDGE of frequency FREQ
   where callee's runtime w/o inlining is UNINLINED_TIME
   and with inlining is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
		  sreal freq,
		  sreal uninlined_time,
		  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      speedup = (speedup + call_time);
      if (freq != 1)
	speedup = speedup * freq;
    }
  else if (freq == 0)
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup.  */
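/* For example (illustrative numbers, not from any particular run): with
   param_inline_min_speedup of 15, an uninlined caller+callee time estimate
   of 200 and an inlined estimate of 160, the check below computes
   (200 - 160) * 100 = 4000 > 200 * 15 = 3000, so the speedup is considered
   big and the size limits may be bypassed.  */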
static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to
			 : e->caller);
  int limit = opt_for_fn (caller->decl, param_inline_min_speedup);

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
/* Return true if we are interested in inlining small function.
   When REPORT is true, report reason to dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  cgraph_node *to = (e->caller->inlined_to
		     ? e->caller->inlined_to : e->caller);

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do fast and conservative check if the function can be a good
     inline candidate.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
	    && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_auto (e->caller, true, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
	    || e->count.ipa ().nonzero_p ())
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_single (e->caller, true, true))
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      /* We have two independent groups of hints.  If one matches in each
	 of the groups the limits are increased.  If both groups match, the
	 limit is increased even more.  */
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
				   | INLINE_HINT_known_hot
				   | INLINE_HINT_loop_iterations
				   | INLINE_HINT_loop_stride));
      bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);

      if (growth <= opt_for_fn (to->decl,
				param_max_inline_insns_size))
	;
      /* Apply param_max_inline_insns_single limit.  Do not do so when
	 hints suggest that inlining the given function is very profitable.
	 Avoid computation of big_speedup_p when not necessary to change
	 the outcome of the decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_single (e->caller, apply_hints,
						 apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_single (e->caller, true,
						     apply_hints2)
		   || !big_speedup_p (e)))
	{
	  e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
	  want_inline = false;
	}
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
	       && growth >= opt_for_fn (to->decl,
					param_max_inline_insns_small))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_NOT_DECLARED_INLINED;
	      want_inline = false;
	    }
	}
      /* Apply param_max_inline_insns_auto limit for functions not declared
	 inline.  Bypass the limit when speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_auto (e->caller, apply_hints,
					       apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_auto (e->caller, true,
						   apply_hints2)
		   || !big_speedup_p (e)))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
	      want_inline = false;
	    }
	}
      /* If call is cold, do not inline when function body would grow.  */
      else if (!e->maybe_hot_p ()
	       && (growth >= inline_insns_single (e->caller, false, false)
		   || growth_positive_p (callee, e, growth)))
	{
	  e->inline_failed = CIF_UNLIKELY_CALL;
	  want_inline = false;
	}
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just introduce new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
				   struct cgraph_node *outer_node,
				   bool peeling,
				   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = opt_for_fn (outer_node->decl,
			      param_max_inline_recursive_depth_auto);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = opt_for_fn (outer_node->decl,
			    param_max_inline_recursive_depth);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of self recursive function into copy of itself within other
     function is transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make probability
     of actual call to the self recursive function very small.  Be sure that
     the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursion is at most max_depth.  */
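  /* For instance, with a hypothetical max_depth of 8 the recursion
     probability at depth 1 is capped at 1 - 1/8 = 0.875 of the caller
     frequency; since the expected number of recursive invocations is roughly
     1 / (1 - p), this keeps it bounded by about max_depth, and the cap is
     squared for every further level of peeling.  */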
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
	max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
	{
	  reason = "frequency of recursive call is too large";
	  want_inline = false;
	}
    }
  /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in hardware return predictor.

     Recursive inlining might however increase cost of stack frame setup
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when
     probability of self recursion is low.

     Recursive inlining of self recursive call within loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
	  <= caller_freq
	     * opt_for_fn (outer_node->decl,
			   param_min_inline_recursive_probability))
	{
	  reason = "frequency of recursive call is too small";
	  want_inline = false;
	}
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
		     "   not inlining recursively: %s\n", reason);
  return want_inline;
}
/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
	  || !opt_for_fn (e->caller->decl, optimize))
	return true;
      if (!can_inline_edge_p (e, true))
	return true;
      if (e->recursive_p ())
	return true;
      if (!can_inline_edge_by_limits_p (e, true))
	return true;
      /* Inlining large functions to large loop depth is often harmful because
	 of register pressure it implies.  */
      if ((int)ipa_call_summaries->get (e)->loop_depth
	  > param_inline_functions_called_once_loop_depth)
	return true;
      /* Do not produce gigantic functions.  */
      if (estimate_size_after_inlining (e->caller->inlined_to ?
					e->caller->inlined_to : e->caller, e)
	  > param_inline_functions_called_once_insns)
	return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
	*(bool *)has_hot_call = true;
    }
  return false;
}
/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}
/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (growth_positive_p (node, NULL, INT_MIN) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
					 true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}
/* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
   in estimate_edge_badness.  */

static bool
wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
{
  return size < (DECL_DECLARED_INLINE_P (where->decl)
		 ? inline_insns_single (where, false, false)
		 : inline_insns_auto (where, false, false));
}
/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.  */
static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
			- callee_info->time * 101).to_int () <= 0
			|| callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
	       edge->caller->dump_name (),
	       edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
	       growth,
	       edge_time.to_double (),
	       unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
	fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
		 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
	fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When profile is available, compute badness as:

		 time_saved * caller_count
     goodness =  -------------------------------------------------
		 growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use negative value to make calls with profile appear hotter
     than calls without.  */
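  /* Illustrative computation with made-up values: time_saved = 4.0,
     caller_count = 1000, growth_of_caller = 20, overall_growth = 50 and
     combined_size = 300 give goodness = 4.0 * 1000 / (20 * 50 * 300)
     ~= 0.013, i.e. badness ~= -0.013.  An edge saving twice as much time for
     the same growth gets a badness twice as negative and is therefore
     extracted from the Fibonacci heap earlier.  */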
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
	   || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();

      numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
      if (numerator <= 0)
	numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
	numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
	numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

      /* Look for inliner wrappers of the form:

	 inline_caller ()
	   {
	     do_fast_job...
	     if (need_more_work)
	       noninline_callee ();
	   }
	 Without penalizing this case, we usually inline noninline_callee
	 into the inline_caller because overall_growth is small preventing
	 further inlining of inline_caller.

	 Penalize only callgraph edges to functions with small overall
	 growth ...
	*/
      if (growth > overall_growth
	  /* ... and having only one caller which is not inlined ... */
	  && callee_info->single_caller
	  && !edge->caller->inlined_to
	  /* ... and edges executed only conditionally ... */
	  && freq < 1
	  /* ... consider case where callee is not inline but caller is ... */
	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
	       && DECL_DECLARED_INLINE_P (caller->decl))
	      /* ... or when early optimizers decided to split and edge
		 frequency still indicates splitting is a win ... */
	      || (callee->split_part && !caller->split_part
		  && freq * 100
		     < opt_for_fn (caller->decl,
				   param_partial_inlining_entry_probability)
		  /* ... and do not overwrite user specified hints.   */
		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
		      || DECL_DECLARED_INLINE_P (caller->decl)))))
	{
	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
	  int caller_growth = caller_info->growth;

	  /* Only apply the penalty when caller looks like inline candidate,
	     and it is not called once.  */
	  if (!caller_info->single_caller && overall_growth < caller_growth
	      && caller_info->inlinable
	      && wrapper_heuristics_may_apply
		   (caller, ipa_size_summaries->get (caller)->size))
	    {
	      if (dump)
		fprintf (dump_file,
			 "     Wrapper penalty. Increasing growth %i to %i\n",
			 overall_growth, caller_growth);
	      overall_growth = caller_growth;
	    }
	}
      if (overall_growth > 0)
	{
	  /* Strongly prefer functions with few callers that can be inlined
	     fully.  The square root here leads to smaller binaries at
	     average.  Watch however for extreme cases and return to linear
	     function when growth is large.  */
	  if (overall_growth < 256)
	    overall_growth *= overall_growth;
	  else
	    overall_growth += 256 * 256 - 256;
	  denominator *= overall_growth;
	}
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
	{
	  fprintf (dump_file,
		   "      %f: guessed profile. frequency %f, count %" PRId64
		   " caller count %" PRId64
		   " time saved %f"
		   " overall growth %i (current) %i (original)"
		   " %i (compensated)\n",
		   badness.to_double (),
		   freq.to_double (),
		   edge->count.ipa ().initialized_p ()
		   ? edge->count.ipa ().to_gcov_type () : -1,
		   caller->count.ipa ().initialized_p ()
		   ? caller->count.ipa ().to_gcov_type () : -1,
		   inlining_speedup (edge, freq, unspec_edge_time,
				     edge_time).to_double (),
		   estimate_growth (callee),
		   callee_info->growth, overall_growth);
	}
    }
  /* When function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for overall number
     of functions fully inlined in program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
	badness = badness >> nest;
      else
	badness = badness << nest;
      if (dump)
	fprintf (dump_file, "      %f: no profile. nest %i\n",
		 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
		| INLINE_HINT_loop_iterations
		| INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & INLINE_HINT_builtin_constant_p)
    badness = badness.shift (badness > 0 ? -4 : 4);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}
/* Recompute badness of EDGE and update its key in HEAP if needed.  */

static void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
	 heap that is unnecessarily expensive.
	 We do lazy increases: after extracting minimum if the key
	 turns out to be out of date, it is re-inserted into heap
	 with correct value.  */
      if (badness < n->get_key ().badness)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  decreasing badness %s -> %s, %f to %f\n",
		       edge->caller->dump_name (),
		       edge->callee->dump_name (),
		       n->get_key ().badness.to_double (),
		       badness.to_double ());
	    }
	  inline_badness b (edge, badness);
	  heap->decrease_key (n, b);
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "  enqueuing call %s -> %s, badness %f\n",
		   edge->caller->dump_name (),
		   edge->callee->dump_name (),
		   badness.to_double ());
	}
      inline_badness b (edge, badness);
      edge->aux = heap->insert (b, edge);
    }
}
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need a reset
   because better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  struct ipa_ref *ref;

  if (where->inlined_to)
    where = where->inlined_to;

  reset_node_cache (where);

  if (edge_growth_cache != NULL)
    for (edge = where->callers; edge; edge = edge->next_caller)
      if (edge->inline_failed)
	edge_growth_cache->remove (edge);

  FOR_EACH_ALIAS (where, ref)
    reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
	if (edge_growth_cache != NULL && e->inline_failed)
	  edge_growth_cache->remove (e);
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
	      {
		if (e->caller == node)
		  return;
		e = e->caller->callers;
	      }
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES track nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
		    bitmap updated_nodes,
		    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  struct ipa_ref *ref;

  if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
      || node->inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
    return;

  FOR_EACH_ALIAS (node, ref)
    {
      struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
	if (!check_inlinablity_for
	    || check_inlinablity_for == edge)
	  {
	    if (can_inline_edge_p (edge, false)
		&& want_inline_small_function_p (edge, false)
		&& can_inline_edge_by_limits_p (edge, false))
	      update_edge_key (heap, edge);
	    else if (edge->aux)
	      {
		report_inline_failed_reason (edge);
		heap->delete_node ((edge_heap_node_t *) edge->aux);
		edge->aux = NULL;
	      }
	  }
	else if (edge->aux)
	  update_edge_key (heap, edge);
      }
}
/* Recompute HEAP nodes for each uninlined call in NODE.
   If UPDATE_SINCE is non-NULL check if edges called within that function
   are inlinable (typically UPDATE_SINCE is the inline clone we introduced
   where all edges have new context).

   This is used when we know that edge badnesses are going only to increase
   (we introduced new call site) and thus all we need is to insert newly
   created edges into heap.  */

static void
update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
		    struct cgraph_node *update_since,
		    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
  bool check_inlinability = update_since == node;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      {
	if (e->callee == update_since)
	  check_inlinability = true;
	e = e->callee->callees;
      }
    else
      {
	enum availability avail;
	struct cgraph_node *callee;
	if (!check_inlinability)
	  {
	    if (e->aux
		&& !bitmap_bit_p (updated_nodes,
				  e->callee->ultimate_alias_target
				    (&avail, e->caller)->get_uid ()))
	      update_edge_key (heap, e);
	  }
	/* We do not reset callee growth cache here.  Since we added a new
	   call, growth should have just increased and consequently the
	   badness metric doesn't need updating.  */
	else if (e->inline_failed
		 && (callee = e->callee->ultimate_alias_target (&avail,
								e->caller))
		 && avail >= AVAIL_AVAILABLE
		 && ipa_fn_summaries->get (callee) != NULL
		 && ipa_fn_summaries->get (callee)->inlinable
		 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
	  {
	    if (can_inline_edge_p (e, false)
		&& want_inline_small_function_p (e, false)
		&& can_inline_edge_by_limits_p (e, false))
	      {
		gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
		gcc_checking_assert (check_inlinability || e->aux);
		update_edge_key (heap, e);
	      }
	    else if (e->aux)
	      {
		report_inline_failed_reason (e);
		heap->delete_node ((edge_heap_node_t *) e->aux);
		e->aux = NULL;
	      }
	  }
	/* In case we redirected to unreachable node we only need to remove
	   the fibheap entry.  */
	else if (e->aux)
	  {
	    heap->delete_node ((edge_heap_node_t *) e->aux);
	    e->aux = NULL;
	  }
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
	      {
		if (e->caller == node)
		  return;
		if (e->caller == update_since)
		  check_inlinability = false;
		e = e->caller->callers;
	      }
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}
/* Enqueue all recursive calls from NODE into priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
			edge_heap_t *heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
	|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
	    && avail > AVAIL_INTERPOSABLE))
      {
	inline_badness b (e, -e->sreal_frequency ());
	heap->insert (b, e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
/* Decide on recursive inlining: in the case function has recursive calls,
   inline until body size reaches given argument.  If any new indirect edges
   are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
recursive_inlining (struct cgraph_edge *edge,
                    vec<cgraph_edge *> *new_edges)
  cgraph_node *to = (edge->caller->inlined_to
                     ? edge->caller->inlined_to : edge->caller);
  int limit = opt_for_fn (to->decl,
                          param_max_inline_insns_recursive_auto);
  inline_badness b (edge, sreal::min ());
  edge_heap_t heap (b);
  struct cgraph_node *node;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone = NULL, *next;
  node = edge->caller;
  if (node->inlined_to)
    node = node->inlined_to;
  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
  /* Make sure that function is small enough to be considered for inlining.  */
  if (estimate_size_after_inlining (node, edge) >= limit)
  lookup_recursive_calls (node, node, &heap);
             " Performing recursive inlining on %s\n", node->dump_name ());
  /* Do the inlining and update list of recursive call during process.  */
  while (!heap.empty ())
      struct cgraph_edge *curr = heap.extract_min ();
      struct cgraph_node *cnode, *dest = curr->callee;
      if (!can_inline_edge_p (curr, true)
          || !can_inline_edge_by_limits_p (curr, true))
      /* MASTER_CLONE is produced in the case we have already started
         modifying the function.  Be sure to redirect the edge to the original
         body before estimating growths; otherwise we would be seeing growths
         after inlining the already modified body.  */
          curr->redirect_callee (master_clone);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
      if (estimate_size_after_inlining (node, curr) > limit)
          curr->redirect_callee (dest);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
      for (cnode = curr->caller;
           cnode->inlined_to; cnode = cnode->callers->caller)
            == curr->callee->ultimate_alias_target ()->decl)
      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
          curr->redirect_callee (dest);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
                   " Inlining call of depth %i", depth);
          if (node->count.nonzero_p () && curr->count.initialized_p ())
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count.to_gcov_type ()
                       / node->count.to_gcov_type ());
          fprintf (dump_file, "\n");
          /* We need original clone to copy around.  */
          master_clone = node->create_clone (node->decl, node->count,
                                             false, vNULL, true, NULL, NULL);
          for (e = master_clone->callees; e; e = e->next_callee)
            if (!e->inline_failed)
              clone_inlined_nodes (e, true, false, NULL);
          curr->redirect_callee (master_clone);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
      inline_call (curr, false, new_edges, &overall_size, true);
      reset_node_cache (node);
      lookup_recursive_calls (node, curr->callee, &heap);
  if (!heap.empty () && dump_file)
    fprintf (dump_file, " Recursive inlining growth limit met.\n");
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, edge->call_stmt,
                     "\n Inlined %i times, "
                     "body grown from size %i to %i, time %f to %f\n", n,
                     ipa_size_summaries->get (master_clone)->size,
                     ipa_size_summaries->get (node)->size,
                     ipa_fn_summaries->get (master_clone)->time.to_double (),
                     ipa_fn_summaries->get (node)->time.to_double ());
  /* Remove master clone we used for inlining.  We rely on clones inlined
     into master clone getting queued just before master clone so we don't
  for (node = symtab->first_function (); node != master_clone;
      next = symtab->next_function (node);
      if (node->inlined_to == master_clone)
  master_clone->remove ();
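/* Illustrative sketch (not part of the pass): with default parameters the
   recursive inliner above behaves much like manual unrolling of a
   self-recursive function, e.g. for the made-up example

     void f (int n)            // not DECL_DECLARED_INLINE_P, so the limit is
     {                         // param_max_inline_insns_recursive_auto
       if (n)
         f (n - 1);            // this call is inlined again and again until
     }                         // the estimated body size reaches the limit

   Declaring f inline switches the limit to param_max_inline_insns_recursive;
   both parameter names match the opt_for_fn calls above.  */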
/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */
compute_max_insns (cgraph_node *node, int insns)
  int max_insns = insns;
  if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
    max_insns = opt_for_fn (node->decl, param_large_unit_insns);
  return ((int64_t) max_insns
          * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
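/* Worked example (illustration only): assuming, for illustration,
   param_large_unit_insns = 10000 and param_inline_unit_growth = 40, the
   formula above yields

     insns =  6000  ->  max (6000, 10000)  * (100 + 40) / 100 = 14000
     insns = 50000  ->  max (50000, 10000) * (100 + 40) / 100 = 70000

   i.e. small units may grow up to the large-unit threshold plus the growth
   percentage, while units already past the threshold grow only by the
   percentage.  */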
/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
  while (new_edges.length () > 0)
      struct cgraph_edge *edge = new_edges.pop ();
      gcc_assert (!edge->aux);
      gcc_assert (edge->callee);
      if (edge->inline_failed
          && can_inline_edge_p (edge, true)
          && want_inline_small_function_p (edge, true)
          && can_inline_edge_by_limits_p (edge, true))
          inline_badness b (edge, edge_badness (edge, false));
          edge->aux = heap->insert (b, edge);
/* Remove EDGE from the fibheap.  */
heap_edge_removal_hook (struct cgraph_edge *e, void *data)
  ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
  /* If we have already decided to inline the edge, it seems useful.  */
  if (!e->inline_failed)
  enum availability avail;
  struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
  gcc_assert (e->speculative && !e->indirect_unknown_callee);
  if (!e->maybe_hot_p ())
  /* See if IP optimizations found something potentially useful about the
     function.  For now we look only for CONST/PURE flags.  Almost everything
     else we propagate is useless.  */
  if (avail >= AVAIL_AVAILABLE)
      int ecf_flags = flags_from_decl_or_type (target->decl);
      if (ecf_flags & ECF_CONST)
          if (!(e->speculative_call_indirect_edge ()->indirect_info
                ->ecf_flags & ECF_CONST))
      else if (ecf_flags & ECF_PURE)
          if (!(e->speculative_call_indirect_edge ()->indirect_info
                ->ecf_flags & ECF_PURE))
  /* If we did not manage to inline the function nor redirect
     to an ipa-cp clone (those are seen by having the local flag set),
     it is probably pointless to inline it unless hardware is missing
     indirect call predictor.  */
  if (!anticipate_inlining && !target->local)
  /* For overwritable targets there is not much to do.  */
  if (!can_inline_edge_p (e, false)
      || !can_inline_edge_by_limits_p (e, false, true))
  /* OK, speculation seems interesting.  */
/* We know that EDGE is not going to be inlined.
   See if we can remove speculation.  */
resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
  if (edge->speculative && !speculation_useful_p (edge, false))
      struct cgraph_node *node = edge->caller;
      struct cgraph_node *where = node->inlined_to
                                  ? node->inlined_to : node;
      auto_bitmap updated_nodes;
      if (edge->count.ipa ().initialized_p ())
        spec_rem += edge->count.ipa ();
      cgraph_edge::resolve_speculation (edge);
      reset_edge_caches (where);
      ipa_update_overall_fn_summary (where);
      update_caller_keys (edge_heap, where,
                          updated_nodes, NULL);
      update_callee_keys (edge_heap, where, NULL,
/* Return true if NODE should be accounted for overall size estimate.
   Skip all nodes optimized for size so we can measure the growth of the hot
   part of the program regardless of the padding.  */
inline_account_function_p (struct cgraph_node *node)
  return (!DECL_EXTERNAL (node->decl)
          && !opt_for_fn (node->decl, optimize_size)
          && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
/* Count number of callers of NODE and store it into DATA (that
   points to int).  Worker for cgraph_for_node_and_aliases.  */
sum_callers (struct cgraph_node *node, void *data)
  struct cgraph_edge *e;
  int *num_calls = (int *)data;
  for (e = node->callers; e; e = e->next_caller)
/* We only propagate across edges with non-interposable callee.  */
ignore_edge_p (struct cgraph_edge *e)
  enum availability avail;
  e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
  return (avail <= AVAIL_INTERPOSABLE);
/* We use greedy algorithm for inlining of small functions:
   All inline candidates are put into prioritized heap ordered in
   The inlining of small functions is bounded by unit growth parameters.  */
inline_small_functions (void)
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  edge_heap_t edge_heap (b);
  auto_bitmap updated_nodes;
  auto_vec<cgraph_edge *> new_indirect_edges;
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  struct cgraph_edge_hook_list *edge_removal_hook_holder;
  new_indirect_edges.create (8);
  edge_removal_hook_holder
    = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
  /* Compute overall unit size and other global parameters used by badness
  max_count = profile_count::uninitialized ();
  ipa_reduced_postorder (order, true, ignore_edge_p);
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
        if (!node->alias && node->analyzed
            && (node->has_gimple_body_p () || node->thunk)
            && opt_for_fn (node->decl, optimize))
            class ipa_fn_summary *info = ipa_fn_summaries->get (node);
            struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
            /* Do not account external functions, they will be optimized out
               if not inlined.  Also only count the non-cold portion of program.  */
            if (inline_account_function_p (node))
              initial_size += ipa_size_summaries->get (node)->size;
            info->growth = estimate_growth (node);
            node->call_for_symbol_and_aliases (sum_callers, &num_calls,
              info->single_caller = true;
            if (dfs && dfs->next_cycle)
                struct cgraph_node *n2;
                int id = dfs->scc_no + 1;
                     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
                  if (opt_for_fn (n2->decl, optimize))
                      ipa_fn_summary *info2 = ipa_fn_summaries->get
                        (n2->inlined_to ? n2->inlined_to : n2);
        for (edge = node->callers; edge; edge = edge->next_caller)
          max_count = max_count.max (edge->count.ipa ());
  ipa_free_postorder_info ();
  initialize_growth_caches ();
             "\nDeciding on inlining of small functions. Starting with size %i.\n",
  overall_size = initial_size;
  min_size = overall_size;
  /* Populate the heap with all edges we might inline.  */
  FOR_EACH_DEFINED_FUNCTION (node)
      bool update = false;
      struct cgraph_edge *next = NULL;
      bool has_speculative = false;
      if (!opt_for_fn (node->decl, optimize)
          /* With -Og we do not want to perform IPA inlining of small
             functions since there are no scalar cleanups after it
             that would realize the anticipated win.  All abstraction
             is removed during early inlining.  */
          || opt_for_fn (node->decl, optimize_debug))
        fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
      for (edge = node->callees; edge; edge = edge->next_callee)
          if (edge->inline_failed
              && can_inline_edge_p (edge, true)
              && want_inline_small_function_p (edge, true)
              && can_inline_edge_by_limits_p (edge, true)
              && edge->inline_failed)
              gcc_assert (!edge->aux);
              update_edge_key (&edge_heap, edge);
          if (edge->speculative)
            has_speculative = true;
      if (has_speculative)
        for (edge = node->callees; edge; edge = next)
            next = edge->next_callee;
            if (edge->speculative
                && !speculation_useful_p (edge, edge->aux != NULL))
                cgraph_edge::resolve_speculation (edge);
          struct cgraph_node *where = node->inlined_to
                                      ? node->inlined_to : node;
          ipa_update_overall_fn_summary (where);
          reset_edge_caches (where);
          update_caller_keys (&edge_heap, where,
                              updated_nodes, NULL);
          update_callee_keys (&edge_heap, where, NULL,
          bitmap_clear (updated_nodes);
  gcc_assert (in_lto_p
              || (profile_info && flag_branch_probabilities));
  while (!edge_heap.empty ())
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      sreal badness = edge_heap.min_key ().badness;
      sreal current_badness;
      edge = edge_heap.extract_min ();
      gcc_assert (edge->aux);
      if (!edge->inline_failed || !edge->callee->analyzed)
      /* Be sure that caches are maintained consistent.
         This check is affected by scaling roundoff errors when compiling for
         IPA; thus we skip it in that case.  */
      if (flag_checking && !edge->callee->count.ipa_p ()
          && (!max_count.initialized_p () || !max_count.nonzero_p ()))
          sreal cached_badness = edge_badness (edge, false);
          int old_size_est = estimate_edge_size (edge);
          sreal old_time_est = estimate_edge_time (edge);
          int old_hints_est = estimate_edge_hints (edge);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (edge);
          reset_node_cache (edge->caller->inlined_to
                            ? edge->caller->inlined_to
          gcc_assert (old_size_est == estimate_edge_size (edge));
          gcc_assert (old_time_est == estimate_edge_time (edge));
          gcc_assert (old_hints_est == estimate_edge_hints (edge));
             fails with profile feedback because some hints depend on the
             maybe_hot_edge_p predicate and because the callee gets inlined to
             other calls, the edge may become cold.
             This ought to be fixed by computing relative probabilities
             for given invocation but that will be better done once whole
             code is converted to sreals.  Disable for now and revert to "wrong"
             value so enable/disable checking paths agree.  */
          edge_growth_cache->get (edge)->hints = old_hints_est + 1;
          /* When updating the edge costs, we only decrease badness in the keys.
             Increases of badness are handled lazily; when we see key with out
             of date value on it, we re-insert it now.  */
          current_badness = edge_badness (edge, false);
          gcc_assert (cached_badness == current_badness);
          gcc_assert (current_badness >= badness);
        current_badness = edge_badness (edge, false);
      if (current_badness != badness)
          if (edge_heap.min () && current_badness > edge_heap.min_key ().badness)
              inline_badness b (edge, current_badness);
              edge->aux = edge_heap.insert (b, edge);
            badness = current_badness;
      if (!can_inline_edge_p (edge, true)
          || !can_inline_edge_by_limits_p (edge, true))
          resolve_noninline_speculation (&edge_heap, edge);
      callee = edge->callee->ultimate_alias_target ();
      growth = estimate_edge_growth (edge);
                   "\nConsidering %s with %i size\n",
                   callee->dump_name (),
                   ipa_size_summaries->get (callee)->size);
                   " to be inlined into %s in %s:%i\n"
                   " Estimated badness is %f, frequency %.2f.\n",
                   edge->caller->dump_name (),
                   && (LOCATION_LOCUS (gimple_location ((const gimple *)
                       > BUILTINS_LOCATION)
                   ? gimple_filename ((const gimple *) edge->call_stmt)
                   ? gimple_lineno ((const gimple *) edge->call_stmt)
                   badness.to_double (),
                   edge->sreal_frequency ().to_double ());
      if (edge->count.ipa ().initialized_p ())
          fprintf (dump_file, " Called ");
          edge->count.ipa ().dump (dump_file);
          fprintf (dump_file, " times\n");
      if (dump_flags & TDF_DETAILS)
        edge_badness (edge, true);
      where = edge->caller;
      if (overall_size + growth > compute_max_insns (where, min_size)
          && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
          edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
          report_inline_failed_reason (edge);
          resolve_noninline_speculation (&edge_heap, edge);
      if (!want_inline_small_function_p (edge, true))
          resolve_noninline_speculation (&edge_heap, edge);
      profile_count old_count = callee->count;
      /* Heuristics for inlining small functions work poorly for
         recursive calls where we do effects similar to loop unrolling.
         When inlining such an edge seems profitable, leave the decision to
         the specific inliner.  */
      if (edge->recursive_p ())
          if (where->inlined_to)
            where = where->inlined_to;
          if (!recursive_inlining (edge,
                                   opt_for_fn (edge->caller->decl,
                                               flag_indirect_inlining)
                                   ? &new_indirect_edges : NULL))
              edge->inline_failed = CIF_RECURSIVE_INLINING;
              resolve_noninline_speculation (&edge_heap, edge);
          reset_edge_caches (where);
          /* Recursive inliner inlines all recursive calls of the function
             at once.  Consequently we need to update all callee keys.  */
          if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
            add_new_edges_to_heap (&edge_heap, new_indirect_edges);
          update_callee_keys (&edge_heap, where, where, updated_nodes);
          bitmap_clear (updated_nodes);
          struct cgraph_node *outer_node = NULL;
          /* Consider the case where self recursive function A is inlined
             into B.  This is desired optimization in some cases, since it
             leads to an effect similar to loop peeling and we might completely
             optimize out the recursive call.  However we must be extra
          where = edge->caller;
          while (where->inlined_to)
              if (where->decl == callee->decl)
                outer_node = where, depth++;
              where = where->callers->caller;
              && !want_inline_self_recursive_call_p (edge, outer_node,
                = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
                   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
              resolve_noninline_speculation (&edge_heap, edge);
          else if (depth && dump_file)
            fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
          gcc_checking_assert (!callee->inlined_to);
          int old_size = ipa_size_summaries->get (where)->size;
          sreal old_time = ipa_fn_summaries->get (where)->time;
          inline_call (edge, true, &new_indirect_edges, &overall_size, true);
          reset_edge_caches (edge->callee);
          add_new_edges_to_heap (&edge_heap, new_indirect_edges);
          /* If caller's size and time increased we do not need to update
             all edges because badness is not going to decrease.  */
          if (old_size <= ipa_size_summaries->get (where)->size
              && old_time <= ipa_fn_summaries->get (where)->time
              /* Wrapper penalty may be non-monotonous in this respect.
                 Fortunately it only affects small functions.  */
              && !wrapper_heuristics_may_apply (where, old_size))
            update_callee_keys (&edge_heap, edge->callee, edge->callee,
            update_callee_keys (&edge_heap, where,
      where = edge->caller;
      if (where->inlined_to)
        where = where->inlined_to;
      /* Our profitability metric can depend on local properties
         such as the number of inlinable calls and the size of the function
         body.  After inlining these properties might change for the function
         we inlined into (since its body size changed) and for the functions
         called by the function we inlined (since the number of their inlinable
      update_caller_keys (&edge_heap, where, updated_nodes, NULL);
      /* Offline copy count has possibly changed, recompute if profile is
      struct cgraph_node *n
        = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
      if (n != edge->callee && n->analyzed && !(n->count == old_count)
          && n->count.ipa_p ())
        update_callee_keys (&edge_heap, n, NULL, updated_nodes);
      bitmap_clear (updated_nodes);
      if (dump_enabled_p ())
          ipa_fn_summary *s = ipa_fn_summaries->get (where);
          /* dump_printf can't handle %+i.  */
          char buf_net_change[100];
          snprintf (buf_net_change, sizeof buf_net_change, "%+i",
                    overall_size - old_size);
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
                           " Inlined %C into %C which now has time %f and "
                           "size %i, net change of %s%s.\n",
                           edge->callee, edge->caller,
                           s->time.to_double (),
                           ipa_size_summaries->get (edge->caller)->size,
                           cross_module_call_p (edge) ? " (cross module)":"");
      if (min_size > overall_size)
          min_size = overall_size;
          fprintf (dump_file, "New minimal size reached: %i\n", min_size);
  free_growth_caches ();
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "Unit growth for small function inlining: %i->%i (%i%%)\n",
                 initial_size, overall_size,
                 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
  symtab->remove_edge_removal_hook (edge_removal_hook_holder);
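/* Illustrative sketch (not part of GCC): stripped of cache maintenance,
   speculation handling and the recursion special cases, the loop above is a
   classic greedy scheme over a badness-ordered heap.  The following
   standalone example with made-up candidate records only shows that shape:

     #include <queue>
     #include <vector>

     struct candidate { double badness; int growth; };
     struct worse
     {
       bool operator() (const candidate &a, const candidate &b) const
       { return a.badness > b.badness; }        // smallest badness on top
     };

     static int
     greedy_inline (const std::vector<candidate> &cands, int budget)
     {
       std::priority_queue<candidate, std::vector<candidate>, worse>
         heap (cands.begin (), cands.end ());
       int size = 0, inlined = 0;
       while (!heap.empty ())
         {
           candidate c = heap.top ();
           heap.pop ();
           if (size + c.growth > budget)        // analogous to the unit
             continue;                          // growth limit check above
           size += c.growth;
           inlined++;
         }
       return inlined;
     }

   The real pass additionally recomputes stale keys lazily and updates the
   keys of affected callers and callees after every inlined edge.  */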
/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */
flatten_function (struct cgraph_node *node, bool early, bool update)
  struct cgraph_edge *e;
  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);
  node->aux = (void *) node;
  for (e = node->callees; e; e = e->next_callee)
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      /* We've hit a cycle?  It is time to give up.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining %C into %C to avoid cycle.\n",
          if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
            e->inline_failed = CIF_RECURSIVE_INLINING;
      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
          flatten_function (callee, early, false);
      /* Flatten attribute needs to be processed during late inlining.  For
         extra code quality we however do flattening during early optimization,
          ? !can_inline_edge_p (e, true)
            && !can_inline_edge_by_limits_p (e, true)
          : !can_early_inline_edge_p (e))
      if (e->recursive_p ())
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: recursive call.\n");
      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: SSA form does not match.\n");
      /* Inline the edge and flatten the inline clone.  Avoid
         recursing through the original node if the node was cloned.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C.\n",
      orig_callee = callee;
      inline_call (e, true, NULL, NULL, false);
      if (e->callee != orig_callee)
        orig_callee->aux = (void *) node;
      flatten_function (e->callee, early, false);
      if (e->callee != orig_callee)
        orig_callee->aux = NULL;
  cgraph_node *where = node->inlined_to ? node->inlined_to : node;
  if (update && opt_for_fn (where->decl, optimize))
    ipa_update_overall_fn_summary (where);
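/* Illustrative sketch (not part of the pass): the attribute handled here is
   the user-visible one, e.g.

     __attribute__ ((flatten)) void top (void)
     {
       helper1 ();      // every call reachable from the body of top is
       helper2 ();      // inlined into top, as far as the checks above allow
     }

   helper1 and helper2 are made-up names used only for illustration.  */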
/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
   DATA points to number of calls originally found so we avoid infinite
inline_to_all_callers_1 (struct cgraph_node *node, void *data,
                         hash_set<cgraph_node *> *callers)
  int *num_calls = (int *)data;
  bool callee_removed = false;
  while (node->callers && !node->inlined_to)
      struct cgraph_node *caller = node->callers->caller;
      if (!can_inline_edge_p (node->callers, true)
          || !can_inline_edge_by_limits_p (node->callers, true)
          || node->callers->recursive_p ())
            fprintf (dump_file, "Uninlinable call found; giving up.\n");
      cgraph_node *ultimate = node->ultimate_alias_target ();
                 "\nInlining %s size %i.\n",
                 ultimate->dump_name (),
                 ipa_size_summaries->get (ultimate)->size);
                 " Called once from %s %i insns.\n",
                 node->callers->caller->dump_name (),
                 ipa_size_summaries->get (node->callers->caller)->size);
      /* Remember which callers we inlined to, delaying updating the
      callers->add (node->callers->caller);
      inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
                 " Inlined into %s which now has %i size\n",
                 caller->dump_name (),
                 ipa_size_summaries->get (caller)->size);
      if (!(*num_calls)--)
            fprintf (dump_file, "New calls found; giving up.\n");
          return callee_removed;
/* Wrapper around inline_to_all_callers_1 doing delayed overall summary
inline_to_all_callers (struct cgraph_node *node, void *data)
  hash_set<cgraph_node *> callers;
  bool res = inline_to_all_callers_1 (node, data, &callers);
  /* Perform the delayed update of the overall summary of all callers
     processed.  This avoids quadratic behavior in the cases where
     we have a lot of calls to the same function.  */
  for (hash_set<cgraph_node *>::iterator i = callers.begin ();
       i != callers.end (); ++i)
    ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
/* Output overall time estimate.  */
dump_overall_stats (void)
  sreal sum_weighted = 0, sum = 0;
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to
        ipa_fn_summary *s = ipa_fn_summaries->get (node);
        if (node->count.ipa ().initialized_p ())
          sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
  fprintf (dump_file, "Overall time estimate: "
           "%f weighted by profile: "
           "%f\n", sum.to_double (), sum_weighted.to_double ());
/* Output some useful stats about inlining.  */
dump_inline_stats (void)
  int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
  int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
  int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
  int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
  int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
  int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
  int64_t reason[CIF_N_REASONS][2];
  sreal reason_freq[CIF_N_REASONS];
  struct cgraph_node *node;
  memset (reason, 0, sizeof (reason));
  for (i = 0; i < CIF_N_REASONS; i++)
  FOR_EACH_DEFINED_FUNCTION (node)
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
          if (e->inline_failed)
              if (e->count.ipa ().initialized_p ())
                reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
              reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
              reason[(int) e->inline_failed][1] ++;
              if (DECL_VIRTUAL_P (e->callee->decl)
                  && e->count.ipa ().initialized_p ())
                  if (e->indirect_inlining_edge)
                    noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
                    noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
              else if (e->count.ipa ().initialized_p ())
                  if (e->indirect_inlining_edge)
                    noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
                    noninlined_cnt += e->count.ipa ().to_gcov_type ();
          else if (e->count.ipa ().initialized_p ())
              if (DECL_VIRTUAL_P (e->callee->decl))
                inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
                inlined_speculative += e->count.ipa ().to_gcov_type ();
          else if (DECL_VIRTUAL_P (e->callee->decl))
              if (e->indirect_inlining_edge)
                inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
                inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
              if (e->indirect_inlining_edge)
                inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
                inlined_cnt += e->count.ipa ().to_gcov_type ();
      for (e = node->indirect_calls; e; e = e->next_callee)
        if (e->indirect_info->polymorphic
            & e->count.ipa ().initialized_p ())
          indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
        else if (e->count.ipa ().initialized_p ())
          indirect_cnt += e->count.ipa ().to_gcov_type ();
  if (max_count.initialized_p ())
             "Inlined %" PRId64 " + speculative "
             "%" PRId64 " + speculative polymorphic "
             "%" PRId64 " + previously indirect "
             "%" PRId64 " + virtual "
             "%" PRId64 " + virtual and previously indirect "
             "%" PRId64 "\n" "Not inlined "
             "%" PRId64 " + previously indirect "
             "%" PRId64 " + virtual "
             "%" PRId64 " + virtual and previously indirect "
             "%" PRId64 " + still indirect "
             "%" PRId64 " + still indirect polymorphic "
             "%" PRId64 "\n", inlined_cnt,
             inlined_speculative, inlined_speculative_ply,
             inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
             noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
             noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
  fprintf (dump_file, "Removed speculations ");
  spec_rem.dump (dump_file);
  fprintf (dump_file, "\n");
  dump_overall_stats ();
  fprintf (dump_file, "\nWhy inlining failed?\n");
  for (i = 0; i < CIF_N_REASONS; i++)
      fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64 " count\n",
               cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
               (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
/* Called when node is removed.  */
flatten_remove_node_hook (struct cgraph_node *node, void *data)
  if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
  hash_set<struct cgraph_node *> *removed
    = (hash_set<struct cgraph_node *> *) data;
  removed->add (node);
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */
  struct cgraph_node *node;
  struct cgraph_node **order;
  bool remove_functions = false;
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
    ipa_dump_fn_summaries (dump_file);
  nnodes = ipa_reverse_postorder (order);
  spec_rem = profile_count::zero ();
  FOR_EACH_FUNCTION (node)
      /* Recompute the default reasons for inlining because they may have
         changed during merging.  */
          for (cgraph_edge *e = node->callees; e; e = e->next_callee)
              gcc_assert (e->inline_failed);
              initialize_inline_failed (e);
          for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
            initialize_inline_failed (e);
    fprintf (dump_file, "\nFlattening functions:\n");
  /* First shrink order array, so that it only contains nodes with
     flatten attribute.  */
  for (i = nnodes - 1, j = i; i >= 0; i--)
      if (node->definition
          /* Do not try to flatten aliases.  These may happen for example when
             creating local aliases.  */
          && lookup_attribute ("flatten",
                               DECL_ATTRIBUTES (node->decl)) != NULL)
        order[j--] = order[i];
  /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
     nodes with flatten attribute.  If there is more than one such
     node, we need to register a node removal hook, as flatten_function
     could remove other nodes with flatten attribute.  See PR82801.  */
  struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
  hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
      flatten_removed_nodes = new hash_set<struct cgraph_node *>;
      node_removal_hook_holder
        = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
                                           flatten_removed_nodes);
  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i > j; i--)
      if (flatten_removed_nodes
          && flatten_removed_nodes->contains (node))
      /* Handle nodes to be flattened.
         Ideally when processing callees we stop inlining at the
         entry of cycles, possibly cloning that entry point and
         try to flatten itself turning it into a self-recursive
        fprintf (dump_file, "Flattening %s\n", node->dump_name ());
      flatten_function (node, false, true);
      symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
      delete flatten_removed_nodes;
    dump_overall_stats ();
  inline_small_functions ();
  gcc_assert (symtab->state == IPA_SSA);
  symtab->state = IPA_SSA_AFTER_INLINING;
  /* Do first after-inlining removal.  We want to remove all "stale" extern
     inline functions and virtual functions so we really know what is called
  symtab->remove_unreachable_nodes (dump_file);
  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless on the callee size as long as function growth limits
               "\nDeciding on functions to be inlined into all callers and "
               "removing useless speculations:\n");
  /* Inlining one function called once has a good chance of preventing
     inlining of another function into the same callee.  Ideally we should
     work in priority order, but inlining hot functions first is probably a
     good cut without the extra pain of maintaining the queue.
     ??? this is not really fitting the bill perfectly: inlining a function
     into its callee often leads to better optimization of the callee due to
     increased context for optimization.
     For example if the main() function calls a function that outputs help
     and then a function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.
     We probably want to implement new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
  for (cold = 0; cold <= 1; cold++)
      FOR_EACH_DEFINED_FUNCTION (node)
          struct cgraph_edge *edge, *next;
          if (!opt_for_fn (node->decl, optimize)
              || !opt_for_fn (node->decl, flag_inline_functions_called_once))
          for (edge = node->callees; edge; edge = next)
              next = edge->next_callee;
              if (edge->speculative && !speculation_useful_p (edge, false))
                  if (edge->count.ipa ().initialized_p ())
                    spec_rem += edge->count.ipa ();
                  cgraph_edge::resolve_speculation (edge);
                  remove_functions = true;
              struct cgraph_node *where = node->inlined_to
                                          ? node->inlined_to : node;
              reset_edge_caches (where);
              ipa_update_overall_fn_summary (where);
          if (want_inline_function_to_all_callers_p (node, cold))
              node->call_for_symbol_and_aliases (sum_callers, &num_calls,
              while (node->call_for_symbol_and_aliases
                     (inline_to_all_callers, &num_calls, true))
              remove_functions = true;
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "\nInlined %i calls, eliminated %i functions\n\n",
                 ncalls_inlined, nfunctions_inlined);
    dump_inline_stats ();
    ipa_dump_fn_summaries (dump_file);
  return remove_functions ? TODO_remove_functions : 0;
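/* Illustrative note (not part of the pass): the COLD loop above makes two
   sweeps over the functions considered for inlining into all callers.  For a
   made-up single-caller helper such as

     static int helper (int x) { return x + 1; }

   a hot call is handled in the first sweep (cold == 0) while a cold one is
   only handled in the second (cold == 1), so hot functions get first claim
   on the unit growth budget.  */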
/* Inline always-inline function calls in NODE
   (which itself is possibly inline).  */
inline_always_inline_functions (struct cgraph_node *node)
  struct cgraph_edge *e;
  bool inlined = false;
  for (e = node->callees; e; e = e->next_callee)
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      gcc_checking_assert (!callee->aux || callee->aux == (void *)(size_t)1);
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
          /* Watch for self-recursive cycles.  */
      if (e->recursive_p ())
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             " Not inlining recursive call to %C.\n",
          e->inline_failed = CIF_RECURSIVE_INLINING;
      if (callee->definition
          && !ipa_fn_summaries->get (callee))
        compute_fn_summary (callee, true);
      if (!can_early_inline_edge_p (e))
          /* Set inlined to true if the callee is marked "always_inline" but
             is not inlinable.  This will allow flagging an error later in
             expand_call_inline in tree-inline.cc.  */
          if (lookup_attribute ("always_inline",
                                DECL_ATTRIBUTES (callee->decl)) != NULL)
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C (always_inline).\n",
                         e->callee, e->caller);
      inline_call (e, true, NULL, NULL, false);
      callee->aux = (void *)(size_t)1;
      /* Inline recursively to handle the case where always_inline function was
         not optimized yet since it is a part of a cycle in callgraph.  */
      inline_always_inline_functions (e->callee);
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */
early_inline_small_functions (struct cgraph_node *node)
  struct cgraph_edge *e;
  bool inlined = false;
  for (e = node->callees; e; e = e->next_callee)
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      /* We can encounter not-yet-analyzed function during
         early inlining on callgraphs with strongly
         connected components.  */
      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      if (s == NULL || !s->inlinable || !e->inline_failed)
      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
          && !opt_for_fn (node->decl, flag_inline_small_functions)
          && !opt_for_fn (node->decl, flag_inline_functions))
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, e->call_stmt,
                         "Considering inline candidate %C.\n",
      if (!can_early_inline_edge_p (e))
      if (e->recursive_p ())
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             " Not inlining: recursive call.\n");
      if (!want_early_inline_function_p (e))
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C.\n",
      inline_call (e, true, NULL, NULL, false);
  ipa_update_overall_fn_summary (node);
early_inliner (function *fun)
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  bool inlined = false;
  /* Do nothing if datastructures for ipa-inliner are already computed.  This
     happens when some pass decides to construct new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse us when the early inliner decides to inline a call
     to a function clone, because function clones don't have parameter list
     in ipa-prop matching their signature.  */
  if (ipa_node_params_sum)
  node->remove_all_references ();
  /* Even when not optimizing or not inlining inline always-inline
  inlined = inline_always_inline_functions (node);
      || !flag_early_inlining)
  else if (lookup_attribute ("flatten",
                             DECL_ATTRIBUTES (node->decl)) != NULL)
      /* When the function is marked to be flattened, recursively inline
      if (dump_enabled_p ())
        dump_printf (MSG_OPTIMIZED_LOCATIONS,
                     "Flattening %C\n", node);
      flatten_function (node, true, true);
      /* If some always_inline functions were inlined, apply the changes.
         This way we will not account always inline into growth limits and
         moreover we will inline calls from always inlines that we skipped
         previously because of conditional in can_early_inline_edge_p
         which prevents some inlining to always_inline.  */
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);
          /* optimize_inline_calls call above might have introduced new
             statements that don't have inline parameters computed.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
              /* We can encounter not-yet-analyzed function during
                 early inlining on callgraphs with strongly
                 connected components.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
          ipa_update_overall_fn_summary (node);
          timevar_pop (TV_INTEGRATION);
      /* We iterate incremental inlining to get trivial cases of indirect
      while (iterations < opt_for_fn (node->decl,
                                      param_early_inliner_max_iterations)
             && early_inline_small_functions (node))
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);
          /* Technically we ought to recompute inline parameters so the new
             iteration of early inliner works as expected.  We however have
             values approximately right and thus we only need to update edge
             info that might be cleared out for newly discovered edges.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
              /* We have no summary for new bound store calls yet.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
          if (iterations < opt_for_fn (node->decl,
                                       param_early_inliner_max_iterations) - 1)
            ipa_update_overall_fn_summary (node);
          timevar_pop (TV_INTEGRATION);
        fprintf (dump_file, "Iterations: %i\n", iterations);
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
  fun->always_inline_functions_inlined = true;
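/* Illustrative note (not part of the pass): the iteration count above is
   bounded by the early-inliner-max-iterations parameter, so for example

     gcc -O2 --param early-inliner-max-iterations=3 ...

   allows up to three rounds of early inlining per function; a further round
   is attempted only while early_inline_small_functions keeps reporting
   progress.  */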
/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.  */
const pass_data pass_data_early_inline =
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
class pass_early_inline : public gimple_opt_pass
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  /* opt_pass methods: */
  unsigned int execute (function *) final override;
}; // class pass_early_inline
pass_early_inline::execute (function *fun)
  return early_inliner (fun);
make_pass_early_inline (gcc::context *ctxt)
  return new pass_early_inline (ctxt);
const pass_data pass_data_ipa_inline =
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
class pass_ipa_inline : public ipa_opt_pass_d
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
                      NULL, /* generate_summary */
                      NULL, /* write_summary */
                      NULL, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      inline_transform, /* function_transform */
                      NULL) /* variable_transform */
  /* opt_pass methods: */
  unsigned int execute (function *) final override { return ipa_inline (); }
}; // class pass_ipa_inline
make_pass_ipa_inline (gcc::context *ctxt)
  return new pass_ipa_inline (ctxt);