/* Inlining decision heuristics.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*  Inlining decision heuristics

    The implementation of the inliner is organized as follows:

    inlining heuristics limits

      can_inline_edge_p allows checking that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, growth and
      so on).

      Functions are inlined when it is obvious the result is profitable (such
      as functions called once or when inlining reduces code size).
      In addition to that we perform inlining of small functions and recursive
      inlining.

    inlining heuristics

      The inliner itself is split into two passes:

      pass_early_inline

	Simple local inlining pass inlining callees into the current function.
	This pass makes no use of whole unit analysis and thus it can do only
	very simple decisions based on local properties.

	The strength of the pass is that it is run in topological order
	(reverse postorder) on the callgraph.  Functions are converted into SSA
	form just before this pass and optimized subsequently.  As a result,
	the callees of the function seen by the early inliner were already
	optimized and the results of early inlining add a lot of optimization
	opportunities for the local optimization.

	The pass handles the obvious inlining decisions within the compilation
	unit - inlining auto inline functions, inlining for size and
	flattening.

	The main strength of the pass is the ability to eliminate abstraction
	penalty in C++ code (via a combination of inlining and early
	optimization) and thus improve the quality of analysis done by the
	real IPA optimizers.

	Because of the lack of whole unit knowledge, the pass cannot really
	make good code size/performance tradeoffs.  It however does very
	simple speculative inlining allowing code size to grow by
	EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
	the optimizations performed later are very likely to eliminate the
	cost.

      pass_ipa_inline

	This is the real inliner able to handle inlining with whole program
	knowledge.  It performs the following steps:

	1) inlining of small functions.  This is implemented by a greedy
	algorithm ordering all inlinable cgraph edges by their badness and
	inlining them in this order as long as inline limits allow doing so.

	This heuristic is not very good at inlining recursive calls.
	Recursive calls can be inlined with results similar to loop unrolling.
	To do so, a special purpose recursive inliner is executed on a
	function when a recursive edge is met as a viable candidate.

	2) Unreachable functions are removed from the callgraph.  Inlining
	leads to devirtualization and other modification of the callgraph, so
	functions may become unreachable during the process.  Also functions
	declared as extern inline or virtual functions are removed, since
	after inlining we no longer need the offline bodies.

	3) Functions called once and not exported from the unit are inlined.
	This should almost always lead to a reduction of code size by
	eliminating the need for an offline copy of the function.  */
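/* The greedy ordering used by pass_ipa_inline (step 1 above) can be pictured
   by the standalone sketch kept under #if 0 below.  It is NOT part of GCC:
   it models call-graph edges with a plain struct, uses std::priority_queue
   instead of the fibonacci heap employed later in this file, and "badness"
   is a placeholder double rather than the real edge_badness value.  */
#if 0
#include <queue>
#include <vector>

struct toy_edge
{
  double badness;	/* Lower is better; the real pass recomputes this.  */
  int growth;		/* Estimated unit growth if the edge is inlined.  */
};

struct badness_order
{
  bool operator() (const toy_edge &a, const toy_edge &b) const
  {
    return a.badness > b.badness;	/* Min-heap on badness.  */
  }
};

/* Inline edges in order of increasing badness while the unit stays within
   the growth budget, mirroring the loop in inline_small_functions.  */
static int
toy_greedy_inline (const std::vector<toy_edge> &candidates, int budget)
{
  std::priority_queue<toy_edge, std::vector<toy_edge>, badness_order>
    heap (candidates.begin (), candidates.end ());
  int size = 0, inlined = 0;
  while (!heap.empty ())
    {
      toy_edge e = heap.top ();
      heap.pop ();
      if (size + e.growth > budget)
	continue;	/* Skip edges that would exceed the growth budget.  */
      size += e.growth;
      inlined++;
    }
  return inlined;
}
#endif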
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "tree-inline.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;

/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into a tiny wrapper, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */
static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for function e->caller is inlined to.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of inline code
     we immediately inline to.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
	limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
	stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
	to = to->callers->caller;
      else
	break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > opt_for_fn (to->decl, param_large_function_insns)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
		       * opt_for_fn (to->decl, param_stack_frame_growth)
		       / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
		   + outer_info->estimated_self_stack_size
		   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If function already has large stack usage from sibling
	 inline call, we can inline, too.
	 This bit overoptimistically assumes that we are good at stack
	 packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
		       "  not inlinable: %C -> %C, %s\n",
		       e->caller, e->callee,
		       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	  && e->caller->lto_file_data
	  && e->callee->ultimate_alias_target ()->lto_file_data)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			   "  LTO objects: %s, %s\n",
			   e->caller->lto_file_data->file_name,
			   e->callee->ultimate_alias_target ()
			     ->lto_file_data->file_name);
	}
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
	cl_target_option_print_diff
	  (dump_file, 2, target_opts_for_fn (e->caller->decl),
	   target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	cl_optimization_print_diff
	  (dump_file, 2, opts_for_fn (e->caller->decl),
	   opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Follow clang and allow inlining for always_inline functions.  */
  if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  const sanitize_code codes[] =
    {
      SANITIZE_ADDRESS,
      SANITIZE_THREAD,
      SANITIZE_UNDEFINED,
      SANITIZE_UNDEFINED_NONDEFAULT,
      SANITIZE_POINTER_COMPARE,
      SANITIZE_POINTER_SUBTRACT
    };

  for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
    if (sanitize_flags_p (codes[i], caller)
	!= sanitize_flags_p (codes[i], callee))
      return false;

  return true;
}
/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
  (opts_for_fn (caller->decl)->x_##flag			\
   != opts_for_fn (callee->decl)->x_##flag		\
   && (!always_inline					\
       || opts_for_fn (caller->decl)->x_##flag		\
	  < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
  (opts_for_fn (caller->decl)->x_##flag			\
   != opts_for_fn (callee->decl)->x_##flag		\
   && (!always_inline					\
       || opts_for_fn (caller->decl)->x_##flag		\
	  > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where exact match is needed for correctness.  */
#define check_match(flag) \
  (opts_for_fn (caller->decl)->x_##flag			\
   != opts_for_fn (callee->decl)->x_##flag)
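/* Illustration only (not part of GCC): inside can_inline_edge_by_limits_p
   below, where CALLER, CALLEE and ALWAYS_INLINE are in scope, a use such as
   check_maybe_up (flag_errno_math) expands roughly to the expression kept
   under #if 0 here.  It evaluates to true - a blocking mismatch - when the
   flag values differ, except that for always_inline callees a difference is
   tolerated as long as the caller's value is not smaller than the callee's.  */
#if 0
  (opts_for_fn (caller->decl)->x_flag_errno_math
   != opts_for_fn (callee->decl)->x_flag_errno_math
   && (!always_inline
       || opts_for_fn (caller->decl)->x_flag_errno_math
	  < opts_for_fn (callee->decl)->x_flag_errno_math))
#endif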
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
		   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
		 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
	   && DECL_FUNCTION_PERSONALITY (callee->decl)
	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
						callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
	   || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }
  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return inlining_insns_single limit for function N.  If HINT or HINT2 is
   true scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint, bool hint2)
{
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd / 100;
      if (spd > 1000000)
	spd = 1000000;
      return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
    }
  if (hint || hint2)
    return opt_for_fn (n->decl, param_max_inline_insns_single)
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return opt_for_fn (n->decl, param_max_inline_insns_single);
}

/* Return inlining_insns_auto limit for function N.  If HINT or HINT2 is
   true scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd / 100;
      if (spd > 1000000)
	spd = 1000000;
      return max_inline_insns_auto * spd / 100;
    }
  if (hint || hint2)
    return max_inline_insns_auto
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return max_inline_insns_auto;
}
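/* Illustration with hypothetical values (the real ones are --param
   controlled and vary between releases): if param_max_inline_insns_auto
   were 15 and param_inline_heuristics_hint_percent were 200, a single
   matching hint group would raise the auto limit to 15 * 200 / 100 = 30
   instructions; when both hint groups match, the percentage itself is
   scaled up further before being applied, raising the limit again.  */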
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
			     bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;

  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
			    DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just tip of iceberg of handling
     optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline
	= (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	   && lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
	 below and inlined across optimization boundaries.
	 Enabling checks below breaks several packages by refusing
	 to inline library always_inline functions.  See PR65873.
	 Disable the check for early inlining for now until better solution
	 is found.  */
      if (always_inline && early)
	;
      /* There are some options that change IL semantics which means
	 we cannot inline in these cases for correctness reason.
	 Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
	       || check_match (flag_trapv)
	       || check_match (flag_pcc_struct_return)
	       || check_maybe_down (optimize_debug)
	       /* When caller or callee does FP math, be sure FP codegen flags
		  compatible.  */
	       || ((caller_info->fp_expressions && callee_info->fp_expressions)
		   && (check_maybe_up (flag_rounding_math)
		       || check_maybe_up (flag_trapping_math)
		       || check_maybe_down (flag_unsafe_math_optimizations)
		       || check_maybe_down (flag_finite_math_only)
		       || check_maybe_up (flag_signaling_nans)
		       || check_maybe_down (flag_cx_limited_range)
		       || check_maybe_up (flag_signed_zeros)
		       || check_maybe_down (flag_associative_math)
		       || check_maybe_down (flag_reciprocal_math)
		       || check_maybe_down (flag_fp_int_builtin_inexact)
		       /* Strictly speaking only when the callee contains
			  function calls that may end up setting errno.  */
		       || check_maybe_up (flag_errno_math)))
	       /* We do not want to make code compiled with exceptions to be
		  brought into a non-EH function unless we know that the
		  callee does not throw.
		  This is tracked by DECL_FUNCTION_PERSONALITY.  */
	       || (check_maybe_up (flag_non_call_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       || (check_maybe_up (flag_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       /* When devirtualization is disabled for callee, it is not safe
		  to inline it as we possibly mangled the type info.
		  Allow early inlining of always inlines.  */
	       || (!early && check_maybe_down (flag_devirtualize)))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
	;
      /* When user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If explicit optimize attributes are not used, the mismatch is caused
	 by different command line options used to build different units.
	 Do not care about COMDAT functions - those are intended to be
	 optimized with the optimization flags of the module they are used in.
	 Also do not care about mixing up size/speed optimization when
	 DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
		&& !lookup_attribute ("optimize",
				      DECL_ATTRIBUTES (caller->decl)))
	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	;
      /* If mismatch is caused by merging two LTO units with different
	 optimization flags we want to be a bit nicer.  However never inline
	 if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
	       || !opt_for_fn (caller->decl, optimize))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If callee is optimized for size and caller is not, allow inlining if
	 code shrinks or we are in param_max_inline_insns_single limit and
	 callee is inline (and thus likely an unified comdat).
	 This will allow caller to run faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       > opt_for_fn (caller->decl, optimize_size))
	{
	  int growth = estimate_edge_growth (e);
	  if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
	      && (!DECL_DECLARED_INLINE_P (callee->decl)
		  && growth >= MAX (inline_insns_single (caller, false, false),
				    inline_insns_auto (caller, false, false))))
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
      /* If callee is more aggressively optimized for performance than caller,
	 we generally want to inline only cheap (runtime wise) functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       < opt_for_fn (caller->decl, optimize_size)
	       || (opt_for_fn (callee->decl, optimize)
		   > opt_for_fn (caller->decl, optimize)))
	{
	  if (estimate_edge_time (e)
	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* Early inliner might get called at WPA stage when IPA pass adds new
     function.  In this case we cannot really do any of early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In early inliner some of callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by early inliner, yet).  We don't have CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			 "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}
/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in inlining the small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before profile summary, all
     hot paths' IR look exactly the same as profiled binary.  As a result,
     in einliner, we will disregard size limit and inline those callsites
     that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile
	   && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      if (min_growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow "
			     "at least by %i\n",
			     e->caller, callee,
			     min_growth);
	  want_inline = false;
	}
      else
	growth = estimate_edge_growth (e);

      if (!want_inline || growth <= param_max_inline_insns_size)
	;
      else if (!e->maybe_hot_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow by %i\n",
			     e->caller, callee,
			     growth);
	  want_inline = false;
	}
      else if (growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
      else if ((n = num_calls (callee)) != 0
	       && growth * (n + 1) > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns "
			     "divided by number of calls\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
    }
  return want_inline;
}
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
			     sreal uninlined_call_time,
			     sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}
/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
			   sreal time,
			   sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
/* Determine time saved by inlining EDGE of frequency FREQ
   where callee's runtime w/o inlining is UNINLINED_TIME
   and with inlining is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
		  sreal freq,
		  sreal uninlined_time,
		  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      speedup = (speedup + call_time);
      if (freq != 1)
	speedup = speedup * freq;
    }
  else if (freq == 0)
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
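/* Worked example with made-up numbers: if the callee takes 40 time units
   when not inlined and 25 when inlined, the call statement itself costs 3,
   and the edge frequency is 2, the saving per invocation is (40 - 25) + 3
   = 18 and the resulting speedup is 18 * 2 = 36 (edges whose frequency is
   zero are instead scaled down by 2**11).  */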
/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to
			 : e->caller);
  int limit = opt_for_fn (caller->decl, param_inline_min_speedup);

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
/* Return true if we are interested in inlining the small function.
   When REPORT is true, report the reason to the dump file.  */
849 want_inline_small_function_p (struct cgraph_edge
*e
, bool report
)
851 bool want_inline
= true;
852 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
853 cgraph_node
*to
= (e
->caller
->inlined_to
854 ? e
->caller
->inlined_to
: e
->caller
);
856 /* Allow this function to be called before can_inline_edge_p,
857 since it's usually cheaper. */
858 if (cgraph_inline_failed_type (e
->inline_failed
) == CIF_FINAL_ERROR
)
860 else if (DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
862 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
863 && !opt_for_fn (e
->caller
->decl
, flag_inline_small_functions
))
865 e
->inline_failed
= CIF_FUNCTION_NOT_INLINE_CANDIDATE
;
868 /* Do fast and conservative check if the function can be good
870 else if ((!DECL_DECLARED_INLINE_P (callee
->decl
)
871 && (!e
->count
.ipa ().initialized_p () || !e
->maybe_hot_p ()))
872 && ipa_fn_summaries
->get (callee
)->min_size
873 - ipa_call_summaries
->get (e
)->call_stmt_size
874 > inline_insns_auto (e
->caller
, true, true))
876 e
->inline_failed
= CIF_MAX_INLINE_INSNS_AUTO_LIMIT
;
879 else if ((DECL_DECLARED_INLINE_P (callee
->decl
)
880 || e
->count
.ipa ().nonzero_p ())
881 && ipa_fn_summaries
->get (callee
)->min_size
882 - ipa_call_summaries
->get (e
)->call_stmt_size
883 > inline_insns_single (e
->caller
, true, true))
885 e
->inline_failed
= (DECL_DECLARED_INLINE_P (callee
->decl
)
886 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
887 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT
);
892 int growth
= estimate_edge_growth (e
);
893 ipa_hints hints
= estimate_edge_hints (e
);
      /* We have two independent groups of hints.  If one matches in each
	 of the groups the limits are increased.  If both groups match, the
	 limit is increased even more.  */
897 bool apply_hints
= (hints
& (INLINE_HINT_indirect_call
898 | INLINE_HINT_known_hot
899 | INLINE_HINT_loop_iterations
900 | INLINE_HINT_loop_stride
));
901 bool apply_hints2
= (hints
& INLINE_HINT_builtin_constant_p
);
903 if (growth
<= opt_for_fn (to
->decl
,
904 param_max_inline_insns_size
))
906 /* Apply param_max_inline_insns_single limit. Do not do so when
907 hints suggests that inlining given function is very profitable.
908 Avoid computation of big_speedup_p when not necessary to change
909 outcome of decision. */
910 else if (DECL_DECLARED_INLINE_P (callee
->decl
)
911 && growth
>= inline_insns_single (e
->caller
, apply_hints
,
913 && (apply_hints
|| apply_hints2
914 || growth
>= inline_insns_single (e
->caller
, true,
916 || !big_speedup_p (e
)))
918 e
->inline_failed
= CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
;
921 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
922 && !opt_for_fn (e
->caller
->decl
, flag_inline_functions
)
923 && growth
>= opt_for_fn (to
->decl
,
924 param_max_inline_insns_small
))
926 /* growth_positive_p is expensive, always test it last. */
927 if (growth
>= inline_insns_single (e
->caller
, false, false)
928 || growth_positive_p (callee
, e
, growth
))
930 e
->inline_failed
= CIF_NOT_DECLARED_INLINED
;
934 /* Apply param_max_inline_insns_auto limit for functions not declared
935 inline. Bypass the limit when speedup seems big. */
936 else if (!DECL_DECLARED_INLINE_P (callee
->decl
)
937 && growth
>= inline_insns_auto (e
->caller
, apply_hints
,
939 && (apply_hints
|| apply_hints2
940 || growth
>= inline_insns_auto (e
->caller
, true,
942 || !big_speedup_p (e
)))
944 /* growth_positive_p is expensive, always test it last. */
945 if (growth
>= inline_insns_single (e
->caller
, false, false)
946 || growth_positive_p (callee
, e
, growth
))
948 e
->inline_failed
= CIF_MAX_INLINE_INSNS_AUTO_LIMIT
;
952 /* If call is cold, do not inline when function body would grow. */
953 else if (!e
->maybe_hot_p ()
954 && (growth
>= inline_insns_single (e
->caller
, false, false)
955 || growth_positive_p (callee
, e
, growth
)))
957 e
->inline_failed
= CIF_UNLIKELY_CALL
;
961 if (!want_inline
&& report
)
962 report_inline_failed_reason (e
);
/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */
978 want_inline_self_recursive_call_p (struct cgraph_edge
*edge
,
979 struct cgraph_node
*outer_node
,
983 char const *reason
= NULL
;
984 bool want_inline
= true;
985 sreal caller_freq
= 1;
986 int max_depth
= opt_for_fn (outer_node
->decl
,
987 param_max_inline_recursive_depth_auto
);
989 if (DECL_DECLARED_INLINE_P (edge
->caller
->decl
))
990 max_depth
= opt_for_fn (outer_node
->decl
,
991 param_max_inline_recursive_depth
);
993 if (!edge
->maybe_hot_p ())
995 reason
= "recursive call is cold";
998 else if (depth
> max_depth
)
1000 reason
= "--param max-inline-recursive-depth exceeded.";
1001 want_inline
= false;
1003 else if (outer_node
->inlined_to
1004 && (caller_freq
= outer_node
->callers
->sreal_frequency ()) == 0)
1006 reason
= "caller frequency is 0";
1007 want_inline
= false;
  /* Inlining of a self recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of the actual call to the self recursive function very
     small.  Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
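  /* For illustration with made-up numbers: with a max_depth of 8 the cap on
     the recursive edge's relative frequency starts at 1 - 1/8 = 0.875 of
     the caller frequency, and it is tightened further (by the repeated
     squaring below) as the inlining depth grows, so deep copies are only
     peeled when recursion quickly becomes unlikely.  */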
1023 sreal max_prob
= (sreal
)1 - ((sreal
)1 / (sreal
)max_depth
);
1025 for (i
= 1; i
< depth
; i
++)
1026 max_prob
= max_prob
* max_prob
;
1027 if (edge
->sreal_frequency () >= max_prob
* caller_freq
)
1029 reason
= "frequency of recursive call is too large";
1030 want_inline
= false;
  /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in the hardware return predictor.

     Recursive inlining might however increase cost of stack frame setup
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when
     probability of self recursion is low.

     Recursive inlining of self recursive call within loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of libstdc++ rb tree methods.  */
1051 if (edge
->sreal_frequency () * 100
1053 * opt_for_fn (outer_node
->decl
,
1054 param_min_inline_recursive_probability
))
1056 reason
= "frequency of recursive call is too small";
1057 want_inline
= false;
1060 if (!want_inline
&& dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, edge
->call_stmt
,
1062 " not inlining recursively: %s\n", reason
);
1066 /* Return true when NODE has uninlinable caller;
1067 set HAS_HOT_CALL if it has hot call.
1068 Worker for cgraph_for_node_and_aliases. */
1071 check_callers (struct cgraph_node
*node
, void *has_hot_call
)
1073 struct cgraph_edge
*e
;
1074 for (e
= node
->callers
; e
; e
= e
->next_caller
)
1076 if (!opt_for_fn (e
->caller
->decl
, flag_inline_functions_called_once
)
1077 || !opt_for_fn (e
->caller
->decl
, optimize
))
1079 if (!can_inline_edge_p (e
, true))
1081 if (e
->recursive_p ())
1083 if (!can_inline_edge_by_limits_p (e
, true))
1085 if (!(*(bool *)has_hot_call
) && e
->maybe_hot_p ())
1086 *(bool *)has_hot_call
= true;
1091 /* If NODE has a caller, return true. */
1094 has_caller_p (struct cgraph_node
*node
, void *data ATTRIBUTE_UNUSED
)
/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */
1106 want_inline_function_to_all_callers_p (struct cgraph_node
*node
, bool cold
)
1108 bool has_hot_call
= false;
1110 /* Aliases gets inlined along with the function they alias. */
1113 /* Already inlined? */
1114 if (node
->inlined_to
)
1116 /* Does it have callers? */
1117 if (!node
->call_for_symbol_and_aliases (has_caller_p
, NULL
, true))
1119 /* Inlining into all callers would increase size? */
1120 if (growth_positive_p (node
, NULL
, INT_MIN
) > 0)
1122 /* All inlines must be possible. */
1123 if (node
->call_for_symbol_and_aliases (check_callers
, &has_hot_call
,
1126 if (!cold
&& !has_hot_call
)
/* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
   in estimate_edge_badness.  */

static bool
wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
{
  return size < (DECL_DECLARED_INLINE_P (where->decl)
		 ? inline_insns_single (where, false, false)
		 : inline_insns_auto (where, false, false));
}
/* A cost model driving the inlining heuristics in a way so that the edges
   with the smallest badness are inlined first.  After each inlining is
   performed the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body size.  */
1149 edge_badness (struct cgraph_edge
*edge
, bool dump
)
1153 sreal edge_time
, unspec_edge_time
;
1154 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
1155 class ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (callee
);
1157 cgraph_node
*caller
= (edge
->caller
->inlined_to
1158 ? edge
->caller
->inlined_to
1161 growth
= estimate_edge_growth (edge
);
1162 edge_time
= estimate_edge_time (edge
, &unspec_edge_time
);
1163 hints
= estimate_edge_hints (edge
);
1164 gcc_checking_assert (edge_time
>= 0);
1165 /* Check that inlined time is better, but tolerate some roundoff issues.
1166 FIXME: When callee profile drops to 0 we account calls more. This
1167 should be fixed by never doing that. */
1168 gcc_checking_assert ((edge_time
* 100
1169 - callee_info
->time
* 101).to_int () <= 0
1170 || callee
->count
.ipa ().initialized_p ());
1171 gcc_checking_assert (growth
<= ipa_size_summaries
->get (callee
)->size
);
1175 fprintf (dump_file
, " Badness calculation for %s -> %s\n",
1176 edge
->caller
->dump_name (),
1177 edge
->callee
->dump_name ());
1178 fprintf (dump_file
, " size growth %i, time %f unspec %f ",
1180 edge_time
.to_double (),
1181 unspec_edge_time
.to_double ());
1182 ipa_dump_hints (dump_file
, hints
);
1183 if (big_speedup_p (edge
))
1184 fprintf (dump_file
, " big_speedup");
1185 fprintf (dump_file
, "\n");
1188 /* Always prefer inlining saving code size. */
1191 badness
= (sreal
) (-SREAL_MIN_SIG
+ growth
) << (SREAL_MAX_EXP
/ 256);
1193 fprintf (dump_file
, " %f: Growth %d <= 0\n", badness
.to_double (),
1196 /* Inlining into EXTERNAL functions is not going to change anything unless
1197 they are themselves inlined. */
1198 else if (DECL_EXTERNAL (caller
->decl
))
1201 fprintf (dump_file
, " max: function is external\n");
1202 return sreal::max ();
  /* When profile is available.  Compute badness as:

                 time_saved * caller_count
     goodness = -------------------------------------------------
	        growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use negative value to make calls with profile appear hotter
     than calls without.  */
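  /* Purely illustrative numbers: if inlining saves 2 time units per caller
     invocation and the caller's profile count is 1000, while the edge
     growth is 10, the callee's overall growth is 30 and the caller size
     plus growth is 500, then goodness is 2000 / (10 * 30 * 500) and badness
     is its negation, so edges that save much time relative to the code they
     add come out most negative and are inlined first (before the
     wrapper-penalty and squaring adjustments applied below).  */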
1215 else if (opt_for_fn (caller
->decl
, flag_guess_branch_prob
)
1216 || caller
->count
.ipa ().nonzero_p ())
1218 sreal numerator
, denominator
;
1220 sreal freq
= edge
->sreal_frequency ();
1222 numerator
= inlining_speedup (edge
, freq
, unspec_edge_time
, edge_time
);
1224 numerator
= ((sreal
) 1 >> 8);
1225 if (caller
->count
.ipa ().nonzero_p ())
1226 numerator
*= caller
->count
.ipa ().to_gcov_type ();
1227 else if (caller
->count
.ipa ().initialized_p ())
1228 numerator
= numerator
>> 11;
1229 denominator
= growth
;
1231 overall_growth
= callee_info
->growth
;
1233 /* Look for inliner wrappers of the form:
1239 noninline_callee ();
1241 Without penalizing this case, we usually inline noninline_callee
1242 into the inline_caller because overall_growth is small preventing
1243 further inlining of inline_caller.
1245 Penalize only callgraph edges to functions with small overall
1248 if (growth
> overall_growth
1249 /* ... and having only one caller which is not inlined ... */
1250 && callee_info
->single_caller
1251 && !edge
->caller
->inlined_to
1252 /* ... and edges executed only conditionally ... */
1254 /* ... consider case where callee is not inline but caller is ... */
1255 && ((!DECL_DECLARED_INLINE_P (edge
->callee
->decl
)
1256 && DECL_DECLARED_INLINE_P (caller
->decl
))
1257 /* ... or when early optimizers decided to split and edge
1258 frequency still indicates splitting is a win ... */
1259 || (callee
->split_part
&& !caller
->split_part
1261 < opt_for_fn (caller
->decl
,
1262 param_partial_inlining_entry_probability
)
1263 /* ... and do not overwrite user specified hints. */
1264 && (!DECL_DECLARED_INLINE_P (edge
->callee
->decl
)
1265 || DECL_DECLARED_INLINE_P (caller
->decl
)))))
1267 ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (caller
);
1268 int caller_growth
= caller_info
->growth
;
1270 /* Only apply the penalty when caller looks like inline candidate,
1271 and it is not called once. */
1272 if (!caller_info
->single_caller
&& overall_growth
< caller_growth
1273 && caller_info
->inlinable
1274 && wrapper_heuristics_may_apply
1275 (caller
, ipa_size_summaries
->get (caller
)->size
))
1279 " Wrapper penalty. Increasing growth %i to %i\n",
1280 overall_growth
, caller_growth
);
1281 overall_growth
= caller_growth
;
1284 if (overall_growth
> 0)
1286 /* Strongly prefer functions with few callers that can be inlined
1287 fully. The square root here leads to smaller binaries at average.
1288 Watch however for extreme cases and return to linear function
1289 when growth is large. */
1290 if (overall_growth
< 256)
1291 overall_growth
*= overall_growth
;
1293 overall_growth
+= 256 * 256 - 256;
1294 denominator
*= overall_growth
;
1296 denominator
*= ipa_size_summaries
->get (caller
)->size
+ growth
;
1298 badness
= - numerator
/ denominator
;
1303 " %f: guessed profile. frequency %f, count %" PRId64
1304 " caller count %" PRId64
1306 " overall growth %i (current) %i (original)"
1307 " %i (compensated)\n",
1308 badness
.to_double (),
1310 edge
->count
.ipa ().initialized_p () ? edge
->count
.ipa ().to_gcov_type () : -1,
1311 caller
->count
.ipa ().initialized_p () ? caller
->count
.ipa ().to_gcov_type () : -1,
1312 inlining_speedup (edge
, freq
, unspec_edge_time
, edge_time
).to_double (),
1313 estimate_growth (callee
),
1314 callee_info
->growth
, overall_growth
);
1317 /* When function local profile is not available or it does not give
1318 useful information (i.e. frequency is zero), base the cost on
1319 loop nest and overall size growth, so we optimize for overall number
1320 of functions fully inlined in program. */
1323 int nest
= MIN (ipa_call_summaries
->get (edge
)->loop_depth
, 8);
1326 /* Decrease badness if call is nested. */
1328 badness
= badness
>> nest
;
1330 badness
= badness
<< nest
;
1332 fprintf (dump_file
, " %f: no profile. nest %i\n",
1333 badness
.to_double (), nest
);
1335 gcc_checking_assert (badness
!= 0);
1337 if (edge
->recursive_p ())
1338 badness
= badness
.shift (badness
> 0 ? 4 : -4);
1339 if ((hints
& (INLINE_HINT_indirect_call
1340 | INLINE_HINT_loop_iterations
1341 | INLINE_HINT_loop_stride
))
1342 || callee_info
->growth
<= 0)
1343 badness
= badness
.shift (badness
> 0 ? -2 : 2);
1344 if (hints
& INLINE_HINT_builtin_constant_p
)
1345 badness
= badness
.shift (badness
> 0 ? -4 : 4);
1346 if (hints
& (INLINE_HINT_same_scc
))
1347 badness
= badness
.shift (badness
> 0 ? 3 : -3);
1348 else if (hints
& (INLINE_HINT_in_scc
))
1349 badness
= badness
.shift (badness
> 0 ? 2 : -2);
1350 else if (hints
& (INLINE_HINT_cross_module
))
1351 badness
= badness
.shift (badness
> 0 ? 1 : -1);
1352 if (DECL_DISREGARD_INLINE_LIMITS (callee
->decl
))
1353 badness
= badness
.shift (badness
> 0 ? -4 : 4);
1354 else if ((hints
& INLINE_HINT_declared_inline
))
1355 badness
= badness
.shift (badness
> 0 ? -3 : 3);
1357 fprintf (dump_file
, " Adjusted by hints %f\n", badness
.to_double ());
/* Recompute badness of EDGE and update its key in HEAP if needed.  */
1363 update_edge_key (edge_heap_t
*heap
, struct cgraph_edge
*edge
)
1365 sreal badness
= edge_badness (edge
, false);
1368 edge_heap_node_t
*n
= (edge_heap_node_t
*) edge
->aux
;
1369 gcc_checking_assert (n
->get_data () == edge
);
1371 /* fibonacci_heap::replace_key does busy updating of the
1372 heap that is unnecessarily expensive.
1373 We do lazy increases: after extracting minimum if the key
1374 turns out to be out of date, it is re-inserted into heap
1375 with correct value. */
1376 if (badness
< n
->get_key ())
1378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1381 " decreasing badness %s -> %s, %f to %f\n",
1382 edge
->caller
->dump_name (),
1383 edge
->callee
->dump_name (),
1384 n
->get_key ().to_double (),
1385 badness
.to_double ());
1387 heap
->decrease_key (n
, badness
);
1392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1395 " enqueuing call %s -> %s, badness %f\n",
1396 edge
->caller
->dump_name (),
1397 edge
->callee
->dump_name (),
1398 badness
.to_double ());
1400 edge
->aux
= heap
->insert (badness
, edge
);
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */
1411 reset_edge_caches (struct cgraph_node
*node
)
1413 struct cgraph_edge
*edge
;
1414 struct cgraph_edge
*e
= node
->callees
;
1415 struct cgraph_node
*where
= node
;
1416 struct ipa_ref
*ref
;
1418 if (where
->inlined_to
)
1419 where
= where
->inlined_to
;
1421 reset_node_cache (where
);
1423 if (edge_growth_cache
!= NULL
)
1424 for (edge
= where
->callers
; edge
; edge
= edge
->next_caller
)
1425 if (edge
->inline_failed
)
1426 edge_growth_cache
->remove (edge
);
1428 FOR_EACH_ALIAS (where
, ref
)
1429 reset_edge_caches (dyn_cast
<cgraph_node
*> (ref
->referring
));
1435 if (!e
->inline_failed
&& e
->callee
->callees
)
1436 e
= e
->callee
->callees
;
1439 if (edge_growth_cache
!= NULL
&& e
->inline_failed
)
1440 edge_growth_cache
->remove (e
);
1447 if (e
->caller
== node
)
1449 e
= e
->caller
->callers
;
1451 while (!e
->next_callee
);
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */
1463 update_caller_keys (edge_heap_t
*heap
, struct cgraph_node
*node
,
1464 bitmap updated_nodes
,
1465 struct cgraph_edge
*check_inlinablity_for
)
1467 struct cgraph_edge
*edge
;
1468 struct ipa_ref
*ref
;
1470 if ((!node
->alias
&& !ipa_fn_summaries
->get (node
)->inlinable
)
1471 || node
->inlined_to
)
1473 if (!bitmap_set_bit (updated_nodes
, node
->get_uid ()))
1476 FOR_EACH_ALIAS (node
, ref
)
1478 struct cgraph_node
*alias
= dyn_cast
<cgraph_node
*> (ref
->referring
);
1479 update_caller_keys (heap
, alias
, updated_nodes
, check_inlinablity_for
);
1482 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
1483 if (edge
->inline_failed
)
1485 if (!check_inlinablity_for
1486 || check_inlinablity_for
== edge
)
1488 if (can_inline_edge_p (edge
, false)
1489 && want_inline_small_function_p (edge
, false)
1490 && can_inline_edge_by_limits_p (edge
, false))
1491 update_edge_key (heap
, edge
);
1494 report_inline_failed_reason (edge
);
1495 heap
->delete_node ((edge_heap_node_t
*) edge
->aux
);
1500 update_edge_key (heap
, edge
);
/* Recompute HEAP nodes for each uninlined call in NODE.
   If UPDATE_SINCE is non-NULL, check if edges called within that function
   are inlinable (typically UPDATE_SINCE is the inline clone we introduced
   where all edges have new context).

   This is used when we know that edge badnesses are only going to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */
1514 update_callee_keys (edge_heap_t
*heap
, struct cgraph_node
*node
,
1515 struct cgraph_node
*update_since
,
1516 bitmap updated_nodes
)
1518 struct cgraph_edge
*e
= node
->callees
;
1519 bool check_inlinability
= update_since
== node
;
1524 if (!e
->inline_failed
&& e
->callee
->callees
)
1526 if (e
->callee
== update_since
)
1527 check_inlinability
= true;
1528 e
= e
->callee
->callees
;
1532 enum availability avail
;
1533 struct cgraph_node
*callee
;
1534 if (!check_inlinability
)
1537 && !bitmap_bit_p (updated_nodes
,
1538 e
->callee
->ultimate_alias_target
1539 (&avail
, e
->caller
)->get_uid ()))
1540 update_edge_key (heap
, e
);
1542 /* We do not reset callee growth cache here. Since we added a new call,
1543 growth should have just increased and consequently badness metric
1544 don't need updating. */
1545 else if (e
->inline_failed
1546 && (callee
= e
->callee
->ultimate_alias_target (&avail
,
1548 && avail
>= AVAIL_AVAILABLE
1549 && ipa_fn_summaries
->get (callee
) != NULL
1550 && ipa_fn_summaries
->get (callee
)->inlinable
1551 && !bitmap_bit_p (updated_nodes
, callee
->get_uid ()))
1553 if (can_inline_edge_p (e
, false)
1554 && want_inline_small_function_p (e
, false)
1555 && can_inline_edge_by_limits_p (e
, false))
1557 gcc_checking_assert (check_inlinability
|| can_inline_edge_p (e
, false));
1558 gcc_checking_assert (check_inlinability
|| e
->aux
);
1559 update_edge_key (heap
, e
);
1563 report_inline_failed_reason (e
);
1564 heap
->delete_node ((edge_heap_node_t
*) e
->aux
);
1568 /* In case we redirected to unreachable node we only need to remove the
1572 heap
->delete_node ((edge_heap_node_t
*) e
->aux
);
1581 if (e
->caller
== node
)
1583 if (e
->caller
== update_since
)
1584 check_inlinability
= false;
1585 e
= e
->caller
->callers
;
1587 while (!e
->next_callee
);
/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */
1597 lookup_recursive_calls (struct cgraph_node
*node
, struct cgraph_node
*where
,
1600 struct cgraph_edge
*e
;
1601 enum availability avail
;
1603 for (e
= where
->callees
; e
; e
= e
->next_callee
)
1604 if (e
->callee
== node
1605 || (e
->callee
->ultimate_alias_target (&avail
, e
->caller
) == node
1606 && avail
> AVAIL_INTERPOSABLE
))
1607 heap
->insert (-e
->sreal_frequency (), e
);
1608 for (e
= where
->callees
; e
; e
= e
->next_callee
)
1609 if (!e
->inline_failed
)
1610 lookup_recursive_calls (node
, e
->callee
, heap
);
/* Decide on recursive inlining: in the case the function has recursive calls,
   inline until body size reaches the given argument.  If any new indirect
   edges are discovered in the process, add them to *NEW_EDGES, unless
   NEW_EDGES is NULL.  */
1619 recursive_inlining (struct cgraph_edge
*edge
,
1620 vec
<cgraph_edge
*> *new_edges
)
1622 cgraph_node
*to
= (edge
->caller
->inlined_to
1623 ? edge
->caller
->inlined_to
: edge
->caller
);
1624 int limit
= opt_for_fn (to
->decl
,
1625 param_max_inline_insns_recursive_auto
);
1626 edge_heap_t
heap (sreal::min ());
1627 struct cgraph_node
*node
;
1628 struct cgraph_edge
*e
;
1629 struct cgraph_node
*master_clone
= NULL
, *next
;
1633 node
= edge
->caller
;
1634 if (node
->inlined_to
)
1635 node
= node
->inlined_to
;
1637 if (DECL_DECLARED_INLINE_P (node
->decl
))
1638 limit
= opt_for_fn (to
->decl
, param_max_inline_insns_recursive
);
1640 /* Make sure that function is small enough to be considered for inlining. */
1641 if (estimate_size_after_inlining (node
, edge
) >= limit
)
1643 lookup_recursive_calls (node
, node
, &heap
);
1649 " Performing recursive inlining on %s\n", node
->dump_name ());
1651 /* Do the inlining and update list of recursive call during process. */
1652 while (!heap
.empty ())
1654 struct cgraph_edge
*curr
= heap
.extract_min ();
1655 struct cgraph_node
*cnode
, *dest
= curr
->callee
;
1657 if (!can_inline_edge_p (curr
, true)
1658 || !can_inline_edge_by_limits_p (curr
, true))
1661 /* MASTER_CLONE is produced in the case we already started modified
1662 the function. Be sure to redirect edge to the original body before
1663 estimating growths otherwise we will be seeing growths after inlining
1664 the already modified body. */
1667 curr
->redirect_callee (master_clone
);
1668 if (edge_growth_cache
!= NULL
)
1669 edge_growth_cache
->remove (curr
);
1672 if (estimate_size_after_inlining (node
, curr
) > limit
)
1674 curr
->redirect_callee (dest
);
1675 if (edge_growth_cache
!= NULL
)
1676 edge_growth_cache
->remove (curr
);
1681 for (cnode
= curr
->caller
;
1682 cnode
->inlined_to
; cnode
= cnode
->callers
->caller
)
1684 == curr
->callee
->ultimate_alias_target ()->decl
)
1687 if (!want_inline_self_recursive_call_p (curr
, node
, false, depth
))
1689 curr
->redirect_callee (dest
);
1690 if (edge_growth_cache
!= NULL
)
1691 edge_growth_cache
->remove (curr
);
1698 " Inlining call of depth %i", depth
);
1699 if (node
->count
.nonzero_p () && curr
->count
.initialized_p ())
1701 fprintf (dump_file
, " called approx. %.2f times per call",
1702 (double)curr
->count
.to_gcov_type ()
1703 / node
->count
.to_gcov_type ());
1705 fprintf (dump_file
, "\n");
1709 /* We need original clone to copy around. */
1710 master_clone
= node
->create_clone (node
->decl
, node
->count
,
1711 false, vNULL
, true, NULL
, NULL
);
1712 for (e
= master_clone
->callees
; e
; e
= e
->next_callee
)
1713 if (!e
->inline_failed
)
1714 clone_inlined_nodes (e
, true, false, NULL
);
1715 curr
->redirect_callee (master_clone
);
1716 if (edge_growth_cache
!= NULL
)
1717 edge_growth_cache
->remove (curr
);
1720 inline_call (curr
, false, new_edges
, &overall_size
, true);
1721 reset_node_cache (node
);
1722 lookup_recursive_calls (node
, curr
->callee
, &heap
);
1726 if (!heap
.empty () && dump_file
)
1727 fprintf (dump_file
, " Recursive inlining growth limit met.\n");
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_NOTE
, edge
->call_stmt
,
1734 "\n Inlined %i times, "
1735 "body grown from size %i to %i, time %f to %f\n", n
,
1736 ipa_size_summaries
->get (master_clone
)->size
,
1737 ipa_size_summaries
->get (node
)->size
,
1738 ipa_fn_summaries
->get (master_clone
)->time
.to_double (),
1739 ipa_fn_summaries
->get (node
)->time
.to_double ());
1741 /* Remove master clone we used for inlining. We rely that clones inlined
1742 into master clone gets queued just before master clone so we don't
1744 for (node
= symtab
->first_function (); node
!= master_clone
;
1747 next
= symtab
->next_function (node
);
1748 if (node
->inlined_to
== master_clone
)
1751 master_clone
->remove ();
/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int
compute_max_insns (cgraph_node *node, int insns)
{
  int max_insns = insns;
  if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
    max_insns = opt_for_fn (node->decl, param_large_unit_insns);

  return ((int64_t) max_insns
	  * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
}
/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1774 add_new_edges_to_heap (edge_heap_t
*heap
, vec
<cgraph_edge
*> new_edges
)
1776 while (new_edges
.length () > 0)
1778 struct cgraph_edge
*edge
= new_edges
.pop ();
1780 gcc_assert (!edge
->aux
);
1781 gcc_assert (edge
->callee
);
1782 if (edge
->inline_failed
1783 && can_inline_edge_p (edge
, true)
1784 && want_inline_small_function_p (edge
, true)
1785 && can_inline_edge_by_limits_p (edge
, true))
1786 edge
->aux
= heap
->insert (edge_badness (edge
, false), edge
);
/* Remove EDGE from the fibheap.  */
1793 heap_edge_removal_hook (struct cgraph_edge
*e
, void *data
)
1797 ((edge_heap_t
*)data
)->delete_node ((edge_heap_node_t
*)e
->aux
);
/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
   may get inlined.  */
1807 speculation_useful_p (struct cgraph_edge
*e
, bool anticipate_inlining
)
1809 /* If we have already decided to inline the edge, it seems useful. */
1810 if (!e
->inline_failed
)
1813 enum availability avail
;
1814 struct cgraph_node
*target
= e
->callee
->ultimate_alias_target (&avail
,
1817 gcc_assert (e
->speculative
&& !e
->indirect_unknown_callee
);
1819 if (!e
->maybe_hot_p ())
1822 /* See if IP optimizations found something potentially useful about the
1823 function. For now we look only for CONST/PURE flags. Almost everything
1824 else we propagate is useless. */
1825 if (avail
>= AVAIL_AVAILABLE
)
1827 int ecf_flags
= flags_from_decl_or_type (target
->decl
);
1828 if (ecf_flags
& ECF_CONST
)
1830 if (!(e
->speculative_call_indirect_edge ()->indirect_info
1831 ->ecf_flags
& ECF_CONST
))
1834 else if (ecf_flags
& ECF_PURE
)
1836 if (!(e
->speculative_call_indirect_edge ()->indirect_info
1837 ->ecf_flags
& ECF_PURE
))
1841 /* If we did not managed to inline the function nor redirect
1842 to an ipa-cp clone (that are seen by having local flag set),
1843 it is probably pointless to inline it unless hardware is missing
1844 indirect call predictor. */
1845 if (!anticipate_inlining
&& !target
->local
)
1847 /* For overwritable targets there is not much to do. */
1848 if (!can_inline_edge_p (e
, false)
1849 || !can_inline_edge_by_limits_p (e
, false, true))
1851 /* OK, speculation seems interesting. */
/* We know that EDGE is not going to be inlined.
   See if we can remove speculation.  */
1859 resolve_noninline_speculation (edge_heap_t
*edge_heap
, struct cgraph_edge
*edge
)
1861 if (edge
->speculative
&& !speculation_useful_p (edge
, false))
1863 struct cgraph_node
*node
= edge
->caller
;
1864 struct cgraph_node
*where
= node
->inlined_to
1865 ? node
->inlined_to
: node
;
1866 auto_bitmap updated_nodes
;
1868 if (edge
->count
.ipa ().initialized_p ())
1869 spec_rem
+= edge
->count
.ipa ();
1870 cgraph_edge::resolve_speculation (edge
);
1871 reset_edge_caches (where
);
1872 ipa_update_overall_fn_summary (where
);
1873 update_caller_keys (edge_heap
, where
,
1874 updated_nodes
, NULL
);
1875 update_callee_keys (edge_heap
, where
, NULL
,
/* Return true if NODE should be accounted for overall size estimate.
   Skip all nodes optimized for size so we can measure the growth of the hot
   part of the program regardless of padding.  */

bool
inline_account_function_p (struct cgraph_node *node)
{
  return (!DECL_EXTERNAL (node->decl)
	  && !opt_for_fn (node->decl, optimize_size)
	  && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
}
/* Count number of callers of NODE and store it into DATA (which
   points to an int).  Worker for cgraph_for_node_and_aliases.  */

static bool
sum_callers (struct cgraph_node *node, void *data)
{
  struct cgraph_edge *e;
  int *num_calls = (int *)data;

  for (e = node->callers; e; e = e->next_caller)
    (*num_calls)++;
  return false;
}
/* We only propagate across edges with non-interposable callee.  */

static bool
ignore_edge_p (struct cgraph_edge *e)
{
  enum availability avail;
  e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
  return (avail <= AVAIL_INTERPOSABLE);
}
/* We use greedy algorithm for inlining of small functions:
   All inline candidates are put into prioritized heap ordered in
   increasing badness.

   The inlining of small functions is bounded by unit growth parameters.  */
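/* Rough shape of the algorithm below (illustration only, details are in the
   function body):

     heap = all edges that can and want to be inlined, keyed by badness;
     while (!heap.empty ())
       {
	 edge = heap.extract_min ();	   // best candidate first
	 if (its badness key is stale)	   // increases are handled lazily
	   re-insert with the fresh key and continue;
	 if (unit would exceed compute_max_insns ())
	   reject the edge;
	 inline the edge and update keys of affected callers and callees;
       }  */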
static void
inline_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  edge_heap_t edge_heap (sreal::min ());
  auto_bitmap updated_nodes;
  int min_size;
  auto_vec<cgraph_edge *> new_indirect_edges;
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  struct cgraph_edge_hook_list *edge_removal_hook_holder;
  new_indirect_edges.create (8);

  edge_removal_hook_holder
    = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
  /* Compute overall unit size and other global parameters used by badness
     metrics.  */

  max_count = profile_count::uninitialized ();
  ipa_reduced_postorder (order, true, ignore_edge_p);
  free (order);

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      {
	if (!node->alias && node->analyzed
	    && (node->has_gimple_body_p () || node->thunk)
	    && opt_for_fn (node->decl, optimize))
	  {
	    class ipa_fn_summary *info = ipa_fn_summaries->get (node);
	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;

	    /* Do not account external functions, they will be optimized out
	       if not inlined.  Also only count the non-cold portion of program.  */
	    if (inline_account_function_p (node))
	      initial_size += ipa_size_summaries->get (node)->size;
	    info->growth = estimate_growth (node);

	    int num_calls = 0;
	    node->call_for_symbol_and_aliases (sum_callers, &num_calls,
					       true);
	    if (num_calls == 1)
	      info->single_caller = true;
	    if (dfs && dfs->next_cycle)
	      {
		struct cgraph_node *n2;
		int id = dfs->scc_no + 1;
		for (n2 = node; n2;
		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
		  if (opt_for_fn (n2->decl, optimize))
		    {
		      ipa_fn_summary *info2 = ipa_fn_summaries->get
			(n2->inlined_to ? n2->inlined_to : n2);
		      if (info2->scc_no)
			break;
		      info2->scc_no = id;
		    }
	      }
	  }

	for (edge = node->callers; edge; edge = edge->next_caller)
	  max_count = max_count.max (edge->count.ipa ());
      }
  ipa_free_postorder_info ();
  initialize_growth_caches ();

  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on inlining of small functions.  Starting with size %i.\n",
	     initial_size);

  overall_size = initial_size;
  min_size = overall_size;
  /* Populate the heap with all edges we might inline.  */

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      bool update = false;
      struct cgraph_edge *next = NULL;
      bool has_speculative = false;

      if (!opt_for_fn (node->decl, optimize))
	continue;

      if (dump_file)
	fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());

      for (edge = node->callees; edge; edge = edge->next_callee)
	{
	  if (edge->inline_failed
	      && !edge->aux
	      && can_inline_edge_p (edge, true)
	      && want_inline_small_function_p (edge, true)
	      && can_inline_edge_by_limits_p (edge, true)
	      && edge->inline_failed)
	    {
	      gcc_assert (!edge->aux);
	      update_edge_key (&edge_heap, edge);
	    }
	  if (edge->speculative)
	    has_speculative = true;
	}
      if (has_speculative)
	for (edge = node->callees; edge; edge = next)
	  {
	    next = edge->next_callee;
	    if (edge->speculative
		&& !speculation_useful_p (edge, edge->aux != NULL))
	      {
		cgraph_edge::resolve_speculation (edge);
		update = true;
	      }
	  }
      if (update)
	{
	  struct cgraph_node *where = node->inlined_to
				      ? node->inlined_to : node;
	  ipa_update_overall_fn_summary (where);
	  reset_edge_caches (where);
	  update_caller_keys (&edge_heap, where,
			      updated_nodes, NULL);
	  update_callee_keys (&edge_heap, where, NULL,
			      updated_nodes);
	  bitmap_clear (updated_nodes);
	}
    }

  gcc_assert (in_lto_p
	      || (profile_info && flag_branch_probabilities));
  while (!edge_heap.empty ())
    {
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      sreal badness = edge_heap.min_key ();
      sreal current_badness;
      int growth;

      edge = edge_heap.extract_min ();
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed || !edge->callee->analyzed)
	continue;

      /* Be sure that caches are maintained consistent.
	 This check is affected by scaling roundoff errors when compiling for
	 IPA; thus we skip it in that case.  */
      if (flag_checking && !edge->callee->count.ipa_p ()
	  && (!max_count.initialized_p () || !max_count.nonzero_p ()))
	{
	  sreal cached_badness = edge_badness (edge, false);

	  int old_size_est = estimate_edge_size (edge);
	  sreal old_time_est = estimate_edge_time (edge);
	  int old_hints_est = estimate_edge_hints (edge);

	  if (edge_growth_cache != NULL)
	    edge_growth_cache->remove (edge);
	  reset_node_cache (edge->caller->inlined_to
			    ? edge->caller->inlined_to
			    : edge->caller);
	  gcc_assert (old_size_est == estimate_edge_size (edge));
	  gcc_assert (old_time_est == estimate_edge_time (edge));
	  /* FIXME:

	     gcc_assert (old_hints_est == estimate_edge_hints (edge));

	     fails with profile feedback because some hints depend on the
	     maybe_hot_edge_p predicate and, because the callee gets inlined
	     into other calls, the edge may become cold.
	     This ought to be fixed by computing relative probabilities
	     for given invocation but that will be better done once whole
	     code is converted to sreals.  Disable for now and revert to "wrong"
	     value so enable/disable checking paths agree.  */
	  edge_growth_cache->get (edge)->hints = old_hints_est + 1;

	  /* When updating the edge costs, we only decrease badness in the keys.
	     Increases of badness are handled lazily; when we see key with out
	     of date value on it, we re-insert it now.  */
	  current_badness = edge_badness (edge, false);
	  gcc_assert (cached_badness == current_badness);
	  gcc_assert (current_badness >= badness);
	}
      else
	current_badness = edge_badness (edge, false);
      if (current_badness != badness)
	{
	  if (edge_heap.min () && current_badness > edge_heap.min_key ())
	    {
	      edge->aux = edge_heap.insert (current_badness, edge);
	      continue;
	    }
	  else
	    badness = current_badness;
	}
      if (!can_inline_edge_p (edge, true)
	  || !can_inline_edge_by_limits_p (edge, true))
	{
	  resolve_noninline_speculation (&edge_heap, edge);
	  continue;
	}

      callee = edge->callee->ultimate_alias_target ();
      growth = estimate_edge_growth (edge);
      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nConsidering %s with %i size\n",
		   callee->dump_name (),
		   ipa_size_summaries->get (callee)->size);
	  fprintf (dump_file,
		   " to be inlined into %s in %s:%i\n"
		   " Estimated badness is %f, frequency %.2f.\n",
		   edge->caller->dump_name (),
		   edge->call_stmt
		   && (LOCATION_LOCUS (gimple_location ((const gimple *)
							edge->call_stmt))
		       > BUILTINS_LOCATION)
		   ? gimple_filename ((const gimple *) edge->call_stmt)
		   : "unknown",
		   edge->call_stmt
		   ? gimple_lineno ((const gimple *) edge->call_stmt)
		   : -1,
		   badness.to_double (),
		   edge->sreal_frequency ().to_double ());
	  if (edge->count.ipa ().initialized_p ())
	    {
	      fprintf (dump_file, " Called ");
	      edge->count.ipa ().dump (dump_file);
	      fprintf (dump_file, " times\n");
	    }
	  if (dump_flags & TDF_DETAILS)
	    edge_badness (edge, true);
	}

      where = edge->caller;

      if (overall_size + growth > compute_max_insns (where, min_size)
	  && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	{
	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
	  report_inline_failed_reason (edge);
	  resolve_noninline_speculation (&edge_heap, edge);
	  continue;
	}

      if (!want_inline_small_function_p (edge, true))
	{
	  resolve_noninline_speculation (&edge_heap, edge);
	  continue;
	}

      profile_count old_count = callee->count;

      /* Heuristics for inlining small functions work poorly for
	 recursive calls where the effect is similar to loop unrolling.
	 When inlining such an edge seems profitable, leave the decision
	 to the special-purpose recursive inliner.  */
      if (edge->recursive_p ())
	{
	  if (where->inlined_to)
	    where = where->inlined_to;
	  if (!recursive_inlining (edge,
				   opt_for_fn (edge->caller->decl,
					       flag_indirect_inlining)
				   ? &new_indirect_edges : NULL))
	    {
	      edge->inline_failed = CIF_RECURSIVE_INLINING;
	      resolve_noninline_speculation (&edge_heap, edge);
	      continue;
	    }
	  reset_edge_caches (where);
	  /* Recursive inliner inlines all recursive calls of the function
	     at once.  Consequently we need to update all callee keys.  */
	  if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
	    add_new_edges_to_heap (&edge_heap, new_indirect_edges);
	  update_callee_keys (&edge_heap, where, where, updated_nodes);
	  bitmap_clear (updated_nodes);
	}
      else
	{
	  struct cgraph_node *outer_node = NULL;
	  int depth = 0;

	  /* Consider the case where self recursive function A is inlined
	     into B.  This is desired optimization in some cases, since it
	     leads to effect similar of loop peeling and we might completely
	     optimize out the recursive call.  However we must be extra
	     selective.  */

	  where = edge->caller;
	  while (where->inlined_to)
	    {
	      if (where->decl == callee->decl)
		outer_node = where, depth++;
	      where = where->callers->caller;
	    }
	  if (outer_node
	      && !want_inline_self_recursive_call_p (edge, outer_node,
						     true, depth))
	    {
	      edge->inline_failed
		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
	      resolve_noninline_speculation (&edge_heap, edge);
	      continue;
	    }
	  else if (depth && dump_file)
	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);

	  gcc_checking_assert (!callee->inlined_to);
	  int old_size = ipa_size_summaries->get (where)->size;
	  sreal old_time = ipa_fn_summaries->get (where)->time;

	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
	  reset_edge_caches (edge->callee);
	  add_new_edges_to_heap (&edge_heap, new_indirect_edges);

	  /* If caller's size and time increased we do not need to update
	     all edges because badness is not going to decrease.  */
	  if (old_size <= ipa_size_summaries->get (where)->size
	      && old_time <= ipa_fn_summaries->get (where)->time
	      /* Wrapper penalty may be non-monotonous in this respect.
		 Fortunately it only affects small functions.  */
	      && !wrapper_heuristics_may_apply (where, old_size))
	    update_callee_keys (&edge_heap, edge->callee, edge->callee,
				updated_nodes);
	  else
	    update_callee_keys (&edge_heap, where,
				edge->callee,
				updated_nodes);
	}
      where = edge->caller;
      if (where->inlined_to)
	where = where->inlined_to;

      /* Our profitability metric can depend on local properties
	 such as number of inlinable calls and size of the function body.
	 After inlining these properties might change for the function we
	 inlined into (since its body size changed) and for the functions
	 called by the function we inlined (since the number of their
	 inlinable callers might change).  */
      update_caller_keys (&edge_heap, where, updated_nodes, NULL);
      /* Offline copy count has possibly changed, recompute if profile is
	 available.  */
      struct cgraph_node *n
	= cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
      if (n != edge->callee && n->analyzed && !(n->count == old_count)
	  && n->count.ipa_p ())
	update_callee_keys (&edge_heap, n, NULL, updated_nodes);
      bitmap_clear (updated_nodes);
      if (dump_enabled_p ())
	{
	  ipa_fn_summary *s = ipa_fn_summaries->get (where);

	  /* dump_printf can't handle %+i.  */
	  char buf_net_change[100];
	  snprintf (buf_net_change, sizeof buf_net_change, "%+i",
		    overall_size - old_size);

	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
			   " Inlined %C into %C which now has time %f and "
			   "size %i, net change of %s%s.\n",
			   edge->callee, edge->caller,
			   s->time.to_double (),
			   ipa_size_summaries->get (edge->caller)->size,
			   buf_net_change,
			   cross_module_call_p (edge) ? " (cross module)" : "");
	}
      if (min_size > overall_size)
	{
	  min_size = overall_size;

	  if (dump_file)
	    fprintf (dump_file, "New minimal size reached: %i\n", min_size);
	}
    }

  free_growth_caches ();
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "Unit growth for small function inlining: %i->%i (%i%%)\n",
		 initial_size, overall_size,
		 initial_size ? overall_size * 100 / (initial_size) - 100 : 0);
  symtab->remove_edge_removal_hook (edge_removal_hook_holder);
}
/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */

static void
flatten_function (struct cgraph_node *node, bool early, bool update)
{
  struct cgraph_edge *e;

  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);

  node->aux = (void *) node;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();

      /* We've hit cycle?  It is time to give up.  */
      if (callee->aux)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "Not inlining %C into %C to avoid cycle.\n",
			     callee, e->caller);
	  if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
	    e->inline_failed = CIF_RECURSIVE_INLINING;
	  continue;
	}

      /* When the edge is already inlined, we just need to recurse into
	 it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
	{
	  flatten_function (callee, early, false);
	  continue;
	}

      /* Flatten attribute needs to be processed during late inlining.  For
	 extra code quality we however do flattening during early optimization,
	 too.  */
      if (!early
	  ? !can_inline_edge_p (e, true)
	    && !can_inline_edge_by_limits_p (e, true)
	  : !can_early_inline_edge_p (e))
	continue;

      if (e->recursive_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "Not inlining: recursive call.\n");
	  continue;
	}

      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "Not inlining: SSA form does not match.\n");
	  continue;
	}

      /* Inline the edge and flatten the inline clone.  Avoid
	 recursing through the original node if the node was cloned.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
			 " Inlining %C into %C.\n",
			 callee, e->caller);
      orig_callee = callee;
      inline_call (e, true, NULL, NULL, false);
      if (e->callee != orig_callee)
	orig_callee->aux = (void *) node;
      flatten_function (e->callee, early, false);
      if (e->callee != orig_callee)
	orig_callee->aux = NULL;
    }

  node->aux = NULL;
  cgraph_node *where = node->inlined_to ? node->inlined_to : node;
  if (update && opt_for_fn (where->decl, optimize))
    ipa_update_overall_fn_summary (where);
}
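/* Note that the aux pointer doubles as the "currently being flattened" mark:
   it is set on entry, temporarily propagated to the original callee when
   inline_call created a clone (so the cycle check above also fires for the
   original node), and cleared again on the way out.  */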
/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
   DATA points to number of calls originally found so we avoid infinite
   recursion.  */

static bool
inline_to_all_callers_1 (struct cgraph_node *node, void *data,
			 hash_set<cgraph_node *> *callers)
{
  int *num_calls = (int *)data;
  bool callee_removed = false;

  while (node->callers && !node->inlined_to)
    {
      struct cgraph_node *caller = node->callers->caller;

      if (!can_inline_edge_p (node->callers, true)
	  || !can_inline_edge_by_limits_p (node->callers, true)
	  || node->callers->recursive_p ())
	{
	  if (dump_file)
	    fprintf (dump_file, "Uninlinable call found; giving up.\n");
	  *num_calls = 0;
	  return false;
	}

      if (dump_file)
	{
	  cgraph_node *ultimate = node->ultimate_alias_target ();
	  fprintf (dump_file,
		   "\nInlining %s size %i.\n",
		   ultimate->dump_name (),
		   ipa_size_summaries->get (ultimate)->size);
	  fprintf (dump_file,
		   " Called once from %s %i insns.\n",
		   node->callers->caller->dump_name (),
		   ipa_size_summaries->get (node->callers->caller)->size);
	}

      /* Remember which callers we inlined to, delaying updating the
	 overall summary.  */
      callers->add (node->callers->caller);
      inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
      if (dump_file)
	fprintf (dump_file,
		 " Inlined into %s which now has %i size\n",
		 caller->dump_name (),
		 ipa_size_summaries->get (caller)->size);
      if (!(*num_calls)--)
	{
	  if (dump_file)
	    fprintf (dump_file, "New calls found; giving up.\n");
	  return callee_removed;
	}
      if (callee_removed)
	return true;
    }
  return false;
}
/* Wrapper around inline_to_all_callers_1 doing delayed overall summary
   update.  */

static bool
inline_to_all_callers (struct cgraph_node *node, void *data)
{
  hash_set<cgraph_node *> callers;
  bool res = inline_to_all_callers_1 (node, data, &callers);
  /* Perform the delayed update of the overall summary of all callers
     processed.  This avoids quadratic behavior in the cases where
     we have a lot of calls to the same function.  */
  for (hash_set<cgraph_node *>::iterator i = callers.begin ();
       i != callers.end (); ++i)
    ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
  return res;
}
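/* Example of the quadratic behavior being avoided: a caller containing M
   calls to NODE would otherwise have its overall summary recomputed after
   each of the M inlines, and each recomputation walks the ever-growing body.
   Deduplicating the callers in the hash_set means each caller is summarized
   exactly once, after all of its calls have been inlined.  */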
/* Output overall time estimate.  */

static void
dump_overall_stats (void)
{
  sreal sum_weighted = 0, sum = 0;
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to
	&& !node->alias)
      {
	ipa_fn_summary *s = ipa_fn_summaries->get (node);
	if (s != NULL)
	  {
	    sum += s->time;
	    if (node->count.ipa ().initialized_p ())
	      sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
	  }
      }
  fprintf (dump_file, "Overall time estimate: "
	   "%f weighted by profile: "
	   "%f\n", sum.to_double (), sum_weighted.to_double ());
}
/* Output some useful stats about inlining.  */

static void
dump_inline_stats (void)
{
  int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
  int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
  int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
  int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
  int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
  int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
  int64_t reason[CIF_N_REASONS][2];
  sreal reason_freq[CIF_N_REASONS];
  int i;
  struct cgraph_node *node;

  memset (reason, 0, sizeof (reason));
  for (i = 0; i < CIF_N_REASONS; i++)
    reason_freq[i] = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
	{
	  if (e->inline_failed)
	    {
	      if (e->count.ipa ().initialized_p ())
		reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
	      reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
	      reason[(int) e->inline_failed][1] ++;
	      if (DECL_VIRTUAL_P (e->callee->decl)
		  && e->count.ipa ().initialized_p ())
		{
		  if (e->indirect_inlining_edge)
		    noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
		  else
		    noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
		}
	      else if (e->count.ipa ().initialized_p ())
		{
		  if (e->indirect_inlining_edge)
		    noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
		  else
		    noninlined_cnt += e->count.ipa ().to_gcov_type ();
		}
	    }
	  else if (e->count.ipa ().initialized_p ())
	    {
	      if (e->speculative)
		{
		  if (DECL_VIRTUAL_P (e->callee->decl))
		    inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
		  else
		    inlined_speculative += e->count.ipa ().to_gcov_type ();
		}
	      else if (DECL_VIRTUAL_P (e->callee->decl))
		{
		  if (e->indirect_inlining_edge)
		    inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
		  else
		    inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
		}
	      else
		{
		  if (e->indirect_inlining_edge)
		    inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
		  else
		    inlined_cnt += e->count.ipa ().to_gcov_type ();
		}
	    }
	}
      for (e = node->indirect_calls; e; e = e->next_callee)
	if (e->indirect_info->polymorphic
	    && e->count.ipa ().initialized_p ())
	  indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
	else if (e->count.ipa ().initialized_p ())
	  indirect_cnt += e->count.ipa ().to_gcov_type ();
    }
  if (max_count.initialized_p ())
    {
      fprintf (dump_file,
	       "Inlined %" PRId64 " + speculative "
	       "%" PRId64 " + speculative polymorphic "
	       "%" PRId64 " + previously indirect "
	       "%" PRId64 " + virtual "
	       "%" PRId64 " + virtual and previously indirect "
	       "%" PRId64 "\n" "Not inlined "
	       "%" PRId64 " + previously indirect "
	       "%" PRId64 " + virtual "
	       "%" PRId64 " + virtual and previously indirect "
	       "%" PRId64 " + still indirect "
	       "%" PRId64 " + still indirect polymorphic "
	       "%" PRId64 "\n", inlined_cnt,
	       inlined_speculative, inlined_speculative_ply,
	       inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
	       noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
	       noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
      fprintf (dump_file, "Removed speculations ");
      spec_rem.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  dump_overall_stats ();
  fprintf (dump_file, "\nWhy inlining failed?\n");
  for (i = 0; i < CIF_N_REASONS; i++)
    if (reason[i][1])
      fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64 " count\n",
	       cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
	       (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
}
/* Called when node is removed.  */

static void
flatten_remove_node_hook (struct cgraph_node *node, void *data)
{
  if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
    return;

  hash_set<struct cgraph_node *> *removed
    = (hash_set<struct cgraph_node *> *) data;
  removed->add (node);
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i, j;
  int cold;
  bool remove_functions = false;

  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);
  spec_rem = profile_count::zero ();

  FOR_EACH_FUNCTION (node)
    {
      node->aux = 0;

      /* Recompute the default reasons for inlining because they may have
	 changed during merging.  */
      if (in_lto_p)
	{
	  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
	    {
	      gcc_assert (e->inline_failed);
	      initialize_inline_failed (e);
	    }
	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
	    initialize_inline_failed (e);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* First shrink order array, so that it only contains nodes with
     flatten attribute.  */
  for (i = nnodes - 1, j = i; i >= 0; i--)
    {
      node = order[i];
      if (node->definition
	  /* Do not try to flatten aliases.  These may happen for example when
	     creating local aliases.  */
	  && !node->alias
	  && lookup_attribute ("flatten",
			       DECL_ATTRIBUTES (node->decl)) != NULL)
	order[j--] = order[i];
    }

  /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
     nodes with flatten attribute.  If there is more than one such
     node, we need to register a node removal hook, as flatten_function
     could remove other nodes with flatten attribute.  See PR82801.  */
  struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
  hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
  if (j < nnodes - 2)
    {
      flatten_removed_nodes = new hash_set<struct cgraph_node *>;
      node_removal_hook_holder
	= symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
					   flatten_removed_nodes);
    }

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i > j; i--)
    {
      node = order[i];
      if (flatten_removed_nodes
	  && flatten_removed_nodes->contains (node))
	continue;

      /* Handle nodes to be flattened.
	 Ideally when processing callees we stop inlining at the
	 entry of cycles, possibly cloning that entry point and
	 try to flatten itself turning it into a self-recursive
	 function.  */
      if (dump_file)
	fprintf (dump_file, "Flattening %s\n", node->dump_name ());
      flatten_function (node, false, true);
    }

  if (j < nnodes - 2)
    {
      symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
      delete flatten_removed_nodes;
    }
  free (order);

  if (dump_file)
    dump_overall_stats ();

  inline_small_functions ();

  gcc_assert (symtab->state == IPA_SSA);
  symtab->state = IPA_SSA_AFTER_INLINING;
  /* Do first after-inlining removal.  We want to remove all "stale" extern
     inline functions and virtual functions so we really know what is called
     once.  */
  symtab->remove_unreachable_nodes (dump_file);

  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless of the callee size as long as function growth limits
     are met.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on functions to be inlined into all callers and "
	     "removing useless speculations:\n");

  /* Inlining one function called once has good chance of preventing
     inlining other function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is good cut without the extra pain of maintaining the queue.

     ??? This is not really fitting the bill perfectly: inlining function
     into callee often leads to better optimization of callee due to
     increased context for optimization.
     For example if main() function calls a function that outputs help
     and then a function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.

     We probably want to implement new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
     to be hot by profile.  */
  for (cold = 0; cold <= 1; cold++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
	{
	  struct cgraph_edge *edge, *next;
	  bool update = false;

	  if (!opt_for_fn (node->decl, optimize)
	      || !opt_for_fn (node->decl, flag_inline_functions_called_once))
	    continue;

	  for (edge = node->callees; edge; edge = next)
	    {
	      next = edge->next_callee;
	      if (edge->speculative && !speculation_useful_p (edge, false))
		{
		  if (edge->count.ipa ().initialized_p ())
		    spec_rem += edge->count.ipa ();
		  cgraph_edge::resolve_speculation (edge);
		  update = true;
		  remove_functions = true;
		}
	    }
	  if (update)
	    {
	      struct cgraph_node *where = node->inlined_to
					  ? node->inlined_to : node;
	      reset_edge_caches (where);
	      ipa_update_overall_fn_summary (where);
	    }
	  if (want_inline_function_to_all_callers_p (node, cold))
	    {
	      int num_calls = 0;
	      node->call_for_symbol_and_aliases (sum_callers, &num_calls,
						 true);
	      while (node->call_for_symbol_and_aliases
		       (inline_to_all_callers, &num_calls, true))
		;
	      remove_functions = true;
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "\nInlined %i calls, eliminated %i functions\n\n",
		 ncalls_inlined, nfunctions_inlined);
  if (dump_file)
    dump_inline_stats ();

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);
  return remove_functions ? TODO_remove_functions : 0;
}
/* Inline always-inline function calls in NODE.  */

static bool
inline_always_inline_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	continue;

      if (e->recursive_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  Not inlining recursive call to %C.\n",
			     e->callee);
	  e->inline_failed = CIF_RECURSIVE_INLINING;
	  continue;
	}

      if (!can_early_inline_edge_p (e))
	{
	  /* Set inlined to true if the callee is marked "always_inline" but
	     is not inlinable.  This will allow flagging an error later in
	     expand_call_inline in tree-inline.c.  */
	  if (lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (callee->decl)) != NULL)
	    inlined = true;
	  continue;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
			 "  Inlining %C into %C (always_inline).\n",
			 e->callee, e->caller);
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }
  if (inlined)
    ipa_update_overall_fn_summary (node);

  return inlined;
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static bool
early_inline_small_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();

      /* We can encounter not-yet-analyzed function during
	 early inlining on callgraphs with strongly
	 connected components.  */
      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      if (s == NULL || !s->inlinable || !e->inline_failed)
	continue;

      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
	  && !opt_for_fn (node->decl, flag_inline_small_functions)
	  && !opt_for_fn (node->decl, flag_inline_functions))
	continue;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, e->call_stmt,
			 "Considering inline candidate %C.\n",
			 callee);

      if (!can_early_inline_edge_p (e))
	continue;

      if (e->recursive_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  Not inlining: recursive call.\n");
	  continue;
	}

      if (!want_early_inline_function_p (e))
	continue;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
			 " Inlining %C into %C.\n",
			 callee, e->caller);
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }

  if (inlined)
    ipa_update_overall_fn_summary (node);

  return inlined;
}
unsigned int
early_inliner (function *fun)
{
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if datastructures for ipa-inliner are already computed.  This
     happens when some pass decides to construct new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse us when the early inliner decides to inline a call
     to a function clone, because function clones don't have parameter list in
     ipa-prop matching their signature.  */
  if (ipa_node_params_sum)
    return 0;

  if (flag_checking)
    node->verify ();
  node->remove_all_references ();

  /* Even when not optimizing or not inlining inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
	 during incremental inlining.  This sucks as functions calling
	 always inline functions will get less optimized, but at the
	 same time inlining of functions calling always inline
	 function into an always inline function might introduce
	 cycles of edges to be always inlined in the callgraph.

	 We might want to be smarter and just avoid this type of inlining.  */
      || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
	  && lookup_attribute ("always_inline",
			       DECL_ATTRIBUTES (node->decl))))
    ;
  else if (lookup_attribute ("flatten",
			     DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
	 all calls in it.  */
      if (dump_enabled_p ())
	dump_printf (MSG_OPTIMIZED_LOCATIONS,
		     "Flattening %C\n", node);
      flatten_function (node, true, true);
      inlined = true;
    }
  else
    {
      /* If some always_inline functions were inlined, apply the changes.
	 This way we will not account always inline into growth limits and
	 moreover we will inline calls from always inlines that we skipped
	 previously because of conditional above.  */
      if (inlined)
	{
	  timevar_push (TV_INTEGRATION);
	  todo |= optimize_inline_calls (current_function_decl);
	  /* optimize_inline_calls call above might have introduced new
	     statements that don't have inline parameters computed.  */
	  for (edge = node->callees; edge; edge = edge->next_callee)
	    {
	      /* We can encounter not-yet-analyzed function during
		 early inlining on callgraphs with strongly
		 connected components.  */
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	      es->call_stmt_size
		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
	      es->call_stmt_time
		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
	    }
	  ipa_update_overall_fn_summary (node);
	  inlined = false;
	  timevar_pop (TV_INTEGRATION);
	}
      /* We iterate incremental inlining to get trivial cases of indirect
	 inlining.  */
      while (iterations < opt_for_fn (node->decl,
				      param_early_inliner_max_iterations)
	     && early_inline_small_functions (node))
	{
	  timevar_push (TV_INTEGRATION);
	  todo |= optimize_inline_calls (current_function_decl);

	  /* Technically we ought to recompute inline parameters so the new
	     iteration of early inliner works as expected.  We however have
	     values approximately right and thus we only need to update edge
	     info that might be cleared out for newly discovered edges.  */
	  for (edge = node->callees; edge; edge = edge->next_callee)
	    {
	      /* We have no summary for new bound store calls yet.  */
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	      es->call_stmt_size
		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
	      es->call_stmt_time
		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
	    }
	  if (iterations < opt_for_fn (node->decl,
				       param_early_inliner_max_iterations) - 1)
	    ipa_update_overall_fn_summary (node);
	  timevar_pop (TV_INTEGRATION);
	  iterations++;
	  inlined = false;
	}
      if (dump_file)
	fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  fun->always_inline_functions_inlined = true;

  return todo;
}
/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.  */

namespace {

const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_early_inline

unsigned int
pass_early_inline::execute (function *fun)
{
  return early_inliner (fun);
}

} // anon namespace

gimple_opt_pass *
make_pass_early_inline (gcc::context *ctxt)
{
  return new pass_early_inline (ctxt);
}
namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
		      NULL, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      inline_transform, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_inline (gcc::context *ctxt)
{
  return new pass_ipa_inline (ctxt);
}