/* Inlining decision heuristics.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth, growth and
     so on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inline

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result, the
       callees of the function seen by the early inliner were already optimized
       and the results of early inlining add a lot of optimization
       opportunities for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate the
       abstraction penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by the real
       IPA optimizers.

       Because of the lack of whole unit knowledge, the pass cannot really make
       good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) Inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining leads
       to devirtualization and other modifications of the callgraph, so
       functions may become unreachable during the process.  Also functions
       declared extern inline or virtual functions are removed, since after
       inlining we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by
       eliminating the need for an offline copy of the function.  */
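/* Illustrative sketch (not part of GCC; simplified stand-in types): step 1 of
   pass_ipa_inline above is a greedy loop over candidate edges ordered by
   badness.  The real implementation, inline_small_functions later in this
   file, uses a fibonacci heap keyed by edge_badness and re-checks the limits
   after every inlining; this stand-alone toy only shows the shape of the
   loop.  */
#if 0
#include <stddef.h>

struct toy_candidate
{
  double badness;	/* lower is better, mirrors edge_badness */
  int growth;		/* estimated unit growth caused by inlining */
  int inlined;		/* already picked?  */
};

/* Greedily pick candidates in order of increasing badness while the
   remaining unit growth budget allows it.  Returns the number inlined.  */
static int
toy_greedy_inline (struct toy_candidate *cand, size_t n, int budget)
{
  int ninlined = 0;
  for (;;)
    {
      struct toy_candidate *best = NULL;
      for (size_t i = 0; i < n; i++)
	if (!cand[i].inlined && (!best || cand[i].badness < best->badness))
	  best = &cand[i];
      if (!best || best->growth > budget)
	break;
      best->inlined = 1;		/* stands in for inline_call ()  */
      budget -= best->growth;		/* account the unit growth */
      ninlined++;
    }
  return ninlined;
}
#endif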
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "tree-inline.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;

/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for the function e->caller is inlined to.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of inline code
     we immediately inline to.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
	limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
	stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
	to = to->callers->caller;
      else
	break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > opt_for_fn (to->decl, param_large_function_insns)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
		       * opt_for_fn (to->decl, param_stack_frame_growth)
		       / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
		   + outer_info->estimated_self_stack_size
		   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If function already has large stack usage from sibling
	 inline call, we can inline, too.
	 This bit overoptimistically assumes that we are good at stack
	 packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
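/* Worked example (illustrative numbers, not from the source): if the largest
   body on the chain of callers has self_size 500 and
   param_large_function_growth is 100, the limit computed above becomes
   500 + 500 * 100 / 100 = 1000 instructions.  CIF_LARGE_FUNCTION_GROWTH_LIMIT
   is reported only when the estimated size after inlining exceeds both this
   limit and param_large_function_insns.  */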
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
		       "  not inlinable: %C -> %C, %s\n",
		       e->caller, e->callee,
		       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	  && e->caller->lto_file_data
	  && e->callee->ultimate_alias_target ()->lto_file_data)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			   "  LTO objects: %s, %s\n",
			   e->caller->lto_file_data->file_name,
			   e->callee->ultimate_alias_target ()
			     ->lto_file_data->file_name);
	}
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
	cl_target_option_print_diff
	  (dump_file, 2, target_opts_for_fn (e->caller->decl),
	   target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
	cl_optimization_print_diff
	  (dump_file, 2, opts_for_fn (e->caller->decl),
	   opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Follow clang and allow inlining for always_inline functions.  */
  if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  const sanitize_code codes[] =
    {
      SANITIZE_ADDRESS,
      SANITIZE_THREAD,
      SANITIZE_UNDEFINED,
      SANITIZE_UNDEFINED_NONDEFAULT,
      SANITIZE_POINTER_COMPARE,
      SANITIZE_POINTER_SUBTRACT
    };

  for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
    if (sanitize_flags_p (codes[i], caller)
	!= sanitize_flags_p (codes[i], callee))
      return false;

  if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
    return false;

  return true;
}
/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag		\
    && (!always_inline					\
	|| opts_for_fn (caller->decl)->x_##flag		\
	   < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag		\
    && (!always_inline					\
	|| opts_for_fn (caller->decl)->x_##flag		\
	   > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where exact match is needed for correctness.  */
#define check_match(flag) \
   (opts_for_fn (caller->decl)->x_##flag		\
    != opts_for_fn (callee->decl)->x_##flag)
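/* For illustration (assumed macro expansion, not generated output):
   check_maybe_up (flag_errno_math), as used in can_inline_edge_by_limits_p
   below, expands to roughly

     opts_for_fn (caller->decl)->x_flag_errno_math
	!= opts_for_fn (callee->decl)->x_flag_errno_math
     && (!always_inline
	 || opts_for_fn (caller->decl)->x_flag_errno_math
	    < opts_for_fn (callee->decl)->x_flag_errno_math)

   i.e. a differing flag counts as a mismatch, except that for an
   always_inline callee the difference is tolerated when the caller's value
   is the greater one.  */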
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   if REPORT is true, output reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
		   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
		 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
	   && DECL_FUNCTION_PERSONALITY (callee->decl)
	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
						callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
	   || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes. */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }
  else if (profile_arc_flag
	   && (lookup_attribute ("no_profile_instrument_function",
				 DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
	       != (lookup_attribute ("no_profile_instrument_function",
				     DECL_ATTRIBUTES (callee->decl)) == NULL_TREE))
    {
      cgraph_node *origin = caller;
      while (origin->clone_of)
	origin = origin->clone_of;

      if (!DECL_STRUCT_FUNCTION (origin->decl)->always_inline_functions_inlined)
	{
	  e->inline_failed = CIF_UNSPECIFIED;
	  inlinable = false;
	}
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return inlining_insns_single limit for function N.  If HINT or HINT2 is true
   scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint, bool hint2)
{
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
	spd = 1000000;
      return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
    }
  if (hint || hint2)
    return opt_for_fn (n->decl, param_max_inline_insns_single)
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return opt_for_fn (n->decl, param_max_inline_insns_single);
}
/* Return inlining_insns_auto limit for function N.  If HINT or HINT2 is true
   scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
	spd = 1000000;
      return max_inline_insns_auto * spd / 100;
    }
  if (hint || hint2)
    return max_inline_insns_auto
	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return max_inline_insns_auto;
}
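/* Worked example (illustrative numbers): if param_max_inline_insns_auto is 15
   and param_inline_heuristics_hint_percent is 400, a single matching hint
   group raises the auto limit to 15 * 400 / 100 = 60 instructions; without
   any matching hint the plain 15-instruction bound is used.  */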
/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   if REPORT is true, output reason to the dump file.

   if DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
			     bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
	report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
			    DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
	     (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
	      && lookup_attribute ("always_inline",
				   DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
	 below and inlined across optimization boundaries.
	 Enabling checks below breaks several packages by refusing
	 to inline library always_inline functions.  See PR65873.
	 Disable the check for early inlining for now until a better solution
	 is found.  */
      if (always_inline && early)
	;
      /* There are some options that change IL semantics which means
	 we cannot inline in these cases for correctness reasons.
	 Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
	       || check_match (flag_trapv)
	       || check_match (flag_pcc_struct_return)
	       || check_maybe_down (optimize_debug)
	       /* When caller or callee does FP math, be sure FP codegen flags
		  are compatible.  */
	       || ((caller_info->fp_expressions && callee_info->fp_expressions)
		   && (check_maybe_up (flag_rounding_math)
		       || check_maybe_up (flag_trapping_math)
		       || check_maybe_down (flag_unsafe_math_optimizations)
		       || check_maybe_down (flag_finite_math_only)
		       || check_maybe_up (flag_signaling_nans)
		       || check_maybe_down (flag_cx_limited_range)
		       || check_maybe_up (flag_signed_zeros)
		       || check_maybe_down (flag_associative_math)
		       || check_maybe_down (flag_reciprocal_math)
		       || check_maybe_down (flag_fp_int_builtin_inexact)
		       /* Strictly speaking only when the callee contains
			  function calls that may end up setting errno.  */
		       || check_maybe_up (flag_errno_math)))
	       /* We do not want to make code compiled with exceptions to be
		  brought into a non-EH function unless we know that the callee
		  does not throw.
		  This is tracked by DECL_FUNCTION_PERSONALITY.  */
	       || (check_maybe_up (flag_non_call_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       || (check_maybe_up (flag_exceptions)
		   && DECL_FUNCTION_PERSONALITY (callee->decl))
	       /* When devirtualization is disabled for callee, it is not safe
		  to inline it as we possibly mangled the type info.
		  Allow early inlining of always inlines.  */
	       || (!early && check_maybe_down (flag_devirtualize)))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
	;
      /* When user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If explicit optimize attributes are not used, the mismatch is caused
	 by different command line options used to build different units.
	 Do not care about COMDAT functions - those are intended to be
	 optimized with the optimization flags of the module they are used in.
	 Also do not care about mixing up size/speed optimization when
	 DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
		&& !lookup_attribute ("optimize",
				      DECL_ATTRIBUTES (caller->decl)))
	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	;
      /* If mismatch is caused by merging two LTO units with different
	 optimization flags we want to be a bit nicer.  However never inline
	 if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
	       || !opt_for_fn (caller->decl, optimize))
	{
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
      /* If callee is optimized for size and caller is not, allow inlining if
	 code shrinks or we are in param_max_inline_insns_single limit and
	 callee is inline (and thus likely a unified comdat).
	 This will allow caller to run faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       > opt_for_fn (caller->decl, optimize_size))
	{
	  int growth = estimate_edge_growth (e);
	  if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
	      && (!DECL_DECLARED_INLINE_P (callee->decl)
		  && growth >= MAX (inline_insns_single (caller, false, false),
				    inline_insns_auto (caller, false, false))))
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
      /* If callee is more aggressively optimized for performance than caller,
	 we generally want to inline only cheap (runtime wise) functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
	       < opt_for_fn (caller->decl, optimize_size)
	       || (opt_for_fn (callee->decl, optimize)
		   > opt_for_fn (caller->decl, optimize)))
	{
	  if (estimate_edge_time (e)
	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
	    {
	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	      inlinable = false;
	    }
	}
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* Early inliner might get called at WPA stage when IPA pass adds new
     function.  In this case we cannot really do any of early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In early inliner some of callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by early inliner, yet).  We don't have CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			 "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}
/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in inlining small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before profile summary, all
     hot paths' IR look exactly the same as in the profiled binary.  As a
     result, in the early inliner we will disregard the size limit and inline
     those callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      if (min_growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow "
			     "at least by %i\n",
			     e->caller, callee,
			     min_growth);
	  want_inline = false;
	}
      else
	growth = estimate_edge_growth (e);

      if (!want_inline || growth <= param_max_inline_insns_size)
	;
      else if (!e->maybe_hot_p ())
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "call is cold and code would grow by %i\n",
			     e->caller, callee,
			     growth);
	  want_inline = false;
	}
      else if (growth > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
      else if ((n = num_calls (callee)) != 0
	       && growth * (n + 1) > early_inlining_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
			     "  will not early inline: %C->%C, "
			     "growth %i exceeds --param early-inlining-insns "
			     "divided by number of calls\n",
			     e->caller, callee, growth);
	  want_inline = false;
	}
    }
  return want_inline;
}
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
			     sreal uninlined_call_time,
			     sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}
/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
			   sreal time,
			   sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
/* Determine time saved by inlining EDGE of frequency FREQ
   where callee's runtime w/o inlining is UNINLINED_TIME
   and with inlining is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
		  sreal freq,
		  sreal uninlined_time,
		  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      speedup = (speedup + call_time);
      if (freq != 1)
	speedup = speedup * freq;
    }
  else if (freq == 0)
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
			 ? e->caller->inlined_to
			 : e->caller);
  int limit = opt_for_fn (caller->decl, param_inline_min_speedup);

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
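/* Worked example (illustrative numbers): with an estimated caller time of
   1000 when the call stays uninlined, 960 when it is inlined, and
   param_inline_min_speedup of 15, the test above compares
   (1000 - 960) * 100 = 4000 with 1000 * 15 = 15000, so the 4% saving does
   not qualify as a big speedup.  */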
/* Return true if we are interested in inlining small function.
   When REPORT is true, report reason to dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  cgraph_node *to = (e->caller->inlined_to
		     ? e->caller->inlined_to : e->caller);

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check if the function can be a good
     inline candidate.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
	    && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_auto (e->caller, true, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
	    || e->count.ipa ().nonzero_p ())
	   && ipa_fn_summaries->get (callee)->min_size
		- ipa_call_summaries->get (e)->call_stmt_size
	      > inline_insns_single (e->caller, true, true))
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      /* We have two independent groups of hints.  If one matches in each
	 of the groups the limits are increased.  If both groups match, the
	 limit is increased even more.  */
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
				   | INLINE_HINT_known_hot
				   | INLINE_HINT_loop_iterations
				   | INLINE_HINT_loop_stride));
      bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);

      if (growth <= opt_for_fn (to->decl,
				param_max_inline_insns_size))
	;
      /* Apply param_max_inline_insns_single limit.  Do not do so when
	 hints suggest that inlining given function is very profitable.
	 Avoid computation of big_speedup_p when not necessary to change
	 outcome of decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_single (e->caller, apply_hints,
						 apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_single (e->caller, true,
						     apply_hints2)
		   || !big_speedup_p (e)))
	{
	  e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
	  want_inline = false;
	}
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
	       && growth >= opt_for_fn (to->decl,
					param_max_inline_insns_small))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_NOT_DECLARED_INLINED;
	      want_inline = false;
	    }
	}
      /* Apply param_max_inline_insns_auto limit for functions not declared
	 inline.  Bypass the limit when speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && growth >= inline_insns_auto (e->caller, apply_hints,
					       apply_hints2)
	       && (apply_hints || apply_hints2
		   || growth >= inline_insns_auto (e->caller, true,
						   apply_hints2)
		   || !big_speedup_p (e)))
	{
	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false, false)
	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
	      want_inline = false;
	    }
	}
      /* If call is cold, do not inline when function body would grow. */
      else if (!e->maybe_hot_p ()
	       && (growth >= inline_insns_single (e->caller, false, false)
		   || growth_positive_p (callee, e, growth)))
	{
	  e->inline_failed = CIF_UNLIKELY_CALL;
	  want_inline = false;
	}
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
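/* For illustration (made-up hint sets): an edge whose hints include
   INLINE_HINT_loop_stride but not INLINE_HINT_builtin_constant_p gets
   apply_hints == true and apply_hints2 == false, so the tests above compare
   growth against inline_insns_single (e->caller, true, false) resp.
   inline_insns_auto (e->caller, true, false); only when hints from both
   groups match is the doubly scaled (true, true) bound used.  */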
/* EDGE is self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just introduce new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
				   struct cgraph_node *outer_node,
				   bool peeling,
				   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = opt_for_fn (outer_node->decl,
			      param_max_inline_recursive_depth_auto);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = opt_for_fn (outer_node->decl,
			    param_max_inline_recursive_depth);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of self recursive function into copy of itself within other
     function is transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make probability
     of actual call to the self recursive function very small.  Be sure that
     the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursion is at most max_depth.  */
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
	max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
	{
	  reason = "frequency of recursive call is too large";
	  want_inline = false;
	}
    }
  /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in hardware return predictor.

     Recursive inlining might however increase cost of stack frame setup
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when
     probability of self recursion is low.

     Recursive inlining of self recursive call within loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
	  <= caller_freq
	     * opt_for_fn (outer_node->decl,
			   param_min_inline_recursive_probability))
	{
	  reason = "frequency of recursive call is too small";
	  want_inline = false;
	}
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
		     "   not inlining recursively: %s\n", reason);
  return want_inline;
}
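/* Worked example (illustrative): with max_depth == 8 the peeling branch
   starts from max_prob = 1 - 1/8 = 0.875 of the caller frequency; at depth 3
   max_prob has been squared twice, 0.875^4 ~= 0.59, so deeper recursive
   copies are only peeled when the recursion becomes progressively colder.  */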
/* Return true when NODE has uninlinable caller;
   set HAS_HOT_CALL if it has hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
	  || !opt_for_fn (e->caller->decl, optimize))
	return true;
      if (!can_inline_edge_p (e, true))
	return true;
      if (e->recursive_p ())
	return true;
      if (!can_inline_edge_by_limits_p (e, true))
	return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
	*(bool *)has_hot_call = true;
    }
  return false;
}
/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}
/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (growth_positive_p (node, NULL, INT_MIN) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
					 true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}
/* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
   in estimate_edge_badness.  */

static bool
wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
{
  return size < (DECL_DECLARED_INLINE_P (where->decl)
		 ? inline_insns_single (where, false, false)
		 : inline_insns_auto (where, false, false));
}
/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->inlined_to
			 ? edge->caller->inlined_to
			 : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
			- callee_info->time * 101).to_int () <= 0
		       || callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
	       edge->caller->dump_name (),
	       edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
	       growth,
	       edge_time.to_double (),
	       unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
	fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
		 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
	fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When profile is available. Compute badness as:

		 time_saved * caller_count
     goodness =  -------------------------------------------------
		 growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use negative value to make calls with profile appear hotter
     than calls without.  */
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
	   || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();

      numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
      if (numerator <= 0)
	numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
	numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
	numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

      /* Look for inliner wrappers of the form:

	 inline_caller ()
	   {
	     do_fast_job...
	     if (need_more_work)
	       noninline_callee ();
	   }
	 Without penalizing this case, we usually inline noninline_callee
	 into the inline_caller because overall_growth is small preventing
	 further inlining of inline_caller.

	 Penalize only callgraph edges to functions with small overall
	 growth ...
	*/
      if (growth > overall_growth
	  /* ... and having only one caller which is not inlined ... */
	  && callee_info->single_caller
	  && !edge->caller->inlined_to
	  /* ... and edges executed only conditionally ... */
	  && freq < 1
	  /* ... consider case where callee is not inline but caller is ... */
	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
	       && DECL_DECLARED_INLINE_P (caller->decl))
	      /* ... or when early optimizers decided to split and edge
		 frequency still indicates splitting is a win ... */
	      || (callee->split_part && !caller->split_part
		  && freq * 100
		     < opt_for_fn (caller->decl,
				   param_partial_inlining_entry_probability)
		  /* ... and do not overwrite user specified hints.   */
		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
		      || DECL_DECLARED_INLINE_P (caller->decl)))))
	{
	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
	  int caller_growth = caller_info->growth;

	  /* Only apply the penalty when caller looks like inline candidate,
	     and it is not called once.  */
	  if (!caller_info->single_caller && overall_growth < caller_growth
	      && caller_info->inlinable
	      && wrapper_heuristics_may_apply
		 (caller, ipa_size_summaries->get (caller)->size))
	    {
	      if (dump)
		fprintf (dump_file,
			 "     Wrapper penalty. Increasing growth %i to %i\n",
			 overall_growth, caller_growth);
	      overall_growth = caller_growth;
	    }
	}
      if (overall_growth > 0)
	{
	  /* Strongly prefer functions with few callers that can be inlined
	     fully.  The square root here leads to smaller binaries at average.
	     Watch however for extreme cases and return to linear function
	     when growth is large.  */
	  if (overall_growth < 256)
	    overall_growth *= overall_growth;
	  else
	    overall_growth += 256 * 256 - 256;
	  denominator *= overall_growth;
	}
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
	{
	  fprintf (dump_file,
		   "      %f: guessed profile. frequency %f, count %" PRId64
		   " caller count %" PRId64
		   " time saved %f"
		   " overall growth %i (current) %i (original)"
		   " %i (compensated)\n",
		   badness.to_double (),
		   freq.to_double (),
		   edge->count.ipa ().initialized_p ()
		   ? edge->count.ipa ().to_gcov_type () : -1,
		   caller->count.ipa ().initialized_p ()
		   ? caller->count.ipa ().to_gcov_type () : -1,
		   inlining_speedup (edge, freq, unspec_edge_time,
				     edge_time).to_double (),
		   estimate_growth (callee),
		   callee_info->growth, overall_growth);
	}
    }
  /* When function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for overall number
     of functions fully inlined in program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
	badness = badness >> nest;
      else
	badness = badness << nest;
      if (dump)
	fprintf (dump_file, "      %f: no profile. nest %i\n",
		 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
		| INLINE_HINT_loop_iterations
		| INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & INLINE_HINT_builtin_constant_p)
    badness = badness.shift (badness > 0 ? -4 : 4);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}
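/* Worked example (illustrative numbers): an edge that saves 10 time units in
   a caller with IPA profile count 1000, grows the caller by 20 instructions,
   has callee overall growth 50 (squared to 2500 since it is below 256) and a
   resulting caller size of 500 gets badness roughly
   - (10 * 1000) / (20 * 2500 * 500); the most negative values are extracted
   from the heap, and therefore inlined, first.  */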
/* Recompute badness of EDGE and update its key in HEAP if needed.  */

static void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
	 heap that is unnecessarily expensive.
	 We do lazy increases: after extracting minimum if the key
	 turns out to be out of date, it is re-inserted into heap
	 with correct value.  */
      if (badness < n->get_key ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  decreasing badness %s -> %s, %f to %f\n",
		       edge->caller->dump_name (),
		       edge->callee->dump_name (),
		       n->get_key ().to_double (),
		       badness.to_double ());
	    }
	  heap->decrease_key (n, badness);
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "  enqueuing call %s -> %s, badness %f\n",
		   edge->caller->dump_name (),
		   edge->callee->dump_name (),
		   badness.to_double ());
	}
      edge->aux = heap->insert (badness, edge);
    }
}
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need a reset
   because better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  struct ipa_ref *ref;

  if (where->inlined_to)
    where = where->inlined_to;

  reset_node_cache (where);

  if (edge_growth_cache != NULL)
    for (edge = where->callers; edge; edge = edge->next_caller)
      if (edge->inline_failed)
	edge_growth_cache->remove (edge);

  FOR_EACH_ALIAS (where, ref)
    reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
	if (edge_growth_cache != NULL && e->inline_failed)
	  edge_growth_cache->remove (e);
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
	      {
		if (e->caller == node)
		  return;
		e = e->caller->callers;
	      }
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}
/* Recompute HEAP nodes for each of caller of NODE.
   UPDATED_NODES track nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
		    bitmap updated_nodes,
		    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  struct ipa_ref *ref;

  if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
      || node->inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
    return;

  FOR_EACH_ALIAS (node, ref)
    {
      struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
	if (!check_inlinablity_for
	    || check_inlinablity_for == edge)
	  {
	    if (can_inline_edge_p (edge, false)
		&& want_inline_small_function_p (edge, false)
		&& can_inline_edge_by_limits_p (edge, false))
	      update_edge_key (heap, edge);
	    else if (edge->aux)
	      {
		report_inline_failed_reason (edge);
		heap->delete_node ((edge_heap_node_t *) edge->aux);
		edge->aux = NULL;
	      }
	  }
	else if (edge->aux)
	  update_edge_key (heap, edge);
      }
}
/* Recompute HEAP nodes for each uninlined call in NODE
   If UPDATE_SINCE is non-NULL check if edges called within that function
   are inlinable (typically UPDATE_SINCE is the inline clone we introduced
   where all edges have new context).

   This is used when we know that edge badnesses are going only to increase
   (we introduced new call site) and thus all we need is to insert newly
   created edges into heap.  */

static void
update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
		    struct cgraph_node *update_since,
		    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
  bool check_inlinability = update_since == node;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      {
	if (e->callee == update_since)
	  check_inlinability = true;
	e = e->callee->callees;
      }
    else
      {
	enum availability avail;
	struct cgraph_node *callee;
	if (!check_inlinability)
	  {
	    if (e->aux
		&& !bitmap_bit_p (updated_nodes,
				  e->callee->ultimate_alias_target
				    (&avail, e->caller)->get_uid ()))
	      update_edge_key (heap, e);
	  }
	/* We do not reset callee growth cache here.  Since we added a new call,
	   growth should have just increased and consequently badness metric
	   don't need updating.  */
	else if (e->inline_failed
		 && (callee = e->callee->ultimate_alias_target (&avail,
								e->caller))
		 && avail >= AVAIL_AVAILABLE
		 && ipa_fn_summaries->get (callee) != NULL
		 && ipa_fn_summaries->get (callee)->inlinable
		 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
	  {
	    if (can_inline_edge_p (e, false)
		&& want_inline_small_function_p (e, false)
		&& can_inline_edge_by_limits_p (e, false))
	      {
		gcc_checking_assert (check_inlinability
				     || can_inline_edge_p (e, false));
		gcc_checking_assert (check_inlinability || e->aux);
		update_edge_key (heap, e);
	      }
	    else if (e->aux)
	      {
		report_inline_failed_reason (e);
		heap->delete_node ((edge_heap_node_t *) e->aux);
		e->aux = NULL;
	      }
	  }
	/* In case we redirected to unreachable node we only need to remove the
	   fibheap entry.  */
	else if (e->aux)
	  {
	    heap->delete_node ((edge_heap_node_t *) e->aux);
	    e->aux = NULL;
	  }
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
	      {
		if (e->caller == node)
		  return;
		if (e->caller == update_since)
		  check_inlinability = false;
		e = e->caller->callers;
	      }
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}
/* Enqueue all recursive calls from NODE into priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
			edge_heap_t *heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
	|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
	    && avail > AVAIL_INTERPOSABLE))
      heap->insert (-e->sreal_frequency (), e);
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
/* Decide on recursive inlining: in the case function has recursive calls,
   inline until body size reaches given argument.  If any new indirect edges
   are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
   is NULL.  */

static bool
recursive_inlining (struct cgraph_edge *edge,
		    vec<cgraph_edge *> *new_edges)
{
  cgraph_node *to = (edge->caller->inlined_to
		     ? edge->caller->inlined_to : edge->caller);
  int limit = opt_for_fn (to->decl,
			  param_max_inline_insns_recursive_auto);
  edge_heap_t heap (sreal::min ());
  struct cgraph_node *node;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone = NULL, *next;
  int depth = 0;
  int n = 0;

  node = edge->caller;
  if (node->inlined_to)
    node = node->inlined_to;

  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);

  /* Make sure that function is small enough to be considered for inlining.  */
  if (estimate_size_after_inlining (node, edge) >= limit)
    return false;
  lookup_recursive_calls (node, node, &heap);
  if (heap.empty ())
    return false;

  if (dump_file)
    fprintf (dump_file,
	     "  Performing recursive inlining on %s\n", node->dump_name ());

  /* Do the inlining and update list of recursive calls during process.  */
  while (!heap.empty ())
    {
      struct cgraph_edge *curr = heap.extract_min ();
      struct cgraph_node *cnode, *dest = curr->callee;

      if (!can_inline_edge_p (curr, true)
	  || !can_inline_edge_by_limits_p (curr, true))
	continue;

      /* MASTER_CLONE is produced in the case we already started modifying
	 the function.  Be sure to redirect edge to the original body before
	 estimating growths, otherwise we will be seeing growths after inlining
	 the already modified body.  */
      if (master_clone)
	{
	  curr->redirect_callee (master_clone);
	  if (edge_growth_cache != NULL)
	    edge_growth_cache->remove (curr);
	}

      if (estimate_size_after_inlining (node, curr) > limit)
	{
	  curr->redirect_callee (dest);
	  if (edge_growth_cache != NULL)
	    edge_growth_cache->remove (curr);
	  break;
	}

      depth = 1;
      for (cnode = curr->caller;
	   cnode->inlined_to; cnode = cnode->callers->caller)
	if (node->decl
	    == curr->callee->ultimate_alias_target ()->decl)
	  depth++;

      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
	{
	  curr->redirect_callee (dest);
	  if (edge_growth_cache != NULL)
	    edge_growth_cache->remove (curr);
	  continue;
	}

      if (dump_file)
	{
	  fprintf (dump_file,
		   "   Inlining call of depth %i", depth);
	  if (node->count.nonzero_p () && curr->count.initialized_p ())
	    {
	      fprintf (dump_file, " called approx. %.2f times per call",
		       (double)curr->count.to_gcov_type ()
		       / node->count.to_gcov_type ());
	    }
	  fprintf (dump_file, "\n");
	}
      if (!master_clone)
	{
	  /* We need original clone to copy around.  */
	  master_clone = node->create_clone (node->decl, node->count,
					     false, vNULL, true, NULL, NULL);
	  for (e = master_clone->callees; e; e = e->next_callee)
	    if (!e->inline_failed)
	      clone_inlined_nodes (e, true, false, NULL);
	  curr->redirect_callee (master_clone);
	  if (edge_growth_cache != NULL)
	    edge_growth_cache->remove (curr);
	}

      inline_call (curr, false, new_edges, &overall_size, true);
      reset_node_cache (node);
      lookup_recursive_calls (node, curr->callee, &heap);
      n++;
    }

  if (!heap.empty () && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  if (!master_clone)
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, edge->call_stmt,
		     "\n   Inlined %i times, "
		     "body grown from size %i to %i, time %f to %f\n", n,
		     ipa_size_summaries->get (master_clone)->size,
		     ipa_size_summaries->get (node)->size,
		     ipa_fn_summaries->get (master_clone)->time.to_double (),
		     ipa_fn_summaries->get (node)->time.to_double ());

  /* Remove master clone we used for inlining.  We rely on the fact that clones
     inlined into the master clone get queued just before the master clone so
     we don't need recursion.  */
  for (node = symtab->first_function (); node != master_clone;
       node = next)
    {
      next = symtab->next_function (node);
      if (node->inlined_to == master_clone)
	node->remove ();
    }
  master_clone->remove ();
  return true;
}
/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int64_t
compute_max_insns (cgraph_node *node, int insns)
{
  int max_insns = insns;
  if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
    max_insns = opt_for_fn (node->decl, param_large_unit_insns);

  return ((int64_t) max_insns
	  * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
}
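/* Worked example (illustrative numbers): for a unit whose accounted size is
   100000 instructions and param_inline_unit_growth of 40, the budget computed
   above is 100000 * (100 + 40) / 100 = 140000 instructions; very small units
   use the param_large_unit_insns floor instead.  */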
/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */

static void
add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
{
  while (new_edges.length () > 0)
    {
      struct cgraph_edge *edge = new_edges.pop ();

      gcc_assert (!edge->aux);
      gcc_assert (edge->callee);
      if (edge->inline_failed
	  && can_inline_edge_p (edge, true)
	  && want_inline_small_function_p (edge, true)
	  && can_inline_edge_by_limits_p (edge, true))
	edge->aux = heap->insert (edge_badness (edge, false), edge);
    }
}
/* Remove EDGE from the fibheap.  */

static void
heap_edge_removal_hook (struct cgraph_edge *e, void *data)
{
  if (e->aux)
    {
      ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
      e->aux = NULL;
    }
}
/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
   may get inlined.  */

bool
speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
{
  /* If we have already decided to inline the edge, it seems useful.  */
  if (!e->inline_failed)
    return true;

  enum availability avail;
  struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
								 e->caller);

  gcc_assert (e->speculative && !e->indirect_unknown_callee);

  if (!e->maybe_hot_p ())
    return false;

  /* See if IP optimizations found something potentially useful about the
     function.  For now we look only for CONST/PURE flags.  Almost everything
     else we propagate is useless.  */
  if (avail >= AVAIL_AVAILABLE)
    {
      int ecf_flags = flags_from_decl_or_type (target->decl);
      if (ecf_flags & ECF_CONST)
	{
	  if (!(e->speculative_call_indirect_edge ()->indirect_info
		->ecf_flags & ECF_CONST))
	    return true;
	}
      else if (ecf_flags & ECF_PURE)
	{
	  if (!(e->speculative_call_indirect_edge ()->indirect_info
		->ecf_flags & ECF_PURE))
	    return true;
	}
    }
  /* If we did not manage to inline the function nor redirect
     to an ipa-cp clone (those are seen by having the local flag set),
     it is probably pointless to inline it unless hardware is missing
     indirect call predictor.  */
  if (!anticipate_inlining && !target->local)
    return false;
  /* For overwritable targets there is not much to do.  */
  if (!can_inline_edge_p (e, false)
      || !can_inline_edge_by_limits_p (e, false, true))
    return false;
  /* OK, speculation seems interesting.  */
  return true;
}
/* We know that EDGE is not going to be inlined.
   See if we can remove speculation.  */

static void
resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
{
  if (edge->speculative && !speculation_useful_p (edge, false))
    {
      struct cgraph_node *node = edge->caller;
      struct cgraph_node *where = node->inlined_to
				  ? node->inlined_to : node;
      auto_bitmap updated_nodes;

      if (edge->count.ipa ().initialized_p ())
	spec_rem += edge->count.ipa ();
      cgraph_edge::resolve_speculation (edge);
      reset_edge_caches (where);
      ipa_update_overall_fn_summary (where);
      update_caller_keys (edge_heap, where,
			  updated_nodes, NULL);
      update_callee_keys (edge_heap, where, NULL,
			  updated_nodes);
    }
}
/* Return true if NODE should be accounted for overall size estimate.
   Skip all nodes optimized for size, so we can measure the growth of the hot
   part of the program regardless of the cold padding.  */

static bool
inline_account_function_p (struct cgraph_node *node)
{
   return (!DECL_EXTERNAL (node->decl)
           && !opt_for_fn (node->decl, optimize_size)
           && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
}
/* Count the number of callers of NODE and store it into DATA (which points
   to an int).  Worker for cgraph_for_node_and_aliases.  */

static bool
sum_callers (struct cgraph_node *node, void *data)
{
  struct cgraph_edge *e;
  int *num_calls = (int *)data;

  for (e = node->callers; e; e = e->next_caller)
    (*num_calls)++;
  return false;
}
/* We only propagate across edges with a non-interposable callee.  */

static bool
ignore_edge_p (struct cgraph_edge *e)
{
  enum availability avail;
  e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
  return (avail <= AVAIL_INTERPOSABLE);
}
/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap ordered by
   increasing badness.

   The inlining of small functions is bounded by the unit growth parameters.  */
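
/* Editorial sketch (not part of GCC): the shape of the greedy loop below,
   reduced to a stand-alone toy in plain C++.  Candidates are taken in order
   of increasing badness and accepted while the accumulated growth stays under
   a budget.  All names and types here are invented for illustration.

     #include <queue>
     #include <vector>

     struct candidate { double badness; int growth; };
     struct worse
     {
       bool operator() (const candidate &a, const candidate &b) const
       { return a.badness > b.badness; }        // top () == smallest badness
     };

     static int
     toy_greedy_inline (const std::vector<candidate> &cands, int budget)
     {
       std::priority_queue<candidate, std::vector<candidate>, worse>
         heap (cands.begin (), cands.end ());
       int size = 0, accepted = 0;
       while (!heap.empty ())
         {
           candidate c = heap.top ();
           heap.pop ();
           if (size + c.growth > budget)
             continue;                  // the real code records a failure reason
           size += c.growth;
           accepted++;
         }
       return accepted;
     }

   The real loop additionally re-checks badness lazily, updates the keys of
   affected edges after every inlining, and treats recursive edges
   specially.  */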
static void
inline_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  edge_heap_t edge_heap (sreal::min ());
  auto_bitmap updated_nodes;
  int min_size;
  auto_vec<cgraph_edge *> new_indirect_edges;
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  struct cgraph_edge_hook_list *edge_removal_hook_holder;
  new_indirect_edges.create (8);

  edge_removal_hook_holder
    = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);

  /* Compute overall unit size and other global parameters used by badness
     metrics.  */

  max_count = profile_count::uninitialized ();
  ipa_reduced_postorder (order, true, ignore_edge_p);
  free (order);
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      {
        if (!node->alias && node->analyzed
            && (node->has_gimple_body_p () || node->thunk)
            && opt_for_fn (node->decl, optimize))
          {
            class ipa_fn_summary *info = ipa_fn_summaries->get (node);
            struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;

            /* Do not account external functions, they will be optimized out
               if not inlined.  Also only count the non-cold portion of program.  */
            if (inline_account_function_p (node))
              initial_size += ipa_size_summaries->get (node)->size;
            info->growth = estimate_growth (node);

            int num_calls = 0;
            node->call_for_symbol_and_aliases (sum_callers, &num_calls,
                                               true);
            if (num_calls == 1)
              info->single_caller = true;
            if (dfs && dfs->next_cycle)
              {
                struct cgraph_node *n2;
                int id = dfs->scc_no + 1;
                for (n2 = node; n2;
                     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
                  if (opt_for_fn (n2->decl, optimize))
                    {
                      ipa_fn_summary *info2 = ipa_fn_summaries->get
                         (n2->inlined_to ? n2->inlined_to : n2);
                      if (info2->scc_no)
                        break;
                      info2->scc_no = id;
                    }
              }
          }

        for (edge = node->callers; edge; edge = edge->next_caller)
          max_count = max_count.max (edge->count.ipa ());
      }
  ipa_free_postorder_info ();
  initialize_growth_caches ();
2012 "\nDeciding on inlining of small functions. Starting with size %i.\n",
2015 overall_size
= initial_size
;
2016 min_size
= overall_size
;
  /* Populate the heap with all edges we might inline.  */

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      bool update = false;
      struct cgraph_edge *next = NULL;
      bool has_speculative = false;

      if (!opt_for_fn (node->decl, optimize))
        continue;

      if (dump_file)
        fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());

      for (edge = node->callees; edge; edge = edge->next_callee)
        {
          if (edge->inline_failed
              && !edge->aux
              && can_inline_edge_p (edge, true)
              && want_inline_small_function_p (edge, true)
              && can_inline_edge_by_limits_p (edge, true)
              && edge->inline_failed)
            {
              gcc_assert (!edge->aux);
              update_edge_key (&edge_heap, edge);
            }
          if (edge->speculative)
            has_speculative = true;
        }
      if (has_speculative)
        for (edge = node->callees; edge; edge = next)
          {
            next = edge->next_callee;
            if (edge->speculative
                && !speculation_useful_p (edge, edge->aux != NULL))
              {
                cgraph_edge::resolve_speculation (edge);
                update = true;
              }
          }
      if (update)
        {
          struct cgraph_node *where = node->inlined_to
                                      ? node->inlined_to : node;
          ipa_update_overall_fn_summary (where);
          reset_edge_caches (where);
          update_caller_keys (&edge_heap, where,
                              updated_nodes, NULL);
          update_callee_keys (&edge_heap, where, NULL,
                              updated_nodes);
          bitmap_clear (updated_nodes);
        }
    }
  gcc_assert (in_lto_p
              || !max_count.initialized_p ()
              || (profile_info && flag_branch_probabilities));
  while (!edge_heap.empty ())
    {
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      sreal badness = edge_heap.min_key ();
      sreal current_badness;
      int growth;

      edge = edge_heap.extract_min ();
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed || !edge->callee->analyzed)
        continue;
      /* Be sure that caches are maintained consistent.
         This check is affected by scaling roundoff errors when compiling with
         an IPA-scaled profile, thus we skip it in that case.  */
      if (flag_checking && !edge->callee->count.ipa_p ()
          && (!max_count.initialized_p () || !max_count.nonzero_p ()))
        {
          sreal cached_badness = edge_badness (edge, false);

          int old_size_est = estimate_edge_size (edge);
          sreal old_time_est = estimate_edge_time (edge);
          int old_hints_est = estimate_edge_hints (edge);

          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (edge);
          reset_node_cache (edge->caller->inlined_to
                            ? edge->caller->inlined_to
                            : edge->caller);
          gcc_assert (old_size_est == estimate_edge_size (edge));
          gcc_assert (old_time_est == estimate_edge_time (edge));
          /* FIXME:

             gcc_assert (old_hints_est == estimate_edge_hints (edge));

             fails with profile feedback because some hints depend on the
             maybe_hot_edge_p predicate, and because the callee gets inlined
             into other calls, the edge may become cold.
             This ought to be fixed by computing relative probabilities
             for the given invocation, but that will be better done once the
             whole code is converted to sreals.  Disable for now and revert to
             the "wrong" value so the enable/disable checking paths agree.  */
          edge_growth_cache->get (edge)->hints = old_hints_est + 1;

          /* When updating the edge costs, we only decrease badness in the keys.
             Increases of badness are handled lazily; when we see a key with an
             out-of-date value on it, we re-insert it now.  */
          current_badness = edge_badness (edge, false);
          gcc_assert (cached_badness == current_badness);
          gcc_assert (current_badness >= badness);
        }
      else
        current_badness = edge_badness (edge, false);
      if (current_badness != badness)
        {
          if (edge_heap.min () && current_badness > edge_heap.min_key ())
            {
              edge->aux = edge_heap.insert (current_badness, edge);
              continue;
            }
          else
            badness = current_badness;
        }

      if (!can_inline_edge_p (edge, true)
          || !can_inline_edge_by_limits_p (edge, true))
        {
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }
      callee = edge->callee->ultimate_alias_target ();
      growth = estimate_edge_growth (edge);
      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i size\n",
                   callee->dump_name (),
                   ipa_size_summaries->get (callee)->size);
          fprintf (dump_file,
                   " to be inlined into %s in %s:%i\n"
                   " Estimated badness is %f, frequency %.2f.\n",
                   edge->caller->dump_name (),
                   edge->call_stmt
                   && (LOCATION_LOCUS (gimple_location ((const gimple *)
                                                        edge->call_stmt))
                       > BUILTINS_LOCATION)
                   ? gimple_filename ((const gimple *) edge->call_stmt)
                   : "unknown",
                   edge->call_stmt
                   ? gimple_lineno ((const gimple *) edge->call_stmt)
                   : -1,
                   badness.to_double (),
                   edge->sreal_frequency ().to_double ());
          if (edge->count.ipa ().initialized_p ())
            {
              fprintf (dump_file, " Called ");
              edge->count.ipa ().dump (dump_file);
              fprintf (dump_file, " times\n");
            }
          if (dump_flags & TDF_DETAILS)
            edge_badness (edge, true);
        }

      where = edge->caller;

      if (overall_size + growth > compute_max_insns (where, min_size)
          && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        {
          edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
          report_inline_failed_reason (edge);
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }

      if (!want_inline_small_function_p (edge, true))
        {
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }

      profile_count old_count = callee->count;
      /* Heuristics for inlining small functions work poorly for
         recursive calls where we do effects similar to loop unrolling.
         When inlining such an edge seems profitable, leave the decision to
         the special-purpose recursive inliner.  */
      if (edge->recursive_p ())
        {
          if (where->inlined_to)
            where = where->inlined_to;
          if (!recursive_inlining (edge,
                                   opt_for_fn (edge->caller->decl,
                                               flag_indirect_inlining)
                                   ? &new_indirect_edges : NULL))
            {
              edge->inline_failed = CIF_RECURSIVE_INLINING;
              resolve_noninline_speculation (&edge_heap, edge);
              continue;
            }
          reset_edge_caches (where);
          /* Recursive inliner inlines all recursive calls of the function
             at once.  Consequently we need to update all callee keys.  */
          if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
            add_new_edges_to_heap (&edge_heap, new_indirect_edges);
          update_callee_keys (&edge_heap, where, where, updated_nodes);
          bitmap_clear (updated_nodes);
        }
      else
        {
          struct cgraph_node *outer_node = NULL;
          int depth = 0;

          /* Consider the case where the self-recursive function A is inlined
             into B.  This is a desired optimization in some cases, since it
             leads to an effect similar to loop peeling and we might completely
             optimize out the recursive call.  However we must be extra
             selective.  */
          where = edge->caller;
          while (where->inlined_to)
            {
              if (where->decl == callee->decl)
                outer_node = where, depth++;
              where = where->callers->caller;
            }
          if (outer_node
              && !want_inline_self_recursive_call_p (edge, outer_node,
                                                     true, depth))
            {
              edge->inline_failed
                = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
                   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
              resolve_noninline_speculation (&edge_heap, edge);
              continue;
            }
          else if (depth && dump_file)
            fprintf (dump_file, " Peeling recursion with depth %i\n", depth);

          gcc_checking_assert (!callee->inlined_to);

          int old_size = ipa_size_summaries->get (where)->size;
          sreal old_time = ipa_fn_summaries->get (where)->time;

          inline_call (edge, true, &new_indirect_edges, &overall_size, true);
          reset_edge_caches (edge->callee);
          add_new_edges_to_heap (&edge_heap, new_indirect_edges);

          /* If the caller's size and time increased, we do not need to update
             all edges because badness is not going to decrease.  */
          if (old_size <= ipa_size_summaries->get (where)->size
              && old_time <= ipa_fn_summaries->get (where)->time
              /* Wrapper penalty may be non-monotonous in this respect.
                 Fortunately it only affects small functions.  */
              && !wrapper_heuristics_may_apply (where, old_size))
            update_callee_keys (&edge_heap, edge->callee, edge->callee,
                                updated_nodes);
          else
            update_callee_keys (&edge_heap, where,
                                edge->callee,
                                updated_nodes);
        }
      where = edge->caller;
      if (where->inlined_to)
        where = where->inlined_to;
      /* Our profitability metric can depend on local properties
         such as the number of inlinable calls and the size of the function
         body.  After inlining these properties might change for the function
         we inlined into (since its body size changed) and for the functions
         called by the function we inlined (since the number of their inlinable
         callers might change).  */
      update_caller_keys (&edge_heap, where, updated_nodes, NULL);
      /* Offline copy count has possibly changed, recompute if profile is
         available.  */
      struct cgraph_node *n
        = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
      if (n != edge->callee && n->analyzed && !(n->count == old_count)
          && n->count.ipa_p ())
        update_callee_keys (&edge_heap, n, NULL, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_enabled_p ())
        {
          ipa_fn_summary *s = ipa_fn_summaries->get (where);

          /* dump_printf can't handle %+i.  */
          char buf_net_change[100];
          snprintf (buf_net_change, sizeof buf_net_change, "%+i",
                    overall_size - old_size);

          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
                           " Inlined %C into %C which now has time %f and "
                           "size %i, net change of %s%s.\n",
                           edge->callee, edge->caller,
                           s->time.to_double (),
                           ipa_size_summaries->get (edge->caller)->size,
                           buf_net_change,
                           cross_module_call_p (edge) ? " (cross module)":"");
        }
      if (min_size > overall_size)
        {
          min_size = overall_size;

          if (dump_file)
            fprintf (dump_file, "New minimal size reached: %i\n", min_size);
        }
    }
  free_growth_caches ();
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "Unit growth for small function inlining: %i->%i (%i%%)\n",
                 initial_size, overall_size,
                 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
  symtab->remove_edge_removal_hook (edge_removal_hook_holder);
}
/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */

static void
flatten_function (struct cgraph_node *node, bool early, bool update)
{
  struct cgraph_edge *e;

  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);

  node->aux = (void *) node;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();

      /* We've hit a cycle?  It is time to give up.  */
      if (callee->aux)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining %C into %C to avoid cycle.\n",
                             callee, e->caller);
          if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
            e->inline_failed = CIF_RECURSIVE_INLINING;
          continue;
        }

      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
        {
          flatten_function (callee, early, false);
          continue;
        }

      /* Flatten attribute needs to be processed during late inlining.  For
         extra code quality we however do flattening during early optimization,
         too.  */
      if (!early
          ? !can_inline_edge_p (e, true)
            && !can_inline_edge_by_limits_p (e, true)
          : !can_early_inline_edge_p (e))
        continue;

      if (e->recursive_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: recursive call.\n");
          continue;
        }

      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: SSA form does not match.\n");
          continue;
        }

      /* Inline the edge and flatten the inline clone.  Avoid
         recursing through the original node if the node was cloned.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C.\n",
                         callee, e->caller);
      orig_callee = callee;
      inline_call (e, true, NULL, NULL, false);
      if (e->callee != orig_callee)
        orig_callee->aux = (void *) node;
      flatten_function (e->callee, early, false);
      if (e->callee != orig_callee)
        orig_callee->aux = NULL;
    }

  node->aux = NULL;
  cgraph_node *where = node->inlined_to ? node->inlined_to : node;
  if (update && opt_for_fn (where->decl, optimize))
    ipa_update_overall_fn_summary (where);
}
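
/* Editorial sketch (not part of GCC): the user-visible trigger for the
   function above.  A definition such as

     __attribute__ ((flatten)) void
     hot_path (void)
     {
       helper_a ();     // inlined here, and the calls inside it as well
       helper_b ();
     }

   asks that every call inside hot_path, and recursively inside the bodies
   inlined into it, be inlined where possible; flatten_function implements
   this by walking the callee edges as above.  'hot_path', 'helper_a' and
   'helper_b' are invented names.  */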
/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
   DATA points to the number of calls originally found so we avoid infinite
   recursion.  */

static bool
inline_to_all_callers_1 (struct cgraph_node *node, void *data,
                         hash_set<cgraph_node *> *callers)
{
  int *num_calls = (int *)data;
  bool callee_removed = false;

  while (node->callers && !node->inlined_to)
    {
      struct cgraph_node *caller = node->callers->caller;

      if (!can_inline_edge_p (node->callers, true)
          || !can_inline_edge_by_limits_p (node->callers, true)
          || node->callers->recursive_p ())
        {
          if (dump_file)
            fprintf (dump_file, "Uninlinable call found; giving up.\n");
          *num_calls = 0;
          return false;
        }

      if (dump_file)
        {
          cgraph_node *ultimate = node->ultimate_alias_target ();
          fprintf (dump_file,
                   "\nInlining %s size %i.\n",
                   ultimate->dump_name (),
                   ipa_size_summaries->get (ultimate)->size);
          fprintf (dump_file,
                   " Called once from %s %i insns.\n",
                   node->callers->caller->dump_name (),
                   ipa_size_summaries->get (node->callers->caller)->size);
        }

      /* Remember which callers we inlined to, delaying updating the
         overall summary.  */
      callers->add (node->callers->caller);
      inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
      if (dump_file)
        fprintf (dump_file,
                 " Inlined into %s which now has %i size\n",
                 caller->dump_name (),
                 ipa_size_summaries->get (caller)->size);
      if (!(*num_calls)--)
        {
          if (dump_file)
            fprintf (dump_file, "New calls found; giving up.\n");
          return callee_removed;
        }
      if (callee_removed)
        return true;
    }
  return false;
}
/* Wrapper around inline_to_all_callers_1 doing delayed overall summary
   update.  */

static bool
inline_to_all_callers (struct cgraph_node *node, void *data)
{
  hash_set<cgraph_node *> callers;
  bool res = inline_to_all_callers_1 (node, data, &callers);
  /* Perform the delayed update of the overall summary of all callers
     processed.  This avoids quadratic behavior in the cases where
     we have a lot of calls to the same function.  */
  for (hash_set<cgraph_node *>::iterator i = callers.begin ();
       i != callers.end (); ++i)
    ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
  return res;
}
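
/* Editorial sketch (not part of GCC): the "collect first, update once" pattern
   used above, in stand-alone form.  Rather than recomputing a caller's summary
   after every single inlining, the callers are gathered in a set and each is
   updated exactly once; all names below are invented.

     #include <unordered_set>

     struct toy_fn { int summary_version; };

     static void
     toy_recompute_summary (toy_fn *f)  // stands in for ipa_update_overall_fn_summary
     {
       f->summary_version++;
     }

     static void
     toy_update_each_once (const std::unordered_set<toy_fn *> &touched)
     {
       for (toy_fn *f : touched)
         toy_recompute_summary (f);     // O(#distinct callers), not O(#calls)
     }

   With many calls to the same callee this turns a quadratic amount of summary
   recomputation into a linear one.  */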
/* Output overall time estimate.  */
static void
dump_overall_stats (void)
{
  sreal sum_weighted = 0, sum = 0;
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to
        && !node->alias)
      {
        ipa_fn_summary *s = ipa_fn_summaries->get (node);
        if (s != NULL)
          {
            sum += s->time;
            if (node->count.ipa ().initialized_p ())
              sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
          }
      }
  fprintf (dump_file, "Overall time estimate: "
           "%f weighted by profile: "
           "%f\n", sum.to_double (), sum_weighted.to_double ());
}
/* Output some useful stats about inlining.  */

static void
dump_inline_stats (void)
{
  int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
  int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
  int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
  int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
  int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
  int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
  int64_t reason[CIF_N_REASONS][2];
  sreal reason_freq[CIF_N_REASONS];
  int i;
  struct cgraph_node *node;

  memset (reason, 0, sizeof (reason));
  for (i = 0; i < CIF_N_REASONS; i++)
    reason_freq[i] = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
        {
          if (e->inline_failed)
            {
              if (e->count.ipa ().initialized_p ())
                reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
              reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
              reason[(int) e->inline_failed][1] ++;
              if (DECL_VIRTUAL_P (e->callee->decl)
                  && e->count.ipa ().initialized_p ())
                {
                  if (e->indirect_inlining_edge)
                    noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
                  else
                    noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
                }
              else if (e->count.ipa ().initialized_p ())
                {
                  if (e->indirect_inlining_edge)
                    noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
                  else
                    noninlined_cnt += e->count.ipa ().to_gcov_type ();
                }
            }
          else if (e->count.ipa ().initialized_p ())
            {
              if (e->speculative)
                {
                  if (DECL_VIRTUAL_P (e->callee->decl))
                    inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
                  else
                    inlined_speculative += e->count.ipa ().to_gcov_type ();
                }
              else if (DECL_VIRTUAL_P (e->callee->decl))
                {
                  if (e->indirect_inlining_edge)
                    inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
                  else
                    inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
                }
              else
                {
                  if (e->indirect_inlining_edge)
                    inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
                  else
                    inlined_cnt += e->count.ipa ().to_gcov_type ();
                }
            }
        }
      for (e = node->indirect_calls; e; e = e->next_callee)
        if (e->indirect_info->polymorphic
            && e->count.ipa ().initialized_p ())
          indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
        else if (e->count.ipa ().initialized_p ())
          indirect_cnt += e->count.ipa ().to_gcov_type ();
    }
  if (max_count.initialized_p ())
    {
      fprintf (dump_file,
               "Inlined %" PRId64 " + speculative "
               "%" PRId64 " + speculative polymorphic "
               "%" PRId64 " + previously indirect "
               "%" PRId64 " + virtual "
               "%" PRId64 " + virtual and previously indirect "
               "%" PRId64 "\n" "Not inlined "
               "%" PRId64 " + previously indirect "
               "%" PRId64 " + virtual "
               "%" PRId64 " + virtual and previously indirect "
               "%" PRId64 " + still indirect "
               "%" PRId64 " + still indirect polymorphic "
               "%" PRId64 "\n", inlined_cnt,
               inlined_speculative, inlined_speculative_ply,
               inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
               noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
               noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
      fprintf (dump_file, "Removed speculations ");
      spec_rem.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  dump_overall_stats ();
  fprintf (dump_file, "\nWhy inlining failed?\n");
  for (i = 0; i < CIF_N_REASONS; i++)
    if (reason[i][1])
      fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64 " count\n",
               cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
               (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
}
/* Called when node is removed.  */

static void
flatten_remove_node_hook (struct cgraph_node *node, void *data)
{
  if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
    return;

  hash_set<struct cgraph_node *> *removed
    = (hash_set<struct cgraph_node *> *) data;
  removed->add (node);
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i, j;
  int cold;
  bool remove_functions = false;

  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);
  spec_rem = profile_count::zero ();

  FOR_EACH_FUNCTION (node)
    {
      node->aux = 0;

      /* Recompute the default reasons for inlining because they may have
         changed during merging.  */
      if (in_lto_p)
        {
          for (cgraph_edge *e = node->callees; e; e = e->next_callee)
            {
              gcc_assert (e->inline_failed);
              initialize_inline_failed (e);
            }
          for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
            initialize_inline_failed (e);
        }
    }
  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* First shrink order array, so that it only contains nodes with
     flatten attribute.  */
  for (i = nnodes - 1, j = i; i >= 0; i--)
    {
      node = order[i];
      if (node->definition
          /* Do not try to flatten aliases.  These may happen for example when
             creating local aliases.  */
          && !node->alias
          && lookup_attribute ("flatten",
                               DECL_ATTRIBUTES (node->decl)) != NULL)
        order[j--] = order[i];
    }

  /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
     nodes with flatten attribute.  If there is more than one such
     node, we need to register a node removal hook, as flatten_function
     could remove other nodes with flatten attribute.  See PR82801.  */
  struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
  hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
  if (j < nnodes - 2)
    {
      flatten_removed_nodes = new hash_set<struct cgraph_node *>;
      node_removal_hook_holder
        = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
                                           flatten_removed_nodes);
    }

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i > j; i--)
    {
      node = order[i];
      if (flatten_removed_nodes
          && flatten_removed_nodes->contains (node))
        continue;

      /* Handle nodes to be flattened.
         Ideally when processing callees we stop inlining at the
         entry of cycles, possibly cloning that entry point and
         trying to flatten it, turning it into a self-recursive
         function.  */
      if (dump_file)
        fprintf (dump_file, "Flattening %s\n", node->dump_name ());
      flatten_function (node, false, true);
    }

  if (j < nnodes - 2)
    {
      symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
      delete flatten_removed_nodes;
    }
  free (order);

  if (dump_file)
    dump_overall_stats ();

  inline_small_functions ();

  gcc_assert (symtab->state == IPA_SSA);
  symtab->state = IPA_SSA_AFTER_INLINING;
  /* Do first after-inlining removal.  We want to remove all "stale" extern
     inline functions and virtual functions so we really know what is called
     once.  */
  symtab->remove_unreachable_nodes (dump_file);
  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless of the callee size as long as the function growth
     limits are met.  */
  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on functions to be inlined into all callers and "
             "removing useless speculations:\n");

  /* Inlining one function called once has a good chance of preventing us from
     inlining another function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is a good cut without the extra pain of maintaining the queue.

     ??? this is not really fitting the bill perfectly: inlining a function
     into a callee often leads to better optimization of the callee due to
     the increased context.
     For example if the main() function calls a function that outputs help
     and then a function that does the real work, we should inline
     the second one with priority even if both calls are cold by themselves.

     We probably want to implement a new predicate replacing our use of
     maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
     to be hot.  */
  for (cold = 0; cold <= 1; cold++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
        {
          struct cgraph_edge *edge, *next;
          bool update = false;

          if (!opt_for_fn (node->decl, optimize)
              || !opt_for_fn (node->decl, flag_inline_functions_called_once))
            continue;

          for (edge = node->callees; edge; edge = next)
            {
              next = edge->next_callee;
              if (edge->speculative && !speculation_useful_p (edge, false))
                {
                  if (edge->count.ipa ().initialized_p ())
                    spec_rem += edge->count.ipa ();
                  cgraph_edge::resolve_speculation (edge);
                  update = true;
                  remove_functions = true;
                }
            }
          if (update)
            {
              struct cgraph_node *where = node->inlined_to
                                          ? node->inlined_to : node;
              reset_edge_caches (where);
              ipa_update_overall_fn_summary (where);
            }
          if (want_inline_function_to_all_callers_p (node, cold))
            {
              int num_calls = 0;
              node->call_for_symbol_and_aliases (sum_callers, &num_calls,
                                                 true);
              while (node->call_for_symbol_and_aliases
                       (inline_to_all_callers, &num_calls, true))
                ;
              remove_functions = true;
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "\nInlined %i calls, eliminated %i functions\n\n",
                 ncalls_inlined, nfunctions_inlined);
  if (dump_file)
    dump_inline_stats ();

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);
  return remove_functions ? TODO_remove_functions : 0;
}
/* Inline always-inline function calls in NODE.  */

static bool
inline_always_inline_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        continue;

      if (e->recursive_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             " Not inlining recursive call to %C.\n",
                             e->callee);
          e->inline_failed = CIF_RECURSIVE_INLINING;
          continue;
        }

      if (!can_early_inline_edge_p (e))
        {
          /* Set inlined to true if the callee is marked "always_inline" but
             is not inlinable.  This will allow flagging an error later in
             expand_call_inline in tree-inline.c.  */
          if (lookup_attribute ("always_inline",
                                DECL_ATTRIBUTES (callee->decl)) != NULL)
            inlined = true;
          continue;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C (always_inline).\n",
                         e->callee, e->caller);
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }
  if (inlined)
    ipa_update_overall_fn_summary (node);

  return inlined;
}
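
/* Editorial sketch (not part of GCC): the attribute handled above.

     static inline int clamp_nonneg (int v) __attribute__ ((always_inline));

     static inline int
     clamp_nonneg (int v)
     {
       return v < 0 ? 0 : v;
     }

   Calls to clamp_nonneg must be inlined regardless of optimization level or
   size limits (DECL_DISREGARD_INLINE_LIMITS); when that is impossible, for
   example because the call is recursive, the failure recorded here leads to a
   diagnostic later in expand_call_inline.  'clamp_nonneg' is an invented
   name.  */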
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static bool
early_inline_small_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();

      /* We can encounter not-yet-analyzed function during
         early inlining on callgraphs with strongly
         connected components.  */
      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      if (s == NULL || !s->inlinable || !e->inline_failed)
        continue;

      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
          && !opt_for_fn (node->decl, flag_inline_small_functions)
          && !opt_for_fn (node->decl, flag_inline_functions))
        continue;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, e->call_stmt,
                         "Considering inline candidate %C.\n",
                         callee);

      if (!can_early_inline_edge_p (e))
        continue;

      if (e->recursive_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             " Not inlining: recursive call.\n");
          continue;
        }

      if (!want_early_inline_function_p (e))
        continue;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C.\n",
                         callee, e->caller);
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }

  if (inlined)
    ipa_update_overall_fn_summary (node);

  return inlined;
}
unsigned int
early_inliner (function *fun)
{
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if datastructures for ipa-inliner are already computed.  This
     happens when some pass decides to construct a new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse us when the early inliner decides to inline a call
     to a function clone, because function clones don't have parameter lists
     in ipa-prop matching their signature.  */
  if (ipa_node_params_sum)
    return 0;

  if (flag_checking)
    node->verify ();
  node->remove_all_references ();

  /* Even when not optimizing or not inlining, inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
         during incremental inlining.  This sucks as functions calling
         always inline functions will get less optimized, but at the
         same time inlining of functions calling always inline
         functions into an always inline function might introduce
         cycles of edges to be always inlined in the callgraph.

         We might want to be smarter and just avoid this type of inlining.  */
      || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
          && lookup_attribute ("always_inline",
                               DECL_ATTRIBUTES (node->decl))))
    ;
  else if (lookup_attribute ("flatten",
                             DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
         all calls in it.  */
      if (dump_enabled_p ())
        dump_printf (MSG_OPTIMIZED_LOCATIONS,
                     "Flattening %C\n", node);
      flatten_function (node, true, true);
      inlined = true;
    }
  else
    {
      /* If some always_inline functions were inlined, apply the changes.
         This way we will not account always-inline functions into the growth
         limits, and moreover we will inline calls from always-inlines that we
         skipped previously because of the conditional above.  */
      if (inlined)
        {
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);
          /* optimize_inline_calls call above might have introduced new
             statements that don't have inline parameters computed.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
            {
              /* We can encounter not-yet-analyzed function during
                 early inlining on callgraphs with strongly
                 connected components.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
              es->call_stmt_size
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
              es->call_stmt_time
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
            }
          ipa_update_overall_fn_summary (node);
          inlined = false;
          timevar_pop (TV_INTEGRATION);
        }
      /* We iterate incremental inlining to get trivial cases of indirect
         inlining.  */
      while (iterations < opt_for_fn (node->decl,
                                      param_early_inliner_max_iterations)
             && early_inline_small_functions (node))
        {
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);

          /* Technically we ought to recompute inline parameters so the new
             iteration of early inliner works as expected.  We however have
             values approximately right and thus we only need to update edge
             info that might be cleared out for newly discovered edges.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
            {
              /* We have no summary for new bound store calls yet.  */
              ipa_call_summary *es = ipa_call_summaries->get_create (edge);
              es->call_stmt_size
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
              es->call_stmt_time
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
            }
          if (iterations < opt_for_fn (node->decl,
                                       param_early_inliner_max_iterations) - 1)
            ipa_update_overall_fn_summary (node);
          timevar_pop (TV_INTEGRATION);
          iterations++;
          inlined = false;
        }
      if (dump_file)
        fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  fun->always_inline_functions_inlined = true;

  return todo;
}
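
/* Editorial note (not part of GCC): the iteration count above is user
   visible.  Early inlining as a whole can be turned off with
   -fno-early-inlining, and the number of iterations of the loop above can be
   capped with the early-inliner-max-iterations parameter, e.g.

     gcc -O2 --param early-inliner-max-iterations=1 file.c

   which allows at most one pass over the newly discovered call edges per
   function.  */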
/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.  */

namespace {

const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_early_inline

unsigned int
pass_early_inline::execute (function *fun)
{
  return early_inliner (fun);
}

} // anon namespace

gimple_opt_pass *
make_pass_early_inline (gcc::context *ctxt)
{
  return new pass_early_inline (ctxt);
}
namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
                      NULL, /* generate_summary */
                      NULL, /* write_summary */
                      NULL, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      inline_transform, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_inline (gcc::context *ctxt)
{
  return new pass_ipa_inline (ctxt);
}