gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
23 The implementation of the inliner is organized as follows:
25 inlining heuristics limits
27 can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by the user (allowed function growth and so
29 on).
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
42 A simple local inlining pass that inlines callees into the current
43 function. This pass makes no use of whole-unit analysis and thus can
44 make only very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner were already optimized
50 and the results of early inlining add a lot of optimization opportunities
51 for local optimization.
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto-inline functions, inlining for size and
55 flattening.
57 The main strength of the pass is its ability to eliminate the abstraction
58 penalty in C++ code (via a combination of inlining and early
59 optimization) and thus improve the quality of analysis done by the real
60 IPA optimizers.
62 Because of the lack of whole-unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It does, however, perform very simple
64 speculative inlining, allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner, able to handle inlining with whole-program
71 knowledge. It performs the following steps:
73 1) Inlining of small functions. This is implemented by a greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as the inline limits allow doing so.
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so,
79 a special-purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
82 2) Unreachable functions are removed from the callgraph. Inlining leads
83 to devirtualization and other modifications of the callgraph, so functions
84 may become unreachable during the process. Also, functions declared as
85 extern inline and virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction of code size by eliminating
90 the need for an offline copy of the function. */
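/* For orientation, a rough sketch (illustration only, not the actual
   implementation) of the greedy small-function step described above:

     put every inlinable edge into a heap keyed by its badness;
     while the heap is not empty and unit growth stays within limits
       take the edge with the smallest badness;
       if the per-function limits still allow it, inline it and
         recompute the badness of the edges it affects;

   The helpers below (caller_growth_limits, edge_badness, update_edge_key)
   implement the individual pieces of this loop.  */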
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
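/* The edge heaps below are keyed by an sreal priority - badness as computed
   by edge_badness for the main inliner, and negated call frequency for the
   recursive inliner - so extract_min always yields the edge we currently
   want to inline most.  */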
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136 To allow inlining huge functions into a tiny wrapper, the limit
137 is always based on the bigger of the two functions considered.
139 For stack growth limits we always base the growth on the stack usage
140 of the caller. We want to prevent applications from segfaulting
141 on stack overflow when functions with huge stack frames get
142 inlined. */
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
154 /* Look for the function e->caller is inlined into. While doing
155 so, work out the largest function body on the way. As
156 described above, we want to base our function growth
157 limits on that, not on the self size of the
158 outer function and not on the self size of the inline code
159 we immediately inline into. This is the most relaxed
160 interpretation of the rule "do not grow large functions
161 too much in order to prevent the compiler from exploding". */
162 while (true)
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
194 if (!what_info->estimated_stack_size)
195 return true;
197 /* FIXME: Stack size limit often prevents inlining in Fortran programs
198 due to large i/o datastructures used by the Fortran front-end.
199 We ought to ignore this limit when we know that the edge is executed
200 on every invocation of the caller (i.e. its call statement dominates
201 exit block). We do not track this information, yet. */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212 /* If the function already has large stack usage from a sibling
213 inline call, we can inline too.
214 This bit overoptimistically assumes that we are good at stack
215 packing. */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
222 return true;
225 /* Dump info about why inlining has failed. */
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
230 if (dump_enabled_p ())
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
259 /* Decide whether sanitizer-related attributes allow inlining. */
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
264 if (!caller || !callee)
265 return true;
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
271 const sanitize_code codes[] =
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
286 if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
287 return false;
289 return true;
292 /* Used for flags where it is safe to inline when the caller's value is
293 greater than the callee's. */
294 #define check_maybe_up(flag) \
295 (opts_for_fn (caller->decl)->x_##flag \
296 != opts_for_fn (callee->decl)->x_##flag \
297 && (!always_inline \
298 || opts_for_fn (caller->decl)->x_##flag \
299 < opts_for_fn (callee->decl)->x_##flag))
300 /* Used for flags where it is safe to inline when caller's value is
301 smaller than callee's. */
302 #define check_maybe_down(flag) \
303 (opts_for_fn (caller->decl)->x_##flag \
304 != opts_for_fn (callee->decl)->x_##flag \
305 && (!always_inline \
306 || opts_for_fn (caller->decl)->x_##flag \
307 > opts_for_fn (callee->decl)->x_##flag))
308 /* Used for flags where exact match is needed for correctness. */
309 #define check_match(flag) \
310 (opts_for_fn (caller->decl)->x_##flag \
311 != opts_for_fn (callee->decl)->x_##flag)
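/* For illustration: in can_inline_edge_by_limits_p below,
   check_match (flag_wrapv) flags a mismatch whenever caller and callee differ
   on -fwrapv, while check_maybe_up (flag_rounding_math) flags a mismatch on
   any difference unless the callee is always_inline and the caller's value is
   greater than the callee's (the direction the comments above call safe).  */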
313 /* Decide if we can inline the edge and possibly update
314 the inline_failed reason.
315 We check whether inlining is possible at all and whether
316 caller growth limits allow doing so.
318 If REPORT is true, output the reason to the dump file. */
320 static bool
321 can_inline_edge_p (struct cgraph_edge *e, bool report,
322 bool early = false)
324 gcc_checking_assert (e->inline_failed);
326 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
328 if (report)
329 report_inline_failed_reason (e);
330 return false;
333 bool inlinable = true;
334 enum availability avail;
335 cgraph_node *caller = (e->caller->inlined_to
336 ? e->caller->inlined_to : e->caller);
337 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
339 if (!callee->definition)
341 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
342 inlinable = false;
344 if (!early && (!opt_for_fn (callee->decl, optimize)
345 || !opt_for_fn (caller->decl, optimize)))
347 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
348 inlinable = false;
350 else if (callee->calls_comdat_local)
352 e->inline_failed = CIF_USES_COMDAT_LOCAL;
353 inlinable = false;
355 else if (avail <= AVAIL_INTERPOSABLE)
357 e->inline_failed = CIF_OVERWRITABLE;
358 inlinable = false;
360 /* All edges with call_stmt_cannot_inline_p should have inline_failed
361 initialized to one of FINAL_ERROR reasons. */
362 else if (e->call_stmt_cannot_inline_p)
363 gcc_unreachable ();
364 /* Don't inline if the functions have different EH personalities. */
365 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
366 && DECL_FUNCTION_PERSONALITY (callee->decl)
367 && (DECL_FUNCTION_PERSONALITY (caller->decl)
368 != DECL_FUNCTION_PERSONALITY (callee->decl)))
370 e->inline_failed = CIF_EH_PERSONALITY;
371 inlinable = false;
373 /* TM pure functions should not be inlined into non-TM_pure
374 functions. */
375 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
377 e->inline_failed = CIF_UNSPECIFIED;
378 inlinable = false;
380 /* Check compatibility of target optimization options. */
381 else if (!targetm.target_option.can_inline_p (caller->decl,
382 callee->decl))
384 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
385 inlinable = false;
387 else if (ipa_fn_summaries->get (callee) == NULL
388 || !ipa_fn_summaries->get (callee)->inlinable)
390 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
391 inlinable = false;
393 /* Don't inline a function with mismatched sanitization attributes. */
394 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
396 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
397 inlinable = false;
399 else if (profile_arc_flag
400 && (lookup_attribute ("no_profile_instrument_function",
401 DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
402 != (lookup_attribute ("no_profile_instrument_function",
403 DECL_ATTRIBUTES (callee->decl)) == NULL_TREE))
405 cgraph_node *origin = caller;
406 while (origin->clone_of)
407 origin = origin->clone_of;
409 if (!DECL_STRUCT_FUNCTION (origin->decl)->always_inline_functions_inlined)
411 e->inline_failed = CIF_UNSPECIFIED;
412 inlinable = false;
416 if (!inlinable && report)
417 report_inline_failed_reason (e);
418 return inlinable;
421 /* Return the inline_insns_single limit for function N. If HINT or HINT2 is
422 true, scale up the bound. */
424 static int
425 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
427 if (hint && hint2)
429 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
430 spd = spd * spd;
431 if (spd > 1000000)
432 spd = 1000000;
433 return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
435 if (hint || hint2)
436 return opt_for_fn (n->decl, param_max_inline_insns_single)
437 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
438 return opt_for_fn (n->decl, param_max_inline_insns_single);
441 /* Return the inline_insns_auto limit for function N. If HINT or HINT2 is
442 true, scale up the bound. */
444 static int
445 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
447 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
448 if (hint && hint2)
450 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
451 spd = spd * spd;
452 if (spd > 1000000)
453 spd = 1000000;
454 return max_inline_insns_auto * spd / 100;
456 if (hint || hint2)
457 return max_inline_insns_auto
458 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
459 return max_inline_insns_auto;
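/* A worked example with hypothetical parameter values: if
   param_max_inline_insns_auto is 30 and param_inline_heuristics_hint_percent
   is 200, a match in one hint group raises the auto bound to
   30 * 200 / 100 = 60; when both groups match, the percentage is squared
   (and capped at 1000000) before being applied, raising the bound further.  */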
462 /* Decide if we can inline the edge and possibly update
463 the inline_failed reason.
464 We check whether inlining is possible at all and whether
465 caller growth limits allow doing so.
467 If REPORT is true, output the reason to the dump file.
469 If DISREGARD_LIMITS is true, ignore size limits. */
471 static bool
472 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
473 bool disregard_limits = false, bool early = false)
475 gcc_checking_assert (e->inline_failed);
477 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
479 if (report)
480 report_inline_failed_reason (e);
481 return false;
484 bool inlinable = true;
485 enum availability avail;
486 cgraph_node *caller = (e->caller->inlined_to
487 ? e->caller->inlined_to : e->caller);
488 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
489 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
490 tree callee_tree
491 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
492 /* Check if caller growth allows the inlining. */
493 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
494 && !disregard_limits
495 && !lookup_attribute ("flatten",
496 DECL_ATTRIBUTES (caller->decl))
497 && !caller_growth_limits (e))
498 inlinable = false;
499 else if (callee->externally_visible
500 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
501 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
503 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
504 inlinable = false;
506 /* Don't inline a function with a higher optimization level than the
507 caller. FIXME: this is really just the tip of the iceberg of handling
508 the optimization attribute. */
509 else if (caller_tree != callee_tree)
511 bool always_inline =
512 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
513 && lookup_attribute ("always_inline",
514 DECL_ATTRIBUTES (callee->decl)));
515 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
516 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
518 /* Until GCC 4.9 we did not check the semantics-altering flags
519 below and inlined across optimization boundaries.
520 Enabling the checks below breaks several packages by refusing
521 to inline library always_inline functions. See PR65873.
522 Disable the check for early inlining for now until a better solution
523 is found. */
524 if (always_inline && early)
526 /* There are some options that change IL semantics, which means
527 we cannot inline in these cases for correctness reasons.
528 Not even for always_inline declared functions. */
529 else if (check_match (flag_wrapv)
530 || check_match (flag_trapv)
531 || check_match (flag_pcc_struct_return)
532 || check_maybe_down (optimize_debug)
533 /* When the caller or callee does FP math, be sure the FP codegen
534 flags are compatible. */
535 || ((caller_info->fp_expressions && callee_info->fp_expressions)
536 && (check_maybe_up (flag_rounding_math)
537 || check_maybe_up (flag_trapping_math)
538 || check_maybe_down (flag_unsafe_math_optimizations)
539 || check_maybe_down (flag_finite_math_only)
540 || check_maybe_up (flag_signaling_nans)
541 || check_maybe_down (flag_cx_limited_range)
542 || check_maybe_up (flag_signed_zeros)
543 || check_maybe_down (flag_associative_math)
544 || check_maybe_down (flag_reciprocal_math)
545 || check_maybe_down (flag_fp_int_builtin_inexact)
546 /* Strictly speaking only when the callee contains function
547 calls that may end up setting errno. */
548 || check_maybe_up (flag_errno_math)))
549 /* We do not want code compiled with exceptions to be
550 brought into a non-EH function unless we know that the callee
551 does not throw.
552 This is tracked by DECL_FUNCTION_PERSONALITY. */
553 || (check_maybe_up (flag_non_call_exceptions)
554 && DECL_FUNCTION_PERSONALITY (callee->decl))
555 || (check_maybe_up (flag_exceptions)
556 && DECL_FUNCTION_PERSONALITY (callee->decl))
557 /* When devirtualization is disabled for the callee, it is not safe
558 to inline it, as we may have mangled the type info.
559 Allow early inlining of always-inline functions. */
560 || (!early && check_maybe_down (flag_devirtualize)))
562 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
563 inlinable = false;
565 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
566 else if (always_inline)
568 /* When the user added an attribute to the callee, honor it. */
569 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
570 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
575 /* If an explicit optimize attribute is not used, the mismatch is caused
576 by different command line options used to build different units.
577 Do not care about COMDAT functions - those are intended to be
578 optimized with the optimization flags of the module they are used in.
579 Also do not care about mixing up size/speed optimization when
580 DECL_DISREGARD_INLINE_LIMITS is set. */
581 else if ((callee->merged_comdat
582 && !lookup_attribute ("optimize",
583 DECL_ATTRIBUTES (caller->decl)))
584 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
586 /* If the mismatch is caused by merging two LTO units with different
587 optimization flags, we want to be a bit nicer. However, never inline
588 if one of the functions is not optimized at all. */
589 else if (!opt_for_fn (callee->decl, optimize)
590 || !opt_for_fn (caller->decl, optimize))
592 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
593 inlinable = false;
595 /* If the callee is optimized for size and the caller is not, allow inlining
596 if the code shrinks or we are within the param_max_inline_insns_single
597 limit and the callee is inline (and thus likely a unified comdat).
598 This will allow the caller to run faster. */
599 else if (opt_for_fn (callee->decl, optimize_size)
600 > opt_for_fn (caller->decl, optimize_size))
602 int growth = estimate_edge_growth (e);
603 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
604 && (!DECL_DECLARED_INLINE_P (callee->decl)
605 && growth >= MAX (inline_insns_single (caller, false, false),
606 inline_insns_auto (caller, false, false))))
608 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
609 inlinable = false;
612 /* If the callee is more aggressively optimized for performance than the
613 caller, we generally want to inline only cheap (runtime-wise) functions. */
614 else if (opt_for_fn (callee->decl, optimize_size)
615 < opt_for_fn (caller->decl, optimize_size)
616 || (opt_for_fn (callee->decl, optimize)
617 > opt_for_fn (caller->decl, optimize)))
619 if (estimate_edge_time (e)
620 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
622 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
623 inlinable = false;
629 if (!inlinable && report)
630 report_inline_failed_reason (e);
631 return inlinable;
635 /* Return true if the edge E is inlinable during early inlining. */
637 static bool
638 can_early_inline_edge_p (struct cgraph_edge *e)
640 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
641 /* The early inliner might get called at WPA stage when an IPA pass adds a
642 new function. In this case we cannot really do any early inlining
643 because function bodies are missing. */
644 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
645 return false;
646 if (!gimple_has_body_p (callee->decl))
648 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
649 return false;
651 /* In the early inliner some of the callees may not be in SSA form yet
652 (i.e. the callgraph is cyclic and we have not processed
653 the callee with the early inliner yet). We don't have a CIF code for this
654 case; later we will re-do the decision in the real inliner. */
655 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
656 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
660 " edge not inlinable: not in SSA form\n");
661 return false;
663 if (!can_inline_edge_p (e, true, true)
664 || !can_inline_edge_by_limits_p (e, true, false, true))
665 return false;
666 return true;
670 /* Return number of calls in N. Ignore cheap builtins. */
672 static int
673 num_calls (struct cgraph_node *n)
675 struct cgraph_edge *e;
676 int num = 0;
678 for (e = n->callees; e; e = e->next_callee)
679 if (!is_inexpensive_builtin (e->callee->decl))
680 num++;
681 return num;
687 /* Return true if we are interested in inlining this small function. */
687 static bool
688 want_early_inline_function_p (struct cgraph_edge *e)
690 bool want_inline = true;
691 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
693 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
695 /* For AutoFDO, we need to make sure that before the profile summary, all
696 hot paths' IR looks exactly the same as in the profiled binary. As a
697 result, in the early inliner we disregard the size limit and inline a
698 callsite if:
699 * it is inlined in the profiled binary, and
700 * its cloned callee has enough samples to be considered "hot". */
701 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
703 else if (!DECL_DECLARED_INLINE_P (callee->decl)
704 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
706 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
707 report_inline_failed_reason (e);
708 want_inline = false;
710 else
712 /* First take care of very large functions. */
713 int min_growth = estimate_min_edge_growth (e), growth = 0;
714 int n;
715 int early_inlining_insns = param_early_inlining_insns;
717 if (min_growth > early_inlining_insns)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
721 " will not early inline: %C->%C, "
722 "call is cold and code would grow "
723 "at least by %i\n",
724 e->caller, callee,
725 min_growth);
726 want_inline = false;
728 else
729 growth = estimate_edge_growth (e);
732 if (!want_inline || growth <= param_max_inline_insns_size)
734 else if (!e->maybe_hot_p ())
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 " will not early inline: %C->%C, "
739 "call is cold and code would grow by %i\n",
740 e->caller, callee,
741 growth);
742 want_inline = false;
744 else if (growth > early_inlining_insns)
746 if (dump_enabled_p ())
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
748 " will not early inline: %C->%C, "
749 "growth %i exceeds --param early-inlining-insns\n",
750 e->caller, callee, growth);
751 want_inline = false;
753 else if ((n = num_calls (callee)) != 0
754 && growth * (n + 1) > early_inlining_insns)
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
758 " will not early inline: %C->%C, "
759 "growth %i exceeds --param early-inlining-insns "
760 "divided by number of calls\n",
761 e->caller, callee, growth);
762 want_inline = false;
765 return want_inline;
768 /* Compute time of the edge->caller + edge->callee execution when inlining
769 does not happen. */
771 inline sreal
772 compute_uninlined_call_time (struct cgraph_edge *edge,
773 sreal uninlined_call_time,
774 sreal freq)
776 cgraph_node *caller = (edge->caller->inlined_to
777 ? edge->caller->inlined_to
778 : edge->caller);
780 if (freq > 0)
781 uninlined_call_time *= freq;
782 else
783 uninlined_call_time = uninlined_call_time >> 11;
785 sreal caller_time = ipa_fn_summaries->get (caller)->time;
786 return uninlined_call_time + caller_time;
789 /* Same as compute_uninlined_call_time but compute the time when inlining
790 does happen. */
792 inline sreal
793 compute_inlined_call_time (struct cgraph_edge *edge,
794 sreal time,
795 sreal freq)
797 cgraph_node *caller = (edge->caller->inlined_to
798 ? edge->caller->inlined_to
799 : edge->caller);
800 sreal caller_time = ipa_fn_summaries->get (caller)->time;
802 if (freq > 0)
803 time *= freq;
804 else
805 time = time >> 11;
807 /* This calculation should match the one in ipa-fnsummary.c
808 (estimate_edge_size_and_time). */
809 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
810 time += caller_time;
811 if (time <= 0)
812 time = ((sreal) 1) >> 8;
813 gcc_checking_assert (time >= 0);
814 return time;
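/* Note that both helpers above fall back to scaling the time by 1/2048 (the
   ">> 11" shift) when the edge frequency is not positive, so calls whose
   frequency is unknown or zero are treated as executed very rarely.  */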
817 /* Determine the time saved by inlining EDGE of frequency FREQ
818 where the callee's runtime w/o inlining is UNINLINED_TIME
819 and with inlining is INLINED_TIME. */
821 inline sreal
822 inlining_speedup (struct cgraph_edge *edge,
823 sreal freq,
824 sreal uninlined_time,
825 sreal inlined_time)
827 sreal speedup = uninlined_time - inlined_time;
828 /* Handling of call_time should match the one in ipa-fnsummary.c
829 (estimate_edge_size_and_time). */
830 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
832 if (freq > 0)
834 speedup = (speedup + call_time);
835 if (freq != 1)
836 speedup = speedup * freq;
838 else if (freq == 0)
839 speedup = speedup >> 11;
840 gcc_checking_assert (speedup >= 0);
841 return speedup;
844 /* Return true if the speedup for inlining E is bigger than
845 param_inline_min_speedup. */
847 static bool
848 big_speedup_p (struct cgraph_edge *e)
850 sreal unspec_time;
851 sreal spec_time = estimate_edge_time (e, &unspec_time);
852 sreal freq = e->sreal_frequency ();
853 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
854 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
855 cgraph_node *caller = (e->caller->inlined_to
856 ? e->caller->inlined_to
857 : e->caller);
858 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
860 if ((time - inlined_time) * 100 > time * limit)
861 return true;
862 return false;
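/* For example, with a hypothetical param_inline_min_speedup of 30 the test
   above succeeds only when (time - inlined_time) * 100 > time * 30, i.e. when
   inlining is expected to remove more than 30% of the estimated caller plus
   callee execution time.  */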
865 /* Return true if we are interested in inlining this small function.
866 When REPORT is true, report the reason to the dump file. */
868 static bool
869 want_inline_small_function_p (struct cgraph_edge *e, bool report)
871 bool want_inline = true;
872 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
873 cgraph_node *to = (e->caller->inlined_to
874 ? e->caller->inlined_to : e->caller);
876 /* Allow this function to be called before can_inline_edge_p,
877 since it's usually cheaper. */
878 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
879 want_inline = false;
880 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
882 else if (!DECL_DECLARED_INLINE_P (callee->decl)
883 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
885 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
886 want_inline = false;
888 /* Do a fast and conservative check whether the function can be a good
889 inline candidate. */
890 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
891 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
892 && ipa_fn_summaries->get (callee)->min_size
893 - ipa_call_summaries->get (e)->call_stmt_size
894 > inline_insns_auto (e->caller, true, true))
896 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
897 want_inline = false;
899 else if ((DECL_DECLARED_INLINE_P (callee->decl)
900 || e->count.ipa ().nonzero_p ())
901 && ipa_fn_summaries->get (callee)->min_size
902 - ipa_call_summaries->get (e)->call_stmt_size
903 > inline_insns_single (e->caller, true, true))
905 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
906 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
907 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
908 want_inline = false;
910 else
912 int growth = estimate_edge_growth (e);
913 ipa_hints hints = estimate_edge_hints (e);
914 /* We have two independent groups of hints. If a hint from either
915 group matches, the limits are increased. If both groups match, the
916 limit is increased even more. */
917 bool apply_hints = (hints & (INLINE_HINT_indirect_call
918 | INLINE_HINT_known_hot
919 | INLINE_HINT_loop_iterations
920 | INLINE_HINT_loop_stride));
921 bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
923 if (growth <= opt_for_fn (to->decl,
924 param_max_inline_insns_size))
926 /* Apply the param_max_inline_insns_single limit. Do not do so when
927 hints suggest that inlining the given function is very profitable.
928 Avoid computing big_speedup_p when it is not necessary to change the
929 outcome of the decision. */
930 else if (DECL_DECLARED_INLINE_P (callee->decl)
931 && growth >= inline_insns_single (e->caller, apply_hints,
932 apply_hints2)
933 && (apply_hints || apply_hints2
934 || growth >= inline_insns_single (e->caller, true,
935 apply_hints2)
936 || !big_speedup_p (e)))
938 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
939 want_inline = false;
941 else if (!DECL_DECLARED_INLINE_P (callee->decl)
942 && !opt_for_fn (e->caller->decl, flag_inline_functions)
943 && growth >= opt_for_fn (to->decl,
944 param_max_inline_insns_small))
946 /* growth_positive_p is expensive, always test it last. */
947 if (growth >= inline_insns_single (e->caller, false, false)
948 || growth_positive_p (callee, e, growth))
950 e->inline_failed = CIF_NOT_DECLARED_INLINED;
951 want_inline = false;
954 /* Apply the param_max_inline_insns_auto limit for functions not declared
955 inline. Bypass the limit when the speedup seems big. */
956 else if (!DECL_DECLARED_INLINE_P (callee->decl)
957 && growth >= inline_insns_auto (e->caller, apply_hints,
958 apply_hints2)
959 && (apply_hints || apply_hints2
960 || growth >= inline_insns_auto (e->caller, true,
961 apply_hints2)
962 || !big_speedup_p (e)))
964 /* growth_positive_p is expensive, always test it last. */
965 if (growth >= inline_insns_single (e->caller, false, false)
966 || growth_positive_p (callee, e, growth))
968 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
969 want_inline = false;
972 /* If the call is cold, do not inline when the function body would grow. */
973 else if (!e->maybe_hot_p ()
974 && (growth >= inline_insns_single (e->caller, false, false)
975 || growth_positive_p (callee, e, growth)))
977 e->inline_failed = CIF_UNLIKELY_CALL;
978 want_inline = false;
981 if (!want_inline && report)
982 report_inline_failed_reason (e);
983 return want_inline;
986 /* EDGE is a self-recursive edge.
987 We handle two cases - when function A is inlined into itself
988 or when function A is being inlined into another inlined copy of function
989 A within function B.
991 In the first case OUTER_NODE points to the toplevel copy of A, while
992 in the second case OUTER_NODE points to the outermost copy of A in B.
994 In both cases we want to be extra selective, since
995 inlining the call will just cause new recursive calls to appear. */
997 static bool
998 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
999 struct cgraph_node *outer_node,
1000 bool peeling,
1001 int depth)
1003 char const *reason = NULL;
1004 bool want_inline = true;
1005 sreal caller_freq = 1;
1006 int max_depth = opt_for_fn (outer_node->decl,
1007 param_max_inline_recursive_depth_auto);
1009 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1010 max_depth = opt_for_fn (outer_node->decl,
1011 param_max_inline_recursive_depth);
1013 if (!edge->maybe_hot_p ())
1015 reason = "recursive call is cold";
1016 want_inline = false;
1018 else if (depth > max_depth)
1020 reason = "--param max-inline-recursive-depth exceeded.";
1021 want_inline = false;
1023 else if (outer_node->inlined_to
1024 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1026 reason = "caller frequency is 0";
1027 want_inline = false;
1030 if (!want_inline)
1032 /* Inlining a self-recursive function into a copy of itself within another
1033 function is a transformation similar to loop peeling.
1035 Peeling is profitable if we can inline enough copies to make the probability
1036 of an actual call to the self-recursive function very small. Be sure that
1037 the probability of recursion is small.
1039 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1040 This way the expected number of recursions is at most max_depth. */
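/* One way to read the bound above: if each level of the recursion is entered
   with probability at most p = 1 - 1/max_depth, the expected number of levels
   is bounded by the geometric series 1 + p + p^2 + ... = 1 / (1 - p)
   = max_depth.  */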
1041 else if (peeling)
1043 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1044 int i;
1045 for (i = 1; i < depth; i++)
1046 max_prob = max_prob * max_prob;
1047 if (edge->sreal_frequency () >= max_prob * caller_freq)
1049 reason = "frequency of recursive call is too large";
1050 want_inline = false;
1053 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
1054 recursion depth is large. We reduce the function call overhead and increase
1055 the chance that things fit in the hardware return predictor.
1057 Recursive inlining might however increase the cost of stack frame setup,
1058 actually slowing down functions whose recursion tree is wide rather than
1059 deep.
1061 Deciding reliably when to do recursive inlining without profile feedback
1062 is tricky. For now we disable recursive inlining when the probability of
1063 self recursion is low.
1065 Recursive inlining of a self-recursive call within a loop also results in
1066 large loop depths that generally optimize badly. We may want to throttle
1067 down inlining in those cases. In particular this seems to happen in one
1068 of the libstdc++ rb-tree methods. */
1069 else
1071 if (edge->sreal_frequency () * 100
1072 <= caller_freq
1073 * opt_for_fn (outer_node->decl,
1074 param_min_inline_recursive_probability))
1076 reason = "frequency of recursive call is too small";
1077 want_inline = false;
1080 if (!want_inline && dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1082 " not inlining recursively: %s\n", reason);
1083 return want_inline;
1086 /* Return true when NODE has an uninlinable caller;
1087 set HAS_HOT_CALL if it has a hot call.
1088 Worker for cgraph_for_node_and_aliases. */
1090 static bool
1091 check_callers (struct cgraph_node *node, void *has_hot_call)
1093 struct cgraph_edge *e;
1094 for (e = node->callers; e; e = e->next_caller)
1096 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1097 || !opt_for_fn (e->caller->decl, optimize))
1098 return true;
1099 if (!can_inline_edge_p (e, true))
1100 return true;
1101 if (e->recursive_p ())
1102 return true;
1103 if (!can_inline_edge_by_limits_p (e, true))
1104 return true;
1105 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1106 *(bool *)has_hot_call = true;
1108 return false;
1111 /* If NODE has a caller, return true. */
1113 static bool
1114 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1116 if (node->callers)
1117 return true;
1118 return false;
1121 /* Decide if inlining NODE would reduce unit size by eliminating
1122 the offline copy of the function.
1123 When COLD is true, cold calls are considered too. */
1125 static bool
1126 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1128 bool has_hot_call = false;
1130 /* Aliases get inlined along with the function they alias. */
1131 if (node->alias)
1132 return false;
1133 /* Already inlined? */
1134 if (node->inlined_to)
1135 return false;
1136 /* Does it have callers? */
1137 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1138 return false;
1139 /* Inlining into all callers would increase size? */
1140 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1141 return false;
1142 /* All inlines must be possible. */
1143 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1144 true))
1145 return false;
1146 if (!cold && !has_hot_call)
1147 return false;
1148 return true;
1151 /* Return true if WHERE, whose size is SIZE, is a possible candidate for the
1152 wrapper heuristics in edge_badness. */
1154 static bool
1155 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1157 return size < (DECL_DECLARED_INLINE_P (where->decl)
1158 ? inline_insns_single (where, false, false)
1159 : inline_insns_auto (where, false, false));
1162 /* A cost model driving the inlining heuristics in such a way that the edges
1163 with the smallest badness are inlined first. After each inlining is
1164 performed, the costs of all caller edges of the affected nodes are
1165 recomputed, so the metric may accurately depend on values such as the number
1166 of inlinable callers of the function or the function body size. */
1168 static sreal
1169 edge_badness (struct cgraph_edge *edge, bool dump)
1171 sreal badness;
1172 int growth;
1173 sreal edge_time, unspec_edge_time;
1174 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1175 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1176 ipa_hints hints;
1177 cgraph_node *caller = (edge->caller->inlined_to
1178 ? edge->caller->inlined_to
1179 : edge->caller);
1181 growth = estimate_edge_growth (edge);
1182 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1183 hints = estimate_edge_hints (edge);
1184 gcc_checking_assert (edge_time >= 0);
1185 /* Check that the inlined time is better, but tolerate some roundoff issues.
1186 FIXME: When the callee profile drops to 0 we account calls more. This
1187 should be fixed by never doing that. */
1188 gcc_checking_assert ((edge_time * 100
1189 - callee_info->time * 101).to_int () <= 0
1190 || callee->count.ipa ().initialized_p ());
1191 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1193 if (dump)
1195 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1196 edge->caller->dump_name (),
1197 edge->callee->dump_name ());
1198 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1199 growth,
1200 edge_time.to_double (),
1201 unspec_edge_time.to_double ());
1202 ipa_dump_hints (dump_file, hints);
1203 if (big_speedup_p (edge))
1204 fprintf (dump_file, " big_speedup");
1205 fprintf (dump_file, "\n");
1208 /* Always prefer inlining that saves code size. */
1209 if (growth <= 0)
1211 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1212 if (dump)
1213 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1214 growth);
1216 /* Inlining into EXTERNAL functions is not going to change anything unless
1217 they are themselves inlined. */
1218 else if (DECL_EXTERNAL (caller->decl))
1220 if (dump)
1221 fprintf (dump_file, " max: function is external\n");
1222 return sreal::max ();
1224 /* When the profile is available, compute badness as:
1226 time_saved * caller_count
1227 goodness = -------------------------------------------------
1228 growth_of_caller * overall_growth * combined_size
1230 badness = - goodness
1232 Again, use a negative value to make calls with a profile appear hotter
1233 than calls without. */
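/* All quantities in the fraction above are positive, so goodness is positive
   and badness negative; edges that save more time per unit of caller growth,
   overall growth and combined size get a more negative (smaller) badness and
   are therefore extracted from the heap earlier.  */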
1235 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1236 || caller->count.ipa ().nonzero_p ())
1238 sreal numerator, denominator;
1239 int overall_growth;
1240 sreal freq = edge->sreal_frequency ();
1242 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1243 if (numerator <= 0)
1244 numerator = ((sreal) 1 >> 8);
1245 if (caller->count.ipa ().nonzero_p ())
1246 numerator *= caller->count.ipa ().to_gcov_type ();
1247 else if (caller->count.ipa ().initialized_p ())
1248 numerator = numerator >> 11;
1249 denominator = growth;
1251 overall_growth = callee_info->growth;
1253 /* Look for inliner wrappers of the form:
1255 inline_caller ()
1257 do_fast_job...
1258 if (need_more_work)
1259 noninline_callee ();
1261 Without penalizing this case, we usually inline noninline_callee
1262 into the inline_caller because overall_growth is small, preventing
1263 further inlining of inline_caller.
1265 Penalize only callgraph edges to functions with small overall
1266 growth ... */
1268 if (growth > overall_growth
1269 /* ... and having only one caller which is not inlined ... */
1270 && callee_info->single_caller
1271 && !edge->caller->inlined_to
1272 /* ... and edges executed only conditionally ... */
1273 && freq < 1
1274 /* ... consider case where callee is not inline but caller is ... */
1275 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1276 && DECL_DECLARED_INLINE_P (caller->decl))
1277 /* ... or when early optimizers decided to split and edge
1278 frequency still indicates splitting is a win ... */
1279 || (callee->split_part && !caller->split_part
1280 && freq * 100
1281 < opt_for_fn (caller->decl,
1282 param_partial_inlining_entry_probability)
1283 /* ... and do not overwrite user specified hints. */
1284 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1285 || DECL_DECLARED_INLINE_P (caller->decl)))))
1287 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1288 int caller_growth = caller_info->growth;
1290 /* Only apply the penalty when caller looks like inline candidate,
1291 and it is not called once. */
1292 if (!caller_info->single_caller && overall_growth < caller_growth
1293 && caller_info->inlinable
1294 && wrapper_heuristics_may_apply
1295 (caller, ipa_size_summaries->get (caller)->size))
1297 if (dump)
1298 fprintf (dump_file,
1299 " Wrapper penalty. Increasing growth %i to %i\n",
1300 overall_growth, caller_growth);
1301 overall_growth = caller_growth;
1304 if (overall_growth > 0)
1306 /* Strongly prefer functions with few callers that can be inlined
1307 fully. The square root here leads to smaller binaries on average.
1308 Watch however for extreme cases and return to a linear function
1309 when growth is large. */
1310 if (overall_growth < 256)
1311 overall_growth *= overall_growth;
1312 else
1313 overall_growth += 256 * 256 - 256;
1314 denominator *= overall_growth;
1316 denominator *= ipa_size_summaries->get (caller)->size + growth;
1318 badness = - numerator / denominator;
1320 if (dump)
1322 fprintf (dump_file,
1323 " %f: guessed profile. frequency %f, count %" PRId64
1324 " caller count %" PRId64
1325 " time saved %f"
1326 " overall growth %i (current) %i (original)"
1327 " %i (compensated)\n",
1328 badness.to_double (),
1329 freq.to_double (),
1330 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1331 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1332 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1333 estimate_growth (callee),
1334 callee_info->growth, overall_growth);
1337 /* When the function-local profile is not available or does not give
1338 useful information (i.e. the frequency is zero), base the cost on the
1339 loop nest and overall size growth, so we optimize for the overall number
1340 of functions fully inlined in the program. */
1341 else
1343 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1344 badness = growth;
1346 /* Decrease badness if call is nested. */
1347 if (badness > 0)
1348 badness = badness >> nest;
1349 else
1350 badness = badness << nest;
1351 if (dump)
1352 fprintf (dump_file, " %f: no profile. nest %i\n",
1353 badness.to_double (), nest);
1355 gcc_checking_assert (badness != 0);
1357 if (edge->recursive_p ())
1358 badness = badness.shift (badness > 0 ? 4 : -4);
1359 if ((hints & (INLINE_HINT_indirect_call
1360 | INLINE_HINT_loop_iterations
1361 | INLINE_HINT_loop_stride))
1362 || callee_info->growth <= 0)
1363 badness = badness.shift (badness > 0 ? -2 : 2);
1364 if (hints & INLINE_HINT_builtin_constant_p)
1365 badness = badness.shift (badness > 0 ? -4 : 4);
1366 if (hints & (INLINE_HINT_same_scc))
1367 badness = badness.shift (badness > 0 ? 3 : -3);
1368 else if (hints & (INLINE_HINT_in_scc))
1369 badness = badness.shift (badness > 0 ? 2 : -2);
1370 else if (hints & (INLINE_HINT_cross_module))
1371 badness = badness.shift (badness > 0 ? 1 : -1);
1372 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1373 badness = badness.shift (badness > 0 ? -4 : 4);
1374 else if ((hints & INLINE_HINT_declared_inline))
1375 badness = badness.shift (badness > 0 ? -3 : 3);
1376 if (dump)
1377 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1378 return badness;
1381 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1382 static inline void
1383 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1385 sreal badness = edge_badness (edge, false);
1386 if (edge->aux)
1388 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1389 gcc_checking_assert (n->get_data () == edge);
1391 /* fibonacci_heap::replace_key does busy updating of the
1392 heap that is unnecessarily expensive.
1393 We do lazy increases: after extracting the minimum, if the key
1394 turns out to be out of date, it is re-inserted into the heap
1395 with the correct value. */
1396 if (badness < n->get_key ())
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1400 fprintf (dump_file,
1401 " decreasing badness %s -> %s, %f to %f\n",
1402 edge->caller->dump_name (),
1403 edge->callee->dump_name (),
1404 n->get_key ().to_double (),
1405 badness.to_double ());
1407 heap->decrease_key (n, badness);
1410 else
1412 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file,
1415 " enqueuing call %s -> %s, badness %f\n",
1416 edge->caller->dump_name (),
1417 edge->callee->dump_name (),
1418 badness.to_double ());
1420 edge->aux = heap->insert (badness, edge);
1425 /* NODE was inlined.
1426 All caller edges need to be reset because
1427 size estimates change. Similarly callees need to be reset
1428 because a better context may be known. */
1430 static void
1431 reset_edge_caches (struct cgraph_node *node)
1433 struct cgraph_edge *edge;
1434 struct cgraph_edge *e = node->callees;
1435 struct cgraph_node *where = node;
1436 struct ipa_ref *ref;
1438 if (where->inlined_to)
1439 where = where->inlined_to;
1441 reset_node_cache (where);
1443 if (edge_growth_cache != NULL)
1444 for (edge = where->callers; edge; edge = edge->next_caller)
1445 if (edge->inline_failed)
1446 edge_growth_cache->remove (edge);
1448 FOR_EACH_ALIAS (where, ref)
1449 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1451 if (!e)
1452 return;
1454 while (true)
1455 if (!e->inline_failed && e->callee->callees)
1456 e = e->callee->callees;
1457 else
1459 if (edge_growth_cache != NULL && e->inline_failed)
1460 edge_growth_cache->remove (e);
1461 if (e->next_callee)
1462 e = e->next_callee;
1463 else
1467 if (e->caller == node)
1468 return;
1469 e = e->caller->callers;
1471 while (!e->next_callee);
1472 e = e->next_callee;
1477 /* Recompute HEAP nodes for each caller of NODE.
1478 UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1479 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1480 it is inlinable. Otherwise check all edges. */
1482 static void
1483 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1484 bitmap updated_nodes,
1485 struct cgraph_edge *check_inlinablity_for)
1487 struct cgraph_edge *edge;
1488 struct ipa_ref *ref;
1490 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1491 || node->inlined_to)
1492 return;
1493 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1494 return;
1496 FOR_EACH_ALIAS (node, ref)
1498 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1499 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1502 for (edge = node->callers; edge; edge = edge->next_caller)
1503 if (edge->inline_failed)
1505 if (!check_inlinablity_for
1506 || check_inlinablity_for == edge)
1508 if (can_inline_edge_p (edge, false)
1509 && want_inline_small_function_p (edge, false)
1510 && can_inline_edge_by_limits_p (edge, false))
1511 update_edge_key (heap, edge);
1512 else if (edge->aux)
1514 report_inline_failed_reason (edge);
1515 heap->delete_node ((edge_heap_node_t *) edge->aux);
1516 edge->aux = NULL;
1519 else if (edge->aux)
1520 update_edge_key (heap, edge);
1524 /* Recompute HEAP nodes for each uninlined call in NODE.
1525 If UPDATE_SINCE is non-NULL, check whether edges called within that function
1526 are inlinable (typically UPDATE_SINCE is the inline clone we introduced,
1527 where all edges have a new context).
1529 This is used when we know that edge badnesses are only going to increase
1530 (we introduced a new call site) and thus all we need to do is insert the
1531 newly created edges into the heap. */
1533 static void
1534 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1535 struct cgraph_node *update_since,
1536 bitmap updated_nodes)
1538 struct cgraph_edge *e = node->callees;
1539 bool check_inlinability = update_since == node;
1541 if (!e)
1542 return;
1543 while (true)
1544 if (!e->inline_failed && e->callee->callees)
1546 if (e->callee == update_since)
1547 check_inlinability = true;
1548 e = e->callee->callees;
1550 else
1552 enum availability avail;
1553 struct cgraph_node *callee;
1554 if (!check_inlinability)
1556 if (e->aux
1557 && !bitmap_bit_p (updated_nodes,
1558 e->callee->ultimate_alias_target
1559 (&avail, e->caller)->get_uid ()))
1560 update_edge_key (heap, e);
1562 /* We do not reset the callee growth cache here. Since we added a new call,
1563 growth should have just increased and consequently the badness metric
1564 doesn't need updating. */
1565 else if (e->inline_failed
1566 && (callee = e->callee->ultimate_alias_target (&avail,
1567 e->caller))
1568 && avail >= AVAIL_AVAILABLE
1569 && ipa_fn_summaries->get (callee) != NULL
1570 && ipa_fn_summaries->get (callee)->inlinable
1571 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1573 if (can_inline_edge_p (e, false)
1574 && want_inline_small_function_p (e, false)
1575 && can_inline_edge_by_limits_p (e, false))
1577 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1578 gcc_checking_assert (check_inlinability || e->aux);
1579 update_edge_key (heap, e);
1581 else if (e->aux)
1583 report_inline_failed_reason (e);
1584 heap->delete_node ((edge_heap_node_t *) e->aux);
1585 e->aux = NULL;
1588 /* In case we redirected to an unreachable node, we only need to remove the
1589 fibheap entry. */
1590 else if (e->aux)
1592 heap->delete_node ((edge_heap_node_t *) e->aux);
1593 e->aux = NULL;
1595 if (e->next_callee)
1596 e = e->next_callee;
1597 else
1601 if (e->caller == node)
1602 return;
1603 if (e->caller == update_since)
1604 check_inlinability = false;
1605 e = e->caller->callers;
1607 while (!e->next_callee);
1608 e = e->next_callee;
1613 /* Enqueue all recursive calls from NODE into the priority queue, ordered by
1614 how much we want to recursively inline the call. */
1616 static void
1617 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1618 edge_heap_t *heap)
1620 struct cgraph_edge *e;
1621 enum availability avail;
1623 for (e = where->callees; e; e = e->next_callee)
1624 if (e->callee == node
1625 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1626 && avail > AVAIL_INTERPOSABLE))
1627 heap->insert (-e->sreal_frequency (), e);
1628 for (e = where->callees; e; e = e->next_callee)
1629 if (!e->inline_failed)
1630 lookup_recursive_calls (node, e->callee, heap);
1633 /* Decide on recursive inlining: in case the function has recursive calls,
1634 inline until the body size reaches the given limit. If any new indirect
1635 edges are discovered in the process, add them to *NEW_EDGES, unless
1636 NEW_EDGES is NULL. */
1638 static bool
1639 recursive_inlining (struct cgraph_edge *edge,
1640 vec<cgraph_edge *> *new_edges)
1642 cgraph_node *to = (edge->caller->inlined_to
1643 ? edge->caller->inlined_to : edge->caller);
1644 int limit = opt_for_fn (to->decl,
1645 param_max_inline_insns_recursive_auto);
1646 edge_heap_t heap (sreal::min ());
1647 struct cgraph_node *node;
1648 struct cgraph_edge *e;
1649 struct cgraph_node *master_clone = NULL, *next;
1650 int depth = 0;
1651 int n = 0;
1653 node = edge->caller;
1654 if (node->inlined_to)
1655 node = node->inlined_to;
1657 if (DECL_DECLARED_INLINE_P (node->decl))
1658 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1660 /* Make sure that the function is small enough to be considered for inlining. */
1661 if (estimate_size_after_inlining (node, edge) >= limit)
1662 return false;
1663 lookup_recursive_calls (node, node, &heap);
1664 if (heap.empty ())
1665 return false;
1667 if (dump_file)
1668 fprintf (dump_file,
1669 " Performing recursive inlining on %s\n", node->dump_name ());
1671 /* Do the inlining and update the list of recursive calls during the process. */
1672 while (!heap.empty ())
1674 struct cgraph_edge *curr = heap.extract_min ();
1675 struct cgraph_node *cnode, *dest = curr->callee;
1677 if (!can_inline_edge_p (curr, true)
1678 || !can_inline_edge_by_limits_p (curr, true))
1679 continue;
1681 /* MASTER_CLONE is produced in the case we have already started modifying
1682 the function. Be sure to redirect the edge to the original body before
1683 estimating growth; otherwise we will be seeing growth after inlining
1684 the already modified body. */
1685 if (master_clone)
1687 curr->redirect_callee (master_clone);
1688 if (edge_growth_cache != NULL)
1689 edge_growth_cache->remove (curr);
1692 if (estimate_size_after_inlining (node, curr) > limit)
1694 curr->redirect_callee (dest);
1695 if (edge_growth_cache != NULL)
1696 edge_growth_cache->remove (curr);
1697 break;
1700 depth = 1;
1701 for (cnode = curr->caller;
1702 cnode->inlined_to; cnode = cnode->callers->caller)
1703 if (node->decl
1704 == curr->callee->ultimate_alias_target ()->decl)
1705 depth++;
1707 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1709 curr->redirect_callee (dest);
1710 if (edge_growth_cache != NULL)
1711 edge_growth_cache->remove (curr);
1712 continue;
1715 if (dump_file)
1717 fprintf (dump_file,
1718 " Inlining call of depth %i", depth);
1719 if (node->count.nonzero_p () && curr->count.initialized_p ())
1721 fprintf (dump_file, " called approx. %.2f times per call",
1722 (double)curr->count.to_gcov_type ()
1723 / node->count.to_gcov_type ());
1725 fprintf (dump_file, "\n");
1727 if (!master_clone)
1729 /* We need original clone to copy around. */
1730 master_clone = node->create_clone (node->decl, node->count,
1731 false, vNULL, true, NULL, NULL);
1732 for (e = master_clone->callees; e; e = e->next_callee)
1733 if (!e->inline_failed)
1734 clone_inlined_nodes (e, true, false, NULL);
1735 curr->redirect_callee (master_clone);
1736 if (edge_growth_cache != NULL)
1737 edge_growth_cache->remove (curr);
1740 inline_call (curr, false, new_edges, &overall_size, true);
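/* The body of NODE has changed; drop its cached estimates and rescan the
   freshly inlined body for further recursive calls to enqueue. */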
1741 reset_node_cache (node);
1742 lookup_recursive_calls (node, curr->callee, &heap);
1743 n++;
1746 if (!heap.empty () && dump_file)
1747 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1749 if (!master_clone)
1750 return false;
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1754 "\n Inlined %i times, "
1755 "body grown from size %i to %i, time %f to %f\n", n,
1756 ipa_size_summaries->get (master_clone)->size,
1757 ipa_size_summaries->get (node)->size,
1758 ipa_fn_summaries->get (master_clone)->time.to_double (),
1759 ipa_fn_summaries->get (node)->time.to_double ());
1761 /* Remove the master clone we used for inlining. We rely on the fact that
1762 clones inlined into the master clone get queued just before the master
1763 clone, so we don't need recursion. */
1764 for (node = symtab->first_function (); node != master_clone;
1765 node = next)
1767 next = symtab->next_function (node);
1768 if (node->inlined_to == master_clone)
1769 node->remove ();
1771 master_clone->remove ();
1772 return true;
1776 /* Given the whole-compilation-unit size estimate INSNS, compute how large we can
1777 allow the unit to grow. */
1779 static int64_t
1780 compute_max_insns (cgraph_node *node, int insns)
1782 int max_insns = insns;
1783 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1784 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1786 return ((int64_t) max_insns
1787 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
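/* A worked example (the parameter values are illustrative, not necessarily
   the defaults): with --param inline-unit-growth=40 and a unit of 50000
   instructions (assumed to be above param_large_unit_insns), the unit may
   grow to 50000 * (100 + 40) / 100 = 70000 instructions; units smaller than
   param_large_unit_insns are treated as if they had that many instructions. */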
1791 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1793 static void
1794 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1796 while (new_edges.length () > 0)
1798 struct cgraph_edge *edge = new_edges.pop ();
1800 gcc_assert (!edge->aux);
1801 gcc_assert (edge->callee);
1802 if (edge->inline_failed
1803 && can_inline_edge_p (edge, true)
1804 && want_inline_small_function_p (edge, true)
1805 && can_inline_edge_by_limits_p (edge, true))
1806 edge->aux = heap->insert (edge_badness (edge, false), edge);
1810 /* Remove EDGE from the fibheap. */
1812 static void
1813 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1815 if (e->aux)
1817 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1818 e->aux = NULL;
1822 /* Return true if speculation of edge E seems useful.
1823 If ANTICIPATE_INLINING is true, be conservative and hope that E
1824 may get inlined. */
1826 bool
1827 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1829 /* If we have already decided to inline the edge, it seems useful. */
1830 if (!e->inline_failed)
1831 return true;
1833 enum availability avail;
1834 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1835 e->caller);
1837 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1839 if (!e->maybe_hot_p ())
1840 return false;
1842 /* See if IP optimizations found something potentially useful about the
1843 function. For now we look only for CONST/PURE flags. Almost everything
1844 else we propagate is useless. */
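/* Knowing that the speculative target is const or pure, while the indirect
   call is not known to be, lets later optimizers CSE or move the direct
   call, so the speculation can pay off even if the call never gets
   inlined. */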
1845 if (avail >= AVAIL_AVAILABLE)
1847 int ecf_flags = flags_from_decl_or_type (target->decl);
1848 if (ecf_flags & ECF_CONST)
1850 if (!(e->speculative_call_indirect_edge ()->indirect_info
1851 ->ecf_flags & ECF_CONST))
1852 return true;
1854 else if (ecf_flags & ECF_PURE)
1856 if (!(e->speculative_call_indirect_edge ()->indirect_info
1857 ->ecf_flags & ECF_PURE))
1858 return true;
1861 /* If we did not manage to inline the function nor redirect
1862 to an ipa-cp clone (these are recognized by the local flag being set),
1863 it is probably pointless to keep the speculation unless the hardware is
1864 missing an indirect call predictor. */
1865 if (!anticipate_inlining && !target->local)
1866 return false;
1867 /* For overwritable targets there is not much to do. */
1868 if (!can_inline_edge_p (e, false)
1869 || !can_inline_edge_by_limits_p (e, false, true))
1870 return false;
1871 /* OK, speculation seems interesting. */
1872 return true;
1875 /* We know that EDGE is not going to be inlined.
1876 See if we can remove speculation. */
1878 static void
1879 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1881 if (edge->speculative && !speculation_useful_p (edge, false))
1883 struct cgraph_node *node = edge->caller;
1884 struct cgraph_node *where = node->inlined_to
1885 ? node->inlined_to : node;
1886 auto_bitmap updated_nodes;
1888 if (edge->count.ipa ().initialized_p ())
1889 spec_rem += edge->count.ipa ();
1890 cgraph_edge::resolve_speculation (edge);
1891 reset_edge_caches (where);
1892 ipa_update_overall_fn_summary (where);
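/* Removing the speculation changed WHERE's summary, so the badness of edges
   into and out of it may have changed; refresh their keys in the heap. */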
1893 update_caller_keys (edge_heap, where,
1894 updated_nodes, NULL);
1895 update_callee_keys (edge_heap, where, NULL,
1896 updated_nodes);
1900 /* Return true if NODE should be accounted in the overall size estimate.
1901 Skip all nodes optimized for size so we can measure the growth of the hot
1902 part of the program regardless of the padding. */
1904 bool
1905 inline_account_function_p (struct cgraph_node *node)
1907 return (!DECL_EXTERNAL (node->decl)
1908 && !opt_for_fn (node->decl, optimize_size)
1909 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1912 /* Count the number of callers of NODE and store it into DATA (which
1913 points to an int). Worker for cgraph_for_node_and_aliases. */
1915 static bool
1916 sum_callers (struct cgraph_node *node, void *data)
1918 struct cgraph_edge *e;
1919 int *num_calls = (int *)data;
1921 for (e = node->callers; e; e = e->next_caller)
1922 (*num_calls)++;
1923 return false;
1926 /* We only propagate across edges with non-interposable callee. */
1928 inline bool
1929 ignore_edge_p (struct cgraph_edge *e)
1931 enum availability avail;
1932 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1933 return (avail <= AVAIL_INTERPOSABLE);
1936 /* We use a greedy algorithm for inlining small functions:
1937 all inline candidates are put into a prioritized heap ordered by
1938 increasing badness.
1940 The inlining of small functions is bounded by the unit growth parameters. */
1942 static void
1943 inline_small_functions (void)
1945 struct cgraph_node *node;
1946 struct cgraph_edge *edge;
1947 edge_heap_t edge_heap (sreal::min ());
1948 auto_bitmap updated_nodes;
1949 int min_size;
1950 auto_vec<cgraph_edge *> new_indirect_edges;
1951 int initial_size = 0;
1952 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1953 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1954 new_indirect_edges.create (8);
1956 edge_removal_hook_holder
1957 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1959 /* Compute overall unit size and other global parameters used by badness
1960 metrics. */
1962 max_count = profile_count::uninitialized ();
1963 ipa_reduced_postorder (order, true, ignore_edge_p);
1964 free (order);
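/* ipa_reduced_postorder is called only for its side effect of storing
   DFS/SCC information in node->aux (consumed below when numbering cycles);
   the order array itself is not needed and is freed immediately. */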
1966 FOR_EACH_DEFINED_FUNCTION (node)
1967 if (!node->inlined_to)
1969 if (!node->alias && node->analyzed
1970 && (node->has_gimple_body_p () || node->thunk)
1971 && opt_for_fn (node->decl, optimize))
1973 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1974 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1976 /* Do not account external functions; they will be optimized out
1977 if not inlined. Also only count the non-cold portion of the program. */
1978 if (inline_account_function_p (node))
1979 initial_size += ipa_size_summaries->get (node)->size;
1980 info->growth = estimate_growth (node);
1982 int num_calls = 0;
1983 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1984 true);
1985 if (num_calls == 1)
1986 info->single_caller = true;
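/* Give every member of a non-trivial strongly connected component (a cycle
   of mutually recursive functions) the same scc_no; the inlining heuristics
   use it elsewhere to recognize edges whose caller and callee belong to the
   same cycle. */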
1987 if (dfs && dfs->next_cycle)
1989 struct cgraph_node *n2;
1990 int id = dfs->scc_no + 1;
1991 for (n2 = node; n2;
1992 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1993 if (opt_for_fn (n2->decl, optimize))
1995 ipa_fn_summary *info2 = ipa_fn_summaries->get
1996 (n2->inlined_to ? n2->inlined_to : n2);
1997 if (info2->scc_no)
1998 break;
1999 info2->scc_no = id;
2004 for (edge = node->callers; edge; edge = edge->next_caller)
2005 max_count = max_count.max (edge->count.ipa ());
2007 ipa_free_postorder_info ();
2008 initialize_growth_caches ();
2010 if (dump_file)
2011 fprintf (dump_file,
2012 "\nDeciding on inlining of small functions. Starting with size %i.\n",
2013 initial_size);
2015 overall_size = initial_size;
2016 min_size = overall_size;
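/* MIN_SIZE tracks the smallest overall unit size seen so far; the unit
   growth limit in compute_max_insns is derived from it, so total growth is
   bounded relative to the smallest size reached rather than the current
   one. */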
2018 /* Populate the heap with all edges we might inline. */
2020 FOR_EACH_DEFINED_FUNCTION (node)
2022 bool update = false;
2023 struct cgraph_edge *next = NULL;
2024 bool has_speculative = false;
2026 if (!opt_for_fn (node->decl, optimize))
2027 continue;
2029 if (dump_file)
2030 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2032 for (edge = node->callees; edge; edge = edge->next_callee)
2034 if (edge->inline_failed
2035 && !edge->aux
2036 && can_inline_edge_p (edge, true)
2037 && want_inline_small_function_p (edge, true)
2038 && can_inline_edge_by_limits_p (edge, true)
2039 && edge->inline_failed)
2041 gcc_assert (!edge->aux);
2042 update_edge_key (&edge_heap, edge);
2044 if (edge->speculative)
2045 has_speculative = true;
2047 if (has_speculative)
2048 for (edge = node->callees; edge; edge = next)
2050 next = edge->next_callee;
2051 if (edge->speculative
2052 && !speculation_useful_p (edge, edge->aux != NULL))
2054 cgraph_edge::resolve_speculation (edge);
2055 update = true;
2058 if (update)
2060 struct cgraph_node *where = node->inlined_to
2061 ? node->inlined_to : node;
2062 ipa_update_overall_fn_summary (where);
2063 reset_edge_caches (where);
2064 update_caller_keys (&edge_heap, where,
2065 updated_nodes, NULL);
2066 update_callee_keys (&edge_heap, where, NULL,
2067 updated_nodes);
2068 bitmap_clear (updated_nodes);
2072 gcc_assert (in_lto_p
2073 || !(max_count > 0)
2074 || (profile_info && flag_branch_probabilities));
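/* Main greedy loop: repeatedly take the edge with the best (smallest)
   badness, re-validate its possibly stale key, and inline it unless doing
   so would exceed the unit growth limit. */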
2076 while (!edge_heap.empty ())
2078 int old_size = overall_size;
2079 struct cgraph_node *where, *callee;
2080 sreal badness = edge_heap.min_key ();
2081 sreal current_badness;
2082 int growth;
2084 edge = edge_heap.extract_min ();
2085 gcc_assert (edge->aux);
2086 edge->aux = NULL;
2087 if (!edge->inline_failed || !edge->callee->analyzed)
2088 continue;
2090 /* Be sure that the caches are kept consistent.
2091 This check is affected by scaling roundoff errors when an IPA profile
2092 is in use, so we skip it in that case. */
2093 if (flag_checking && !edge->callee->count.ipa_p ()
2094 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2096 sreal cached_badness = edge_badness (edge, false);
2098 int old_size_est = estimate_edge_size (edge);
2099 sreal old_time_est = estimate_edge_time (edge);
2100 int old_hints_est = estimate_edge_hints (edge);
2102 if (edge_growth_cache != NULL)
2103 edge_growth_cache->remove (edge);
2104 reset_node_cache (edge->caller->inlined_to
2105 ? edge->caller->inlined_to
2106 : edge->caller);
2107 gcc_assert (old_size_est == estimate_edge_size (edge));
2108 gcc_assert (old_time_est == estimate_edge_time (edge));
2109 /* FIXME:
2111 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2113 fails with profile feedback because some hints depend on the
2114 maybe_hot_edge_p predicate and, because the callee gets inlined into other
2115 calls, the edge may become cold.
2116 This ought to be fixed by computing relative probabilities
2117 for a given invocation, but that will be better done once the whole
2118 code is converted to sreals. Disable for now and revert to "wrong"
2119 value so enable/disable checking paths agree. */
2120 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2122 /* When updating the edge costs, we only decrease badness in the keys.
2123 Increases of badness are handled lazily; when we see a key with an
2124 out-of-date value on it, we re-insert the edge. */
2125 current_badness = edge_badness (edge, false);
2126 gcc_assert (cached_badness == current_badness);
2127 gcc_assert (current_badness >= badness);
2129 else
2130 current_badness = edge_badness (edge, false);
2131 if (current_badness != badness)
2133 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2135 edge->aux = edge_heap.insert (current_badness, edge);
2136 continue;
2138 else
2139 badness = current_badness;
2142 if (!can_inline_edge_p (edge, true)
2143 || !can_inline_edge_by_limits_p (edge, true))
2145 resolve_noninline_speculation (&edge_heap, edge);
2146 continue;
2149 callee = edge->callee->ultimate_alias_target ();
2150 growth = estimate_edge_growth (edge);
2151 if (dump_file)
2153 fprintf (dump_file,
2154 "\nConsidering %s with %i size\n",
2155 callee->dump_name (),
2156 ipa_size_summaries->get (callee)->size);
2157 fprintf (dump_file,
2158 " to be inlined into %s in %s:%i\n"
2159 " Estimated badness is %f, frequency %.2f.\n",
2160 edge->caller->dump_name (),
2161 edge->call_stmt
2162 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2163 edge->call_stmt))
2164 > BUILTINS_LOCATION)
2165 ? gimple_filename ((const gimple *) edge->call_stmt)
2166 : "unknown",
2167 edge->call_stmt
2168 ? gimple_lineno ((const gimple *) edge->call_stmt)
2169 : -1,
2170 badness.to_double (),
2171 edge->sreal_frequency ().to_double ());
2172 if (edge->count.ipa ().initialized_p ())
2174 fprintf (dump_file, " Called ");
2175 edge->count.ipa ().dump (dump_file);
2176 fprintf (dump_file, " times\n");
2178 if (dump_flags & TDF_DETAILS)
2179 edge_badness (edge, true);
2182 where = edge->caller;
2184 if (overall_size + growth > compute_max_insns (where, min_size)
2185 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2187 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2188 report_inline_failed_reason (edge);
2189 resolve_noninline_speculation (&edge_heap, edge);
2190 continue;
2193 if (!want_inline_small_function_p (edge, true))
2195 resolve_noninline_speculation (&edge_heap, edge);
2196 continue;
2199 profile_count old_count = callee->count;
2201 /* Heuristics for inlining small functions work poorly for
2202 recursive calls where we achieve an effect similar to loop unrolling.
2203 When inlining such an edge seems profitable, leave the decision to the
2204 special-purpose recursive inliner. */
2205 if (edge->recursive_p ())
2207 if (where->inlined_to)
2208 where = where->inlined_to;
2209 if (!recursive_inlining (edge,
2210 opt_for_fn (edge->caller->decl,
2211 flag_indirect_inlining)
2212 ? &new_indirect_edges : NULL))
2214 edge->inline_failed = CIF_RECURSIVE_INLINING;
2215 resolve_noninline_speculation (&edge_heap, edge);
2216 continue;
2218 reset_edge_caches (where);
2219 /* Recursive inliner inlines all recursive calls of the function
2220 at once. Consequently we need to update all callee keys. */
2221 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2222 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2223 update_callee_keys (&edge_heap, where, where, updated_nodes);
2224 bitmap_clear (updated_nodes);
2226 else
2228 struct cgraph_node *outer_node = NULL;
2229 int depth = 0;
2231 /* Consider the case where a self-recursive function A is inlined
2232 into B. This is a desired optimization in some cases, since it
2233 leads to an effect similar to loop peeling and we might completely
2234 optimize out the recursive call. However, we must be extra
2235 selective. */
2237 where = edge->caller;
2238 while (where->inlined_to)
2240 if (where->decl == callee->decl)
2241 outer_node = where, depth++;
2242 where = where->callers->caller;
2244 if (outer_node
2245 && !want_inline_self_recursive_call_p (edge, outer_node,
2246 true, depth))
2248 edge->inline_failed
2249 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2250 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2251 resolve_noninline_speculation (&edge_heap, edge);
2252 continue;
2254 else if (depth && dump_file)
2255 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2257 gcc_checking_assert (!callee->inlined_to);
2259 int old_size = ipa_size_summaries->get (where)->size;
2260 sreal old_time = ipa_fn_summaries->get (where)->time;
2262 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2263 reset_edge_caches (edge->callee);
2264 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2266 /* If caller's size and time increased we do not need to update
2267 all edges because badness is not going to decrease. */
2268 if (old_size <= ipa_size_summaries->get (where)->size
2269 && old_time <= ipa_fn_summaries->get (where)->time
2270 /* The wrapper penalty may be non-monotonic in this respect.
2271 Fortunately it only affects small functions. */
2272 && !wrapper_heuristics_may_apply (where, old_size))
2273 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2274 updated_nodes);
2275 else
2276 update_callee_keys (&edge_heap, where,
2277 edge->callee,
2278 updated_nodes);
2280 where = edge->caller;
2281 if (where->inlined_to)
2282 where = where->inlined_to;
2284 /* Our profitability metric can depend on local properties
2285 such as the number of inlinable calls and the size of the function body.
2286 After inlining, these properties might change for the function we
2287 inlined into (since its body size changed) and for the functions
2288 called by the function we inlined (since the number of their inlinable
2289 callers might change). */
2290 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2291 /* Offline copy count has possibly changed, recompute if profile is
2292 available. */
2293 struct cgraph_node *n
2294 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2295 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2296 && n->count.ipa_p ())
2297 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2298 bitmap_clear (updated_nodes);
2300 if (dump_enabled_p ())
2302 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2304 /* dump_printf can't handle %+i. */
2305 char buf_net_change[100];
2306 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2307 overall_size - old_size);
2309 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2310 " Inlined %C into %C which now has time %f and "
2311 "size %i, net change of %s%s.\n",
2312 edge->callee, edge->caller,
2313 s->time.to_double (),
2314 ipa_size_summaries->get (edge->caller)->size,
2315 buf_net_change,
2316 cross_module_call_p (edge) ? " (cross module)":"");
2318 if (min_size > overall_size)
2320 min_size = overall_size;
2322 if (dump_file)
2323 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2327 free_growth_caches ();
2328 if (dump_enabled_p ())
2329 dump_printf (MSG_NOTE,
2330 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2331 initial_size, overall_size,
2332 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2333 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2336 /* Flatten NODE. Performed both during early inlining and
2337 at IPA inlining time. */
2339 static void
2340 flatten_function (struct cgraph_node *node, bool early, bool update)
2342 struct cgraph_edge *e;
2344 /* We shouldn't be called recursively when we are being processed. */
2345 gcc_assert (node->aux == NULL);
2347 node->aux = (void *) node;
2349 for (e = node->callees; e; e = e->next_callee)
2351 struct cgraph_node *orig_callee;
2352 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2354 /* We've hit a cycle? It is time to give up. */
2355 if (callee->aux)
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2359 "Not inlining %C into %C to avoid cycle.\n",
2360 callee, e->caller);
2361 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2362 e->inline_failed = CIF_RECURSIVE_INLINING;
2363 continue;
2366 /* When the edge is already inlined, we just need to recurse into
2367 it in order to fully flatten the leaves. */
2368 if (!e->inline_failed)
2370 flatten_function (callee, early, false);
2371 continue;
2374 /* The flatten attribute needs to be processed during late inlining. For
2375 extra code quality, however, we do flattening during early optimization,
2376 too. */
2377 if (!early
2378 ? !can_inline_edge_p (e, true)
2379 && !can_inline_edge_by_limits_p (e, true)
2380 : !can_early_inline_edge_p (e))
2381 continue;
2383 if (e->recursive_p ())
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2387 "Not inlining: recursive call.\n");
2388 continue;
2391 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2392 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2396 "Not inlining: SSA form does not match.\n");
2397 continue;
2400 /* Inline the edge and flatten the inline clone. Avoid
2401 recursing through the original node if the node was cloned. */
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2404 " Inlining %C into %C.\n",
2405 callee, e->caller);
2406 orig_callee = callee;
2407 inline_call (e, true, NULL, NULL, false);
2408 if (e->callee != orig_callee)
2409 orig_callee->aux = (void *) node;
2410 flatten_function (e->callee, early, false);
2411 if (e->callee != orig_callee)
2412 orig_callee->aux = NULL;
2415 node->aux = NULL;
2416 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2417 if (update && opt_for_fn (where->decl, optimize))
2418 ipa_update_overall_fn_summary (where);
2421 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2422 DATA points to the number of calls originally found, so we avoid infinite
2423 recursion. */
2425 static bool
2426 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2427 hash_set<cgraph_node *> *callers)
2429 int *num_calls = (int *)data;
2430 bool callee_removed = false;
2432 while (node->callers && !node->inlined_to)
2434 struct cgraph_node *caller = node->callers->caller;
2436 if (!can_inline_edge_p (node->callers, true)
2437 || !can_inline_edge_by_limits_p (node->callers, true)
2438 || node->callers->recursive_p ())
2440 if (dump_file)
2441 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2442 *num_calls = 0;
2443 return false;
2446 if (dump_file)
2448 cgraph_node *ultimate = node->ultimate_alias_target ();
2449 fprintf (dump_file,
2450 "\nInlining %s size %i.\n",
2451 ultimate->dump_name (),
2452 ipa_size_summaries->get (ultimate)->size);
2453 fprintf (dump_file,
2454 " Called once from %s %i insns.\n",
2455 node->callers->caller->dump_name (),
2456 ipa_size_summaries->get (node->callers->caller)->size);
2459 /* Remember which callers we inlined to, delaying updating the
2460 overall summary. */
2461 callers->add (node->callers->caller);
2462 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2463 if (dump_file)
2464 fprintf (dump_file,
2465 " Inlined into %s which now has %i size\n",
2466 caller->dump_name (),
2467 ipa_size_summaries->get (caller)->size);
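/* NUM_CALLS was the number of calls found before we started; once it is
   exhausted, inlining has exposed new calls, so stop to avoid iterating
   forever. */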
2468 if (!(*num_calls)--)
2470 if (dump_file)
2471 fprintf (dump_file, "New calls found; giving up.\n");
2472 return callee_removed;
2474 if (callee_removed)
2475 return true;
2477 return false;
2480 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2481 update. */
2483 static bool
2484 inline_to_all_callers (struct cgraph_node *node, void *data)
2486 hash_set<cgraph_node *> callers;
2487 bool res = inline_to_all_callers_1 (node, data, &callers);
2488 /* Perform the delayed update of the overall summary of all callers
2489 processed. This avoids quadratic behavior in the cases where
2490 we have a lot of calls to the same function. */
2491 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2492 i != callers.end (); ++i)
2493 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2494 return res;
2497 /* Output overall time estimate. */
2498 static void
2499 dump_overall_stats (void)
2501 sreal sum_weighted = 0, sum = 0;
2502 struct cgraph_node *node;
2504 FOR_EACH_DEFINED_FUNCTION (node)
2505 if (!node->inlined_to
2506 && !node->alias)
2508 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2509 if (s != NULL)
2511 sum += s->time;
2512 if (node->count.ipa ().initialized_p ())
2513 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2516 fprintf (dump_file, "Overall time estimate: "
2517 "%f weighted by profile: "
2518 "%f\n", sum.to_double (), sum_weighted.to_double ());
2521 /* Output some useful stats about inlining. */
2523 static void
2524 dump_inline_stats (void)
2526 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2527 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2528 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2529 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2530 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2531 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2532 int64_t reason[CIF_N_REASONS][2];
2533 sreal reason_freq[CIF_N_REASONS];
2534 int i;
2535 struct cgraph_node *node;
2537 memset (reason, 0, sizeof (reason));
2538 for (i=0; i < CIF_N_REASONS; i++)
2539 reason_freq[i] = 0;
2540 FOR_EACH_DEFINED_FUNCTION (node)
2542 struct cgraph_edge *e;
2543 for (e = node->callees; e; e = e->next_callee)
2545 if (e->inline_failed)
2547 if (e->count.ipa ().initialized_p ())
2548 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2549 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2550 reason[(int) e->inline_failed][1] ++;
2551 if (DECL_VIRTUAL_P (e->callee->decl)
2552 && e->count.ipa ().initialized_p ())
2554 if (e->indirect_inlining_edge)
2555 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2556 else
2557 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2559 else if (e->count.ipa ().initialized_p ())
2561 if (e->indirect_inlining_edge)
2562 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2563 else
2564 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2567 else if (e->count.ipa ().initialized_p ())
2569 if (e->speculative)
2571 if (DECL_VIRTUAL_P (e->callee->decl))
2572 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2573 else
2574 inlined_speculative += e->count.ipa ().to_gcov_type ();
2576 else if (DECL_VIRTUAL_P (e->callee->decl))
2578 if (e->indirect_inlining_edge)
2579 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2580 else
2581 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2583 else
2585 if (e->indirect_inlining_edge)
2586 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2587 else
2588 inlined_cnt += e->count.ipa ().to_gcov_type ();
2592 for (e = node->indirect_calls; e; e = e->next_callee)
2593 if (e->indirect_info->polymorphic
2594 && e->count.ipa ().initialized_p ())
2595 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2596 else if (e->count.ipa ().initialized_p ())
2597 indirect_cnt += e->count.ipa ().to_gcov_type ();
2599 if (max_count.initialized_p ())
2601 fprintf (dump_file,
2602 "Inlined %" PRId64 " + speculative "
2603 "%" PRId64 " + speculative polymorphic "
2604 "%" PRId64 " + previously indirect "
2605 "%" PRId64 " + virtual "
2606 "%" PRId64 " + virtual and previously indirect "
2607 "%" PRId64 "\n" "Not inlined "
2608 "%" PRId64 " + previously indirect "
2609 "%" PRId64 " + virtual "
2610 "%" PRId64 " + virtual and previously indirect "
2611 "%" PRId64 " + still indirect "
2612 "%" PRId64 " + still indirect polymorphic "
2613 "%" PRId64 "\n", inlined_cnt,
2614 inlined_speculative, inlined_speculative_ply,
2615 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2616 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2617 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2618 fprintf (dump_file, "Removed speculations ");
2619 spec_rem.dump (dump_file);
2620 fprintf (dump_file, "\n");
2622 dump_overall_stats ();
2623 fprintf (dump_file, "\nWhy inlining failed?\n");
2624 for (i = 0; i < CIF_N_REASONS; i++)
2625 if (reason[i][1])
2626 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2627 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2628 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2631 /* Called when node is removed. */
2633 static void
2634 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2636 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2637 return;
2639 hash_set<struct cgraph_node *> *removed
2640 = (hash_set<struct cgraph_node *> *) data;
2641 removed->add (node);
2644 /* Decide on the inlining. We do so in the topological order to avoid
2645 the expense of updating data structures. */
2647 static unsigned int
2648 ipa_inline (void)
2650 struct cgraph_node *node;
2651 int nnodes;
2652 struct cgraph_node **order;
2653 int i, j;
2654 int cold;
2655 bool remove_functions = false;
2657 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2659 if (dump_file)
2660 ipa_dump_fn_summaries (dump_file);
2662 nnodes = ipa_reverse_postorder (order);
2663 spec_rem = profile_count::zero ();
2665 FOR_EACH_FUNCTION (node)
2667 node->aux = 0;
2669 /* Recompute the default reasons for inlining because they may have
2670 changed during merging. */
2671 if (in_lto_p)
2673 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2675 gcc_assert (e->inline_failed);
2676 initialize_inline_failed (e);
2678 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2679 initialize_inline_failed (e);
2683 if (dump_file)
2684 fprintf (dump_file, "\nFlattening functions:\n");
2686 /* First shrink the order array so that it only contains nodes with
2687 the flatten attribute. */
2688 for (i = nnodes - 1, j = i; i >= 0; i--)
2690 node = order[i];
2691 if (node->definition
2692 /* Do not try to flatten aliases. These may happen for example when
2693 creating local aliases. */
2694 && !node->alias
2695 && lookup_attribute ("flatten",
2696 DECL_ATTRIBUTES (node->decl)) != NULL)
2697 order[j--] = order[i];
2700 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2701 nodes with flatten attribute. If there is more than one such
2702 node, we need to register a node removal hook, as flatten_function
2703 could remove other nodes with flatten attribute. See PR82801. */
2704 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2705 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2706 if (j < nnodes - 2)
2708 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2709 node_removal_hook_holder
2710 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2711 flatten_removed_nodes);
2714 /* In the first pass handle functions to be flattened. Do this with
2715 a priority so none of our later choices will make this impossible. */
2716 for (i = nnodes - 1; i > j; i--)
2718 node = order[i];
2719 if (flatten_removed_nodes
2720 && flatten_removed_nodes->contains (node))
2721 continue;
2723 /* Handle nodes to be flattened.
2724 Ideally, when processing callees, we would stop inlining at the
2725 entry of cycles, possibly cloning that entry point and
2726 trying to flatten it, turning it into a self-recursive
2727 function. */
2728 if (dump_file)
2729 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2730 flatten_function (node, false, true);
2733 if (j < nnodes - 2)
2735 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2736 delete flatten_removed_nodes;
2738 free (order);
2740 if (dump_file)
2741 dump_overall_stats ();
2743 inline_small_functions ();
2745 gcc_assert (symtab->state == IPA_SSA);
2746 symtab->state = IPA_SSA_AFTER_INLINING;
2747 /* Do first after-inlining removal. We want to remove all "stale" extern
2748 inline functions and virtual functions so we really know what is called
2749 once. */
2750 symtab->remove_unreachable_nodes (dump_file);
2752 /* Inline functions with the property that after inlining into all callers the
2753 code size will shrink because the out-of-line copy is eliminated.
2754 We do this regardless of the callee size as long as the function growth limits
2755 are met. */
2756 if (dump_file)
2757 fprintf (dump_file,
2758 "\nDeciding on functions to be inlined into all callers and "
2759 "removing useless speculations:\n");
2761 /* Inlining one function called once has a good chance of preventing
2762 the inlining of another function into the same caller. Ideally we should
2763 work in priority order, but probably inlining hot functions first
2764 is a good cut without the extra pain of maintaining the queue.
2766 ??? This is not really fitting the bill perfectly: inlining a callee
2767 into its caller often leads to better optimization of the callee due to
2768 the increased context for optimization.
2769 For example, if the main() function calls a function that outputs help
2770 and then a function that does the main optimization, we should inline
2771 the second with priority even if both calls are cold by themselves.
2773 We probably want to implement a new predicate replacing our use of
2774 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2775 to be hot. */
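/* The loop below runs twice: first with cold == 0 so that candidates with
   hot calls are handled first, then with cold == 1 to pick up the remaining
   cold candidates. */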
2776 for (cold = 0; cold <= 1; cold ++)
2778 FOR_EACH_DEFINED_FUNCTION (node)
2780 struct cgraph_edge *edge, *next;
2781 bool update=false;
2783 if (!opt_for_fn (node->decl, optimize)
2784 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2785 continue;
2787 for (edge = node->callees; edge; edge = next)
2789 next = edge->next_callee;
2790 if (edge->speculative && !speculation_useful_p (edge, false))
2792 if (edge->count.ipa ().initialized_p ())
2793 spec_rem += edge->count.ipa ();
2794 cgraph_edge::resolve_speculation (edge);
2795 update = true;
2796 remove_functions = true;
2799 if (update)
2801 struct cgraph_node *where = node->inlined_to
2802 ? node->inlined_to : node;
2803 reset_edge_caches (where);
2804 ipa_update_overall_fn_summary (where);
2806 if (want_inline_function_to_all_callers_p (node, cold))
2808 int num_calls = 0;
2809 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2810 true);
2811 while (node->call_for_symbol_and_aliases
2812 (inline_to_all_callers, &num_calls, true))
2814 remove_functions = true;
2819 if (dump_enabled_p ())
2820 dump_printf (MSG_NOTE,
2821 "\nInlined %i calls, eliminated %i functions\n\n",
2822 ncalls_inlined, nfunctions_inlined);
2823 if (dump_file)
2824 dump_inline_stats ();
2826 if (dump_file)
2827 ipa_dump_fn_summaries (dump_file);
2828 return remove_functions ? TODO_remove_functions : 0;
2831 /* Inline always-inline function calls in NODE. */
2833 static bool
2834 inline_always_inline_functions (struct cgraph_node *node)
2836 struct cgraph_edge *e;
2837 bool inlined = false;
2839 for (e = node->callees; e; e = e->next_callee)
2841 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2842 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2843 continue;
2845 if (e->recursive_p ())
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2849 " Not inlining recursive call to %C.\n",
2850 e->callee);
2851 e->inline_failed = CIF_RECURSIVE_INLINING;
2852 continue;
2855 if (!can_early_inline_edge_p (e))
2857 /* Set inlined to true if the callee is marked "always_inline" but
2858 is not inlinable. This will allow flagging an error later in
2859 expand_call_inline in tree-inline.c. */
2860 if (lookup_attribute ("always_inline",
2861 DECL_ATTRIBUTES (callee->decl)) != NULL)
2862 inlined = true;
2863 continue;
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2868 " Inlining %C into %C (always_inline).\n",
2869 e->callee, e->caller);
2870 inline_call (e, true, NULL, NULL, false);
2871 inlined = true;
2873 if (inlined)
2874 ipa_update_overall_fn_summary (node);
2876 return inlined;
2879 /* Decide on early inlining of small functions called from NODE. Return
2880 true if anything was inlined. */
2882 static bool
2883 early_inline_small_functions (struct cgraph_node *node)
2885 struct cgraph_edge *e;
2886 bool inlined = false;
2888 for (e = node->callees; e; e = e->next_callee)
2890 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2892 /* We can encounter not-yet-analyzed function during
2893 early inlining on callgraphs with strongly
2894 connected components. */
2895 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2896 if (s == NULL || !s->inlinable || !e->inline_failed)
2897 continue;
2899 /* Do not consider functions not declared inline, unless inlining of small functions or -finline-functions is enabled for the caller. */
2900 if (!DECL_DECLARED_INLINE_P (callee->decl)
2901 && !opt_for_fn (node->decl, flag_inline_small_functions)
2902 && !opt_for_fn (node->decl, flag_inline_functions))
2903 continue;
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_NOTE, e->call_stmt,
2907 "Considering inline candidate %C.\n",
2908 callee);
2910 if (!can_early_inline_edge_p (e))
2911 continue;
2913 if (e->recursive_p ())
2915 if (dump_enabled_p ())
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2917 " Not inlining: recursive call.\n");
2918 continue;
2921 if (!want_early_inline_function_p (e))
2922 continue;
2924 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2926 " Inlining %C into %C.\n",
2927 callee, e->caller);
2928 inline_call (e, true, NULL, NULL, false);
2929 inlined = true;
2932 if (inlined)
2933 ipa_update_overall_fn_summary (node);
2935 return inlined;
2938 unsigned int
2939 early_inliner (function *fun)
2941 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2942 struct cgraph_edge *edge;
2943 unsigned int todo = 0;
2944 int iterations = 0;
2945 bool inlined = false;
2947 if (seen_error ())
2948 return 0;
2950 /* Do nothing if data structures for the ipa-inliner are already computed. This
2951 happens when some pass decides to construct a new function and
2952 cgraph_add_new_function calls lowering passes and early optimization on
2953 it. This may confuse us when the early inliner decides to inline a call to
2954 a function clone, because function clones don't have a parameter list in
2955 ipa-prop matching their signature. */
2956 if (ipa_node_params_sum)
2957 return 0;
2959 if (flag_checking)
2960 node->verify ();
2961 node->remove_all_references ();
2963 /* Even when not optimizing or not inlining, inline always-inline
2964 functions. */
2965 inlined = inline_always_inline_functions (node);
2967 if (!optimize
2968 || flag_no_inline
2969 || !flag_early_inlining
2970 /* Never inline regular functions into always-inline functions
2971 during incremental inlining. This is unfortunate, as functions calling
2972 always-inline functions will get less optimized, but at the
2973 same time inlining a function that calls an always-inline
2974 function into an always-inline function might introduce
2975 cycles of edges to be always inlined in the callgraph.
2977 We might want to be smarter and just avoid this type of inlining. */
2978 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2979 && lookup_attribute ("always_inline",
2980 DECL_ATTRIBUTES (node->decl))))
2982 else if (lookup_attribute ("flatten",
2983 DECL_ATTRIBUTES (node->decl)) != NULL)
2985 /* When the function is marked to be flattened, recursively inline
2986 all calls in it. */
2987 if (dump_enabled_p ())
2988 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2989 "Flattening %C\n", node);
2990 flatten_function (node, true, true);
2991 inlined = true;
2993 else
2995 /* If some always_inline functions were inlined, apply the changes.
2996 This way we will not account always-inline functions toward growth limits;
2997 moreover, we will inline calls from always-inline functions that we skipped
2998 previously because of the conditional above. */
2999 if (inlined)
3001 timevar_push (TV_INTEGRATION);
3002 todo |= optimize_inline_calls (current_function_decl);
3003 /* optimize_inline_calls call above might have introduced new
3004 statements that don't have inline parameters computed. */
3005 for (edge = node->callees; edge; edge = edge->next_callee)
3007 /* We can encounter a not-yet-analyzed function during
3008 early inlining on callgraphs with strongly
3009 connected components. */
3010 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3011 es->call_stmt_size
3012 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3013 es->call_stmt_time
3014 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3016 ipa_update_overall_fn_summary (node);
3017 inlined = false;
3018 timevar_pop (TV_INTEGRATION);
3020 /* We iterate incremental inlining to get trivial cases of indirect
3021 inlining. */
3022 while (iterations < opt_for_fn (node->decl,
3023 param_early_inliner_max_iterations)
3024 && early_inline_small_functions (node))
3026 timevar_push (TV_INTEGRATION);
3027 todo |= optimize_inline_calls (current_function_decl);
3029 /* Technically we ought to recompute inline parameters so the new
3030 iteration of the early inliner works as expected. However, the
3031 values are approximately right, so we only need to update edge
3032 info that might be cleared out for newly discovered edges. */
3033 for (edge = node->callees; edge; edge = edge->next_callee)
3035 /* We have no summary for new bound store calls yet. */
3036 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3037 es->call_stmt_size
3038 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3039 es->call_stmt_time
3040 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3042 if (iterations < opt_for_fn (node->decl,
3043 param_early_inliner_max_iterations) - 1)
3044 ipa_update_overall_fn_summary (node);
3045 timevar_pop (TV_INTEGRATION);
3046 iterations++;
3047 inlined = false;
3049 if (dump_file)
3050 fprintf (dump_file, "Iterations: %i\n", iterations);
3053 if (inlined)
3055 timevar_push (TV_INTEGRATION);
3056 todo |= optimize_inline_calls (current_function_decl);
3057 timevar_pop (TV_INTEGRATION);
3060 fun->always_inline_functions_inlined = true;
3062 return todo;
3065 /* Do inlining of small functions. Doing so early helps profiling and other
3066 passes to be somewhat more effective and avoids some code duplication in
3067 later real inlining pass for testcases with very many function calls. */
3069 namespace {
3071 const pass_data pass_data_early_inline =
3073 GIMPLE_PASS, /* type */
3074 "einline", /* name */
3075 OPTGROUP_INLINE, /* optinfo_flags */
3076 TV_EARLY_INLINING, /* tv_id */
3077 PROP_ssa, /* properties_required */
3078 0, /* properties_provided */
3079 0, /* properties_destroyed */
3080 0, /* todo_flags_start */
3081 0, /* todo_flags_finish */
3084 class pass_early_inline : public gimple_opt_pass
3086 public:
3087 pass_early_inline (gcc::context *ctxt)
3088 : gimple_opt_pass (pass_data_early_inline, ctxt)
3091 /* opt_pass methods: */
3092 virtual unsigned int execute (function *);
3094 }; // class pass_early_inline
3096 unsigned int
3097 pass_early_inline::execute (function *fun)
3099 return early_inliner (fun);
3102 } // anon namespace
3104 gimple_opt_pass *
3105 make_pass_early_inline (gcc::context *ctxt)
3107 return new pass_early_inline (ctxt);
3110 namespace {
3112 const pass_data pass_data_ipa_inline =
3114 IPA_PASS, /* type */
3115 "inline", /* name */
3116 OPTGROUP_INLINE, /* optinfo_flags */
3117 TV_IPA_INLINING, /* tv_id */
3118 0, /* properties_required */
3119 0, /* properties_provided */
3120 0, /* properties_destroyed */
3121 0, /* todo_flags_start */
3122 ( TODO_dump_symtab ), /* todo_flags_finish */
3125 class pass_ipa_inline : public ipa_opt_pass_d
3127 public:
3128 pass_ipa_inline (gcc::context *ctxt)
3129 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3130 NULL, /* generate_summary */
3131 NULL, /* write_summary */
3132 NULL, /* read_summary */
3133 NULL, /* write_optimization_summary */
3134 NULL, /* read_optimization_summary */
3135 NULL, /* stmt_fixup */
3136 0, /* function_transform_todo_flags_start */
3137 inline_transform, /* function_transform */
3138 NULL) /* variable_transform */
3141 /* opt_pass methods: */
3142 virtual unsigned int execute (function *) { return ipa_inline (); }
3144 }; // class pass_ipa_inline
3146 } // anon namespace
3148 ipa_opt_pass_d *
3149 make_pass_ipa_inline (gcc::context *ctxt)
3151 return new pass_ipa_inline (ctxt);