gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
23 The implementation of the inliner is organized as follows:
25 inlining heuristics limits
27 can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by the user (allowed function growth, stack
29 growth and so on).
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
42 Simple local inlining pass inlining callees into the current function.
43 This pass makes no use of whole unit analysis and thus it can make only
44 very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner were already optimized
50 and the results of early inlining add a lot of optimization opportunities
51 for local optimization.
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
57 The main strength of the pass is the ability to eliminate abstraction
58 penalty in C++ code (via combination of inlining and early
59 optimization) and thus improve quality of analysis done by real IPA
60 optimizers.
62 Because of the lack of whole unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It however does very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs the following steps:
73 1) inlining of small functions. This is implemented by a greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as the inline limits allow doing so.
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so, a
79 special purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
82 2) Unreachable functions are removed from the callgraph. Inlining leads
83 to devirtualization and other modifications of the callgraph so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction of code size by eliminating
90 the need for an offline copy of the function. */
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
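/* Note added for readability (not part of the original source): the work
   queue used by the inliner below is a Fibonacci heap keyed by edge badness
   (an sreal); extract_min returns the edge with the smallest badness, i.e.
   the currently most desirable inline candidate. */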
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136 To allow inlining huge functions into a tiny wrapper, the limit
137 is always based on the bigger of the two functions considered.
139 For stack growth limits we always base the growth on the stack usage
140 of the callers. We want to prevent applications from segfaulting
141 on stack overflow when functions with huge stack frames get
142 inlined. */
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
154 /* Look for the function e->caller is inlined into. While doing
155 so work out the largest function body on the way. As
156 described above, we want to base our function growth
157 limits on that; not on the self size of the
158 outer function, nor on the self size of the inline code
159 we immediately inline into. This is the most relaxed
160 interpretation of the rule "do not grow large functions
161 too much in order to prevent the compiler from exploding". */
162 while (true)
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
194 if (!what_info->estimated_stack_size)
195 return true;
197 /* FIXME: Stack size limit often prevents inlining in Fortran programs
198 due to large i/o datastructures used by the Fortran front-end.
199 We ought to ignore this limit when we know that the edge is executed
200 on every invocation of the caller (i.e. its call statement dominates
201 exit block). We do not track this information, yet. */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212 /* If the function already has large stack usage from a sibling
213 inline call, we can inline, too.
214 This bit overoptimistically assumes that we are good at stack
215 packing. */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
222 return true;
225 /* Dump info about why inlining has failed. */
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
230 if (dump_enabled_p ())
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
259 /* Decide whether sanitizer-related attributes allow inlining. */
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
264 if (!caller || !callee)
265 return true;
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
271 const sanitize_code codes[] =
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
286 return true;
289 /* Used for flags where it is safe to inline when caller's value is
290 greater than callee's. */
291 #define check_maybe_up(flag) \
292 (opts_for_fn (caller->decl)->x_##flag \
293 != opts_for_fn (callee->decl)->x_##flag \
294 && (!always_inline \
295 || opts_for_fn (caller->decl)->x_##flag \
296 < opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where it is safe to inline when caller's value is
298 smaller than callee's. */
299 #define check_maybe_down(flag) \
300 (opts_for_fn (caller->decl)->x_##flag \
301 != opts_for_fn (callee->decl)->x_##flag \
302 && (!always_inline \
303 || opts_for_fn (caller->decl)->x_##flag \
304 > opts_for_fn (callee->decl)->x_##flag))
305 /* Used for flags where exact match is needed for correctness. */
306 #define check_match(flag) \
307 (opts_for_fn (caller->decl)->x_##flag \
308 != opts_for_fn (callee->decl)->x_##flag)
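/* Illustrative note (added, not part of the original source): for example,
   check_maybe_down (flag_unsafe_math_optimizations) flags a mismatch
   whenever caller and callee differ, except that an always_inline callee
   may still be inlined when the caller's value of the flag is smaller than
   the callee's - the "safe" direction described above. */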
310 /* Decide if we can inline the edge and possibly update
311 inline_failed reason.
312 We check whether inlining is possible at all and whether
313 caller growth limits allow doing so.
315 If REPORT is true, output the reason to the dump file. */
317 static bool
318 can_inline_edge_p (struct cgraph_edge *e, bool report,
319 bool early = false)
321 gcc_checking_assert (e->inline_failed);
323 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
325 if (report)
326 report_inline_failed_reason (e);
327 return false;
330 bool inlinable = true;
331 enum availability avail;
332 cgraph_node *caller = (e->caller->inlined_to
333 ? e->caller->inlined_to : e->caller);
334 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
336 if (!callee->definition)
338 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
339 inlinable = false;
341 if (!early && (!opt_for_fn (callee->decl, optimize)
342 || !opt_for_fn (caller->decl, optimize)))
344 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
345 inlinable = false;
347 else if (callee->calls_comdat_local)
349 e->inline_failed = CIF_USES_COMDAT_LOCAL;
350 inlinable = false;
352 else if (avail <= AVAIL_INTERPOSABLE)
354 e->inline_failed = CIF_OVERWRITABLE;
355 inlinable = false;
357 /* All edges with call_stmt_cannot_inline_p should have inline_failed
358 initialized to one of FINAL_ERROR reasons. */
359 else if (e->call_stmt_cannot_inline_p)
360 gcc_unreachable ();
361 /* Don't inline if the functions have different EH personalities. */
362 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
363 && DECL_FUNCTION_PERSONALITY (callee->decl)
364 && (DECL_FUNCTION_PERSONALITY (caller->decl)
365 != DECL_FUNCTION_PERSONALITY (callee->decl)))
367 e->inline_failed = CIF_EH_PERSONALITY;
368 inlinable = false;
370 /* TM pure functions should not be inlined into non-TM_pure
371 functions. */
372 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
374 e->inline_failed = CIF_UNSPECIFIED;
375 inlinable = false;
377 /* Check compatibility of target optimization options. */
378 else if (!targetm.target_option.can_inline_p (caller->decl,
379 callee->decl))
381 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
382 inlinable = false;
384 else if (ipa_fn_summaries->get (callee) == NULL
385 || !ipa_fn_summaries->get (callee)->inlinable)
387 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
388 inlinable = false;
390 /* Don't inline a function with mismatched sanitization attributes. */
391 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
393 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
394 inlinable = false;
396 if (!inlinable && report)
397 report_inline_failed_reason (e);
398 return inlinable;
401 /* Return inlining_insns_single limit for function N. If HINT or HINT2 is true
402 scale up the bound. */
404 static int
405 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
407 if (hint && hint2)
409 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
410 spd = spd * spd;
411 if (spd > 1000000)
412 spd = 1000000;
413 return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
415 if (hint || hint2)
416 return opt_for_fn (n->decl, param_max_inline_insns_single)
417 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
418 return opt_for_fn (n->decl, param_max_inline_insns_single);
421 /* Return inlining_insns_auto limit for function N. If HINT or HINT2 is true
422 scale up the bound. */
424 static int
425 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
427 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
428 if (hint && hint2)
430 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
431 spd = spd * spd;
432 if (spd > 1000000)
433 spd = 1000000;
434 return max_inline_insns_auto * spd / 100;
436 if (hint || hint2)
437 return max_inline_insns_auto
438 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
439 return max_inline_insns_auto;
442 /* Decide if we can inline the edge and possibly update
443 inline_failed reason.
444 We check whether inlining is possible at all and whether
445 caller growth limits allow doing so.
447 If REPORT is true, output the reason to the dump file.
449 If DISREGARD_LIMITS is true, ignore size limits. */
451 static bool
452 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
453 bool disregard_limits = false, bool early = false)
455 gcc_checking_assert (e->inline_failed);
457 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
459 if (report)
460 report_inline_failed_reason (e);
461 return false;
464 bool inlinable = true;
465 enum availability avail;
466 cgraph_node *caller = (e->caller->inlined_to
467 ? e->caller->inlined_to : e->caller);
468 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
469 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
470 tree callee_tree
471 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
472 /* Check if caller growth allows the inlining. */
473 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
474 && !disregard_limits
475 && !lookup_attribute ("flatten",
476 DECL_ATTRIBUTES (caller->decl))
477 && !caller_growth_limits (e))
478 inlinable = false;
479 else if (callee->externally_visible
480 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
481 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
483 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
484 inlinable = false;
486 /* Don't inline a function with a higher optimization level than the
487 caller. FIXME: this is really just the tip of the iceberg of handling
488 the optimization attribute. */
489 else if (caller_tree != callee_tree)
491 bool always_inline =
492 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
493 && lookup_attribute ("always_inline",
494 DECL_ATTRIBUTES (callee->decl)));
495 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
496 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
498 /* Until GCC 4.9 we did not check the semantics-altering flags
499 below and inlined across optimization boundaries.
500 Enabling checks below breaks several packages by refusing
501 to inline library always_inline functions. See PR65873.
502 Disable the check for early inlining for now until a better solution
503 is found. */
504 if (always_inline && early)
506 /* There are some options that change IL semantics which means
507 we cannot inline in these cases for correctness reasons.
508 Not even for always_inline declared functions. */
509 else if (check_match (flag_wrapv)
510 || check_match (flag_trapv)
511 || check_match (flag_pcc_struct_return)
512 || check_maybe_down (optimize_debug)
513 /* When caller or callee does FP math, be sure FP codegen flags
514 are compatible. */
515 || ((caller_info->fp_expressions && callee_info->fp_expressions)
516 && (check_maybe_up (flag_rounding_math)
517 || check_maybe_up (flag_trapping_math)
518 || check_maybe_down (flag_unsafe_math_optimizations)
519 || check_maybe_down (flag_finite_math_only)
520 || check_maybe_up (flag_signaling_nans)
521 || check_maybe_down (flag_cx_limited_range)
522 || check_maybe_up (flag_signed_zeros)
523 || check_maybe_down (flag_associative_math)
524 || check_maybe_down (flag_reciprocal_math)
525 || check_maybe_down (flag_fp_int_builtin_inexact)
526 /* Strictly speaking only when the callee contains function
527 calls that may end up setting errno. */
528 || check_maybe_up (flag_errno_math)))
529 /* We do not want code compiled with exceptions to be
530 brought into a non-EH function unless we know that the callee
531 does not throw.
532 This is tracked by DECL_FUNCTION_PERSONALITY. */
533 || (check_maybe_up (flag_non_call_exceptions)
534 && DECL_FUNCTION_PERSONALITY (callee->decl))
535 || (check_maybe_up (flag_exceptions)
536 && DECL_FUNCTION_PERSONALITY (callee->decl))
537 /* When devirtualization is disabled for callee, it is not safe
538 to inline it as we possibly mangled the type info.
539 Allow early inlining of always inlines. */
540 || (!early && check_maybe_down (flag_devirtualize)))
542 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
543 inlinable = false;
545 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
546 else if (always_inline)
548 /* When user added an attribute to the callee honor it. */
549 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
550 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
552 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
553 inlinable = false;
555 /* If explicit optimize attributes are not used, the mismatch is caused
556 by different command line options used to build different units.
557 Do not care about COMDAT functions - those are intended to be
558 optimized with the optimization flags of module they are used in.
559 Also do not care about mixing up size/speed optimization when
560 DECL_DISREGARD_INLINE_LIMITS is set. */
561 else if ((callee->merged_comdat
562 && !lookup_attribute ("optimize",
563 DECL_ATTRIBUTES (caller->decl)))
564 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
566 /* If the mismatch is caused by merging two LTO units with different
567 optimization flags we want to be a bit nicer. However never inline
568 if one of the functions is not optimized at all. */
569 else if (!opt_for_fn (callee->decl, optimize)
570 || !opt_for_fn (caller->decl, optimize))
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
575 /* If callee is optimized for size and caller is not, allow inlining if
576 code shrinks or we are in param_max_inline_insns_single limit and
577 callee is inline (and thus likely a unified comdat).
578 This will allow the caller to run faster. */
579 else if (opt_for_fn (callee->decl, optimize_size)
580 > opt_for_fn (caller->decl, optimize_size))
582 int growth = estimate_edge_growth (e);
583 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
584 && (!DECL_DECLARED_INLINE_P (callee->decl)
585 && growth >= MAX (inline_insns_single (caller, false, false),
586 inline_insns_auto (caller, false, false))))
588 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
589 inlinable = false;
592 /* If callee is more aggressively optimized for performance than caller,
593 we generally want to inline only cheap (runtime wise) functions. */
594 else if (opt_for_fn (callee->decl, optimize_size)
595 < opt_for_fn (caller->decl, optimize_size)
596 || (opt_for_fn (callee->decl, optimize)
597 > opt_for_fn (caller->decl, optimize)))
599 if (estimate_edge_time (e)
600 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
602 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
603 inlinable = false;
609 if (!inlinable && report)
610 report_inline_failed_reason (e);
611 return inlinable;
615 /* Return true if the edge E is inlinable during early inlining. */
617 static bool
618 can_early_inline_edge_p (struct cgraph_edge *e)
620 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
621 /* Early inliner might get called at WPA stage when an IPA pass adds a new
622 function. In this case we cannot really do any of early inlining
623 because function bodies are missing. */
624 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
625 return false;
626 if (!gimple_has_body_p (callee->decl))
628 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
629 return false;
631 /* In the early inliner some of the callees may not be in SSA form yet
632 (i.e. the callgraph is cyclic and we did not process
633 the callee by early inliner, yet). We don't have CIF code for this
634 case; later we will re-do the decision in the real inliner. */
635 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
636 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
640 " edge not inlinable: not in SSA form\n");
641 return false;
643 if (!can_inline_edge_p (e, true, true)
644 || !can_inline_edge_by_limits_p (e, true, false, true))
645 return false;
646 return true;
650 /* Return number of calls in N. Ignore cheap builtins. */
652 static int
653 num_calls (struct cgraph_node *n)
655 struct cgraph_edge *e;
656 int num = 0;
658 for (e = n->callees; e; e = e->next_callee)
659 if (!is_inexpensive_builtin (e->callee->decl))
660 num++;
661 return num;
665 /* Return true if we are interested in inlining a small function. */
667 static bool
668 want_early_inline_function_p (struct cgraph_edge *e)
670 bool want_inline = true;
671 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
673 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
675 /* For AutoFDO, we need to make sure that before profile summary, all
676 hot paths' IR look exactly the same as profiled binary. As a result,
677 in einliner, we will disregard size limit and inline those callsites
678 that are:
679 * inlined in the profiled binary, and
680 * the cloned callee has enough samples to be considered "hot". */
681 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
683 else if (!DECL_DECLARED_INLINE_P (callee->decl)
684 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
686 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
687 report_inline_failed_reason (e);
688 want_inline = false;
690 else
692 /* First take care of very large functions. */
693 int min_growth = estimate_min_edge_growth (e), growth = 0;
694 int n;
695 int early_inlining_insns = param_early_inlining_insns;
697 if (min_growth > early_inlining_insns)
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
701 " will not early inline: %C->%C, "
702 "call is cold and code would grow "
703 "at least by %i\n",
704 e->caller, callee,
705 min_growth);
706 want_inline = false;
708 else
709 growth = estimate_edge_growth (e);
712 if (!want_inline || growth <= param_max_inline_insns_size)
714 else if (!e->maybe_hot_p ())
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
718 " will not early inline: %C->%C, "
719 "call is cold and code would grow by %i\n",
720 e->caller, callee,
721 growth);
722 want_inline = false;
724 else if (growth > early_inlining_insns)
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
728 " will not early inline: %C->%C, "
729 "growth %i exceeds --param early-inlining-insns\n",
730 e->caller, callee, growth);
731 want_inline = false;
733 else if ((n = num_calls (callee)) != 0
734 && growth * (n + 1) > early_inlining_insns)
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 " will not early inline: %C->%C, "
739 "growth %i exceeds --param early-inlining-insns "
740 "divided by number of calls\n",
741 e->caller, callee, growth);
742 want_inline = false;
745 return want_inline;
748 /* Compute time of the edge->caller + edge->callee execution when inlining
749 does not happen. */
751 inline sreal
752 compute_uninlined_call_time (struct cgraph_edge *edge,
753 sreal uninlined_call_time,
754 sreal freq)
756 cgraph_node *caller = (edge->caller->inlined_to
757 ? edge->caller->inlined_to
758 : edge->caller);
760 if (freq > 0)
761 uninlined_call_time *= freq;
762 else
763 uninlined_call_time = uninlined_call_time >> 11;
765 sreal caller_time = ipa_fn_summaries->get (caller)->time;
766 return uninlined_call_time + caller_time;
769 /* Same as compute_uninlined_call_time but compute time when inlining
770 does happen. */
772 inline sreal
773 compute_inlined_call_time (struct cgraph_edge *edge,
774 sreal time,
775 sreal freq)
777 cgraph_node *caller = (edge->caller->inlined_to
778 ? edge->caller->inlined_to
779 : edge->caller);
780 sreal caller_time = ipa_fn_summaries->get (caller)->time;
782 if (freq > 0)
783 time *= freq;
784 else
785 time = time >> 11;
787 /* This calculation should match one in ipa-inline-analysis.c
788 (estimate_edge_size_and_time). */
789 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
790 time += caller_time;
791 if (time <= 0)
792 time = ((sreal) 1) >> 8;
793 gcc_checking_assert (time >= 0);
794 return time;
797 /* Determine time saved by inlining EDGE of frequency FREQ
798 where callee's runtime w/o inlining is UNINLINED_TIME
799 and with inlining is INLINED_TIME. */
801 inline sreal
802 inlining_speedup (struct cgraph_edge *edge,
803 sreal freq,
804 sreal uninlined_time,
805 sreal inlined_time)
807 sreal speedup = uninlined_time - inlined_time;
808 /* Handling of call_time should match the one in ipa-fnsummary.c
809 (estimate_edge_size_and_time). */
810 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
812 if (freq > 0)
814 speedup = (speedup + call_time);
815 if (freq != 1)
816 speedup = speedup * freq;
818 else if (freq == 0)
819 speedup = speedup >> 11;
820 gcc_checking_assert (speedup >= 0);
821 return speedup;
824 /* Return true if the speedup for inlining E is bigger than
825 param_inline_min_speedup. */
827 static bool
828 big_speedup_p (struct cgraph_edge *e)
830 sreal unspec_time;
831 sreal spec_time = estimate_edge_time (e, &unspec_time);
832 sreal freq = e->sreal_frequency ();
833 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
834 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
835 cgraph_node *caller = (e->caller->inlined_to
836 ? e->caller->inlined_to
837 : e->caller);
838 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
840 if ((time - inlined_time) * 100 > time * limit)
841 return true;
842 return false;
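/* Note added for readability (not part of the original source): the test
   above is equivalent to requiring (time - inlined_time) / time to exceed
   limit / 100, i.e. the estimated relative speedup of the caller must be
   bigger than param_inline_min_speedup percent. */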
845 /* Return true if we are interested in inlining a small function.
846 When REPORT is true, report the reason to the dump file. */
848 static bool
849 want_inline_small_function_p (struct cgraph_edge *e, bool report)
851 bool want_inline = true;
852 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
853 cgraph_node *to = (e->caller->inlined_to
854 ? e->caller->inlined_to : e->caller);
856 /* Allow this function to be called before can_inline_edge_p,
857 since it's usually cheaper. */
858 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
859 want_inline = false;
860 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
862 else if (!DECL_DECLARED_INLINE_P (callee->decl)
863 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
865 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
866 want_inline = false;
868 /* Do a fast and conservative check if the function can be a good
869 inline candidate. */
870 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
871 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
872 && ipa_fn_summaries->get (callee)->min_size
873 - ipa_call_summaries->get (e)->call_stmt_size
874 > inline_insns_auto (e->caller, true, true))
876 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
877 want_inline = false;
879 else if ((DECL_DECLARED_INLINE_P (callee->decl)
880 || e->count.ipa ().nonzero_p ())
881 && ipa_fn_summaries->get (callee)->min_size
882 - ipa_call_summaries->get (e)->call_stmt_size
883 > inline_insns_single (e->caller, true, true))
885 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
886 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
887 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
888 want_inline = false;
890 else
892 int growth = estimate_edge_growth (e);
893 ipa_hints hints = estimate_edge_hints (e);
894 /* We have two independent groups of hints. If one matches in each
895 of the groups the limits are increased. If both groups match, the limit
896 is increased even more. */
897 bool apply_hints = (hints & (INLINE_HINT_indirect_call
898 | INLINE_HINT_known_hot
899 | INLINE_HINT_loop_iterations
900 | INLINE_HINT_loop_stride));
901 bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
903 if (growth <= opt_for_fn (to->decl,
904 param_max_inline_insns_size))
906 /* Apply param_max_inline_insns_single limit. Do not do so when
907 hints suggest that inlining the given function is very profitable.
908 Avoid computation of big_speedup_p when not necessary to change
909 the outcome of the decision. */
910 else if (DECL_DECLARED_INLINE_P (callee->decl)
911 && growth >= inline_insns_single (e->caller, apply_hints,
912 apply_hints2)
913 && (apply_hints || apply_hints2
914 || growth >= inline_insns_single (e->caller, true,
915 apply_hints2)
916 || !big_speedup_p (e)))
918 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
919 want_inline = false;
921 else if (!DECL_DECLARED_INLINE_P (callee->decl)
922 && !opt_for_fn (e->caller->decl, flag_inline_functions)
923 && growth >= opt_for_fn (to->decl,
924 param_max_inline_insns_small))
926 /* growth_positive_p is expensive, always test it last. */
927 if (growth >= inline_insns_single (e->caller, false, false)
928 || growth_positive_p (callee, e, growth))
930 e->inline_failed = CIF_NOT_DECLARED_INLINED;
931 want_inline = false;
934 /* Apply param_max_inline_insns_auto limit for functions not declared
935 inline. Bypass the limit when speedup seems big. */
936 else if (!DECL_DECLARED_INLINE_P (callee->decl)
937 && growth >= inline_insns_auto (e->caller, apply_hints,
938 apply_hints2)
939 && (apply_hints || apply_hints2
940 || growth >= inline_insns_auto (e->caller, true,
941 apply_hints2)
942 || !big_speedup_p (e)))
944 /* growth_positive_p is expensive, always test it last. */
945 if (growth >= inline_insns_single (e->caller, false, false)
946 || growth_positive_p (callee, e, growth))
948 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
949 want_inline = false;
952 /* If call is cold, do not inline when function body would grow. */
953 else if (!e->maybe_hot_p ()
954 && (growth >= inline_insns_single (e->caller, false, false)
955 || growth_positive_p (callee, e, growth)))
957 e->inline_failed = CIF_UNLIKELY_CALL;
958 want_inline = false;
961 if (!want_inline && report)
962 report_inline_failed_reason (e);
963 return want_inline;
966 /* EDGE is a self recursive edge.
967 We handle two cases - when function A is inlined into itself
968 or when function A is being inlined into another inliner copy of function
969 A within function B.
971 In the first case OUTER_NODE points to the toplevel copy of A, while
972 in the second case OUTER_NODE points to the outermost copy of A in B.
974 In both cases we want to be extra selective since
975 inlining the call will just cause new recursive calls to appear. */
977 static bool
978 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
979 struct cgraph_node *outer_node,
980 bool peeling,
981 int depth)
983 char const *reason = NULL;
984 bool want_inline = true;
985 sreal caller_freq = 1;
986 int max_depth = opt_for_fn (outer_node->decl,
987 param_max_inline_recursive_depth_auto);
989 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
990 max_depth = opt_for_fn (outer_node->decl,
991 param_max_inline_recursive_depth);
993 if (!edge->maybe_hot_p ())
995 reason = "recursive call is cold";
996 want_inline = false;
998 else if (depth > max_depth)
1000 reason = "--param max-inline-recursive-depth exceeded.";
1001 want_inline = false;
1003 else if (outer_node->inlined_to
1004 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1006 reason = "caller frequency is 0";
1007 want_inline = false;
1010 if (!want_inline)
1012 /* Inlining of a self recursive function into a copy of itself within
1013 another function is a transformation similar to loop peeling.
1015 Peeling is profitable if we can inline enough copies to make probability
1016 of actual call to the self recursive function very small. Be sure that
1017 the probability of recursion is small.
1019 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1020 This way the expected number of recursions is at most max_depth. */
1021 else if (peeling)
1023 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1024 int i;
1025 for (i = 1; i < depth; i++)
1026 max_prob = max_prob * max_prob;
1027 if (edge->sreal_frequency () >= max_prob * caller_freq)
1029 reason = "frequency of recursive call is too large";
1030 want_inline = false;
1033 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1034 recursion depth is large. We reduce function call overhead and increase
1035 chances that things fit in hardware return predictor.
1037 Recursive inlining might however increase cost of stack frame setup
1038 actually slowing down functions whose recursion tree is wide rather than
1039 deep.
1041 Deciding reliably on when to do recursive inlining without profile feedback
1042 is tricky. For now we disable recursive inlining when probability of self
1043 recursion is low.
1045 Recursive inlining of self recursive call within loop also results in
1046 large loop depths that generally optimize badly. We may want to throttle
1047 down inlining in those cases. In particular this seems to happen in one
1048 of libstdc++ rb tree methods. */
1049 else
1051 if (edge->sreal_frequency () * 100
1052 <= caller_freq
1053 * opt_for_fn (outer_node->decl,
1054 param_min_inline_recursive_probability))
1056 reason = "frequency of recursive call is too small";
1057 want_inline = false;
1060 if (!want_inline && dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1062 " not inlining recursively: %s\n", reason);
1063 return want_inline;
1066 /* Return true when NODE has an uninlinable caller;
1067 set HAS_HOT_CALL if it has a hot call.
1068 Worker for cgraph_for_node_and_aliases. */
1070 static bool
1071 check_callers (struct cgraph_node *node, void *has_hot_call)
1073 struct cgraph_edge *e;
1074 for (e = node->callers; e; e = e->next_caller)
1076 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1077 || !opt_for_fn (e->caller->decl, optimize))
1078 return true;
1079 if (!can_inline_edge_p (e, true))
1080 return true;
1081 if (e->recursive_p ())
1082 return true;
1083 if (!can_inline_edge_by_limits_p (e, true))
1084 return true;
1085 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1086 *(bool *)has_hot_call = true;
1088 return false;
1091 /* If NODE has a caller, return true. */
1093 static bool
1094 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1096 if (node->callers)
1097 return true;
1098 return false;
1101 /* Decide if inlining NODE would reduce unit size by eliminating
1102 the offline copy of the function.
1103 When COLD is true the cold calls are considered, too. */
1105 static bool
1106 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1108 bool has_hot_call = false;
1110 /* Aliases get inlined along with the function they alias. */
1111 if (node->alias)
1112 return false;
1113 /* Already inlined? */
1114 if (node->inlined_to)
1115 return false;
1116 /* Does it have callers? */
1117 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1118 return false;
1119 /* Inlining into all callers would increase size? */
1120 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1121 return false;
1122 /* All inlines must be possible. */
1123 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1124 true))
1125 return false;
1126 if (!cold && !has_hot_call)
1127 return false;
1128 return true;
1131 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1132 in estimate_edge_badness. */
1134 static bool
1135 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1137 return size < (DECL_DECLARED_INLINE_P (where->decl)
1138 ? inline_insns_single (where, false, false)
1139 : inline_insns_auto (where, false, false));
1142 /* A cost model driving the inlining heuristics in a way so that the edges
1143 with the smallest badness are inlined first. After each inlining is performed
1144 the costs of all caller edges of nodes affected are recomputed so the
1145 metrics may accurately depend on values such as number of inlinable callers
1146 of the function or function body size. */
1148 static sreal
1149 edge_badness (struct cgraph_edge *edge, bool dump)
1151 sreal badness;
1152 int growth;
1153 sreal edge_time, unspec_edge_time;
1154 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1155 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1156 ipa_hints hints;
1157 cgraph_node *caller = (edge->caller->inlined_to
1158 ? edge->caller->inlined_to
1159 : edge->caller);
1161 growth = estimate_edge_growth (edge);
1162 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1163 hints = estimate_edge_hints (edge);
1164 gcc_checking_assert (edge_time >= 0);
1165 /* Check that inlined time is better, but tolerate some roundoff issues.
1166 FIXME: When callee profile drops to 0 we account calls more. This
1167 should be fixed by never doing that. */
1168 gcc_checking_assert ((edge_time * 100
1169 - callee_info->time * 101).to_int () <= 0
1170 || callee->count.ipa ().initialized_p ());
1171 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1173 if (dump)
1175 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1176 edge->caller->dump_name (),
1177 edge->callee->dump_name ());
1178 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1179 growth,
1180 edge_time.to_double (),
1181 unspec_edge_time.to_double ());
1182 ipa_dump_hints (dump_file, hints);
1183 if (big_speedup_p (edge))
1184 fprintf (dump_file, " big_speedup");
1185 fprintf (dump_file, "\n");
1188 /* Always prefer inlining saving code size. */
1189 if (growth <= 0)
1191 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1192 if (dump)
1193 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1194 growth);
1196 /* Inlining into EXTERNAL functions is not going to change anything unless
1197 they are themselves inlined. */
1198 else if (DECL_EXTERNAL (caller->decl))
1200 if (dump)
1201 fprintf (dump_file, " max: function is external\n");
1202 return sreal::max ();
1204 /* When the profile is available, compute badness as:
1206 time_saved * caller_count
1207 goodness = -------------------------------------------------
1208 growth_of_caller * overall_growth * combined_size
1210 badness = - goodness
1212 Again use a negative value to make calls with profile appear hotter
1213 than calls without.
1215 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1216 || caller->count.ipa ().nonzero_p ())
1218 sreal numerator, denominator;
1219 int overall_growth;
1220 sreal freq = edge->sreal_frequency ();
1222 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1223 if (numerator <= 0)
1224 numerator = ((sreal) 1 >> 8);
1225 if (caller->count.ipa ().nonzero_p ())
1226 numerator *= caller->count.ipa ().to_gcov_type ();
1227 else if (caller->count.ipa ().initialized_p ())
1228 numerator = numerator >> 11;
1229 denominator = growth;
1231 overall_growth = callee_info->growth;
1233 /* Look for inliner wrappers of the form:
1235 inline_caller ()
1236 {
1237 do_fast_job...
1238 if (need_more_work)
1239 noninline_callee ();
1240 }
1241 Without penalizing this case, we usually inline noninline_callee
1242 into the inline_caller because overall_growth is small preventing
1243 further inlining of inline_caller.
1245 Penalize only callgraph edges to functions with small overall
1246 growth ...
1248 if (growth > overall_growth
1249 /* ... and having only one caller which is not inlined ... */
1250 && callee_info->single_caller
1251 && !edge->caller->inlined_to
1252 /* ... and edges executed only conditionally ... */
1253 && freq < 1
1254 /* ... consider case where callee is not inline but caller is ... */
1255 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1256 && DECL_DECLARED_INLINE_P (caller->decl))
1257 /* ... or when early optimizers decided to split and edge
1258 frequency still indicates splitting is a win ... */
1259 || (callee->split_part && !caller->split_part
1260 && freq * 100
1261 < opt_for_fn (caller->decl,
1262 param_partial_inlining_entry_probability)
1263 /* ... and do not overwrite user specified hints. */
1264 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1265 || DECL_DECLARED_INLINE_P (caller->decl)))))
1267 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1268 int caller_growth = caller_info->growth;
1270 /* Only apply the penalty when the caller looks like an inline candidate,
1271 and it is not called once. */
1272 if (!caller_info->single_caller && overall_growth < caller_growth
1273 && caller_info->inlinable
1274 && wrapper_heuristics_may_apply
1275 (caller, ipa_size_summaries->get (caller)->size))
1277 if (dump)
1278 fprintf (dump_file,
1279 " Wrapper penalty. Increasing growth %i to %i\n",
1280 overall_growth, caller_growth);
1281 overall_growth = caller_growth;
1284 if (overall_growth > 0)
1286 /* Strongly prefer functions with few callers that can be inlined
1287 fully. The square root here leads to smaller binaries on average.
1288 Watch however for extreme cases and return to linear function
1289 when growth is large. */
1290 if (overall_growth < 256)
1291 overall_growth *= overall_growth;
1292 else
1293 overall_growth += 256 * 256 - 256;
1294 denominator *= overall_growth;
1296 denominator *= ipa_size_summaries->get (caller)->size + growth;
1298 badness = - numerator / denominator;
1300 if (dump)
1302 fprintf (dump_file,
1303 " %f: guessed profile. frequency %f, count %" PRId64
1304 " caller count %" PRId64
1305 " time saved %f"
1306 " overall growth %i (current) %i (original)"
1307 " %i (compensated)\n",
1308 badness.to_double (),
1309 freq.to_double (),
1310 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1311 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1312 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1313 estimate_growth (callee),
1314 callee_info->growth, overall_growth);
1317 /* When function local profile is not available or it does not give
1318 useful information (i.e. frequency is zero), base the cost on
1319 loop nest and overall size growth, so we optimize for overall number
1320 of functions fully inlined in program. */
1321 else
1323 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1324 badness = growth;
1326 /* Decrease badness if call is nested. */
1327 if (badness > 0)
1328 badness = badness >> nest;
1329 else
1330 badness = badness << nest;
1331 if (dump)
1332 fprintf (dump_file, " %f: no profile. nest %i\n",
1333 badness.to_double (), nest);
1335 gcc_checking_assert (badness != 0);
1337 if (edge->recursive_p ())
1338 badness = badness.shift (badness > 0 ? 4 : -4);
1339 if ((hints & (INLINE_HINT_indirect_call
1340 | INLINE_HINT_loop_iterations
1341 | INLINE_HINT_loop_stride))
1342 || callee_info->growth <= 0)
1343 badness = badness.shift (badness > 0 ? -2 : 2);
1344 if (hints & INLINE_HINT_builtin_constant_p)
1345 badness = badness.shift (badness > 0 ? -4 : 4);
1346 if (hints & (INLINE_HINT_same_scc))
1347 badness = badness.shift (badness > 0 ? 3 : -3);
1348 else if (hints & (INLINE_HINT_in_scc))
1349 badness = badness.shift (badness > 0 ? 2 : -2);
1350 else if (hints & (INLINE_HINT_cross_module))
1351 badness = badness.shift (badness > 0 ? 1 : -1);
1352 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1353 badness = badness.shift (badness > 0 ? -4 : 4);
1354 else if ((hints & INLINE_HINT_declared_inline))
1355 badness = badness.shift (badness > 0 ? -3 : 3);
1356 if (dump)
1357 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1358 return badness;
1361 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1362 static inline void
1363 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1365 sreal badness = edge_badness (edge, false);
1366 if (edge->aux)
1368 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1369 gcc_checking_assert (n->get_data () == edge);
1371 /* fibonacci_heap::replace_key does busy updating of the
1372 heap that is unnecessarily expensive.
1373 We do lazy increases: after extracting the minimum, if the key
1374 turns out to be out of date, it is re-inserted into the heap
1375 with the correct value. */
1376 if (badness < n->get_key ())
1378 if (dump_file && (dump_flags & TDF_DETAILS))
1380 fprintf (dump_file,
1381 " decreasing badness %s -> %s, %f to %f\n",
1382 edge->caller->dump_name (),
1383 edge->callee->dump_name (),
1384 n->get_key ().to_double (),
1385 badness.to_double ());
1387 heap->decrease_key (n, badness);
1390 else
1392 if (dump_file && (dump_flags & TDF_DETAILS))
1394 fprintf (dump_file,
1395 " enqueuing call %s -> %s, badness %f\n",
1396 edge->caller->dump_name (),
1397 edge->callee->dump_name (),
1398 badness.to_double ());
1400 edge->aux = heap->insert (badness, edge);
1405 /* NODE was inlined.
1406 All caller edges need to be reset because
1407 size estimates change. Similarly callees need to be reset
1408 because a better context may be known. */
1410 static void
1411 reset_edge_caches (struct cgraph_node *node)
1413 struct cgraph_edge *edge;
1414 struct cgraph_edge *e = node->callees;
1415 struct cgraph_node *where = node;
1416 struct ipa_ref *ref;
1418 if (where->inlined_to)
1419 where = where->inlined_to;
1421 reset_node_cache (where);
1423 if (edge_growth_cache != NULL)
1424 for (edge = where->callers; edge; edge = edge->next_caller)
1425 if (edge->inline_failed)
1426 edge_growth_cache->remove (edge);
1428 FOR_EACH_ALIAS (where, ref)
1429 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1431 if (!e)
1432 return;
1434 while (true)
1435 if (!e->inline_failed && e->callee->callees)
1436 e = e->callee->callees;
1437 else
1439 if (edge_growth_cache != NULL && e->inline_failed)
1440 edge_growth_cache->remove (e);
1441 if (e->next_callee)
1442 e = e->next_callee;
1443 else
1447 if (e->caller == node)
1448 return;
1449 e = e->caller->callers;
1451 while (!e->next_callee);
1452 e = e->next_callee;
1457 /* Recompute HEAP nodes for each caller of NODE.
1458 UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1459 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1460 it is inlinable. Otherwise check all edges. */
1462 static void
1463 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1464 bitmap updated_nodes,
1465 struct cgraph_edge *check_inlinablity_for)
1467 struct cgraph_edge *edge;
1468 struct ipa_ref *ref;
1470 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1471 || node->inlined_to)
1472 return;
1473 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1474 return;
1476 FOR_EACH_ALIAS (node, ref)
1478 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1479 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1482 for (edge = node->callers; edge; edge = edge->next_caller)
1483 if (edge->inline_failed)
1485 if (!check_inlinablity_for
1486 || check_inlinablity_for == edge)
1488 if (can_inline_edge_p (edge, false)
1489 && want_inline_small_function_p (edge, false)
1490 && can_inline_edge_by_limits_p (edge, false))
1491 update_edge_key (heap, edge);
1492 else if (edge->aux)
1494 report_inline_failed_reason (edge);
1495 heap->delete_node ((edge_heap_node_t *) edge->aux);
1496 edge->aux = NULL;
1499 else if (edge->aux)
1500 update_edge_key (heap, edge);
1504 /* Recompute HEAP nodes for each uninlined call in NODE.
1505 If UPDATE_SINCE is non-NULL check if edges called within that function
1506 are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1507 where all edges have new context).
1509 This is used when we know that edge badnesses are going only to increase
1510 (we introduced new call site) and thus all we need is to insert newly
1511 created edges into heap. */
1513 static void
1514 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1515 struct cgraph_node *update_since,
1516 bitmap updated_nodes)
1518 struct cgraph_edge *e = node->callees;
1519 bool check_inlinability = update_since == node;
1521 if (!e)
1522 return;
1523 while (true)
1524 if (!e->inline_failed && e->callee->callees)
1526 if (e->callee == update_since)
1527 check_inlinability = true;
1528 e = e->callee->callees;
1530 else
1532 enum availability avail;
1533 struct cgraph_node *callee;
1534 if (!check_inlinability)
1536 if (e->aux
1537 && !bitmap_bit_p (updated_nodes,
1538 e->callee->ultimate_alias_target
1539 (&avail, e->caller)->get_uid ()))
1540 update_edge_key (heap, e);
1542 /* We do not reset callee growth cache here. Since we added a new call,
1543 growth should have just increased and consequently the badness metric
1544 doesn't need updating. */
1545 else if (e->inline_failed
1546 && (callee = e->callee->ultimate_alias_target (&avail,
1547 e->caller))
1548 && avail >= AVAIL_AVAILABLE
1549 && ipa_fn_summaries->get (callee) != NULL
1550 && ipa_fn_summaries->get (callee)->inlinable
1551 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1553 if (can_inline_edge_p (e, false)
1554 && want_inline_small_function_p (e, false)
1555 && can_inline_edge_by_limits_p (e, false))
1557 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1558 gcc_checking_assert (check_inlinability || e->aux);
1559 update_edge_key (heap, e);
1561 else if (e->aux)
1563 report_inline_failed_reason (e);
1564 heap->delete_node ((edge_heap_node_t *) e->aux);
1565 e->aux = NULL;
1568 /* In case we redirected to unreachable node we only need to remove the
1569 fibheap entry. */
1570 else if (e->aux)
1572 heap->delete_node ((edge_heap_node_t *) e->aux);
1573 e->aux = NULL;
1575 if (e->next_callee)
1576 e = e->next_callee;
1577 else
1581 if (e->caller == node)
1582 return;
1583 if (e->caller == update_since)
1584 check_inlinability = false;
1585 e = e->caller->callers;
1587 while (!e->next_callee);
1588 e = e->next_callee;
1593 /* Enqueue all recursive calls from NODE into priority queue depending on
1594 how likely we want to recursively inline the call. */
1596 static void
1597 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1598 edge_heap_t *heap)
1600 struct cgraph_edge *e;
1601 enum availability avail;
1603 for (e = where->callees; e; e = e->next_callee)
1604 if (e->callee == node
1605 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1606 && avail > AVAIL_INTERPOSABLE))
1607 heap->insert (-e->sreal_frequency (), e);
1608 for (e = where->callees; e; e = e->next_callee)
1609 if (!e->inline_failed)
1610 lookup_recursive_calls (node, e->callee, heap);
1613 /* Decide on recursive inlining: in the case the function has recursive calls,
1614 inline until the body size reaches the given argument. If any new indirect edges
1615 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1616 is NULL. */
1618 static bool
1619 recursive_inlining (struct cgraph_edge *edge,
1620 vec<cgraph_edge *> *new_edges)
1622 cgraph_node *to = (edge->caller->inlined_to
1623 ? edge->caller->inlined_to : edge->caller);
1624 int limit = opt_for_fn (to->decl,
1625 param_max_inline_insns_recursive_auto);
1626 edge_heap_t heap (sreal::min ());
1627 struct cgraph_node *node;
1628 struct cgraph_edge *e;
1629 struct cgraph_node *master_clone = NULL, *next;
1630 int depth = 0;
1631 int n = 0;
1633 node = edge->caller;
1634 if (node->inlined_to)
1635 node = node->inlined_to;
1637 if (DECL_DECLARED_INLINE_P (node->decl))
1638 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1640 /* Make sure that function is small enough to be considered for inlining. */
1641 if (estimate_size_after_inlining (node, edge) >= limit)
1642 return false;
1643 lookup_recursive_calls (node, node, &heap);
1644 if (heap.empty ())
1645 return false;
1647 if (dump_file)
1648 fprintf (dump_file,
1649 " Performing recursive inlining on %s\n", node->dump_name ());
1651 /* Do the inlining and update the list of recursive calls during the process. */
1652 while (!heap.empty ())
1654 struct cgraph_edge *curr = heap.extract_min ();
1655 struct cgraph_node *cnode, *dest = curr->callee;
1657 if (!can_inline_edge_p (curr, true)
1658 || !can_inline_edge_by_limits_p (curr, true))
1659 continue;
1661 /* MASTER_CLONE is produced in the case we already started modifying
1662 the function. Be sure to redirect the edge to the original body before
1663 estimating growths, otherwise we will be seeing growths after inlining
1664 the already modified body. */
1665 if (master_clone)
1667 curr->redirect_callee (master_clone);
1668 if (edge_growth_cache != NULL)
1669 edge_growth_cache->remove (curr);
1672 if (estimate_size_after_inlining (node, curr) > limit)
1674 curr->redirect_callee (dest);
1675 if (edge_growth_cache != NULL)
1676 edge_growth_cache->remove (curr);
1677 break;
1680 depth = 1;
1681 for (cnode = curr->caller;
1682 cnode->inlined_to; cnode = cnode->callers->caller)
1683 if (node->decl
1684 == curr->callee->ultimate_alias_target ()->decl)
1685 depth++;
1687 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1689 curr->redirect_callee (dest);
1690 if (edge_growth_cache != NULL)
1691 edge_growth_cache->remove (curr);
1692 continue;
1695 if (dump_file)
1697 fprintf (dump_file,
1698 " Inlining call of depth %i", depth);
1699 if (node->count.nonzero_p () && curr->count.initialized_p ())
1701 fprintf (dump_file, " called approx. %.2f times per call",
1702 (double)curr->count.to_gcov_type ()
1703 / node->count.to_gcov_type ());
1705 fprintf (dump_file, "\n");
1707 if (!master_clone)
1709 /* We need original clone to copy around. */
1710 master_clone = node->create_clone (node->decl, node->count,
1711 false, vNULL, true, NULL, NULL);
1712 for (e = master_clone->callees; e; e = e->next_callee)
1713 if (!e->inline_failed)
1714 clone_inlined_nodes (e, true, false, NULL);
1715 curr->redirect_callee (master_clone);
1716 if (edge_growth_cache != NULL)
1717 edge_growth_cache->remove (curr);
1720 inline_call (curr, false, new_edges, &overall_size, true);
1721 reset_node_cache (node);
1722 lookup_recursive_calls (node, curr->callee, &heap);
1723 n++;
1726 if (!heap.empty () && dump_file)
1727 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1729 if (!master_clone)
1730 return false;
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1734 "\n Inlined %i times, "
1735 "body grown from size %i to %i, time %f to %f\n", n,
1736 ipa_size_summaries->get (master_clone)->size,
1737 ipa_size_summaries->get (node)->size,
1738 ipa_fn_summaries->get (master_clone)->time.to_double (),
1739 ipa_fn_summaries->get (node)->time.to_double ());
1741 /* Remove the master clone we used for inlining. We rely on the fact that
1742 clones inlined into the master clone get queued just before the master
1743 clone, so we don't need recursion. */
1744 for (node = symtab->first_function (); node != master_clone;
1745 node = next)
1747 next = symtab->next_function (node);
1748 if (node->inlined_to == master_clone)
1749 node->remove ();
1751 master_clone->remove ();
1752 return true;
1756 /* Given the whole-compilation-unit size estimate of INSNS, compute how large we
1757 can allow the unit to grow. */
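/* A worked example with illustrative parameter values (not necessarily the
   defaults): with --param inline-unit-growth=40 and --param
   large-unit-insns=10000, a unit estimated at 50000 insns may grow to
   50000 * (100 + 40) / 100 = 70000 insns, while a unit of only 2000 insns is
   first rounded up to the 10000-insn floor, allowing growth up to 14000.  */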
1759 static int64_t
1760 compute_max_insns (cgraph_node *node, int insns)
1762 int max_insns = insns;
1763 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1764 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1766 return ((int64_t) max_insns
1767 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1771 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1773 static void
1774 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1776 while (new_edges.length () > 0)
1778 struct cgraph_edge *edge = new_edges.pop ();
1780 gcc_assert (!edge->aux);
1781 gcc_assert (edge->callee);
1782 if (edge->inline_failed
1783 && can_inline_edge_p (edge, true)
1784 && want_inline_small_function_p (edge, true)
1785 && can_inline_edge_by_limits_p (edge, true))
1786 edge->aux = heap->insert (edge_badness (edge, false), edge);
1790 /* Remove edge E from the heap. */
1792 static void
1793 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1795 if (e->aux)
1797 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1798 e->aux = NULL;
1802 /* Return true if speculation of edge E seems useful.
1803 If ANTICIPATE_INLINING is true, be conservative and hope that E
1804 may get inlined. */
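/* For example, suppose profiling shows that an indirect call almost always
   targets a hypothetical const function

     int square (int x) { return x * x; }

   Even if the speculative direct call never gets inlined, keeping it exposes
   the ECF_CONST flag on the direct edge, which the indirect edge does not
   carry, so the speculation is still considered useful below.  */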
1806 bool
1807 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1809 /* If we have already decided to inline the edge, it seems useful. */
1810 if (!e->inline_failed)
1811 return true;
1813 enum availability avail;
1814 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1815 e->caller);
1817 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1819 if (!e->maybe_hot_p ())
1820 return false;
1822 /* See if IP optimizations found something potentially useful about the
1823 function. For now we look only for CONST/PURE flags. Almost everything
1824 else we propagate is useless. */
1825 if (avail >= AVAIL_AVAILABLE)
1827 int ecf_flags = flags_from_decl_or_type (target->decl);
1828 if (ecf_flags & ECF_CONST)
1830 if (!(e->speculative_call_indirect_edge ()->indirect_info
1831 ->ecf_flags & ECF_CONST))
1832 return true;
1834 else if (ecf_flags & ECF_PURE)
1836 if (!(e->speculative_call_indirect_edge ()->indirect_info
1837 ->ecf_flags & ECF_PURE))
1838 return true;
1841 /* If we did not manage to inline the function nor redirect
1842 it to an ipa-cp clone (those are recognized by having the local flag
1843 set), keeping the speculation is probably pointless unless the hardware
1844 is missing an indirect call predictor. */
1845 if (!anticipate_inlining && !target->local)
1846 return false;
1847 /* For overwritable targets there is not much to do. */
1848 if (!can_inline_edge_p (e, false)
1849 || !can_inline_edge_by_limits_p (e, false, true))
1850 return false;
1851 /* OK, speculation seems interesting. */
1852 return true;
1855 /* We know that EDGE is not going to be inlined.
1856 See if we can remove speculation. */
1858 static void
1859 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1861 if (edge->speculative && !speculation_useful_p (edge, false))
1863 struct cgraph_node *node = edge->caller;
1864 struct cgraph_node *where = node->inlined_to
1865 ? node->inlined_to : node;
1866 auto_bitmap updated_nodes;
1868 if (edge->count.ipa ().initialized_p ())
1869 spec_rem += edge->count.ipa ();
1870 cgraph_edge::resolve_speculation (edge);
1871 reset_edge_caches (where);
1872 ipa_update_overall_fn_summary (where);
1873 update_caller_keys (edge_heap, where,
1874 updated_nodes, NULL);
1875 update_callee_keys (edge_heap, where, NULL,
1876 updated_nodes);
1880 /* Return true if NODE should be accounted in the overall size estimate.
1881 Skip all nodes optimized for size so we can measure the growth of the hot
1882 part of the program regardless of the padding. */
1884 bool
1885 inline_account_function_p (struct cgraph_node *node)
1887 return (!DECL_EXTERNAL (node->decl)
1888 && !opt_for_fn (node->decl, optimize_size)
1889 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1892 /* Count the number of callers of NODE and store it into DATA (which
1893 points to an int). Worker for cgraph_for_node_and_aliases. */
1895 static bool
1896 sum_callers (struct cgraph_node *node, void *data)
1898 struct cgraph_edge *e;
1899 int *num_calls = (int *)data;
1901 for (e = node->callers; e; e = e->next_caller)
1902 (*num_calls)++;
1903 return false;
1906 /* We only propagate across edges with a non-interposable callee. */
1908 inline bool
1909 ignore_edge_p (struct cgraph_edge *e)
1911 enum availability avail;
1912 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1913 return (avail <= AVAIL_INTERPOSABLE);
1916 /* We use a greedy algorithm for inlining small functions:
1917 all inline candidates are put into a prioritized heap ordered by
1918 increasing badness.
1920 The inlining of small functions is bounded by the unit growth parameters. */
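/* A rough sketch of what follows:

     populate the heap with every call edge that may be inlined;
     while the heap is not empty:
       pop the edge with the smallest badness;
       if its key is out of date, recompute the badness and re-insert it;
       skip it if inline limits, unit growth or profitability checks fail;
       inline it (or run the recursive inliner for self-recursive calls);
       update the keys of affected callers and callees.  */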
1922 static void
1923 inline_small_functions (void)
1925 struct cgraph_node *node;
1926 struct cgraph_edge *edge;
1927 edge_heap_t edge_heap (sreal::min ());
1928 auto_bitmap updated_nodes;
1929 int min_size;
1930 auto_vec<cgraph_edge *> new_indirect_edges;
1931 int initial_size = 0;
1932 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1933 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1934 new_indirect_edges.create (8);
1936 edge_removal_hook_holder
1937 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1939 /* Compute overall unit size and other global parameters used by badness
1940 metrics. */
1942 max_count = profile_count::uninitialized ();
1943 ipa_reduced_postorder (order, true, ignore_edge_p);
1944 free (order);
1946 FOR_EACH_DEFINED_FUNCTION (node)
1947 if (!node->inlined_to)
1949 if (!node->alias && node->analyzed
1950 && (node->has_gimple_body_p () || node->thunk)
1951 && opt_for_fn (node->decl, optimize))
1953 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1954 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1956 /* Do not account external functions, they will be optimized out
1957 if not inlined. Also only count the non-cold portion of the program. */
1958 if (inline_account_function_p (node))
1959 initial_size += ipa_size_summaries->get (node)->size;
1960 info->growth = estimate_growth (node);
1962 int num_calls = 0;
1963 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1964 true);
1965 if (num_calls == 1)
1966 info->single_caller = true;
1967 if (dfs && dfs->next_cycle)
1969 struct cgraph_node *n2;
1970 int id = dfs->scc_no + 1;
1971 for (n2 = node; n2;
1972 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1973 if (opt_for_fn (n2->decl, optimize))
1975 ipa_fn_summary *info2 = ipa_fn_summaries->get
1976 (n2->inlined_to ? n2->inlined_to : n2);
1977 if (info2->scc_no)
1978 break;
1979 info2->scc_no = id;
1984 for (edge = node->callers; edge; edge = edge->next_caller)
1985 max_count = max_count.max (edge->count.ipa ());
1987 ipa_free_postorder_info ();
1988 initialize_growth_caches ();
1990 if (dump_file)
1991 fprintf (dump_file,
1992 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1993 initial_size);
1995 overall_size = initial_size;
1996 min_size = overall_size;
1998 /* Populate the heap with all edges we might inline. */
2000 FOR_EACH_DEFINED_FUNCTION (node)
2002 bool update = false;
2003 struct cgraph_edge *next = NULL;
2004 bool has_speculative = false;
2006 if (!opt_for_fn (node->decl, optimize))
2007 continue;
2009 if (dump_file)
2010 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2012 for (edge = node->callees; edge; edge = edge->next_callee)
2014 if (edge->inline_failed
2015 && !edge->aux
2016 && can_inline_edge_p (edge, true)
2017 && want_inline_small_function_p (edge, true)
2018 && can_inline_edge_by_limits_p (edge, true)
2019 && edge->inline_failed)
2021 gcc_assert (!edge->aux);
2022 update_edge_key (&edge_heap, edge);
2024 if (edge->speculative)
2025 has_speculative = true;
2027 if (has_speculative)
2028 for (edge = node->callees; edge; edge = next)
2030 next = edge->next_callee;
2031 if (edge->speculative
2032 && !speculation_useful_p (edge, edge->aux != NULL))
2034 cgraph_edge::resolve_speculation (edge);
2035 update = true;
2038 if (update)
2040 struct cgraph_node *where = node->inlined_to
2041 ? node->inlined_to : node;
2042 ipa_update_overall_fn_summary (where);
2043 reset_edge_caches (where);
2044 update_caller_keys (&edge_heap, where,
2045 updated_nodes, NULL);
2046 update_callee_keys (&edge_heap, where, NULL,
2047 updated_nodes);
2048 bitmap_clear (updated_nodes);
2052 gcc_assert (in_lto_p
2053 || !(max_count > 0)
2054 || (profile_info && flag_branch_probabilities));
2056 while (!edge_heap.empty ())
2058 int old_size = overall_size;
2059 struct cgraph_node *where, *callee;
2060 sreal badness = edge_heap.min_key ();
2061 sreal current_badness;
2062 int growth;
2064 edge = edge_heap.extract_min ();
2065 gcc_assert (edge->aux);
2066 edge->aux = NULL;
2067 if (!edge->inline_failed || !edge->callee->analyzed)
2068 continue;
2070 /* Be sure that the caches are kept consistent.
2071 This check is affected by scaling roundoff errors when IPA profile
2072 counts are in use, so we skip it in that case. */
2073 if (flag_checking && !edge->callee->count.ipa_p ()
2074 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2076 sreal cached_badness = edge_badness (edge, false);
2078 int old_size_est = estimate_edge_size (edge);
2079 sreal old_time_est = estimate_edge_time (edge);
2080 int old_hints_est = estimate_edge_hints (edge);
2082 if (edge_growth_cache != NULL)
2083 edge_growth_cache->remove (edge);
2084 reset_node_cache (edge->caller->inlined_to
2085 ? edge->caller->inlined_to
2086 : edge->caller);
2087 gcc_assert (old_size_est == estimate_edge_size (edge));
2088 gcc_assert (old_time_est == estimate_edge_time (edge));
2089 /* FIXME:
2091 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2093 fails with profile feedback because some hints depend on the
2094 maybe_hot_edge_p predicate and, because the callee gets inlined into
2095 other calls, the edge may become cold.
2096 This ought to be fixed by computing relative probabilities
2097 for the given invocation, but that will be better done once the whole
2098 code is converted to sreals. Disable for now and revert to the "wrong"
2099 value so the enable/disable checking paths agree. */
2100 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2102 /* When updating the edge costs, we only decrease badness in the keys.
2103 Increases of badness are handled lazily; when we see a key with an
2104 out-of-date value, we re-insert it now. */
2105 current_badness = edge_badness (edge, false);
2106 gcc_assert (cached_badness == current_badness);
2107 gcc_assert (current_badness >= badness);
2109 else
2110 current_badness = edge_badness (edge, false);
2111 if (current_badness != badness)
2113 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2115 edge->aux = edge_heap.insert (current_badness, edge);
2116 continue;
2118 else
2119 badness = current_badness;
2122 if (!can_inline_edge_p (edge, true)
2123 || !can_inline_edge_by_limits_p (edge, true))
2125 resolve_noninline_speculation (&edge_heap, edge);
2126 continue;
2129 callee = edge->callee->ultimate_alias_target ();
2130 growth = estimate_edge_growth (edge);
2131 if (dump_file)
2133 fprintf (dump_file,
2134 "\nConsidering %s with %i size\n",
2135 callee->dump_name (),
2136 ipa_size_summaries->get (callee)->size);
2137 fprintf (dump_file,
2138 " to be inlined into %s in %s:%i\n"
2139 " Estimated badness is %f, frequency %.2f.\n",
2140 edge->caller->dump_name (),
2141 edge->call_stmt
2142 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2143 edge->call_stmt))
2144 > BUILTINS_LOCATION)
2145 ? gimple_filename ((const gimple *) edge->call_stmt)
2146 : "unknown",
2147 edge->call_stmt
2148 ? gimple_lineno ((const gimple *) edge->call_stmt)
2149 : -1,
2150 badness.to_double (),
2151 edge->sreal_frequency ().to_double ());
2152 if (edge->count.ipa ().initialized_p ())
2154 fprintf (dump_file, " Called ");
2155 edge->count.ipa ().dump (dump_file);
2156 fprintf (dump_file, " times\n");
2158 if (dump_flags & TDF_DETAILS)
2159 edge_badness (edge, true);
2162 where = edge->caller;
2164 if (overall_size + growth > compute_max_insns (where, min_size)
2165 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2167 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2168 report_inline_failed_reason (edge);
2169 resolve_noninline_speculation (&edge_heap, edge);
2170 continue;
2173 if (!want_inline_small_function_p (edge, true))
2175 resolve_noninline_speculation (&edge_heap, edge);
2176 continue;
2179 profile_count old_count = callee->count;
2181 /* Heuristics for inlining small functions work poorly for
2182 recursive calls, where the effect is similar to loop unrolling.
2183 When inlining such an edge seems profitable, leave the decision to
2184 the special-purpose recursive inliner. */
2185 if (edge->recursive_p ())
2187 if (where->inlined_to)
2188 where = where->inlined_to;
2189 if (!recursive_inlining (edge,
2190 opt_for_fn (edge->caller->decl,
2191 flag_indirect_inlining)
2192 ? &new_indirect_edges : NULL))
2194 edge->inline_failed = CIF_RECURSIVE_INLINING;
2195 resolve_noninline_speculation (&edge_heap, edge);
2196 continue;
2198 reset_edge_caches (where);
2199 /* The recursive inliner inlines all recursive calls of the function
2200 at once. Consequently, we need to update all callee keys. */
2201 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2202 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2203 update_callee_keys (&edge_heap, where, where, updated_nodes);
2204 bitmap_clear (updated_nodes);
2206 else
2208 struct cgraph_node *outer_node = NULL;
2209 int depth = 0;
2211 /* Consider the case where a self-recursive function A is inlined
2212 into B. This is a desirable optimization in some cases, since it
2213 has an effect similar to loop peeling and we might completely
2214 optimize out the recursive call. However, we must be extra
2215 selective. */
2217 where = edge->caller;
2218 while (where->inlined_to)
2220 if (where->decl == callee->decl)
2221 outer_node = where, depth++;
2222 where = where->callers->caller;
2224 if (outer_node
2225 && !want_inline_self_recursive_call_p (edge, outer_node,
2226 true, depth))
2228 edge->inline_failed
2229 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2230 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2231 resolve_noninline_speculation (&edge_heap, edge);
2232 continue;
2234 else if (depth && dump_file)
2235 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2237 gcc_checking_assert (!callee->inlined_to);
2239 int old_size = ipa_size_summaries->get (where)->size;
2240 sreal old_time = ipa_fn_summaries->get (where)->time;
2242 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2243 reset_edge_caches (edge->callee);
2244 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2246 /* If caller's size and time increased we do not need to update
2247 all edges because badness is not going to decrease. */
2248 if (old_size <= ipa_size_summaries->get (where)->size
2249 && old_time <= ipa_fn_summaries->get (where)->time
2250 /* Wrapper penalty may be non-monotonic in this respect.
2251 Fortunately it only affects small functions. */
2252 && !wrapper_heuristics_may_apply (where, old_size))
2253 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2254 updated_nodes);
2255 else
2256 update_callee_keys (&edge_heap, where,
2257 edge->callee,
2258 updated_nodes);
2260 where = edge->caller;
2261 if (where->inlined_to)
2262 where = where->inlined_to;
2264 /* Our profitability metric can depend on local properties
2265 such as the number of inlinable calls and the size of the function body.
2266 After inlining these properties might change for the function we
2267 inlined into (since its body size changed) and for the functions
2268 called by the function we inlined (since the number of their inlinable
2269 callers might change). */
2270 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2271 /* The offline copy count has possibly changed; recompute it if the profile
2272 is available. */
2273 struct cgraph_node *n
2274 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2275 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2276 && n->count.ipa_p ())
2277 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2278 bitmap_clear (updated_nodes);
2280 if (dump_enabled_p ())
2282 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2284 /* dump_printf can't handle %+i. */
2285 char buf_net_change[100];
2286 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2287 overall_size - old_size);
2289 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2290 " Inlined %C into %C which now has time %f and "
2291 "size %i, net change of %s%s.\n",
2292 edge->callee, edge->caller,
2293 s->time.to_double (),
2294 ipa_size_summaries->get (edge->caller)->size,
2295 buf_net_change,
2296 cross_module_call_p (edge) ? " (cross module)":"");
2298 if (min_size > overall_size)
2300 min_size = overall_size;
2302 if (dump_file)
2303 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2307 free_growth_caches ();
2308 if (dump_enabled_p ())
2309 dump_printf (MSG_NOTE,
2310 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2311 initial_size, overall_size,
2312 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2313 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2316 /* Flatten NODE. Performed both during early inlining and
2317 at IPA inlining time. */
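/* For example, a hypothetical function declared as

     __attribute__ ((flatten)) void render (void) { draw (); blit (); }

   gets every call in its body inlined recursively, stopping at cycles,
   recursive calls and calls that cannot be inlined.  */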
2319 static void
2320 flatten_function (struct cgraph_node *node, bool early, bool update)
2322 struct cgraph_edge *e;
2324 /* We shouldn't be called recursively when we are being processed. */
2325 gcc_assert (node->aux == NULL);
2327 node->aux = (void *) node;
2329 for (e = node->callees; e; e = e->next_callee)
2331 struct cgraph_node *orig_callee;
2332 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2334 /* Have we hit a cycle? It is time to give up. */
2335 if (callee->aux)
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2339 "Not inlining %C into %C to avoid cycle.\n",
2340 callee, e->caller);
2341 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2342 e->inline_failed = CIF_RECURSIVE_INLINING;
2343 continue;
2346 /* When the edge is already inlined, we just need to recurse into
2347 it in order to fully flatten the leaves. */
2348 if (!e->inline_failed)
2350 flatten_function (callee, early, false);
2351 continue;
2354 /* The flatten attribute needs to be processed during late inlining. For
2355 extra code quality, however, we also do flattening during early
2356 optimization. */
2357 if (!early
2358 ? !can_inline_edge_p (e, true)
2359 && !can_inline_edge_by_limits_p (e, true)
2360 : !can_early_inline_edge_p (e))
2361 continue;
2363 if (e->recursive_p ())
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2367 "Not inlining: recursive call.\n");
2368 continue;
2371 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2372 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2376 "Not inlining: SSA form does not match.\n");
2377 continue;
2380 /* Inline the edge and flatten the inline clone. Avoid
2381 recursing through the original node if the node was cloned. */
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2384 " Inlining %C into %C.\n",
2385 callee, e->caller);
2386 orig_callee = callee;
2387 inline_call (e, true, NULL, NULL, false);
2388 if (e->callee != orig_callee)
2389 orig_callee->aux = (void *) node;
2390 flatten_function (e->callee, early, false);
2391 if (e->callee != orig_callee)
2392 orig_callee->aux = NULL;
2395 node->aux = NULL;
2396 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2397 if (update && opt_for_fn (where->decl, optimize))
2398 ipa_update_overall_fn_summary (where);
2401 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2402 DATA points to the number of calls originally found so we avoid infinite
2403 recursion. */
2405 static bool
2406 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2407 hash_set<cgraph_node *> *callers)
2409 int *num_calls = (int *)data;
2410 bool callee_removed = false;
2412 while (node->callers && !node->inlined_to)
2414 struct cgraph_node *caller = node->callers->caller;
2416 if (!can_inline_edge_p (node->callers, true)
2417 || !can_inline_edge_by_limits_p (node->callers, true)
2418 || node->callers->recursive_p ())
2420 if (dump_file)
2421 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2422 *num_calls = 0;
2423 return false;
2426 if (dump_file)
2428 cgraph_node *ultimate = node->ultimate_alias_target ();
2429 fprintf (dump_file,
2430 "\nInlining %s size %i.\n",
2431 ultimate->dump_name (),
2432 ipa_size_summaries->get (ultimate)->size);
2433 fprintf (dump_file,
2434 " Called once from %s %i insns.\n",
2435 node->callers->caller->dump_name (),
2436 ipa_size_summaries->get (node->callers->caller)->size);
2439 /* Remember which callers we inlined into, delaying the update of the
2440 overall summary. */
2441 callers->add (node->callers->caller);
2442 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2443 if (dump_file)
2444 fprintf (dump_file,
2445 " Inlined into %s which now has %i size\n",
2446 caller->dump_name (),
2447 ipa_size_summaries->get (caller)->size);
2448 if (!(*num_calls)--)
2450 if (dump_file)
2451 fprintf (dump_file, "New calls found; giving up.\n");
2452 return callee_removed;
2454 if (callee_removed)
2455 return true;
2457 return false;
2460 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2461 update. */
2463 static bool
2464 inline_to_all_callers (struct cgraph_node *node, void *data)
2466 hash_set<cgraph_node *> callers;
2467 bool res = inline_to_all_callers_1 (node, data, &callers);
2468 /* Perform the delayed update of the overall summary of all callers
2469 processed. This avoids quadratic behavior in the cases where
2470 we have a lot of calls to the same function. */
2471 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2472 i != callers.end (); ++i)
2473 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2474 return res;
2477 /* Output overall time estimate. */
2478 static void
2479 dump_overall_stats (void)
2481 sreal sum_weighted = 0, sum = 0;
2482 struct cgraph_node *node;
2484 FOR_EACH_DEFINED_FUNCTION (node)
2485 if (!node->inlined_to
2486 && !node->alias)
2488 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2489 if (s != NULL)
2491 sum += s->time;
2492 if (node->count.ipa ().initialized_p ())
2493 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2496 fprintf (dump_file, "Overall time estimate: "
2497 "%f weighted by profile: "
2498 "%f\n", sum.to_double (), sum_weighted.to_double ());
2501 /* Output some useful stats about inlining. */
2503 static void
2504 dump_inline_stats (void)
2506 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2507 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2508 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2509 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2510 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2511 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2512 int64_t reason[CIF_N_REASONS][2];
2513 sreal reason_freq[CIF_N_REASONS];
2514 int i;
2515 struct cgraph_node *node;
2517 memset (reason, 0, sizeof (reason));
2518 for (i=0; i < CIF_N_REASONS; i++)
2519 reason_freq[i] = 0;
2520 FOR_EACH_DEFINED_FUNCTION (node)
2522 struct cgraph_edge *e;
2523 for (e = node->callees; e; e = e->next_callee)
2525 if (e->inline_failed)
2527 if (e->count.ipa ().initialized_p ())
2528 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2529 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2530 reason[(int) e->inline_failed][1] ++;
2531 if (DECL_VIRTUAL_P (e->callee->decl)
2532 && e->count.ipa ().initialized_p ())
2534 if (e->indirect_inlining_edge)
2535 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2536 else
2537 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2539 else if (e->count.ipa ().initialized_p ())
2541 if (e->indirect_inlining_edge)
2542 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2543 else
2544 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2547 else if (e->count.ipa ().initialized_p ())
2549 if (e->speculative)
2551 if (DECL_VIRTUAL_P (e->callee->decl))
2552 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2553 else
2554 inlined_speculative += e->count.ipa ().to_gcov_type ();
2556 else if (DECL_VIRTUAL_P (e->callee->decl))
2558 if (e->indirect_inlining_edge)
2559 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2560 else
2561 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2563 else
2565 if (e->indirect_inlining_edge)
2566 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2567 else
2568 inlined_cnt += e->count.ipa ().to_gcov_type ();
2572 for (e = node->indirect_calls; e; e = e->next_callee)
2573 if (e->indirect_info->polymorphic
2574 && e->count.ipa ().initialized_p ())
2575 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2576 else if (e->count.ipa ().initialized_p ())
2577 indirect_cnt += e->count.ipa ().to_gcov_type ();
2579 if (max_count.initialized_p ())
2581 fprintf (dump_file,
2582 "Inlined %" PRId64 " + speculative "
2583 "%" PRId64 " + speculative polymorphic "
2584 "%" PRId64 " + previously indirect "
2585 "%" PRId64 " + virtual "
2586 "%" PRId64 " + virtual and previously indirect "
2587 "%" PRId64 "\n" "Not inlined "
2588 "%" PRId64 " + previously indirect "
2589 "%" PRId64 " + virtual "
2590 "%" PRId64 " + virtual and previously indirect "
2591 "%" PRId64 " + still indirect "
2592 "%" PRId64 " + still indirect polymorphic "
2593 "%" PRId64 "\n", inlined_cnt,
2594 inlined_speculative, inlined_speculative_ply,
2595 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2596 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2597 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2598 fprintf (dump_file, "Removed speculations ");
2599 spec_rem.dump (dump_file);
2600 fprintf (dump_file, "\n");
2602 dump_overall_stats ();
2603 fprintf (dump_file, "\nWhy inlining failed?\n");
2604 for (i = 0; i < CIF_N_REASONS; i++)
2605 if (reason[i][1])
2606 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2607 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2608 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2611 /* Called when node is removed. */
2613 static void
2614 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2616 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2617 return;
2619 hash_set<struct cgraph_node *> *removed
2620 = (hash_set<struct cgraph_node *> *) data;
2621 removed->add (node);
2624 /* Decide on the inlining. We do so in topological order to avoid
2625 the expense of updating data structures. */
2627 static unsigned int
2628 ipa_inline (void)
2630 struct cgraph_node *node;
2631 int nnodes;
2632 struct cgraph_node **order;
2633 int i, j;
2634 int cold;
2635 bool remove_functions = false;
2637 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2639 if (dump_file)
2640 ipa_dump_fn_summaries (dump_file);
2642 nnodes = ipa_reverse_postorder (order);
2643 spec_rem = profile_count::zero ();
2645 FOR_EACH_FUNCTION (node)
2647 node->aux = 0;
2649 /* Recompute the default reasons for inlining because they may have
2650 changed during merging. */
2651 if (in_lto_p)
2653 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2655 gcc_assert (e->inline_failed);
2656 initialize_inline_failed (e);
2658 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2659 initialize_inline_failed (e);
2663 if (dump_file)
2664 fprintf (dump_file, "\nFlattening functions:\n");
2666 /* First shrink the order array so that it only contains nodes with
2667 the flatten attribute. */
2668 for (i = nnodes - 1, j = i; i >= 0; i--)
2670 node = order[i];
2671 if (node->definition
2672 /* Do not try to flatten aliases. These may happen for example when
2673 creating local aliases. */
2674 && !node->alias
2675 && lookup_attribute ("flatten",
2676 DECL_ATTRIBUTES (node->decl)) != NULL)
2677 order[j--] = order[i];
2680 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2681 nodes with flatten attribute. If there is more than one such
2682 node, we need to register a node removal hook, as flatten_function
2683 could remove other nodes with flatten attribute. See PR82801. */
2684 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2685 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2686 if (j < nnodes - 2)
2688 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2689 node_removal_hook_holder
2690 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2691 flatten_removed_nodes);
2694 /* In the first pass handle functions to be flattened. Do this with
2695 a priority so none of our later choices will make this impossible. */
2696 for (i = nnodes - 1; i > j; i--)
2698 node = order[i];
2699 if (flatten_removed_nodes
2700 && flatten_removed_nodes->contains (node))
2701 continue;
2703 /* Handle nodes to be flattened.
2704 Ideally, when processing callees, we would stop inlining at the
2705 entry of cycles, possibly cloning that entry point and trying
2706 to flatten it, turning it into a self-recursive
2707 function. */
2708 if (dump_file)
2709 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2710 flatten_function (node, false, true);
2713 if (j < nnodes - 2)
2715 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2716 delete flatten_removed_nodes;
2718 free (order);
2720 if (dump_file)
2721 dump_overall_stats ();
2723 inline_small_functions ();
2725 gcc_assert (symtab->state == IPA_SSA);
2726 symtab->state = IPA_SSA_AFTER_INLINING;
2727 /* Do the first after-inlining removal. We want to remove all "stale" extern
2728 inline functions and virtual functions so we really know what is called
2729 once. */
2730 symtab->remove_unreachable_nodes (dump_file);
2732 /* Inline functions with the property that after inlining into all callers
2733 the code size will shrink because the out-of-line copy is eliminated.
2734 We do this regardless of the callee size as long as function growth limits
2735 are met. */
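/* The typical beneficiary is a static helper with a single call site, e.g.
   the hypothetical

     static int helper (int x) { return x * 3; }
     int caller (int y) { return helper (y) + 1; }

   Inlining helper into its only caller allows the out-of-line copy to be
   removed, so the unit shrinks even when the helper itself is large.  */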
2736 if (dump_file)
2737 fprintf (dump_file,
2738 "\nDeciding on functions to be inlined into all callers and "
2739 "removing useless speculations:\n");
2741 /* Inlining one function called once has a good chance of preventing
2742 inlining of other functions into the same callee. Ideally we should
2743 work in priority order, but inlining hot functions first is probably
2744 a good cut without the extra pain of maintaining the queue.
2746 ??? This does not really fit the bill perfectly: inlining a function
2747 into its callee often leads to better optimization of the callee due to
2748 increased context for optimization.
2749 For example, if the main() function calls a function that outputs help
2750 and then a function that does the main optimization, we should inline
2751 the second with priority even if both calls are cold by themselves.
2753 We probably want to implement a new predicate replacing our use of
2754 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2755 to be hot. */
2756 for (cold = 0; cold <= 1; cold ++)
2758 FOR_EACH_DEFINED_FUNCTION (node)
2760 struct cgraph_edge *edge, *next;
2761 bool update = false;
2763 if (!opt_for_fn (node->decl, optimize)
2764 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2765 continue;
2767 for (edge = node->callees; edge; edge = next)
2769 next = edge->next_callee;
2770 if (edge->speculative && !speculation_useful_p (edge, false))
2772 if (edge->count.ipa ().initialized_p ())
2773 spec_rem += edge->count.ipa ();
2774 cgraph_edge::resolve_speculation (edge);
2775 update = true;
2776 remove_functions = true;
2779 if (update)
2781 struct cgraph_node *where = node->inlined_to
2782 ? node->inlined_to : node;
2783 reset_edge_caches (where);
2784 ipa_update_overall_fn_summary (where);
2786 if (want_inline_function_to_all_callers_p (node, cold))
2788 int num_calls = 0;
2789 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2790 true);
2791 while (node->call_for_symbol_and_aliases
2792 (inline_to_all_callers, &num_calls, true))
2794 remove_functions = true;
2799 if (dump_enabled_p ())
2800 dump_printf (MSG_NOTE,
2801 "\nInlined %i calls, eliminated %i functions\n\n",
2802 ncalls_inlined, nfunctions_inlined);
2803 if (dump_file)
2804 dump_inline_stats ();
2806 if (dump_file)
2807 ipa_dump_fn_summaries (dump_file);
2808 return remove_functions ? TODO_remove_functions : 0;
2811 /* Inline always-inline function calls in NODE. */
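/* These are calls whose callee has DECL_DISREGARD_INLINE_LIMITS set, e.g. a
   hypothetical

     static inline int add (int a, int b) __attribute__ ((always_inline));

   Such calls are inlined even when not optimizing, except for recursive
   calls and calls that cannot be early inlined at all.  */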
2813 static bool
2814 inline_always_inline_functions (struct cgraph_node *node)
2816 struct cgraph_edge *e;
2817 bool inlined = false;
2819 for (e = node->callees; e; e = e->next_callee)
2821 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2822 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2823 continue;
2825 if (e->recursive_p ())
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2829 " Not inlining recursive call to %C.\n",
2830 e->callee);
2831 e->inline_failed = CIF_RECURSIVE_INLINING;
2832 continue;
2835 if (!can_early_inline_edge_p (e))
2837 /* Set inlined to true if the callee is marked "always_inline" but
2838 is not inlinable. This will allow flagging an error later in
2839 expand_call_inline in tree-inline.c. */
2840 if (lookup_attribute ("always_inline",
2841 DECL_ATTRIBUTES (callee->decl)) != NULL)
2842 inlined = true;
2843 continue;
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2848 " Inlining %C into %C (always_inline).\n",
2849 e->callee, e->caller);
2850 inline_call (e, true, NULL, NULL, false);
2851 inlined = true;
2853 if (inlined)
2854 ipa_update_overall_fn_summary (node);
2856 return inlined;
2859 /* Decide on the inlining. We do so in topological order to avoid
2860 the expense of updating data structures. */
2862 static bool
2863 early_inline_small_functions (struct cgraph_node *node)
2865 struct cgraph_edge *e;
2866 bool inlined = false;
2868 for (e = node->callees; e; e = e->next_callee)
2870 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2872 /* We can encounter a not-yet-analyzed function during
2873 early inlining on callgraphs with strongly
2874 connected components. */
2875 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2876 if (s == NULL || !s->inlinable || !e->inline_failed)
2877 continue;
2879 /* Do not consider functions not declared inline, unless -finline-small-functions or -finline-functions is in effect. */
2880 if (!DECL_DECLARED_INLINE_P (callee->decl)
2881 && !opt_for_fn (node->decl, flag_inline_small_functions)
2882 && !opt_for_fn (node->decl, flag_inline_functions))
2883 continue;
2885 if (dump_enabled_p ())
2886 dump_printf_loc (MSG_NOTE, e->call_stmt,
2887 "Considering inline candidate %C.\n",
2888 callee);
2890 if (!can_early_inline_edge_p (e))
2891 continue;
2893 if (e->recursive_p ())
2895 if (dump_enabled_p ())
2896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2897 " Not inlining: recursive call.\n");
2898 continue;
2901 if (!want_early_inline_function_p (e))
2902 continue;
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2906 " Inlining %C into %C.\n",
2907 callee, e->caller);
2908 inline_call (e, true, NULL, NULL, false);
2909 inlined = true;
2912 if (inlined)
2913 ipa_update_overall_fn_summary (node);
2915 return inlined;
2918 unsigned int
2919 early_inliner (function *fun)
2921 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2922 struct cgraph_edge *edge;
2923 unsigned int todo = 0;
2924 int iterations = 0;
2925 bool inlined = false;
2927 if (seen_error ())
2928 return 0;
2930 /* Do nothing if data structures for the IPA inliner are already computed.
2931 This happens when some pass decides to construct a new function and
2932 cgraph_add_new_function calls lowering passes and early optimization on
2933 it. This may confuse us when the early inliner decides to inline a call
2934 to a function clone, because function clones don't have a parameter list
2935 in ipa-prop matching their signature. */
2936 if (ipa_node_params_sum)
2937 return 0;
2939 if (flag_checking)
2940 node->verify ();
2941 node->remove_all_references ();
2943 /* Even when not optimizing or not inlining inline always-inline
2944 functions. */
2945 inlined = inline_always_inline_functions (node);
2947 if (!optimize
2948 || flag_no_inline
2949 || !flag_early_inlining
2950 /* Never inline regular functions into always-inline functions
2951 during incremental inlining. This sucks as functions calling
2952 always-inline functions will get less optimized, but at the
2953 same time inlining functions that call always-inline
2954 functions into an always-inline function might introduce
2955 cycles of edges to be always inlined in the callgraph.
2957 We might want to be smarter and just avoid this type of inlining. */
2958 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2959 && lookup_attribute ("always_inline",
2960 DECL_ATTRIBUTES (node->decl))))
2962 else if (lookup_attribute ("flatten",
2963 DECL_ATTRIBUTES (node->decl)) != NULL)
2965 /* When the function is marked to be flattened, recursively inline
2966 all calls in it. */
2967 if (dump_enabled_p ())
2968 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2969 "Flattening %C\n", node);
2970 flatten_function (node, true, true);
2971 inlined = true;
2973 else
2975 /* If some always_inline functions were inlined, apply the changes.
2976 This way we will not account always-inline functions against the growth
2977 limits, and moreover we will inline calls from always-inline functions
2978 that we previously skipped because of the conditional above. */
2979 if (inlined)
2981 timevar_push (TV_INTEGRATION);
2982 todo |= optimize_inline_calls (current_function_decl);
2983 /* optimize_inline_calls call above might have introduced new
2984 statements that don't have inline parameters computed. */
2985 for (edge = node->callees; edge; edge = edge->next_callee)
2987 /* We can encounter a not-yet-analyzed function during
2988 early inlining on callgraphs with strongly
2989 connected components. */
2990 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2991 es->call_stmt_size
2992 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2993 es->call_stmt_time
2994 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2996 ipa_update_overall_fn_summary (node);
2997 inlined = false;
2998 timevar_pop (TV_INTEGRATION);
3000 /* We iterate incremental inlining to get trivial cases of indirect
3001 inlining. */
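/* A typical case: after inlining a small wrapper that calls through a
   function-pointer parameter, the pointer may become a known constant, the
   indirect call becomes direct, and the next iteration can then inline that
   call as well.  */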
3002 while (iterations < opt_for_fn (node->decl,
3003 param_early_inliner_max_iterations)
3004 && early_inline_small_functions (node))
3006 timevar_push (TV_INTEGRATION);
3007 todo |= optimize_inline_calls (current_function_decl);
3009 /* Technically we ought to recompute inline parameters so the new
3010 iteration of the early inliner works as expected. However, the values
3011 we have are approximately right, so we only need to update edge
3012 info that might be cleared out for newly discovered edges. */
3013 for (edge = node->callees; edge; edge = edge->next_callee)
3015 /* We have no summary for new bound store calls yet. */
3016 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3017 es->call_stmt_size
3018 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3019 es->call_stmt_time
3020 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3022 if (iterations < opt_for_fn (node->decl,
3023 param_early_inliner_max_iterations) - 1)
3024 ipa_update_overall_fn_summary (node);
3025 timevar_pop (TV_INTEGRATION);
3026 iterations++;
3027 inlined = false;
3029 if (dump_file)
3030 fprintf (dump_file, "Iterations: %i\n", iterations);
3033 if (inlined)
3035 timevar_push (TV_INTEGRATION);
3036 todo |= optimize_inline_calls (current_function_decl);
3037 timevar_pop (TV_INTEGRATION);
3040 fun->always_inline_functions_inlined = true;
3042 return todo;
3045 /* Do inlining of small functions. Doing so early helps profiling and other
3046 passes to be somewhat more effective and avoids some code duplication in
3047 the later real inlining pass for testcases with very many function calls. */
3049 namespace {
3051 const pass_data pass_data_early_inline =
3053 GIMPLE_PASS, /* type */
3054 "einline", /* name */
3055 OPTGROUP_INLINE, /* optinfo_flags */
3056 TV_EARLY_INLINING, /* tv_id */
3057 PROP_ssa, /* properties_required */
3058 0, /* properties_provided */
3059 0, /* properties_destroyed */
3060 0, /* todo_flags_start */
3061 0, /* todo_flags_finish */
3064 class pass_early_inline : public gimple_opt_pass
3066 public:
3067 pass_early_inline (gcc::context *ctxt)
3068 : gimple_opt_pass (pass_data_early_inline, ctxt)
3071 /* opt_pass methods: */
3072 virtual unsigned int execute (function *);
3074 }; // class pass_early_inline
3076 unsigned int
3077 pass_early_inline::execute (function *fun)
3079 return early_inliner (fun);
3082 } // anon namespace
3084 gimple_opt_pass *
3085 make_pass_early_inline (gcc::context *ctxt)
3087 return new pass_early_inline (ctxt);
3090 namespace {
3092 const pass_data pass_data_ipa_inline =
3094 IPA_PASS, /* type */
3095 "inline", /* name */
3096 OPTGROUP_INLINE, /* optinfo_flags */
3097 TV_IPA_INLINING, /* tv_id */
3098 0, /* properties_required */
3099 0, /* properties_provided */
3100 0, /* properties_destroyed */
3101 0, /* todo_flags_start */
3102 ( TODO_dump_symtab ), /* todo_flags_finish */
3105 class pass_ipa_inline : public ipa_opt_pass_d
3107 public:
3108 pass_ipa_inline (gcc::context *ctxt)
3109 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3110 NULL, /* generate_summary */
3111 NULL, /* write_summary */
3112 NULL, /* read_summary */
3113 NULL, /* write_optimization_summary */
3114 NULL, /* read_optimization_summary */
3115 NULL, /* stmt_fixup */
3116 0, /* function_transform_todo_flags_start */
3117 inline_transform, /* function_transform */
3118 NULL) /* variable_transform */
3121 /* opt_pass methods: */
3122 virtual unsigned int execute (function *) { return ipa_inline (); }
3124 }; // class pass_ipa_inline
3126 } // anon namespace
3128 ipa_opt_pass_d *
3129 make_pass_ipa_inline (gcc::context *ctxt)
3131 return new pass_ipa_inline (ctxt);