gcc/ipa-inline.c
/* Inlining decision heuristics.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth and so
     on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inlining

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result, the
       callees of the function seen by the early inliner were already optimized
       and the results of early inlining add a lot of optimization opportunities
       for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate abstraction
       penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by the real
       IPA optimizers.

       Because of the lack of whole unit knowledge, the pass cannot really make
       good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining leads
       to devirtualization and other modification of the callgraph, so functions
       may become unreachable during the process.  Also functions declared as
       extern inline or virtual functions are removed, since after inlining
       we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by eliminating
       the need for an offline copy of the function.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "profile.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
#include "attribs.h"
#include "asan.h"
typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;

/* Statistics we collect about the inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the caller.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for the function e->caller is inlined into.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
        limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
        stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > opt_for_fn (to->decl, param_large_function_insns)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large I/O data structures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * opt_for_fn (to->decl, param_stack_frame_growth)
                       / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
                   + outer_info->estimated_self_stack_size
                   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit overoptimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
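
/* Worked example of the body-growth check above, using illustrative (not
   necessarily default) parameter values: if the largest function body on the
   inline chain has self_size 1000 and param_large_function_growth is 100,
   the limit becomes 1000 + 1000 * 100 / 100 = 2000 instructions.  Inlining
   is rejected only when the estimated size after inlining is at least the
   callee's own size and exceeds both param_large_function_insns and this
   computed limit.  */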
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                       "  not inlinable: %C -> %C, %s\n",
                       e->caller, e->callee,
                       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
           || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
          && e->caller->lto_file_data
          && e->callee->ultimate_alias_target ()->lto_file_data)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                           "  LTO objects: %s, %s\n",
                           e->caller->lto_file_data->file_name,
                           e->callee->ultimate_alias_target ()->lto_file_data->file_name);
        }
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
        if (dump_file)
          cl_target_option_print_diff
            (dump_file, 2, target_opts_for_fn (e->caller->decl),
             target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
        if (dump_file)
          cl_optimization_print_diff
            (dump_file, 2, opts_for_fn (e->caller->decl),
             opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Follow clang and allow inlining for always_inline functions.  */
  if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  const sanitize_code codes[] =
    {
      SANITIZE_ADDRESS,
      SANITIZE_THREAD,
      SANITIZE_UNDEFINED,
      SANITIZE_UNDEFINED_NONDEFAULT,
      SANITIZE_POINTER_COMPARE,
      SANITIZE_POINTER_SUBTRACT
    };

  for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
    if (sanitize_flags_p (codes[i], caller)
        != sanitize_flags_p (codes[i], callee))
      return false;

  if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
    return false;

  return true;
}
/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where an exact match is needed for correctness.  */
#define check_match(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag)
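
/* Illustrative note (not part of the original source): these macros expect
   CALLER, CALLEE and ALWAYS_INLINE to be local variables in scope at their
   use sites below.  For instance, check_maybe_up (flag_errno_math) expands
   to a test that the flag values of caller and callee differ and that, for
   an always_inline callee, the caller's value is not already the greater
   one; a true result is treated as a blocking optimization mismatch.  */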
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
                   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
                 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of the FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
           || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return the inline_insns_single limit for function N.  If HINT or HINT2 is
   true, scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint, bool hint2)
{
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
        spd = 1000000;
      return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
    }
  if (hint || hint2)
    return opt_for_fn (n->decl, param_max_inline_insns_single)
           * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return opt_for_fn (n->decl, param_max_inline_insns_single);
}

/* Return the inline_insns_auto limit for function N.  If HINT or HINT2 is
   true, scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
        spd = 1000000;
      return max_inline_insns_auto * spd / 100;
    }
  if (hint || hint2)
    return max_inline_insns_auto
           * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return max_inline_insns_auto;
}
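
/* Worked example for the two helpers above, using illustrative values (the
   real numbers come from --param max-inline-insns-single,
   --param max-inline-insns-auto and --param inline-heuristics-hint-percent):
   with a base limit of 200 and a hint percentage of 200, one matching hint
   group scales the limit to 200 * 200 / 100 = 400 instructions, while both
   hint groups scale it by the squared (and capped) percentage,
   200 * (200 * 200) / 100 = 80000.  */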
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
                             bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
                            DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
           && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
        (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
         && lookup_attribute ("always_inline",
                              DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
         below and inlined across optimization boundaries.
         Enabling the checks below breaks several packages by refusing
         to inline library always_inline functions.  See PR65873.
         Disable the check for early inlining for now until a better solution
         is found.  */
      if (always_inline && early)
        ;
      /* There are some options that change IL semantics which means
         we cannot inline in these cases for correctness reasons.
         Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
               || check_match (flag_trapv)
               || check_match (flag_pcc_struct_return)
               || check_maybe_down (optimize_debug)
               /* When caller or callee does FP math, be sure the FP codegen
                  flags are compatible.  */
               || ((caller_info->fp_expressions && callee_info->fp_expressions)
                   && (check_maybe_up (flag_rounding_math)
                       || check_maybe_up (flag_trapping_math)
                       || check_maybe_down (flag_unsafe_math_optimizations)
                       || check_maybe_down (flag_finite_math_only)
                       || check_maybe_up (flag_signaling_nans)
                       || check_maybe_down (flag_cx_limited_range)
                       || check_maybe_up (flag_signed_zeros)
                       || check_maybe_down (flag_associative_math)
                       || check_maybe_down (flag_reciprocal_math)
                       || check_maybe_down (flag_fp_int_builtin_inexact)
                       /* Strictly speaking only when the callee contains
                          function calls that may end up setting errno.  */
                       || check_maybe_up (flag_errno_math)))
               /* We do not want code compiled with exceptions to be
                  brought into a non-EH function unless we know that the callee
                  does not throw.
                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
               || (check_maybe_up (flag_non_call_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               || (check_maybe_up (flag_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               /* When devirtualization is disabled for the callee, it is not
                  safe to inline it as we possibly mangled the type info.
                  Allow early inlining of always inlines.  */
               || (!early && check_maybe_down (flag_devirtualize)))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
        ;
      /* When the user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If explicit optimize attributes are not used, the mismatch is caused
         by different command line options used to build different units.
         Do not care about COMDAT functions - those are intended to be
         optimized with the optimization flags of the module they are used in.
         Also do not care about mixing up size/speed optimization when
         DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
                && !lookup_attribute ("optimize",
                                      DECL_ATTRIBUTES (caller->decl)))
               || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        ;
      /* If the mismatch is caused by merging two LTO units with different
         optimization flags we want to be a bit nicer.  However never inline
         if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
               || !opt_for_fn (caller->decl, optimize))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If the callee is optimized for size and the caller is not, allow
         inlining if code shrinks or we are in the
         param_max_inline_insns_single limit and the callee is inline (and
         thus likely a unified comdat).  This will allow the caller to run
         faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               > opt_for_fn (caller->decl, optimize_size))
        {
          int growth = estimate_edge_growth (e);
          if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
              && (!DECL_DECLARED_INLINE_P (callee->decl)
                  && growth >= MAX (inline_insns_single (caller, false, false),
                                    inline_insns_auto (caller, false, false))))
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
      /* If the callee is more aggressively optimized for performance than the
         caller, we generally want to inline only cheap (runtime wise)
         functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               < opt_for_fn (caller->decl, optimize_size)
               || (opt_for_fn (callee->decl, optimize)
                   > opt_for_fn (caller->decl, optimize)))
        {
          if (estimate_edge_time (e)
              >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have a CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                         "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}
/* Return the number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in early inlining the small function
   called by E.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before the profile summary, all
     hot paths' IR look exactly the same as in the profiled binary.  As a
     result, in the early inliner, we will disregard the size limit and inline
     those callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      if (min_growth > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "call is cold and code would grow "
                             "at least by %i\n",
                             e->caller, callee,
                             min_growth);
          want_inline = false;
        }
      else
        growth = estimate_edge_growth (e);

      if (!want_inline || growth <= param_max_inline_insns_size)
        ;
      else if (!e->maybe_hot_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "call is cold and code would grow by %i\n",
                             e->caller, callee,
                             growth);
          want_inline = false;
        }
      else if (growth > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns\n",
                             e->caller, callee, growth);
          want_inline = false;
        }
      else if ((n = num_calls (callee)) != 0
               && growth * (n + 1) > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns "
                             "divided by number of calls\n",
                             e->caller, callee, growth);
          want_inline = false;
        }
    }
  return want_inline;
}
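
/* Illustration of the final test above, with made-up numbers: if inlining
   grows the caller by 12 instructions and the callee itself still contains
   4 non-builtin calls, the test compares 12 * (4 + 1) = 60 against
   --param early-inlining-insns; as the dump message puts it, the growth
   budget is effectively divided by the number of calls.  */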
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
                             sreal uninlined_call_time,
                             sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
                           sreal time,
                           sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match the one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
/* Determine the time saved by inlining EDGE of frequency FREQ,
   where the callee's runtime without inlining is UNINLINED_TIME
   and with inlining it is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
                  sreal freq,
                  sreal uninlined_time,
                  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match the one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      speedup = (speedup + call_time);
      if (freq != 1)
        speedup = speedup * freq;
    }
  else if (freq == 0)
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to
                         : e->caller);
  int limit = opt_for_fn (caller->decl, param_inline_min_speedup);

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
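
/* Worked example for the check above (illustrative numbers): with an
   uninlined caller+callee time of 1000 units, an estimated inlined time of
   850 units and --param inline-min-speedup set to 10, the test compares
   (1000 - 850) * 100 = 15000 against 1000 * 10 = 10000, so the 15% saving
   counts as a big speedup.  */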
/* Return true if we are interested in inlining the small function called
   by E.  When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  cgraph_node *to = (e->caller->inlined_to
                     ? e->caller->inlined_to : e->caller);

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check whether the function can be a good
     inline candidate.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
           && ipa_fn_summaries->get (callee)->min_size
                - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_auto (e->caller, true, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
            || e->count.ipa ().nonzero_p ())
           && ipa_fn_summaries->get (callee)->min_size
                - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_single (e->caller, true, true))
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
                          ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
                          : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      /* We have two independent groups of hints.  If one matches in each
         of the groups, the limits are increased.  If both groups match, the
         limit is increased even more.  */
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
                                   | INLINE_HINT_known_hot
                                   | INLINE_HINT_loop_iterations
                                   | INLINE_HINT_loop_stride));
      bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);

      if (growth <= opt_for_fn (to->decl,
                                param_max_inline_insns_size))
        ;
      /* Apply the param_max_inline_insns_single limit.  Do not do so when
         hints suggest that inlining the given function is very profitable.
         Avoid computation of big_speedup_p when it is not necessary to change
         the outcome of the decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_single (e->caller, apply_hints,
                                                 apply_hints2)
               && (apply_hints || apply_hints2
                   || growth >= inline_insns_single (e->caller, true,
                                                     apply_hints2)
                   || !big_speedup_p (e)))
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !opt_for_fn (e->caller->decl, flag_inline_functions)
               && growth >= opt_for_fn (to->decl,
                                        param_max_inline_insns_small))
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_NOT_DECLARED_INLINED;
              want_inline = false;
            }
        }
      /* Apply the param_max_inline_insns_auto limit for functions not
         declared inline.  Bypass the limit when the speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_auto (e->caller, apply_hints,
                                               apply_hints2)
               && (apply_hints || apply_hints2
                   || growth >= inline_insns_auto (e->caller, true,
                                                   apply_hints2)
                   || !big_speedup_p (e)))
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
              want_inline = false;
            }
        }
      /* If the call is cold, do not inline when the function body would
         grow.  */
      else if (!e->maybe_hot_p ()
               && (growth >= inline_insns_single (e->caller, false, false)
                   || growth_positive_p (callee, e, growth)))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
/* EDGE is a self-recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = opt_for_fn (outer_node->decl,
                              param_max_inline_recursive_depth_auto);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = opt_for_fn (outer_node->decl,
                            param_max_inline_recursive_depth);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
           && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of a self-recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self-recursive function very small.
     Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     the recursion depth is large.  We reduce function call overhead and
     increase the chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self-recursive call within a loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of the libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
          <= caller_freq
             * opt_for_fn (outer_node->decl,
                           param_min_inline_recursive_probability))
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
                     "   not inlining recursively: %s\n", reason);
  return want_inline;
}
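
/* Illustration of the peeling bound above (made-up depth): with max_depth 8
   the initial cap is 1 - 1/8 = 0.875 of the caller frequency; since the cap
   is squared once per additional level, at depth 3 it is 0.875^4 ~= 0.586,
   so only recursive edges whose frequency stays below an ever shrinking
   fraction of the caller frequency keep getting peeled.  */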
/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
          || !opt_for_fn (e->caller->decl, optimize))
        return true;
      if (!can_inline_edge_p (e, true))
        return true;
      if (e->recursive_p ())
        return true;
      if (!can_inline_edge_by_limits_p (e, true))
        return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
        *(bool *)has_hot_call = true;
    }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}
/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (growth_positive_p (node, NULL, INT_MIN) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
                                         true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}

/* Return true if WHERE of SIZE is a possible candidate for the wrapper
   heuristics in edge_badness.  */

static bool
wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
{
  return size < (DECL_DECLARED_INLINE_P (where->decl)
                 ? inline_insns_single (where, false, false)
                 : inline_insns_auto (where, false, false));
}
/* A cost model driving the inlining heuristics in such a way that the edges
   with the smallest badness are inlined first.  After each inlining is
   performed the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body size.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
                        - callee_info->time * 101).to_int () <= 0
                       || callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
               edge->caller->dump_name (),
               edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
               growth,
               edge_time.to_double (),
               unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
        fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
        fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
                 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
        fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When the profile is available, compute badness as:

                 time_saved * caller_count
     goodness =  -------------------------------------------------
                 growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use a negative value to make calls with a profile appear hotter
     than calls without.
   */
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
           || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();

      numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
      if (numerator <= 0)
        numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
        numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
        numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

      /* Look for inliner wrappers of the form:

         inline_caller ()
           {
             do_fast_job...
             if (need_more_work)
               noninline_callee ();
           }
         Without penalizing this case, we usually inline noninline_callee
         into the inline_caller because overall_growth is small, preventing
         further inlining of inline_caller.

         Penalize only callgraph edges to functions with small overall
         growth ...
        */
      if (growth > overall_growth
          /* ... and having only one caller which is not inlined ... */
          && callee_info->single_caller
          && !edge->caller->inlined_to
          /* ... and edges executed only conditionally ... */
          && freq < 1
          /* ... consider case where callee is not inline but caller is ... */
          && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
               && DECL_DECLARED_INLINE_P (caller->decl))
              /* ... or when early optimizers decided to split and edge
                 frequency still indicates splitting is a win ... */
              || (callee->split_part && !caller->split_part
                  && freq * 100
                     < opt_for_fn (caller->decl,
                                   param_partial_inlining_entry_probability)
                  /* ... and do not overwrite user specified hints.  */
                  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
                      || DECL_DECLARED_INLINE_P (caller->decl)))))
        {
          ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
          int caller_growth = caller_info->growth;

          /* Only apply the penalty when the caller looks like an inline
             candidate, and it is not called once.  */
          if (!caller_info->single_caller && overall_growth < caller_growth
              && caller_info->inlinable
              && wrapper_heuristics_may_apply
                   (caller, ipa_size_summaries->get (caller)->size))
            {
              if (dump)
                fprintf (dump_file,
                         "     Wrapper penalty. Increasing growth %i to %i\n",
                         overall_growth, caller_growth);
              overall_growth = caller_growth;
            }
        }
      if (overall_growth > 0)
        {
          /* Strongly prefer functions with few callers that can be inlined
             fully.  The square root here leads to smaller binaries on
             average.  Watch however for extreme cases and return to a linear
             function when growth is large.  */
          if (overall_growth < 256)
            overall_growth *= overall_growth;
          else
            overall_growth += 256 * 256 - 256;
          denominator *= overall_growth;
        }
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
        {
          fprintf (dump_file,
                   "      %f: guessed profile. frequency %f, count %" PRId64
                   " caller count %" PRId64
                   " time saved %f"
                   " overall growth %i (current) %i (original)"
                   " %i (compensated)\n",
                   badness.to_double (),
                   freq.to_double (),
                   edge->count.ipa ().initialized_p ()
                   ? edge->count.ipa ().to_gcov_type () : -1,
                   caller->count.ipa ().initialized_p ()
                   ? caller->count.ipa ().to_gcov_type () : -1,
                   inlining_speedup (edge, freq, unspec_edge_time,
                                     edge_time).to_double (),
                   estimate_growth (callee),
                   callee_info->growth, overall_growth);
        }
    }
  /* When a function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness = badness >> nest;
      else
        badness = badness << nest;
      if (dump)
        fprintf (dump_file, "      %f: no profile. nest %i\n",
                 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
                | INLINE_HINT_loop_iterations
                | INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & INLINE_HINT_builtin_constant_p)
    badness = badness.shift (badness > 0 ? -4 : 4);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}
/* Recompute badness of EDGE and update its key in HEAP if needed.  */

static inline void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
         heap that is unnecessarily expensive.
         We do lazy increases: after extracting the minimum, if the key
         turns out to be out of date, it is re-inserted into the heap
         with the correct value.  */
      if (badness < n->get_key ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file,
                       "  decreasing badness %s -> %s, %f to %f\n",
                       edge->caller->dump_name (),
                       edge->callee->dump_name (),
                       n->get_key ().to_double (),
                       badness.to_double ());
            }
          heap->decrease_key (n, badness);
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "  enqueuing call %s -> %s, badness %f\n",
                   edge->caller->dump_name (),
                   edge->callee->dump_name (),
                   badness.to_double ());
        }
      edge->aux = heap->insert (badness, edge);
    }
}
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need resetting
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  struct ipa_ref *ref;

  if (where->inlined_to)
    where = where->inlined_to;

  reset_node_cache (where);

  if (edge_growth_cache != NULL)
    for (edge = where->callers; edge; edge = edge->next_caller)
      if (edge->inline_failed)
        edge_growth_cache->remove (edge);

  FOR_EACH_ALIAS (where, ref)
    reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        if (edge_growth_cache != NULL && e->inline_failed)
          edge_growth_cache->remove (e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
                    bitmap updated_nodes,
                    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  struct ipa_ref *ref;

  if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
      || node->inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
    return;

  FOR_EACH_ALIAS (node, ref)
    {
      struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        if (!check_inlinablity_for
            || check_inlinablity_for == edge)
          {
            if (can_inline_edge_p (edge, false)
                && want_inline_small_function_p (edge, false)
                && can_inline_edge_by_limits_p (edge, false))
              update_edge_key (heap, edge);
            else if (edge->aux)
              {
                report_inline_failed_reason (edge);
                heap->delete_node ((edge_heap_node_t *) edge->aux);
                edge->aux = NULL;
              }
          }
        else if (edge->aux)
          update_edge_key (heap, edge);
      }
}
/* Recompute HEAP nodes for each uninlined call in NODE.
   If UPDATE_SINCE is non-NULL, check whether edges called within that
   function are inlinable (typically UPDATE_SINCE is the inline clone we
   introduced where all edges have a new context).

   This is used when we know that edge badnesses are only going to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
                    struct cgraph_node *update_since,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
  bool check_inlinability = update_since == node;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      {
        if (e->callee == update_since)
          check_inlinability = true;
        e = e->callee->callees;
      }
    else
      {
        enum availability avail;
        struct cgraph_node *callee;
        if (!check_inlinability)
          {
            if (e->aux
                && !bitmap_bit_p (updated_nodes,
                                  e->callee->ultimate_alias_target
                                    (&avail, e->caller)->get_uid ()))
              update_edge_key (heap, e);
          }
        /* We do not reset the callee growth cache here.  Since we added a new
           call, growth should have just increased and consequently the
           badness metric doesn't need updating.  */
        else if (e->inline_failed
                 && (callee = e->callee->ultimate_alias_target (&avail,
                                                                e->caller))
                 && avail >= AVAIL_AVAILABLE
                 && ipa_fn_summaries->get (callee) != NULL
                 && ipa_fn_summaries->get (callee)->inlinable
                 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
          {
            if (can_inline_edge_p (e, false)
                && want_inline_small_function_p (e, false)
                && can_inline_edge_by_limits_p (e, false))
              {
                gcc_checking_assert (check_inlinability
                                     || can_inline_edge_p (e, false));
                gcc_checking_assert (check_inlinability || e->aux);
                update_edge_key (heap, e);
              }
            else if (e->aux)
              {
                report_inline_failed_reason (e);
                heap->delete_node ((edge_heap_node_t *) e->aux);
                e->aux = NULL;
              }
          }
        /* In case we redirected to an unreachable node we only need to remove
           the fibheap entry.  */
        else if (e->aux)
          {
            heap->delete_node ((edge_heap_node_t *) e->aux);
            e->aux = NULL;
          }
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                if (e->caller == update_since)
                  check_inlinability = false;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        edge_heap_t *heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
        || (e->callee->ultimate_alias_target (&avail, e->caller) == node
            && avail > AVAIL_INTERPOSABLE))
      heap->insert (-e->sreal_frequency (), e);
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
1616 /* Decide on recursive inlining: in the case function has recursive calls,
1617 inline until body size reaches given argument. If any new indirect edges
1618 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1619 is NULL. */
1621 static bool
1622 recursive_inlining (struct cgraph_edge *edge,
1623 vec<cgraph_edge *> *new_edges)
1625 cgraph_node *to = (edge->caller->inlined_to
1626 ? edge->caller->inlined_to : edge->caller);
1627 int limit = opt_for_fn (to->decl,
1628 param_max_inline_insns_recursive_auto);
1629 edge_heap_t heap (sreal::min ());
1630 struct cgraph_node *node;
1631 struct cgraph_edge *e;
1632 struct cgraph_node *master_clone = NULL, *next;
1633 int depth = 0;
1634 int n = 0;
1636 node = edge->caller;
1637 if (node->inlined_to)
1638 node = node->inlined_to;
1640 if (DECL_DECLARED_INLINE_P (node->decl))
1641 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1643 /* Make sure that function is small enough to be considered for inlining. */
1644 if (estimate_size_after_inlining (node, edge) >= limit)
1645 return false;
1646 lookup_recursive_calls (node, node, &heap);
1647 if (heap.empty ())
1648 return false;
1650 if (dump_file)
1651 fprintf (dump_file,
1652 " Performing recursive inlining on %s\n", node->dump_name ());
1654 /* Do the inlining and update list of recursive call during process. */
1655 while (!heap.empty ())
1657 struct cgraph_edge *curr = heap.extract_min ();
1658 struct cgraph_node *cnode, *dest = curr->callee;
1660 if (!can_inline_edge_p (curr, true)
1661 || !can_inline_edge_by_limits_p (curr, true))
1662 continue;
1664 /* MASTER_CLONE is produced in the case we already started modified
1665 the function. Be sure to redirect edge to the original body before
1666 estimating growths otherwise we will be seeing growths after inlining
1667 the already modified body. */
1668 if (master_clone)
1670 curr->redirect_callee (master_clone);
1671 if (edge_growth_cache != NULL)
1672 edge_growth_cache->remove (curr);
1675 if (estimate_size_after_inlining (node, curr) > limit)
1677 curr->redirect_callee (dest);
1678 if (edge_growth_cache != NULL)
1679 edge_growth_cache->remove (curr);
1680 break;
1683 depth = 1;
1684 for (cnode = curr->caller;
1685 cnode->inlined_to; cnode = cnode->callers->caller)
1686 if (node->decl
1687 == curr->callee->ultimate_alias_target ()->decl)
1688 depth++;
1690 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1692 curr->redirect_callee (dest);
1693 if (edge_growth_cache != NULL)
1694 edge_growth_cache->remove (curr);
1695 continue;
1698 if (dump_file)
1700 fprintf (dump_file,
1701 " Inlining call of depth %i", depth);
1702 if (node->count.nonzero_p () && curr->count.initialized_p ())
1704 fprintf (dump_file, " called approx. %.2f times per call",
1705 (double)curr->count.to_gcov_type ()
1706 / node->count.to_gcov_type ());
1708 fprintf (dump_file, "\n");
1710 if (!master_clone)
1712 /* We need original clone to copy around. */
1713 master_clone = node->create_clone (node->decl, node->count,
1714 false, vNULL, true, NULL, NULL);
1715 for (e = master_clone->callees; e; e = e->next_callee)
1716 if (!e->inline_failed)
1717 clone_inlined_nodes (e, true, false, NULL);
1718 curr->redirect_callee (master_clone);
1719 if (edge_growth_cache != NULL)
1720 edge_growth_cache->remove (curr);
1723 inline_call (curr, false, new_edges, &overall_size, true);
1724 reset_node_cache (node);
1725 lookup_recursive_calls (node, curr->callee, &heap);
1726 n++;
1729 if (!heap.empty () && dump_file)
1730 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1732 if (!master_clone)
1733 return false;
1735 if (dump_enabled_p ())
1736 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1737 "\n Inlined %i times, "
1738 "body grown from size %i to %i, time %f to %f\n", n,
1739 ipa_size_summaries->get (master_clone)->size,
1740 ipa_size_summaries->get (node)->size,
1741 ipa_fn_summaries->get (master_clone)->time.to_double (),
1742 ipa_fn_summaries->get (node)->time.to_double ());
1744 /* Remove the master clone we used for inlining. We rely on the fact that
1745 clones inlined into the master clone get queued just before the master
1746 clone, so we don't need recursion. */
1747 for (node = symtab->first_function (); node != master_clone;
1748 node = next)
1750 next = symtab->next_function (node);
1751 if (node->inlined_to == master_clone)
1752 node->remove ();
1754 master_clone->remove ();
1755 return true;
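/* For illustration of what recursive_inlining above achieves (a minimal
   sketch; the function SUM below is a made-up example, not taken from this
   file): for a self-recursive call such as

       static int sum (int n) { return n > 0 ? n + sum (n - 1) : 0; }

   inlining the recursive edge through a temporary master clone a few times
   has an effect similar to unrolling the recursion, and later optimizations
   can then simplify the unrolled body.  The process stops once the estimated
   body size reaches param_max_inline_insns_recursive (or the _auto variant
   for functions not declared inline), as checked in the loop above.  */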
1759 /* Given the whole-compilation-unit size estimate INSNS, compute how large we
1760 can allow the unit to grow. */
1762 static int64_t
1763 compute_max_insns (cgraph_node *node, int insns)
1765 int max_insns = insns;
1766 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1767 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1769 return ((int64_t) max_insns
1770 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
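/* A worked example of the formula in compute_max_insns above (illustrative
   numbers only; the actual parameter values depend on options and target):
   assuming param_large_unit_insns == 10000 and param_inline_unit_growth == 40,
   a unit whose estimated size is INSNS == 50000 may grow up to
   50000 * (100 + 40) / 100 == 70000 instructions, while a tiny unit with
   INSNS == 2000 is first rounded up to 10000 and may thus grow to 14000.  */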
1774 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1776 static void
1777 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1779 while (new_edges.length () > 0)
1781 struct cgraph_edge *edge = new_edges.pop ();
1783 gcc_assert (!edge->aux);
1784 gcc_assert (edge->callee);
1785 if (edge->inline_failed
1786 && can_inline_edge_p (edge, true)
1787 && want_inline_small_function_p (edge, true)
1788 && can_inline_edge_by_limits_p (edge, true))
1789 edge->aux = heap->insert (edge_badness (edge, false), edge);
1793 /* Remove edge E from the fibheap. */
1795 static void
1796 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1798 if (e->aux)
1800 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1801 e->aux = NULL;
1805 /* Return true if speculation of edge E seems useful.
1806 If ANTICIPATE_INLINING is true, be conservative and hope that E
1807 may get inlined. */
1809 bool
1810 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1812 /* If we have already decided to inline the edge, it seems useful. */
1813 if (!e->inline_failed)
1814 return true;
1816 enum availability avail;
1817 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1818 e->caller);
1820 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1822 if (!e->maybe_hot_p ())
1823 return false;
1825 /* See if IP optimizations found something potentially useful about the
1826 function. For now we look only for CONST/PURE flags. Almost everything
1827 else we propagate is useless. */
1828 if (avail >= AVAIL_AVAILABLE)
1830 int ecf_flags = flags_from_decl_or_type (target->decl);
1831 if (ecf_flags & ECF_CONST)
1833 if (!(e->speculative_call_indirect_edge ()->indirect_info
1834 ->ecf_flags & ECF_CONST))
1835 return true;
1837 else if (ecf_flags & ECF_PURE)
1839 if (!(e->speculative_call_indirect_edge ()->indirect_info
1840 ->ecf_flags & ECF_PURE))
1841 return true;
1844 /* If we did not manage to inline the function nor redirect
1845 to an ipa-cp clone (which is recognized by the local flag being set),
1846 it is probably pointless to inline it unless the hardware is missing
1847 an indirect call predictor. */
1848 if (!anticipate_inlining && !target->local)
1849 return false;
1850 /* For overwritable targets there is not much to do. */
1851 if (!can_inline_edge_p (e, false)
1852 || !can_inline_edge_by_limits_p (e, false, true))
1853 return false;
1854 /* OK, speculation seems interesting. */
1855 return true;
1858 /* We know that EDGE is not going to be inlined.
1859 See if we can remove speculation. */
1861 static void
1862 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1864 if (edge->speculative && !speculation_useful_p (edge, false))
1866 struct cgraph_node *node = edge->caller;
1867 struct cgraph_node *where = node->inlined_to
1868 ? node->inlined_to : node;
1869 auto_bitmap updated_nodes;
1871 if (edge->count.ipa ().initialized_p ())
1872 spec_rem += edge->count.ipa ();
1873 cgraph_edge::resolve_speculation (edge);
1874 reset_edge_caches (where);
1875 ipa_update_overall_fn_summary (where);
1876 update_caller_keys (edge_heap, where,
1877 updated_nodes, NULL);
1878 update_callee_keys (edge_heap, where, NULL,
1879 updated_nodes);
1883 /* Return true if NODE should be accounted for in the overall size estimate.
1884 Skip all nodes optimized for size so we can measure the growth of the hot
1885 part of the program regardless of the cold padding. */
1887 bool
1888 inline_account_function_p (struct cgraph_node *node)
1890 return (!DECL_EXTERNAL (node->decl)
1891 && !opt_for_fn (node->decl, optimize_size)
1892 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1895 /* Count the number of callers of NODE and store it into DATA (which
1896 points to an int). Worker for cgraph_for_node_and_aliases. */
1898 static bool
1899 sum_callers (struct cgraph_node *node, void *data)
1901 struct cgraph_edge *e;
1902 int *num_calls = (int *)data;
1904 for (e = node->callers; e; e = e->next_caller)
1905 (*num_calls)++;
1906 return false;
1909 /* We only propagate across edges with a non-interposable callee. */
1911 inline bool
1912 ignore_edge_p (struct cgraph_edge *e)
1914 enum availability avail;
1915 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1916 return (avail <= AVAIL_INTERPOSABLE);
1919 /* We use a greedy algorithm for inlining small functions:
1920 all inline candidates are put into a prioritized heap ordered by
1921 increasing badness.
1923 The inlining of small functions is bounded by the unit growth parameters. */
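/* In outline, a simplified sketch of the loop implemented below (pseudocode
   for orientation only, not the exact control flow):

       while (!edge_heap.empty ())
         {
           edge = edge_heap.extract_min ();    -- smallest badness first
           if (badness of EDGE is out of date)
             re-insert EDGE with the fresh key;  -- lazy key updates
           else if (unit growth and per-edge limits allow)
             inline EDGE and update keys of affected callers and callees;
         }
 */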
1925 static void
1926 inline_small_functions (void)
1928 struct cgraph_node *node;
1929 struct cgraph_edge *edge;
1930 edge_heap_t edge_heap (sreal::min ());
1931 auto_bitmap updated_nodes;
1932 int min_size;
1933 auto_vec<cgraph_edge *> new_indirect_edges;
1934 int initial_size = 0;
1935 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1936 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1937 new_indirect_edges.create (8);
1939 edge_removal_hook_holder
1940 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1942 /* Compute overall unit size and other global parameters used by badness
1943 metrics. */
1945 max_count = profile_count::uninitialized ();
1946 ipa_reduced_postorder (order, true, ignore_edge_p);
1947 free (order);
1949 FOR_EACH_DEFINED_FUNCTION (node)
1950 if (!node->inlined_to)
1952 if (!node->alias && node->analyzed
1953 && (node->has_gimple_body_p () || node->thunk)
1954 && opt_for_fn (node->decl, optimize))
1956 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1957 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1959 /* Do not account external functions; they will be optimized out
1960 if not inlined. Also only count the non-cold portion of the program. */
1961 if (inline_account_function_p (node))
1962 initial_size += ipa_size_summaries->get (node)->size;
1963 info->growth = estimate_growth (node);
1965 int num_calls = 0;
1966 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1967 true);
1968 if (num_calls == 1)
1969 info->single_caller = true;
1970 if (dfs && dfs->next_cycle)
1972 struct cgraph_node *n2;
1973 int id = dfs->scc_no + 1;
1974 for (n2 = node; n2;
1975 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1976 if (opt_for_fn (n2->decl, optimize))
1978 ipa_fn_summary *info2 = ipa_fn_summaries->get
1979 (n2->inlined_to ? n2->inlined_to : n2);
1980 if (info2->scc_no)
1981 break;
1982 info2->scc_no = id;
1987 for (edge = node->callers; edge; edge = edge->next_caller)
1988 max_count = max_count.max (edge->count.ipa ());
1990 ipa_free_postorder_info ();
1991 initialize_growth_caches ();
1993 if (dump_file)
1994 fprintf (dump_file,
1995 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1996 initial_size);
1998 overall_size = initial_size;
1999 min_size = overall_size;
2001 /* Populate the heap with all edges we might inline. */
2003 FOR_EACH_DEFINED_FUNCTION (node)
2005 bool update = false;
2006 struct cgraph_edge *next = NULL;
2007 bool has_speculative = false;
2009 if (!opt_for_fn (node->decl, optimize))
2010 continue;
2012 if (dump_file)
2013 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2015 for (edge = node->callees; edge; edge = edge->next_callee)
2017 if (edge->inline_failed
2018 && !edge->aux
2019 && can_inline_edge_p (edge, true)
2020 && want_inline_small_function_p (edge, true)
2021 && can_inline_edge_by_limits_p (edge, true)
2022 && edge->inline_failed)
2024 gcc_assert (!edge->aux);
2025 update_edge_key (&edge_heap, edge);
2027 if (edge->speculative)
2028 has_speculative = true;
2030 if (has_speculative)
2031 for (edge = node->callees; edge; edge = next)
2033 next = edge->next_callee;
2034 if (edge->speculative
2035 && !speculation_useful_p (edge, edge->aux != NULL))
2037 cgraph_edge::resolve_speculation (edge);
2038 update = true;
2041 if (update)
2043 struct cgraph_node *where = node->inlined_to
2044 ? node->inlined_to : node;
2045 ipa_update_overall_fn_summary (where);
2046 reset_edge_caches (where);
2047 update_caller_keys (&edge_heap, where,
2048 updated_nodes, NULL);
2049 update_callee_keys (&edge_heap, where, NULL,
2050 updated_nodes);
2051 bitmap_clear (updated_nodes);
2055 gcc_assert (in_lto_p
2056 || !(max_count > 0)
2057 || (profile_info && flag_branch_probabilities));
2059 while (!edge_heap.empty ())
2061 int old_size = overall_size;
2062 struct cgraph_node *where, *callee;
2063 sreal badness = edge_heap.min_key ();
2064 sreal current_badness;
2065 int growth;
2067 edge = edge_heap.extract_min ();
2068 gcc_assert (edge->aux);
2069 edge->aux = NULL;
2070 if (!edge->inline_failed || !edge->callee->analyzed)
2071 continue;
2073 /* Be sure that the caches are maintained consistently.
2074 This check is affected by scaling roundoff errors when IPA profile
2075 counts are in use, thus we skip it in that case. */
2076 if (flag_checking && !edge->callee->count.ipa_p ()
2077 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2079 sreal cached_badness = edge_badness (edge, false);
2081 int old_size_est = estimate_edge_size (edge);
2082 sreal old_time_est = estimate_edge_time (edge);
2083 int old_hints_est = estimate_edge_hints (edge);
2085 if (edge_growth_cache != NULL)
2086 edge_growth_cache->remove (edge);
2087 reset_node_cache (edge->caller->inlined_to
2088 ? edge->caller->inlined_to
2089 : edge->caller);
2090 gcc_assert (old_size_est == estimate_edge_size (edge));
2091 gcc_assert (old_time_est == estimate_edge_time (edge));
2092 /* FIXME:
2094 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2096 fails with profile feedback because some hints depend on the
2097 maybe_hot_edge_p predicate and, because the callee gets inlined into
2098 other calls, the edge may become cold.
2099 This ought to be fixed by computing relative probabilities
2100 for the given invocation, but that will be better done once the whole
2101 code is converted to sreals. Disable for now and revert to the "wrong"
2102 value so the enabled/disabled checking paths agree. */
2103 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2105 /* When updating the edge costs, we only decrease badness in the keys.
2106 Increases of badness are handled lazily; when we see a key with an
2107 out-of-date value on it, we re-insert it now. */
2108 current_badness = edge_badness (edge, false);
2109 gcc_assert (cached_badness == current_badness);
2110 gcc_assert (current_badness >= badness);
2112 else
2113 current_badness = edge_badness (edge, false);
2114 if (current_badness != badness)
2116 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2118 edge->aux = edge_heap.insert (current_badness, edge);
2119 continue;
2121 else
2122 badness = current_badness;
2125 if (!can_inline_edge_p (edge, true)
2126 || !can_inline_edge_by_limits_p (edge, true))
2128 resolve_noninline_speculation (&edge_heap, edge);
2129 continue;
2132 callee = edge->callee->ultimate_alias_target ();
2133 growth = estimate_edge_growth (edge);
2134 if (dump_file)
2136 fprintf (dump_file,
2137 "\nConsidering %s with %i size\n",
2138 callee->dump_name (),
2139 ipa_size_summaries->get (callee)->size);
2140 fprintf (dump_file,
2141 " to be inlined into %s in %s:%i\n"
2142 " Estimated badness is %f, frequency %.2f.\n",
2143 edge->caller->dump_name (),
2144 edge->call_stmt
2145 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2146 edge->call_stmt))
2147 > BUILTINS_LOCATION)
2148 ? gimple_filename ((const gimple *) edge->call_stmt)
2149 : "unknown",
2150 edge->call_stmt
2151 ? gimple_lineno ((const gimple *) edge->call_stmt)
2152 : -1,
2153 badness.to_double (),
2154 edge->sreal_frequency ().to_double ());
2155 if (edge->count.ipa ().initialized_p ())
2157 fprintf (dump_file, " Called ");
2158 edge->count.ipa ().dump (dump_file);
2159 fprintf (dump_file, " times\n");
2161 if (dump_flags & TDF_DETAILS)
2162 edge_badness (edge, true);
2165 where = edge->caller;
2167 if (overall_size + growth > compute_max_insns (where, min_size)
2168 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2170 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2171 report_inline_failed_reason (edge);
2172 resolve_noninline_speculation (&edge_heap, edge);
2173 continue;
2176 if (!want_inline_small_function_p (edge, true))
2178 resolve_noninline_speculation (&edge_heap, edge);
2179 continue;
2182 profile_count old_count = callee->count;
2184 /* Heuristics for inlining small functions work poorly for
2185 recursive calls, where we achieve effects similar to loop unrolling.
2186 When inlining such an edge seems profitable, leave the decision to the
2187 special-purpose recursive inliner. */
2188 if (edge->recursive_p ())
2190 if (where->inlined_to)
2191 where = where->inlined_to;
2192 if (!recursive_inlining (edge,
2193 opt_for_fn (edge->caller->decl,
2194 flag_indirect_inlining)
2195 ? &new_indirect_edges : NULL))
2197 edge->inline_failed = CIF_RECURSIVE_INLINING;
2198 resolve_noninline_speculation (&edge_heap, edge);
2199 continue;
2201 reset_edge_caches (where);
2202 /* The recursive inliner inlines all recursive calls of the function
2203 at once. Consequently we need to update all callee keys. */
2204 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2205 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2206 update_callee_keys (&edge_heap, where, where, updated_nodes);
2207 bitmap_clear (updated_nodes);
2209 else
2211 struct cgraph_node *outer_node = NULL;
2212 int depth = 0;
2214 /* Consider the case where the self-recursive function A is inlined
2215 into B. This is a desired optimization in some cases, since it
2216 leads to an effect similar to loop peeling and we might completely
2217 optimize out the recursive call. However we must be extra
2218 selective. */
2220 where = edge->caller;
2221 while (where->inlined_to)
2223 if (where->decl == callee->decl)
2224 outer_node = where, depth++;
2225 where = where->callers->caller;
2227 if (outer_node
2228 && !want_inline_self_recursive_call_p (edge, outer_node,
2229 true, depth))
2231 edge->inline_failed
2232 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2233 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2234 resolve_noninline_speculation (&edge_heap, edge);
2235 continue;
2237 else if (depth && dump_file)
2238 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2240 gcc_checking_assert (!callee->inlined_to);
2242 int old_size = ipa_size_summaries->get (where)->size;
2243 sreal old_time = ipa_fn_summaries->get (where)->time;
2245 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2246 reset_edge_caches (edge->callee);
2247 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2249 /* If caller's size and time increased we do not need to update
2250 all edges because badness is not going to decrease. */
2251 if (old_size <= ipa_size_summaries->get (where)->size
2252 && old_time <= ipa_fn_summaries->get (where)->time
2253 /* The wrapper penalty may be non-monotonic in this respect.
2254 Fortunately it only affects small functions. */
2255 && !wrapper_heuristics_may_apply (where, old_size))
2256 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2257 updated_nodes);
2258 else
2259 update_callee_keys (&edge_heap, where,
2260 edge->callee,
2261 updated_nodes);
2263 where = edge->caller;
2264 if (where->inlined_to)
2265 where = where->inlined_to;
2267 /* Our profitability metric can depend on local properties
2268 such as the number of inlinable calls and the size of the function body.
2269 After inlining these properties might change for the function we
2270 inlined into (since its body size changed) and for the functions
2271 called by the function we inlined (since the number of their inlinable
2272 callers might change). */
2273 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2274 /* The offline copy count has possibly changed; recompute it if a profile
2275 is available. */
2276 struct cgraph_node *n
2277 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2278 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2279 && n->count.ipa_p ())
2280 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2281 bitmap_clear (updated_nodes);
2283 if (dump_enabled_p ())
2285 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2287 /* dump_printf can't handle %+i. */
2288 char buf_net_change[100];
2289 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2290 overall_size - old_size);
2292 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2293 " Inlined %C into %C which now has time %f and "
2294 "size %i, net change of %s%s.\n",
2295 edge->callee, edge->caller,
2296 s->time.to_double (),
2297 ipa_size_summaries->get (edge->caller)->size,
2298 buf_net_change,
2299 cross_module_call_p (edge) ? " (cross module)":"");
2301 if (min_size > overall_size)
2303 min_size = overall_size;
2305 if (dump_file)
2306 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2310 free_growth_caches ();
2311 if (dump_enabled_p ())
2312 dump_printf (MSG_NOTE,
2313 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2314 initial_size, overall_size,
2315 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2316 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2319 /* Flatten NODE. Performed both during early inlining and
2320 at IPA inlining time. */
2322 static void
2323 flatten_function (struct cgraph_node *node, bool early, bool update)
2325 struct cgraph_edge *e;
2327 /* We shouldn't be called recursively when we are being processed. */
2328 gcc_assert (node->aux == NULL);
2330 node->aux = (void *) node;
2332 for (e = node->callees; e; e = e->next_callee)
2334 struct cgraph_node *orig_callee;
2335 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2337 /* We've hit a cycle? It is time to give up. */
2338 if (callee->aux)
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2342 "Not inlining %C into %C to avoid cycle.\n",
2343 callee, e->caller);
2344 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2345 e->inline_failed = CIF_RECURSIVE_INLINING;
2346 continue;
2349 /* When the edge is already inlined, we just need to recurse into
2350 it in order to fully flatten the leaves. */
2351 if (!e->inline_failed)
2353 flatten_function (callee, early, false);
2354 continue;
2357 /* The flatten attribute needs to be processed during late inlining. For
2358 extra code quality, however, we also do flattening during early
2359 optimization. */
2360 if (!early
2361 ? !can_inline_edge_p (e, true)
2362 && !can_inline_edge_by_limits_p (e, true)
2363 : !can_early_inline_edge_p (e))
2364 continue;
2366 if (e->recursive_p ())
2368 if (dump_enabled_p ())
2369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2370 "Not inlining: recursive call.\n");
2371 continue;
2374 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2375 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2379 "Not inlining: SSA form does not match.\n");
2380 continue;
2383 /* Inline the edge and flatten the inline clone. Avoid
2384 recursing through the original node if the node was cloned. */
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2387 " Inlining %C into %C.\n",
2388 callee, e->caller);
2389 orig_callee = callee;
2390 inline_call (e, true, NULL, NULL, false);
2391 if (e->callee != orig_callee)
2392 orig_callee->aux = (void *) node;
2393 flatten_function (e->callee, early, false);
2394 if (e->callee != orig_callee)
2395 orig_callee->aux = NULL;
2398 node->aux = NULL;
2399 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2400 if (update && opt_for_fn (where->decl, optimize))
2401 ipa_update_overall_fn_summary (where);
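/* For illustration, the kind of user code flatten_function above handles
   (a made-up example of typical usage, not taken from this file):

       __attribute__ ((flatten)) void render_frame (void)
       {
         draw_background ();
         draw_sprites ();
       }

   Everything reachable from render_frame is inlined into it where the edge
   predicates above allow, both at early and at late (IPA) inlining time.  */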
2404 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2405 DATA points to the number of calls originally found, so we avoid infinite
2406 recursion. */
2408 static bool
2409 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2410 hash_set<cgraph_node *> *callers)
2412 int *num_calls = (int *)data;
2413 bool callee_removed = false;
2415 while (node->callers && !node->inlined_to)
2417 struct cgraph_node *caller = node->callers->caller;
2419 if (!can_inline_edge_p (node->callers, true)
2420 || !can_inline_edge_by_limits_p (node->callers, true)
2421 || node->callers->recursive_p ())
2423 if (dump_file)
2424 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2425 *num_calls = 0;
2426 return false;
2429 if (dump_file)
2431 cgraph_node *ultimate = node->ultimate_alias_target ();
2432 fprintf (dump_file,
2433 "\nInlining %s size %i.\n",
2434 ultimate->dump_name (),
2435 ipa_size_summaries->get (ultimate)->size);
2436 fprintf (dump_file,
2437 " Called once from %s %i insns.\n",
2438 node->callers->caller->dump_name (),
2439 ipa_size_summaries->get (node->callers->caller)->size);
2442 /* Remember which callers we inlined to, delaying updating the
2443 overall summary. */
2444 callers->add (node->callers->caller);
2445 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2446 if (dump_file)
2447 fprintf (dump_file,
2448 " Inlined into %s which now has %i size\n",
2449 caller->dump_name (),
2450 ipa_size_summaries->get (caller)->size);
2451 if (!(*num_calls)--)
2453 if (dump_file)
2454 fprintf (dump_file, "New calls found; giving up.\n");
2455 return callee_removed;
2457 if (callee_removed)
2458 return true;
2460 return false;
2463 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2464 update. */
2466 static bool
2467 inline_to_all_callers (struct cgraph_node *node, void *data)
2469 hash_set<cgraph_node *> callers;
2470 bool res = inline_to_all_callers_1 (node, data, &callers);
2471 /* Perform the delayed update of the overall summary of all callers
2472 processed. This avoids quadratic behavior in the cases where
2473 we have a lot of calls to the same function. */
2474 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2475 i != callers.end (); ++i)
2476 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2477 return res;
2480 /* Output overall time estimate. */
2481 static void
2482 dump_overall_stats (void)
2484 sreal sum_weighted = 0, sum = 0;
2485 struct cgraph_node *node;
2487 FOR_EACH_DEFINED_FUNCTION (node)
2488 if (!node->inlined_to
2489 && !node->alias)
2491 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2492 if (s != NULL)
2494 sum += s->time;
2495 if (node->count.ipa ().initialized_p ())
2496 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2499 fprintf (dump_file, "Overall time estimate: "
2500 "%f weighted by profile: "
2501 "%f\n", sum.to_double (), sum_weighted.to_double ());
2504 /* Output some useful stats about inlining. */
2506 static void
2507 dump_inline_stats (void)
2509 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2510 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2511 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2512 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2513 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2514 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2515 int64_t reason[CIF_N_REASONS][2];
2516 sreal reason_freq[CIF_N_REASONS];
2517 int i;
2518 struct cgraph_node *node;
2520 memset (reason, 0, sizeof (reason));
2521 for (i=0; i < CIF_N_REASONS; i++)
2522 reason_freq[i] = 0;
2523 FOR_EACH_DEFINED_FUNCTION (node)
2525 struct cgraph_edge *e;
2526 for (e = node->callees; e; e = e->next_callee)
2528 if (e->inline_failed)
2530 if (e->count.ipa ().initialized_p ())
2531 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2532 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2533 reason[(int) e->inline_failed][1] ++;
2534 if (DECL_VIRTUAL_P (e->callee->decl)
2535 && e->count.ipa ().initialized_p ())
2537 if (e->indirect_inlining_edge)
2538 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2539 else
2540 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2542 else if (e->count.ipa ().initialized_p ())
2544 if (e->indirect_inlining_edge)
2545 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2546 else
2547 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2550 else if (e->count.ipa ().initialized_p ())
2552 if (e->speculative)
2554 if (DECL_VIRTUAL_P (e->callee->decl))
2555 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2556 else
2557 inlined_speculative += e->count.ipa ().to_gcov_type ();
2559 else if (DECL_VIRTUAL_P (e->callee->decl))
2561 if (e->indirect_inlining_edge)
2562 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2563 else
2564 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2566 else
2568 if (e->indirect_inlining_edge)
2569 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2570 else
2571 inlined_cnt += e->count.ipa ().to_gcov_type ();
2575 for (e = node->indirect_calls; e; e = e->next_callee)
2576 if (e->indirect_info->polymorphic
2577 && e->count.ipa ().initialized_p ())
2578 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2579 else if (e->count.ipa ().initialized_p ())
2580 indirect_cnt += e->count.ipa ().to_gcov_type ();
2582 if (max_count.initialized_p ())
2584 fprintf (dump_file,
2585 "Inlined %" PRId64 " + speculative "
2586 "%" PRId64 " + speculative polymorphic "
2587 "%" PRId64 " + previously indirect "
2588 "%" PRId64 " + virtual "
2589 "%" PRId64 " + virtual and previously indirect "
2590 "%" PRId64 "\n" "Not inlined "
2591 "%" PRId64 " + previously indirect "
2592 "%" PRId64 " + virtual "
2593 "%" PRId64 " + virtual and previously indirect "
2594 "%" PRId64 " + still indirect "
2595 "%" PRId64 " + still indirect polymorphic "
2596 "%" PRId64 "\n", inlined_cnt,
2597 inlined_speculative, inlined_speculative_ply,
2598 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2599 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2600 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2601 fprintf (dump_file, "Removed speculations ");
2602 spec_rem.dump (dump_file);
2603 fprintf (dump_file, "\n");
2605 dump_overall_stats ();
2606 fprintf (dump_file, "\nWhy inlining failed?\n");
2607 for (i = 0; i < CIF_N_REASONS; i++)
2608 if (reason[i][1])
2609 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2610 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2611 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2614 /* Called when a node is removed from the callgraph. */
2616 static void
2617 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2619 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2620 return;
2622 hash_set<struct cgraph_node *> *removed
2623 = (hash_set<struct cgraph_node *> *) data;
2624 removed->add (node);
2627 /* Decide on the inlining. We do so in topological order to avoid
2628 the expense of updating data structures. */
2630 static unsigned int
2631 ipa_inline (void)
2633 struct cgraph_node *node;
2634 int nnodes;
2635 struct cgraph_node **order;
2636 int i, j;
2637 int cold;
2638 bool remove_functions = false;
2640 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2642 if (dump_file)
2643 ipa_dump_fn_summaries (dump_file);
2645 nnodes = ipa_reverse_postorder (order);
2646 spec_rem = profile_count::zero ();
2648 FOR_EACH_FUNCTION (node)
2650 node->aux = 0;
2652 /* Recompute the default reasons for inlining because they may have
2653 changed during merging. */
2654 if (in_lto_p)
2656 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2658 gcc_assert (e->inline_failed);
2659 initialize_inline_failed (e);
2661 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2662 initialize_inline_failed (e);
2666 if (dump_file)
2667 fprintf (dump_file, "\nFlattening functions:\n");
2669 /* First shrink the order array so that it only contains nodes with
2670 the flatten attribute. */
2671 for (i = nnodes - 1, j = i; i >= 0; i--)
2673 node = order[i];
2674 if (node->definition
2675 /* Do not try to flatten aliases. These may happen for example when
2676 creating local aliases. */
2677 && !node->alias
2678 && lookup_attribute ("flatten",
2679 DECL_ATTRIBUTES (node->decl)) != NULL)
2680 order[j--] = order[i];
2683 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2684 the nodes with the flatten attribute. If there is more than one such
2685 node, we need to register a node removal hook, as flatten_function
2686 could remove other nodes with the flatten attribute. See PR82801. */
2687 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2688 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2689 if (j < nnodes - 2)
2691 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2692 node_removal_hook_holder
2693 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2694 flatten_removed_nodes);
2697 /* In the first pass handle functions to be flattened. Do this with
2698 a priority so none of our later choices will make this impossible. */
2699 for (i = nnodes - 1; i > j; i--)
2701 node = order[i];
2702 if (flatten_removed_nodes
2703 && flatten_removed_nodes->contains (node))
2704 continue;
2706 /* Handle nodes to be flattened.
2707 Ideally when processing callees we stop inlining at the
2708 entry of cycles, possibly cloning that entry point and
2709 trying to flatten it, turning it into a self-recursive
2710 function. */
2711 if (dump_file)
2712 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2713 flatten_function (node, false, true);
2716 if (j < nnodes - 2)
2718 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2719 delete flatten_removed_nodes;
2721 free (order);
2723 if (dump_file)
2724 dump_overall_stats ();
2726 inline_small_functions ();
2728 gcc_assert (symtab->state == IPA_SSA);
2729 symtab->state = IPA_SSA_AFTER_INLINING;
2730 /* Do the first after-inlining removal. We want to remove all "stale" extern
2731 inline functions and virtual functions so we really know what is called
2732 once. */
2733 symtab->remove_unreachable_nodes (dump_file);
2735 /* Inline functions with the property that after inlining into all callers the
2736 code size will shrink because the out-of-line copy is eliminated.
2737 We do this regardless of the callee size as long as the function growth
2738 limits are met. */
2739 if (dump_file)
2740 fprintf (dump_file,
2741 "\nDeciding on functions to be inlined into all callers and "
2742 "removing useless speculations:\n");
2744 /* Inlining one function called once has a good chance of preventing
2745 inlining of another function into the same callee. Ideally we should
2746 work in priority order, but probably inlining hot functions first
2747 is a good cut without the extra pain of maintaining the queue.
2749 ??? This is not really fitting the bill perfectly: inlining a function
2750 into its callee often leads to better optimization of the callee due to
2751 increased context for optimization.
2752 For example, if the main() function calls a function that outputs help
2753 and then a function that does the main optimization, we should inline
2754 the second with priority even if both calls are cold by themselves.
2756 We probably want to implement a new predicate replacing our use of
2757 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2758 to be hot. */
2759 for (cold = 0; cold <= 1; cold ++)
2761 FOR_EACH_DEFINED_FUNCTION (node)
2763 struct cgraph_edge *edge, *next;
2764 bool update = false;
2766 if (!opt_for_fn (node->decl, optimize)
2767 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2768 continue;
2770 for (edge = node->callees; edge; edge = next)
2772 next = edge->next_callee;
2773 if (edge->speculative && !speculation_useful_p (edge, false))
2775 if (edge->count.ipa ().initialized_p ())
2776 spec_rem += edge->count.ipa ();
2777 cgraph_edge::resolve_speculation (edge);
2778 update = true;
2779 remove_functions = true;
2782 if (update)
2784 struct cgraph_node *where = node->inlined_to
2785 ? node->inlined_to : node;
2786 reset_edge_caches (where);
2787 ipa_update_overall_fn_summary (where);
2789 if (want_inline_function_to_all_callers_p (node, cold))
2791 int num_calls = 0;
2792 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2793 true);
2794 while (node->call_for_symbol_and_aliases
2795 (inline_to_all_callers, &num_calls, true))
2797 remove_functions = true;
2802 if (dump_enabled_p ())
2803 dump_printf (MSG_NOTE,
2804 "\nInlined %i calls, eliminated %i functions\n\n",
2805 ncalls_inlined, nfunctions_inlined);
2806 if (dump_file)
2807 dump_inline_stats ();
2809 if (dump_file)
2810 ipa_dump_fn_summaries (dump_file);
2811 return remove_functions ? TODO_remove_functions : 0;
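/* For illustration of the "inline to all callers" step of ipa_inline above
   (a made-up example of the typical case, not taken from this file):

       static int scale (int x) { return x * 3 + 1; }
       int entry (int x) { return scale (x); }

   Since SCALE is static and called once, inlining it into ENTRY lets the
   out-of-line copy be removed, so the unit shrinks regardless of the size
   of SCALE, as long as the growth limits checked above are met.  */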
2814 /* Inline always-inline function calls in NODE. */
2816 static bool
2817 inline_always_inline_functions (struct cgraph_node *node)
2819 struct cgraph_edge *e;
2820 bool inlined = false;
2822 for (e = node->callees; e; e = e->next_callee)
2824 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2825 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2826 continue;
2828 if (e->recursive_p ())
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2832 " Not inlining recursive call to %C.\n",
2833 e->callee);
2834 e->inline_failed = CIF_RECURSIVE_INLINING;
2835 continue;
2838 if (!can_early_inline_edge_p (e))
2840 /* Set inlined to true if the callee is marked "always_inline" but
2841 is not inlinable. This will allow flagging an error later in
2842 expand_call_inline in tree-inline.c. */
2843 if (lookup_attribute ("always_inline",
2844 DECL_ATTRIBUTES (callee->decl)) != NULL)
2845 inlined = true;
2846 continue;
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2851 " Inlining %C into %C (always_inline).\n",
2852 e->callee, e->caller);
2853 inline_call (e, true, NULL, NULL, false);
2854 inlined = true;
2856 if (inlined)
2857 ipa_update_overall_fn_summary (node);
2859 return inlined;
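/* For illustration, the kind of declaration inline_always_inline_functions
   above serves (a made-up example of typical usage, not taken from this
   file):

       static inline int clamp (int v, int lo, int hi)
         __attribute__ ((always_inline));

   Calls to CLAMP are inlined even when not optimizing; when that is not
   possible, INLINED is still set so that an error can be flagged later in
   expand_call_inline.  */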
2862 /* Decide on the inlining. We do so in topological order to avoid
2863 the expense of updating data structures. */
2865 static bool
2866 early_inline_small_functions (struct cgraph_node *node)
2868 struct cgraph_edge *e;
2869 bool inlined = false;
2871 for (e = node->callees; e; e = e->next_callee)
2873 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2875 /* We can encounter a not-yet-analyzed function during
2876 early inlining on callgraphs with strongly
2877 connected components. */
2878 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2879 if (s == NULL || !s->inlinable || !e->inline_failed)
2880 continue;
2882 /* Do not consider functions not declared inline. */
2883 if (!DECL_DECLARED_INLINE_P (callee->decl)
2884 && !opt_for_fn (node->decl, flag_inline_small_functions)
2885 && !opt_for_fn (node->decl, flag_inline_functions))
2886 continue;
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_NOTE, e->call_stmt,
2890 "Considering inline candidate %C.\n",
2891 callee);
2893 if (!can_early_inline_edge_p (e))
2894 continue;
2896 if (e->recursive_p ())
2898 if (dump_enabled_p ())
2899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2900 " Not inlining: recursive call.\n");
2901 continue;
2904 if (!want_early_inline_function_p (e))
2905 continue;
2907 if (dump_enabled_p ())
2908 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2909 " Inlining %C into %C.\n",
2910 callee, e->caller);
2911 inline_call (e, true, NULL, NULL, false);
2912 inlined = true;
2915 if (inlined)
2916 ipa_update_overall_fn_summary (node);
2918 return inlined;
2921 unsigned int
2922 early_inliner (function *fun)
2924 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2925 struct cgraph_edge *edge;
2926 unsigned int todo = 0;
2927 int iterations = 0;
2928 bool inlined = false;
2930 if (seen_error ())
2931 return 0;
2933 /* Do nothing if the data structures for the IPA inliner are already computed.
2934 This happens when some pass decides to construct a new function and
2935 cgraph_add_new_function calls lowering passes and early optimization on
2936 it. This may confuse us when the early inliner decides to inline a call to
2937 a function clone, because function clones don't have a parameter list in
2938 ipa-prop matching their signature. */
2939 if (ipa_node_params_sum)
2940 return 0;
2942 if (flag_checking)
2943 node->verify ();
2944 node->remove_all_references ();
2946 /* Even when not optimizing or not inlining, inline always-inline
2947 functions. */
2948 inlined = inline_always_inline_functions (node);
2950 if (!optimize
2951 || flag_no_inline
2952 || !flag_early_inlining
2953 /* Never inline regular functions into always-inline functions
2954 during incremental inlining. This is unfortunate, as functions
2955 calling always-inline functions will get less optimized, but at the
2956 same time inlining functions that call an always-inline
2957 function into an always-inline function might introduce
2958 cycles of edges to be always inlined in the callgraph.
2960 We might want to be smarter and just avoid this type of inlining. */
2961 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2962 && lookup_attribute ("always_inline",
2963 DECL_ATTRIBUTES (node->decl))))
2965 else if (lookup_attribute ("flatten",
2966 DECL_ATTRIBUTES (node->decl)) != NULL)
2968 /* When the function is marked to be flattened, recursively inline
2969 all calls in it. */
2970 if (dump_enabled_p ())
2971 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2972 "Flattening %C\n", node);
2973 flatten_function (node, true, true);
2974 inlined = true;
2976 else
2978 /* If some always_inline functions were inlined, apply the changes.
2979 This way we will not account always-inline functions in the growth limits,
2980 and moreover we will inline calls from always-inline functions that we
2981 skipped previously because of the conditional above. */
2982 if (inlined)
2984 timevar_push (TV_INTEGRATION);
2985 todo |= optimize_inline_calls (current_function_decl);
2986 /* The optimize_inline_calls call above might have introduced new
2987 statements that don't have inline parameters computed. */
2988 for (edge = node->callees; edge; edge = edge->next_callee)
2990 /* We can encounter a not-yet-analyzed function during
2991 early inlining on callgraphs with strongly
2992 connected components. */
2993 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2994 es->call_stmt_size
2995 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2996 es->call_stmt_time
2997 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2999 ipa_update_overall_fn_summary (node);
3000 inlined = false;
3001 timevar_pop (TV_INTEGRATION);
3003 /* We iterate incremental inlining to get trivial cases of indirect
3004 inlining. */
3005 while (iterations < opt_for_fn (node->decl,
3006 param_early_inliner_max_iterations)
3007 && early_inline_small_functions (node))
3009 timevar_push (TV_INTEGRATION);
3010 todo |= optimize_inline_calls (current_function_decl);
3012 /* Technically we ought to recompute inline parameters so the new
3013 iteration of the early inliner works as expected. However, the values
3014 we have are approximately right, and thus we only need to update edge
3015 info that might be cleared out for newly discovered edges. */
3016 for (edge = node->callees; edge; edge = edge->next_callee)
3018 /* We have no summary for new bound store calls yet. */
3019 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3020 es->call_stmt_size
3021 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3022 es->call_stmt_time
3023 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3025 if (iterations < opt_for_fn (node->decl,
3026 param_early_inliner_max_iterations) - 1)
3027 ipa_update_overall_fn_summary (node);
3028 timevar_pop (TV_INTEGRATION);
3029 iterations++;
3030 inlined = false;
3032 if (dump_file)
3033 fprintf (dump_file, "Iterations: %i\n", iterations);
3036 if (inlined)
3038 timevar_push (TV_INTEGRATION);
3039 todo |= optimize_inline_calls (current_function_decl);
3040 timevar_pop (TV_INTEGRATION);
3043 fun->always_inline_functions_inlined = true;
3045 return todo;
3048 /* Do inlining of small functions. Doing so early helps profiling and other
3049 passes to be somewhat more effective and avoids some code duplication in
3050 the later real inlining pass for testcases with very many function calls. */
3052 namespace {
3054 const pass_data pass_data_early_inline =
3056 GIMPLE_PASS, /* type */
3057 "einline", /* name */
3058 OPTGROUP_INLINE, /* optinfo_flags */
3059 TV_EARLY_INLINING, /* tv_id */
3060 PROP_ssa, /* properties_required */
3061 0, /* properties_provided */
3062 0, /* properties_destroyed */
3063 0, /* todo_flags_start */
3064 0, /* todo_flags_finish */
3067 class pass_early_inline : public gimple_opt_pass
3069 public:
3070 pass_early_inline (gcc::context *ctxt)
3071 : gimple_opt_pass (pass_data_early_inline, ctxt)
3074 /* opt_pass methods: */
3075 virtual unsigned int execute (function *);
3077 }; // class pass_early_inline
3079 unsigned int
3080 pass_early_inline::execute (function *fun)
3082 return early_inliner (fun);
3085 } // anon namespace
3087 gimple_opt_pass *
3088 make_pass_early_inline (gcc::context *ctxt)
3090 return new pass_early_inline (ctxt);
3093 namespace {
3095 const pass_data pass_data_ipa_inline =
3097 IPA_PASS, /* type */
3098 "inline", /* name */
3099 OPTGROUP_INLINE, /* optinfo_flags */
3100 TV_IPA_INLINING, /* tv_id */
3101 0, /* properties_required */
3102 0, /* properties_provided */
3103 0, /* properties_destroyed */
3104 0, /* todo_flags_start */
3105 ( TODO_dump_symtab ), /* todo_flags_finish */
3108 class pass_ipa_inline : public ipa_opt_pass_d
3110 public:
3111 pass_ipa_inline (gcc::context *ctxt)
3112 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3113 NULL, /* generate_summary */
3114 NULL, /* write_summary */
3115 NULL, /* read_summary */
3116 NULL, /* write_optimization_summary */
3117 NULL, /* read_optimization_summary */
3118 NULL, /* stmt_fixup */
3119 0, /* function_transform_todo_flags_start */
3120 inline_transform, /* function_transform */
3121 NULL) /* variable_transform */
3124 /* opt_pass methods: */
3125 virtual unsigned int execute (function *) { return ipa_inline (); }
3127 }; // class pass_ipa_inline
3129 } // anon namespace
3131 ipa_opt_pass_d *
3132 make_pass_ipa_inline (gcc::context *ctxt)
3134 return new pass_ipa_inline (ctxt);