gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
23 The implementation of inliner is organized as follows:
25 inlining heuristics limits
27    can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by user (allowed function growth, growth and so
29 on).
31 Functions are inlined when it is obvious the result is profitable (such
32    as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
42 Simple local inlining pass inlining callees into current function.
43    This pass makes no use of whole unit analysis and thus it can make only
44 very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49    callees of the function seen by the early inliner were already optimized
50    and the results of early inlining add a lot of optimization opportunities
51 for the local optimization.
53    The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
57    The main strength of the pass is the ability to eliminate abstraction
58 penalty in C++ code (via combination of inlining and early
59 optimization) and thus improve quality of analysis done by real IPA
60 optimizers.
62 Because of lack of whole unit knowledge, the pass can not really make
63 good code size/performance tradeoffs. It however does very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs following steps:
73 1) inlining of small functions. This is implemented by greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75    inlining them in this order as long as inline limits allow doing so.
77    This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so,
79 special purpose recursive inliner is executed on function when
80 recursive edge is met as viable candidate.
82 2) Unreachable functions are removed from callgraph. Inlining leads
83 to devirtualization and other modification of callgraph so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to reduction of code size by eliminating
90 the need for offline copy of the function. */
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "params.h"
109 #include "profile.h"
110 #include "symbol-summary.h"
111 #include "tree-vrp.h"
112 #include "ipa-prop.h"
113 #include "ipa-fnsummary.h"
114 #include "ipa-inline.h"
115 #include "ipa-utils.h"
116 #include "sreal.h"
117 #include "auto-profile.h"
118 #include "builtins.h"
119 #include "fibonacci_heap.h"
120 #include "stringpool.h"
121 #include "attribs.h"
122 #include "asan.h"
124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
127 /* Statistics we collect about inlining algorithm. */
128 static int overall_size;
129 static profile_count max_count;
130 static profile_count spec_rem;
132 /* Return false when inlining edge E would lead to violating
133 limits on function unit growth or stack usage growth.
135 The relative function body growth limit is present generally
136 to avoid problems with non-linear behavior of the compiler.
137 To allow inlining huge functions into tiny wrapper, the limit
138 is always based on the bigger of the two functions considered.
140 For stack growth limits we always base the growth in stack usage
141 of the callers. We want to prevent applications from segfaulting
142   on stack overflow when functions with huge stack frames get
143 inlined. */
145 static bool
146 caller_growth_limits (struct cgraph_edge *e)
148 struct cgraph_node *to = e->caller;
149 struct cgraph_node *what = e->callee->ultimate_alias_target ();
150 int newsize;
151 int limit = 0;
152 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
153 ipa_fn_summary *info, *what_info;
154 ipa_fn_summary *outer_info = ipa_fn_summaries->get (to);
156 /* Look for function e->caller is inlined to. While doing
157 so work out the largest function body on the way. As
158 described above, we want to base our function growth
159   limits on that. Not on the self size of the
160 outer function, not on the self size of inline code
161 we immediately inline to. This is the most relaxed
162 interpretation of the rule "do not grow large functions
163 too much in order to prevent compiler from exploding". */
164 while (true)
166 info = ipa_fn_summaries->get (to);
167 if (limit < info->self_size)
168 limit = info->self_size;
169 if (stack_size_limit < info->estimated_self_stack_size)
170 stack_size_limit = info->estimated_self_stack_size;
171 if (to->global.inlined_to)
172 to = to->callers->caller;
173 else
174 break;
177 what_info = ipa_fn_summaries->get (what);
179 if (limit < what_info->self_size)
180 limit = what_info->self_size;
182 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
184 /* Check the size after inlining against the function limits. But allow
185 the function to shrink if it went over the limits by forced inlining. */
186 newsize = estimate_size_after_inlining (to, e);
187 if (newsize >= info->size
188 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
189 && newsize > limit)
191 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
192 return false;
195 if (!what_info->estimated_stack_size)
196 return true;
198 /* FIXME: Stack size limit often prevents inlining in Fortran programs
199   due to large I/O data structures used by the Fortran front-end.
200 We ought to ignore this limit when we know that the edge is executed
201 on every invocation of the caller (i.e. its call statement dominates
202 exit block). We do not track this information, yet. */
203 stack_size_limit += ((gcov_type)stack_size_limit
204 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
206 inlined_stack = (outer_info->stack_frame_offset
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212 /* If function already has large stack usage from sibling
213 inline call, we can inline, too.
214   This bit overoptimistically assumes that we are good at stack
215 packing. */
216 && inlined_stack > info->estimated_stack_size
217 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
222 return true;
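/* Worked example (editorial illustration; the --param values below are
   sample settings, not necessarily the defaults): if the largest body on
   the inline chain has self_size 500, the limit starts at 500 and with
   --param large-function-growth=100 becomes 500 + 500 * 100 / 100 = 1000.
   CIF_LARGE_FUNCTION_GROWTH_LIMIT is reported only when the estimated size
   after inlining does not shrink the function, exceeds
   --param large-function-insns and exceeds this limit.  Likewise a caller
   chain whose largest self stack frame is 256 bytes may, with
   --param large-stack-frame-growth=1000, grow to 256 + 256 * 1000 / 100
   = 2816 bytes before CIF_LARGE_STACK_FRAME_GROWTH_LIMIT triggers, and
   even then only if the result also exceeds --param large-stack-frame and
   the caller's existing estimated stack usage.  */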
225 /* Dump info about why inlining has failed. */
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
230 if (dump_file)
232 fprintf (dump_file, " not inlinable: %s -> %s, %s\n",
233 e->caller->dump_name (),
234 e->callee->dump_name (),
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
241 fprintf (dump_file, " LTO objects: %s, %s\n",
242 e->caller->lto_file_data->file_name,
243 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
246 cl_target_option_print_diff
247 (dump_file, 2, target_opts_for_fn (e->caller->decl),
248 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
249 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
250 cl_optimization_print_diff
251 (dump_file, 2, opts_for_fn (e->caller->decl),
252 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256 /* Decide whether sanitizer-related attributes allow inlining. */
258 static bool
259 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
261 if (!caller || !callee)
262 return true;
264 return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
265 == sanitize_flags_p (SANITIZE_ADDRESS, callee))
266 && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
267 == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
268 && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
269 == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
272 /* Used for flags where it is safe to inline when caller's value is
273   greater than callee's. */
274 #define check_maybe_up(flag) \
275 (opts_for_fn (caller->decl)->x_##flag \
276 != opts_for_fn (callee->decl)->x_##flag \
277 && (!always_inline \
278 || opts_for_fn (caller->decl)->x_##flag \
279 < opts_for_fn (callee->decl)->x_##flag))
280 /* Used for flags where it is safe to inline when caller's value is
281 smaller than callee's. */
282 #define check_maybe_down(flag) \
283 (opts_for_fn (caller->decl)->x_##flag \
284 != opts_for_fn (callee->decl)->x_##flag \
285 && (!always_inline \
286 || opts_for_fn (caller->decl)->x_##flag \
287 > opts_for_fn (callee->decl)->x_##flag))
288 /* Used for flags where exact match is needed for correctness. */
289 #define check_match(flag) \
290 (opts_for_fn (caller->decl)->x_##flag \
291 != opts_for_fn (callee->decl)->x_##flag)
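/* Illustration of the macros above (editorial note): for an always_inline
   callee, check_maybe_up (flag_errno_math) evaluates to false (no mismatch)
   when the caller was built with -fmath-errno and the callee with
   -fno-math-errno, because the caller's value (1) is greater than the
   callee's (0); the opposite combination, and any difference for a callee
   that is not always_inline, makes the macro true and the corresponding
   option is then treated as a mismatch.  */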
293 /* Decide if we can inline the edge and possibly update
294 inline_failed reason.
295 We check whether inlining is possible at all and whether
296 caller growth limits allow doing so.
298 if REPORT is true, output reason to the dump file. */
300 static bool
301 can_inline_edge_p (struct cgraph_edge *e, bool report,
302 bool early = false)
304 gcc_checking_assert (e->inline_failed);
306 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
308 if (report)
309 report_inline_failed_reason (e);
310 return false;
313 bool inlinable = true;
314 enum availability avail;
315 cgraph_node *caller = e->caller->global.inlined_to
316 ? e->caller->global.inlined_to : e->caller;
317 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
319 if (!callee->definition)
321 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
322 inlinable = false;
324 if (!early && (!opt_for_fn (callee->decl, optimize)
325 || !opt_for_fn (caller->decl, optimize)))
327 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
328 inlinable = false;
330 else if (callee->calls_comdat_local)
332 e->inline_failed = CIF_USES_COMDAT_LOCAL;
333 inlinable = false;
335 else if (avail <= AVAIL_INTERPOSABLE)
337 e->inline_failed = CIF_OVERWRITABLE;
338 inlinable = false;
340 /* All edges with call_stmt_cannot_inline_p should have inline_failed
341 initialized to one of FINAL_ERROR reasons. */
342 else if (e->call_stmt_cannot_inline_p)
343 gcc_unreachable ();
344 /* Don't inline if the functions have different EH personalities. */
345 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
346 && DECL_FUNCTION_PERSONALITY (callee->decl)
347 && (DECL_FUNCTION_PERSONALITY (caller->decl)
348 != DECL_FUNCTION_PERSONALITY (callee->decl)))
350 e->inline_failed = CIF_EH_PERSONALITY;
351 inlinable = false;
353 /* TM pure functions should not be inlined into non-TM_pure
354 functions. */
355 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
357 e->inline_failed = CIF_UNSPECIFIED;
358 inlinable = false;
360 /* Check compatibility of target optimization options. */
361 else if (!targetm.target_option.can_inline_p (caller->decl,
362 callee->decl))
364 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
365 inlinable = false;
367 else if (ipa_fn_summaries->get (callee) == NULL
368 || !ipa_fn_summaries->get (callee)->inlinable)
370 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
371 inlinable = false;
373 /* Don't inline a function with mismatched sanitization attributes. */
374 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
376 e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
377 inlinable = false;
379 if (!inlinable && report)
380 report_inline_failed_reason (e);
381 return inlinable;
384 /* Decide if we can inline the edge and possibly update
385 inline_failed reason.
386 We check whether inlining is possible at all and whether
387 caller growth limits allow doing so.
389 if REPORT is true, output reason to the dump file.
391 if DISREGARD_LIMITS is true, ignore size limits. */
393 static bool
394 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
395 bool disregard_limits = false, bool early = false)
397 gcc_checking_assert (e->inline_failed);
399 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
401 if (report)
402 report_inline_failed_reason (e);
403 return false;
406 bool inlinable = true;
407 enum availability avail;
408 cgraph_node *caller = e->caller->global.inlined_to
409 ? e->caller->global.inlined_to : e->caller;
410 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
411 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
412 tree callee_tree
413 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
414 /* Check if caller growth allows the inlining. */
415 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
416 && !disregard_limits
417 && !lookup_attribute ("flatten",
418 DECL_ATTRIBUTES (caller->decl))
419 && !caller_growth_limits (e))
420 inlinable = false;
421 /* Don't inline a function with a higher optimization level than the
422   caller. FIXME: this is really just the tip of the iceberg of handling
423 optimization attribute. */
424 else if (caller_tree != callee_tree)
426 bool always_inline =
427 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
428 && lookup_attribute ("always_inline",
429 DECL_ATTRIBUTES (callee->decl)));
430 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
431 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
433 /* Until GCC 4.9 we did not check the semantics-altering flags
434 below and inlined across optimization boundaries.
435 Enabling checks below breaks several packages by refusing
436 to inline library always_inline functions. See PR65873.
437 Disable the check for early inlining for now until better solution
438 is found. */
439 if (always_inline && early)
441 /* There are some options that change IL semantics which means
442   we cannot inline in these cases for correctness reasons.
443 Not even for always_inline declared functions. */
444 else if (check_match (flag_wrapv)
445 || check_match (flag_trapv)
446 || check_match (flag_pcc_struct_return)
447   /* When caller or callee does FP math, be sure FP codegen flags are
448 compatible. */
449 || ((caller_info->fp_expressions && callee_info->fp_expressions)
450 && (check_maybe_up (flag_rounding_math)
451 || check_maybe_up (flag_trapping_math)
452 || check_maybe_down (flag_unsafe_math_optimizations)
453 || check_maybe_down (flag_finite_math_only)
454 || check_maybe_up (flag_signaling_nans)
455 || check_maybe_down (flag_cx_limited_range)
456 || check_maybe_up (flag_signed_zeros)
457 || check_maybe_down (flag_associative_math)
458 || check_maybe_down (flag_reciprocal_math)
459 || check_maybe_down (flag_fp_int_builtin_inexact)
460 /* Strictly speaking only when the callee contains function
461 calls that may end up setting errno. */
462 || check_maybe_up (flag_errno_math)))
463   /* We do not want code compiled with exceptions to be
464 brought into a non-EH function unless we know that the callee
465 does not throw.
466 This is tracked by DECL_FUNCTION_PERSONALITY. */
467 || (check_maybe_up (flag_non_call_exceptions)
468 && DECL_FUNCTION_PERSONALITY (callee->decl))
469 || (check_maybe_up (flag_exceptions)
470 && DECL_FUNCTION_PERSONALITY (callee->decl))
471   /* When devirtualization is disabled for the callee, it is not safe
472 to inline it as we possibly mangled the type info.
473 Allow early inlining of always inlines. */
474 || (!early && check_maybe_down (flag_devirtualize)))
476 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
477 inlinable = false;
479 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
480 else if (always_inline)
482 /* When user added an attribute to the callee honor it. */
483 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
484 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
486 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
487 inlinable = false;
489   /* If explicit optimize attributes are not used, the mismatch is caused
490 by different command line options used to build different units.
491 Do not care about COMDAT functions - those are intended to be
492 optimized with the optimization flags of module they are used in.
493 Also do not care about mixing up size/speed optimization when
494 DECL_DISREGARD_INLINE_LIMITS is set. */
495 else if ((callee->merged_comdat
496 && !lookup_attribute ("optimize",
497 DECL_ATTRIBUTES (caller->decl)))
498 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
500 /* If mismatch is caused by merging two LTO units with different
501   optimization flags we want to be a bit nicer. However never inline
502 if one of functions is not optimized at all. */
503 else if (!opt_for_fn (callee->decl, optimize)
504 || !opt_for_fn (caller->decl, optimize))
506 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
507 inlinable = false;
509 /* If callee is optimized for size and caller is not, allow inlining if
510 code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
511   is inline (and thus likely a unified comdat). This will allow caller
512 to run faster. */
513 else if (opt_for_fn (callee->decl, optimize_size)
514 > opt_for_fn (caller->decl, optimize_size))
516 int growth = estimate_edge_growth (e);
517 if (growth > 0
518 && (!DECL_DECLARED_INLINE_P (callee->decl)
519 && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
520 MAX_INLINE_INSNS_AUTO)))
522 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
523 inlinable = false;
526 /* If callee is more aggressively optimized for performance than caller,
527 we generally want to inline only cheap (runtime wise) functions. */
528 else if (opt_for_fn (callee->decl, optimize_size)
529 < opt_for_fn (caller->decl, optimize_size)
530 || (opt_for_fn (callee->decl, optimize)
531 > opt_for_fn (caller->decl, optimize)))
533 if (estimate_edge_time (e)
534 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
536 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
537 inlinable = false;
543 if (!inlinable && report)
544 report_inline_failed_reason (e);
545 return inlinable;
549 /* Return true if the edge E is inlinable during early inlining. */
551 static bool
552 can_early_inline_edge_p (struct cgraph_edge *e)
554 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
555 /* Early inliner might get called at WPA stage when IPA pass adds new
556   function. In this case we cannot really do any early inlining
557 because function bodies are missing. */
558 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
559 return false;
560 if (!gimple_has_body_p (callee->decl))
562 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
563 return false;
565 /* In early inliner some of callees may not be in SSA form yet
566 (i.e. the callgraph is cyclic and we did not process
567 the callee by early inliner, yet). We don't have CIF code for this
568 case; later we will re-do the decision in the real inliner. */
569 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
570 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
572 if (dump_file)
573 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
574 return false;
576 if (!can_inline_edge_p (e, true, true)
577 || !can_inline_edge_by_limits_p (e, true, false, true))
578 return false;
579 return true;
583 /* Return number of calls in N. Ignore cheap builtins. */
585 static int
586 num_calls (struct cgraph_node *n)
588 struct cgraph_edge *e;
589 int num = 0;
591 for (e = n->callees; e; e = e->next_callee)
592 if (!is_inexpensive_builtin (e->callee->decl))
593 num++;
594 return num;
598 /* Return true if we are interested in inlining small function. */
600 static bool
601 want_early_inline_function_p (struct cgraph_edge *e)
603 bool want_inline = true;
604 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
606 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
608 /* For AutoFDO, we need to make sure that before profile summary, all
609 hot paths' IR look exactly the same as profiled binary. As a result,
610 in einliner, we will disregard size limit and inline those callsites
611 that are:
612 * inlined in the profiled binary, and
613 * the cloned callee has enough samples to be considered "hot". */
614 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
616 else if (!DECL_DECLARED_INLINE_P (callee->decl)
617 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
619 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
620 report_inline_failed_reason (e);
621 want_inline = false;
623 else
625 int growth = estimate_edge_growth (e);
626 int n;
628 if (growth <= 0)
630 else if (!e->maybe_hot_p ()
631 && growth > 0)
633 if (dump_file)
634 fprintf (dump_file, " will not early inline: %s->%s, "
635 "call is cold and code would grow by %i\n",
636 e->caller->dump_name (),
637 callee->dump_name (),
638 growth);
639 want_inline = false;
641 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
643 if (dump_file)
644 fprintf (dump_file, " will not early inline: %s->%s, "
645 "growth %i exceeds --param early-inlining-insns\n",
646 e->caller->dump_name (),
647 callee->dump_name (),
648 growth);
649 want_inline = false;
651 else if ((n = num_calls (callee)) != 0
652 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
654 if (dump_file)
655 fprintf (dump_file, " will not early inline: %s->%s, "
656 "growth %i exceeds --param early-inlining-insns "
657 "divided by number of calls\n",
658 e->caller->dump_name (),
659 callee->dump_name (),
660 growth);
661 want_inline = false;
664 return want_inline;
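/* Numeric sketch of the checks above (editorial illustration; assuming
   --param early-inlining-insns=14 as a sample value): a callee whose
   inlining is estimated to grow the caller by 6 instructions passes the
   plain growth test, but if it also contains 2 non-builtin calls then
   growth * (n + 1) = 6 * 3 = 18 exceeds the parameter and the edge is
   rejected with the "divided by number of calls" message.  */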
667 /* Compute time of the edge->caller + edge->callee execution when inlining
668 does not happen. */
670 inline sreal
671 compute_uninlined_call_time (struct cgraph_edge *edge,
672 sreal uninlined_call_time)
674 cgraph_node *caller = (edge->caller->global.inlined_to
675 ? edge->caller->global.inlined_to
676 : edge->caller);
678 sreal freq = edge->sreal_frequency ();
679 if (freq > 0)
680 uninlined_call_time *= freq;
681 else
682 uninlined_call_time = uninlined_call_time >> 11;
684 sreal caller_time = ipa_fn_summaries->get (caller)->time;
685 return uninlined_call_time + caller_time;
688 /* Same as compute_uninlined_call_time but compute time when inlining
689 does happen. */
691 inline sreal
692 compute_inlined_call_time (struct cgraph_edge *edge,
693 sreal time)
695 cgraph_node *caller = (edge->caller->global.inlined_to
696 ? edge->caller->global.inlined_to
697 : edge->caller);
698 sreal caller_time = ipa_fn_summaries->get (caller)->time;
700 sreal freq = edge->sreal_frequency ();
701 if (freq > 0)
702 time *= freq;
703 else
704 time = time >> 11;
706 /* This calculation should match one in ipa-inline-analysis.c
707 (estimate_edge_size_and_time). */
708 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
709 time += caller_time;
710 if (time <= 0)
711 time = ((sreal) 1) >> 8;
712 gcc_checking_assert (time >= 0);
713 return time;
716 /* Return true if the speedup for inlining E is bigger than
717   PARAM_INLINE_MIN_SPEEDUP. */
719 static bool
720 big_speedup_p (struct cgraph_edge *e)
722 sreal unspec_time;
723 sreal spec_time = estimate_edge_time (e, &unspec_time);
724 sreal time = compute_uninlined_call_time (e, unspec_time);
725 sreal inlined_time = compute_inlined_call_time (e, spec_time);
727 if ((time - inlined_time) * 100
728 > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
729 return true;
730 return false;
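/* Worked example for the two helpers and big_speedup_p (editorial
   illustration with made-up numbers): let the caller body take 1000 time
   units, the call execute 5 times per invocation (freq == 5), the call
   cost 20 units when not inlined and 14 units when inlined, and the call
   statement itself cost 2.  Then the uninlined time is 1000 + 5 * 20 =
   1100 and the inlined time is 1000 + 5 * 14 - 5 * 2 = 1060.  The test
   above compares (1100 - 1060) * 100 = 4000 with 1100 times the
   inline-min-speedup parameter; with that parameter at 8 the threshold is
   8800, so this particular edge does not count as a big speedup.  */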
733 /* Return true if we are interested in inlining small function.
734 When REPORT is true, report reason to dump file. */
736 static bool
737 want_inline_small_function_p (struct cgraph_edge *e, bool report)
739 bool want_inline = true;
740 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
742 /* Allow this function to be called before can_inline_edge_p,
743 since it's usually cheaper. */
744 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
745 want_inline = false;
746 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
748 else if (!DECL_DECLARED_INLINE_P (callee->decl)
749 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
751 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
752 want_inline = false;
754 /* Do fast and conservative check if the function can be good
755 inline candidate. At the moment we allow inline hints to
756 promote non-inline functions to inline and we increase
757 MAX_INLINE_INSNS_SINGLE 16-fold for inline functions. */
758 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
759 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
760 && ipa_fn_summaries->get (callee)->min_size
761 - ipa_call_summaries->get (e)->call_stmt_size
762 > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
764 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
765 want_inline = false;
767 else if ((DECL_DECLARED_INLINE_P (callee->decl)
768 || e->count.ipa ().nonzero_p ())
769 && ipa_fn_summaries->get (callee)->min_size
770 - ipa_call_summaries->get (e)->call_stmt_size
771 > 16 * MAX_INLINE_INSNS_SINGLE)
773 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
774 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
775 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
776 want_inline = false;
778 else
780 int growth = estimate_edge_growth (e);
781 ipa_hints hints = estimate_edge_hints (e);
782 bool big_speedup = big_speedup_p (e);
784 if (growth <= 0)
786 /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when
787   hints suggest that inlining the given function is very profitable. */
788 else if (DECL_DECLARED_INLINE_P (callee->decl)
789 && growth >= MAX_INLINE_INSNS_SINGLE
790 && ((!big_speedup
791 && !(hints & (INLINE_HINT_indirect_call
792 | INLINE_HINT_known_hot
793 | INLINE_HINT_loop_iterations
794 | INLINE_HINT_array_index
795 | INLINE_HINT_loop_stride)))
796 || growth >= MAX_INLINE_INSNS_SINGLE * 16))
798 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
799 want_inline = false;
801 else if (!DECL_DECLARED_INLINE_P (callee->decl)
802 && !opt_for_fn (e->caller->decl, flag_inline_functions))
804 /* growth_likely_positive is expensive, always test it last. */
805 if (growth >= MAX_INLINE_INSNS_SINGLE
806 || growth_likely_positive (callee, growth))
808 e->inline_failed = CIF_NOT_DECLARED_INLINED;
809 want_inline = false;
812   /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
813   Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
814 inlining given function is very profitable. */
815 else if (!DECL_DECLARED_INLINE_P (callee->decl)
816 && !big_speedup
817 && !(hints & INLINE_HINT_known_hot)
818 && growth >= ((hints & (INLINE_HINT_indirect_call
819 | INLINE_HINT_loop_iterations
820 | INLINE_HINT_array_index
821 | INLINE_HINT_loop_stride))
822 ? MAX (MAX_INLINE_INSNS_AUTO,
823 MAX_INLINE_INSNS_SINGLE)
824 : MAX_INLINE_INSNS_AUTO))
826 /* growth_likely_positive is expensive, always test it last. */
827 if (growth >= MAX_INLINE_INSNS_SINGLE
828 || growth_likely_positive (callee, growth))
830 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
831 want_inline = false;
834 /* If call is cold, do not inline when function body would grow. */
835 else if (!e->maybe_hot_p ()
836 && (growth >= MAX_INLINE_INSNS_SINGLE
837 || growth_likely_positive (callee, growth)))
839 e->inline_failed = CIF_UNLIKELY_CALL;
840 want_inline = false;
843 if (!want_inline && report)
844 report_inline_failed_reason (e);
845 return want_inline;
848 /* EDGE is self recursive edge.
849   We handle two cases - when function A is inlined into itself
850 or when function A is being inlined into another inliner copy of function
851 A within function B.
853 In first case OUTER_NODE points to the toplevel copy of A, while
854 in the second case OUTER_NODE points to the outermost copy of A in B.
856 In both cases we want to be extra selective since
857   inlining the call will just cause new recursive calls to appear. */
859 static bool
860 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
861 struct cgraph_node *outer_node,
862 bool peeling,
863 int depth)
865 char const *reason = NULL;
866 bool want_inline = true;
867 sreal caller_freq = 1;
868 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
870 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
871 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
873 if (!edge->maybe_hot_p ())
875 reason = "recursive call is cold";
876 want_inline = false;
878 else if (depth > max_depth)
880 reason = "--param max-inline-recursive-depth exceeded.";
881 want_inline = false;
883 else if (outer_node->global.inlined_to
884 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
886 reason = "caller frequency is 0";
887 want_inline = false;
890 if (!want_inline)
892 /* Inlining of self recursive function into copy of itself within other
893 function is transformation similar to loop peeling.
895 Peeling is profitable if we can inline enough copies to make probability
896 of actual call to the self recursive function very small. Be sure that
897 the probability of recursion is small.
899 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
900   This way the expected number of recursions is at most max_depth. */
901 else if (peeling)
903 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
904 int i;
905 for (i = 1; i < depth; i++)
906 max_prob = max_prob * max_prob;
907 if (edge->sreal_frequency () >= max_prob * caller_freq)
909 reason = "frequency of recursive call is too large";
910 want_inline = false;
913 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
914 recursion depth is large. We reduce function call overhead and increase
915 chances that things fit in hardware return predictor.
917 Recursive inlining might however increase cost of stack frame setup
918 actually slowing down functions whose recursion tree is wide rather than
919 deep.
921 Deciding reliably on when to do recursive inlining without profile feedback
922 is tricky. For now we disable recursive inlining when probability of self
923 recursion is low.
925 Recursive inlining of self recursive call within loop also results in
926 large loop depths that generally optimize badly. We may want to throttle
927 down inlining in those cases. In particular this seems to happen in one
928 of libstdc++ rb tree methods. */
929 else
931 if (edge->sreal_frequency () * 100
932 <= caller_freq
933 * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
935 reason = "frequency of recursive call is too small";
936 want_inline = false;
939 if (!want_inline && dump_file)
940 fprintf (dump_file, " not inlining recursively: %s\n", reason);
941 return want_inline;
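/* Example of the peeling bound above (editorial illustration): with
   max_depth == 8 the cap starts at max_prob = 1 - 1/8 = 0.875 and is
   squared once for every level of existing depth beyond the first, so at
   depth 3 the recursive edge frequency must stay below 0.875^4 ~= 0.59 of
   the caller frequency, otherwise peeling stops with "frequency of
   recursive call is too large".  */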
944 /* Return true when NODE has uninlinable caller;
945 set HAS_HOT_CALL if it has hot call.
946 Worker for cgraph_for_node_and_aliases. */
948 static bool
949 check_callers (struct cgraph_node *node, void *has_hot_call)
951 struct cgraph_edge *e;
952 for (e = node->callers; e; e = e->next_caller)
954 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
955 || !opt_for_fn (e->caller->decl, optimize))
956 return true;
957 if (!can_inline_edge_p (e, true))
958 return true;
959 if (e->recursive_p ())
960 return true;
961 if (!can_inline_edge_by_limits_p (e, true))
962 return true;
963 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
964 *(bool *)has_hot_call = true;
966 return false;
969 /* If NODE has a caller, return true. */
971 static bool
972 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
974 if (node->callers)
975 return true;
976 return false;
979 /* Decide if inlining NODE would reduce unit size by eliminating
980 the offline copy of function.
981 When COLD is true the cold calls are considered, too. */
983 static bool
984 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
986 bool has_hot_call = false;
988   /* Aliases get inlined along with the function they alias. */
989 if (node->alias)
990 return false;
991 /* Already inlined? */
992 if (node->global.inlined_to)
993 return false;
994 /* Does it have callers? */
995 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
996 return false;
997 /* Inlining into all callers would increase size? */
998 if (estimate_growth (node) > 0)
999 return false;
1000 /* All inlines must be possible. */
1001 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1002 true))
1003 return false;
1004 if (!cold && !has_hot_call)
1005 return false;
1006 return true;
1009 /* A cost model driving the inlining heuristics in a way so the edges with
1010 smallest badness are inlined first. After each inlining is performed
1011 the costs of all caller edges of nodes affected are recomputed so the
1012 metrics may accurately depend on values such as number of inlinable callers
1013 of the function or function body size. */
1015 static sreal
1016 edge_badness (struct cgraph_edge *edge, bool dump)
1018 sreal badness;
1019 int growth;
1020 sreal edge_time, unspec_edge_time;
1021 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1022 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1023 ipa_hints hints;
1024 cgraph_node *caller = (edge->caller->global.inlined_to
1025 ? edge->caller->global.inlined_to
1026 : edge->caller);
1028 growth = estimate_edge_growth (edge);
1029 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1030 hints = estimate_edge_hints (edge);
1031 gcc_checking_assert (edge_time >= 0);
1032 /* Check that inlined time is better, but tolerate some roundoff issues.
1033 FIXME: When callee profile drops to 0 we account calls more. This
1034 should be fixed by never doing that. */
1035 gcc_checking_assert ((edge_time * 100
1036 - callee_info->time * 101).to_int () <= 0
1037 || callee->count.ipa ().initialized_p ());
1038 gcc_checking_assert (growth <= callee_info->size);
1040 if (dump)
1042 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1043 edge->caller->dump_name (),
1044 edge->callee->dump_name ());
1045 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1046 growth,
1047 edge_time.to_double (),
1048 unspec_edge_time.to_double ());
1049 ipa_dump_hints (dump_file, hints);
1050 if (big_speedup_p (edge))
1051 fprintf (dump_file, " big_speedup");
1052 fprintf (dump_file, "\n");
1055 /* Always prefer inlining saving code size. */
1056 if (growth <= 0)
1058 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1059 if (dump)
1060 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1061 growth);
1063 /* Inlining into EXTERNAL functions is not going to change anything unless
1064 they are themselves inlined. */
1065 else if (DECL_EXTERNAL (caller->decl))
1067 if (dump)
1068 fprintf (dump_file, " max: function is external\n");
1069 return sreal::max ();
1071   /* When profile is available, compute badness as:
1073 time_saved * caller_count
1074 goodness = -------------------------------------------------
1075 growth_of_caller * overall_growth * combined_size
1077 badness = - goodness
1079 Again use negative value to make calls with profile appear hotter
1080      than calls without.
1082 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1083 || caller->count.ipa ().nonzero_p ())
1085 sreal numerator, denominator;
1086 int overall_growth;
1087 sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1089 numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1090 - inlined_time);
1091 if (numerator <= 0)
1092 numerator = ((sreal) 1 >> 8);
1093 if (caller->count.ipa ().nonzero_p ())
1094 numerator *= caller->count.ipa ().to_gcov_type ();
1095 else if (caller->count.ipa ().initialized_p ())
1096 numerator = numerator >> 11;
1097 denominator = growth;
1099 overall_growth = callee_info->growth;
1101 /* Look for inliner wrappers of the form:
1103 inline_caller ()
1105 do_fast_job...
1106 if (need_more_work)
1107 noninline_callee ();
1109      Without penalizing this case, we usually inline noninline_callee
1110 into the inline_caller because overall_growth is small preventing
1111 further inlining of inline_caller.
1113 Penalize only callgraph edges to functions with small overall
1114 growth ...
1116 if (growth > overall_growth
1117 /* ... and having only one caller which is not inlined ... */
1118 && callee_info->single_caller
1119 && !edge->caller->global.inlined_to
1120 /* ... and edges executed only conditionally ... */
1121 && edge->sreal_frequency () < 1
1122 /* ... consider case where callee is not inline but caller is ... */
1123 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1124 && DECL_DECLARED_INLINE_P (caller->decl))
1125 /* ... or when early optimizers decided to split and edge
1126 frequency still indicates splitting is a win ... */
1127 || (callee->split_part && !caller->split_part
1128 && edge->sreal_frequency () * 100
1129 < PARAM_VALUE
1130 (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1131 /* ... and do not overwrite user specified hints. */
1132 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1133 || DECL_DECLARED_INLINE_P (caller->decl)))))
1135 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1136 int caller_growth = caller_info->growth;
1138 /* Only apply the penalty when caller looks like inline candidate,
1139      and it is not called once. */
1140 if (!caller_info->single_caller && overall_growth < caller_growth
1141 && caller_info->inlinable
1142 && caller_info->size
1143 < (DECL_DECLARED_INLINE_P (caller->decl)
1144 ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1146 if (dump)
1147 fprintf (dump_file,
1148 " Wrapper penalty. Increasing growth %i to %i\n",
1149 overall_growth, caller_growth);
1150 overall_growth = caller_growth;
1153 if (overall_growth > 0)
1155   /* Strongly prefer functions with few callers that can be inlined
1156      fully. The square root here leads to smaller binaries on average.
1157 Watch however for extreme cases and return to linear function
1158 when growth is large. */
1159 if (overall_growth < 256)
1160 overall_growth *= overall_growth;
1161 else
1162 overall_growth += 256 * 256 - 256;
1163 denominator *= overall_growth;
1165 denominator *= inlined_time;
1167 badness = - numerator / denominator;
1169 if (dump)
1171 fprintf (dump_file,
1172 " %f: guessed profile. frequency %f, count %" PRId64
1173 " caller count %" PRId64
1174 " time w/o inlining %f, time with inlining %f"
1175 " overall growth %i (current) %i (original)"
1176 " %i (compensated)\n",
1177 badness.to_double (),
1178 edge->sreal_frequency ().to_double (),
1179 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1180 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1181 compute_uninlined_call_time (edge,
1182 unspec_edge_time).to_double (),
1183 inlined_time.to_double (),
1184 estimate_growth (callee),
1185 callee_info->growth, overall_growth);
1188 /* When function local profile is not available or it does not give
1189   useful information (i.e. frequency is zero), base the cost on
1190 loop nest and overall size growth, so we optimize for overall number
1191 of functions fully inlined in program. */
1192 else
1194 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1195 badness = growth;
1197 /* Decrease badness if call is nested. */
1198 if (badness > 0)
1199 badness = badness >> nest;
1200 else
1201 badness = badness << nest;
1202 if (dump)
1203 fprintf (dump_file, " %f: no profile. nest %i\n",
1204 badness.to_double (), nest);
1206 gcc_checking_assert (badness != 0);
1208 if (edge->recursive_p ())
1209 badness = badness.shift (badness > 0 ? 4 : -4);
1210 if ((hints & (INLINE_HINT_indirect_call
1211 | INLINE_HINT_loop_iterations
1212 | INLINE_HINT_array_index
1213 | INLINE_HINT_loop_stride))
1214 || callee_info->growth <= 0)
1215 badness = badness.shift (badness > 0 ? -2 : 2);
1216 if (hints & (INLINE_HINT_same_scc))
1217 badness = badness.shift (badness > 0 ? 3 : -3);
1218 else if (hints & (INLINE_HINT_in_scc))
1219 badness = badness.shift (badness > 0 ? 2 : -2);
1220 else if (hints & (INLINE_HINT_cross_module))
1221 badness = badness.shift (badness > 0 ? 1 : -1);
1222 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1223 badness = badness.shift (badness > 0 ? -4 : 4);
1224 else if ((hints & INLINE_HINT_declared_inline))
1225 badness = badness.shift (badness > 0 ? -3 : 3);
1226 if (dump)
1227 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1228 return badness;
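/* Rough numeric sketch of the guessed-profile branch of edge_badness
   (editorial illustration): with a time saving of 40, edge growth of 10,
   callee overall growth of 30 (squared to 900 because it is below 256)
   and an inlined caller time of 1060, badness is -40 / (10 * 900 * 1060).
   The hint adjustments then shift this value by powers of two: a
   recursive edge has its magnitude divided by 16 (pushing it later in the
   queue), while e.g. INLINE_HINT_indirect_call or
   DECL_DISREGARD_INLINE_LIMITS multiply the magnitude, moving the edge
   toward the front.  */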
1231 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1232 static inline void
1233 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1235 sreal badness = edge_badness (edge, false);
1236 if (edge->aux)
1238 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1239 gcc_checking_assert (n->get_data () == edge);
1241 /* fibonacci_heap::replace_key does busy updating of the
1242      heap that is unnecessarily expensive.
1243 We do lazy increases: after extracting minimum if the key
1244 turns out to be out of date, it is re-inserted into heap
1245 with correct value. */
1246 if (badness < n->get_key ())
1248 if (dump_file && (dump_flags & TDF_DETAILS))
1250 fprintf (dump_file,
1251 " decreasing badness %s -> %s, %f to %f\n",
1252 edge->caller->dump_name (),
1253 edge->callee->dump_name (),
1254 n->get_key ().to_double (),
1255 badness.to_double ());
1257 heap->decrease_key (n, badness);
1260 else
1262 if (dump_file && (dump_flags & TDF_DETAILS))
1264 fprintf (dump_file,
1265 " enqueuing call %s -> %s, badness %f\n",
1266 edge->caller->dump_name (),
1267 edge->callee->dump_name (),
1268 badness.to_double ());
1270 edge->aux = heap->insert (badness, edge);
1275 /* NODE was inlined.
1276   All caller edges need to be reset because
1277   size estimates change. Similarly callees need to be reset
1278 because better context may be known. */
1280 static void
1281 reset_edge_caches (struct cgraph_node *node)
1283 struct cgraph_edge *edge;
1284 struct cgraph_edge *e = node->callees;
1285 struct cgraph_node *where = node;
1286 struct ipa_ref *ref;
1288 if (where->global.inlined_to)
1289 where = where->global.inlined_to;
1291 if (edge_growth_cache != NULL)
1292 for (edge = where->callers; edge; edge = edge->next_caller)
1293 if (edge->inline_failed)
1294 edge_growth_cache->remove (edge);
1296 FOR_EACH_ALIAS (where, ref)
1297 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1299 if (!e)
1300 return;
1302 while (true)
1303 if (!e->inline_failed && e->callee->callees)
1304 e = e->callee->callees;
1305 else
1307 if (edge_growth_cache != NULL && e->inline_failed)
1308 edge_growth_cache->remove (e);
1309 if (e->next_callee)
1310 e = e->next_callee;
1311 else
1315 if (e->caller == node)
1316 return;
1317 e = e->caller->callers;
1319 while (!e->next_callee);
1320 e = e->next_callee;
1325 /* Recompute HEAP nodes for each of caller of NODE.
1326 UPDATED_NODES track nodes we already visited, to avoid redundant work.
1327 When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1328 it is inlinable. Otherwise check all edges. */
1330 static void
1331 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1332 bitmap updated_nodes,
1333 struct cgraph_edge *check_inlinablity_for)
1335 struct cgraph_edge *edge;
1336 struct ipa_ref *ref;
1338 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1339 || node->global.inlined_to)
1340 return;
1341 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1342 return;
1344 FOR_EACH_ALIAS (node, ref)
1346 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1347 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1350 for (edge = node->callers; edge; edge = edge->next_caller)
1351 if (edge->inline_failed)
1353 if (!check_inlinablity_for
1354 || check_inlinablity_for == edge)
1356 if (can_inline_edge_p (edge, false)
1357 && want_inline_small_function_p (edge, false)
1358 && can_inline_edge_by_limits_p (edge, false))
1359 update_edge_key (heap, edge);
1360 else if (edge->aux)
1362 report_inline_failed_reason (edge);
1363 heap->delete_node ((edge_heap_node_t *) edge->aux);
1364 edge->aux = NULL;
1367 else if (edge->aux)
1368 update_edge_key (heap, edge);
1372 /* Recompute HEAP nodes for each uninlined call in NODE.
1373 This is used when we know that edge badnesses are going only to increase
1374 (we introduced new call site) and thus all we need is to insert newly
1375 created edges into heap. */
1377 static void
1378 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1379 bitmap updated_nodes)
1381 struct cgraph_edge *e = node->callees;
1383 if (!e)
1384 return;
1385 while (true)
1386 if (!e->inline_failed && e->callee->callees)
1387 e = e->callee->callees;
1388 else
1390 enum availability avail;
1391 struct cgraph_node *callee;
1392 /* We do not reset callee growth cache here. Since we added a new call,
1393      growth could have just increased and consequently the badness metric
1394      doesn't need updating. */
1395 if (e->inline_failed
1396 && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1397 && ipa_fn_summaries->get (callee) != NULL
1398 && ipa_fn_summaries->get (callee)->inlinable
1399 && avail >= AVAIL_AVAILABLE
1400 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1402 if (can_inline_edge_p (e, false)
1403 && want_inline_small_function_p (e, false)
1404 && can_inline_edge_by_limits_p (e, false))
1405 update_edge_key (heap, e);
1406 else if (e->aux)
1408 report_inline_failed_reason (e);
1409 heap->delete_node ((edge_heap_node_t *) e->aux);
1410 e->aux = NULL;
1413 if (e->next_callee)
1414 e = e->next_callee;
1415 else
1419 if (e->caller == node)
1420 return;
1421 e = e->caller->callers;
1423 while (!e->next_callee);
1424 e = e->next_callee;
1429 /* Enqueue all recursive calls from NODE into priority queue depending on
1430 how likely we want to recursively inline the call. */
1432 static void
1433 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1434 edge_heap_t *heap)
1436 struct cgraph_edge *e;
1437 enum availability avail;
1439 for (e = where->callees; e; e = e->next_callee)
1440 if (e->callee == node
1441 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1442 && avail > AVAIL_INTERPOSABLE))
1443 heap->insert (-e->sreal_frequency (), e);
1444 for (e = where->callees; e; e = e->next_callee)
1445 if (!e->inline_failed)
1446 lookup_recursive_calls (node, e->callee, heap);
1449 /* Decide on recursive inlining: in the case function has recursive calls,
1450 inline until body size reaches given argument. If any new indirect edges
1451 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1452 is NULL. */
1454 static bool
1455 recursive_inlining (struct cgraph_edge *edge,
1456 vec<cgraph_edge *> *new_edges)
1458 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1459 edge_heap_t heap (sreal::min ());
1460 struct cgraph_node *node;
1461 struct cgraph_edge *e;
1462 struct cgraph_node *master_clone = NULL, *next;
1463 int depth = 0;
1464 int n = 0;
1466 node = edge->caller;
1467 if (node->global.inlined_to)
1468 node = node->global.inlined_to;
1470 if (DECL_DECLARED_INLINE_P (node->decl))
1471 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1473 /* Make sure that function is small enough to be considered for inlining. */
1474 if (estimate_size_after_inlining (node, edge) >= limit)
1475 return false;
1476 lookup_recursive_calls (node, node, &heap);
1477 if (heap.empty ())
1478 return false;
1480 if (dump_file)
1481 fprintf (dump_file,
1482 " Performing recursive inlining on %s\n",
1483 node->name ());
1485 /* Do the inlining and update list of recursive call during process. */
1486 while (!heap.empty ())
1488 struct cgraph_edge *curr = heap.extract_min ();
1489 struct cgraph_node *cnode, *dest = curr->callee;
1491 if (!can_inline_edge_p (curr, true)
1492      || !can_inline_edge_by_limits_p (curr, true))
1493 continue;
1495   /* MASTER_CLONE is produced in the case we already started modifying
1496 the function. Be sure to redirect edge to the original body before
1497 estimating growths otherwise we will be seeing growths after inlining
1498 the already modified body. */
1499 if (master_clone)
1501 curr->redirect_callee (master_clone);
1502 if (edge_growth_cache != NULL)
1503 edge_growth_cache->remove (curr);
1506 if (estimate_size_after_inlining (node, curr) > limit)
1508 curr->redirect_callee (dest);
1509 if (edge_growth_cache != NULL)
1510 edge_growth_cache->remove (curr);
1511 break;
1514 depth = 1;
1515 for (cnode = curr->caller;
1516 cnode->global.inlined_to; cnode = cnode->callers->caller)
1517 if (node->decl
1518 == curr->callee->ultimate_alias_target ()->decl)
1519 depth++;
1521 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1523 curr->redirect_callee (dest);
1524 if (edge_growth_cache != NULL)
1525 edge_growth_cache->remove (curr);
1526 continue;
1529 if (dump_file)
1531 fprintf (dump_file,
1532 " Inlining call of depth %i", depth);
1533 if (node->count.nonzero_p ())
1535 fprintf (dump_file, " called approx. %.2f times per call",
1536 (double)curr->count.to_gcov_type ()
1537 / node->count.to_gcov_type ());
1539 fprintf (dump_file, "\n");
1541 if (!master_clone)
1543 /* We need original clone to copy around. */
1544 master_clone = node->create_clone (node->decl, node->count,
1545 false, vNULL, true, NULL, NULL);
1546 for (e = master_clone->callees; e; e = e->next_callee)
1547 if (!e->inline_failed)
1548 clone_inlined_nodes (e, true, false, NULL);
1549 curr->redirect_callee (master_clone);
1550 if (edge_growth_cache != NULL)
1551 edge_growth_cache->remove (curr);
1554 inline_call (curr, false, new_edges, &overall_size, true);
1555 lookup_recursive_calls (node, curr->callee, &heap);
1556 n++;
1559 if (!heap.empty () && dump_file)
1560 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1562 if (!master_clone)
1563 return false;
1565 if (dump_file)
1566 fprintf (dump_file,
1567 "\n Inlined %i times, "
1568 "body grown from size %i to %i, time %f to %f\n", n,
1569 ipa_fn_summaries->get (master_clone)->size,
1570 ipa_fn_summaries->get (node)->size,
1571 ipa_fn_summaries->get (master_clone)->time.to_double (),
1572 ipa_fn_summaries->get (node)->time.to_double ());
1574   /* Remove the master clone we used for inlining. We rely on the fact that
1575      clones inlined into the master clone get queued just before the master
1576      clone, so we don't need recursion. */
1577 for (node = symtab->first_function (); node != master_clone;
1578 node = next)
1580 next = symtab->next_function (node);
1581 if (node->global.inlined_to == master_clone)
1582 node->remove ();
1584 master_clone->remove ();
1585 return true;
1589 /* Given whole compilation unit estimate of INSNS, compute how large we can
1590 allow the unit to grow. */
1592 static int
1593 compute_max_insns (int insns)
1595 int max_insns = insns;
1596 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1597 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1599 return ((int64_t) max_insns
1600 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
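/* For example (editorial illustration; parameter values are samples, not
   necessarily the defaults): a unit with 50000 accounted instructions and
   --param inline-unit-growth=20 may grow to 50000 * 120 / 100 = 60000
   instructions, while units smaller than --param large-unit-insns are
   rounded up to that size first so that tiny units still get reasonable
   headroom.  */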
1604 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1606 static void
1607 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1609 while (new_edges.length () > 0)
1611 struct cgraph_edge *edge = new_edges.pop ();
1613 gcc_assert (!edge->aux);
1614 if (edge->inline_failed
1615 && can_inline_edge_p (edge, true)
1616 && want_inline_small_function_p (edge, true)
1617 && can_inline_edge_by_limits_p (edge, true))
1618 edge->aux = heap->insert (edge_badness (edge, false), edge);
1622 /* Remove EDGE from the fibheap. */
1624 static void
1625 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1627 if (e->aux)
1629 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1630 e->aux = NULL;
1634 /* Return true if speculation of edge E seems useful.
1635 If ANTICIPATE_INLINING is true, be conservative and hope that E
1636 may get inlined. */
1638 bool
1639 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1641 enum availability avail;
1642 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1643 e->caller);
1644 struct cgraph_edge *direct, *indirect;
1645 struct ipa_ref *ref;
1647 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1649 if (!e->maybe_hot_p ())
1650 return false;
1652 /* See if IP optimizations found something potentially useful about the
1653 function. For now we look only for CONST/PURE flags. Almost everything
1654 else we propagate is useless. */
1655 if (avail >= AVAIL_AVAILABLE)
1657 int ecf_flags = flags_from_decl_or_type (target->decl);
1658 if (ecf_flags & ECF_CONST)
1660 e->speculative_call_info (direct, indirect, ref);
1661 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1662 return true;
1664 else if (ecf_flags & ECF_PURE)
1666 e->speculative_call_info (direct, indirect, ref);
1667 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1668 return true;
1671   /* If we did not manage to inline the function nor redirect
1672 to an ipa-cp clone (that are seen by having local flag set),
1673 it is probably pointless to inline it unless hardware is missing
1674 indirect call predictor. */
1675 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1676 return false;
1677 /* For overwritable targets there is not much to do. */
1678 if (e->inline_failed
1679 && (!can_inline_edge_p (e, false)
1680 || !can_inline_edge_by_limits_p (e, false, true)))
1681 return false;
1682 /* OK, speculation seems interesting. */
1683 return true;
1686 /* We know that EDGE is not going to be inlined.
1687 See if we can remove speculation. */
1689 static void
1690 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1692 if (edge->speculative && !speculation_useful_p (edge, false))
1694 struct cgraph_node *node = edge->caller;
1695 struct cgraph_node *where = node->global.inlined_to
1696 ? node->global.inlined_to : node;
1697 auto_bitmap updated_nodes;
1699 if (edge->count.ipa ().initialized_p ())
1700 spec_rem += edge->count.ipa ();
1701 edge->resolve_speculation ();
1702 reset_edge_caches (where);
1703 ipa_update_overall_fn_summary (where);
1704 update_caller_keys (edge_heap, where,
1705 updated_nodes, NULL);
1706 update_callee_keys (edge_heap, where,
1707 updated_nodes);
1711 /* Return true if NODE should be accounted for overall size estimate.
1712 Skip all nodes optimized for size so we can measure the growth of hot
1713   part of the program regardless of the padding. */
1715 bool
1716 inline_account_function_p (struct cgraph_node *node)
1718 return (!DECL_EXTERNAL (node->decl)
1719 && !opt_for_fn (node->decl, optimize_size)
1720 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1723 /* Count the number of callers of NODE and store it into DATA (which
1724 points to an int). Worker for cgraph_for_node_and_aliases. */
1726 static bool
1727 sum_callers (struct cgraph_node *node, void *data)
1729 struct cgraph_edge *e;
1730 int *num_calls = (int *)data;
1732 for (e = node->callers; e; e = e->next_caller)
1733 (*num_calls)++;
1734 return false;
1737 /* We use a greedy algorithm for inlining of small functions:
1738 all inline candidates are put into a prioritized heap ordered by
1739 increasing badness.
1741 The inlining of small functions is bounded by unit growth parameters. */
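/* In outline, the main loop below repeatedly pops the edge with the lowest
   badness from the heap, recomputes its badness and re-inserts it if the key
   went stale, re-checks the inlining limits, performs the inlining, and then
   updates the keys of the affected callers and callees.  */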
1743 static void
1744 inline_small_functions (void)
1746 struct cgraph_node *node;
1747 struct cgraph_edge *edge;
1748 edge_heap_t edge_heap (sreal::min ());
1749 auto_bitmap updated_nodes;
1750 int min_size, max_size;
1751 auto_vec<cgraph_edge *> new_indirect_edges;
1752 int initial_size = 0;
1753 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1754 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1755 new_indirect_edges.create (8);
1757 edge_removal_hook_holder
1758 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1760 /* Compute overall unit size and other global parameters used by badness
1761 metrics. */
1763 max_count = profile_count::uninitialized ();
1764 ipa_reduced_postorder (order, true, true, NULL);
1765 free (order);
1767 FOR_EACH_DEFINED_FUNCTION (node)
1768 if (!node->global.inlined_to)
1770 if (!node->alias && node->analyzed
1771 && (node->has_gimple_body_p () || node->thunk.thunk_p)
1772 && opt_for_fn (node->decl, optimize))
1774 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1775 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1777 /* Do not account external functions; they will be optimized out
1778 if not inlined. Also only count the non-cold portion of the program. */
1779 if (inline_account_function_p (node))
1780 initial_size += info->size;
1781 info->growth = estimate_growth (node);
1783 int num_calls = 0;
1784 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1785 true);
1786 if (num_calls == 1)
1787 info->single_caller = true;
1788 if (dfs && dfs->next_cycle)
1790 struct cgraph_node *n2;
1791 int id = dfs->scc_no + 1;
1792 for (n2 = node; n2;
1793 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1794 if (opt_for_fn (n2->decl, optimize))
1796 ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1797 if (info2->scc_no)
1798 break;
1799 info2->scc_no = id;
1804 for (edge = node->callers; edge; edge = edge->next_caller)
1805 max_count = max_count.max (edge->count.ipa ());
1807 ipa_free_postorder_info ();
1808 edge_growth_cache
1809 = new call_summary<edge_growth_cache_entry *> (symtab, false);
1811 if (dump_file)
1812 fprintf (dump_file,
1813 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1814 initial_size);
1816 overall_size = initial_size;
1817 max_size = compute_max_insns (overall_size);
1818 min_size = overall_size;
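  /* min_size tracks the smallest overall unit size seen so far; max_size is
     the growth cap re-derived from it via compute_max_insns (presumably based
     on the unit-growth parameters) whenever a new minimum is reached below.  */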
1820 /* Populate the heap with all edges we might inline. */
1822 FOR_EACH_DEFINED_FUNCTION (node)
1824 bool update = false;
1825 struct cgraph_edge *next = NULL;
1826 bool has_speculative = false;
1828 if (!opt_for_fn (node->decl, optimize))
1829 continue;
1831 if (dump_file)
1832 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1834 for (edge = node->callees; edge; edge = next)
1836 next = edge->next_callee;
1837 if (edge->inline_failed
1838 && !edge->aux
1839 && can_inline_edge_p (edge, true)
1840 && want_inline_small_function_p (edge, true)
1841 && can_inline_edge_by_limits_p (edge, true)
1842 && edge->inline_failed)
1844 gcc_assert (!edge->aux);
1845 update_edge_key (&edge_heap, edge);
1847 if (edge->speculative)
1848 has_speculative = true;
1850 if (has_speculative)
1851 for (edge = node->callees; edge; edge = next)
1852 if (edge->speculative && !speculation_useful_p (edge,
1853 edge->aux != NULL))
1855 edge->resolve_speculation ();
1856 update = true;
1858 if (update)
1860 struct cgraph_node *where = node->global.inlined_to
1861 ? node->global.inlined_to : node;
1862 ipa_update_overall_fn_summary (where);
1863 reset_edge_caches (where);
1864 update_caller_keys (&edge_heap, where,
1865 updated_nodes, NULL);
1866 update_callee_keys (&edge_heap, where,
1867 updated_nodes);
1868 bitmap_clear (updated_nodes);
1872 gcc_assert (in_lto_p
1873 || !(max_count > 0)
1874 || (profile_info && flag_branch_probabilities));
1876 while (!edge_heap.empty ())
1878 int old_size = overall_size;
1879 struct cgraph_node *where, *callee;
1880 sreal badness = edge_heap.min_key ();
1881 sreal current_badness;
1882 int growth;
1884 edge = edge_heap.extract_min ();
1885 gcc_assert (edge->aux);
1886 edge->aux = NULL;
1887 if (!edge->inline_failed || !edge->callee->analyzed)
1888 continue;
1890 #if CHECKING_P
1891 /* Be sure that caches are maintained consistently.
1892 This check is affected by scaling roundoff errors when compiling for
1893 IPA, thus we skip it in that case. */
1894 if (!edge->callee->count.ipa_p ()
1895 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1897 sreal cached_badness = edge_badness (edge, false);
1899 int old_size_est = estimate_edge_size (edge);
1900 sreal old_time_est = estimate_edge_time (edge);
1901 int old_hints_est = estimate_edge_hints (edge);
1903 if (edge_growth_cache != NULL)
1904 edge_growth_cache->remove (edge);
1905 gcc_assert (old_size_est == estimate_edge_size (edge));
1906 gcc_assert (old_time_est == estimate_edge_time (edge));
1907 /* FIXME:
1909 gcc_assert (old_hints_est == estimate_edge_hints (edge));
1911 fails with profile feedback because some hints depend on the
1912 maybe_hot_edge_p predicate and, because the callee gets inlined into
1913 other calls, the edge may become cold.
1914 This ought to be fixed by computing relative probabilities
1915 for the given invocation, but that will be better done once the whole
1916 code is converted to sreals. Disable for now and revert to the "wrong"
1917 value so the enable/disable checking paths agree. */
1918 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
1920 /* When updating the edge costs, we only decrease badness in the keys.
1921 Increases of badness are handled lazily; when we see a key with an
1922 out-of-date value on it, we re-insert it now. */
1923 current_badness = edge_badness (edge, false);
1924 gcc_assert (cached_badness == current_badness);
1925 gcc_assert (current_badness >= badness);
1927 else
1928 current_badness = edge_badness (edge, false);
1929 #else
1930 current_badness = edge_badness (edge, false);
1931 #endif
1932 if (current_badness != badness)
1934 if (edge_heap.min () && current_badness > edge_heap.min_key ())
1936 edge->aux = edge_heap.insert (current_badness, edge);
1937 continue;
1939 else
1940 badness = current_badness;
1943 if (!can_inline_edge_p (edge, true)
1944 || !can_inline_edge_by_limits_p (edge, true))
1946 resolve_noninline_speculation (&edge_heap, edge);
1947 continue;
1950 callee = edge->callee->ultimate_alias_target ();
1951 growth = estimate_edge_growth (edge);
1952 if (dump_file)
1954 fprintf (dump_file,
1955 "\nConsidering %s with %i size\n",
1956 callee->dump_name (),
1957 ipa_fn_summaries->get (callee)->size);
1958 fprintf (dump_file,
1959 " to be inlined into %s in %s:%i\n"
1960 " Estimated badness is %f, frequency %.2f.\n",
1961 edge->caller->dump_name (),
1962 edge->call_stmt
1963 && (LOCATION_LOCUS (gimple_location ((const gimple *)
1964 edge->call_stmt))
1965 > BUILTINS_LOCATION)
1966 ? gimple_filename ((const gimple *) edge->call_stmt)
1967 : "unknown",
1968 edge->call_stmt
1969 ? gimple_lineno ((const gimple *) edge->call_stmt)
1970 : -1,
1971 badness.to_double (),
1972 edge->sreal_frequency ().to_double ());
1973 if (edge->count.ipa ().initialized_p ())
1975 fprintf (dump_file, " Called ");
1976 edge->count.ipa ().dump (dump_file);
1977 fprintf (dump_file, " times\n");
1979 if (dump_flags & TDF_DETAILS)
1980 edge_badness (edge, true);
1983 if (overall_size + growth > max_size
1984 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1986 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1987 report_inline_failed_reason (edge);
1988 resolve_noninline_speculation (&edge_heap, edge);
1989 continue;
1992 if (!want_inline_small_function_p (edge, true))
1994 resolve_noninline_speculation (&edge_heap, edge);
1995 continue;
1998 /* Heuristics for inlining small functions work poorly for
1999 recursive calls, where the effect is similar to loop unrolling.
2000 When inlining such an edge seems profitable, leave the decision to the
2001 special-purpose recursive inliner. */
2002 if (edge->recursive_p ())
2004 where = edge->caller;
2005 if (where->global.inlined_to)
2006 where = where->global.inlined_to;
2007 if (!recursive_inlining (edge,
2008 opt_for_fn (edge->caller->decl,
2009 flag_indirect_inlining)
2010 ? &new_indirect_edges : NULL))
2012 edge->inline_failed = CIF_RECURSIVE_INLINING;
2013 resolve_noninline_speculation (&edge_heap, edge);
2014 continue;
2016 reset_edge_caches (where);
2017 /* The recursive inliner inlines all recursive calls of the function
2018 at once. Consequently, we need to update all callee keys. */
2019 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2020 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2021 update_callee_keys (&edge_heap, where, updated_nodes);
2022 bitmap_clear (updated_nodes);
2024 else
2026 struct cgraph_node *outer_node = NULL;
2027 int depth = 0;
2029 /* Consider the case where a self-recursive function A is inlined
2030 into B. This is a desired optimization in some cases, since it
2031 has an effect similar to loop peeling and we might completely
2032 optimize out the recursive call. However, we must be extra
2033 selective. */
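      /* Walk up the chain of inline-to owners; every occurrence of CALLEE's
         decl on the way up is one level of self-recursion that has already
         been peeled.  */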
2035 where = edge->caller;
2036 while (where->global.inlined_to)
2038 if (where->decl == callee->decl)
2039 outer_node = where, depth++;
2040 where = where->callers->caller;
2042 if (outer_node
2043 && !want_inline_self_recursive_call_p (edge, outer_node,
2044 true, depth))
2046 edge->inline_failed
2047 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2048 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2049 resolve_noninline_speculation (&edge_heap, edge);
2050 continue;
2052 else if (depth && dump_file)
2053 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2055 gcc_checking_assert (!callee->global.inlined_to);
2056 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2057 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2059 reset_edge_caches (edge->callee);
2061 update_callee_keys (&edge_heap, where, updated_nodes);
2063 where = edge->caller;
2064 if (where->global.inlined_to)
2065 where = where->global.inlined_to;
2067 /* Our profitability metric can depend on local properties
2068 such as the number of inlinable calls and the size of the function body.
2069 After inlining, these properties might change for the function we
2070 inlined into (since its body size changed) and for the functions
2071 called by the function we inlined (since the number of their inlinable
2072 callers might change). */
2073 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2074 /* The offline copy count has possibly changed; recompute it if a profile
2075 is available. */
2076 struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2077 if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2078 update_callee_keys (&edge_heap, n, updated_nodes);
2079 bitmap_clear (updated_nodes);
2081 if (dump_file)
2083 ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller);
2084 fprintf (dump_file,
2085 " Inlined %s into %s which now has time %f and size %i, "
2086 "net change of %+i.\n",
2087 xstrdup_for_dump (edge->callee->name ()),
2088 xstrdup_for_dump (edge->caller->name ()),
2089 s->time.to_double (),
2090 s->size,
2091 overall_size - old_size);
2093 if (min_size > overall_size)
2095 min_size = overall_size;
2096 max_size = compute_max_insns (min_size);
2098 if (dump_file)
2099 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2103 free_growth_caches ();
2104 if (dump_file)
2105 fprintf (dump_file,
2106 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2107 initial_size, overall_size,
2108 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2109 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2112 /* Flatten NODE. Performed both during early inlining and
2113 at IPA inlining time. */
2115 static void
2116 flatten_function (struct cgraph_node *node, bool early)
2118 struct cgraph_edge *e;
2120 /* We shouldn't be called recursively when we are being processed. */
2121 gcc_assert (node->aux == NULL);
2123 node->aux = (void *) node;
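  /* Mark NODE as being processed: callees whose AUX is already set are part
     of a cycle and are skipped below.  */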
2125 for (e = node->callees; e; e = e->next_callee)
2127 struct cgraph_node *orig_callee;
2128 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2130 /* We've hit a cycle?  It is time to give up. */
2131 if (callee->aux)
2133 if (dump_file)
2134 fprintf (dump_file,
2135 "Not inlining %s into %s to avoid cycle.\n",
2136 xstrdup_for_dump (callee->name ()),
2137 xstrdup_for_dump (e->caller->name ()));
2138 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2139 e->inline_failed = CIF_RECURSIVE_INLINING;
2140 continue;
2143 /* When the edge is already inlined, we just need to recurse into
2144 it in order to fully flatten the leaves. */
2145 if (!e->inline_failed)
2147 flatten_function (callee, early);
2148 continue;
2151 /* The flatten attribute needs to be processed during late inlining. For
2152 extra code quality, however, we also do flattening during early
2153 optimization. */
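      /* Note the condition below: in the late pass the edge is skipped only
         when both the basic inlinability check and the limits check fail,
         while in the early pass can_early_inline_edge_p alone decides.  */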
2154 if (!early
2155 ? !can_inline_edge_p (e, true)
2156 && !can_inline_edge_by_limits_p (e, true)
2157 : !can_early_inline_edge_p (e))
2158 continue;
2160 if (e->recursive_p ())
2162 if (dump_file)
2163 fprintf (dump_file, "Not inlining: recursive call.\n");
2164 continue;
2167 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2168 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2170 if (dump_file)
2171 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2172 continue;
2175 /* Inline the edge and flatten the inline clone. Avoid
2176 recursing through the original node if the node was cloned. */
2177 if (dump_file)
2178 fprintf (dump_file, " Inlining %s into %s.\n",
2179 xstrdup_for_dump (callee->name ()),
2180 xstrdup_for_dump (e->caller->name ()));
2181 orig_callee = callee;
2182 inline_call (e, true, NULL, NULL, false);
2183 if (e->callee != orig_callee)
2184 orig_callee->aux = (void *) node;
2185 flatten_function (e->callee, early);
2186 if (e->callee != orig_callee)
2187 orig_callee->aux = NULL;
2190 node->aux = NULL;
2191 if (!node->global.inlined_to)
2192 ipa_update_overall_fn_summary (node);
2195 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2196 DATA points to the number of calls originally found, so we avoid infinite
2197 recursion. */
2199 static bool
2200 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2201 hash_set<cgraph_node *> *callers)
2203 int *num_calls = (int *)data;
2204 bool callee_removed = false;
2206 while (node->callers && !node->global.inlined_to)
2208 struct cgraph_node *caller = node->callers->caller;
2210 if (!can_inline_edge_p (node->callers, true)
2211 || !can_inline_edge_by_limits_p (node->callers, true)
2212 || node->callers->recursive_p ())
2214 if (dump_file)
2215 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2216 *num_calls = 0;
2217 return false;
2220 if (dump_file)
2222 fprintf (dump_file,
2223 "\nInlining %s size %i.\n",
2224 node->name (),
2225 ipa_fn_summaries->get (node)->size);
2226 fprintf (dump_file,
2227 " Called once from %s %i insns.\n",
2228 node->callers->caller->name (),
2229 ipa_fn_summaries->get (node->callers->caller)->size);
2232 /* Remember which callers we inlined to, delaying updating the
2233 overall summary. */
2234 callers->add (node->callers->caller);
2235 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2236 if (dump_file)
2237 fprintf (dump_file,
2238 " Inlined into %s which now has %i size\n",
2239 caller->name (),
2240 ipa_fn_summaries->get (caller)->size);
2241 if (!(*num_calls)--)
2243 if (dump_file)
2244 fprintf (dump_file, "New calls found; giving up.\n");
2245 return callee_removed;
2247 if (callee_removed)
2248 return true;
2250 return false;
2253 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2254 update. */
2256 static bool
2257 inline_to_all_callers (struct cgraph_node *node, void *data)
2259 hash_set<cgraph_node *> callers;
2260 bool res = inline_to_all_callers_1 (node, data, &callers);
2261 /* Perform the delayed update of the overall summary of all callers
2262 processed. This avoids quadratic behavior in the cases where
2263 we have a lot of calls to the same function. */
2264 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2265 i != callers.end (); ++i)
2266 ipa_update_overall_fn_summary (*i);
2267 return res;
2270 /* Output overall time estimate. */
2271 static void
2272 dump_overall_stats (void)
2274 sreal sum_weighted = 0, sum = 0;
2275 struct cgraph_node *node;
2277 FOR_EACH_DEFINED_FUNCTION (node)
2278 if (!node->global.inlined_to
2279 && !node->alias)
2281 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2282 if (s != NULL)
2284 sum += s->time;
2285 if (node->count.ipa ().initialized_p ())
2286 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2289 fprintf (dump_file, "Overall time estimate: "
2290 "%f weighted by profile: "
2291 "%f\n", sum.to_double (), sum_weighted.to_double ());
2294 /* Output some useful stats about inlining. */
2296 static void
2297 dump_inline_stats (void)
2299 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2300 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2301 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2302 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2303 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2304 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2305 int64_t reason[CIF_N_REASONS][2];
2306 sreal reason_freq[CIF_N_REASONS];
2307 int i;
2308 struct cgraph_node *node;
2310 memset (reason, 0, sizeof (reason));
2311 for (i=0; i < CIF_N_REASONS; i++)
2312 reason_freq[i] = 0;
2313 FOR_EACH_DEFINED_FUNCTION (node)
2315 struct cgraph_edge *e;
2316 for (e = node->callees; e; e = e->next_callee)
2318 if (e->inline_failed)
2320 if (e->count.ipa ().initialized_p ())
2321 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2322 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2323 reason[(int) e->inline_failed][1] ++;
2324 if (DECL_VIRTUAL_P (e->callee->decl)
2325 && e->count.ipa ().initialized_p ())
2327 if (e->indirect_inlining_edge)
2328 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2329 else
2330 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2332 else if (e->count.ipa ().initialized_p ())
2334 if (e->indirect_inlining_edge)
2335 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2336 else
2337 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2340 else if (e->count.ipa ().initialized_p ())
2342 if (e->speculative)
2344 if (DECL_VIRTUAL_P (e->callee->decl))
2345 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2346 else
2347 inlined_speculative += e->count.ipa ().to_gcov_type ();
2349 else if (DECL_VIRTUAL_P (e->callee->decl))
2351 if (e->indirect_inlining_edge)
2352 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2353 else
2354 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2356 else
2358 if (e->indirect_inlining_edge)
2359 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2360 else
2361 inlined_cnt += e->count.ipa ().to_gcov_type ();
2365 for (e = node->indirect_calls; e; e = e->next_callee)
2366 if (e->indirect_info->polymorphic
2367 && e->count.ipa ().initialized_p ())
2368 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2369 else if (e->count.ipa ().initialized_p ())
2370 indirect_cnt += e->count.ipa ().to_gcov_type ();
2372 if (max_count.initialized_p ())
2374 fprintf (dump_file,
2375 "Inlined %" PRId64 " + speculative "
2376 "%" PRId64 " + speculative polymorphic "
2377 "%" PRId64 " + previously indirect "
2378 "%" PRId64 " + virtual "
2379 "%" PRId64 " + virtual and previously indirect "
2380 "%" PRId64 "\n" "Not inlined "
2381 "%" PRId64 " + previously indirect "
2382 "%" PRId64 " + virtual "
2383 "%" PRId64 " + virtual and previously indirect "
2384 "%" PRId64 " + stil indirect "
2385 "%" PRId64 " + still indirect polymorphic "
2386 "%" PRId64 "\n", inlined_cnt,
2387 inlined_speculative, inlined_speculative_ply,
2388 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2389 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2390 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2391 fprintf (dump_file, "Removed speculations ");
2392 spec_rem.dump (dump_file);
2393 fprintf (dump_file, "\n");
2395 dump_overall_stats ();
2396 fprintf (dump_file, "\nWhy inlining failed?\n");
2397 for (i = 0; i < CIF_N_REASONS; i++)
2398 if (reason[i][1])
2399 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2400 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2401 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2404 /* Called when node is removed. */
2406 static void
2407 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2409 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2410 return;
2412 hash_set<struct cgraph_node *> *removed
2413 = (hash_set<struct cgraph_node *> *) data;
2414 removed->add (node);
2417 /* Decide on the inlining. We do so in topological order to avoid the
2418 expense of updating data structures. */
2420 static unsigned int
2421 ipa_inline (void)
2423 struct cgraph_node *node;
2424 int nnodes;
2425 struct cgraph_node **order;
2426 int i, j;
2427 int cold;
2428 bool remove_functions = false;
2430 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2432 if (dump_file)
2433 ipa_dump_fn_summaries (dump_file);
2435 nnodes = ipa_reverse_postorder (order);
2436 spec_rem = profile_count::zero ();
2438 FOR_EACH_FUNCTION (node)
2440 node->aux = 0;
2442 /* Recompute the default reasons for inlining because they may have
2443 changed during merging. */
2444 if (in_lto_p)
2446 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2448 gcc_assert (e->inline_failed);
2449 initialize_inline_failed (e);
2451 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2452 initialize_inline_failed (e);
2456 if (dump_file)
2457 fprintf (dump_file, "\nFlattening functions:\n");
2459 /* First shrink order array, so that it only contains nodes with
2460 flatten attribute. */
2461 for (i = nnodes - 1, j = i; i >= 0; i--)
2463 node = order[i];
2464 if (lookup_attribute ("flatten",
2465 DECL_ATTRIBUTES (node->decl)) != NULL)
2466 order[j--] = order[i];
2469 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2470 nodes with flatten attribute. If there is more than one such
2471 node, we need to register a node removal hook, as flatten_function
2472 could remove other nodes with flatten attribute. See PR82801. */
2473 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2474 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2475 if (j < nnodes - 2)
2477 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2478 node_removal_hook_holder
2479 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2480 flatten_removed_nodes);
2483 /* In the first pass handle functions to be flattened. Give this priority
2484 so that none of our later choices will make this impossible. */
2485 for (i = nnodes - 1; i > j; i--)
2487 node = order[i];
2488 if (flatten_removed_nodes
2489 && flatten_removed_nodes->contains (node))
2490 continue;
2492 /* Handle nodes to be flattened.
2493 Ideally, when processing callees we would stop inlining at the
2494 entry of cycles, possibly cloning that entry point and
2495 trying to flatten it, turning it into a self-recursive
2496 function. */
2497 if (dump_file)
2498 fprintf (dump_file, "Flattening %s\n", node->name ());
2499 flatten_function (node, false);
2502 if (j < nnodes - 2)
2504 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2505 delete flatten_removed_nodes;
2507 free (order);
2509 if (dump_file)
2510 dump_overall_stats ();
2512 inline_small_functions ();
2514 gcc_assert (symtab->state == IPA_SSA);
2515 symtab->state = IPA_SSA_AFTER_INLINING;
2516 /* Do first after-inlining removal. We want to remove all "stale" extern
2517 inline functions and virtual functions so we really know what is called
2518 once. */
2519 symtab->remove_unreachable_nodes (dump_file);
2521 /* Inline functions with the property that after inlining into all callers
2522 the code size will shrink, because the out-of-line copy is eliminated.
2523 We do this regardless of the callee size as long as function growth limits
2524 are met. */
2525 if (dump_file)
2526 fprintf (dump_file,
2527 "\nDeciding on functions to be inlined into all callers and "
2528 "removing useless speculations:\n");
2530 /* Inlining one function called once has a good chance of preventing
2531 the inlining of another function into the same callee. Ideally we should
2532 work in priority order, but inlining hot functions first is probably
2533 a good cut without the extra pain of maintaining the queue.
2535 ??? This does not really fit the bill perfectly: inlining a function
2536 into a callee often leads to better optimization of the callee due to
2537 increased context for optimization.
2538 For example, if main() calls a function that outputs help
2539 and then a function that does the main optimization, we should inline
2540 the second with priority even if both calls are cold by themselves.
2542 We probably want to implement a new predicate replacing our use of
2543 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2544 to be hot. */
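  /* The loop below makes two passes: the first (cold == 0) considers only
     candidates that satisfy the hotness criteria of
     want_inline_function_to_all_callers_p; the second (cold == 1) presumably
     relaxes that requirement and admits the remaining cold candidates.  */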
2545 for (cold = 0; cold <= 1; cold ++)
2547 FOR_EACH_DEFINED_FUNCTION (node)
2549 struct cgraph_edge *edge, *next;
2550 bool update = false;
2552 if (!opt_for_fn (node->decl, optimize)
2553 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2554 continue;
2556 for (edge = node->callees; edge; edge = next)
2558 next = edge->next_callee;
2559 if (edge->speculative && !speculation_useful_p (edge, false))
2561 if (edge->count.ipa ().initialized_p ())
2562 spec_rem += edge->count.ipa ();
2563 edge->resolve_speculation ();
2564 update = true;
2565 remove_functions = true;
2568 if (update)
2570 struct cgraph_node *where = node->global.inlined_to
2571 ? node->global.inlined_to : node;
2572 reset_edge_caches (where);
2573 ipa_update_overall_fn_summary (where);
2575 if (want_inline_function_to_all_callers_p (node, cold))
2577 int num_calls = 0;
2578 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2579 true);
2580 while (node->call_for_symbol_and_aliases
2581 (inline_to_all_callers, &num_calls, true))
2583 remove_functions = true;
2588 /* Free ipa-prop structures if they are no longer needed. */
2589 ipa_free_all_structures_after_iinln ();
2591 if (dump_file)
2593 fprintf (dump_file,
2594 "\nInlined %i calls, eliminated %i functions\n\n",
2595 ncalls_inlined, nfunctions_inlined);
2596 dump_inline_stats ();
2599 if (dump_file)
2600 ipa_dump_fn_summaries (dump_file);
2601 return remove_functions ? TODO_remove_functions : 0;
2604 /* Inline always-inline function calls in NODE. */
2606 static bool
2607 inline_always_inline_functions (struct cgraph_node *node)
2609 struct cgraph_edge *e;
2610 bool inlined = false;
2612 for (e = node->callees; e; e = e->next_callee)
2614 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2615 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2616 continue;
2618 if (e->recursive_p ())
2620 if (dump_file)
2621 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2622 e->callee->name ());
2623 e->inline_failed = CIF_RECURSIVE_INLINING;
2624 continue;
2627 if (!can_early_inline_edge_p (e))
2629 /* Set inlined to true if the callee is marked "always_inline" but
2630 is not inlinable. This will allow flagging an error later in
2631 expand_call_inline in tree-inline.c. */
2632 if (lookup_attribute ("always_inline",
2633 DECL_ATTRIBUTES (callee->decl)) != NULL)
2634 inlined = true;
2635 continue;
2638 if (dump_file)
2639 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2640 xstrdup_for_dump (e->callee->name ()),
2641 xstrdup_for_dump (e->caller->name ()));
2642 inline_call (e, true, NULL, NULL, false);
2643 inlined = true;
2645 if (inlined)
2646 ipa_update_overall_fn_summary (node);
2648 return inlined;
2651 /* Decide on the inlining. We do so in topological order to avoid the
2652 expense of updating data structures. */
2654 static bool
2655 early_inline_small_functions (struct cgraph_node *node)
2657 struct cgraph_edge *e;
2658 bool inlined = false;
2660 for (e = node->callees; e; e = e->next_callee)
2662 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2664 /* We can encounter a not-yet-analyzed function during
2665 early inlining on callgraphs with strongly
2666 connected components. */
2667 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2668 if (s == NULL || !s->inlinable || !e->inline_failed)
2669 continue;
2671 /* Do not consider functions not declared inline. */
2672 if (!DECL_DECLARED_INLINE_P (callee->decl)
2673 && !opt_for_fn (node->decl, flag_inline_small_functions)
2674 && !opt_for_fn (node->decl, flag_inline_functions))
2675 continue;
2677 if (dump_file)
2678 fprintf (dump_file, "Considering inline candidate %s.\n",
2679 callee->name ());
2681 if (!can_early_inline_edge_p (e))
2682 continue;
2684 if (e->recursive_p ())
2686 if (dump_file)
2687 fprintf (dump_file, " Not inlining: recursive call.\n");
2688 continue;
2691 if (!want_early_inline_function_p (e))
2692 continue;
2694 if (dump_file)
2695 fprintf (dump_file, " Inlining %s into %s.\n",
2696 xstrdup_for_dump (callee->name ()),
2697 xstrdup_for_dump (e->caller->name ()));
2698 inline_call (e, true, NULL, NULL, false);
2699 inlined = true;
2702 if (inlined)
2703 ipa_update_overall_fn_summary (node);
2705 return inlined;
2708 unsigned int
2709 early_inliner (function *fun)
2711 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2712 struct cgraph_edge *edge;
2713 unsigned int todo = 0;
2714 int iterations = 0;
2715 bool inlined = false;
2717 if (seen_error ())
2718 return 0;
2720 /* Do nothing if the data structures for the IPA inliner are already
2721 computed. This happens when some pass decides to construct a new function
2722 and cgraph_add_new_function calls the lowering passes and early
2723 optimization on it. This may confuse us when the early inliner decides
2724 to inline a call to a function clone, because function clones don't have
2725 a parameter list in ipa-prop matching their signature. */
2726 if (ipa_node_params_sum)
2727 return 0;
2729 if (flag_checking)
2730 node->verify ();
2731 node->remove_all_references ();
2733 /* Even when not optimizing or not inlining, inline always-inline
2734 functions. */
2735 inlined = inline_always_inline_functions (node);
2737 if (!optimize
2738 || flag_no_inline
2739 || !flag_early_inlining
2740 /* Never inline regular functions into always-inline functions
2741 during incremental inlining. This sucks, as functions calling
2742 always-inline functions will get less optimized, but at the
2743 same time inlining functions that call an always-inline
2744 function into an always-inline function might introduce
2745 cycles of edges to be always inlined in the callgraph.
2747 We might want to be smarter and just avoid this type of inlining. */
2748 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2749 && lookup_attribute ("always_inline",
2750 DECL_ATTRIBUTES (node->decl))))
2752 else if (lookup_attribute ("flatten",
2753 DECL_ATTRIBUTES (node->decl)) != NULL)
2755 /* When the function is marked to be flattened, recursively inline
2756 all calls in it. */
2757 if (dump_file)
2758 fprintf (dump_file,
2759 "Flattening %s\n", node->name ());
2760 flatten_function (node, true);
2761 inlined = true;
2763 else
2765 /* If some always_inline functions were inlined, apply the changes.
2766 This way we will not account always-inline functions in growth limits,
2767 and moreover we will inline calls from always-inline functions that we
2768 skipped previously because of the conditional above. */
2769 if (inlined)
2771 timevar_push (TV_INTEGRATION);
2772 todo |= optimize_inline_calls (current_function_decl);
2773 /* optimize_inline_calls call above might have introduced new
2774 statements that don't have inline parameters computed. */
2775 for (edge = node->callees; edge; edge = edge->next_callee)
2777 /* We can encounter a not-yet-analyzed function during
2778 early inlining on callgraphs with strongly
2779 connected components. */
2780 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2781 es->call_stmt_size
2782 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2783 es->call_stmt_time
2784 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2786 ipa_update_overall_fn_summary (node);
2787 inlined = false;
2788 timevar_pop (TV_INTEGRATION);
2790 /* We iterate incremental inlining to get trivial cases of indirect
2791 inlining. */
2792 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2793 && early_inline_small_functions (node))
2795 timevar_push (TV_INTEGRATION);
2796 todo |= optimize_inline_calls (current_function_decl);
2798 /* Technically we ought to recompute inline parameters so the new
2799 iteration of the early inliner works as expected. However, the
2800 values are approximately right, and thus we only need to update edge
2801 info that might be cleared out for newly discovered edges. */
2802 for (edge = node->callees; edge; edge = edge->next_callee)
2804 /* We have no summary for new bound store calls yet. */
2805 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2806 es->call_stmt_size
2807 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2808 es->call_stmt_time
2809 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2811 if (edge->callee->decl
2812 && !gimple_check_call_matching_types (
2813 edge->call_stmt, edge->callee->decl, false))
2815 edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2816 edge->call_stmt_cannot_inline_p = true;
2819 if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2820 ipa_update_overall_fn_summary (node);
2821 timevar_pop (TV_INTEGRATION);
2822 iterations++;
2823 inlined = false;
2825 if (dump_file)
2826 fprintf (dump_file, "Iterations: %i\n", iterations);
2829 if (inlined)
2831 timevar_push (TV_INTEGRATION);
2832 todo |= optimize_inline_calls (current_function_decl);
2833 timevar_pop (TV_INTEGRATION);
2836 fun->always_inline_functions_inlined = true;
2838 return todo;
2841 /* Do inlining of small functions. Doing so early helps profiling and other
2842 passes to be somewhat more effective and avoids some code duplication in
2843 the later real inlining pass for testcases with very many function calls. */
2845 namespace {
2847 const pass_data pass_data_early_inline =
2849 GIMPLE_PASS, /* type */
2850 "einline", /* name */
2851 OPTGROUP_INLINE, /* optinfo_flags */
2852 TV_EARLY_INLINING, /* tv_id */
2853 PROP_ssa, /* properties_required */
2854 0, /* properties_provided */
2855 0, /* properties_destroyed */
2856 0, /* todo_flags_start */
2857 0, /* todo_flags_finish */
2860 class pass_early_inline : public gimple_opt_pass
2862 public:
2863 pass_early_inline (gcc::context *ctxt)
2864 : gimple_opt_pass (pass_data_early_inline, ctxt)
2867 /* opt_pass methods: */
2868 virtual unsigned int execute (function *);
2870 }; // class pass_early_inline
2872 unsigned int
2873 pass_early_inline::execute (function *fun)
2875 return early_inliner (fun);
2878 } // anon namespace
2880 gimple_opt_pass *
2881 make_pass_early_inline (gcc::context *ctxt)
2883 return new pass_early_inline (ctxt);
2886 namespace {
2888 const pass_data pass_data_ipa_inline =
2890 IPA_PASS, /* type */
2891 "inline", /* name */
2892 OPTGROUP_INLINE, /* optinfo_flags */
2893 TV_IPA_INLINING, /* tv_id */
2894 0, /* properties_required */
2895 0, /* properties_provided */
2896 0, /* properties_destroyed */
2897 0, /* todo_flags_start */
2898 ( TODO_dump_symtab ), /* todo_flags_finish */
2901 class pass_ipa_inline : public ipa_opt_pass_d
2903 public:
2904 pass_ipa_inline (gcc::context *ctxt)
2905 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2906 NULL, /* generate_summary */
2907 NULL, /* write_summary */
2908 NULL, /* read_summary */
2909 NULL, /* write_optimization_summary */
2910 NULL, /* read_optimization_summary */
2911 NULL, /* stmt_fixup */
2912 0, /* function_transform_todo_flags_start */
2913 inline_transform, /* function_transform */
2914 NULL) /* variable_transform */
2917 /* opt_pass methods: */
2918 virtual unsigned int execute (function *) { return ipa_inline (); }
2920 }; // class pass_ipa_inline
2922 } // anon namespace
2924 ipa_opt_pass_d *
2925 make_pass_ipa_inline (gcc::context *ctxt)
2927 return new pass_ipa_inline (ctxt);