[official-gcc.git] / gcc / ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
 23 The implementation of the inliner is organized as follows:
25 inlining heuristics limits
 27 can_inline_edge_p allows checking that a particular inlining is allowed
 28 by the limits specified by the user (allowed function growth, stack usage
 29 growth and so on).
31 Functions are inlined when it is obvious the result is profitable (such
 32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
 42 A simple local inlining pass that inlines callees into the current function.
 43 This pass makes no use of whole-unit analysis and thus it can make only
 44 very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
 49 callees of the function seen by the early inliner have already been optimized
 50 and the results of early inlining add a lot of optimization opportunities
 51 for local optimization.
 53 The pass handles the obvious inlining decisions within the compilation
 54 unit - inlining auto-inline functions, inlining for size and
55 flattening.
 57 The main strength of the pass is the ability to eliminate the abstraction
 58 penalty in C++ code (via a combination of inlining and early
 59 optimization) and thus improve the quality of the analysis done by the
 60 real IPA optimizers.
 62 Because of the lack of whole-unit knowledge, the pass cannot really make
 63 good code size/performance tradeoffs. It does, however, perform very simple
 64 speculative inlining, allowing code size to grow by
 65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
 66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner able to handle inlining with whole program
 71 knowledge. It performs the following steps:
 73 1) inlining of small functions. This is implemented by a greedy
 74 algorithm ordering all inlinable cgraph edges by their badness and
 75 inlining them in this order as long as the inline limits allow doing so.
 77 This heuristic is not very good at inlining recursive calls. Recursive
 78 calls can be inlined with results similar to loop unrolling. To do so, a
 79 special-purpose recursive inliner is executed on the function when a
 80 recursive edge is met as a viable candidate.
 82 2) Unreachable functions are removed from the callgraph. Inlining leads
 83 to devirtualization and other modifications of the callgraph, so functions
 84 may become unreachable during the process. Also functions declared as
 85 extern inline and virtual functions are removed, since after inlining
 86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
 89 This should almost always lead to a reduction in code size by eliminating
 90 the need for an offline copy of the function. */
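As a rough orientation before the code, the greedy pass described in step 1) can be sketched as follows. This is only an illustrative outline, not part of the GCC sources; the real driver is inline_small_functions () below, which additionally handles recursion, speculation and cache updates.

   /* Illustrative sketch of the greedy small-function inliner:

        put every call edge that passes can_inline_edge_p () and
        want_inline_small_function_p () into a heap keyed by edge_badness ();
        while (the heap is not empty)
          {
            edge = the edge with the smallest badness;
            if (inlining it would violate unit growth or caller growth limits)
              skip it;
            inline_call (edge, ...);
            recompute the badness keys of the affected caller and callee edges;
          }  */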
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "tm.h"
96 #include "tree.h"
97 #include "trans-mem.h"
98 #include "calls.h"
99 #include "tree-inline.h"
100 #include "langhooks.h"
101 #include "flags.h"
102 #include "diagnostic.h"
103 #include "gimple-pretty-print.h"
104 #include "params.h"
105 #include "fibheap.h"
106 #include "intl.h"
107 #include "tree-pass.h"
108 #include "coverage.h"
109 #include "rtl.h"
110 #include "bitmap.h"
111 #include "basic-block.h"
112 #include "tree-ssa-alias.h"
113 #include "internal-fn.h"
114 #include "gimple-expr.h"
115 #include "is-a.h"
116 #include "gimple.h"
117 #include "gimple-ssa.h"
118 #include "ipa-prop.h"
119 #include "except.h"
120 #include "target.h"
121 #include "ipa-inline.h"
122 #include "ipa-utils.h"
123 #include "sreal.h"
124 #include "cilk.h"
125 #include "builtins.h"
127 /* Statistics we collect about inlining algorithm. */
128 static int overall_size;
129 static gcov_type max_count;
130 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
131 static gcov_type spec_rem;
133 /* Return false when inlining edge E would lead to violating
134 limits on function unit growth or stack usage growth.
136 The relative function body growth limit is present generally
137 to avoid problems with non-linear behavior of the compiler.
 138 To allow inlining huge functions into a tiny wrapper, the limit
 139 is always based on the bigger of the two functions considered.
 141 For stack growth limits we always base the growth on the stack usage
 142 of the caller. We want to prevent applications from segfaulting
 143 on stack overflow when functions with huge stack frames get
 144 inlined. */
146 static bool
147 caller_growth_limits (struct cgraph_edge *e)
149 struct cgraph_node *to = e->caller;
150 struct cgraph_node *what = e->callee->ultimate_alias_target ();
151 int newsize;
152 int limit = 0;
153 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
154 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 156 /* Look for the function e->caller is inlined into. While doing
 157 so, work out the largest function body on the way. As
 158 described above, we want to base our function growth
 159 limits on that; not on the self size of the
 160 outer function, nor on the self size of the inline code
 161 we immediately inline into. This is the most relaxed
 162 interpretation of the rule "do not grow large functions
 163 too much in order to prevent the compiler from exploding". */
164 while (true)
166 info = inline_summary (to);
167 if (limit < info->self_size)
168 limit = info->self_size;
169 if (stack_size_limit < info->estimated_self_stack_size)
170 stack_size_limit = info->estimated_self_stack_size;
171 if (to->global.inlined_to)
172 to = to->callers->caller;
173 else
174 break;
177 what_info = inline_summary (what);
179 if (limit < what_info->self_size)
180 limit = what_info->self_size;
182 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
184 /* Check the size after inlining against the function limits. But allow
185 the function to shrink if it went over the limits by forced inlining. */
186 newsize = estimate_size_after_inlining (to, e);
187 if (newsize >= info->size
188 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
189 && newsize > limit)
191 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
192 return false;
195 if (!what_info->estimated_stack_size)
196 return true;
 198 /* FIXME: The stack size limit often prevents inlining in Fortran programs
 199 due to large I/O data structures used by the Fortran front end.
 200 We ought to ignore this limit when we know that the edge is executed
 201 on every invocation of the caller (i.e. its call statement dominates the
 202 exit block). We do not track this information, yet. */
203 stack_size_limit += ((gcov_type)stack_size_limit
204 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
206 inlined_stack = (outer_info->stack_frame_offset
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
 212 /* If the function already has large stack usage from a sibling
 213 inline call, we can inline, too.
 214 This bit overoptimistically assumes that we are good at stack
 215 packing. */
216 && inlined_stack > info->estimated_stack_size
217 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
222 return true;
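A worked example of the growth-limit arithmetic above may help; the parameter values are illustrative only, not GCC defaults.

   /* Example: largest self_size on the inline chain = 400, callee self_size = 600
        => limit = 600
      with --param large-function-growth=100
        => limit = 600 + 600 * 100 / 100 = 1200
      The edge is then rejected only if the size estimated by
      estimate_size_after_inlining () is at least the caller's current size,
      exceeds --param large-function-insns and exceeds 1200.  */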
225 /* Dump info about why inlining has failed. */
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
230 if (dump_file)
232 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
233 xstrdup (e->caller->name ()), e->caller->order,
234 xstrdup (e->callee->name ()), e->callee->order,
235 cgraph_inline_failed_string (e->inline_failed));
239 /* Decide whether sanitizer-related attributes allow inlining. */
241 static bool
242 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 244 /* Don't care if the sanitizer is disabled. */
245 if (!(flag_sanitize & SANITIZE_ADDRESS))
246 return true;
248 if (!caller || !callee)
249 return true;
251 return !!lookup_attribute ("no_sanitize_address",
252 DECL_ATTRIBUTES (caller)) ==
253 !!lookup_attribute ("no_sanitize_address",
254 DECL_ATTRIBUTES (callee));
 257 /* Decide if we can inline the edge and possibly update
 258 the inline_failed reason.
 259 We check whether inlining is possible at all and whether
 260 caller growth limits allow doing so.
 262 If REPORT is true, output the reason to the dump file.
 264 If DISREGARD_LIMITS is true, ignore size limits. */
266 static bool
267 can_inline_edge_p (struct cgraph_edge *e, bool report,
268 bool disregard_limits = false)
270 bool inlinable = true;
271 enum availability avail;
272 cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
273 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
274 tree callee_tree
275 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
276 struct function *caller_fun = e->caller->get_fun ();
277 struct function *callee_fun = callee ? callee->get_fun () : NULL;
279 gcc_assert (e->inline_failed);
281 if (!callee || !callee->definition)
283 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
284 inlinable = false;
286 else if (callee->calls_comdat_local)
288 e->inline_failed = CIF_USES_COMDAT_LOCAL;
289 inlinable = false;
291 else if (!inline_summary (callee)->inlinable
292 || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
294 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
295 inlinable = false;
297 else if (avail <= AVAIL_INTERPOSABLE)
299 e->inline_failed = CIF_OVERWRITABLE;
300 inlinable = false;
302 else if (e->call_stmt_cannot_inline_p)
304 if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
305 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
306 inlinable = false;
308 /* Don't inline if the functions have different EH personalities. */
309 else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
310 && DECL_FUNCTION_PERSONALITY (callee->decl)
311 && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
312 != DECL_FUNCTION_PERSONALITY (callee->decl)))
314 e->inline_failed = CIF_EH_PERSONALITY;
315 inlinable = false;
317 /* TM pure functions should not be inlined into non-TM_pure
318 functions. */
319 else if (is_tm_pure (callee->decl)
320 && !is_tm_pure (e->caller->decl))
322 e->inline_failed = CIF_UNSPECIFIED;
323 inlinable = false;
325 /* Don't inline if the callee can throw non-call exceptions but the
326 caller cannot.
327 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
328 Move the flag into cgraph node or mirror it in the inline summary. */
329 else if (callee_fun && callee_fun->can_throw_non_call_exceptions
330 && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
332 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
333 inlinable = false;
335 /* Check compatibility of target optimization options. */
336 else if (!targetm.target_option.can_inline_p (e->caller->decl,
337 callee->decl))
339 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
340 inlinable = false;
342 /* Don't inline a function with mismatched sanitization attributes. */
343 else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
345 e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
346 inlinable = false;
348 /* Check if caller growth allows the inlining. */
349 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
350 && !disregard_limits
351 && !lookup_attribute ("flatten",
352 DECL_ATTRIBUTES
353 (e->caller->global.inlined_to
354 ? e->caller->global.inlined_to->decl
355 : e->caller->decl))
356 && !caller_growth_limits (e))
357 inlinable = false;
358 /* Don't inline a function with a higher optimization level than the
 359 caller. FIXME: this is really just the tip of the iceberg of handling the
 360 optimization attribute. */
361 else if (caller_tree != callee_tree)
363 struct cl_optimization *caller_opt
364 = TREE_OPTIMIZATION ((caller_tree)
365 ? caller_tree
366 : optimization_default_node);
368 struct cl_optimization *callee_opt
369 = TREE_OPTIMIZATION ((callee_tree)
370 ? callee_tree
371 : optimization_default_node);
373 if (((caller_opt->x_optimize > callee_opt->x_optimize)
374 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
375 /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */
376 && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
378 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
379 inlinable = false;
383 if (!inlinable && report)
384 report_inline_failed_reason (e);
385 return inlinable;
389 /* Return true if the edge E is inlinable during early inlining. */
391 static bool
392 can_early_inline_edge_p (struct cgraph_edge *e)
394 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 395 /* The early inliner might get called at WPA stage when an IPA pass adds a new
 396 function. In this case we cannot really do any early inlining
 397 because function bodies are missing. */
398 if (!gimple_has_body_p (callee->decl))
400 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
401 return false;
 403 /* In the early inliner some of the callees may not be in SSA form yet
 404 (i.e. the callgraph is cyclic and we did not process
 405 the callee with the early inliner yet). We don't have a CIF code for this
 406 case; later we will re-do the decision in the real inliner. */
407 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
408 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
410 if (dump_file)
411 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
412 return false;
414 if (!can_inline_edge_p (e, true))
415 return false;
416 return true;
420 /* Return number of calls in N. Ignore cheap builtins. */
422 static int
423 num_calls (struct cgraph_node *n)
425 struct cgraph_edge *e;
426 int num = 0;
428 for (e = n->callees; e; e = e->next_callee)
429 if (!is_inexpensive_builtin (e->callee->decl))
430 num++;
431 return num;
435 /* Return true if we are interested in inlining small function. */
437 static bool
438 want_early_inline_function_p (struct cgraph_edge *e)
440 bool want_inline = true;
441 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
443 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
445 else if (!DECL_DECLARED_INLINE_P (callee->decl)
446 && !flag_inline_small_functions)
448 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
449 report_inline_failed_reason (e);
450 want_inline = false;
452 else
454 int growth = estimate_edge_growth (e);
455 int n;
457 if (growth <= 0)
459 else if (!e->maybe_hot_p ()
460 && growth > 0)
462 if (dump_file)
463 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
464 "call is cold and code would grow by %i\n",
465 xstrdup (e->caller->name ()),
466 e->caller->order,
467 xstrdup (callee->name ()), callee->order,
468 growth);
469 want_inline = false;
471 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
473 if (dump_file)
474 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
475 "growth %i exceeds --param early-inlining-insns\n",
476 xstrdup (e->caller->name ()),
477 e->caller->order,
478 xstrdup (callee->name ()), callee->order,
479 growth);
480 want_inline = false;
482 else if ((n = num_calls (callee)) != 0
483 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
485 if (dump_file)
486 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
487 "growth %i exceeds --param early-inlining-insns "
488 "divided by number of calls\n",
489 xstrdup (e->caller->name ()),
490 e->caller->order,
491 xstrdup (callee->name ()), callee->order,
492 growth);
493 want_inline = false;
496 return want_inline;
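A worked example of the last test above may clarify it; the parameter value is illustrative, not necessarily the default.

   /* Example: estimate_edge_growth (e) = 8 and the callee contains
      n = 3 calls that are not inexpensive builtins.  With
      --param early-inlining-insns=14 the edge passes the plain growth
      test (8 <= 14) but fails the combined one, since
      8 * (3 + 1) = 32 > 14, so early inlining is refused.  */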
499 /* Compute time of the edge->caller + edge->callee execution when inlining
500 does not happen. */
502 inline gcov_type
503 compute_uninlined_call_time (struct inline_summary *callee_info,
504 struct cgraph_edge *edge)
506 gcov_type uninlined_call_time =
507 RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
508 CGRAPH_FREQ_BASE);
509 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
510 ? edge->caller->global.inlined_to
511 : edge->caller)->time;
512 return uninlined_call_time + caller_time;
 515 /* Same as compute_uninlined_call_time but compute the time when inlining
516 does happen. */
518 inline gcov_type
519 compute_inlined_call_time (struct cgraph_edge *edge,
520 int edge_time)
522 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
523 ? edge->caller->global.inlined_to
524 : edge->caller)->time;
525 gcov_type time = (caller_time
526 + RDIV (((gcov_type) edge_time
527 - inline_edge_summary (edge)->call_stmt_time)
528 * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 529 /* Possibly one roundoff error, but watch for overflows. */
530 gcc_checking_assert (time >= INT_MIN / 2);
531 if (time < 0)
532 time = 0;
533 return time;
536 /* Return true if the speedup for inlining E is bigger than
 537 PARAM_INLINE_MIN_SPEEDUP. */
539 static bool
540 big_speedup_p (struct cgraph_edge *e)
542 gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
544 gcov_type inlined_time = compute_inlined_call_time (e,
545 estimate_edge_time (e));
546 if (time - inlined_time
547 > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
548 return true;
549 return false;
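To make the two time estimates and the speedup test concrete, here is a small worked example; all numbers are illustrative.

   /* Example: caller time = 1000, callee time = 200, edge->frequency =
      CGRAPH_FREQ_BASE (the call runs once per caller invocation) and
      call_stmt_time = 10.
        uninlined time = 1000 + 200        = 1200
        inlined time   = 1000 + (150 - 10) = 1140   (estimate_edge_time = 150)
      The saving is 60.  With --param inline-min-speedup=10 the threshold is
      1200 * 10 / 100 = 120, so 60 < 120 and big_speedup_p () returns false.  */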
 552 /* Return true if we are interested in inlining the small function.
 553 When REPORT is true, report the reason to the dump file. */
555 static bool
556 want_inline_small_function_p (struct cgraph_edge *e, bool report)
558 bool want_inline = true;
559 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
561 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
563 else if (!DECL_DECLARED_INLINE_P (callee->decl)
564 && !flag_inline_small_functions)
566 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
567 want_inline = false;
 569 /* Do a fast and conservative check whether the function can be a good
 570 inline candidate. At the moment we allow inline hints to
 571 promote a non-inline function to inline and we increase
 572 MAX_INLINE_INSNS_SINGLE 16-fold for inline functions. */
573 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
574 && (!e->count || !e->maybe_hot_p ()))
575 && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
576 > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
578 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
579 want_inline = false;
581 else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
582 && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
583 > 16 * MAX_INLINE_INSNS_SINGLE)
585 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
586 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
587 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
588 want_inline = false;
590 else
592 int growth = estimate_edge_growth (e);
593 inline_hints hints = estimate_edge_hints (e);
594 bool big_speedup = big_speedup_p (e);
596 if (growth <= 0)
598 /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when
 599 hints suggest that inlining the given function is very profitable. */
600 else if (DECL_DECLARED_INLINE_P (callee->decl)
601 && growth >= MAX_INLINE_INSNS_SINGLE
602 && ((!big_speedup
603 && !(hints & (INLINE_HINT_indirect_call
604 | INLINE_HINT_known_hot
605 | INLINE_HINT_loop_iterations
606 | INLINE_HINT_array_index
607 | INLINE_HINT_loop_stride)))
608 || growth >= MAX_INLINE_INSNS_SINGLE * 16))
610 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
611 want_inline = false;
613 else if (!DECL_DECLARED_INLINE_P (callee->decl)
614 && !flag_inline_functions)
616 /* growth_likely_positive is expensive, always test it last. */
617 if (growth >= MAX_INLINE_INSNS_SINGLE
618 || growth_likely_positive (callee, growth))
620 e->inline_failed = CIF_NOT_DECLARED_INLINED;
621 want_inline = false;
 624 /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
 625 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
 626 inlining the given function is very profitable. */
627 else if (!DECL_DECLARED_INLINE_P (callee->decl)
628 && !big_speedup
629 && !(hints & INLINE_HINT_known_hot)
630 && growth >= ((hints & (INLINE_HINT_indirect_call
631 | INLINE_HINT_loop_iterations
632 | INLINE_HINT_array_index
633 | INLINE_HINT_loop_stride))
634 ? MAX (MAX_INLINE_INSNS_AUTO,
635 MAX_INLINE_INSNS_SINGLE)
636 : MAX_INLINE_INSNS_AUTO))
638 /* growth_likely_positive is expensive, always test it last. */
639 if (growth >= MAX_INLINE_INSNS_SINGLE
640 || growth_likely_positive (callee, growth))
642 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
643 want_inline = false;
646 /* If call is cold, do not inline when function body would grow. */
647 else if (!e->maybe_hot_p ()
648 && (growth >= MAX_INLINE_INSNS_SINGLE
649 || growth_likely_positive (callee, growth)))
651 e->inline_failed = CIF_UNLIKELY_CALL;
652 want_inline = false;
655 if (!want_inline && report)
656 report_inline_failed_reason (e);
657 return want_inline;
 660 /* EDGE is a self-recursive edge.
 661 We handle two cases - when function A is inlined into itself
 662 or when function A is being inlined into another inlined copy of function
 663 A within function B.
 665 In the first case OUTER_NODE points to the toplevel copy of A, while
 666 in the second case OUTER_NODE points to the outermost copy of A in B.
 668 In both cases we want to be extra selective since
 669 inlining the call will just cause new recursive calls to appear. */
671 static bool
672 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
673 struct cgraph_node *outer_node,
674 bool peeling,
675 int depth)
677 char const *reason = NULL;
678 bool want_inline = true;
679 int caller_freq = CGRAPH_FREQ_BASE;
680 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
682 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
683 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
685 if (!edge->maybe_hot_p ())
687 reason = "recursive call is cold";
688 want_inline = false;
690 else if (max_count && !outer_node->count)
692 reason = "not executed in profile";
693 want_inline = false;
695 else if (depth > max_depth)
697 reason = "--param max-inline-recursive-depth exceeded.";
698 want_inline = false;
701 if (outer_node->global.inlined_to)
702 caller_freq = outer_node->callers->frequency;
704 if (!caller_freq)
706 reason = "function is inlined and unlikely";
707 want_inline = false;
710 if (!want_inline)
 712 /* Inlining of a self-recursive function into a copy of itself within another function
 713 is a transformation similar to loop peeling.
 715 Peeling is profitable if we can inline enough copies to make the probability
 716 of an actual call to the self-recursive function very small. Be sure that
 717 the probability of recursion is small.
 719 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 720 This way the expected number of recursions is at most max_depth. */
721 else if (peeling)
723 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
724 / max_depth);
725 int i;
726 for (i = 1; i < depth; i++)
727 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
728 if (max_count
729 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
730 >= max_prob))
732 reason = "profile of recursive call is too large";
733 want_inline = false;
735 if (!max_count
736 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
737 >= max_prob))
739 reason = "frequency of recursive call is too large";
740 want_inline = false;
 743 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the recursion
 744 depth is large. We reduce function call overhead and increase the chances that
 745 things fit in the hardware return predictor.
 747 Recursive inlining might however increase the cost of stack frame setup,
 748 actually slowing down functions whose recursion tree is wide rather than
 749 deep.
 751 Deciding reliably when to do recursive inlining without profile feedback
 752 is tricky. For now we disable recursive inlining when the probability of self
 753 recursion is low.
 755 Recursive inlining of a self-recursive call within a loop also results in large loop
 756 depths that generally optimize badly. We may want to throttle down inlining
 757 in those cases. In particular this seems to happen in one of the libstdc++ rb-tree
 758 methods. */
759 else
761 if (max_count
762 && (edge->count * 100 / outer_node->count
763 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
765 reason = "profile of recursive call is too small";
766 want_inline = false;
768 else if (!max_count
769 && (edge->frequency * 100 / caller_freq
770 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
772 reason = "frequency of recursive call is too small";
773 want_inline = false;
776 if (!want_inline && dump_file)
777 fprintf (dump_file, " not inlining recursively: %s\n", reason);
778 return want_inline;
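The max_prob computation above follows from the geometric-series argument in the comment; a hedged illustration with max_depth = 8 (the real value comes from the --param settings):

   /* If the probability of taking the recursive call is p, the expected
      recursion depth is 1 / (1 - p).  Requiring p <= 1 - 1/max_depth = 7/8
      therefore bounds the expected depth by 8.  The loop squares max_prob
      once per already-peeled level, so a copy at depth d must satisfy
      p <= (7/8)^(2^(d-1)), i.e. each deeper copy needs a progressively
      colder recursive edge before it is peeled again.  */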
 781 /* Return true when NODE has an uninlinable caller;
 782 set HAS_HOT_CALL if it has a hot call.
 783 Worker for cgraph_for_node_and_aliases. */
785 static bool
786 check_callers (struct cgraph_node *node, void *has_hot_call)
788 struct cgraph_edge *e;
789 for (e = node->callers; e; e = e->next_caller)
791 if (!can_inline_edge_p (e, true))
792 return true;
793 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
794 *(bool *)has_hot_call = true;
796 return false;
799 /* If NODE has a caller, return true. */
801 static bool
802 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
804 if (node->callers)
805 return true;
806 return false;
809 /* Decide if inlining NODE would reduce unit size by eliminating
810 the offline copy of function.
811 When COLD is true the cold calls are considered, too. */
813 static bool
814 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
816 struct cgraph_node *function = node->ultimate_alias_target ();
817 bool has_hot_call = false;
819 /* Does it have callers? */
820 if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
821 return false;
822 /* Already inlined? */
823 if (function->global.inlined_to)
824 return false;
825 if (node->ultimate_alias_target () != node)
826 return false;
827 /* Inlining into all callers would increase size? */
828 if (estimate_growth (node) > 0)
829 return false;
830 /* All inlines must be possible. */
831 if (node->call_for_symbol_thunks_and_aliases
832 (check_callers, &has_hot_call, true))
833 return false;
834 if (!cold && !has_hot_call)
835 return false;
836 return true;
839 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
841 /* Return relative time improvement for inlining EDGE in range
842 1...RELATIVE_TIME_BENEFIT_RANGE */
844 static inline int
845 relative_time_benefit (struct inline_summary *callee_info,
846 struct cgraph_edge *edge,
847 int edge_time)
849 gcov_type relbenefit;
850 gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
851 gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
853 /* Inlining into extern inline function is not a win. */
854 if (DECL_EXTERNAL (edge->caller->global.inlined_to
855 ? edge->caller->global.inlined_to->decl
856 : edge->caller->decl))
857 return 1;
859 /* Watch overflows. */
860 gcc_checking_assert (uninlined_call_time >= 0);
861 gcc_checking_assert (inlined_call_time >= 0);
862 gcc_checking_assert (uninlined_call_time >= inlined_call_time);
864 /* Compute relative time benefit, i.e. how much the call becomes faster.
 865 ??? perhaps computing how much the caller+callee together become faster
866 would lead to more realistic results. */
867 if (!uninlined_call_time)
868 uninlined_call_time = 1;
869 relbenefit =
870 RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
871 uninlined_call_time);
872 relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
873 gcc_checking_assert (relbenefit >= 0);
874 relbenefit = MAX (relbenefit, 1);
875 return relbenefit;
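Continuing the illustrative numbers used earlier (uninlined time 1200, inlined time 1140):

   /* relbenefit = (1200 - 1140) * RELATIVE_TIME_BENEFIT_RANGE / 1200
                 = RELATIVE_TIME_BENEFIT_RANGE / 20,
      i.e. the call becomes 5% faster; the result is always clamped to
      the range [1, RELATIVE_TIME_BENEFIT_RANGE].  */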
 879 /* A cost model driving the inlining heuristics so that the edges with the
 880 smallest badness are inlined first. After each inlining is performed,
 881 the costs of all caller edges of the affected nodes are recomputed so the
 882 metrics may accurately depend on values such as the number of inlinable callers
 883 of the function or the function body size. */
885 static int
886 edge_badness (struct cgraph_edge *edge, bool dump)
888 gcov_type badness;
889 int growth, edge_time;
890 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
891 struct inline_summary *callee_info = inline_summary (callee);
892 inline_hints hints;
894 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
895 return INT_MIN;
897 growth = estimate_edge_growth (edge);
898 edge_time = estimate_edge_time (edge);
899 hints = estimate_edge_hints (edge);
900 gcc_checking_assert (edge_time >= 0);
901 gcc_checking_assert (edge_time <= callee_info->time);
902 gcc_checking_assert (growth <= callee_info->size);
904 if (dump)
906 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
907 xstrdup (edge->caller->name ()),
908 edge->caller->order,
909 xstrdup (callee->name ()),
910 edge->callee->order);
911 fprintf (dump_file, " size growth %i, time %i ",
912 growth,
913 edge_time);
914 dump_inline_hints (dump_file, hints);
915 if (big_speedup_p (edge))
916 fprintf (dump_file, " big_speedup");
917 fprintf (dump_file, "\n");
920 /* Always prefer inlining saving code size. */
921 if (growth <= 0)
923 badness = INT_MIN / 2 + growth;
924 if (dump)
925 fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness,
926 growth);
929 /* When profiling is available, compute badness as:
931 relative_edge_count * relative_time_benefit
932 goodness = -------------------------------------------
933 growth_f_caller
934 badness = -goodness
 936 The fraction is upside down, because for edge counts and time benefits
 937 the bounds are known. Edge growth is essentially unlimited. */
939 else if (max_count)
941 sreal tmp, relbenefit_real, growth_real;
942 int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
943 /* Capping edge->count to max_count. edge->count can be larger than
944 max_count if an inline adds new edges which increase max_count
945 after max_count is computed. */
946 gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
948 sreal_init (&relbenefit_real, relbenefit, 0);
949 sreal_init (&growth_real, growth, 0);
951 /* relative_edge_count. */
952 sreal_init (&tmp, edge_count, 0);
953 sreal_div (&tmp, &tmp, &max_count_real);
955 /* relative_time_benefit. */
956 sreal_mul (&tmp, &tmp, &relbenefit_real);
957 sreal_div (&tmp, &tmp, &max_relbenefit_real);
959 /* growth_f_caller. */
960 sreal_mul (&tmp, &tmp, &half_int_min_real);
961 sreal_div (&tmp, &tmp, &growth_real);
963 badness = -1 * sreal_to_int (&tmp);
965 if (dump)
967 fprintf (dump_file,
968 " %i (relative %f): profile info. Relative count %f%s"
969 " * Relative benefit %f\n",
970 (int) badness, (double) badness / INT_MIN,
971 (double) edge_count / max_count,
972 edge->count > max_count ? " (capped to max_count)" : "",
973 relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 977 /* When the function local profile is available, compute badness as:
979 relative_time_benefit
980 goodness = ---------------------------------
981 growth_of_caller * overall_growth
983 badness = - goodness
985 compensated by the inline hints.
987 else if (flag_guess_branch_prob)
989 badness = (relative_time_benefit (callee_info, edge, edge_time)
990 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
991 badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
992 gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
993 if ((hints & (INLINE_HINT_indirect_call
994 | INLINE_HINT_loop_iterations
995 | INLINE_HINT_array_index
996 | INLINE_HINT_loop_stride))
997 || callee_info->growth <= 0)
998 badness *= 8;
999 if (hints & (INLINE_HINT_same_scc))
1000 badness /= 16;
1001 else if (hints & (INLINE_HINT_in_scc))
1002 badness /= 8;
1003 else if (hints & (INLINE_HINT_cross_module))
1004 badness /= 2;
1005 gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
1006 if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
1007 badness *= 16;
1008 if (dump)
1010 fprintf (dump_file,
1011 " %i: guessed profile. frequency %f,"
1012 " benefit %f%%, time w/o inlining %i, time w inlining %i"
1013 " overall growth %i (current) %i (original)\n",
1014 (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
1015 relative_time_benefit (callee_info, edge, edge_time) * 100.0
1016 / RELATIVE_TIME_BENEFIT_RANGE,
1017 (int)compute_uninlined_call_time (callee_info, edge),
1018 (int)compute_inlined_call_time (edge, edge_time),
1019 estimate_growth (callee),
1020 callee_info->growth);
 1023 /* When the function local profile is not available or it does not give
 1024 useful information (i.e. frequency is zero), base the cost on
 1025 the loop nest and overall size growth, so we optimize for the overall number
 1026 of functions fully inlined in the program. */
1027 else
1029 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1030 badness = growth * 256;
1032 /* Decrease badness if call is nested. */
1033 if (badness > 0)
1034 badness >>= nest;
1035 else
1037 badness <<= nest;
1039 if (dump)
1040 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
1041 nest);
1044 /* Ensure that we did not overflow in all the fixed point math above. */
1045 gcc_assert (badness >= INT_MIN);
1046 gcc_assert (badness <= INT_MAX - 1);
1047 /* Make recursive inlining happen always after other inlining is done. */
1048 if (edge->recursive_p ())
1049 return badness + 1;
1050 else
1051 return badness;
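A hedged note on how the three badness variants compare, using made-up numbers:

   /* The growth term dominates the ordering: with guessed profiles, an edge
      growing its caller by 20 instructions gets a badness roughly 5x more
      negative than an otherwise identical edge growing it by 100, and in the
      no-profile case the key is simply growth * 256 scaled by loop depth.
      Either way the smaller-growth edge gets the smaller key and is popped
      from the heap first by inline_small_functions ().  The numbers are
      illustrative only.  */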
1054 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1055 static inline void
1056 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1058 int badness = edge_badness (edge, false);
1059 if (edge->aux)
1061 fibnode_t n = (fibnode_t) edge->aux;
1062 gcc_checking_assert (n->data == edge);
 1064 /* fibheap_replace_key only decreases keys.
 1065 When we increase the key we do not update the heap
 1066 and instead re-insert the element once it becomes
 1067 a minimum of the heap. */
1068 if (badness < n->key)
1070 if (dump_file && (dump_flags & TDF_DETAILS))
1072 fprintf (dump_file,
1073 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
1074 xstrdup (edge->caller->name ()),
1075 edge->caller->order,
1076 xstrdup (edge->callee->name ()),
1077 edge->callee->order,
1078 (int)n->key,
1079 badness);
1081 fibheap_replace_key (heap, n, badness);
1082 gcc_checking_assert (n->key == badness);
1085 else
1087 if (dump_file && (dump_flags & TDF_DETAILS))
1089 fprintf (dump_file,
1090 " enqueuing call %s/%i -> %s/%i, badness %i\n",
1091 xstrdup (edge->caller->name ()),
1092 edge->caller->order,
1093 xstrdup (edge->callee->name ()),
1094 edge->callee->order,
1095 badness);
1097 edge->aux = fibheap_insert (heap, badness, edge);
1102 /* NODE was inlined.
 1103 All caller edges need to be reset because
 1104 size estimates change. Similarly callees need a reset
 1105 because better context may be known. */
1107 static void
1108 reset_edge_caches (struct cgraph_node *node)
1110 struct cgraph_edge *edge;
1111 struct cgraph_edge *e = node->callees;
1112 struct cgraph_node *where = node;
1113 struct ipa_ref *ref;
1115 if (where->global.inlined_to)
1116 where = where->global.inlined_to;
1118 /* WHERE body size has changed, the cached growth is invalid. */
1119 reset_node_growth_cache (where);
1121 for (edge = where->callers; edge; edge = edge->next_caller)
1122 if (edge->inline_failed)
1123 reset_edge_growth_cache (edge);
1125 FOR_EACH_ALIAS (where, ref)
1126 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1128 if (!e)
1129 return;
1131 while (true)
1132 if (!e->inline_failed && e->callee->callees)
1133 e = e->callee->callees;
1134 else
1136 if (e->inline_failed)
1137 reset_edge_growth_cache (e);
1138 if (e->next_callee)
1139 e = e->next_callee;
1140 else
1144 if (e->caller == node)
1145 return;
1146 e = e->caller->callers;
1148 while (!e->next_callee);
1149 e = e->next_callee;
 1154 /* Recompute HEAP nodes for each caller of NODE.
 1155 UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
 1156 When CHECK_INLINABLITY_FOR is set, re-check only the specified edge for
 1157 inlinability. Otherwise check all edges. */
1159 static void
1160 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1161 bitmap updated_nodes,
1162 struct cgraph_edge *check_inlinablity_for)
1164 struct cgraph_edge *edge;
1165 struct ipa_ref *ref;
1167 if ((!node->alias && !inline_summary (node)->inlinable)
1168 || node->global.inlined_to)
1169 return;
1170 if (!bitmap_set_bit (updated_nodes, node->uid))
1171 return;
1173 FOR_EACH_ALIAS (node, ref)
1175 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1176 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1179 for (edge = node->callers; edge; edge = edge->next_caller)
1180 if (edge->inline_failed)
1182 if (!check_inlinablity_for
1183 || check_inlinablity_for == edge)
1185 if (can_inline_edge_p (edge, false)
1186 && want_inline_small_function_p (edge, false))
1187 update_edge_key (heap, edge);
1188 else if (edge->aux)
1190 report_inline_failed_reason (edge);
1191 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1192 edge->aux = NULL;
1195 else if (edge->aux)
1196 update_edge_key (heap, edge);
1200 /* Recompute HEAP nodes for each uninlined call in NODE.
 1201 This is used when we know that edge badnesses are only going to increase
 1202 (we introduced a new call site) and thus all we need is to insert the newly
 1203 created edges into the heap. */
1205 static void
1206 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1207 bitmap updated_nodes)
1209 struct cgraph_edge *e = node->callees;
1211 if (!e)
1212 return;
1213 while (true)
1214 if (!e->inline_failed && e->callee->callees)
1215 e = e->callee->callees;
1216 else
1218 enum availability avail;
1219 struct cgraph_node *callee;
1220 /* We do not reset callee growth cache here. Since we added a new call,
 1221 growth could only have increased and consequently the badness metric
 1222 doesn't need updating. */
1223 if (e->inline_failed
1224 && (callee = e->callee->ultimate_alias_target (&avail))
1225 && inline_summary (callee)->inlinable
1226 && avail >= AVAIL_AVAILABLE
1227 && !bitmap_bit_p (updated_nodes, callee->uid))
1229 if (can_inline_edge_p (e, false)
1230 && want_inline_small_function_p (e, false))
1231 update_edge_key (heap, e);
1232 else if (e->aux)
1234 report_inline_failed_reason (e);
1235 fibheap_delete_node (heap, (fibnode_t) e->aux);
1236 e->aux = NULL;
1239 if (e->next_callee)
1240 e = e->next_callee;
1241 else
1245 if (e->caller == node)
1246 return;
1247 e = e->caller->callers;
1249 while (!e->next_callee);
1250 e = e->next_callee;
1255 /* Enqueue all recursive calls from NODE into priority queue depending on
1256 how likely we want to recursively inline the call. */
1258 static void
1259 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1260 fibheap_t heap)
1262 struct cgraph_edge *e;
1263 enum availability avail;
1265 for (e = where->callees; e; e = e->next_callee)
1266 if (e->callee == node
1267 || (e->callee->ultimate_alias_target (&avail) == node
1268 && avail > AVAIL_INTERPOSABLE))
1270 /* When profile feedback is available, prioritize by expected number
1271 of calls. */
1272 fibheap_insert (heap,
1273 !max_count ? -e->frequency
1274 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1277 for (e = where->callees; e; e = e->next_callee)
1278 if (!e->inline_failed)
1279 lookup_recursive_calls (node, e->callee, heap);
 1282 /* Decide on recursive inlining: in case the function has recursive calls,
 1283 inline until the body size reaches the given limit. If any new indirect edges
1284 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1285 is NULL. */
1287 static bool
1288 recursive_inlining (struct cgraph_edge *edge,
1289 vec<cgraph_edge *> *new_edges)
1291 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1292 fibheap_t heap;
1293 struct cgraph_node *node;
1294 struct cgraph_edge *e;
1295 struct cgraph_node *master_clone = NULL, *next;
1296 int depth = 0;
1297 int n = 0;
1299 node = edge->caller;
1300 if (node->global.inlined_to)
1301 node = node->global.inlined_to;
1303 if (DECL_DECLARED_INLINE_P (node->decl))
1304 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1306 /* Make sure that function is small enough to be considered for inlining. */
1307 if (estimate_size_after_inlining (node, edge) >= limit)
1308 return false;
1309 heap = fibheap_new ();
1310 lookup_recursive_calls (node, node, heap);
1311 if (fibheap_empty (heap))
1313 fibheap_delete (heap);
1314 return false;
1317 if (dump_file)
1318 fprintf (dump_file,
1319 " Performing recursive inlining on %s\n",
1320 node->name ());
 1322 /* Do the inlining and update the list of recursive calls during the process. */
1323 while (!fibheap_empty (heap))
1325 struct cgraph_edge *curr
1326 = (struct cgraph_edge *) fibheap_extract_min (heap);
1327 struct cgraph_node *cnode, *dest = curr->callee;
1329 if (!can_inline_edge_p (curr, true))
1330 continue;
 1332 /* MASTER_CLONE is produced when we have already started modifying
 1333 the function. Be sure to redirect the edge to the original body before
 1334 estimating growths, otherwise we will be seeing growths after inlining
 1335 the already modified body. */
1336 if (master_clone)
1338 curr->redirect_callee (master_clone);
1339 reset_edge_growth_cache (curr);
1342 if (estimate_size_after_inlining (node, curr) > limit)
1344 curr->redirect_callee (dest);
1345 reset_edge_growth_cache (curr);
1346 break;
1349 depth = 1;
1350 for (cnode = curr->caller;
1351 cnode->global.inlined_to; cnode = cnode->callers->caller)
1352 if (node->decl
1353 == curr->callee->ultimate_alias_target ()->decl)
1354 depth++;
1356 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1358 curr->redirect_callee (dest);
1359 reset_edge_growth_cache (curr);
1360 continue;
1363 if (dump_file)
1365 fprintf (dump_file,
1366 " Inlining call of depth %i", depth);
1367 if (node->count)
1369 fprintf (dump_file, " called approx. %.2f times per call",
1370 (double)curr->count / node->count);
1372 fprintf (dump_file, "\n");
1374 if (!master_clone)
1376 /* We need original clone to copy around. */
1377 master_clone = node->create_clone (node->decl, node->count,
1378 CGRAPH_FREQ_BASE, false, vNULL,
1379 true, NULL, NULL);
1380 for (e = master_clone->callees; e; e = e->next_callee)
1381 if (!e->inline_failed)
1382 clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1383 curr->redirect_callee (master_clone);
1384 reset_edge_growth_cache (curr);
1387 inline_call (curr, false, new_edges, &overall_size, true);
1388 lookup_recursive_calls (node, curr->callee, heap);
1389 n++;
1392 if (!fibheap_empty (heap) && dump_file)
1393 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1394 fibheap_delete (heap);
1396 if (!master_clone)
1397 return false;
1399 if (dump_file)
1400 fprintf (dump_file,
1401 "\n Inlined %i times, "
1402 "body grown from size %i to %i, time %i to %i\n", n,
1403 inline_summary (master_clone)->size, inline_summary (node)->size,
1404 inline_summary (master_clone)->time, inline_summary (node)->time);
 1406 /* Remove the master clone we used for inlining. We rely on the fact that clones
 1407 inlined into the master clone get queued just before the master clone so we don't
 1408 need recursion. */
1409 for (node = symtab->first_function (); node != master_clone;
1410 node = next)
1412 next = symtab->next_function (node);
1413 if (node->global.inlined_to == master_clone)
1414 node->remove ();
1416 master_clone->remove ();
1417 return true;
 1421 /* Given the whole-compilation-unit size estimate INSNS, compute how large we can
 1422 allow the unit to grow. */
1424 static int
1425 compute_max_insns (int insns)
1427 int max_insns = insns;
1428 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1429 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1431 return ((int64_t) max_insns
1432 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
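A short worked example of the unit growth limit; the parameter values are illustrative rather than authoritative defaults.

   /* Example: with --param large-unit-insns=10000 and
      --param inline-unit-growth=30, a unit whose estimated size is
      50000 insns may grow to 50000 * 130 / 100 = 65000, while a tiny
      3000-insn unit is first rounded up to the 10000 floor and may
      therefore grow to 13000.  */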
1436 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1438 static void
1439 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge *> new_edges)
1441 while (new_edges.length () > 0)
1443 struct cgraph_edge *edge = new_edges.pop ();
1445 gcc_assert (!edge->aux);
1446 if (edge->inline_failed
1447 && can_inline_edge_p (edge, true)
1448 && want_inline_small_function_p (edge, true))
1449 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1453 /* Remove EDGE from the fibheap. */
1455 static void
1456 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1458 if (e->callee)
1459 reset_node_growth_cache (e->callee);
1460 if (e->aux)
1462 fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1463 e->aux = NULL;
1467 /* Return true if speculation of edge E seems useful.
1468 If ANTICIPATE_INLINING is true, be conservative and hope that E
1469 may get inlined. */
1471 bool
1472 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1474 enum availability avail;
1475 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1476 struct cgraph_edge *direct, *indirect;
1477 struct ipa_ref *ref;
1479 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1481 if (!e->maybe_hot_p ())
1482 return false;
1484 /* See if IP optimizations found something potentially useful about the
1485 function. For now we look only for CONST/PURE flags. Almost everything
1486 else we propagate is useless. */
1487 if (avail >= AVAIL_AVAILABLE)
1489 int ecf_flags = flags_from_decl_or_type (target->decl);
1490 if (ecf_flags & ECF_CONST)
1492 e->speculative_call_info (direct, indirect, ref);
1493 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1494 return true;
1496 else if (ecf_flags & ECF_PURE)
1498 e->speculative_call_info (direct, indirect, ref);
1499 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1500 return true;
 1503 /* If we did not manage to inline the function nor redirect
 1504 to an ipa-cp clone (which is recognized by the local flag being set),
 1505 it is probably pointless to inline it unless the hardware is missing an
 1506 indirect call predictor. */
1507 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1508 return false;
1509 /* For overwritable targets there is not much to do. */
1510 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1511 return false;
1512 /* OK, speculation seems interesting. */
1513 return true;
1516 /* We know that EDGE is not going to be inlined.
1517 See if we can remove speculation. */
1519 static void
1520 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1522 if (edge->speculative && !speculation_useful_p (edge, false))
1524 struct cgraph_node *node = edge->caller;
1525 struct cgraph_node *where = node->global.inlined_to
1526 ? node->global.inlined_to : node;
1527 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1529 spec_rem += edge->count;
1530 edge->resolve_speculation ();
1531 reset_edge_caches (where);
1532 inline_update_overall_summary (where);
1533 update_caller_keys (edge_heap, where,
1534 updated_nodes, NULL);
1535 update_callee_keys (edge_heap, where,
1536 updated_nodes);
1537 BITMAP_FREE (updated_nodes);
 1541 /* We use a greedy algorithm for inlining of small functions:
 1542 all inline candidates are put into a prioritized heap ordered by
 1543 increasing badness.
 1545 The inlining of small functions is bounded by the unit growth parameters. */
1547 static void
1548 inline_small_functions (void)
1550 struct cgraph_node *node;
1551 struct cgraph_edge *edge;
1552 fibheap_t edge_heap = fibheap_new ();
1553 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1554 int min_size, max_size;
1555 auto_vec<cgraph_edge *> new_indirect_edges;
1556 int initial_size = 0;
1557 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1558 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1559 if (flag_indirect_inlining)
1560 new_indirect_edges.create (8);
1562 edge_removal_hook_holder
1563 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1565 /* Compute overall unit size and other global parameters used by badness
1566 metrics. */
1568 max_count = 0;
1569 ipa_reduced_postorder (order, true, true, NULL);
1570 free (order);
1572 FOR_EACH_DEFINED_FUNCTION (node)
1573 if (!node->global.inlined_to)
1575 if (node->has_gimple_body_p ()
1576 || node->thunk.thunk_p)
1578 struct inline_summary *info = inline_summary (node);
1579 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1581 /* Do not account external functions, they will be optimized out
1582 if not inlined. Also only count the non-cold portion of program. */
1583 if (!DECL_EXTERNAL (node->decl)
1584 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1585 initial_size += info->size;
1586 info->growth = estimate_growth (node);
1587 if (dfs && dfs->next_cycle)
1589 struct cgraph_node *n2;
1590 int id = dfs->scc_no + 1;
1591 for (n2 = node; n2;
1592 n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1594 struct inline_summary *info2 = inline_summary (n2);
1595 if (info2->scc_no)
1596 break;
1597 info2->scc_no = id;
1602 for (edge = node->callers; edge; edge = edge->next_caller)
1603 if (max_count < edge->count)
1604 max_count = edge->count;
1606 sreal_init (&max_count_real, max_count, 0);
1607 sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1608 sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1609 ipa_free_postorder_info ();
1610 initialize_growth_caches ();
1612 if (dump_file)
1613 fprintf (dump_file,
1614 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1615 initial_size);
1617 overall_size = initial_size;
1618 max_size = compute_max_insns (overall_size);
1619 min_size = overall_size;
1621 /* Populate the heap with all edges we might inline. */
1623 FOR_EACH_DEFINED_FUNCTION (node)
1625 bool update = false;
1626 struct cgraph_edge *next;
1628 if (dump_file)
1629 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1630 node->name (), node->order);
1632 for (edge = node->callees; edge; edge = next)
1634 next = edge->next_callee;
1635 if (edge->inline_failed
1636 && !edge->aux
1637 && can_inline_edge_p (edge, true)
1638 && want_inline_small_function_p (edge, true)
1639 && edge->inline_failed)
1641 gcc_assert (!edge->aux);
1642 update_edge_key (edge_heap, edge);
1644 if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1646 edge->resolve_speculation ();
1647 update = true;
1650 if (update)
1652 struct cgraph_node *where = node->global.inlined_to
1653 ? node->global.inlined_to : node;
1654 inline_update_overall_summary (where);
1655 reset_node_growth_cache (where);
1656 reset_edge_caches (where);
1657 update_caller_keys (edge_heap, where,
1658 updated_nodes, NULL);
1659 bitmap_clear (updated_nodes);
1663 gcc_assert (in_lto_p
1664 || !max_count
1665 || (profile_info && flag_branch_probabilities));
1667 while (!fibheap_empty (edge_heap))
1669 int old_size = overall_size;
1670 struct cgraph_node *where, *callee;
1671 int badness = fibheap_min_key (edge_heap);
1672 int current_badness;
1673 int cached_badness;
1674 int growth;
1676 edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1677 gcc_assert (edge->aux);
1678 edge->aux = NULL;
1679 if (!edge->inline_failed || !edge->callee->analyzed)
1680 continue;
 1682 /* Be sure that the caches are kept consistent.
 1683 We cannot make this ENABLE_CHECKING only because it causes different
 1684 updates of the fibheap queue. */
1685 cached_badness = edge_badness (edge, false);
1686 reset_edge_growth_cache (edge);
1687 reset_node_growth_cache (edge->callee);
1689 /* When updating the edge costs, we only decrease badness in the keys.
 1690 Increases of badness are handled lazily; when we see a key with an
 1691 out-of-date value on it, we re-insert it now. */
1692 current_badness = edge_badness (edge, false);
1693 gcc_assert (cached_badness == current_badness);
1694 gcc_assert (current_badness >= badness);
1695 if (current_badness != badness)
1697 edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1698 continue;
1701 if (!can_inline_edge_p (edge, true))
1703 resolve_noninline_speculation (edge_heap, edge);
1704 continue;
1707 callee = edge->callee->ultimate_alias_target ();
1708 growth = estimate_edge_growth (edge);
1709 if (dump_file)
1711 fprintf (dump_file,
1712 "\nConsidering %s/%i with %i size\n",
1713 callee->name (), callee->order,
1714 inline_summary (callee)->size);
1715 fprintf (dump_file,
1716 " to be inlined into %s/%i in %s:%i\n"
1717 " Estimated badness is %i, frequency %.2f.\n",
1718 edge->caller->name (), edge->caller->order,
1719 flag_wpa ? "unknown"
1720 : gimple_filename ((const_gimple) edge->call_stmt),
1721 flag_wpa ? -1
1722 : gimple_lineno ((const_gimple) edge->call_stmt),
1723 badness,
1724 edge->frequency / (double)CGRAPH_FREQ_BASE);
1725 if (edge->count)
1726 fprintf (dump_file," Called %"PRId64"x\n",
1727 edge->count);
1728 if (dump_flags & TDF_DETAILS)
1729 edge_badness (edge, true);
1732 if (overall_size + growth > max_size
1733 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1735 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1736 report_inline_failed_reason (edge);
1737 resolve_noninline_speculation (edge_heap, edge);
1738 continue;
1741 if (!want_inline_small_function_p (edge, true))
1743 resolve_noninline_speculation (edge_heap, edge);
1744 continue;
 1747 /* Heuristics for inlining small functions work poorly for
 1748 recursive calls where we produce an effect similar to loop unrolling.
 1749 When inlining such an edge seems profitable, leave the decision to the
 1750 special-purpose recursive inliner. */
1751 if (edge->recursive_p ())
1753 where = edge->caller;
1754 if (where->global.inlined_to)
1755 where = where->global.inlined_to;
1756 if (!recursive_inlining (edge,
1757 flag_indirect_inlining
1758 ? &new_indirect_edges : NULL))
1760 edge->inline_failed = CIF_RECURSIVE_INLINING;
1761 resolve_noninline_speculation (edge_heap, edge);
1762 continue;
1764 reset_edge_caches (where);
1765 /* Recursive inliner inlines all recursive calls of the function
1766 at once. Consequently we need to update all callee keys. */
1767 if (flag_indirect_inlining)
1768 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1769 update_callee_keys (edge_heap, where, updated_nodes);
1770 bitmap_clear (updated_nodes);
1772 else
1774 struct cgraph_node *outer_node = NULL;
1775 int depth = 0;
 1777 /* Consider the case where a self-recursive function A is inlined
 1778 into B. This is a desirable optimization in some cases, since it
 1779 leads to an effect similar to loop peeling and we might completely
 1780 optimize out the recursive call. However we must be extra
 1781 selective. */
1783 where = edge->caller;
1784 while (where->global.inlined_to)
1786 if (where->decl == callee->decl)
1787 outer_node = where, depth++;
1788 where = where->callers->caller;
1790 if (outer_node
1791 && !want_inline_self_recursive_call_p (edge, outer_node,
1792 true, depth))
1794 edge->inline_failed
1795 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1796 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1797 resolve_noninline_speculation (edge_heap, edge);
1798 continue;
1800 else if (depth && dump_file)
1801 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1803 gcc_checking_assert (!callee->global.inlined_to);
1804 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1805 if (flag_indirect_inlining)
1806 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1808 reset_edge_caches (edge->callee);
1809 reset_node_growth_cache (callee);
1811 update_callee_keys (edge_heap, where, updated_nodes);
1813 where = edge->caller;
1814 if (where->global.inlined_to)
1815 where = where->global.inlined_to;
 1817 /* Our profitability metric can depend on local properties
 1818 such as the number of inlinable calls and the size of the function body.
 1819 After inlining these properties might change for the function we
 1820 inlined into (since its body size changed) and for the functions
 1821 called by the function we inlined (since the number of their inlinable callers
 1822 might change). */
1823 update_caller_keys (edge_heap, where, updated_nodes, NULL);
1824 bitmap_clear (updated_nodes);
1826 if (dump_file)
1828 fprintf (dump_file,
1829 " Inlined into %s which now has time %i and size %i,"
1830 "net change of %+i.\n",
1831 edge->caller->name (),
1832 inline_summary (edge->caller)->time,
1833 inline_summary (edge->caller)->size,
1834 overall_size - old_size);
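/* If the overall unit size dropped below the previous minimum, record
   the new minimum and recompute the growth limit derived from it.  */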
1836 if (min_size > overall_size)
1838 min_size = overall_size;
1839 max_size = compute_max_insns (min_size);
1841 if (dump_file)
1842 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1846 free_growth_caches ();
1847 fibheap_delete (edge_heap);
1848 if (dump_file)
1849 fprintf (dump_file,
1850 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1851 initial_size, overall_size,
1852 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1853 BITMAP_FREE (updated_nodes);
1854 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1857 /* Flatten NODE. Performed both during early inlining and
1858 at IPA inlining time. */
1860 static void
1861 flatten_function (struct cgraph_node *node, bool early)
1863 struct cgraph_edge *e;
1865 /* We shouldn't be called recursively when we are being processed. */
1866 gcc_assert (node->aux == NULL);
1868 node->aux = (void *) node;
1870 for (e = node->callees; e; e = e->next_callee)
1872 struct cgraph_node *orig_callee;
1873 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1875 /* Have we hit a cycle? If so, it is time to give up. */
1876 if (callee->aux)
1878 if (dump_file)
1879 fprintf (dump_file,
1880 "Not inlining %s into %s to avoid cycle.\n",
1881 xstrdup (callee->name ()),
1882 xstrdup (e->caller->name ()));
1883 e->inline_failed = CIF_RECURSIVE_INLINING;
1884 continue;
1887 /* When the edge is already inlined, we just need to recurse into
1888 it in order to fully flatten the leaves. */
1889 if (!e->inline_failed)
1891 flatten_function (callee, early);
1892 continue;
1895 /* The flatten attribute needs to be processed during late inlining. For
1896 extra code quality, however, we also do flattening during early
1897 optimization. */
1898 if (!early
1899 ? !can_inline_edge_p (e, true)
1900 : !can_early_inline_edge_p (e))
1901 continue;
1903 if (e->recursive_p ())
1905 if (dump_file)
1906 fprintf (dump_file, "Not inlining: recursive call.\n");
1907 continue;
1910 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1911 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1913 if (dump_file)
1914 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1915 continue;
1918 /* Inline the edge and flatten the inline clone. Avoid
1919 recursing through the original node if the node was cloned. */
1920 if (dump_file)
1921 fprintf (dump_file, " Inlining %s into %s.\n",
1922 xstrdup (callee->name ()),
1923 xstrdup (e->caller->name ()));
1924 orig_callee = callee;
1925 inline_call (e, true, NULL, NULL, false);
1926 if (e->callee != orig_callee)
1927 orig_callee->aux = (void *) node;
1928 flatten_function (e->callee, early);
1929 if (e->callee != orig_callee)
1930 orig_callee->aux = NULL;
1933 node->aux = NULL;
1934 if (!node->global.inlined_to)
1935 inline_update_overall_summary (node);
1938 /* Count the number of callers of NODE and store it into DATA (which
1939 points to an int). Worker for cgraph_for_node_and_aliases. */
1941 static bool
1942 sum_callers (struct cgraph_node *node, void *data)
1944 struct cgraph_edge *e;
1945 int *num_calls = (int *)data;
1947 for (e = node->callers; e; e = e->next_caller)
1948 (*num_calls)++;
1949 return false;
1952 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
1953 DATA points to the number of calls originally found, so we avoid infinite
1954 recursion. */
1956 static bool
1957 inline_to_all_callers (struct cgraph_node *node, void *data)
1959 int *num_calls = (int *)data;
1960 bool callee_removed = false;
1962 while (node->callers && !node->global.inlined_to)
1964 struct cgraph_node *caller = node->callers->caller;
1966 if (dump_file)
1968 fprintf (dump_file,
1969 "\nInlining %s size %i.\n",
1970 node->name (),
1971 inline_summary (node)->size);
1972 fprintf (dump_file,
1973 " Called once from %s %i insns.\n",
1974 node->callers->caller->name (),
1975 inline_summary (node->callers->caller)->size);
1978 inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
1979 if (dump_file)
1980 fprintf (dump_file,
1981 " Inlined into %s which now has %i size\n",
1982 caller->name (),
1983 inline_summary (caller)->size);
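/* If we have inlined more calls than were originally counted, inlining
   must have exposed new call sites; give up to avoid iterating forever.  */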
1984 if (!(*num_calls)--)
1986 if (dump_file)
1987 fprintf (dump_file, "New calls found; giving up.\n");
1988 return callee_removed;
1990 if (callee_removed)
1991 return true;
1993 return false;
1996 /* Output overall time estimate. */
1997 static void
1998 dump_overall_stats (void)
2000 int64_t sum_weighted = 0, sum = 0;
2001 struct cgraph_node *node;
2003 FOR_EACH_DEFINED_FUNCTION (node)
2004 if (!node->global.inlined_to
2005 && !node->alias)
2007 int time = inline_summary (node)->time;
2008 sum += time;
2009 sum_weighted += time * node->count;
2011 fprintf (dump_file, "Overall time estimate: "
2012 "%"PRId64" weighted by profile: "
2013 "%"PRId64"\n", sum, sum_weighted);
2016 /* Output some useful stats about inlining. */
2018 static void
2019 dump_inline_stats (void)
2021 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2022 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2023 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2024 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2025 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2026 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
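/* For each inline-failed reason accumulate the profile count ([0]),
   the edge frequency ([1]) and the number of call edges ([2]).  */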
2027 int64_t reason[CIF_N_REASONS][3];
2028 int i;
2029 struct cgraph_node *node;
2031 memset (reason, 0, sizeof (reason));
2032 FOR_EACH_DEFINED_FUNCTION (node)
2034 struct cgraph_edge *e;
2035 for (e = node->callees; e; e = e->next_callee)
2037 if (e->inline_failed)
2039 reason[(int) e->inline_failed][0] += e->count;
2040 reason[(int) e->inline_failed][1] += e->frequency;
2041 reason[(int) e->inline_failed][2] ++;
2042 if (DECL_VIRTUAL_P (e->callee->decl))
2044 if (e->indirect_inlining_edge)
2045 noninlined_virt_indir_cnt += e->count;
2046 else
2047 noninlined_virt_cnt += e->count;
2049 else
2051 if (e->indirect_inlining_edge)
2052 noninlined_indir_cnt += e->count;
2053 else
2054 noninlined_cnt += e->count;
2057 else
2059 if (e->speculative)
2061 if (DECL_VIRTUAL_P (e->callee->decl))
2062 inlined_speculative_ply += e->count;
2063 else
2064 inlined_speculative += e->count;
2066 else if (DECL_VIRTUAL_P (e->callee->decl))
2068 if (e->indirect_inlining_edge)
2069 inlined_virt_indir_cnt += e->count;
2070 else
2071 inlined_virt_cnt += e->count;
2073 else
2075 if (e->indirect_inlining_edge)
2076 inlined_indir_cnt += e->count;
2077 else
2078 inlined_cnt += e->count;
2082 for (e = node->indirect_calls; e; e = e->next_callee)
2083 if (e->indirect_info->polymorphic)
2084 indirect_poly_cnt += e->count;
2085 else
2086 indirect_cnt += e->count;
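/* The count-weighted breakdown below is only meaningful when profile
   counts are available.  */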
2088 if (max_count)
2090 fprintf (dump_file,
2091 "Inlined %"PRId64 " + speculative "
2092 "%"PRId64 " + speculative polymorphic "
2093 "%"PRId64 " + previously indirect "
2094 "%"PRId64 " + virtual "
2095 "%"PRId64 " + virtual and previously indirect "
2096 "%"PRId64 "\n" "Not inlined "
2097 "%"PRId64 " + previously indirect "
2098 "%"PRId64 " + virtual "
2099 "%"PRId64 " + virtual and previously indirect "
2100 "%"PRId64 " + stil indirect "
2101 "%"PRId64 " + still indirect polymorphic "
2102 "%"PRId64 "\n", inlined_cnt,
2103 inlined_speculative, inlined_speculative_ply,
2104 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2105 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2106 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2107 fprintf (dump_file,
2108 "Removed speculations %"PRId64 "\n",
2109 spec_rem);
2111 dump_overall_stats ();
2112 fprintf (dump_file, "\nWhy inlining failed:\n");
2113 for (i = 0; i < CIF_N_REASONS; i++)
2114 if (reason[i][2])
2115 fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2116 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2117 (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2120 /* Decide on the inlining. We do so in topological order to avoid
2121 the expense of updating data structures. */
2123 static unsigned int
2124 ipa_inline (void)
2126 struct cgraph_node *node;
2127 int nnodes;
2128 struct cgraph_node **order;
2129 int i;
2130 int cold;
2131 bool remove_functions = false;
2133 if (!optimize)
2134 return 0;
2136 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2138 if (in_lto_p && optimize)
2139 ipa_update_after_lto_read ();
2141 if (dump_file)
2142 dump_inline_summaries (dump_file);
2144 nnodes = ipa_reverse_postorder (order);
2146 FOR_EACH_FUNCTION (node)
2147 node->aux = 0;
2149 if (dump_file)
2150 fprintf (dump_file, "\nFlattening functions:\n");
2152 /* In the first pass handle functions to be flattened. Do this with
2153 a priority so none of our later choices will make this impossible. */
2154 for (i = nnodes - 1; i >= 0; i--)
2156 node = order[i];
2158 /* Handle nodes to be flattened.
2159 Ideally, when processing callees we would stop inlining at the
2160 entry of cycles, possibly cloning that entry point and
2161 trying to flatten it, turning it into a self-recursive
2162 function. */
2163 if (lookup_attribute ("flatten",
2164 DECL_ATTRIBUTES (node->decl)) != NULL)
2166 if (dump_file)
2167 fprintf (dump_file,
2168 "Flattening %s\n", node->name ());
2169 flatten_function (node, false);
2172 if (dump_file)
2173 dump_overall_stats ();
2175 inline_small_functions ();
2177 /* Do the first after-inlining removal. We want to remove all "stale" extern
2178 inline functions and virtual functions so we really know what is called once. */
2179 symtab->remove_unreachable_nodes (false, dump_file);
2180 free (order);
2182 /* Inline functions with the property that, after inlining into all callers,
2183 the code size will shrink because the out-of-line copy is eliminated.
2184 We do this regardless of the callee size as long as function growth limits
2185 are met. */
2186 if (dump_file)
2187 fprintf (dump_file,
2188 "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2190 /* Inlining one function called once has a good chance of preventing
2191 inlining of another function into the same callee. Ideally we should
2192 work in priority order, but inlining hot functions first is probably
2193 a good cut without the extra pain of maintaining the queue.
2195 ??? This is not really fitting the bill perfectly: inlining a function
2196 into a callee often leads to better optimization of the callee due to
2197 increased context for optimization.
2198 For example, if the main() function calls a function that outputs help
2199 and then a function that does the main optimization, we should inline
2200 the second with priority even if both calls are cold by themselves.
2202 We probably want to implement a new predicate replacing our use of
2203 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2204 to be hot. */
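/* Two passes: consider hot candidates first (cold == 0), then the
   remaining cold ones (cold == 1).  */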
2205 for (cold = 0; cold <= 1; cold ++)
2207 FOR_EACH_DEFINED_FUNCTION (node)
2209 struct cgraph_edge *edge, *next;
2210 bool update = false;
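/* Resolve speculative call edges that no longer look useful and
   account the removed speculations in spec_rem.  */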
2212 for (edge = node->callees; edge; edge = next)
2214 next = edge->next_callee;
2215 if (edge->speculative && !speculation_useful_p (edge, false))
2217 edge->resolve_speculation ();
2218 spec_rem += edge->count;
2219 update = true;
2220 remove_functions = true;
2223 if (update)
2225 struct cgraph_node *where = node->global.inlined_to
2226 ? node->global.inlined_to : node;
2227 reset_node_growth_cache (where);
2228 reset_edge_caches (where);
2229 inline_update_overall_summary (where);
2231 if (flag_inline_functions_called_once
2232 && want_inline_function_to_all_callers_p (node, cold))
2234 int num_calls = 0;
2235 node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2236 true);
2237 while (node->call_for_symbol_thunks_and_aliases (inline_to_all_callers,
2238 &num_calls, true))
2240 remove_functions = true;
2245 /* Free ipa-prop structures if they are no longer needed. */
2246 if (optimize)
2247 ipa_free_all_structures_after_iinln ();
2249 if (dump_file)
2251 fprintf (dump_file,
2252 "\nInlined %i calls, eliminated %i functions\n\n",
2253 ncalls_inlined, nfunctions_inlined);
2254 dump_inline_stats ();
2257 if (dump_file)
2258 dump_inline_summaries (dump_file);
2259 /* In WPA we use inline summaries for the partitioning process. */
2260 if (!flag_wpa)
2261 inline_free_summary ();
2262 return remove_functions ? TODO_remove_functions : 0;
2265 /* Inline always-inline function calls in NODE. */
2267 static bool
2268 inline_always_inline_functions (struct cgraph_node *node)
2270 struct cgraph_edge *e;
2271 bool inlined = false;
2273 for (e = node->callees; e; e = e->next_callee)
2275 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2276 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2277 continue;
2279 if (e->recursive_p ())
2281 if (dump_file)
2282 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2283 e->callee->name ());
2284 e->inline_failed = CIF_RECURSIVE_INLINING;
2285 continue;
2288 if (!can_early_inline_edge_p (e))
2290 /* Set inlined to true if the callee is marked "always_inline" but
2291 is not inlinable. This will allow flagging an error later in
2292 expand_call_inline in tree-inline.c. */
2293 if (lookup_attribute ("always_inline",
2294 DECL_ATTRIBUTES (callee->decl)) != NULL)
2295 inlined = true;
2296 continue;
2299 if (dump_file)
2300 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2301 xstrdup (e->callee->name ()),
2302 xstrdup (e->caller->name ()));
2303 inline_call (e, true, NULL, NULL, false);
2304 inlined = true;
2306 if (inlined)
2307 inline_update_overall_summary (node);
2309 return inlined;
2312 /* Decide on the inlining. We do so in topological order to avoid
2313 the expense of updating data structures. */
2315 static bool
2316 early_inline_small_functions (struct cgraph_node *node)
2318 struct cgraph_edge *e;
2319 bool inlined = false;
2321 for (e = node->callees; e; e = e->next_callee)
2323 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2324 if (!inline_summary (callee)->inlinable
2325 || !e->inline_failed)
2326 continue;
2328 /* Do not consider functions not declared inline. */
2329 if (!DECL_DECLARED_INLINE_P (callee->decl)
2330 && !flag_inline_small_functions
2331 && !flag_inline_functions)
2332 continue;
2334 if (dump_file)
2335 fprintf (dump_file, "Considering inline candidate %s.\n",
2336 callee->name ());
2338 if (!can_early_inline_edge_p (e))
2339 continue;
2341 if (e->recursive_p ())
2343 if (dump_file)
2344 fprintf (dump_file, " Not inlining: recursive call.\n");
2345 continue;
2348 if (!want_early_inline_function_p (e))
2349 continue;
2351 if (dump_file)
2352 fprintf (dump_file, " Inlining %s into %s.\n",
2353 xstrdup (callee->name ()),
2354 xstrdup (e->caller->name ()));
2355 inline_call (e, true, NULL, NULL, true);
2356 inlined = true;
2359 return inlined;
2362 /* Do inlining of small functions. Doing so early helps profiling and other
2363 passes to be somewhat more effective and avoids some code duplication in
2364 the later real inlining pass for testcases with very many function calls. */
2366 namespace {
2368 const pass_data pass_data_early_inline =
2370 GIMPLE_PASS, /* type */
2371 "einline", /* name */
2372 OPTGROUP_INLINE, /* optinfo_flags */
2373 TV_EARLY_INLINING, /* tv_id */
2374 PROP_ssa, /* properties_required */
2375 0, /* properties_provided */
2376 0, /* properties_destroyed */
2377 0, /* todo_flags_start */
2378 0, /* todo_flags_finish */
2381 class pass_early_inline : public gimple_opt_pass
2383 public:
2384 pass_early_inline (gcc::context *ctxt)
2385 : gimple_opt_pass (pass_data_early_inline, ctxt)
2388 /* opt_pass methods: */
2389 virtual unsigned int execute (function *);
2391 }; // class pass_early_inline
2393 unsigned int
2394 pass_early_inline::execute (function *fun)
2396 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2397 struct cgraph_edge *edge;
2398 unsigned int todo = 0;
2399 int iterations = 0;
2400 bool inlined = false;
2402 if (seen_error ())
2403 return 0;
2405 /* Do nothing if data structures for the IPA inliner are already computed.
2406 This happens when some pass decides to construct a new function and
2407 cgraph_add_new_function calls lowering passes and early optimization on
2408 it. This may confuse us when the early inliner decides to inline a call
2409 to a function clone, because function clones don't have a parameter
2410 list in ipa-prop matching their signature. */
2411 if (ipa_node_params_vector.exists ())
2412 return 0;
2414 #ifdef ENABLE_CHECKING
2415 node->verify ();
2416 #endif
2417 node->remove_all_references ();
2419 /* Even when not optimizing or not inlining, inline always-inline
2420 functions. */
2421 inlined = inline_always_inline_functions (node);
2423 if (!optimize
2424 || flag_no_inline
2425 || !flag_early_inlining
2426 /* Never inline regular functions into always-inline functions
2427 during incremental inlining. This is unfortunate, as functions
2428 calling always-inline functions will get less optimized, but at the
2429 same time inlining a function that calls an always-inline function
2430 into an always-inline function might introduce cycles of edges
2431 to be always inlined in the callgraph.
2433 We might want to be smarter and just avoid this type of inlining. */
2434 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2436 else if (lookup_attribute ("flatten",
2437 DECL_ATTRIBUTES (node->decl)) != NULL)
2439 /* When the function is marked to be flattened, recursively inline
2440 all calls in it. */
2441 if (dump_file)
2442 fprintf (dump_file,
2443 "Flattening %s\n", node->name ());
2444 flatten_function (node, true);
2445 inlined = true;
2447 else
2449 /* We iterate incremental inlining to get trivial cases of indirect
2450 inlining. */
2451 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2452 && early_inline_small_functions (node))
2454 timevar_push (TV_INTEGRATION);
2455 todo |= optimize_inline_calls (current_function_decl);
2457 /* Technically we ought to recompute inline parameters so the new
2458 iteration of the early inliner works as expected. However, the values
2459 we have are approximately right, so we only need to update edge
2460 info that might be cleared out for newly discovered edges. */
2461 for (edge = node->callees; edge; edge = edge->next_callee)
2463 struct inline_edge_summary *es = inline_edge_summary (edge);
2464 es->call_stmt_size
2465 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2466 es->call_stmt_time
2467 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2468 if (edge->callee->decl
2469 && !gimple_check_call_matching_types (
2470 edge->call_stmt, edge->callee->decl, false))
2471 edge->call_stmt_cannot_inline_p = true;
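/* Recompute the overall summary only when another iteration may follow.  */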
2473 if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2474 inline_update_overall_summary (node);
2475 timevar_pop (TV_INTEGRATION);
2476 iterations++;
2477 inlined = false;
2479 if (dump_file)
2480 fprintf (dump_file, "Iterations: %i\n", iterations);
2483 if (inlined)
2485 timevar_push (TV_INTEGRATION);
2486 todo |= optimize_inline_calls (current_function_decl);
2487 timevar_pop (TV_INTEGRATION);
2490 fun->always_inline_functions_inlined = true;
2492 return todo;
2495 } // anon namespace
2497 gimple_opt_pass *
2498 make_pass_early_inline (gcc::context *ctxt)
2500 return new pass_early_inline (ctxt);
2503 namespace {
2505 const pass_data pass_data_ipa_inline =
2507 IPA_PASS, /* type */
2508 "inline", /* name */
2509 OPTGROUP_INLINE, /* optinfo_flags */
2510 TV_IPA_INLINING, /* tv_id */
2511 0, /* properties_required */
2512 0, /* properties_provided */
2513 0, /* properties_destroyed */
2514 0, /* todo_flags_start */
2515 ( TODO_dump_symtab ), /* todo_flags_finish */
2518 class pass_ipa_inline : public ipa_opt_pass_d
2520 public:
2521 pass_ipa_inline (gcc::context *ctxt)
2522 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2523 inline_generate_summary, /* generate_summary */
2524 inline_write_summary, /* write_summary */
2525 inline_read_summary, /* read_summary */
2526 NULL, /* write_optimization_summary */
2527 NULL, /* read_optimization_summary */
2528 NULL, /* stmt_fixup */
2529 0, /* function_transform_todo_flags_start */
2530 inline_transform, /* function_transform */
2531 NULL) /* variable_transform */
2534 /* opt_pass methods: */
2535 virtual unsigned int execute (function *) { return ipa_inline (); }
2537 }; // class pass_ipa_inline
2539 } // anon namespace
2541 ipa_opt_pass_d *
2542 make_pass_ipa_inline (gcc::context *ctxt)
2544 return new pass_ipa_inline (ctxt);