gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Inlining decision heuristics
24 The implementation of the inliner is organized as follows:
26 inlining heuristics limits
28 can_inline_edge_p allows checking that a particular inlining is allowed
29 by the limits specified by the user (allowed function growth and so
30 on).
32 Functions are inlined when it is obvious the result is profitable (such
33 as functions called once or when inlining reduces code size).
34 In addition to that we perform inlining of small functions and recursive
35 inlining.
37 inlining heuristics
39 The inliner itself is split into two passes:
41 pass_early_inlining
43 Simple local inlining pass inlining callees into current function.
44 This pass makes no use of whole unit analysis and thus it can make only
45 very simple decisions based on local properties.
47 The strength of the pass is that it is run in topological order
48 (reverse postorder) on the callgraph. Functions are converted into SSA
49 form just before this pass and optimized subsequently. As a result, the
50 callees of the function seen by the early inliner were already optimized
51 and the results of early inlining add a lot of optimization opportunities
52 for local optimization.
54 The pass handles the obvious inlining decisions within the compilation
55 unit - inlining auto inline functions, inlining for size and
56 flattening.
58 The main strength of the pass is the ability to eliminate abstraction
59 penalty in C++ code (via combination of inlining and early
60 optimization) and thus improve quality of analysis done by real IPA
61 optimizers.
63 Because of the lack of whole unit knowledge, the pass cannot really make
64 good code size/performance tradeoffs. It however does very simple
65 speculative inlining allowing code size to grow by
66 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
67 optimizations performed later are very likely to eliminate the cost.
69 pass_ipa_inline
71 This is the real inliner able to handle inlining with whole program
72 knowledge. It performs the following steps:
74 1) inlining of small functions. This is implemented by a greedy
75 algorithm ordering all inlinable cgraph edges by their badness and
76 inlining them in this order as long as the inline limits allow doing so.
78 This heuristic is not very good at inlining recursive calls. Recursive
79 calls can be inlined with results similar to loop unrolling. To do so,
80 a special purpose recursive inliner is executed on the function when a
81 recursive edge is met as a viable candidate.
83 2) Unreachable functions are removed from the callgraph. Inlining leads
84 to devirtualization and other modifications of the callgraph so functions
85 may become unreachable during the process. Also functions declared as
86 extern inline or virtual functions are removed, since after inlining
87 we no longer need the offline bodies.
89 3) Functions called once and not exported from the unit are inlined.
90 This should almost always lead to a reduction of code size by eliminating
91 the need for an offline copy of the function. */
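/* Illustrative sketch, not part of this file: the kind of source-level
   attributes the heuristics above react to.  The function names below are
   hypothetical.

     static inline int add1 (int x) { return x + 1; }    // ordinary inline;
                                                          // subject to the small
                                                          // function limits
     static inline int __attribute__ ((always_inline))
     add2 (int x) { return x + 2; }                       // inlined regardless of
                                                          // the size limits
     int __attribute__ ((flatten))
     kernel (int x) { return add1 (x) + add2 (x); }       // every call reachable
                                                          // from here is inlined
*/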
93 #include "config.h"
94 #include "system.h"
95 #include "coretypes.h"
96 #include "tm.h"
97 #include "tree.h"
98 #include "tree-inline.h"
99 #include "langhooks.h"
100 #include "flags.h"
101 #include "cgraph.h"
102 #include "diagnostic.h"
103 #include "gimple-pretty-print.h"
104 #include "timevar.h"
105 #include "params.h"
106 #include "fibheap.h"
107 #include "intl.h"
108 #include "tree-pass.h"
109 #include "coverage.h"
110 #include "ggc.h"
111 #include "rtl.h"
112 #include "tree-flow.h"
113 #include "ipa-prop.h"
114 #include "except.h"
115 #include "target.h"
116 #include "ipa-inline.h"
117 #include "ipa-utils.h"
119 /* Statistics we collect about inlining algorithm. */
120 static int overall_size;
121 static gcov_type max_count;
123 /* Return false when inlining edge E would lead to violating
124 limits on function unit growth or stack usage growth.
126 The relative function body growth limit is present generally
127 to avoid problems with non-linear behavior of the compiler.
128 To allow inlining huge functions into a tiny wrapper, the limit
129 is always based on the bigger of the two functions considered.
131 For stack growth limits we always base the growth on the stack usage
132 of the caller. We want to prevent applications from segfaulting
133 on stack overflow when functions with huge stack frames get
134 inlined. */
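/* A hypothetical worked example of the limits computed below (the numbers are
   illustrative, not the parameter defaults): if the largest self_size on the
   chain of callers we are inlined into is 1000 and PARAM_LARGE_FUNCTION_GROWTH
   is 100 (percent), the size limit becomes 1000 + 1000 * 100 / 100 == 2000.
   Likewise, a caller stack frame of 256 bytes with PARAM_STACK_FRAME_GROWTH of
   1000 (percent) allows the inlined stack to reach
   256 + 256 * 1000 / 100 == 2816 bytes.  */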
136 static bool
137 caller_growth_limits (struct cgraph_edge *e)
139 struct cgraph_node *to = e->caller;
140 struct cgraph_node *what = e->callee;
141 int newsize;
142 int limit = 0;
143 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
144 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
146 /* Look for the function e->caller is inlined into. While doing
147 so, work out the largest function body on the way. As
148 described above, we want to base our function growth
149 limits on that; not on the self size of the
150 outer function, nor on the self size of the inline code
151 we immediately inline into. This is the most relaxed
152 interpretation of the rule "do not grow large functions
153 too much in order to prevent the compiler from exploding". */
154 while (true)
156 info = inline_summary (to);
157 if (limit < info->self_size)
158 limit = info->self_size;
159 if (stack_size_limit < info->estimated_self_stack_size)
160 stack_size_limit = info->estimated_self_stack_size;
161 if (to->global.inlined_to)
162 to = to->callers->caller;
163 else
164 break;
167 what_info = inline_summary (what);
169 if (limit < what_info->self_size)
170 limit = what_info->self_size;
172 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
174 /* Check the size after inlining against the function limits. But allow
175 the function to shrink if it went over the limits by forced inlining. */
176 newsize = estimate_size_after_inlining (to, e);
177 if (newsize >= info->size
178 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
179 && newsize > limit)
181 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
182 return false;
185 if (!what_info->estimated_stack_size)
186 return true;
188 /* FIXME: Stack size limit often prevents inlining in Fortran programs
189 due to large I/O data structures used by the Fortran front-end.
190 We ought to ignore this limit when we know that the edge is executed
191 on every invocation of the caller (i.e. its call statement dominates
192 exit block). We do not track this information, yet. */
193 stack_size_limit += ((gcov_type)stack_size_limit
194 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
196 inlined_stack = (outer_info->stack_frame_offset
197 + outer_info->estimated_self_stack_size
198 + what_info->estimated_stack_size);
199 /* Check new stack consumption with stack consumption at the place
200 stack is used. */
201 if (inlined_stack > stack_size_limit
202 /* If the function already has large stack usage from a sibling
203 inline call, we can inline, too.
204 This bit overoptimistically assumes that we are good at stack
205 packing. */
206 && inlined_stack > info->estimated_stack_size
207 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
209 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
210 return false;
212 return true;
215 /* Dump info about why inlining has failed. */
217 static void
218 report_inline_failed_reason (struct cgraph_edge *e)
220 if (dump_file)
222 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
223 cgraph_node_name (e->caller), e->caller->uid,
224 cgraph_node_name (e->callee), e->callee->uid,
225 cgraph_inline_failed_string (e->inline_failed));
229 /* Decide if we can inline the edge and possibly update
230 inline_failed reason.
231 We check whether inlining is possible at all and whether
232 caller growth limits allow doing so.
234 If REPORT is true, output the reason to the dump file. */
236 static bool
237 can_inline_edge_p (struct cgraph_edge *e, bool report)
239 bool inlinable = true;
240 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
241 tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->callee->decl);
243 gcc_assert (e->inline_failed);
245 if (!e->callee->analyzed)
247 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
248 inlinable = false;
250 else if (!inline_summary (e->callee)->inlinable)
252 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
253 inlinable = false;
255 else if (cgraph_function_body_availability (e->callee) <= AVAIL_OVERWRITABLE)
257 e->inline_failed = CIF_OVERWRITABLE;
258 return false;
260 else if (e->call_stmt_cannot_inline_p)
262 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
263 inlinable = false;
265 /* Don't inline if the functions have different EH personalities. */
266 else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
267 && DECL_FUNCTION_PERSONALITY (e->callee->decl)
268 && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
269 != DECL_FUNCTION_PERSONALITY (e->callee->decl)))
271 e->inline_failed = CIF_EH_PERSONALITY;
272 inlinable = false;
274 /* Don't inline if the callee can throw non-call exceptions but the
275 caller cannot.
276 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
277 Move the flag into cgraph node or mirror it in the inline summary. */
278 else if (DECL_STRUCT_FUNCTION (e->callee->decl)
279 && DECL_STRUCT_FUNCTION
280 (e->callee->decl)->can_throw_non_call_exceptions
281 && !(DECL_STRUCT_FUNCTION (e->caller->decl)
282 && DECL_STRUCT_FUNCTION
283 (e->caller->decl)->can_throw_non_call_exceptions))
285 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
286 inlinable = false;
288 /* Check compatibility of target optimization options. */
289 else if (!targetm.target_option.can_inline_p (e->caller->decl,
290 e->callee->decl))
292 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
293 inlinable = false;
295 /* Check if caller growth allows the inlining. */
296 else if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)
297 && !lookup_attribute ("flatten",
298 DECL_ATTRIBUTES
299 (e->caller->global.inlined_to
300 ? e->caller->global.inlined_to->decl
301 : e->caller->decl))
302 && !caller_growth_limits (e))
303 inlinable = false;
304 /* Don't inline a function with a higher optimization level than the
305 caller. FIXME: this is really just tip of iceberg of handling
306 optimization attribute. */
307 else if (caller_tree != callee_tree)
309 struct cl_optimization *caller_opt
310 = TREE_OPTIMIZATION ((caller_tree)
311 ? caller_tree
312 : optimization_default_node);
314 struct cl_optimization *callee_opt
315 = TREE_OPTIMIZATION ((callee_tree)
316 ? callee_tree
317 : optimization_default_node);
319 if ((caller_opt->x_optimize > callee_opt->x_optimize)
320 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
322 e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH;
323 inlinable = false;
327 /* Be sure that the cannot_inline_p flag is up to date. */
328 gcc_checking_assert (!e->call_stmt
329 || (gimple_call_cannot_inline_p (e->call_stmt)
330 == e->call_stmt_cannot_inline_p)
331 /* In -flto-partition=none mode we really keep things out of
332 sync because call_stmt_cannot_inline_p is set at cgraph
333 merging when function bodies are not there yet. */
334 || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt)));
335 if (!inlinable && report)
336 report_inline_failed_reason (e);
337 return inlinable;
341 /* Return true if the edge E is inlinable during early inlining. */
343 static bool
344 can_early_inline_edge_p (struct cgraph_edge *e)
346 /* The early inliner might get called at WPA stage when an IPA pass adds a new
347 function. In this case we cannot really do any early inlining
348 because function bodies are missing. */
349 if (!gimple_has_body_p (e->callee->decl))
351 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
352 return false;
354 /* In the early inliner some of the callees may not be in SSA form yet
355 (i.e. the callgraph is cyclic and we have not processed
356 the callee with the early inliner yet). We don't have a CIF code for this
357 case; later we will re-do the decision in the real inliner. */
358 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
359 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
361 if (dump_file)
362 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
363 return false;
365 if (!can_inline_edge_p (e, true))
366 return false;
367 return true;
371 /* Return true when N is a leaf function. Accept cheap builtins
372 in leaf functions. */
374 static bool
375 leaf_node_p (struct cgraph_node *n)
377 struct cgraph_edge *e;
378 for (e = n->callees; e; e = e->next_callee)
379 if (!is_inexpensive_builtin (e->callee->decl))
380 return false;
381 return true;
385 /* Return true if we are interested in early inlining the small function called by edge E. */
387 static bool
388 want_early_inline_function_p (struct cgraph_edge *e)
390 bool want_inline = true;
392 if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
394 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
395 && !flag_inline_small_functions)
397 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
398 report_inline_failed_reason (e);
399 want_inline = false;
401 else
403 int growth = estimate_edge_growth (e);
404 if (growth <= 0)
406 else if (!cgraph_maybe_hot_edge_p (e)
407 && growth > 0)
409 if (dump_file)
410 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
411 "call is cold and code would grow by %i\n",
412 cgraph_node_name (e->caller), e->caller->uid,
413 cgraph_node_name (e->callee), e->callee->uid,
414 growth);
415 want_inline = false;
417 else if (!leaf_node_p (e->callee)
418 && growth > 0)
420 if (dump_file)
421 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
422 "callee is not leaf and code would grow by %i\n",
423 cgraph_node_name (e->caller), e->caller->uid,
424 cgraph_node_name (e->callee), e->callee->uid,
425 growth);
426 want_inline = false;
428 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
430 if (dump_file)
431 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
432 "growth %i exceeds --param early-inlining-insns\n",
433 cgraph_node_name (e->caller), e->caller->uid,
434 cgraph_node_name (e->callee), e->callee->uid,
435 growth);
436 want_inline = false;
439 return want_inline;
442 /* Return true if we are interested in inlining the small function called by edge E.
443 When REPORT is true, report the reason to the dump file. */
445 static bool
446 want_inline_small_function_p (struct cgraph_edge *e, bool report)
448 bool want_inline = true;
450 if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
452 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
453 && !flag_inline_small_functions)
455 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
456 want_inline = false;
458 else
460 int growth = estimate_edge_growth (e);
462 if (growth <= 0)
464 else if (DECL_DECLARED_INLINE_P (e->callee->decl)
465 && growth >= MAX_INLINE_INSNS_SINGLE)
467 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
468 want_inline = false;
470 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
471 && !flag_inline_functions)
473 e->inline_failed = CIF_NOT_DECLARED_INLINED;
474 want_inline = false;
476 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
477 && growth >= MAX_INLINE_INSNS_AUTO)
479 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
480 want_inline = false;
482 /* If the call is cold, do not inline when the function body would grow.
483 Still inline when the overall unit size will shrink because the offline
484 copy of the function is eliminated.
486 This is slightly wrong on the aggressive side: it is entirely possible
487 that the function is called many times with a context where inlining
488 reduces code size and a few times with a context where inlining increases
489 code size. The resulting growth estimate will be negative even if it
490 would make more sense to keep the offline copy and not inline into the
491 call sites that make the code size grow.
493 When badness orders the calls in a way that code reducing calls come
494 first, this situation is not a problem at all: after inlining all
495 "good" calls, we will realize that keeping the function around is
496 better. */
497 else if (!cgraph_maybe_hot_edge_p (e)
498 && (DECL_EXTERNAL (e->callee->decl)
500 /* Unlike for functions called once, we play unsafe with
501 COMDATs. We can allow that since we know the functions
502 under consideration are small (and thus the risk is small) and
503 moreover the growth estimates already account for the fact that COMDAT
504 functions may or may not disappear when eliminated from the
505 current unit. With good probability, making the aggressive
506 choice in all units is going to make the overall program
507 smaller.
509 Consequently we ask cgraph_can_remove_if_no_direct_calls_p
510 instead of
511 cgraph_will_be_removed_from_program_if_no_direct_calls */
513 || !cgraph_can_remove_if_no_direct_calls_p (e->callee)
514 || estimate_growth (e->callee) > 0))
516 e->inline_failed = CIF_UNLIKELY_CALL;
517 want_inline = false;
520 if (!want_inline && report)
521 report_inline_failed_reason (e);
522 return want_inline;
525 /* EDGE is a self recursive edge.
526 We handle two cases - when function A is inlined into itself
527 or when function A is being inlined into another inlined copy of function
528 A within function B.
530 In the first case OUTER_NODE points to the toplevel copy of A, while
531 in the second case OUTER_NODE points to the outermost copy of A in B.
533 In both cases we want to be extra selective since
534 inlining the call will just cause new recursive calls to appear. */
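/* Hypothetical shapes of the two cases, for orientation only:

     int a (int n) { return n ? a (n - 1) : 0; }   // case 1: the recursive call
                                                    // in A is inlined into A itself
     int b (int n) { return a (n); }                // case 2: after A is inlined
                                                    // into B, the copy of A inside B
                                                    // still calls the offline A
*/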
536 static bool
537 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
538 struct cgraph_node *outer_node,
539 bool peeling,
540 int depth)
542 char const *reason = NULL;
543 bool want_inline = true;
544 int caller_freq = CGRAPH_FREQ_BASE;
545 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
547 if (DECL_DECLARED_INLINE_P (edge->callee->decl))
548 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
550 if (!cgraph_maybe_hot_edge_p (edge))
552 reason = "recursive call is cold";
553 want_inline = false;
555 else if (max_count && !outer_node->count)
557 reason = "not executed in profile";
558 want_inline = false;
560 else if (depth > max_depth)
562 reason = "--param max-inline-recursive-depth exceeded.";
563 want_inline = false;
566 if (outer_node->global.inlined_to)
567 caller_freq = outer_node->callers->frequency;
569 if (!want_inline)
571 /* Inlining of a self recursive function into a copy of itself within another function
572 is a transformation similar to loop peeling.
574 Peeling is profitable if we can inline enough copies to make the probability
575 of an actual call to the self recursive function very small. Be sure that
576 the probability of recursion is small.
578 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
579 This way the expected number of recursions is at most max_depth. */
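/* For example (assuming CGRAPH_FREQ_BASE of 1000): with max_depth == 8 the
   initial threshold max_prob is 1000 - ceil(1000/8) == 875, i.e. about 7/8 of
   CGRAPH_FREQ_BASE, and it is squared (rescaled to CGRAPH_FREQ_BASE) once for
   each level of recursion already present, so deeper copies are peeled only
   for progressively colder recursive edges.  */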
580 else if (peeling)
582 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
583 / max_depth);
584 int i;
585 for (i = 1; i < depth; i++)
586 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
587 if (max_count
588 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
589 >= max_prob))
591 reason = "profile of recursive call is too large";
592 want_inline = false;
594 if (!max_count
595 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
596 >= max_prob))
598 reason = "frequency of recursive call is too large";
599 want_inline = false;
602 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the recursion
603 depth is large. We reduce function call overhead and increase the chances that
604 things fit in the hardware return predictor.
606 Recursive inlining might however increase the cost of stack frame setup,
607 actually slowing down functions whose recursion tree is wide rather than
608 deep.
610 Deciding reliably on when to do recursive inlining without profile feedback
611 is tricky. For now we disable recursive inlining when the probability of self
612 recursion is low.
614 Recursive inlining of a self recursive call within a loop also results in large loop
615 depths that generally optimize badly. We may want to throttle down inlining
616 in those cases. In particular this seems to happen in one of the libstdc++ rb tree
617 methods.
618 else
620 if (max_count
621 && (edge->count * 100 / outer_node->count
622 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
624 reason = "profile of recursive call is too small";
625 want_inline = false;
627 else if (!max_count
628 && (edge->frequency * 100 / caller_freq
629 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
631 reason = "frequency of recursive call is too small";
632 want_inline = false;
635 if (!want_inline && dump_file)
636 fprintf (dump_file, " not inlining recursively: %s\n", reason);
637 return want_inline;
641 /* Decide if NODE is called once and inlining it would eliminate the need
642 for the offline copy of the function. */
644 static bool
645 want_inline_function_called_once_p (struct cgraph_node *node)
647 /* Already inlined? */
648 if (node->global.inlined_to)
649 return false;
650 /* Zero or more than one caller? */
651 if (!node->callers
652 || node->callers->next_caller)
653 return false;
654 /* Recursive call makes no sense to inline. */
655 if (node->callers->caller == node)
656 return false;
657 /* External functions are not really in the unit, so inlining
658 them when called once would just increase the program size. */
659 if (DECL_EXTERNAL (node->decl))
660 return false;
661 /* Offline body must be optimized out. */
662 if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node))
663 return false;
664 if (!can_inline_edge_p (node->callers, true))
665 return false;
666 return true;
670 /* Return relative time improvement for inlining EDGE in range
671 1...2^9. */
673 static inline int
674 relative_time_benefit (struct inline_summary *callee_info,
675 struct cgraph_edge *edge,
676 int time_growth)
678 int relbenefit;
679 gcov_type uninlined_call_time;
681 uninlined_call_time =
682 ((gcov_type)
683 (callee_info->time
684 + inline_edge_summary (edge)->call_stmt_time
685 + CGRAPH_FREQ_BASE / 2) * edge->frequency
686 / CGRAPH_FREQ_BASE);
687 /* Compute relative time benefit, i.e. how much the call becomes faster.
688 ??? perhaps computing how much the caller+callee together become faster
689 would lead to more realistic results. */
690 if (!uninlined_call_time)
691 uninlined_call_time = 1;
692 relbenefit =
693 (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
694 relbenefit = MIN (relbenefit, 512);
695 relbenefit = MAX (relbenefit, 1);
696 return relbenefit;
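/* A worked example with hypothetical numbers: if the estimated uninlined call
   time is 100 units and inlining the edge adds a time_growth of 50 units, the
   relative benefit is (100 - 50) * 256 / 100 == 128, i.e. roughly "the call
   gets twice as fast" on a fixed-point scale where 256 means the entire cost
   of the uninlined call is saved.  */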
700 /* A cost model driving the inlining heuristics in such a way that the edges with
701 the smallest badness are inlined first. After each inlining is performed
702 the costs of all caller edges of the nodes affected are recomputed so the
703 metrics may accurately depend on values such as the number of inlinable callers
704 of the function or the function body size. */
706 static int
707 edge_badness (struct cgraph_edge *edge, bool dump)
709 gcov_type badness;
710 int growth, time_growth;
711 struct inline_summary *callee_info = inline_summary (edge->callee);
713 if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
714 return INT_MIN;
716 growth = estimate_edge_growth (edge);
717 time_growth = estimate_edge_time (edge);
719 if (dump)
721 fprintf (dump_file, " Badness calculation for %s -> %s\n",
722 cgraph_node_name (edge->caller),
723 cgraph_node_name (edge->callee));
724 fprintf (dump_file, " size growth %i, time growth %i\n",
725 growth,
726 time_growth);
729 /* Always prefer inlining that saves code size. */
730 if (growth <= 0)
732 badness = INT_MIN / 2 + growth;
733 if (dump)
734 fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness,
735 growth);
738 /* When profiling is available, compute badness as:
740                 relative_edge_count * relative_time_benefit
741     goodness = -------------------------------------------
742                              edge_growth
743     badness = -goodness
745 The fraction is upside down, because for edge counts and time benefits
746 the bounds are known. Edge growth is essentially unlimited. */
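/* A hypothetical worked instance of the formula above: for the hottest edge
   (edge->count == max_count), a relative time benefit of 256 and a size growth
   of 10, the code below computes roughly
     (INT_MIN / 2 / 512) * 256 / 10  ~=  INT_MIN / 40,
   a strongly negative value, so the edge sorts near the front of the heap.  */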
748 else if (max_count)
750 int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
751 badness =
752 ((int)
753 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
754 relative_time_benefit (callee_info, edge, time_growth)) / growth;
756 /* Be sure that insanity of the profile won't lead to increasing counts
757 in the scaling and thus to overflow in the computation above. */
758 gcc_assert (max_count >= edge->count);
759 if (dump)
761 fprintf (dump_file,
762 " %i (relative %f): profile info. Relative count %f"
763 " * Relative benefit %f\n",
764 (int) badness, (double) badness / INT_MIN,
765 (double) edge->count / max_count,
766 relbenefit * 100 / 256.0);
770 /* When the function local profile is available, compute badness as:
773                      growth_of_callee
774     badness = -------------------------------------- + growth_for_all
775             relative_time_benefit * edge_frequency  */
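/* A hypothetical worked instance: for an edge with frequency equal to
   CGRAPH_FREQ_MAX (executed on every invocation) and a relative time benefit
   of 256, the divisor below becomes 1024 * 256 == 2^18, so a size growth of 10
   gives a badness of 10 * 2^18 / 2^18 == 10, before the overall growth of the
   callee is added in.  */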
778 else if (flag_guess_branch_prob)
780 int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
781 int growth_for_all;
783 div = MAX (div, 1);
784 gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
785 div *= relative_time_benefit (callee_info, edge, time_growth);
787 /* frequency is normalized in range 1...2^10.
788 relbenefit in range 1...2^9
789 DIV should be in range 1...2^19. */
790 gcc_checking_assert (div >= 1 && div <= (1<<19));
792 /* Result must be integer in range 0...INT_MAX.
793 Set the base of fixed point calculation so we don't lose much of
794 precision for small badnesses (those are interesting) yet we don't
795 overflow for growths that are still in interesting range. */
796 badness = ((gcov_type)growth) * (1<<18);
797 badness = (badness + div / 2) / div;
799 /* The overall growth of inlining all calls of the function matters: we want to
800 inline so that the offline copy of the function is no longer needed.
802 Additionally, functions that can be fully inlined without much
803 effort are better inline candidates than functions that can be fully
804 inlined only after noticeable overall unit growth. The latter
805 are better in the sense of compressing code size by factoring out common
806 code into a separate function shared by multiple code paths.
808 We might mix this value into the fraction by taking into account the
809 relative growth of the unit, but for now just add the number
810 into the resulting fraction. */
811 growth_for_all = estimate_growth (edge->callee);
812 badness += growth_for_all;
813 if (badness > INT_MAX - 1)
814 badness = INT_MAX - 1;
815 if (dump)
817 fprintf (dump_file,
818 " %i: guessed profile. frequency %f, overall growth %i,"
819 " benefit %f%%, divisor %i\n",
820 (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE, growth_for_all,
821 relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
824 /* When the function local profile is not available or it does not give
825 useful information (i.e. frequency is zero), base the cost on the
826 loop nest and overall size growth, so we optimize for the overall number
827 of functions fully inlined in the program. */
828 else
830 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
831 badness = estimate_growth (edge->callee) * 256;
833 /* Decrease badness if call is nested. */
834 if (badness > 0)
835 badness >>= nest;
836 else
838 badness <<= nest;
840 if (dump)
841 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
842 nest);
845 /* Ensure that we did not overflow in all the fixed point math above. */
846 gcc_assert (badness >= INT_MIN);
847 gcc_assert (badness <= INT_MAX - 1);
848 /* Make recursive inlining always happen after other inlining is done. */
849 if (cgraph_edge_recursive_p (edge))
850 return badness + 1;
851 else
852 return badness;
855 /* Recompute badness of EDGE and update its key in HEAP if needed. */
856 static inline void
857 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
859 int badness = edge_badness (edge, false);
860 if (edge->aux)
862 fibnode_t n = (fibnode_t) edge->aux;
863 gcc_checking_assert (n->data == edge);
865 /* fibheap_replace_key only decreases the keys.
866 When we increase the key we do not update the heap
867 and instead re-insert the element once it becomes
868 the minimum of the heap. */
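/* For instance, if an edge's badness rises from 10 to 50 we deliberately leave
   the stale key 10 in the heap; inline_small_functions recomputes the badness
   when the edge is popped and, seeing the mismatch, re-inserts it with the
   larger key instead of inlining it.  */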
869 if (badness < n->key)
871 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file,
874 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
875 cgraph_node_name (edge->caller), edge->caller->uid,
876 cgraph_node_name (edge->callee), edge->callee->uid,
877 (int)n->key,
878 badness);
880 fibheap_replace_key (heap, n, badness);
881 gcc_checking_assert (n->key == badness);
884 else
886 if (dump_file && (dump_flags & TDF_DETAILS))
888 fprintf (dump_file,
889 " enqueuing call %s/%i -> %s/%i, badness %i\n",
890 cgraph_node_name (edge->caller), edge->caller->uid,
891 cgraph_node_name (edge->callee), edge->callee->uid,
892 badness);
894 edge->aux = fibheap_insert (heap, badness, edge);
899 /* NODE was inlined.
900 All caller edges need to be reset because
901 size estimates change. Similarly callees need a reset
902 because a better context may be known. */
904 static void
905 reset_edge_caches (struct cgraph_node *node)
907 struct cgraph_edge *edge;
908 struct cgraph_edge *e = node->callees;
909 struct cgraph_node *where = node;
911 if (where->global.inlined_to)
912 where = where->global.inlined_to;
914 /* WHERE body size has changed, the cached growth is invalid. */
915 reset_node_growth_cache (where);
917 for (edge = where->callers; edge; edge = edge->next_caller)
918 if (edge->inline_failed)
919 reset_edge_growth_cache (edge);
921 if (!e)
922 return;
924 while (true)
925 if (!e->inline_failed && e->callee->callees)
926 e = e->callee->callees;
927 else
929 if (e->inline_failed)
930 reset_edge_growth_cache (e);
931 if (e->next_callee)
932 e = e->next_callee;
933 else
937 if (e->caller == node)
938 return;
939 e = e->caller->callers;
941 while (!e->next_callee);
942 e = e->next_callee;
947 /* Recompute HEAP nodes for each caller of NODE.
948 UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
949 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
950 it is inlinable. Otherwise check all edges. */
952 static void
953 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
954 bitmap updated_nodes,
955 struct cgraph_edge *check_inlinablity_for)
957 struct cgraph_edge *edge;
959 if (!inline_summary (node)->inlinable
960 || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
961 || node->global.inlined_to)
962 return;
963 if (!bitmap_set_bit (updated_nodes, node->uid))
964 return;
966 for (edge = node->callers; edge; edge = edge->next_caller)
967 if (edge->inline_failed)
969 if (!check_inlinablity_for
970 || check_inlinablity_for == edge)
972 if (can_inline_edge_p (edge, false)
973 && want_inline_small_function_p (edge, false))
974 update_edge_key (heap, edge);
975 else if (edge->aux)
977 report_inline_failed_reason (edge);
978 fibheap_delete_node (heap, (fibnode_t) edge->aux);
979 edge->aux = NULL;
982 else if (edge->aux)
983 update_edge_key (heap, edge);
987 /* Recompute HEAP nodes for each uninlined call in NODE.
988 This is used when we know that edge badnesses are only going to increase
989 (we introduced a new call site) and thus all we need is to insert the newly
990 created edges into the heap. */
992 static void
993 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
994 bitmap updated_nodes)
996 struct cgraph_edge *e = node->callees;
998 if (!e)
999 return;
1000 while (true)
1001 if (!e->inline_failed && e->callee->callees)
1002 e = e->callee->callees;
1003 else
1005 /* We do not reset the callee growth cache here. Since we added a new call,
1006 growth could only have increased and consequently the badness metric
1007 doesn't need updating. */
1008 if (e->inline_failed
1009 && inline_summary (e->callee)->inlinable
1010 && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE
1011 && !bitmap_bit_p (updated_nodes, e->callee->uid))
1013 if (can_inline_edge_p (e, false)
1014 && want_inline_small_function_p (e, false))
1015 update_edge_key (heap, e);
1016 else if (e->aux)
1018 report_inline_failed_reason (e);
1019 fibheap_delete_node (heap, (fibnode_t) e->aux);
1020 e->aux = NULL;
1023 if (e->next_callee)
1024 e = e->next_callee;
1025 else
1029 if (e->caller == node)
1030 return;
1031 e = e->caller->callers;
1033 while (!e->next_callee);
1034 e = e->next_callee;
1039 /* Recompute heap nodes for the caller edges of each of the callees.
1040 Walk recursively into all inline clones. */
1042 static void
1043 update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
1044 bitmap updated_nodes)
1046 struct cgraph_edge *e = node->callees;
1048 if (!e)
1049 return;
1050 while (true)
1051 if (!e->inline_failed && e->callee->callees)
1052 e = e->callee->callees;
1053 else
1055 /* We inlined and thus callees might have a different number of calls.
1056 Reset their caches. */
1057 reset_node_growth_cache (e->callee);
1058 if (e->inline_failed)
1059 update_caller_keys (heap, e->callee, updated_nodes, e);
1060 if (e->next_callee)
1061 e = e->next_callee;
1062 else
1066 if (e->caller == node)
1067 return;
1068 e = e->caller->callers;
1070 while (!e->next_callee);
1071 e = e->next_callee;
1076 /* Enqueue all recursive calls from NODE into the priority queue, keyed by
1077 how much we want to recursively inline the call. */
1079 static void
1080 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1081 fibheap_t heap)
1083 struct cgraph_edge *e;
1084 for (e = where->callees; e; e = e->next_callee)
1085 if (e->callee == node)
1087 /* When profile feedback is available, prioritize by expected number
1088 of calls. */
1089 fibheap_insert (heap,
1090 !max_count ? -e->frequency
1091 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1094 for (e = where->callees; e; e = e->next_callee)
1095 if (!e->inline_failed)
1096 lookup_recursive_calls (node, e->callee, heap);
1099 /* Decide on recursive inlining: in the case the function has recursive calls,
1100 inline until the body size reaches the allowed limit. If any new indirect edges
1101 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1102 is NULL. */
1104 static bool
1105 recursive_inlining (struct cgraph_edge *edge,
1106 VEC (cgraph_edge_p, heap) **new_edges)
1108 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1109 fibheap_t heap;
1110 struct cgraph_node *node;
1111 struct cgraph_edge *e;
1112 struct cgraph_node *master_clone = NULL, *next;
1113 int depth = 0;
1114 int n = 0;
1116 node = edge->caller;
1117 if (node->global.inlined_to)
1118 node = node->global.inlined_to;
1120 if (DECL_DECLARED_INLINE_P (node->decl))
1121 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1123 /* Make sure that function is small enough to be considered for inlining. */
1124 if (estimate_size_after_inlining (node, edge) >= limit)
1125 return false;
1126 heap = fibheap_new ();
1127 lookup_recursive_calls (node, node, heap);
1128 if (fibheap_empty (heap))
1130 fibheap_delete (heap);
1131 return false;
1134 if (dump_file)
1135 fprintf (dump_file,
1136 " Performing recursive inlining on %s\n",
1137 cgraph_node_name (node));
1139 /* Do the inlining and update the list of recursive calls during the process. */
1140 while (!fibheap_empty (heap))
1142 struct cgraph_edge *curr
1143 = (struct cgraph_edge *) fibheap_extract_min (heap);
1144 struct cgraph_node *cnode;
1146 if (estimate_size_after_inlining (node, curr) > limit)
1147 break;
1149 if (!can_inline_edge_p (curr, true))
1150 continue;
1152 depth = 1;
1153 for (cnode = curr->caller;
1154 cnode->global.inlined_to; cnode = cnode->callers->caller)
1155 if (node->decl == curr->callee->decl)
1156 depth++;
1158 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1159 continue;
1161 if (dump_file)
1163 fprintf (dump_file,
1164 " Inlining call of depth %i", depth);
1165 if (node->count)
1167 fprintf (dump_file, " called approx. %.2f times per call",
1168 (double)curr->count / node->count);
1170 fprintf (dump_file, "\n");
1172 if (!master_clone)
1174 /* We need original clone to copy around. */
1175 master_clone = cgraph_clone_node (node, node->decl,
1176 node->count, CGRAPH_FREQ_BASE,
1177 false, NULL, true);
1178 for (e = master_clone->callees; e; e = e->next_callee)
1179 if (!e->inline_failed)
1180 clone_inlined_nodes (e, true, false, NULL);
1183 cgraph_redirect_edge_callee (curr, master_clone);
1184 inline_call (curr, false, new_edges, &overall_size);
1185 lookup_recursive_calls (node, curr->callee, heap);
1186 n++;
1189 if (!fibheap_empty (heap) && dump_file)
1190 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1191 fibheap_delete (heap);
1193 if (!master_clone)
1194 return false;
1196 if (dump_file)
1197 fprintf (dump_file,
1198 "\n Inlined %i times, "
1199 "body grown from size %i to %i, time %i to %i\n", n,
1200 inline_summary (master_clone)->size, inline_summary (node)->size,
1201 inline_summary (master_clone)->time, inline_summary (node)->time);
1203 /* Remove the master clone we used for inlining. We rely on the fact that clones
1204 inlined into the master clone get queued just before the master clone so we don't
1205 need recursion. */
1206 for (node = cgraph_nodes; node != master_clone;
1207 node = next)
1209 next = node->next;
1210 if (node->global.inlined_to == master_clone)
1211 cgraph_remove_node (node);
1213 cgraph_remove_node (master_clone);
1214 return true;
1218 /* Given a whole compilation unit estimate of INSNS, compute how large we can
1219 allow the unit to grow. */
1221 static int
1222 compute_max_insns (int insns)
1224 int max_insns = insns;
1225 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1226 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1228 return ((HOST_WIDEST_INT) max_insns
1229 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
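/* For example (the parameter value is illustrative, not necessarily the
   default): with an initial unit size of 10000 and PARAM_INLINE_UNIT_GROWTH of
   30 (percent), the small function inliner may grow the unit to
   10000 * (100 + 30) / 100 == 13000 before further edges start failing with
   CIF_INLINE_UNIT_GROWTH_LIMIT.  */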
1233 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1235 static void
1236 add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
1238 while (VEC_length (cgraph_edge_p, new_edges) > 0)
1240 struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);
1242 gcc_assert (!edge->aux);
1243 if (inline_summary (edge->callee)->inlinable
1244 && edge->inline_failed
1245 && can_inline_edge_p (edge, true)
1246 && want_inline_small_function_p (edge, true))
1247 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1252 /* We use a greedy algorithm for inlining of small functions:
1253 all inline candidates are put into a prioritized heap ordered by
1254 increasing badness.
1256 The inlining of small functions is bounded by unit growth parameters. */
1258 static void
1259 inline_small_functions (void)
1261 struct cgraph_node *node;
1262 struct cgraph_edge *edge;
1263 fibheap_t heap = fibheap_new ();
1264 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1265 int min_size, max_size;
1266 VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
1267 int initial_size = 0;
1269 if (flag_indirect_inlining)
1270 new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);
1272 if (dump_file)
1273 fprintf (dump_file,
1274 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1275 initial_size);
1277 /* Compute overall unit size and other global parameters used by badness
1278 metrics. */
1280 max_count = 0;
1281 initialize_growth_caches ();
1283 FOR_EACH_DEFINED_FUNCTION (node)
1284 if (!node->global.inlined_to)
1286 struct inline_summary *info = inline_summary (node);
1288 if (!DECL_EXTERNAL (node->decl))
1289 initial_size += info->size;
1291 for (edge = node->callers; edge; edge = edge->next_caller)
1292 if (max_count < edge->count)
1293 max_count = edge->count;
1296 overall_size = initial_size;
1297 max_size = compute_max_insns (overall_size);
1298 min_size = overall_size;
1300 /* Populate the heap with all edges we might inline. */
1302 FOR_EACH_DEFINED_FUNCTION (node)
1303 if (!node->global.inlined_to)
1305 if (dump_file)
1306 fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
1307 cgraph_node_name (node), node->uid);
1309 for (edge = node->callers; edge; edge = edge->next_caller)
1310 if (edge->inline_failed
1311 && can_inline_edge_p (edge, true)
1312 && want_inline_small_function_p (edge, true)
1313 && edge->inline_failed)
1315 gcc_assert (!edge->aux);
1316 update_edge_key (heap, edge);
1320 gcc_assert (in_lto_p
1321 || !max_count
1322 || (profile_info && flag_branch_probabilities));
1324 while (!fibheap_empty (heap))
1326 int old_size = overall_size;
1327 struct cgraph_node *where, *callee;
1328 int badness = fibheap_min_key (heap);
1329 int current_badness;
1330 int growth;
1332 edge = (struct cgraph_edge *) fibheap_extract_min (heap);
1333 gcc_assert (edge->aux);
1334 edge->aux = NULL;
1335 if (!edge->inline_failed)
1336 continue;
1338 /* Be sure that the caches are kept consistent. */
1339 #ifdef ENABLE_CHECKING
1340 reset_edge_growth_cache (edge);
1341 reset_node_growth_cache (edge->callee);
1342 #endif
1344 /* When updating the edge costs, we only decrease badness in the keys.
1345 Increases of badness are handled lazily; when we see a key with an out
1346 of date value on it, we re-insert it now. */
1347 current_badness = edge_badness (edge, false);
1348 gcc_assert (current_badness >= badness);
1349 if (current_badness != badness)
1351 edge->aux = fibheap_insert (heap, current_badness, edge);
1352 continue;
1355 if (!can_inline_edge_p (edge, true))
1356 continue;
1358 callee = edge->callee;
1359 growth = estimate_edge_growth (edge);
1360 if (dump_file)
1362 fprintf (dump_file,
1363 "\nConsidering %s with %i size\n",
1364 cgraph_node_name (edge->callee),
1365 inline_summary (edge->callee)->size);
1366 fprintf (dump_file,
1367 " to be inlined into %s in %s:%i\n"
1368 " Estimated growth after inlined into all is %+i insns.\n"
1369 " Estimated badness is %i, frequency %.2f.\n",
1370 cgraph_node_name (edge->caller),
1371 flag_wpa ? "unknown"
1372 : gimple_filename ((const_gimple) edge->call_stmt),
1373 flag_wpa ? -1
1374 : gimple_lineno ((const_gimple) edge->call_stmt),
1375 estimate_growth (edge->callee),
1376 badness,
1377 edge->frequency / (double)CGRAPH_FREQ_BASE);
1378 if (edge->count)
1379 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1380 edge->count);
1381 if (dump_flags & TDF_DETAILS)
1382 edge_badness (edge, true);
1385 if (overall_size + growth > max_size
1386 && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
1388 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1389 report_inline_failed_reason (edge);
1390 continue;
1393 if (!want_inline_small_function_p (edge, true))
1394 continue;
1396 /* The heuristics for inlining small functions work poorly for
1397 recursive calls where we get an effect similar to loop unrolling.
1398 When inlining such an edge seems profitable, leave the decision to the
1399 special purpose inliner. */
1400 if (cgraph_edge_recursive_p (edge))
1402 where = edge->caller;
1403 if (where->global.inlined_to)
1404 where = where->global.inlined_to;
1405 if (!recursive_inlining (edge,
1406 flag_indirect_inlining
1407 ? &new_indirect_edges : NULL))
1409 edge->inline_failed = CIF_RECURSIVE_INLINING;
1410 continue;
1412 reset_edge_caches (where);
1413 /* Recursive inliner inlines all recursive calls of the function
1414 at once. Consequently we need to update all callee keys. */
1415 if (flag_indirect_inlining)
1416 add_new_edges_to_heap (heap, new_indirect_edges);
1417 update_all_callee_keys (heap, where, updated_nodes);
1419 else
1421 struct cgraph_node *callee;
1422 struct cgraph_node *outer_node = NULL;
1423 int depth = 0;
1425 /* Consider the case where self recursive function A is inlined into B.
1426 This is a desired optimization in some cases, since it leads to an effect
1427 similar to loop peeling and we might completely optimize out the
1428 recursive call. However we must be extra selective. */
1430 where = edge->caller;
1431 while (where->global.inlined_to)
1433 if (where->decl == edge->callee->decl)
1434 outer_node = where, depth++;
1435 where = where->callers->caller;
1437 if (outer_node
1438 && !want_inline_self_recursive_call_p (edge, outer_node,
1439 true, depth))
1441 edge->inline_failed
1442 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1443 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1444 continue;
1446 else if (depth && dump_file)
1447 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1449 callee = edge->callee;
1450 gcc_checking_assert (!callee->global.inlined_to);
1451 inline_call (edge, true, &new_indirect_edges, &overall_size);
1452 if (flag_indirect_inlining)
1453 add_new_edges_to_heap (heap, new_indirect_edges);
1455 reset_edge_caches (edge->callee);
1456 reset_node_growth_cache (callee);
1458 /* We inlined the last offline copy into the body. This might lead
1459 to callees of the function having fewer call sites and thus they
1460 may need updating. */
1461 if (callee->global.inlined_to)
1462 update_all_callee_keys (heap, callee, updated_nodes);
1463 else
1464 update_callee_keys (heap, edge->callee, updated_nodes);
1466 where = edge->caller;
1467 if (where->global.inlined_to)
1468 where = where->global.inlined_to;
1470 /* Our profitability metric can depend on local properties
1471 such as the number of inlinable calls and the size of the function body.
1472 After inlining these properties might change for the function we
1473 inlined into (since its body size changed) and for the functions
1474 called by the function we inlined (since the number of their inlinable callers
1475 might change). */
1476 update_caller_keys (heap, where, updated_nodes, NULL);
1478 /* We removed one call of the function we just inlined. If offline
1479 copy is still needed, be sure to update the keys. */
1480 if (callee != where && !callee->global.inlined_to)
1481 update_caller_keys (heap, callee, updated_nodes, NULL);
1482 bitmap_clear (updated_nodes);
1484 if (dump_file)
1486 fprintf (dump_file,
1487 " Inlined into %s which now has time %i and size %i,"
1488 "net change of %+i.\n",
1489 cgraph_node_name (edge->caller),
1490 inline_summary (edge->caller)->time,
1491 inline_summary (edge->caller)->size,
1492 overall_size - old_size);
1494 if (min_size > overall_size)
1496 min_size = overall_size;
1497 max_size = compute_max_insns (min_size);
1499 if (dump_file)
1500 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1504 free_growth_caches ();
1505 if (new_indirect_edges)
1506 VEC_free (cgraph_edge_p, heap, new_indirect_edges);
1507 fibheap_delete (heap);
1508 if (dump_file)
1509 fprintf (dump_file,
1510 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1511 initial_size, overall_size,
1512 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1513 BITMAP_FREE (updated_nodes);
1516 /* Flatten NODE. Performed both during early inlining and
1517 at IPA inlining time. */
1519 static void
1520 flatten_function (struct cgraph_node *node, bool early)
1522 struct cgraph_edge *e;
1524 /* We shouldn't be called recursively when we are being processed. */
1525 gcc_assert (node->aux == NULL);
1527 node->aux = (void *) node;
1529 for (e = node->callees; e; e = e->next_callee)
1531 struct cgraph_node *orig_callee;
1533 /* Have we hit a cycle? It is time to give up. */
1534 if (e->callee->aux)
1536 if (dump_file)
1537 fprintf (dump_file,
1538 "Not inlining %s into %s to avoid cycle.\n",
1539 cgraph_node_name (e->callee),
1540 cgraph_node_name (e->caller));
1541 e->inline_failed = CIF_RECURSIVE_INLINING;
1542 continue;
1545 /* When the edge is already inlined, we just need to recurse into
1546 it in order to fully flatten the leaves. */
1547 if (!e->inline_failed)
1549 flatten_function (e->callee, early);
1550 continue;
1553 /* The flatten attribute needs to be processed during late inlining. For
1554 extra code quality we however do flattening during early optimization,
1555 too. */
1556 if (!early
1557 ? !can_inline_edge_p (e, true)
1558 : !can_early_inline_edge_p (e))
1559 continue;
1561 if (cgraph_edge_recursive_p (e))
1563 if (dump_file)
1564 fprintf (dump_file, "Not inlining: recursive call.\n");
1565 continue;
1568 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1569 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1571 if (dump_file)
1572 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1573 continue;
1576 /* Inline the edge and flatten the inline clone. Avoid
1577 recursing through the original node if the node was cloned. */
1578 if (dump_file)
1579 fprintf (dump_file, " Inlining %s into %s.\n",
1580 cgraph_node_name (e->callee),
1581 cgraph_node_name (e->caller));
1582 orig_callee = e->callee;
1583 inline_call (e, true, NULL, NULL);
1584 if (e->callee != orig_callee)
1585 orig_callee->aux = (void *) node;
1586 flatten_function (e->callee, early);
1587 if (e->callee != orig_callee)
1588 orig_callee->aux = NULL;
1591 node->aux = NULL;
1594 /* Decide on the inlining. We do so in topological order to avoid the
1595 expense of updating data structures. */
1597 static unsigned int
1598 ipa_inline (void)
1600 struct cgraph_node *node;
1601 int nnodes;
1602 struct cgraph_node **order =
1603 XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1604 int i;
1606 if (in_lto_p && flag_indirect_inlining)
1607 ipa_update_after_lto_read ();
1608 if (flag_indirect_inlining)
1609 ipa_create_all_structures_for_iinln ();
1611 if (dump_file)
1612 dump_inline_summaries (dump_file);
1614 nnodes = ipa_reverse_postorder (order);
1616 for (node = cgraph_nodes; node; node = node->next)
1617 node->aux = 0;
1619 if (dump_file)
1620 fprintf (dump_file, "\nFlattening functions:\n");
1622 /* In the first pass handle functions to be flattened. Do this with
1623 a priority so none of our later choices will make this impossible. */
1624 for (i = nnodes - 1; i >= 0; i--)
1626 node = order[i];
1628 /* Handle nodes to be flattened.
1629 Ideally when processing callees we stop inlining at the
1630 entry of cycles, possibly cloning that entry point and
1631 trying to flatten it, turning it into a self-recursive
1632 function. */
1633 if (lookup_attribute ("flatten",
1634 DECL_ATTRIBUTES (node->decl)) != NULL)
1636 if (dump_file)
1637 fprintf (dump_file,
1638 "Flattening %s\n", cgraph_node_name (node));
1639 flatten_function (node, false);
1643 inline_small_functions ();
1644 cgraph_remove_unreachable_nodes (true, dump_file);
1645 free (order);
1647 /* We already perform some inlining of functions called once during
1648 inlining small functions above. After unreachable nodes are removed,
1649 we still might do a quick check that nothing new is found. */
1650 if (flag_inline_functions_called_once)
1652 int cold;
1653 if (dump_file)
1654 fprintf (dump_file, "\nDeciding on functions called once:\n");
1656 /* Inlining one function called once has a good chance of preventing
1657 inlining of another function into the same callee. Ideally we should
1658 work in priority order, but probably inlining hot functions first
1659 is a good cut without the extra pain of maintaining the queue.
1661 ??? this is not really fitting the bill perfectly: inlining a function
1662 into a callee often leads to better optimization of the callee due to
1663 increased context for optimization.
1664 For example if the main() function calls a function that outputs help
1665 and then a function that does the main optimization, we should inline
1666 the second with priority even if both calls are cold by themselves.
1668 We probably want to implement a new predicate replacing our use of
1669 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1670 to be hot. */
1671 for (cold = 0; cold <= 1; cold ++)
1673 for (node = cgraph_nodes; node; node = node->next)
1675 if (want_inline_function_called_once_p (node)
1676 && (cold
1677 || cgraph_maybe_hot_edge_p (node->callers)))
1679 struct cgraph_node *caller = node->callers->caller;
1681 if (dump_file)
1683 fprintf (dump_file,
1684 "\nInlining %s size %i.\n",
1685 cgraph_node_name (node), inline_summary (node)->size);
1686 fprintf (dump_file,
1687 " Called once from %s %i insns.\n",
1688 cgraph_node_name (node->callers->caller),
1689 inline_summary (node->callers->caller)->size);
1692 inline_call (node->callers, true, NULL, NULL);
1693 if (dump_file)
1694 fprintf (dump_file,
1695 " Inlined into %s which now has %i size\n",
1696 cgraph_node_name (caller),
1697 inline_summary (caller)->size);
1703 /* Free ipa-prop structures if they are no longer needed. */
1704 if (flag_indirect_inlining)
1705 ipa_free_all_structures_after_iinln ();
1707 if (dump_file)
1708 fprintf (dump_file,
1709 "\nInlined %i calls, eliminated %i functions\n\n",
1710 ncalls_inlined, nfunctions_inlined);
1712 if (dump_file)
1713 dump_inline_summaries (dump_file);
1714 /* In WPA we use inline summaries for partitioning process. */
1715 if (!flag_wpa)
1716 inline_free_summary ();
1717 return 0;
1720 /* Inline always-inline function calls in NODE. */
1722 static bool
1723 inline_always_inline_functions (struct cgraph_node *node)
1725 struct cgraph_edge *e;
1726 bool inlined = false;
1728 for (e = node->callees; e; e = e->next_callee)
1730 if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
1731 continue;
1733 if (cgraph_edge_recursive_p (e))
1735 if (dump_file)
1736 fprintf (dump_file, " Not inlining recursive call to %s.\n",
1737 cgraph_node_name (e->callee));
1738 e->inline_failed = CIF_RECURSIVE_INLINING;
1739 continue;
1742 if (!can_early_inline_edge_p (e))
1743 continue;
1745 if (dump_file)
1746 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
1747 cgraph_node_name (e->callee),
1748 cgraph_node_name (e->caller));
1749 inline_call (e, true, NULL, NULL);
1750 inlined = true;
1753 return inlined;
1756 /* Do early inlining of small functions called from NODE. Return true
1757 if something was inlined. */
1759 static bool
1760 early_inline_small_functions (struct cgraph_node *node)
1762 struct cgraph_edge *e;
1763 bool inlined = false;
1765 for (e = node->callees; e; e = e->next_callee)
1767 if (!inline_summary (e->callee)->inlinable
1768 || !e->inline_failed)
1769 continue;
1771 /* Do not consider functions not declared inline. */
1772 if (!DECL_DECLARED_INLINE_P (e->callee->decl)
1773 && !flag_inline_small_functions
1774 && !flag_inline_functions)
1775 continue;
1777 if (dump_file)
1778 fprintf (dump_file, "Considering inline candidate %s.\n",
1779 cgraph_node_name (e->callee));
1781 if (!can_early_inline_edge_p (e))
1782 continue;
1784 if (cgraph_edge_recursive_p (e))
1786 if (dump_file)
1787 fprintf (dump_file, " Not inlining: recursive call.\n");
1788 continue;
1791 if (!want_early_inline_function_p (e))
1792 continue;
1794 if (dump_file)
1795 fprintf (dump_file, " Inlining %s into %s.\n",
1796 cgraph_node_name (e->callee),
1797 cgraph_node_name (e->caller));
1798 inline_call (e, true, NULL, NULL);
1799 inlined = true;
1802 return inlined;
1805 /* Do inlining of small functions. Doing so early helps profiling and other
1806 passes to be somewhat more effective and avoids some code duplication in
1807 later real inlining pass for testcases with very many function calls. */
1808 static unsigned int
1809 early_inliner (void)
1811 struct cgraph_node *node = cgraph_get_node (current_function_decl);
1812 struct cgraph_edge *edge;
1813 unsigned int todo = 0;
1814 int iterations = 0;
1815 bool inlined = false;
1817 if (seen_error ())
1818 return 0;
1820 /* Do nothing if the data structures for the IPA inliner are already computed. This
1821 happens when some pass decides to construct a new function and
1822 cgraph_add_new_function calls lowering passes and early optimization on
1823 it. This may confuse us when the early inliner decides to inline a call to a
1824 function clone, because function clones don't have a parameter list in
1825 ipa-prop matching their signature. */
1826 if (ipa_node_params_vector)
1827 return 0;
1829 #ifdef ENABLE_CHECKING
1830 verify_cgraph_node (node);
1831 #endif
1833 /* Even when not optimizing or not inlining, inline always-inline
1834 functions. */
1835 inlined = inline_always_inline_functions (node);
1837 if (!optimize
1838 || flag_no_inline
1839 || !flag_early_inlining
1840 /* Never inline regular functions into always-inline functions
1841 during incremental inlining. This sucks as functions calling
1842 always inline functions will get less optimized, but at the
1843 same time inlining of functions calling always inline
1844 function into an always inline function might introduce
1845 cycles of edges to be always inlined in the callgraph.
1847 We might want to be smarter and just avoid this type of inlining. */
1848 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
1850 else if (lookup_attribute ("flatten",
1851 DECL_ATTRIBUTES (node->decl)) != NULL)
1853 /* When the function is marked to be flattened, recursively inline
1854 all calls in it. */
1855 if (dump_file)
1856 fprintf (dump_file,
1857 "Flattening %s\n", cgraph_node_name (node));
1858 flatten_function (node, true);
1859 inlined = true;
1861 else
1863 /* We iterate incremental inlining to get trivial cases of indirect
1864 inlining. */
1865 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
1866 && early_inline_small_functions (node))
1868 timevar_push (TV_INTEGRATION);
1869 todo |= optimize_inline_calls (current_function_decl);
1871 /* Technically we ought to recompute inline parameters so the new
1872 iteration of early inliner works as expected. We however have
1873 values approximately right and thus we only need to update edge
1874 info that might be cleared out for newly discovered edges. */
1875 for (edge = node->callees; edge; edge = edge->next_callee)
1877 struct inline_edge_summary *es = inline_edge_summary (edge);
1878 es->call_stmt_size
1879 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
1880 es->call_stmt_time
1881 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
1883 timevar_pop (TV_INTEGRATION);
1884 iterations++;
1885 inlined = false;
1887 if (dump_file)
1888 fprintf (dump_file, "Iterations: %i\n", iterations);
1891 if (inlined)
1893 timevar_push (TV_INTEGRATION);
1894 todo |= optimize_inline_calls (current_function_decl);
1895 timevar_pop (TV_INTEGRATION);
1898 cfun->always_inline_functions_inlined = true;
1900 return todo;
1903 struct gimple_opt_pass pass_early_inline =
1906 GIMPLE_PASS,
1907 "einline", /* name */
1908 NULL, /* gate */
1909 early_inliner, /* execute */
1910 NULL, /* sub */
1911 NULL, /* next */
1912 0, /* static_pass_number */
1913 TV_INLINE_HEURISTICS, /* tv_id */
1914 PROP_ssa, /* properties_required */
1915 0, /* properties_provided */
1916 0, /* properties_destroyed */
1917 0, /* todo_flags_start */
1918 TODO_dump_func /* todo_flags_finish */
1923 /* When to run IPA inlining. Inlining of always-inline functions
1924 happens during early inlining. */
1926 static bool
1927 gate_ipa_inline (void)
1929 /* ??? We'd like to skip this if not optimizing or not inlining as
1930 all always-inline functions have been processed by early
1931 inlining already. But this at least breaks EH with C++ as
1932 we need to unconditionally run fixup_cfg even at -O0.
1933 So leave it on unconditionally for now. */
1934 return 1;
1937 struct ipa_opt_pass_d pass_ipa_inline =
1940 IPA_PASS,
1941 "inline", /* name */
1942 gate_ipa_inline, /* gate */
1943 ipa_inline, /* execute */
1944 NULL, /* sub */
1945 NULL, /* next */
1946 0, /* static_pass_number */
1947 TV_INLINE_HEURISTICS, /* tv_id */
1948 0, /* properties_required */
1949 0, /* properties_provided */
1950 0, /* properties_destroyed */
1951 TODO_remove_functions, /* todo_flags_start */
1952 TODO_dump_cgraph | TODO_dump_func
1953 | TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1955 inline_generate_summary, /* generate_summary */
1956 inline_write_summary, /* write_summary */
1957 inline_read_summary, /* read_summary */
1958 NULL, /* write_optimization_summary */
1959 NULL, /* read_optimization_summary */
1960 NULL, /* stmt_fixup */
1961 0, /* TODOs */
1962 inline_transform, /* function_transform */
1963 NULL, /* variable_transform */