[official-gcc.git] / gcc / ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Inlining decision heuristics
24 The implementation of the inliner is organized as follows:
26 Transformation of callgraph to represent inlining decisions.
28 The inline decisions are stored in the callgraph in an "inline plan" and
29 are all applied later.
31 To mark a given call inline, use the cgraph_mark_inline function.
32 The function marks the edge inlinable and, if necessary, produces a
33 virtual clone in the callgraph representing the new copy of the callee's
34 function body.
36 The inline plan is applied to a given function body by inline_transform.
38 inlining heuristics limits
40 can_inline_edge_p allows checking that a particular inlining is allowed
41 by the limits specified by the user (allowed function growth, stack usage
42 growth and so on).
44 Functions are inlined when it is obvious the result is profitable (such
45 as functions called once or when inlining reduces code size).
46 In addition to that we perform inlining of small functions and recursive
47 inlining.
49 inlining heuristics
51 The inliner itself is split into two passes:
53 pass_early_inlining
55 Simple local inlining pass inlining callees into the current function.
56 This pass makes no use of whole unit analysis and thus it can make only
57 very simple decisions based on local properties.
59 The strength of the pass is that it is run in topological order
60 (reverse postorder) on the callgraph. Functions are converted into SSA
61 form just before this pass and optimized subsequently. As a result, the
62 callees of the function seen by the early inliner have already been optimized
63 and the results of early inlining add a lot of optimization opportunities
64 for local optimization.
66 The pass handles the obvious inlining decisions within the compilation
67 unit - inlining auto-inline functions, inlining for size and
68 flattening.
70 The main strength of the pass is the ability to eliminate the abstraction
71 penalty in C++ code (via a combination of inlining and early
72 optimization) and thus improve the quality of analysis done by the real IPA
73 optimizers.
75 Because of the lack of whole unit knowledge, the pass cannot really make
76 good code size/performance tradeoffs. It however does very simple
77 speculative inlining, allowing code size to grow by
78 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
79 optimizations performed later are very likely to eliminate the cost.
81 pass_ipa_inline
83 This is the real inliner able to handle inlining with whole program
84 knowledge. It performs the following steps:
86 1) inlining of small functions. This is implemented by a greedy
87 algorithm ordering all inlinable cgraph edges by their badness and
88 inlining them in this order as long as the inline limits allow doing so.
90 This heuristic is not very good at inlining recursive calls. Recursive
91 calls can be inlined with results similar to loop unrolling. To do so,
92 a special purpose recursive inliner is executed on the function when a
93 recursive edge is met as a viable candidate.
95 2) Unreachable functions are removed from the callgraph. Inlining leads
96 to devirtualization and other modifications of the callgraph, so functions
97 may become unreachable during the process. Also functions declared as
98 extern inline or virtual functions are removed, since after inlining
99 we no longer need the offline bodies.
101 3) Functions called once and not exported from the unit are inlined.
102 This should almost always lead to a reduction of code size by eliminating
103 the need for an offline copy of the function. */
105 #include "config.h"
106 #include "system.h"
107 #include "coretypes.h"
108 #include "tm.h"
109 #include "tree.h"
110 #include "tree-inline.h"
111 #include "langhooks.h"
112 #include "flags.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "gimple-pretty-print.h"
116 #include "timevar.h"
117 #include "params.h"
118 #include "fibheap.h"
119 #include "intl.h"
120 #include "tree-pass.h"
121 #include "coverage.h"
122 #include "ggc.h"
123 #include "rtl.h"
124 #include "tree-flow.h"
125 #include "ipa-prop.h"
126 #include "except.h"
127 #include "target.h"
128 #include "ipa-inline.h"
130 /* Statistics we collect about inlining algorithm. */
131 static int ncalls_inlined;
132 static int nfunctions_inlined;
133 static int overall_size;
134 static gcov_type max_count, max_benefit;
136 /* Scale frequency of NODE edges by FREQ_SCALE and increase loop nest
137 by NEST. */
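/* A worked example with illustrative values (CGRAPH_FREQ_BASE is assumed to
   be 1000): when the inlined call site has frequency 300, i.e. it is executed
   on roughly 30% of invocations of the caller, a callee edge of frequency 800
   is rescaled to 800 * 300 / 1000 == 240, and any result above
   CGRAPH_FREQ_MAX is clamped to CGRAPH_FREQ_MAX.  */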
139 static void
140 update_noncloned_frequencies (struct cgraph_node *node,
141 int freq_scale, int nest)
143 struct cgraph_edge *e;
145 /* We do not want to ignore high loop nest after freq drops to 0. */
146 if (!freq_scale)
147 freq_scale = 1;
148 for (e = node->callees; e; e = e->next_callee)
150 e->loop_nest += nest;
151 e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
152 if (e->frequency > CGRAPH_FREQ_MAX)
153 e->frequency = CGRAPH_FREQ_MAX;
154 if (!e->inline_failed)
155 update_noncloned_frequencies (e->callee, freq_scale, nest);
159 /* E is expected to be an edge being inlined. Clone destination node of
160 the edge and redirect it to the new clone.
161 DUPLICATE is used for bookkeeping on whether we are actually creating new
162 clones or re-using a node originally representing an out-of-line function call.  */
164 void
165 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
166 bool update_original)
168 HOST_WIDE_INT peak;
169 struct inline_summary *caller_info, *callee_info;
171 if (duplicate)
173 /* We may eliminate the need for out-of-line copy to be output.
174 In that case just go ahead and re-use it. */
175 if (!e->callee->callers->next_caller
176 /* Recursive inlining never wants the master clone to
177 be overwritten. */
178 && update_original
179 /* FIXME: When address is taken of DECL_EXTERNAL function we still
180 can remove its offline copy, but we would need to keep unanalyzed
181 node in the callgraph so references can point to it. */
182 && !e->callee->address_taken
183 && cgraph_can_remove_if_no_direct_calls_p (e->callee)
184 /* Inlining might enable more devirtualizing, so we want to remove
185 those only after all devirtualizable virtual calls are processed.
186 Lacking may edges in callgraph we just preserve them post
187 inlining. */
188 && (!DECL_VIRTUAL_P (e->callee->decl)
189 || (!DECL_COMDAT (e->callee->decl)
190 && !DECL_EXTERNAL (e->callee->decl)))
191 /* Don't reuse if more than one function shares a comdat group.
192 If the other function(s) are needed, we need to emit even
193 this function out of line. */
194 && !e->callee->same_comdat_group
195 && !cgraph_new_nodes)
197 gcc_assert (!e->callee->global.inlined_to);
198 if (e->callee->analyzed && !DECL_EXTERNAL (e->callee->decl))
200 overall_size -= inline_summary (e->callee)->size;
201 nfunctions_inlined++;
203 duplicate = false;
204 e->callee->local.externally_visible = false;
205 update_noncloned_frequencies (e->callee, e->frequency, e->loop_nest);
207 else
209 struct cgraph_node *n;
210 n = cgraph_clone_node (e->callee, e->callee->decl,
211 e->count, e->frequency, e->loop_nest,
212 update_original, NULL);
213 cgraph_redirect_edge_callee (e, n);
217 callee_info = inline_summary (e->callee);
218 caller_info = inline_summary (e->caller);
220 if (e->caller->global.inlined_to)
221 e->callee->global.inlined_to = e->caller->global.inlined_to;
222 else
223 e->callee->global.inlined_to = e->caller;
224 callee_info->stack_frame_offset
225 = caller_info->stack_frame_offset
226 + caller_info->estimated_self_stack_size;
227 peak = callee_info->stack_frame_offset
228 + callee_info->estimated_self_stack_size;
229 if (inline_summary (e->callee->global.inlined_to)->estimated_stack_size
230 < peak)
231 inline_summary (e->callee->global.inlined_to)->estimated_stack_size = peak;
232 cgraph_propagate_frequency (e->callee);
234 /* Recursively clone all bodies. */
235 for (e = e->callee->callees; e; e = e->next_callee)
236 if (!e->inline_failed)
237 cgraph_clone_inlined_nodes (e, duplicate, update_original);
240 /* Mark edge E as inlined and update callgraph accordingly. UPDATE_ORIGINAL
241 specify whether profile of original function should be updated. If any new
242 indirect edges are discovered in the process, add them to NEW_EDGES, unless
243 it is NULL. Return true iff any new callgraph edges were discovered as a
244 result of inlining. */
246 static bool
247 cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original,
248 VEC (cgraph_edge_p, heap) **new_edges)
250 int old_size = 0, new_size = 0;
251 struct cgraph_node *to = NULL;
252 struct cgraph_edge *curr = e;
253 struct inline_summary *info;
255 /* Don't inline inlined edges. */
256 gcc_assert (e->inline_failed);
257 /* Don't even think of inlining inline clone. */
258 gcc_assert (!e->callee->global.inlined_to);
260 e->inline_failed = CIF_OK;
261 DECL_POSSIBLY_INLINED (e->callee->decl) = true;
263 cgraph_clone_inlined_nodes (e, true, update_original);
265 /* Now update size of caller and all functions caller is inlined into. */
266 for (;e && !e->inline_failed; e = e->caller->callers)
268 to = e->caller;
269 info = inline_summary (to);
270 old_size = info->size;
271 new_size = estimate_size_after_inlining (to, curr);
272 info->size = new_size;
273 info->time = estimate_time_after_inlining (to, curr);
275 gcc_assert (curr->callee->global.inlined_to == to);
276 if (new_size > old_size)
277 overall_size += new_size - old_size;
278 ncalls_inlined++;
280 /* FIXME: We should remove the optimize check after we ensure we never run
281 IPA passes when not optimizing. */
282 if (flag_indirect_inlining && optimize)
283 return ipa_propagate_indirect_call_infos (curr, new_edges);
284 else
285 return false;
288 /* Return false when inlining edge E would lead to violating
289 limits on function unit growth or stack usage growth.
291 The relative function body growth limit is present generally
292 to avoid problems with non-linear behavior of the compiler.
293 To allow inlining huge functions into a tiny wrapper, the limit
294 is always based on the bigger of the two functions considered.
296 For stack growth limits we always base the growth on the stack usage
297 of the callers. We want to prevent applications from segfaulting
298 on stack overflow when functions with huge stack frames get
299 inlined. */
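/* A sketch of the limit computation, assuming --param large-function-growth=100
   and --param large-function-insns=2700 (explicit values, not necessarily the
   defaults in use): if the bigger of the two functions considered has self
   size 3000, then limit == 3000 + 3000 * 100 / 100 == 6000, so an inlining
   whose estimated result exceeds 6000 insns without shrinking the function is
   rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT.  */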
301 static bool
302 caller_growth_limits (struct cgraph_edge *e)
304 struct cgraph_node *to = e->caller;
305 struct cgraph_node *what = e->callee;
306 int newsize;
307 int limit = 0;
308 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
309 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
311 /* Look for the function e->caller is inlined into. While doing
312 so, work out the largest function body on the way. As
313 described above, we want to base our function growth
314 limits on that, not on the self size of the
315 outer function and not on the self size of the inline code
316 we immediately inline into. This is the most relaxed
317 interpretation of the rule "do not grow large functions
318 too much in order to prevent the compiler from exploding". */
321 info = inline_summary (to);
322 if (limit < info->self_size)
323 limit = info->self_size;
324 if (stack_size_limit < info->estimated_self_stack_size)
325 stack_size_limit = info->estimated_self_stack_size;
326 if (to->global.inlined_to)
327 to = to->callers->caller;
329 while (to->global.inlined_to);
331 what_info = inline_summary (what);
333 if (limit < what_info->self_size)
334 limit = what_info->self_size;
336 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
338 /* Check the size after inlining against the function limits. But allow
339 the function to shrink if it went over the limits by forced inlining. */
340 newsize = estimate_size_after_inlining (to, e);
341 if (newsize >= info->size
342 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
343 && newsize > limit)
345 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
346 return false;
349 /* FIXME: Stack size limit often prevents inlining in Fortran programs
350 due to large i/o datastructures used by the Fortran front-end.
351 We ought to ignore this limit when we know that the edge is executed
352 on every invocation of the caller (i.e. its call statement dominates
353 exit block). We do not track this information, yet. */
354 stack_size_limit += (stack_size_limit
355 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
357 inlined_stack = (outer_info->stack_frame_offset
358 + outer_info->estimated_self_stack_size
359 + what_info->estimated_stack_size);
360 /* Check new stack consumption with stack consumption at the place
361 stack is used. */
362 if (inlined_stack > stack_size_limit
363 /* If the function already has large stack usage from a sibling
364 inline call, we can inline, too.
365 This bit overoptimistically assumes that we are good at stack
366 packing. */
367 && inlined_stack > info->estimated_stack_size
368 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
370 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
371 return false;
373 return true;
376 /* Dump info about why inlining has failed. */
378 static void
379 report_inline_failed_reason (struct cgraph_edge *e)
381 if (dump_file)
383 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
384 cgraph_node_name (e->caller), e->caller->uid,
385 cgraph_node_name (e->callee), e->callee->uid,
386 cgraph_inline_failed_string (e->inline_failed));
390 /* Decide if we can inline the edge and possibly update
391 inline_failed reason.
392 We check whether inlining is possible at all and whether
393 caller growth limits allow doing so.
395 If REPORT is true, output the reason to the dump file. */
397 static bool
398 can_inline_edge_p (struct cgraph_edge *e, bool report)
400 bool inlinable = true;
401 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
402 tree callee_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->callee->decl);
404 gcc_assert (e->inline_failed);
406 if (!e->callee->analyzed)
408 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
409 inlinable = false;
411 else if (!inline_summary (e->callee)->inlinable)
413 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
414 inlinable = false;
416 else if (cgraph_function_body_availability (e->callee) <= AVAIL_OVERWRITABLE)
418 e->inline_failed = CIF_OVERWRITABLE;
419 return false;
421 else if (e->call_stmt_cannot_inline_p)
423 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
424 inlinable = false;
426 /* Don't inline if the functions have different EH personalities. */
427 else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
428 && DECL_FUNCTION_PERSONALITY (e->callee->decl)
429 && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
430 != DECL_FUNCTION_PERSONALITY (e->callee->decl)))
432 e->inline_failed = CIF_EH_PERSONALITY;
433 inlinable = false;
435 /* Don't inline if the callee can throw non-call exceptions but the
436 caller cannot.
437 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
438 Move the flag into cgraph node or mirror it in the inline summary. */
439 else if (DECL_STRUCT_FUNCTION (e->callee->decl)
440 && DECL_STRUCT_FUNCTION (e->callee->decl)->can_throw_non_call_exceptions
441 && !(DECL_STRUCT_FUNCTION (e->caller->decl)
442 && DECL_STRUCT_FUNCTION (e->caller->decl)->can_throw_non_call_exceptions))
444 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
445 inlinable = false;
447 /* Check compatibility of target optimization options. */
448 else if (!targetm.target_option.can_inline_p (e->caller->decl,
449 e->callee->decl))
451 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
452 inlinable = false;
454 /* Check if caller growth allows the inlining. */
455 else if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl)
456 && !caller_growth_limits (e))
457 inlinable = false;
458 /* Don't inline a function with a higher optimization level than the
459 caller. FIXME: this is really just tip of iceberg of handling
460 optimization attribute. */
461 else if (caller_tree != callee_tree)
463 struct cl_optimization *caller_opt
464 = TREE_OPTIMIZATION ((caller_tree)
465 ? caller_tree
466 : optimization_default_node);
468 struct cl_optimization *callee_opt
469 = TREE_OPTIMIZATION ((callee_tree)
470 ? callee_tree
471 : optimization_default_node);
473 if ((caller_opt->x_optimize > callee_opt->x_optimize)
474 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
476 e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH;
477 inlinable = false;
481 /* Be sure that the cannot_inline_p flag is up to date. */
482 gcc_checking_assert (!e->call_stmt
483 || (gimple_call_cannot_inline_p (e->call_stmt)
484 == e->call_stmt_cannot_inline_p)
485 /* In -flto-partition=none mode we really keep things out of
486 sync because call_stmt_cannot_inline_p is set at cgraph
487 merging when function bodies are not there yet. */
488 || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt)));
489 if (!inlinable && report)
490 report_inline_failed_reason (e);
491 return inlinable;
495 /* Return true if the edge E is inlinable during early inlining. */
497 static bool
498 can_early_inline_edge_p (struct cgraph_edge *e)
500 /* The early inliner might get called at WPA stage when an IPA pass adds a new
501 function. In this case we cannot really do any early inlining
502 because function bodies are missing.
503 if (!gimple_has_body_p (e->callee->decl))
505 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
506 return false;
508 /* In the early inliner some of the callees may not be in SSA form yet
509 (i.e. the callgraph is cyclic and we did not process
510 the callee by the early inliner yet). We don't have a CIF code for this
511 case; later we will re-do the decision in the real inliner. */
512 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
513 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
515 if (dump_file)
516 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
517 return false;
519 if (!can_inline_edge_p (e, true))
520 return false;
521 return true;
525 /* Return true when N is a leaf function. Accept cheap builtins
526 in leaf functions. */
528 static bool
529 leaf_node_p (struct cgraph_node *n)
531 struct cgraph_edge *e;
532 for (e = n->callees; e; e = e->next_callee)
533 if (!is_inexpensive_builtin (e->callee->decl))
534 return false;
535 return true;
539 /* Return true if we are interested in inlining the small function called by edge E during early inlining. */
541 static bool
542 want_early_inline_function_p (struct cgraph_edge *e)
544 bool want_inline = true;
546 if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
548 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
549 && !flag_inline_small_functions)
551 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
552 report_inline_failed_reason (e);
553 want_inline = false;
555 else
557 int growth = estimate_edge_growth (e);
558 if (growth <= 0)
560 else if (!cgraph_maybe_hot_edge_p (e)
561 && growth > 0)
563 if (dump_file)
564 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
565 "call is cold and code would grow by %i\n",
566 cgraph_node_name (e->caller), e->caller->uid,
567 cgraph_node_name (e->callee), e->callee->uid,
568 growth);
569 want_inline = false;
571 else if (!leaf_node_p (e->callee)
572 && growth > 0)
574 if (dump_file)
575 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
576 "callee is not leaf and code would grow by %i\n",
577 cgraph_node_name (e->caller), e->caller->uid,
578 cgraph_node_name (e->callee), e->callee->uid,
579 growth);
580 want_inline = false;
582 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
584 if (dump_file)
585 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
586 "growth %i exceeds --param early-inlining-insns\n",
587 cgraph_node_name (e->caller), e->caller->uid,
588 cgraph_node_name (e->callee), e->callee->uid,
589 growth);
590 want_inline = false;
593 return want_inline;
596 /* Return true if we are interested in inlining the small function called by edge E.
597 When REPORT is true, report the reason to the dump file. */
599 static bool
600 want_inline_small_function_p (struct cgraph_edge *e, bool report)
602 bool want_inline = true;
604 if (DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
606 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
607 && !flag_inline_small_functions)
609 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
610 want_inline = false;
612 else
614 int growth = estimate_edge_growth (e);
616 if (growth <= 0)
618 else if (DECL_DECLARED_INLINE_P (e->callee->decl)
619 && growth >= MAX_INLINE_INSNS_SINGLE)
621 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
622 want_inline = false;
624 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
625 && !flag_inline_functions)
627 e->inline_failed = CIF_NOT_DECLARED_INLINED;
628 want_inline = false;
630 else if (!DECL_DECLARED_INLINE_P (e->callee->decl)
631 && growth >= MAX_INLINE_INSNS_AUTO)
633 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
634 want_inline = false;
636 else if (!cgraph_maybe_hot_edge_p (e)
637 && estimate_growth (e->callee) > 0)
639 e->inline_failed = CIF_UNLIKELY_CALL;
640 want_inline = false;
643 if (!want_inline && report)
644 report_inline_failed_reason (e);
645 return want_inline;
648 /* EDGE is a self recursive edge.
649 We handle two cases - when function A is inlined into itself
650 or when function A is being inlined into another inline copy of function
651 A within function B.
653 In the first case OUTER_NODE points to the toplevel copy of A, while
654 in the second case OUTER_NODE points to the outermost copy of A in B.
656 In both cases we want to be extra selective since
657 inlining the call will just cause new recursive calls to appear. */
659 static bool
660 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
661 struct cgraph_node *outer_node,
662 bool peeling,
663 int depth)
665 char const *reason = NULL;
666 bool want_inline = true;
667 int caller_freq = CGRAPH_FREQ_BASE;
668 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
670 if (DECL_DECLARED_INLINE_P (edge->callee->decl))
671 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
673 if (!cgraph_maybe_hot_edge_p (edge))
675 reason = "recursive call is cold";
676 want_inline = false;
678 else if (max_count && !outer_node->count)
680 reason = "not executed in profile";
681 want_inline = false;
683 else if (depth > max_depth)
685 reason = "--param max-inline-recursive-depth exceeded.";
686 want_inline = false;
689 if (outer_node->global.inlined_to)
690 caller_freq = outer_node->callers->frequency;
692 if (!want_inline)
694 /* Inlining of a self recursive function into a copy of itself within another function
695 is a transformation similar to loop peeling.
697 Peeling is profitable if we can inline enough copies to make the probability
698 of an actual call to the self recursive function very small. Be sure that
699 the probability of recursion is small.
701 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
702 This way the expected number of recursions is at most max_depth. */
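/* For instance, assuming CGRAPH_FREQ_BASE of 1000 and max_depth of 8,
   max_prob starts at 1000 - (1000 + 8 - 1) / 8 == 875; at depth 2 it becomes
   875 * 875 / 1000 == 765, and so on, so deeper recursive copies are accepted
   only while the measured probability of recursing keeps dropping
   accordingly.  */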
703 else if (peeling)
705 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
706 / max_depth);
707 int i;
708 for (i = 1; i < depth; i++)
709 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
710 if (max_count
711 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
712 >= max_prob))
714 reason = "profile of recursive call is too large";
715 want_inline = false;
717 if (!max_count
718 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
719 >= max_prob))
721 reason = "frequency of recursive call is too large";
722 want_inline = false;
725 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
726 depth is large. We reduce function call overhead and increase chances that
727 things fit in hardware return predictor.
729 Recursive inlining might however increase cost of stack frame setup
730 actually slowing down functions whose recursion tree is wide rather than
731 deep.
733 Deciding reliably on when to do recursive inlining without profile feedback
734 is tricky. For now we disable recursive inlining when probability of self
735 recursion is low.
737 Recursive inlining of self recursive call within loop also results in large loop
738 depths that generally optimize badly. We may want to throttle down inlining
739 in those cases. In particular this seems to happen in one of libstdc++ rb tree
740 methods. */
741 else
743 if (max_count
744 && (edge->count * 100 / outer_node->count
745 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
747 reason = "profile of recursive call is too small";
748 want_inline = false;
750 else if (!max_count
751 && (edge->frequency * 100 / caller_freq
752 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
754 reason = "frequency of recursive call is too small";
755 want_inline = false;
758 if (!want_inline && dump_file)
759 fprintf (dump_file, " not inlining recursively: %s\n", reason);
760 return want_inline;
764 /* Decide if NODE is called once and inlining it would eliminate the need
765 for the offline copy of the function. */
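/* A typical case, as hypothetical code rather than an existing testcase:

     static int helper (int x) { return x * 3; }
     int user (int y) { return helper (y) + 1; }

   helper has exactly one caller and is not exported, so inlining it into
   user lets the offline copy of helper be removed entirely.  */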
767 static bool
768 want_inline_function_called_once_p (struct cgraph_node *node)
770 /* Already inlined? */
771 if (node->global.inlined_to)
772 return false;
773 /* Zero or more than one caller? */
774 if (!node->callers
775 || node->callers->next_caller)
776 return false;
777 /* Recursive call makes no sense to inline. */
778 if (node->callers->caller == node)
779 return false;
780 /* External functions are not really in the unit, so inlining
781 them when called once would just increase the program size. */
782 if (DECL_EXTERNAL (node->decl))
783 return false;
784 /* Offline body must be optimized out. */
785 if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node))
786 return false;
787 if (!can_inline_edge_p (node->callers, true))
788 return false;
789 return true;
792 /* A cost model driving the inlining heuristics in a way so that the edges with
793 the smallest badness are inlined first. After each inlining is performed
794 the costs of all caller edges of the affected nodes are recomputed so the
795 metrics may accurately depend on values such as the number of inlinable callers
796 of the function or the function body size. */
798 static int
799 edge_badness (struct cgraph_edge *edge, bool dump)
801 gcov_type badness;
802 int growth;
803 struct inline_summary *callee_info = inline_summary (edge->callee);
805 if (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
806 return INT_MIN;
808 growth = estimate_edge_growth (edge);
810 if (dump)
812 fprintf (dump_file, " Badness calculation for %s -> %s\n",
813 cgraph_node_name (edge->caller),
814 cgraph_node_name (edge->callee));
815 fprintf (dump_file, " growth %i, time %i-%i, size %i-%i\n",
816 growth,
817 callee_info->time,
818 callee_info->time_inlining_benefit
819 + edge->call_stmt_time,
820 callee_info->size,
821 callee_info->size_inlining_benefit
822 + edge->call_stmt_size);
825 /* Always prefer inlining saving code size. */
826 if (growth <= 0)
828 badness = INT_MIN - growth;
829 if (dump)
830 fprintf (dump_file, " %i: Growth %i < 0\n", (int) badness,
831 growth);
834 /* When profiling is available, base priorities on -(#calls / growth).
835 So we optimize for the overall number of "executed" inlined calls. */
836 else if (max_count)
838 badness =
839 ((int)
840 ((double) edge->count * INT_MIN / max_count / (max_benefit + 1)) *
841 (callee_info->time_inlining_benefit
842 + edge->call_stmt_time + 1)) / growth;
844 /* Be sure that insanity of the profile won't lead to increasing counts
845 in the scaling and thus to overflow in the computation above. */
846 gcc_assert (max_count >= edge->count);
847 if (dump)
849 fprintf (dump_file,
850 " %i (relative %f): profile info. Relative count %f"
851 " * Relative benefit %f\n",
852 (int) badness, (double) badness / INT_MIN,
853 (double) edge->count / max_count,
854 (double) (inline_summary (edge->callee)->
855 time_inlining_benefit
856 + edge->call_stmt_time + 1) / (max_benefit + 1));
860 /* When function local profile is available, base priorities on
861 growth / frequency, so we optimize for overall frequency of inlined
862 calls. This is not too accurate since while the call might be frequent
863 within the function, the function itself may be infrequent.
865 Another objective to optimize for is the number of different calls inlined.
866 We add the estimated growth after inlining all functions to bias the
867 priorities slightly in this direction (so less frequently called functions
868 of the same size get priority). */
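/* Illustrative numbers only: for growth == 40, an edge frequency of 500
   (half of an assumed CGRAPH_FREQ_BASE of 1000) and a computed benefit of
   20%, div == 500 * 100 / 1000 + 1 == 51, badness == 40 * 10000 == 400000,
   div *= 20 gives 1020 and badness / div == 392, before the overall growth
   estimate is added; a smaller badness means the edge is inlined earlier.  */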
869 else if (flag_guess_branch_prob)
871 int div = edge->frequency * 100 / CGRAPH_FREQ_BASE + 1;
872 int benefitperc;
873 int growth_for_all;
874 badness = growth * 10000;
875 benefitperc =
876 100 * (callee_info->time_inlining_benefit
877 + edge->call_stmt_time)
878 / (callee_info->time + 1) + 1;
879 benefitperc = MIN (benefitperc, 100);
880 div *= benefitperc;
882 /* Decrease badness if call is nested. */
883 /* Compress the range so we don't overflow. */
884 if (div > 10000)
885 div = 10000 + ceil_log2 (div) - 8;
886 if (div < 1)
887 div = 1;
888 if (badness > 0)
889 badness /= div;
890 growth_for_all = estimate_growth (edge->callee);
891 badness += growth_for_all;
892 if (badness > INT_MAX)
893 badness = INT_MAX;
894 if (dump)
896 fprintf (dump_file,
897 " %i: guessed profile. frequency %i, overall growth %i,"
898 " benefit %i%%, divisor %i\n",
899 (int) badness, edge->frequency, growth_for_all,
900 benefitperc, div);
903 /* When function local profile is not available or it does not give
904 useful information (ie frequency is zero), base the cost on
905 loop nest and overall size growth, so we optimize for overall number
906 of functions fully inlined in program. */
907 else
909 int nest = MIN (edge->loop_nest, 8);
910 badness = estimate_growth (edge->callee) * 256;
912 /* Decrease badness if call is nested. */
913 if (badness > 0)
914 badness >>= nest;
915 else
917 badness <<= nest;
919 if (dump)
920 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
921 nest);
924 /* Ensure that we did not overflow in all the fixed point math above. */
925 gcc_assert (badness >= INT_MIN);
926 gcc_assert (badness <= INT_MAX - 1);
927 /* Make recursive inlining always happen after other inlining is done. */
928 if (cgraph_edge_recursive_p (edge))
929 return badness + 1;
930 else
931 return badness;
934 /* Recompute badness of EDGE and update its key in HEAP if needed. */
935 static inline void
936 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
938 int badness = edge_badness (edge, false);
939 if (edge->aux)
941 fibnode_t n = (fibnode_t) edge->aux;
942 gcc_checking_assert (n->data == edge);
944 /* fibheap_replace_key only decreases the keys.
945 When we increase the key we do not update the heap
946 and instead re-insert the element once it becomes
947 the minimum of the heap. */
948 if (badness < n->key)
950 fibheap_replace_key (heap, n, badness);
951 if (dump_file && (dump_flags & TDF_DETAILS))
953 fprintf (dump_file,
954 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
955 cgraph_node_name (edge->caller), edge->caller->uid,
956 cgraph_node_name (edge->callee), edge->callee->uid,
957 (int)n->key,
958 badness);
960 gcc_checking_assert (n->key == badness);
963 else
965 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (dump_file,
968 " enqueuing call %s/%i -> %s/%i, badness %i\n",
969 cgraph_node_name (edge->caller), edge->caller->uid,
970 cgraph_node_name (edge->callee), edge->callee->uid,
971 badness);
973 edge->aux = fibheap_insert (heap, badness, edge);
977 /* Recompute heap nodes for each caller edge of NODE. */
979 static void
980 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
981 bitmap updated_nodes)
983 struct cgraph_edge *edge;
985 if (!inline_summary (node)->inlinable
986 || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
987 || node->global.inlined_to)
988 return;
989 if (!bitmap_set_bit (updated_nodes, node->uid))
990 return;
991 inline_summary (node)->estimated_growth = INT_MIN;
993 /* See if there is something to do. */
994 for (edge = node->callers; edge; edge = edge->next_caller)
995 if (edge->inline_failed)
996 break;
997 if (!edge)
998 return;
1000 for (; edge; edge = edge->next_caller)
1001 if (edge->inline_failed)
1003 if (can_inline_edge_p (edge, false)
1004 && want_inline_small_function_p (edge, false))
1005 update_edge_key (heap, edge);
1006 else if (edge->aux)
1008 report_inline_failed_reason (edge);
1009 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1010 edge->aux = NULL;
1015 /* Recompute heap nodes for each uninlined call.
1016 This is used when we know that edge badnesses are only going to increase
1017 (we introduced a new call site) and thus all we need is to insert newly
1018 created edges into the heap. */
1020 static void
1021 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1022 bitmap updated_nodes)
1024 struct cgraph_edge *e = node->callees;
1026 inline_summary (node)->estimated_growth = INT_MIN;
1028 if (!e)
1029 return;
1030 while (true)
1031 if (!e->inline_failed && e->callee->callees)
1032 e = e->callee->callees;
1033 else
1035 if (e->inline_failed
1036 && inline_summary (e->callee)->inlinable
1037 && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE
1038 && !bitmap_bit_p (updated_nodes, e->callee->uid))
1040 inline_summary (node)->estimated_growth = INT_MIN;
1041 update_edge_key (heap, e);
1043 if (e->next_callee)
1044 e = e->next_callee;
1045 else
1049 if (e->caller == node)
1050 return;
1051 e = e->caller->callers;
1053 while (!e->next_callee);
1054 e = e->next_callee;
1059 /* Recompute heap nodes for the caller edges of each of the callees.
1060 Walk recursively into all inline clones. */
1062 static void
1063 update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
1064 bitmap updated_nodes)
1066 struct cgraph_edge *e = node->callees;
1068 inline_summary (node)->estimated_growth = INT_MIN;
1070 if (!e)
1071 return;
1072 while (true)
1073 if (!e->inline_failed && e->callee->callees)
1074 e = e->callee->callees;
1075 else
1077 if (e->inline_failed)
1078 update_caller_keys (heap, e->callee, updated_nodes);
1079 if (e->next_callee)
1080 e = e->next_callee;
1081 else
1085 if (e->caller == node)
1086 return;
1087 e = e->caller->callers;
1089 while (!e->next_callee);
1090 e = e->next_callee;
1095 /* Enqueue all recursive calls from NODE into priority queue depending on
1096 how likely we want to recursively inline the call. */
1098 static void
1099 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1100 fibheap_t heap)
1102 struct cgraph_edge *e;
1103 for (e = where->callees; e; e = e->next_callee)
1104 if (e->callee == node)
1106 /* When profile feedback is available, prioritize by expected number
1107 of calls. */
1108 fibheap_insert (heap,
1109 !max_count ? -e->frequency
1110 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1113 for (e = where->callees; e; e = e->next_callee)
1114 if (!e->inline_failed)
1115 lookup_recursive_calls (node, e->callee, heap);
1118 /* Decide on recursive inlining: in case the function has recursive calls,
1119 inline until the body size reaches the given limit. If any new indirect edges
1120 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1121 is NULL. */
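/* For example, with --param max-inline-insns-recursive-auto=450 (an explicit
   setting, not necessarily the default), recursive calls of a function not
   declared inline keep being inlined, most frequent first, until the
   estimated size of the outermost copy would exceed 450 insns.  */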
1123 static bool
1124 recursive_inlining (struct cgraph_edge *edge,
1125 VEC (cgraph_edge_p, heap) **new_edges)
1127 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1128 fibheap_t heap;
1129 struct cgraph_node *node;
1130 struct cgraph_edge *e;
1131 struct cgraph_node *master_clone = NULL, *next;
1132 int depth = 0;
1133 int n = 0;
1135 node = edge->caller;
1136 if (node->global.inlined_to)
1137 node = node->global.inlined_to;
1139 if (DECL_DECLARED_INLINE_P (node->decl))
1140 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1142 /* Make sure that function is small enough to be considered for inlining. */
1143 if (estimate_size_after_inlining (node, edge) >= limit)
1144 return false;
1145 heap = fibheap_new ();
1146 lookup_recursive_calls (node, node, heap);
1147 if (fibheap_empty (heap))
1149 fibheap_delete (heap);
1150 return false;
1153 if (dump_file)
1154 fprintf (dump_file,
1155 " Performing recursive inlining on %s\n",
1156 cgraph_node_name (node));
1158 /* Do the inlining and update the list of recursive calls during the process. */
1159 while (!fibheap_empty (heap))
1161 struct cgraph_edge *curr
1162 = (struct cgraph_edge *) fibheap_extract_min (heap);
1163 struct cgraph_node *cnode;
1165 if (estimate_size_after_inlining (node, curr) > limit)
1166 break;
1168 if (!can_inline_edge_p (curr, true))
1169 continue;
1171 depth = 1;
1172 for (cnode = curr->caller;
1173 cnode->global.inlined_to; cnode = cnode->callers->caller)
1174 if (node->decl == curr->callee->decl)
1175 depth++;
1177 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1178 continue;
1180 if (dump_file)
1182 fprintf (dump_file,
1183 " Inlining call of depth %i", depth);
1184 if (node->count)
1186 fprintf (dump_file, " called approx. %.2f times per call",
1187 (double)curr->count / node->count);
1189 fprintf (dump_file, "\n");
1191 if (!master_clone)
1193 /* We need original clone to copy around. */
1194 master_clone = cgraph_clone_node (node, node->decl,
1195 node->count, CGRAPH_FREQ_BASE, 1,
1196 false, NULL);
1197 for (e = master_clone->callees; e; e = e->next_callee)
1198 if (!e->inline_failed)
1199 cgraph_clone_inlined_nodes (e, true, false);
1202 cgraph_redirect_edge_callee (curr, master_clone);
1203 cgraph_mark_inline_edge (curr, false, new_edges);
1204 lookup_recursive_calls (node, curr->callee, heap);
1205 n++;
1208 if (!fibheap_empty (heap) && dump_file)
1209 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1210 fibheap_delete (heap);
1212 if (!master_clone)
1213 return false;
1215 if (dump_file)
1216 fprintf (dump_file,
1217 "\n Inlined %i times, "
1218 "body grown from size %i to %i, time %i to %i\n", n,
1219 inline_summary (master_clone)->size, inline_summary (node)->size,
1220 inline_summary (master_clone)->time, inline_summary (node)->time);
1222 /* Remove the master clone we used for inlining. We rely on the fact that clones
1223 inlined into the master clone get queued just before the master clone so we don't
1224 need recursion. */
1225 for (node = cgraph_nodes; node != master_clone;
1226 node = next)
1228 next = node->next;
1229 if (node->global.inlined_to == master_clone)
1230 cgraph_remove_node (node);
1232 cgraph_remove_node (master_clone);
1233 return true;
1237 /* Given whole compilation unit estimate of INSNS, compute how large we can
1238 allow the unit to grow. */
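/* For example, with --param inline-unit-growth=30 (an explicit value, not
   necessarily the default) and an initial unit size of 10000 instructions,
   the unit may grow to 10000 * (100 + 30) / 100 == 13000 instructions; units
   smaller than --param large-unit-insns are first rounded up to that value
   before the percentage is applied.  */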
1240 static int
1241 compute_max_insns (int insns)
1243 int max_insns = insns;
1244 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1245 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1247 return ((HOST_WIDEST_INT) max_insns
1248 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1252 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1254 static void
1255 add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
1257 while (VEC_length (cgraph_edge_p, new_edges) > 0)
1259 struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);
1261 gcc_assert (!edge->aux);
1262 if (inline_summary (edge->callee)->inlinable
1263 && edge->inline_failed
1264 && can_inline_edge_p (edge, true)
1265 && want_inline_small_function_p (edge, true))
1266 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1271 /* We use a greedy algorithm for inlining of small functions:
1272 All inline candidates are put into a prioritized heap ordered by
1273 increasing badness.
1275 The inlining of small functions is bounded by the unit growth parameters. */
1277 static void
1278 inline_small_functions (void)
1280 struct cgraph_node *node;
1281 struct cgraph_edge *edge;
1282 fibheap_t heap = fibheap_new ();
1283 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1284 int min_size, max_size;
1285 VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
1286 int initial_size = 0;
1288 if (flag_indirect_inlining)
1289 new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);
1291 if (dump_file)
1292 fprintf (dump_file,
1293 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1294 initial_size);
1296 /* Populate the heap with all edges we might inline.
1297 While doing so compute the overall unit size and other global
1298 parameters used by the badness metrics. */
1300 max_count = 0;
1301 max_benefit = 0;
1302 for (node = cgraph_nodes; node; node = node->next)
1303 if (node->analyzed
1304 && !node->global.inlined_to)
1306 struct inline_summary *info = inline_summary (node);
1308 if (dump_file)
1309 fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
1310 cgraph_node_name (node), node->uid);
1312 info->estimated_growth = INT_MIN;
1314 if (!DECL_EXTERNAL (node->decl))
1315 initial_size += info->size;
1317 for (edge = node->callers; edge; edge = edge->next_caller)
1319 int benefit = (info->time_inlining_benefit
1320 + edge->call_stmt_time);
1321 if (max_count < edge->count)
1322 max_count = edge->count;
1323 if (max_benefit < benefit)
1324 max_benefit = benefit;
1325 if (edge->inline_failed
1326 && can_inline_edge_p (edge, true)
1327 && want_inline_small_function_p (edge, true)
1328 && edge->inline_failed)
1330 gcc_assert (!edge->aux);
1331 update_edge_key (heap, edge);
1336 max_size = compute_max_insns (overall_size);
1337 min_size = overall_size;
1338 gcc_assert (in_lto_p
1339 || !max_count
1340 || (profile_info && flag_branch_probabilities));
1341 overall_size = initial_size;
1343 while (!fibheap_empty (heap))
1345 int old_size = overall_size;
1346 struct cgraph_node *where, *callee;
1347 int badness = fibheap_min_key (heap);
1348 int current_badness;
1349 int growth;
1351 edge = (struct cgraph_edge *) fibheap_extract_min (heap);
1352 gcc_assert (edge->aux);
1353 edge->aux = NULL;
1354 if (!edge->inline_failed)
1355 continue;
1357 /* When updating the edge costs, we only decrease badness in the keys.
1358 Increases of badness are handled lazily; when we see a key with an out
1359 of date value on it, we re-insert it now. */
1360 current_badness = edge_badness (edge, false);
1361 gcc_assert (current_badness >= badness);
1362 if (current_badness != badness)
1364 edge->aux = fibheap_insert (heap, current_badness, edge);
1365 continue;
1368 if (!can_inline_edge_p (edge, true))
1369 continue;
1371 callee = edge->callee;
1372 growth = estimate_edge_growth (edge);
1373 if (dump_file)
1375 fprintf (dump_file,
1376 "\nConsidering %s with %i size\n",
1377 cgraph_node_name (edge->callee),
1378 inline_summary (edge->callee)->size);
1379 fprintf (dump_file,
1380 " to be inlined into %s in %s:%i\n"
1381 " Estimated growth after inlined into all is %+i insns.\n"
1382 " Estimated badness is %i, frequency %.2f.\n",
1383 cgraph_node_name (edge->caller),
1384 flag_wpa ? "unknown"
1385 : gimple_filename ((const_gimple) edge->call_stmt),
1386 flag_wpa ? -1
1387 : gimple_lineno ((const_gimple) edge->call_stmt),
1388 estimate_growth (edge->callee),
1389 badness,
1390 edge->frequency / (double)CGRAPH_FREQ_BASE);
1391 if (edge->count)
1392 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1393 edge->count);
1394 if (dump_flags & TDF_DETAILS)
1395 edge_badness (edge, true);
1398 if (overall_size + growth > max_size
1399 && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
1401 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1402 report_inline_failed_reason (edge);
1403 continue;
1406 if (!want_inline_small_function_p (edge, true))
1407 continue;
1409 /* The heuristics for inlining small functions work poorly for
1410 recursive calls where we achieve an effect similar to loop unrolling.
1411 When inlining such an edge seems profitable, leave the decision to the
1412 specialized recursive inliner. */
1413 if (cgraph_edge_recursive_p (edge))
1415 where = edge->caller;
1416 if (where->global.inlined_to)
1417 where = where->global.inlined_to;
1418 if (!recursive_inlining (edge,
1419 flag_indirect_inlining
1420 ? &new_indirect_edges : NULL))
1422 edge->inline_failed = CIF_RECURSIVE_INLINING;
1423 continue;
1425 /* Recursive inliner inlines all recursive calls of the function
1426 at once. Consequently we need to update all callee keys. */
1427 if (flag_indirect_inlining)
1428 add_new_edges_to_heap (heap, new_indirect_edges);
1429 update_all_callee_keys (heap, where, updated_nodes);
1431 else
1433 struct cgraph_node *callee;
1434 struct cgraph_node *outer_node = NULL;
1435 int depth = 0;
1437 /* Consider the case where self recursive function A is inlined into B.
1438 This is a desired optimization in some cases, since it leads to an effect
1439 similar to loop peeling and we might completely optimize out the
1440 recursive call. However we must be extra selective. */
1442 where = edge->caller;
1443 while (where->global.inlined_to)
1445 if (where->decl == edge->callee->decl)
1446 outer_node = where, depth++;
1447 where = where->callers->caller;
1449 if (outer_node
1450 && !want_inline_self_recursive_call_p (edge, outer_node,
1451 true, depth))
1453 edge->inline_failed
1454 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1455 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1456 continue;
1458 else if (depth && dump_file)
1459 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1461 callee = edge->callee;
1462 gcc_checking_assert (!callee->global.inlined_to);
1463 cgraph_mark_inline_edge (edge, true, &new_indirect_edges);
1464 if (flag_indirect_inlining)
1465 add_new_edges_to_heap (heap, new_indirect_edges);
1467 /* We inlined the last offline copy into the body. This might lead
1468 to callees of the function having fewer call sites and thus they
1469 may need updating. */
1470 if (callee->global.inlined_to)
1471 update_all_callee_keys (heap, callee, updated_nodes);
1472 else
1473 update_callee_keys (heap, edge->callee, updated_nodes);
1475 where = edge->caller;
1476 if (where->global.inlined_to)
1477 where = where->global.inlined_to;
1479 /* Our profitability metric can depend on local properties
1480 such as number of inlinable calls and size of the function body.
1481 After inlining these properties might change for the function we
1482 inlined into (since its body size changed) and for the functions
1483 called by the function we inlined (since the number of their inlinable callers
1484 might change). */
1485 update_caller_keys (heap, where, updated_nodes);
1487 /* We removed one call of the function we just inlined. If offline
1488 copy is still needed, be sure to update the keys. */
1489 if (callee != where && !callee->global.inlined_to)
1490 update_caller_keys (heap, callee, updated_nodes);
1491 bitmap_clear (updated_nodes);
1493 if (dump_file)
1495 fprintf (dump_file,
1496 " Inlined into %s which now has time %i and size %i,"
1497 "net change of %+i.\n",
1498 cgraph_node_name (edge->caller),
1499 inline_summary (edge->caller)->time,
1500 inline_summary (edge->caller)->size,
1501 overall_size - old_size);
1503 if (min_size > overall_size)
1505 min_size = overall_size;
1506 max_size = compute_max_insns (min_size);
1508 if (dump_file)
1509 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1513 if (new_indirect_edges)
1514 VEC_free (cgraph_edge_p, heap, new_indirect_edges);
1515 fibheap_delete (heap);
1516 if (dump_file)
1517 fprintf (dump_file,
1518 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1519 overall_size, initial_size,
1520 overall_size * 100 / (initial_size + 1) - 100);
1521 BITMAP_FREE (updated_nodes);
1524 /* Flatten NODE. Performed both during early inlining and
1525 at IPA inlining time. */
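/* For example, a function declared as

     void __attribute__ ((flatten)) render (void);

   requests that every call inside render, and inside the bodies already
   inlined into it, is inlined where possible, independently of the usual
   size heuristics.  */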
1527 static void
1528 flatten_function (struct cgraph_node *node)
1530 struct cgraph_edge *e;
1532 /* We shouldn't be called recursively when we are being processed. */
1533 gcc_assert (node->aux == NULL);
1535 node->aux = (void *) node;
1537 for (e = node->callees; e; e = e->next_callee)
1539 struct cgraph_node *orig_callee;
1541 /* We've hit cycle? It is time to give up. */
1542 if (e->callee->aux)
1544 if (dump_file)
1545 fprintf (dump_file,
1546 "Not inlining %s into %s to avoid cycle.\n",
1547 cgraph_node_name (e->callee),
1548 cgraph_node_name (e->caller));
1549 e->inline_failed = CIF_RECURSIVE_INLINING;
1550 continue;
1553 /* When the edge is already inlined, we just need to recurse into
1554 it in order to fully flatten the leaves. */
1555 if (!e->inline_failed)
1557 flatten_function (e->callee);
1558 continue;
1561 /* Flatten attribute needs to be processed during late inlining. For
1562 extra code quality we however do flattening during early optimization,
1563 too. */
1564 if (cgraph_state != CGRAPH_STATE_IPA_SSA
1565 ? !can_inline_edge_p (e, true)
1566 : !can_early_inline_edge_p (e))
1567 continue;
1569 if (cgraph_edge_recursive_p (e))
1571 if (dump_file)
1572 fprintf (dump_file, "Not inlining: recursive call.\n");
1573 continue;
1576 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1577 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1579 if (dump_file)
1580 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1581 continue;
1584 /* Inline the edge and flatten the inline clone. Avoid
1585 recursing through the original node if the node was cloned. */
1586 if (dump_file)
1587 fprintf (dump_file, " Inlining %s into %s.\n",
1588 cgraph_node_name (e->callee),
1589 cgraph_node_name (e->caller));
1590 orig_callee = e->callee;
1591 cgraph_mark_inline_edge (e, true, NULL);
1592 if (e->callee != orig_callee)
1593 orig_callee->aux = (void *) node;
1594 flatten_function (e->callee);
1595 if (e->callee != orig_callee)
1596 orig_callee->aux = NULL;
1599 node->aux = NULL;
1602 /* Decide on the inlining. We do so in topological order to avoid
1603 the expense of updating data structures. */
1605 static unsigned int
1606 ipa_inline (void)
1608 struct cgraph_node *node;
1609 int nnodes;
1610 struct cgraph_node **order =
1611 XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1612 int i;
1614 if (in_lto_p && flag_indirect_inlining)
1615 ipa_update_after_lto_read ();
1616 if (flag_indirect_inlining)
1617 ipa_create_all_structures_for_iinln ();
1619 if (dump_file)
1620 dump_inline_summaries (dump_file);
1622 nnodes = cgraph_postorder (order);
1624 for (node = cgraph_nodes; node; node = node->next)
1625 node->aux = 0;
1627 if (dump_file)
1628 fprintf (dump_file, "\nFlattening functions:\n");
1630 /* In the first pass handle functions to be flattened. Do this with
1631 a priority so none of our later choices will make this impossible. */
1632 for (i = nnodes - 1; i >= 0; i--)
1634 node = order[i];
1636 /* Handle nodes to be flattened.
1637 Ideally when processing callees we stop inlining at the
1638 entry of cycles, possibly cloning that entry point and
1639 try to flatten itself turning it into a self-recursive
1640 function. */
1641 if (lookup_attribute ("flatten",
1642 DECL_ATTRIBUTES (node->decl)) != NULL)
1644 if (dump_file)
1645 fprintf (dump_file,
1646 "Flattening %s\n", cgraph_node_name (node));
1647 flatten_function (node);
1651 inline_small_functions ();
1652 cgraph_remove_unreachable_nodes (true, dump_file);
1653 free (order);
1655 /* We already perform some inlining of functions called once during
1656 inlining small functions above. After unreachable nodes are removed,
1657 we still might do a quick check that nothing new is found. */
1658 if (flag_inline_functions_called_once)
1660 int cold;
1661 if (dump_file)
1662 fprintf (dump_file, "\nDeciding on functions called once:\n");
1664 /* Inlining one function called once has a good chance of preventing
1665 the inlining of another function into the same callee. Ideally we should
1666 work in priority order, but probably inlining hot functions first
1667 is a good cut without the extra pain of maintaining the queue.
1669 ??? this is not really fitting the bill perfectly: inlining a function
1670 into a callee often leads to better optimization of the callee due to
1671 increased context for optimization.
1672 For example if the main() function calls a function that outputs help
1673 and then a function that does the main optimization, we should inline
1674 the second with priority even if both calls are cold by themselves.
1676 We probably want to implement new predicate replacing our use of
1677 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1678 to be hot. */
1679 for (cold = 0; cold <= 1; cold ++)
1681 for (node = cgraph_nodes; node; node = node->next)
1683 if (want_inline_function_called_once_p (node)
1684 && (cold
1685 || cgraph_maybe_hot_edge_p (node->callers)))
1687 struct cgraph_node *caller = node->callers->caller;
1689 if (dump_file)
1691 fprintf (dump_file,
1692 "\nInlining %s size %i.\n",
1693 cgraph_node_name (node), inline_summary (node)->size);
1694 fprintf (dump_file,
1695 " Called once from %s %i insns.\n",
1696 cgraph_node_name (node->callers->caller),
1697 inline_summary (node->callers->caller)->size);
1700 cgraph_mark_inline_edge (node->callers, true, NULL);
1701 if (dump_file)
1702 fprintf (dump_file,
1703 " Inlined into %s which now has %i size\n",
1704 cgraph_node_name (caller),
1705 inline_summary (caller)->size);
1711 /* Free ipa-prop structures if they are no longer needed. */
1712 if (flag_indirect_inlining)
1713 ipa_free_all_structures_after_iinln ();
1715 if (dump_file)
1716 fprintf (dump_file,
1717 "\nInlined %i calls, eliminated %i functions\n\n",
1718 ncalls_inlined, nfunctions_inlined);
1720 /* In WPA we use inline summaries for partitioning process. */
1721 if (!flag_wpa)
1722 inline_free_summary ();
1723 return 0;
1726 /* Inline always-inline function calls in NODE. */
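/* For example, a callee declared as

     static inline int get_flag (void) __attribute__ ((always_inline));

   is inlined here even when not optimizing, unless the call is recursive or
   the body is unavailable, in which case the corresponding CIF_* reason is
   recorded on the edge instead.  */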
1728 static bool
1729 inline_always_inline_functions (struct cgraph_node *node)
1731 struct cgraph_edge *e;
1732 bool inlined = false;
1734 for (e = node->callees; e; e = e->next_callee)
1736 if (!DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
1737 continue;
1739 if (cgraph_edge_recursive_p (e))
1741 if (dump_file)
1742 fprintf (dump_file, " Not inlining recursive call to %s.\n",
1743 cgraph_node_name (e->callee));
1744 e->inline_failed = CIF_RECURSIVE_INLINING;
1745 continue;
1748 if (!can_early_inline_edge_p (e))
1749 continue;
1751 if (dump_file)
1752 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
1753 cgraph_node_name (e->callee),
1754 cgraph_node_name (e->caller));
1755 cgraph_mark_inline_edge (e, true, NULL);
1756 inlined = true;
1759 return inlined;
1762 /* Decide on the inlining. We do so in the topological order to avoid
1763 expenses on updating data structures. */
1765 static bool
1766 early_inline_small_functions (struct cgraph_node *node)
1768 struct cgraph_edge *e;
1769 bool inlined = false;
1771 for (e = node->callees; e; e = e->next_callee)
1773 if (!inline_summary (e->callee)->inlinable
1774 || !e->inline_failed)
1775 continue;
1777 /* Do not consider functions not declared inline. */
1778 if (!DECL_DECLARED_INLINE_P (e->callee->decl)
1779 && !flag_inline_small_functions
1780 && !flag_inline_functions)
1781 continue;
1783 if (dump_file)
1784 fprintf (dump_file, "Considering inline candidate %s.\n",
1785 cgraph_node_name (e->callee));
1787 if (!can_early_inline_edge_p (e))
1788 continue;
1790 if (cgraph_edge_recursive_p (e))
1792 if (dump_file)
1793 fprintf (dump_file, " Not inlining: recursive call.\n");
1794 continue;
1797 if (!want_early_inline_function_p (e))
1798 continue;
1800 if (dump_file)
1801 fprintf (dump_file, " Inlining %s into %s.\n",
1802 cgraph_node_name (e->callee),
1803 cgraph_node_name (e->caller));
1804 cgraph_mark_inline_edge (e, true, NULL);
1805 inlined = true;
1808 return inlined;
1811 /* Do inlining of small functions. Doing so early helps profiling and other
1812 passes to be somewhat more effective and avoids some code duplication in
1813 later real inlining pass for testcases with very many function calls. */
1814 static unsigned int
1815 early_inliner (void)
1817 struct cgraph_node *node = cgraph_get_node (current_function_decl);
1818 struct cgraph_edge *edge;
1819 unsigned int todo = 0;
1820 int iterations = 0;
1821 bool inlined = false;
1823 if (seen_error ())
1824 return 0;
1826 #ifdef ENABLE_CHECKING
1827 verify_cgraph_node (node);
1828 #endif
1830 /* Even when not optimizing or not inlining, inline always-inline
1831 functions. */
1832 inlined = inline_always_inline_functions (node);
1834 if (!optimize
1835 || flag_no_inline
1836 || !flag_early_inlining
1837 /* Never inline regular functions into always-inline functions
1838 during incremental inlining. This sucks as functions calling
1839 always inline functions will get less optimized, but at the
1840 same time inlining of functions calling always inline
1841 function into an always inline function might introduce
1842 cycles of edges to be always inlined in the callgraph.
1844 We might want to be smarter and just avoid this type of inlining. */
1845 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
1847 else if (lookup_attribute ("flatten",
1848 DECL_ATTRIBUTES (node->decl)) != NULL)
1850 /* When the function is marked to be flattened, recursively inline
1851 all calls in it. */
1852 if (dump_file)
1853 fprintf (dump_file,
1854 "Flattening %s\n", cgraph_node_name (node));
1855 flatten_function (node);
1856 inlined = true;
1858 else
1860 /* We iterate incremental inlining to get trivial cases of indirect
1861 inlining. */
1862 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
1863 && early_inline_small_functions (node))
1865 timevar_push (TV_INTEGRATION);
1866 todo |= optimize_inline_calls (current_function_decl);
1868 /* Technically we ought to recompute inline parameters so the new
1869 iteration of early inliner works as expected. We however have
1870 values approximately right and thus we only need to update edge
1871 info that might be cleared out for newly discovered edges. */
1872 for (edge = node->callees; edge; edge = edge->next_callee)
1874 edge->call_stmt_size
1875 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
1876 edge->call_stmt_time
1877 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
1879 timevar_pop (TV_INTEGRATION);
1880 iterations++;
1881 inlined = false;
1883 if (dump_file)
1884 fprintf (dump_file, "Iterations: %i\n", iterations);
1887 if (inlined)
1889 timevar_push (TV_INTEGRATION);
1890 todo |= optimize_inline_calls (current_function_decl);
1891 timevar_pop (TV_INTEGRATION);
1894 cfun->always_inline_functions_inlined = true;
1896 return todo;
1899 struct gimple_opt_pass pass_early_inline =
1902 GIMPLE_PASS,
1903 "einline", /* name */
1904 NULL, /* gate */
1905 early_inliner, /* execute */
1906 NULL, /* sub */
1907 NULL, /* next */
1908 0, /* static_pass_number */
1909 TV_INLINE_HEURISTICS, /* tv_id */
1910 PROP_ssa, /* properties_required */
1911 0, /* properties_provided */
1912 0, /* properties_destroyed */
1913 0, /* todo_flags_start */
1914 TODO_dump_func /* todo_flags_finish */
1919 /* Apply inline plan to function. */
1920 static unsigned int
1921 inline_transform (struct cgraph_node *node)
1923 unsigned int todo = 0;
1924 struct cgraph_edge *e;
1925 bool inline_p = false;
1927 /* FIXME: Currently the pass manager is adding inline transform more than
1928 once to some clones. This needs revisiting after WPA cleanups. */
1929 if (cfun->after_inlining)
1930 return 0;
1932 /* We might need the body of this function so that we can expand
1933 it inline somewhere else. */
1934 if (cgraph_preserve_function_body_p (node->decl))
1935 save_inline_function_body (node);
1937 for (e = node->callees; e; e = e->next_callee)
1939 cgraph_redirect_edge_call_stmt_to_callee (e);
1940 if (!e->inline_failed || warn_inline)
1941 inline_p = true;
1944 if (inline_p)
1946 timevar_push (TV_INTEGRATION);
1947 todo = optimize_inline_calls (current_function_decl);
1948 timevar_pop (TV_INTEGRATION);
1950 cfun->always_inline_functions_inlined = true;
1951 cfun->after_inlining = true;
1952 return todo | execute_fixup_cfg ();
1955 /* When to run IPA inlining. Inlining of always-inline functions
1956 happens during early inlining. */
1958 static bool
1959 gate_ipa_inline (void)
1961 /* ??? We'd like to skip this if not optimizing or not inlining as
1962 all always-inline functions have been processed by early
1963 inlining already. But this at least breaks EH with C++ as
1964 we need to unconditionally run fixup_cfg even at -O0.
1965 So leave it on unconditionally for now. */
1966 return 1;
1969 struct ipa_opt_pass_d pass_ipa_inline =
1972 IPA_PASS,
1973 "inline", /* name */
1974 gate_ipa_inline, /* gate */
1975 ipa_inline, /* execute */
1976 NULL, /* sub */
1977 NULL, /* next */
1978 0, /* static_pass_number */
1979 TV_INLINE_HEURISTICS, /* tv_id */
1980 0, /* properties_required */
1981 0, /* properties_provided */
1982 0, /* properties_destroyed */
1983 TODO_remove_functions, /* todo_flags_start */
1984 TODO_dump_cgraph | TODO_dump_func
1985 | TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1987 inline_generate_summary, /* generate_summary */
1988 inline_write_summary, /* write_summary */
1989 inline_read_summary, /* read_summary */
1990 NULL, /* write_optimization_summary */
1991 NULL, /* read_optimization_summary */
1992 NULL, /* stmt_fixup */
1993 0, /* TODOs */
1994 inline_transform, /* function_transform */
1995 NULL, /* variable_transform */