/* Inlining decision heuristics.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*  Inlining decision heuristics

    The implementation of the inliner is organized as follows:

    inlining heuristics limits

      can_inline_edge_p allows one to check that a particular inlining is
      allowed by the limits specified by the user (allowed function growth,
      stack growth and so on).

      Functions are inlined when it is obvious that the result is profitable
      (such as functions called once or when inlining reduces code size).
      In addition to that we perform inlining of small functions and
      recursive inlining.

    inlining heuristics

       The inliner itself is split into two passes:

       pass_early_inlining

         A simple local inlining pass inlining callees into the current
         function.  This pass makes no use of whole unit analysis and thus
         can make only very simple decisions based on local properties.

         The strength of the pass is that it is run in topological order
         (reverse postorder) on the callgraph.  Functions are converted into
         SSA form just before this pass and optimized subsequently.  As a
         result, the callees of the function seen by the early inliner were
         already optimized and the results of early inlining add a lot of
         optimization opportunities for the local optimization.

         The pass handles the obvious inlining decisions within the
         compilation unit - inlining auto inline functions, inlining for
         size and flattening.

         The main strength of the pass is the ability to eliminate
         abstraction penalty in C++ code (via a combination of inlining and
         early optimization) and thus improve the quality of analysis done
         by the real IPA optimizers.

         Because of the lack of whole unit knowledge, the pass cannot really
         make good code size/performance tradeoffs.  It however does very
         simple speculative inlining allowing code size to grow by
         EARLY_INLINING_INSNS when the callee is a leaf function.  In this
         case the optimizations performed later are very likely to eliminate
         the cost.

   pass_ipa_inline

     This is the real inliner able to handle inlining with whole program
     knowledge.  It performs the following steps:

     1) inlining of small functions.  This is implemented by a greedy
     algorithm ordering all inlinable cgraph edges by their badness and
     inlining them in this order as long as the inline limits allow doing so.

     This heuristic is not very good at inlining recursive calls.  Recursive
     calls can be inlined with results similar to loop unrolling.  To do so,
     a special purpose recursive inliner is executed on a function when a
     recursive edge is met as a viable candidate.

     2) Unreachable functions are removed from the callgraph.  Inlining leads
     to devirtualization and other modifications of the callgraph, so
     functions may become unreachable during the process.  Also functions
     declared as extern inline or virtual functions are removed, since after
     inlining we no longer need the offline bodies.

     3) Functions called once and not exported from the unit are inlined.
     This should almost always lead to a reduction of code size by
     eliminating the need for an offline copy of the function.  */
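
/* Rough sketch of step 1 above (this is an illustration, not a literal
   transcription of inline_small_functions () below):

     put every inlinable edge into a heap keyed by edge_badness ();
     while the heap is not empty and unit size is below compute_max_insns ():
       take the edge with the smallest badness;
       if it is still inlinable and wanted, inline it;
       recompute the keys of the affected caller and callee edges.

   The real loop additionally handles speculative edges, recursive
   inlining and growth-cache invalidation.  */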
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "diagnostic.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
#include "rtl.h"
#include "bitmap.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "ipa-prop.h"
#include "toplev.h"
#include "dbgcnt.h"
#include "except.h"
#include "l-ipo.h"
#include "target.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "auto-profile.h"
#include "sreal.h"
#include "cilk.h"

/* Statistics we collect about the inlining algorithm.  */
static int overall_size;
static gcov_type max_count;
static sreal max_count_real, max_relbenefit_real, half_int_min_real;

/* Global variable denoting whether we are in the ipa-inline pass.  */
bool is_in_ipa_inline = false;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  struct inline_summary *info, *what_info, *outer_info = inline_summary (to);

  /* Look for the function e->caller is inlined to.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of the inline code
     we immediately inline to.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      info = inline_summary (to);
      if (limit < info->self_size)
        limit = info->self_size;
      if (stack_size_limit < info->estimated_self_stack_size)
        stack_size_limit = info->estimated_self_stack_size;
      if (to->global.inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  what_info = inline_summary (what);

  if (limit < what_info->self_size)
    limit = what_info->self_size;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= info->size
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);

  inlined_stack = (outer_info->estimated_stack_size
                   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit overoptimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > info->estimated_stack_size
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
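
/* Illustration of the growth limit above (the numbers are examples, not
   the build defaults): if the largest self_size seen on the chain of
   inlined-to callers is 300 and --param large-function-growth is 100,
   then limit = 300 + 300 * 100 / 100 = 600.  An inlining whose estimated
   result exceeds both --param large-function-insns and this cap is
   rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT.  */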
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_file)
    {
      fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
               xstrdup (e->caller->name ()), e->caller->order,
               xstrdup (e->callee->name ()), e->callee->order,
               cgraph_inline_failed_string (e->inline_failed));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  /* Don't care if the sanitizer is disabled.  */
  if (!(flag_sanitize & SANITIZE_ADDRESS))
    return true;

  if (!caller || !callee)
    return true;

  return !!lookup_attribute ("no_sanitize_address",
                             DECL_ATTRIBUTES (caller)) ==
         !!lookup_attribute ("no_sanitize_address",
                             DECL_ATTRIBUTES (callee));
}
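
/* Example of the check above: with -fsanitize=address active, a callee
   marked __attribute__((no_sanitize_address)) is not inlined into a
   plain (instrumented) caller, and vice versa, because address
   sanitizer instrumentation is applied after inlining and the inlined
   body would otherwise gain or lose instrumentation.  */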
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
                   bool disregard_limits = false)
{
  bool inlinable = true;
  enum availability avail;
  struct cgraph_node *callee
    = cgraph_function_or_thunk_node (e->callee, &avail);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
  struct function *callee_cfun
    = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;

  if (!caller_cfun && e->caller->clone_of)
    caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);

  if (!callee_cfun && callee && callee->clone_of)
    callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);

  gcc_assert (e->inline_failed);

  if (!callee || !callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (!inline_summary (callee)->inlinable
           || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  else if (avail <= AVAIL_OVERWRITABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  else if (e->call_stmt_cannot_inline_p)
    {
      if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
        e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
      inlinable = false;
    }
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl)
           && !is_tm_pure (e->caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Don't inline if the callee can throw non-call exceptions but the
     caller cannot.
     FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
     Move the flag into the cgraph node or mirror it in the inline summary.  */
  else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
           && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
    {
      e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (e->caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
    {
      e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }
  /* Check if caller growth allows the inlining.  */
  else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
           && !disregard_limits
           && !lookup_attribute ("flatten",
                                 DECL_ATTRIBUTES
                                   (e->caller->global.inlined_to
                                    ? e->caller->global.inlined_to->decl
                                    : e->caller->decl))
           && !caller_growth_limits (e))
    inlinable = false;
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      struct cl_optimization *caller_opt
        = TREE_OPTIMIZATION ((caller_tree)
                             ? caller_tree
                             : optimization_default_node);

      struct cl_optimization *callee_opt
        = TREE_OPTIMIZATION ((callee_tree)
                             ? callee_tree
                             : optimization_default_node);

      if (((caller_opt->x_optimize > callee_opt->x_optimize)
           || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
          /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
          && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
                                                              NULL);
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }

  /* Skip fake edges.  */
  if (L_IPO_COMP_MODE && !e->call_stmt)
    return false;

  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have a CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true))
    return false;
  return true;
}


/* Return the number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  /* The following is buggy -- indirect calls are not considered.  */
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (e->call_stmt /* Only exists in the profile-use pass in LIPO.  */
        && !is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in inlining the small function early.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      struct cgraph_node *callee = e->callee;
      int n;

      if (growth <= PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_ANY))
        ;
      else if (!cgraph_maybe_hot_edge_p (e))
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "call is cold and code would grow by %i\n",
                     xstrdup (e->caller->name ()),
                     e->caller->order,
                     xstrdup (callee->name ()), callee->order,
                     growth);
          want_inline = false;
        }
      else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "growth %i exceeds --param early-inlining-insns\n",
                     xstrdup (e->caller->name ()),
                     e->caller->order,
                     xstrdup (callee->name ()), callee->order,
                     growth);
          want_inline = false;
        }
      else if (!flag_auto_profile && DECL_COMDAT (callee->decl)
               && growth <= PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_COMDAT))
        ;
      else if ((n = num_calls (callee)) != 0
               && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "growth %i exceeds --param early-inlining-insns "
                     "divided by number of calls\n",
                     xstrdup (e->caller->name ()),
                     e->caller->order,
                     xstrdup (callee->name ()), callee->order,
                     growth);
          want_inline = false;
        }
    }
  return want_inline;
}
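
/* Worked example for the last test above (the numbers are illustrative):
   a callee with estimated growth 6 that itself makes n = 3 non-builtin
   calls is rejected when 6 * (3 + 1) = 24 exceeds
   --param early-inlining-insns; non-leaf callees like this are less
   likely to shrink after the early optimizations, as noted in the
   header comment of this file.  */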
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline gcov_type
compute_uninlined_call_time (struct inline_summary *callee_info,
                             struct cgraph_edge *edge)
{
  gcov_type uninlined_call_time =
    RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
          CGRAPH_FREQ_BASE);
  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
                                          ? edge->caller->global.inlined_to
                                          : edge->caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline gcov_type
compute_inlined_call_time (struct cgraph_edge *edge,
                           int edge_time)
{
  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
                                          ? edge->caller->global.inlined_to
                                          : edge->caller)->time;
  gcov_type time = (caller_time
                    + RDIV (((gcov_type) edge_time
                             - inline_edge_summary (edge)->call_stmt_time)
                    * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
  /* Possibly one roundoff error, but watch for overflows.  */
  gcc_checking_assert (time >= INT_MIN / 2);
  if (time < 0)
    time = 0;
  return time;
}

/* Return true if the speedup for inlining E is bigger than
   PARAM_MAX_INLINE_MIN_SPEEDUP.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
                                                e);
  gcov_type inlined_time = compute_inlined_call_time (e,
                                                      estimate_edge_time (e));
  if (time - inlined_time
      > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
    return true;
  return false;
}
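
/* In other words, with the helpers above:

     uninlined = caller_time + callee_time * freq / CGRAPH_FREQ_BASE
     inlined   = caller_time
                 + (edge_time - call_stmt_time) * freq / CGRAPH_FREQ_BASE

   and the edge counts as a "big speedup" when

     uninlined - inlined > uninlined * inline-min-speedup / 100

   i.e. the estimated saving is at least the given percentage of the
   combined uninlined run time.  */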
/* Returns true if the callee of edge E is considered useful to inline
   even if it is cold.  A callee is considered useful if there is at
   least one argument of pointer type with IPA_JF_KNOWN_TYPE or
   IPA_JF_UNKNOWN as the jump function.  The reasoning here is that
   it is often beneficial to inline bar into foo in the following
   code even if the callsite is cold:

     void foo () {
       A a;
       bar (&a);
       ...
     }

   This exposes accesses to the 'a' object.  The jump function of &a
   is either IPA_JF_KNOWN_TYPE or IPA_JF_UNKNOWN (depending on
   intervening code).  */

static inline bool
useful_cold_callee (struct cgraph_edge *e)
{
  gimple call = e->call_stmt;
  int n, arg_num = gimple_call_num_args (call);
  struct ipa_edge_args *args = IPA_EDGE_REF (e);

  if (ipa_node_params_vector.exists ())
    {
      for (n = 0; n < arg_num; n++)
        {
          tree arg = gimple_call_arg (call, n);
          if (POINTER_TYPE_P (TREE_TYPE (arg)))
            {
              struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, n);
              if (jfunc->type == IPA_JF_KNOWN_TYPE
                  || jfunc->type == IPA_JF_UNKNOWN)
                return true;
            }
        }
    }
  return false;
}
/* Returns true if the hot caller heuristic should be used.  */

static inline bool
enable_hot_caller_heuristic (void)
{
  gcov_working_set_t *ws = NULL;
  int size_threshold = PARAM_VALUE (PARAM_HOT_CALLER_CODESIZE_THRESHOLD);
  int num_counters = 0;
  int param_inline_hot_caller = PARAM_VALUE (PARAM_INLINE_HOT_CALLER);

  if (param_inline_hot_caller == 0)
    return false;
  else if (param_inline_hot_caller == 1)
    return true;

  ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
  if (!ws)
    return false;
  num_counters = ws->num_counters;
  return num_counters <= size_threshold;
}

/* Returns true if an edge or its caller is hot enough to
   be considered for inlining.  */

static bool
edge_hot_enough_p (struct cgraph_edge *edge)
{
  static bool use_hot_caller_heuristic = enable_hot_caller_heuristic ();
  if (cgraph_maybe_hot_edge_p (edge))
    return true;

  /* We disable the hot-caller heuristic if the callee's entry count is
     0 because in this case we do not have enough information to
     calculate the scaling factor.  */
  if (flag_auto_profile && edge->callee->count == 0
      && edge->callee->max_bb_count > 0)
    return false;
  if (use_hot_caller_heuristic)
    {
      struct cgraph_node *where = edge->caller;
      if (maybe_hot_count_p (NULL, where->max_bb_count))
        {
          if (PARAM_VALUE (PARAM_INLINE_USEFUL_COLD_CALLEE))
            return useful_cold_callee (edge);
          else
            return true;
        }
    }

  return false;
}
/* Return true if we are interested in inlining the small function.
   When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check whether the function can be a good
     inline candidate.  At the moment we allow inline hints to
     promote non-inline functions to inline and we increase
     MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
              > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if (DECL_DECLARED_INLINE_P (callee->decl)
           && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
              > 16 * MAX_INLINE_INSNS_SINGLE)
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      inline_hints hints = estimate_edge_hints (e);
      bool big_speedup = big_speedup_p (e);

      if (growth <= 0)
        ;
      /* Apply the MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
         hints suggest that inlining the given function is very profitable.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= MAX_INLINE_INSNS_SINGLE
               && ((!big_speedup
                    && !(hints & (INLINE_HINT_indirect_call
                                  | INLINE_HINT_loop_iterations
                                  | INLINE_HINT_array_index
                                  | INLINE_HINT_loop_stride)))
                   || growth >= MAX_INLINE_INSNS_SINGLE * 16))
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !flag_inline_functions)
        {
          /* For functions not declared inline, if they have a big speedup
             or good hints for performance and the size growth is
             small, they are profitable to inline.  */
          if (big_speedup
              || (hints & (INLINE_HINT_indirect_call
                           | INLINE_HINT_loop_iterations
                           | INLINE_HINT_array_index
                           | INLINE_HINT_loop_stride)))
            {
              if (growth >= MAX_GROWTH_AUTO_INLINE_FUNC)
                want_inline = false;
            }
          /* growth_likely_positive is expensive, always test it last.  */
          else if (growth >= MAX_INLINE_INSNS_SINGLE
                   || growth_likely_positive (callee, growth))
            {
              e->inline_failed = CIF_NOT_DECLARED_INLINED;
              want_inline = false;
            }
        }
      /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared
         inline.  Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest
         that inlining the given function is very profitable.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !big_speedup
               && growth >= ((hints & (INLINE_HINT_indirect_call
                                       | INLINE_HINT_loop_iterations
                                       | INLINE_HINT_array_index
                                       | INLINE_HINT_loop_stride))
                             ? MAX (MAX_INLINE_INSNS_AUTO,
                                    MAX_INLINE_INSNS_SINGLE)
                             : MAX_INLINE_INSNS_AUTO))
        {
          /* growth_likely_positive is expensive, always test it last.  */
          if (growth >= MAX_INLINE_INSNS_SINGLE
              || growth_likely_positive (callee, growth))
            {
              e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
              want_inline = false;
            }
        }
      /* If the call is cold, do not inline when the function body would
         grow.  */
      else if (!edge_hot_enough_p (e)
               && (growth >= MAX_INLINE_INSNS_SINGLE
                   || growth_likely_positive (callee, growth)))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
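
/* Rough summary of the limits above (the exact parameter defaults depend
   on the build; see params.def): for a callee declared inline, growth
   below MAX_INLINE_INSNS_SINGLE is acceptable here; larger growth is
   still accepted when big_speedup_p () holds or a loop/indirect-call/
   array-index hint is present, but never once growth reaches
   16 * MAX_INLINE_INSNS_SINGLE.  Functions not declared inline use the
   smaller MAX_INLINE_INSNS_AUTO bound, upgraded to
   MAX_INLINE_INSNS_SINGLE by the same hints.  */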
/* EDGE is a self recursive edge.
   We handle two cases - when function A is being inlined into itself
   or when function A is being inlined into another inlined copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  int caller_freq = CGRAPH_FREQ_BASE;
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);

  if (!cgraph_maybe_hot_edge_p (edge))
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (max_count && !outer_node->count)
    {
      reason = "not executed in profile";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }

  if (outer_node->global.inlined_to)
    caller_freq = outer_node->callers->frequency;

  if (!caller_freq)
    {
      reason = "function is inlined and unlikely";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of a self recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self recursive function very
     small.  Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
                                         / max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
      if (max_count
          && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
              >= max_prob))
        {
          reason = "profile of recursive call is too large";
          want_inline = false;
        }
      if (!max_count
          && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
              >= max_prob))
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self recursive call within a loop also results
     in large loop depths that generally optimize badly.  We may want to
     throttle down inlining in those cases.  In particular this seems to
     happen in one of the libstdc++ rb tree methods.  */
  else
    {
      if (max_count
          && (edge->count * 100 / outer_node->count
              <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
        {
          reason = "profile of recursive call is too small";
          want_inline = false;
        }
      else if (!max_count
               && (edge->frequency * 100 / caller_freq
                   <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_file)
    fprintf (dump_file, "   not inlining recursively: %s\n", reason);
  return want_inline;
}
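
/* Worked example for the peeling branch above: with max_depth == 8,
   max_prob starts at CGRAPH_FREQ_BASE - ceil (CGRAPH_FREQ_BASE / 8),
   i.e. a recursion probability of at most 7/8, and is squared for each
   additional level of depth already peeled, so deeper copies are only
   created when the measured recursion probability is much smaller.  */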
/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!can_inline_edge_p (e, true))
        return true;
      if (!(*(bool *)has_hot_call) && cgraph_maybe_hot_edge_p (e))
        *(bool *)has_hot_call = true;
    }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}

/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
  bool has_hot_call = false;

  /* Does it have callers?  */
  if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
    return false;
  /* Already inlined?  */
  if (function->global.inlined_to)
    return false;
  if (cgraph_function_or_thunk_node (node, NULL) != node)
    return false;
  /* Inlining into all callers would increase size?  */
  if (estimate_growth (node) > 0)
    return false;
  /* All inlines must be possible.  */
  if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}
#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)

/* Return true if FNDECL is a function with a fixed
   argument list.  */

static bool
fixed_arg_function_p (tree fndecl)
{
  tree fntype = TREE_TYPE (fndecl);
  return (TYPE_ARG_TYPES (fntype) == 0
          || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
              == void_type_node));
}
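
/* Examples for the predicate above: "int f (int a)" and an old-style
   unprototyped declaration both count as fixed; "int g (int a, ...)"
   does not, because the last TYPE_ARG_TYPES entry is not void.  */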
/* For profile collection with flag_dyn_ipa (LIPO), we always
   want to inline comdat functions for the following reasons:
   1) Functions in a comdat group may actually be defined in a different
   module (depending on which copy the linker picks).  This results in an
   edge from one module to another module in the dynamic callgraph.
   The edge is false and results in unnecessary module grouping.
   2) The profile counters in comdat functions are not 'comdated'
   -- which means each copy of the same comdat function has its
   own set of counters.  With inlining, we are actually splitting
   the counters and making the profile information 'context sensitive',
   which is a good thing.
   3) During the profile-use pass of LIPO (flag_dyn_ipa == 1),
   the pre-tree_profile inline decisions have to be the same as in the
   profile-gen pass (otherwise a coverage mismatch will occur).  For
   this reason, it is better for each module to 'use' its own copy
   of the comdat.  The only way to get profile data for the copy is to
   inline the copy in the profile-gen phase.
   TODO: For indirectly called comdat functions, the above issues
   still exist.  */

static bool
better_inline_comdat_function_p (struct cgraph_node *node)
{
  return (profile_arc_flag && flag_dyn_ipa
          && DECL_COMDAT (node->decl)
          && inline_summary (node)->size
             <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE)
          && fixed_arg_function_p (node->decl));
}
/* Return the relative time improvement for inlining EDGE in the range
   1...RELATIVE_TIME_BENEFIT_RANGE.  */

static inline int
relative_time_benefit (struct inline_summary *callee_info,
                       struct cgraph_edge *edge,
                       int edge_time)
{
  gcov_type relbenefit;
  gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
  gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);

  /* Inlining into an extern inline function is not a win.  */
  if (DECL_EXTERNAL (edge->caller->global.inlined_to
                     ? edge->caller->global.inlined_to->decl
                     : edge->caller->decl))
    return 1;

  /* Watch overflows.  */
  gcc_checking_assert (uninlined_call_time >= 0);
  gcc_checking_assert (inlined_call_time >= 0);
  gcc_checking_assert (uninlined_call_time >= inlined_call_time);

  /* Compute the relative time benefit, i.e. how much the call becomes faster.
     ??? perhaps computing how much the caller+callee together become faster
     would lead to more realistic results.  */
  if (!uninlined_call_time)
    uninlined_call_time = 1;
  relbenefit =
    RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
          uninlined_call_time);
  relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
  gcc_checking_assert (relbenefit >= 0);
  relbenefit = MAX (relbenefit, 1);
  return relbenefit;
}
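
/* Equivalently, the value returned above is

     (uninlined_call_time - inlined_call_time)
     * RELATIVE_TIME_BENEFIT_RANGE / uninlined_call_time

   clamped to the range [1, RELATIVE_TIME_BENEFIT_RANGE], i.e. the
   fraction of the combined caller+call time that inlining is expected
   to save, rescaled to a fixed integer range so it can be used in the
   fixed-point badness computation below.  */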
/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of the nodes affected are recomputed so the
   metrics may accurately depend on values such as the number of inlinable
   callers of the function or the function body size.  */

static int
edge_badness (struct cgraph_edge *edge, bool dump)
{
  gcov_type badness;
  int growth, edge_time;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
                                                              NULL);
  struct inline_summary *callee_info = inline_summary (callee);
  inline_hints hints;

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    return INT_MIN;

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  gcc_checking_assert (edge_time <= callee_info->time);
  gcc_checking_assert (growth <= callee_info->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
               xstrdup (edge->caller->name ()),
               edge->caller->order,
               xstrdup (callee->name ()),
               edge->callee->order);
      fprintf (dump_file, "      size growth %i, time %i ",
               growth,
               edge_time);
      dump_inline_hints (dump_file, hints);
      if (big_speedup_p (edge))
        fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = INT_MIN / 2 + growth;
      if (dump)
        fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
                 growth);
    }

  /* When profiling is available, compute badness as:

                relative_edge_count * relative_time_benefit
     goodness = -------------------------------------------
                growth_f_caller
     badness = -goodness

     The fraction is upside down, because on edge counts and time benefits
     the bounds are known.  Edge growth is essentially unlimited.  */

  else if (max_count)
    {
      sreal tmp, relbenefit_real, growth_real;
      int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
      /* Cap edge->count to max_count.  edge->count can be larger than
         max_count if an inline adds new edges which increase max_count
         after max_count is computed.  */
      gcov_type edge_count = edge->count > max_count ? max_count : edge->count;

      sreal_init (&relbenefit_real, relbenefit, 0);
      sreal_init (&growth_real, growth, 0);

      /* relative_edge_count.  */
      sreal_init (&tmp, edge_count, 0);
      sreal_div (&tmp, &tmp, &max_count_real);

      /* relative_time_benefit.  */
      sreal_mul (&tmp, &tmp, &relbenefit_real);
      sreal_div (&tmp, &tmp, &max_relbenefit_real);

      /* growth_f_caller.  */
      sreal_mul (&tmp, &tmp, &half_int_min_real);
      sreal_div (&tmp, &tmp, &growth_real);

      badness = -1 * sreal_to_int (&tmp);

      if (dump)
        {
          fprintf (dump_file,
                   "      %i (relative %f): profile info. Relative count %f%s"
                   " * Relative benefit %f\n",
                   (int) badness, (double) badness / INT_MIN,
                   (double) edge_count / max_count,
                   edge->count > max_count ? " (capped to max_count)" : "",
                   relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
        }
    }
  /* When a function local profile is available, compute badness as:

                      relative_time_benefit
     goodness = ---------------------------------
                growth_of_caller * overall_growth

     badness = - goodness

     compensated by the inline hints.  */
  else if (flag_guess_branch_prob)
    {
      badness = (relative_time_benefit (callee_info, edge, edge_time)
                 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
      badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 16);
      if ((hints & (INLINE_HINT_indirect_call
                    | INLINE_HINT_loop_iterations
                    | INLINE_HINT_array_index
                    | INLINE_HINT_loop_stride))
          || callee_info->growth <= 0)
        badness *= 8;
      if (hints & (INLINE_HINT_same_scc))
        badness /= 16;
      else if (hints & (INLINE_HINT_in_scc))
        badness /= 8;
      else if (hints & (INLINE_HINT_cross_module))
        badness /= 2;
      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
      if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
        badness *= 16;
      if (dump)
        {
          fprintf (dump_file,
                   "      %i: guessed profile. frequency %f,"
                   " benefit %f%%, time w/o inlining %i, time w inlining %i"
                   " overall growth %i (current) %i (original)\n",
                   (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
                   relative_time_benefit (callee_info, edge, edge_time) * 100.0
                   / RELATIVE_TIME_BENEFIT_RANGE,
                   (int)compute_uninlined_call_time (callee_info, edge),
                   (int)compute_inlined_call_time (edge, edge_time),
                   estimate_growth (callee),
                   callee_info->growth);
        }
    }
  /* When a function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
      badness = growth * 256;

      /* Decrease badness if the call is nested.  */
      if (badness > 0)
        badness >>= nest;
      else
        {
          badness <<= nest;
        }
      if (dump)
        fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
                 nest);
    }

  /* Ensure that we did not overflow in all the fixed point math above.  */
  gcc_assert (badness >= INT_MIN);
  gcc_assert (badness <= INT_MAX - 1);
  /* Make recursive inlining happen always after other inlining is done.  */
  if (cgraph_edge_recursive_p (edge))
    return badness + 1;
  else
    {
      if (better_inline_comdat_function_p (edge->callee))
        return INT_MIN + 1;
      else
        return badness;
    }
}
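
/* Summary of the resulting ordering (smaller badness is inlined first):
   edges to DECL_DISREGARD_INLINE_LIMITS callees get INT_MIN, comdat
   candidates under LIPO profiling get INT_MIN + 1, size-shrinking edges
   get about INT_MIN / 2 + growth, and the remaining edges get a value
   scaled by the profile or loop-nest heuristics above, with recursive
   edges pushed slightly later by the final "+ 1".  */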
/* Recompute the badness of EDGE and update its key in HEAP if needed.  */
static inline void
update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
{
  int badness = edge_badness (edge, false);
  if (edge->aux)
    {
      fibnode_t n = (fibnode_t) edge->aux;
      gcc_checking_assert (n->data == edge);

      /* fibheap_replace_key only decreases the keys.
         When we increase the key we do not update the heap
         and instead re-insert the element once it becomes
         a minimum of the heap.  */
      if (badness < n->key)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file,
                       "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
                       xstrdup (edge->caller->name ()),
                       edge->caller->order,
                       xstrdup (edge->callee->name ()),
                       edge->callee->order,
                       (int)n->key,
                       badness);
            }
          fibheap_replace_key (heap, n, badness);
          gcc_checking_assert (n->key == badness);
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "  enqueuing call %s/%i -> %s/%i, badness %i\n",
                   xstrdup (edge->caller->name ()),
                   edge->caller->order,
                   xstrdup (edge->callee->name ()),
                   edge->callee->order,
                   badness);
        }
      edge->aux = fibheap_insert (heap, badness, edge);
    }
}
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  int i;
  struct ipa_ref *ref;

  if (where->global.inlined_to)
    where = where->global.inlined_to;

  /* WHERE body size has changed, the cached growth is invalid.  */
  reset_node_growth_cache (where);

  for (edge = where->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      reset_edge_growth_cache (edge);
  for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
                                              i, ref); i++)
    if (ref->use == IPA_REF_ALIAS)
      reset_edge_caches (ipa_ref_referring_node (ref));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        if (e->inline_failed)
          reset_edge_growth_cache (e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES track nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes,
                    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  int i;
  struct ipa_ref *ref;

  if ((!node->alias && !inline_summary (node)->inlinable)
      || node->global.inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->uid))
    return;

  for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
                                              i, ref); i++)
    if (ref->use == IPA_REF_ALIAS)
      {
        struct cgraph_node *alias = ipa_ref_referring_node (ref);
        update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
      }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        if (!check_inlinablity_for
            || check_inlinablity_for == edge)
          {
            if (can_inline_edge_p (edge, false)
                && (want_inline_small_function_p (edge, false)
                    || better_inline_comdat_function_p (node)))
              update_edge_key (heap, edge);
            else if (edge->aux)
              {
                report_inline_failed_reason (edge);
                fibheap_delete_node (heap, (fibnode_t) edge->aux);
                edge->aux = NULL;
              }
          }
        else if (edge->aux)
          update_edge_key (heap, edge);
      }
}
/* Recompute HEAP nodes for each uninlined call in NODE.
   This is used when we know that edge badnesses are going only to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        enum availability avail;
        struct cgraph_node *callee;
        /* We do not reset the callee growth cache here.  Since we added a
           new call, growth could have just increased and consequently the
           badness metric doesn't need updating.  */
        if (e->inline_failed
            && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
            && inline_summary (callee)->inlinable
            && avail >= AVAIL_AVAILABLE
            && !bitmap_bit_p (updated_nodes, callee->uid))
          {
            if (can_inline_edge_p (e, false)
                && want_inline_small_function_p (e, false))
              update_edge_key (heap, e);
            else if (e->aux)
              {
                report_inline_failed_reason (e);
                fibheap_delete_node (heap, (fibnode_t) e->aux);
                e->aux = NULL;
              }
          }
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
        || (cgraph_function_or_thunk_node (e->callee, &avail) == node
            && avail > AVAIL_OVERWRITABLE))
      {
        /* When profile feedback is available, prioritize by the expected
           number of calls.  */
        fibheap_insert (heap,
                        !max_count ? -e->frequency
                        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
                        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
/* Decide on recursive inlining: in the case the function has recursive
   calls, inline until the body size reaches the given limit.  If any new
   indirect edges are discovered in the process, add them to *NEW_EDGES,
   unless NEW_EDGES is NULL.  */

static bool
recursive_inlining (struct cgraph_edge *edge,
                    vec<cgraph_edge_p> *new_edges)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_node *node;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone = NULL, *next;
  int depth = 0;
  int n = 0;

  node = edge->caller;
  if (node->global.inlined_to)
    node = node->global.inlined_to;

  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (estimate_size_after_inlining (node, edge) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             node->name ());

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap))
    {
      struct cgraph_edge *curr
        = (struct cgraph_edge *) fibheap_extract_min (heap);
      struct cgraph_node *cnode, *dest = curr->callee;

      if (!can_inline_edge_p (curr, true))
        continue;

      /* MASTER_CLONE is produced in the case we already started modifying
         the function.  Be sure to redirect the edge to the original body
         before estimating growths, otherwise we will be seeing growths after
         inlining the already modified body.  */
      if (master_clone)
        {
          cgraph_redirect_edge_callee (curr, master_clone);
          reset_edge_growth_cache (curr);
        }

      if (estimate_size_after_inlining (node, curr) > limit)
        {
          cgraph_redirect_edge_callee (curr, dest);
          reset_edge_growth_cache (curr);
          break;
        }

      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl
            == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
          depth++;

      if (max_count)
        {
          if (!cgraph_maybe_hot_edge_p (curr))
            {
              if (dump_file)
                fprintf (dump_file, "   Not inlining cold call\n");

              cgraph_redirect_edge_callee (curr, dest);
              reset_edge_growth_cache (curr);
              continue;
            }
          if (node->count == 0 || curr->count * 100 / node->count < probability)
            {
              if (dump_file)
                fprintf (dump_file,
                         "   Probability of edge is too small\n");

              cgraph_redirect_edge_callee (curr, dest);
              reset_edge_growth_cache (curr);
              continue;
            }
        }

      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
        {
          cgraph_redirect_edge_callee (curr, dest);
          reset_edge_growth_cache (curr);
          continue;
        }

      if (!dbg_cnt (inl))
        continue;

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count)
            {
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count / node->count);
            }
          fprintf (dump_file, "\n");
        }
      if (!master_clone)
        {
          /* We need the original clone to copy around.  */
          master_clone = cgraph_clone_node (node, node->decl,
                                            node->count, CGRAPH_FREQ_BASE,
                                            false, vNULL, true, NULL, NULL);
          for (e = master_clone->callees; e; e = e->next_callee)
            if (!e->inline_failed)
              clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
          cgraph_redirect_edge_callee (curr, master_clone);
          reset_edge_growth_cache (curr);
        }

      inline_call (curr, false, new_edges, &overall_size, true);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }

  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");
  fibheap_delete (heap);

  if (!master_clone)
    return false;

  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, "
             "body grown from size %i to %i, time %i to %i\n", n,
             inline_summary (master_clone)->size, inline_summary (node)->size,
             inline_summary (master_clone)->time, inline_summary (node)->time);

  /* Remove the master clone we used for inlining.  We rely on the fact that
     clones inlined into the master clone get queued just before the master
     clone so we don't need recursion.  */
  for (node = cgraph_first_function (); node != master_clone;
       node = next)
    {
      next = cgraph_next_function (node);
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  return true;
}
/* Given a whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
          * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
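
/* Illustration (the parameter values are examples, not necessarily the
   defaults): for a unit whose functions total 4000 instructions with
   --param large-unit-insns=10000 and --param inline-unit-growth=30,
   max_insns is first raised to 10000 and the returned cap is
   10000 * (100 + 30) / 100 = 13000 instructions.  */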
/* Compute the badness of all edges in NEW_EDGES and add them to the HEAP.  */

static void
add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
{
  while (new_edges.length () > 0)
    {
      struct cgraph_edge *edge = new_edges.pop ();

      gcc_assert (!edge->aux);
      if (edge->inline_failed
          && can_inline_edge_p (edge, true)
          && want_inline_small_function_p (edge, true))
        edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
    }
}

/* Remove EDGE from the fibheap.  */

static void
heap_edge_removal_hook (struct cgraph_edge *e, void *data)
{
  if (e->callee)
    reset_node_growth_cache (e->callee);
  if (e->aux)
    {
      fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
      e->aux = NULL;
    }
}
/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
   may get inlined.  */

bool
speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
{
  enum availability avail;
  struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
  struct cgraph_edge *direct, *indirect;
  struct ipa_ref *ref;

  gcc_assert (e->speculative && !e->indirect_unknown_callee);

  if (!cgraph_maybe_hot_edge_p (e))
    return false;

  /* See if IP optimizations found something potentially useful about the
     function.  For now we look only for CONST/PURE flags.  Almost everything
     else we propagate is useless.  */
  if (avail >= AVAIL_AVAILABLE)
    {
      int ecf_flags = flags_from_decl_or_type (target->decl);
      if (ecf_flags & ECF_CONST)
        {
          cgraph_speculative_call_info (e, direct, indirect, ref);
          if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
            return true;
        }
      else if (ecf_flags & ECF_PURE)
        {
          cgraph_speculative_call_info (e, direct, indirect, ref);
          if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
            return true;
        }
    }
  /* If we did not manage to inline the function nor redirect
     to an ipa-cp clone (these are recognized by the local flag being set),
     it is probably pointless to inline it unless the hardware is missing
     an indirect call predictor.  */
  if (!anticipate_inlining && e->inline_failed && !target->local.local)
    return false;
  /* For overwritable targets there is not much to do.  */
  if (e->inline_failed && !can_inline_edge_p (e, false, true))
    return false;
  /* OK, speculation seems interesting.  */
  return true;
}
/* We know that EDGE is not going to be inlined.
   See if we can remove speculation.  */

static void
resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
{
  if (edge->speculative && !speculation_useful_p (edge, false))
    {
      struct cgraph_node *node = edge->caller;
      struct cgraph_node *where = node->global.inlined_to
                                  ? node->global.inlined_to : node;
      bitmap updated_nodes = BITMAP_ALLOC (NULL);

      cgraph_resolve_speculation (edge, NULL);
      reset_edge_caches (where);
      inline_update_overall_summary (where);
      update_caller_keys (edge_heap, where,
                          updated_nodes, NULL);
      update_callee_keys (edge_heap, where,
                          updated_nodes);
      BITMAP_FREE (updated_nodes);
    }
}
/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap ordered by
   increasing badness.

   The inlining of small functions is bounded by unit growth parameters.  */

static void
inline_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  fibheap_t edge_heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_size, max_size;
  auto_vec<cgraph_edge_p> new_indirect_edges;
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  struct cgraph_edge_hook_list *edge_removal_hook_holder;

  is_in_ipa_inline = true;

  if (flag_indirect_inlining)
    new_indirect_edges.create (8);

  edge_removal_hook_holder
    = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);

  /* Compute overall unit size and other global parameters used by badness
     metrics.  */

  max_count = 0;
  ipa_reduced_postorder (order, true, true, NULL);
  free (order);

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      {
        if (cgraph_function_with_gimple_body_p (node)
            || node->thunk.thunk_p)
          {
            struct inline_summary *info = inline_summary (node);
            struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;

            if (!DECL_EXTERNAL (node->decl))
              initial_size += info->size;
            info->growth = estimate_growth (node);
            if (dfs && dfs->next_cycle)
              {
                struct cgraph_node *n2;
                int id = dfs->scc_no + 1;
                for (n2 = node; n2;
                     n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
                  {
                    struct inline_summary *info2 = inline_summary (n2);
                    if (info2->scc_no)
                      break;
                    info2->scc_no = id;
                  }
              }
          }

        for (edge = node->callers; edge; edge = edge->next_caller)
          if (max_count < edge->count)
            max_count = edge->count;
        for (edge = node->indirect_calls; edge; edge = edge->next_callee)
          if (max_count < edge->count)
            max_count = edge->count;
      }
  sreal_init (&max_count_real, max_count, 0);
  sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
  sreal_init (&half_int_min_real, INT_MAX / 2, 0);
  ipa_free_postorder_info ();
  initialize_growth_caches ();

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining of small functions.  Starting with size %i.\n",
             initial_size);

  overall_size = initial_size;
  max_size = compute_max_insns (overall_size);
  min_size = overall_size;
1830 /* Populate the heap with all edges we might inline. */
1832 FOR_EACH_DEFINED_FUNCTION (node)
1834 bool update = false;
1835 struct cgraph_edge *next;
1837 if (dump_file)
1838 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1839 node->name (), node->order);
1841 for (edge = node->callees; edge; edge = next)
1843 next = edge->next_callee;
1844 if (edge->inline_failed
1845 && !edge->aux
1846 && can_inline_edge_p (edge, true)
1847 && (want_inline_small_function_p (edge, true)
1848 || better_inline_comdat_function_p (node))
1849 && edge->inline_failed)
1851 gcc_assert (!edge->aux);
1852 update_edge_key (edge_heap, edge);
1854 if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1856 cgraph_resolve_speculation (edge, NULL);
1857 update = true;
1860 if (update)
1862 struct cgraph_node *where = node->global.inlined_to
1863 ? node->global.inlined_to : node;
1864 inline_update_overall_summary (where);
1865 reset_node_growth_cache (where);
1866 reset_edge_caches (where);
1867 update_caller_keys (edge_heap, where,
1868 updated_nodes, NULL);
1869 bitmap_clear (updated_nodes);
1873 gcc_assert (in_lto_p
1874 || !max_count
1875 || (profile_info && flag_branch_probabilities));
1877 while (!fibheap_empty (edge_heap))
1879 int old_size = overall_size;
1880 struct cgraph_node *where, *callee;
1881 int badness = fibheap_min_key (edge_heap);
1882 int current_badness;
1883 int cached_badness;
1884 int growth;
1886 edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1887 gcc_assert (edge->aux);
1888 edge->aux = NULL;
1889 if (!edge->inline_failed || !edge->callee->analyzed)
1890 continue;
1892 if (L_IPO_COMP_MODE && !edge->call_stmt)
1893 continue;
1895 /* Be sure that the caches are kept consistent.
1896 We cannot make this ENABLE_CHECKING only, because it causes different
1897 updates of the fibheap queue. */
1898 cached_badness = edge_badness (edge, false);
1899 reset_edge_growth_cache (edge);
1900 reset_node_growth_cache (edge->callee);
1902 /* When updating the edge costs, we only decrease badness in the keys.
1903 Increases of badness are handled lazily; when we see a key with an
1904 out-of-date value on it, we re-insert it now. */
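/* For example, if earlier inlining increased this edge's badness from 5 to
   9, the heap may still hold the stale key 5; the check below notices the
   mismatch and re-inserts the edge with its current badness instead of
   inlining it right away.  */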
1905 current_badness = edge_badness (edge, false);
1906 gcc_assert (cached_badness == current_badness);
1907 gcc_assert (current_badness >= badness);
1908 if (current_badness != badness)
1910 edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1911 continue;
1914 if (!can_inline_edge_p (edge, true))
1916 resolve_noninline_speculation (edge_heap, edge);
1917 continue;
1920 callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1921 growth = estimate_edge_growth (edge);
1922 if (dump_file)
1924 fprintf (dump_file,
1925 "\nConsidering %s/%i with %i size\n",
1926 callee->name (), callee->order,
1927 inline_summary (callee)->size);
1928 fprintf (dump_file,
1929 " to be inlined into %s/%i in %s:%i\n"
1930 " Estimated badness is %i, frequency %.2f.\n",
1931 edge->caller->name (), edge->caller->order,
1932 flag_wpa ? "unknown"
1933 : gimple_filename ((const_gimple) edge->call_stmt),
1934 flag_wpa ? -1
1935 : gimple_lineno ((const_gimple) edge->call_stmt),
1936 badness,
1937 edge->frequency / (double)CGRAPH_FREQ_BASE);
1938 if (edge->count)
1939 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1940 edge->count);
1941 if (dump_flags & TDF_DETAILS)
1942 edge_badness (edge, true);
1945 if (overall_size + growth > max_size
1946 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1948 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1949 report_inline_failed_reason (edge);
1950 resolve_noninline_speculation (edge_heap, edge);
1951 continue;
1954 if (!want_inline_small_function_p (edge, true)
1955 && !better_inline_comdat_function_p (edge->callee))
1957 resolve_noninline_speculation (edge_heap, edge);
1958 continue;
1961 if (!dbg_cnt (inl))
1962 continue;
1964 /* Heuristics for inlining small functions work poorly for
1965 recursive calls, where the effect is similar to loop unrolling.
1966 When inlining such an edge seems profitable, leave the decision to
1967 the special-purpose recursive inliner. */
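/* Recursive inlining effectively unrolls the recursion.  For a hypothetical
   self-recursive function such as

     static int fact (int n) { return n <= 1 ? 1 : n * fact (n - 1); }

   recursive_inlining keeps inlining the recursive call into the freshly
   inlined copies, subject to limits controlled by --params such as
   max-inline-insns-recursive and max-inline-recursive-depth, so several
   levels of the recursion end up in a single body.  */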
1968 if (cgraph_edge_recursive_p (edge))
1970 where = edge->caller;
1971 if (where->global.inlined_to)
1972 where = where->global.inlined_to;
1973 if (!recursive_inlining (edge,
1974 flag_indirect_inlining
1975 ? &new_indirect_edges : NULL))
1977 edge->inline_failed = CIF_RECURSIVE_INLINING;
1978 resolve_noninline_speculation (edge_heap, edge);
1979 continue;
1981 reset_edge_caches (where);
1982 /* The recursive inliner inlines all recursive calls of the function
1983 at once. Consequently we need to update all callee keys. */
1984 if (flag_indirect_inlining)
1985 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1986 update_callee_keys (edge_heap, where, updated_nodes);
1987 bitmap_clear (updated_nodes);
1989 else
1991 struct cgraph_node *outer_node = NULL;
1992 int depth = 0;
1994 if (!dbg_cnt (inl))
1995 continue;
1997 /* Consider the case where a self-recursive function A is inlined
1998 into B. This is a desirable optimization in some cases, since it
1999 has an effect similar to loop peeling and we might completely
2000 optimize out the recursive call. However, we must be extra
2001 selective. */
2003 where = edge->caller;
2004 while (where->global.inlined_to)
2006 if (where->decl == callee->decl)
2007 outer_node = where, depth++;
2008 where = where->callers->caller;
2010 if (outer_node
2011 && !want_inline_self_recursive_call_p (edge, outer_node,
2012 true, depth))
2014 edge->inline_failed
2015 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2016 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2017 resolve_noninline_speculation (edge_heap, edge);
2018 continue;
2020 else if (depth && dump_file)
2021 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2023 gcc_checking_assert (!callee->global.inlined_to);
2024 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2025 if (flag_indirect_inlining)
2026 add_new_edges_to_heap (edge_heap, new_indirect_edges);
2028 reset_edge_caches (edge->callee);
2029 reset_node_growth_cache (callee);
2031 update_callee_keys (edge_heap, where, updated_nodes);
2033 where = edge->caller;
2034 if (where->global.inlined_to)
2035 where = where->global.inlined_to;
2037 /* Our profitability metric can depend on local properties
2038 such as the number of inlinable calls and the size of the function body.
2039 After inlining, these properties might change for the function we
2040 inlined into (since its body size changed) and for the functions
2041 called by the function we inlined (since the number of their inlinable
2042 callers might change). */
2043 update_caller_keys (edge_heap, where, updated_nodes, NULL);
2044 bitmap_clear (updated_nodes);
2046 if (dump_file)
2048 fprintf (dump_file,
2049 "INFO: %s Inlined into %s which now has time %i and size %i,"
2050 "net change of %+i.\n",
2051 edge->callee->name (),
2052 edge->caller->name (),
2053 inline_summary (edge->caller)->time,
2054 inline_summary (edge->caller)->size,
2055 overall_size - old_size);
2057 if (min_size > overall_size)
2059 min_size = overall_size;
2060 max_size = compute_max_insns (min_size);
2062 if (dump_file)
2063 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2067 free_growth_caches ();
2068 fibheap_delete (edge_heap);
2069 if (dump_file)
2070 fprintf (dump_file,
2071 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2072 initial_size, overall_size,
2073 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2074 BITMAP_FREE (updated_nodes);
2075 cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
2078 /* Flatten NODE. Performed both during early inlining and
2079 at IPA inlining time. */
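/* As an illustration (hypothetical user code), marking a function with the
   flatten attribute:

     __attribute__ ((flatten)) void hot_path (void)
     {
       helper_a ();
       helper_b ();
     }

   requests that every call in its body - and, recursively, in the inlined
   bodies - be inlined where the usual restrictions allow it, which is what
   the walk below implements.  */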
2081 static void
2082 flatten_function (struct cgraph_node *node, bool early)
2084 struct cgraph_edge *e;
2086 /* We shouldn't be called recursively when we are being processed. */
2087 gcc_assert (node->aux == NULL);
2089 node->aux = (void *) node;
2091 for (e = node->callees; e; e = e->next_callee)
2093 struct cgraph_node *orig_callee;
2094 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2096 /* Have we hit a cycle? If so, it is time to give up. */
2097 if (callee->aux)
2099 if (dump_file)
2100 fprintf (dump_file,
2101 "Not inlining %s into %s to avoid cycle.\n",
2102 xstrdup (callee->name ()),
2103 xstrdup (e->caller->name ()));
2104 e->inline_failed = CIF_RECURSIVE_INLINING;
2105 continue;
2108 /* When the edge is already inlined, we just need to recurse into
2109 it in order to fully flatten the leaves. */
2110 if (!e->inline_failed)
2112 flatten_function (callee, early);
2113 continue;
2116 /* The flatten attribute needs to be processed during late inlining. For
2117 extra code quality, however, we also do flattening during early
2118 optimization. */
2119 if (!early
2120 ? !can_inline_edge_p (e, true)
2121 : !can_early_inline_edge_p (e))
2122 continue;
2124 if (cgraph_edge_recursive_p (e))
2126 if (dump_file)
2127 fprintf (dump_file, "Not inlining: recursive call.\n");
2128 continue;
2131 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2132 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2134 if (dump_file)
2135 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2136 continue;
2139 /* Inline the edge and flatten the inline clone. Avoid
2140 recursing through the original node if the node was cloned. */
2141 if (dump_file)
2142 fprintf (dump_file, " Inlining %s into %s.\n",
2143 xstrdup (callee->name ()),
2144 xstrdup (e->caller->name ()));
2145 orig_callee = callee;
2146 inline_call (e, true, NULL, NULL, false);
2147 if (e->callee != orig_callee)
2148 orig_callee->aux = (void *) node;
2149 flatten_function (e->callee, early);
2150 if (e->callee != orig_callee)
2151 orig_callee->aux = NULL;
2154 node->aux = NULL;
2155 if (!node->global.inlined_to)
2156 inline_update_overall_summary (node);
2159 /* Count the number of callers of NODE and store it into DATA (which
2160 points to an int). Worker for cgraph_for_node_and_aliases. */
2162 static bool
2163 sum_callers (struct cgraph_node *node, void *data)
2165 struct cgraph_edge *e;
2166 int *num_calls = (int *)data;
2168 for (e = node->callers; e; e = e->next_caller)
2169 (*num_calls)++;
2170 return false;
2173 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2174 DATA points to the number of calls originally found, so that we avoid
2175 infinite recursion. */
2177 static bool
2178 inline_to_all_callers (struct cgraph_node *node, void *data)
2180 int *num_calls = (int *)data;
2181 bool callee_removed = false;
2183 while (node->callers && !node->global.inlined_to)
2185 struct cgraph_node *caller = node->callers->caller;
2187 if (dump_file)
2189 fprintf (dump_file,
2190 "\nInlining %s size %i.\n",
2191 node->name (),
2192 inline_summary (node)->size);
2193 fprintf (dump_file,
2194 " Called once from %s %i insns.\n",
2195 node->callers->caller->name (),
2196 inline_summary (node->callers->caller)->size);
2199 inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
2200 if (dump_file)
2201 fprintf (dump_file,
2202 " Inlined into %s which now has %i size\n",
2203 caller->name (),
2204 inline_summary (caller)->size);
2205 if (!(*num_calls)--)
2207 if (dump_file)
2208 fprintf (dump_file, "New calls found; giving up.\n");
2209 return callee_removed;
2211 if (callee_removed)
2212 return true;
2214 return false;
2217 /* Decide on the inlining. We do so in topological order to avoid
2218 the expense of updating data structures. */
2220 static unsigned int
2221 ipa_inline (void)
2223 struct cgraph_node *node;
2224 int nnodes;
2225 struct cgraph_node **order;
2226 int i;
2227 int cold;
2228 bool remove_functions = false;
2230 if (!optimize)
2231 return 0;
2233 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
2235 if (in_lto_p && optimize)
2236 ipa_update_after_lto_read ();
2238 if (dump_file)
2239 dump_inline_summaries (dump_file);
2241 nnodes = ipa_reverse_postorder (order);
2243 FOR_EACH_FUNCTION (node)
2244 node->aux = 0;
2246 if (dump_file)
2247 fprintf (dump_file, "\nFlattening functions:\n");
2249 /* In the first pass handle functions to be flattened. Do this with
2250 a priority so none of our later choices will make this impossible. */
2251 for (i = nnodes - 1; i >= 0; i--)
2253 node = order[i];
2255 /* Handle nodes to be flattened.
2256 Ideally, when processing callees we would stop inlining at the
2257 entry of cycles, possibly cloning that entry point and
2258 trying to flatten it, turning it into a self-recursive
2259 function. */
2260 if (lookup_attribute ("flatten",
2261 DECL_ATTRIBUTES (node->decl)) != NULL)
2263 if (dump_file)
2264 fprintf (dump_file,
2265 "Flattening %s\n", node->name ());
2266 flatten_function (node, false);
2270 inline_small_functions ();
2272 /* Do the first after-inlining removal. We want to remove all "stale" extern inline
2273 functions and virtual functions so we really know what is called once. */
2274 symtab_remove_unreachable_nodes (false, dump_file);
2275 free (order);
2277 /* Inline functions with the property that after inlining into all callers the
2278 code size will shrink, because the out-of-line copy is eliminated.
2279 We do this regardless of the callee size as long as the function growth limits
2280 are met. */
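/* For instance (hypothetical user code), a static function with a single
   call site such as

     static int scale (int x) { return 3 * x; }
     int use (int y) { return scale (y) + 1; }

   is inlined into its only caller and the out-of-line copy is removed, so
   the net code size shrinks even when the callee is not small.  */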
2281 if (dump_file)
2282 fprintf (dump_file,
2283 "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2285 /* Inlining one function called once has a good chance of preventing
2286 the inlining of another function into the same callee. Ideally we should
2287 work in priority order, but probably inlining hot functions first
2288 is a good cut without the extra pain of maintaining the queue.
2290 ??? This is not really fitting the bill perfectly: inlining a function
2291 into its caller often leads to better optimization of the callee's code
2292 due to the increased context available for optimization.
2293 For example, if the main() function calls a function that outputs help
2294 and then a function that does the main optimization, we should inline
2295 the second with priority even if both calls are cold by themselves.
2297 We probably want to implement a new predicate replacing our use of
2298 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2299 to be hot. */
2300 for (cold = 0; cold <= 1; cold ++)
2302 FOR_EACH_DEFINED_FUNCTION (node)
2304 struct cgraph_edge *edge, *next;
2305 bool update = false;
2307 for (edge = node->callees; edge; edge = next)
2309 next = edge->next_callee;
2310 if (edge->speculative && !speculation_useful_p (edge, false))
2312 cgraph_resolve_speculation (edge, NULL);
2313 update = true;
2314 remove_functions = true;
2317 if (update)
2319 struct cgraph_node *where = node->global.inlined_to
2320 ? node->global.inlined_to : node;
2321 reset_node_growth_cache (where);
2322 reset_edge_caches (where);
2323 inline_update_overall_summary (where);
2325 if (flag_inline_functions_called_once
2326 && want_inline_function_to_all_callers_p (node, cold))
2328 int num_calls = 0;
2329 cgraph_for_node_and_aliases (node, sum_callers,
2330 &num_calls, true);
2331 while (cgraph_for_node_and_aliases (node, inline_to_all_callers,
2332 &num_calls, true))
2334 remove_functions = true;
2339 /* Free ipa-prop structures if they are no longer needed. */
2340 if (optimize)
2341 ipa_free_all_structures_after_iinln ();
2343 if (dump_file)
2344 fprintf (dump_file,
2345 "\nInlined %i calls, eliminated %i functions\n\n",
2346 ncalls_inlined, nfunctions_inlined);
2348 if (dump_file)
2349 dump_inline_summaries (dump_file);
2350 /* In WPA we use the inline summaries for the partitioning process. */
2351 if (!flag_wpa)
2352 inline_free_summary ();
2353 return remove_functions ? TODO_remove_functions : 0;
2356 /* Inline always-inline function calls in NODE. */
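/* For example (hypothetical user code), a callee declared as

     static inline int sq (int x) __attribute__ ((always_inline));
     static inline int sq (int x) { return x * x; }

   is inlined here even when not optimizing; if the body cannot be inlined,
   'inlined' is still set to true so that expand_call_inline can diagnose
   the failure later (see the comment inside the loop below).  */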
2358 static bool
2359 inline_always_inline_functions (struct cgraph_node *node)
2361 struct cgraph_edge *e;
2362 bool inlined = false;
2364 for (e = node->callees; e; e = e->next_callee)
2366 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2367 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2368 continue;
2370 if (cgraph_edge_recursive_p (e))
2372 if (dump_file)
2373 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2374 e->callee->name ());
2375 e->inline_failed = CIF_RECURSIVE_INLINING;
2376 continue;
2379 if (!can_early_inline_edge_p (e))
2381 /* Set inlined to true if the callee is marked "always_inline" but
2382 is not inlinable. This will allow flagging an error later in
2383 expand_call_inline in tree-inline.c. */
2384 if (lookup_attribute ("always_inline",
2385 DECL_ATTRIBUTES (callee->decl)) != NULL)
2386 inlined = true;
2387 continue;
2390 if (dump_file)
2391 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2392 xstrdup (e->callee->name ()),
2393 xstrdup (e->caller->name ()));
2394 inline_call (e, true, NULL, NULL, false);
2395 inlined = true;
2397 if (inlined)
2398 inline_update_overall_summary (node);
2400 return inlined;
2403 /* Decide on the inlining. We do so in topological order to avoid
2404 the expense of updating data structures. */
2406 static bool
2407 early_inline_small_functions (struct cgraph_node *node)
2409 struct cgraph_edge *e;
2410 bool inlined = false;
2412 for (e = node->callees; e; e = e->next_callee)
2414 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2415 if (!inline_summary (callee)->inlinable
2416 || !e->inline_failed)
2417 continue;
2419 /* Do not consider functions not declared inline. */
2420 if (!DECL_DECLARED_INLINE_P (callee->decl)
2421 && !flag_inline_small_functions
2422 && !flag_inline_functions)
2423 continue;
2425 if (dump_file)
2426 fprintf (dump_file, "Considering inline candidate %s.\n",
2427 callee->name ());
2429 if (!can_early_inline_edge_p (e))
2430 continue;
2432 if (cgraph_edge_recursive_p (e))
2435 if (dump_file)
2436 fprintf (dump_file, " Not inlining: recursive call.\n");
2437 continue;
2440 if (!want_early_inline_function_p (e))
2441 continue;
2443 if (dump_file)
2444 fprintf (dump_file, " Inlining %s into %s.\n",
2445 xstrdup (callee->name ()),
2446 xstrdup (e->caller->name ()));
2447 inline_call (e, true, NULL, NULL, true);
2448 inlined = true;
2451 return inlined;
2454 /* Do inlining of small functions. Doing so early helps profiling and other
2455 passes be somewhat more effective and avoids some code duplication in
2456 the later real inlining pass for testcases with very many function calls. */
2457 unsigned int
2458 early_inliner (void)
2460 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2461 struct cgraph_edge *edge;
2462 unsigned int todo = 0;
2463 int iterations = 0;
2464 bool inlined = false;
2466 if (seen_error ())
2467 return 0;
2469 /* Do nothing if the data structures for the ipa-inliner are already computed.
2470 This happens when some pass decides to construct a new function and
2471 cgraph_add_new_function calls lowering passes and early optimization on
2472 it. This may confuse us when the early inliner decides to inline a call to
2473 a function clone, because function clones don't have a parameter list in
2474 ipa-prop matching their signature. */
2475 if (ipa_node_params_vector.exists ())
2476 return 0;
2478 #ifdef ENABLE_CHECKING
2479 verify_cgraph_node (node);
2480 #endif
2481 ipa_remove_all_references (&node->ref_list);
2483 /* Even when not optimizing or not inlining, inline always-inline
2484 functions. */
2485 inlined = inline_always_inline_functions (node);
2487 if (!optimize
2488 || flag_no_inline
2489 || !flag_early_inlining
2490 /* Never inline regular functions into always-inline functions
2491 during incremental inlining. This is unfortunate, as functions
2492 calling always-inline functions will get less optimized, but at
2493 the same time inlining a function that calls an always-inline
2494 function into another always-inline function might introduce
2495 cycles of edges to be always inlined in the callgraph.
2497 We might want to be smarter and just avoid this type of inlining. */
2498 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2500 else if (lookup_attribute ("flatten",
2501 DECL_ATTRIBUTES (node->decl)) != NULL)
2503 /* When the function is marked to be flattened, recursively inline
2504 all calls in it. */
2505 if (dump_file)
2506 fprintf (dump_file,
2507 "Flattening %s\n", node->name ());
2508 flatten_function (node, true);
2509 inlined = true;
2511 else
2513 /* We iterate incremental inlining to get trivial cases of indirect
2514 inlining. */
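/* A minimal sketch of such a trivial case (hypothetical user code):

     static int add1 (int x) { return x + 1; }
     static int apply (int (*fn) (int), int v) { return fn (v); }
     int f (int v) { return apply (add1, v); }

   once apply is inlined into f, the indirect call through FN becomes a
   direct call to add1, which the next iteration of the loop below can
   inline as well.  */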
2515 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2516 && early_inline_small_functions (node))
2518 timevar_push (TV_INTEGRATION);
2519 todo |= optimize_inline_calls (current_function_decl);
2521 /* Technically we ought to recompute the inline parameters so the new
2522 iteration of the early inliner works as expected. However, the values
2523 we have are approximately right, and thus we only need to update the
2524 edge info that might be cleared out for newly discovered edges. */
2525 for (edge = node->callees; edge; edge = edge->next_callee)
2527 struct inline_edge_summary *es = inline_edge_summary (edge);
2529 if (!edge->call_stmt)
2530 continue;
2531 es->call_stmt_size
2532 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2533 es->call_stmt_time
2534 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2535 if (edge->callee->decl
2536 && !gimple_check_call_matching_types (
2537 edge->call_stmt, edge->callee->decl, false))
2538 edge->call_stmt_cannot_inline_p = true;
2540 if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2541 inline_update_overall_summary (node);
2542 timevar_pop (TV_INTEGRATION);
2543 iterations++;
2544 inlined = false;
2546 if (dump_file)
2547 fprintf (dump_file, "Iterations: %i\n", iterations);
2550 if (inlined)
2552 timevar_push (TV_INTEGRATION);
2553 todo |= optimize_inline_calls (current_function_decl);
2554 timevar_pop (TV_INTEGRATION);
2557 cfun->always_inline_functions_inlined = true;
2559 return todo;
2562 namespace {
2564 const pass_data pass_data_early_inline =
2566 GIMPLE_PASS, /* type */
2567 "einline", /* name */
2568 OPTGROUP_INLINE, /* optinfo_flags */
2569 false, /* has_gate */
2570 true, /* has_execute */
2571 TV_EARLY_INLINING, /* tv_id */
2572 PROP_ssa, /* properties_required */
2573 0, /* properties_provided */
2574 0, /* properties_destroyed */
2575 0, /* todo_flags_start */
2576 0, /* todo_flags_finish */
2579 class pass_early_inline : public gimple_opt_pass
2581 public:
2582 pass_early_inline (gcc::context *ctxt)
2583 : gimple_opt_pass (pass_data_early_inline, ctxt)
2586 /* opt_pass methods: */
2587 unsigned int execute () { return early_inliner (); }
2589 }; // class pass_early_inline
2591 } // anon namespace
2593 gimple_opt_pass *
2594 make_pass_early_inline (gcc::context *ctxt)
2596 return new pass_early_inline (ctxt);
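/* The factory functions above and below are how the pass manager obtains
   instances of these passes; in this source tree they are expected to be
   referenced from passes.def, which defines the overall pass ordering.  */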
2599 namespace {
2601 const pass_data pass_data_ipa_inline =
2603 IPA_PASS, /* type */
2604 "inline", /* name */
2605 OPTGROUP_INLINE, /* optinfo_flags */
2606 false, /* has_gate */
2607 true, /* has_execute */
2608 TV_IPA_INLINING, /* tv_id */
2609 0, /* properties_required */
2610 0, /* properties_provided */
2611 0, /* properties_destroyed */
2612 TODO_remove_functions, /* todo_flags_start */
2613 ( TODO_dump_symtab ), /* todo_flags_finish */
2616 class pass_ipa_inline : public ipa_opt_pass_d
2618 public:
2619 pass_ipa_inline (gcc::context *ctxt)
2620 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2621 inline_generate_summary, /* generate_summary */
2622 inline_write_summary, /* write_summary */
2623 inline_read_summary, /* read_summary */
2624 NULL, /* write_optimization_summary */
2625 NULL, /* read_optimization_summary */
2626 NULL, /* stmt_fixup */
2627 0, /* function_transform_todo_flags_start */
2628 inline_transform, /* function_transform */
2629 NULL) /* variable_transform */
2632 /* opt_pass methods: */
2633 unsigned int execute () { return ipa_inline (); }
2635 }; // class pass_ipa_inline
2637 } // anon namespace
2639 ipa_opt_pass_d *
2640 make_pass_ipa_inline (gcc::context *ctxt)
2642 return new pass_ipa_inline (ctxt);