1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
23 The implementation of the inliner is organized as follows:
25 inlining heuristics limits
27 can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by the user (allowed function growth, stack
29 growth and so on).
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
42 A simple local inlining pass that inlines callees into the current
43 function. This pass makes no use of whole-unit analysis and thus can
44 make only very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner have already been
50 optimized, and the results of early inlining add a lot of optimization
51 opportunities for the local optimizers.
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto-inline functions, inlining for size and
55 flattening.
57 The main strength of the pass is its ability to eliminate the abstraction
58 penalty in C++ code (via a combination of inlining and early
59 optimization) and thus improve the quality of analysis done by the real
60 IPA optimizers.
62 Because of the lack of whole-unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It does, however, do very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs the following steps:
73 1) inlining of small functions. This is implemented by a greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as the inline limits allow doing so.
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so,
79 a special-purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
82 2) Unreachable functions are removed from the callgraph. Inlining leads
83 to devirtualization and other modifications of the callgraph, so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction of code size by eliminating
90 the need for an offline copy of the function. */
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "tm.h"
96 #include "tree.h"
97 #include "trans-mem.h"
98 #include "calls.h"
99 #include "tree-inline.h"
100 #include "langhooks.h"
101 #include "flags.h"
102 #include "diagnostic.h"
103 #include "gimple-pretty-print.h"
104 #include "params.h"
105 #include "fibheap.h"
106 #include "intl.h"
107 #include "tree-pass.h"
108 #include "coverage.h"
109 #include "ggc.h"
110 #include "rtl.h"
111 #include "bitmap.h"
112 #include "gimple.h"
113 #include "gimple-ssa.h"
114 #include "ipa-prop.h"
115 #include "except.h"
116 #include "target.h"
117 #include "ipa-inline.h"
118 #include "ipa-utils.h"
119 #include "sreal.h"
120 #include "cilk.h"
122 /* Statistics we collect about inlining algorithm. */
123 static int overall_size;
124 static gcov_type max_count;
125 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
127 /* Return false when inlining edge E would lead to violating
128 limits on function unit growth or stack usage growth.
130 The relative function body growth limit is present generally
131 to avoid problems with the non-linear behavior of the compiler.
132 To allow inlining huge functions into a tiny wrapper, the limit
133 is always based on the bigger of the two functions considered.
135 For stack growth limits we always base the growth on the stack usage
136 of the caller. We want to prevent applications from segfaulting
137 on stack overflow when functions with huge stack frames get
138 inlined. */
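/* Illustrative sketch, not taken from the sources: assuming the default
   --param large-function-growth of 100 (percent), if the largest body on the
   chain of inline callers has a self size of 500 insns, the limit computed
   below becomes 500 + 500 * 100 / 100 = 1000 insns; an edge whose estimated
   post-inlining size exceeds both this limit and --param large-function-insns
   is rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT.  */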
140 static bool
141 caller_growth_limits (struct cgraph_edge *e)
143 struct cgraph_node *to = e->caller;
144 struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
145 int newsize;
146 int limit = 0;
147 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
148 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
150 /* Look for the function e->caller is inlined into. While doing
151 so work out the largest function body on the way. As
152 described above, we want to base our function growth
153 limits on that, not on the self size of the
154 outer function, nor on the self size of the inline code
155 we immediately inline into. This is the most relaxed
156 interpretation of the rule "do not grow large functions
157 too much in order to prevent the compiler from exploding". */
158 while (true)
160 info = inline_summary (to);
161 if (limit < info->self_size)
162 limit = info->self_size;
163 if (stack_size_limit < info->estimated_self_stack_size)
164 stack_size_limit = info->estimated_self_stack_size;
165 if (to->global.inlined_to)
166 to = to->callers->caller;
167 else
168 break;
171 what_info = inline_summary (what);
173 if (limit < what_info->self_size)
174 limit = what_info->self_size;
176 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
178 /* Check the size after inlining against the function limits. But allow
179 the function to shrink if it went over the limits by forced inlining. */
180 newsize = estimate_size_after_inlining (to, e);
181 if (newsize >= info->size
182 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
183 && newsize > limit)
185 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
186 return false;
189 if (!what_info->estimated_stack_size)
190 return true;
192 /* FIXME: Stack size limit often prevents inlining in Fortran programs
193 due to large I/O data structures used by the Fortran front-end.
194 We ought to ignore this limit when we know that the edge is executed
195 on every invocation of the caller (i.e. its call statement dominates
196 exit block). We do not track this information, yet. */
197 stack_size_limit += ((gcov_type)stack_size_limit
198 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
200 inlined_stack = (outer_info->stack_frame_offset
201 + outer_info->estimated_self_stack_size
202 + what_info->estimated_stack_size);
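/* Worked example with hypothetical numbers: with an outer frame of 1000 bytes
   and --param stack-frame-growth assumed at its default of 1000 (percent),
   stack_size_limit becomes 1000 + 1000 * 1000 / 100 = 11000 bytes; inlining is
   refused only if the combined stack above also exceeds the caller's existing
   stack estimate and --param large-stack-frame.  */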
203 /* Check new stack consumption with stack consumption at the place
204 stack is used. */
205 if (inlined_stack > stack_size_limit
206 /* If the function already has large stack usage from a sibling
207 inline call, we can inline, too.
208 This bit overoptimistically assumes that we are good at stack
209 packing. */
210 && inlined_stack > info->estimated_stack_size
211 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
213 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
214 return false;
216 return true;
219 /* Dump info about why inlining has failed. */
221 static void
222 report_inline_failed_reason (struct cgraph_edge *e)
224 if (dump_file)
226 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
227 xstrdup (e->caller->name ()), e->caller->order,
228 xstrdup (e->callee->name ()), e->callee->order,
229 cgraph_inline_failed_string (e->inline_failed));
233 /* Decide if we can inline the edge and possibly update
234 inline_failed reason.
235 We check whether inlining is possible at all and whether
236 caller growth limits allow doing so.
238 If REPORT is true, output the reason to the dump file.
240 If DISREGARD_LIMITS is true, ignore size limits. */
242 static bool
243 can_inline_edge_p (struct cgraph_edge *e, bool report,
244 bool disregard_limits = false)
246 bool inlinable = true;
247 enum availability avail;
248 struct cgraph_node *callee
249 = cgraph_function_or_thunk_node (e->callee, &avail);
250 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
251 tree callee_tree
252 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
253 struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
254 struct function *callee_cfun
255 = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;
257 if (!caller_cfun && e->caller->clone_of)
258 caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);
260 if (!callee_cfun && callee && callee->clone_of)
261 callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);
263 gcc_assert (e->inline_failed);
265 if (!callee || !callee->definition)
267 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
268 inlinable = false;
270 else if (!inline_summary (callee)->inlinable
271 || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
273 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
274 inlinable = false;
276 else if (avail <= AVAIL_OVERWRITABLE)
278 e->inline_failed = CIF_OVERWRITABLE;
279 inlinable = false;
281 else if (e->call_stmt_cannot_inline_p)
283 if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
284 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
285 inlinable = false;
287 /* Don't inline if the functions have different EH personalities. */
288 else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
289 && DECL_FUNCTION_PERSONALITY (callee->decl)
290 && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
291 != DECL_FUNCTION_PERSONALITY (callee->decl)))
293 e->inline_failed = CIF_EH_PERSONALITY;
294 inlinable = false;
296 /* TM pure functions should not be inlined into non-TM_pure
297 functions. */
298 else if (is_tm_pure (callee->decl)
299 && !is_tm_pure (e->caller->decl))
301 e->inline_failed = CIF_UNSPECIFIED;
302 inlinable = false;
304 /* Don't inline if the callee can throw non-call exceptions but the
305 caller cannot.
306 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
307 Move the flag into cgraph node or mirror it in the inline summary. */
308 else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
309 && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
311 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
312 inlinable = false;
314 /* Check compatibility of target optimization options. */
315 else if (!targetm.target_option.can_inline_p (e->caller->decl,
316 callee->decl))
318 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
319 inlinable = false;
321 /* Check if caller growth allows the inlining. */
322 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
323 && !disregard_limits
324 && !lookup_attribute ("flatten",
325 DECL_ATTRIBUTES
326 (e->caller->global.inlined_to
327 ? e->caller->global.inlined_to->decl
328 : e->caller->decl))
329 && !caller_growth_limits (e))
330 inlinable = false;
331 /* Don't inline a function with a higher optimization level than the
332 caller. FIXME: this is really just tip of iceberg of handling
333 optimization attribute. */
334 else if (caller_tree != callee_tree)
336 struct cl_optimization *caller_opt
337 = TREE_OPTIMIZATION ((caller_tree)
338 ? caller_tree
339 : optimization_default_node);
341 struct cl_optimization *callee_opt
342 = TREE_OPTIMIZATION ((callee_tree)
343 ? callee_tree
344 : optimization_default_node);
346 if (((caller_opt->x_optimize > callee_opt->x_optimize)
347 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
348 /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */
349 && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
351 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
352 inlinable = false;
356 if (!inlinable && report)
357 report_inline_failed_reason (e);
358 return inlinable;
362 /* Return true if the edge E is inlinable during early inlining. */
364 static bool
365 can_early_inline_edge_p (struct cgraph_edge *e)
367 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
368 NULL);
369 /* The early inliner might get called at WPA stage when an IPA pass adds a new
370 function. In this case we cannot really do any early inlining
371 because function bodies are missing. */
372 if (!gimple_has_body_p (callee->decl))
374 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
375 return false;
377 /* In the early inliner some of the callees may not be in SSA form yet
378 (i.e. the callgraph is cyclic and we did not process
379 the callee with the early inliner yet). We don't have a CIF code for this
380 case; later we will re-do the decision in the real inliner. */
381 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
382 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
384 if (dump_file)
385 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
386 return false;
388 if (!can_inline_edge_p (e, true))
389 return false;
390 return true;
394 /* Return number of calls in N. Ignore cheap builtins. */
396 static int
397 num_calls (struct cgraph_node *n)
399 struct cgraph_edge *e;
400 int num = 0;
402 for (e = n->callees; e; e = e->next_callee)
403 if (!is_inexpensive_builtin (e->callee->decl))
404 num++;
405 return num;
409 /* Return true if we are interested in inlining small function. */
411 static bool
412 want_early_inline_function_p (struct cgraph_edge *e)
414 bool want_inline = true;
415 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
417 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
419 else if (!DECL_DECLARED_INLINE_P (callee->decl)
420 && !flag_inline_small_functions)
422 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
423 report_inline_failed_reason (e);
424 want_inline = false;
426 else
428 int growth = estimate_edge_growth (e);
429 int n;
431 if (growth <= 0)
433 else if (!cgraph_maybe_hot_edge_p (e)
434 && growth > 0)
436 if (dump_file)
437 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
438 "call is cold and code would grow by %i\n",
439 xstrdup (e->caller->name ()),
440 e->caller->order,
441 xstrdup (callee->name ()), callee->order,
442 growth);
443 want_inline = false;
445 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
447 if (dump_file)
448 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
449 "growth %i exceeds --param early-inlining-insns\n",
450 xstrdup (e->caller->name ()),
451 e->caller->order,
452 xstrdup (callee->name ()), callee->order,
453 growth);
454 want_inline = false;
456 else if ((n = num_calls (callee)) != 0
457 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
459 if (dump_file)
460 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
461 "growth %i exceeds --param early-inlining-insns "
462 "divided by number of calls\n",
463 xstrdup (e->caller->name ()),
464 e->caller->order,
465 xstrdup (callee->name ()), callee->order,
466 growth);
467 want_inline = false;
470 return want_inline;
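/* Worked example (the parameter value is hypothetical): suppose
   --param early-inlining-insns is 12.  A callee with growth 2 that itself makes
   3 non-builtin calls passes the last test above (2 * (3 + 1) = 8 <= 12), while
   one with growth 6 is rejected (6 * 4 = 24 > 12), the idea being that each of
   the surviving calls may later be inlined with similar growth.  */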
473 /* Compute time of the edge->caller + edge->callee execution when inlining
474 does not happen. */
476 inline gcov_type
477 compute_uninlined_call_time (struct inline_summary *callee_info,
478 struct cgraph_edge *edge)
480 gcov_type uninlined_call_time =
481 RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
482 CGRAPH_FREQ_BASE);
483 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
484 ? edge->caller->global.inlined_to
485 : edge->caller)->time;
486 return uninlined_call_time + caller_time;
489 /* Same as compute_uninlined_call_time but compute the time when inlining
490 does happen. */
492 inline gcov_type
493 compute_inlined_call_time (struct cgraph_edge *edge,
494 int edge_time)
496 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
497 ? edge->caller->global.inlined_to
498 : edge->caller)->time;
499 gcov_type time = (caller_time
500 + RDIV (((gcov_type) edge_time
501 - inline_edge_summary (edge)->call_stmt_time)
502 * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
503 /* Possible one roundoff error, but watch for overflows. */
504 gcc_checking_assert (time >= INT_MIN / 2);
505 if (time < 0)
506 time = 0;
507 return time;
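/* Illustrative numbers, not from any real profile: with a caller time of 1000,
   a callee time of 200, edge->frequency equal to CGRAPH_FREQ_BASE, an estimated
   edge_time of 150 and a call_stmt_time of 10, the uninlined time above is
   1000 + 200 = 1200 while the inlined time is 1000 + (150 - 10) = 1140.  */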
510 /* Return true if the speedup for inlining E is bigger than
511 PARAM_MAX_INLINE_MIN_SPEEDUP. */
513 static bool
514 big_speedup_p (struct cgraph_edge *e)
516 gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
517 e);
518 gcov_type inlined_time = compute_inlined_call_time (e,
519 estimate_edge_time (e));
520 if (time - inlined_time
521 > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
522 return true;
523 return false;
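/* Continuing the illustrative numbers above and assuming the default
   --param inline-min-speedup of 10, saving 1200 - 1140 = 60 is not a big
   speedup, because the threshold is 1200 * 10 / 100 = 120.  */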
526 /* Return true if we are interested in inlining small function.
527 When REPORT is true, report reason to dump file. */
529 static bool
530 want_inline_small_function_p (struct cgraph_edge *e, bool report)
532 bool want_inline = true;
533 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
535 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
537 else if (!DECL_DECLARED_INLINE_P (callee->decl)
538 && !flag_inline_small_functions)
540 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
541 want_inline = false;
543 else
545 int growth = estimate_edge_growth (e);
546 inline_hints hints = estimate_edge_hints (e);
547 bool big_speedup = big_speedup_p (e);
549 if (growth <= 0)
551 /* Apply the MAX_INLINE_INSNS_SINGLE limit. Do not do so when
552 hints suggest that inlining the given function is very profitable. */
553 else if (DECL_DECLARED_INLINE_P (callee->decl)
554 && growth >= MAX_INLINE_INSNS_SINGLE
555 && !big_speedup
556 && !(hints & (INLINE_HINT_indirect_call
557 | INLINE_HINT_loop_iterations
558 | INLINE_HINT_array_index
559 | INLINE_HINT_loop_stride)))
561 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
562 want_inline = false;
564 /* Before giving up based on the fact that the caller size will grow, allow
565 functions that are called a few times and where eliminating the offline
566 copy will lead to an overall code size reduction.
567 Not all of these will be handled by subsequent inlining of functions
568 called once: in particular weak functions are not handled, nor are functions
569 that are inlined into multiple calls but where a lot of the body is optimized out.
570 Finally we want to inline earlier to allow inlining of callbacks.
572 This is slightly wrong on the aggressive side: it is entirely possible
573 that a function is called many times with a context where inlining
574 reduces code size and a few times with a context where inlining increases
575 code size. The resulting growth estimate will be negative even if it
576 would make more sense to keep the offline copy and not inline into the
577 call sites that make the code size grow.
579 When badness orders the calls in a way that code reducing calls come
580 first, this situation is not a problem at all: after inlining all
581 "good" calls, we will realize that keeping the function around is
582 better. */
583 else if (growth <= MAX_INLINE_INSNS_SINGLE
584 /* Unlike for functions called once, we play unsafe with
585 COMDATs. We can allow that since we know the functions
586 in consideration are small (and thus the risk is small) and
587 moreover the growth estimates already account for the fact that COMDAT
588 functions may or may not disappear when eliminated from the
589 current unit. With good probability, making the aggressive
590 choice in all units is going to make the overall program
591 smaller.
593 Consequently we ask cgraph_can_remove_if_no_direct_calls_p
594 instead of
595 cgraph_will_be_removed_from_program_if_no_direct_calls */
596 && !DECL_EXTERNAL (callee->decl)
597 && cgraph_can_remove_if_no_direct_calls_p (callee)
598 && estimate_growth (callee) <= 0)
600 else if (!DECL_DECLARED_INLINE_P (callee->decl)
601 && !flag_inline_functions)
603 e->inline_failed = CIF_NOT_DECLARED_INLINED;
604 want_inline = false;
606 /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
607 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
608 inlining the given function is very profitable. */
609 else if (!DECL_DECLARED_INLINE_P (callee->decl)
610 && !big_speedup
611 && growth >= ((hints & (INLINE_HINT_indirect_call
612 | INLINE_HINT_loop_iterations
613 | INLINE_HINT_array_index
614 | INLINE_HINT_loop_stride))
615 ? MAX (MAX_INLINE_INSNS_AUTO,
616 MAX_INLINE_INSNS_SINGLE)
617 : MAX_INLINE_INSNS_AUTO))
619 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
620 want_inline = false;
622 /* If call is cold, do not inline when function body would grow. */
623 else if (!cgraph_maybe_hot_edge_p (e))
625 e->inline_failed = CIF_UNLIKELY_CALL;
626 want_inline = false;
629 if (!want_inline && report)
630 report_inline_failed_reason (e);
631 return want_inline;
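/* Worked example (limit values only assumed here): with MAX_INLINE_INSNS_AUTO
   at 40 and MAX_INLINE_INSNS_SINGLE at 400, a callee not declared inline with
   an estimated growth of 60 fails with CIF_MAX_INLINE_INSNS_AUTO_LIMIT, unless
   a big speedup or one of the loop/indirect-call/array-index hints raises its
   cap to MAX_INLINE_INSNS_SINGLE.  */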
634 /* EDGE is a self-recursive edge.
635 We handle two cases - when function A is inlined into itself
636 or when function A is being inlined into another inlined copy of function
637 A within function B.
639 In the first case OUTER_NODE points to the toplevel copy of A, while
640 in the second case OUTER_NODE points to the outermost copy of A in B.
642 In both cases we want to be extra selective since
643 inlining the call will just cause new recursive calls to appear. */
645 static bool
646 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
647 struct cgraph_node *outer_node,
648 bool peeling,
649 int depth)
651 char const *reason = NULL;
652 bool want_inline = true;
653 int caller_freq = CGRAPH_FREQ_BASE;
654 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
656 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
657 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
659 if (!cgraph_maybe_hot_edge_p (edge))
661 reason = "recursive call is cold";
662 want_inline = false;
664 else if (max_count && !outer_node->count)
666 reason = "not executed in profile";
667 want_inline = false;
669 else if (depth > max_depth)
671 reason = "--param max-inline-recursive-depth exceeded.";
672 want_inline = false;
675 if (outer_node->global.inlined_to)
676 caller_freq = outer_node->callers->frequency;
678 if (!want_inline)
680 /* Inlining of a self-recursive function into a copy of itself within another function
681 is a transformation similar to loop peeling.
683 Peeling is profitable if we can inline enough copies to make the probability
684 of an actual call to the self-recursive function very small. Be sure that
685 the probability of recursion is small.
687 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
688 This way the expected number of recursions is at most max_depth. */
689 else if (peeling)
691 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
692 / max_depth);
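/* Worked example: with CGRAPH_FREQ_BASE of 1000 and a max_depth of 8, max_prob
   is 1000 - (1000 + 7) / 8 = 875, i.e. peeling is allowed only while the
   probability of recursing is at most 87.5%, keeping the expected recursion
   depth near max_depth; the loop below sharpens the bound for levels that are
   already peeled.  */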
693 int i;
694 for (i = 1; i < depth; i++)
695 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
696 if (max_count
697 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
698 >= max_prob))
700 reason = "profile of recursive call is too large";
701 want_inline = false;
703 if (!max_count
704 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
705 >= max_prob))
707 reason = "frequency of recursive call is too large";
708 want_inline = false;
711 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
712 depth is large. We reduce function call overhead and increase chances that
713 things fit in hardware return predictor.
715 Recursive inlining might however increase cost of stack frame setup
716 actually slowing down functions whose recursion tree is wide rather than
717 deep.
719 Deciding reliably on when to do recursive inlining without profile feedback
720 is tricky. For now we disable recursive inlining when probability of self
721 recursion is low.
723 Recursive inlining of self recursive call within loop also results in large loop
724 depths that generally optimize badly. We may want to throttle down inlining
725 in those cases. In particular this seems to happen in one of libstdc++ rb tree
726 methods. */
727 else
729 if (max_count
730 && (edge->count * 100 / outer_node->count
731 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
733 reason = "profile of recursive call is too small";
734 want_inline = false;
736 else if (!max_count
737 && (edge->frequency * 100 / caller_freq
738 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
740 reason = "frequency of recursive call is too small";
741 want_inline = false;
744 if (!want_inline && dump_file)
745 fprintf (dump_file, " not inlining recursively: %s\n", reason);
746 return want_inline;
749 /* Return true when NODE has an uninlinable caller;
750 set HAS_HOT_CALL if it has a hot call.
751 Worker for cgraph_for_node_and_aliases. */
753 static bool
754 check_callers (struct cgraph_node *node, void *has_hot_call)
756 struct cgraph_edge *e;
757 for (e = node->callers; e; e = e->next_caller)
759 if (!can_inline_edge_p (e, true))
760 return true;
761 if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
762 *(bool *)has_hot_call = true;
764 return false;
767 /* If NODE has a caller, return true. */
769 static bool
770 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
772 if (node->callers)
773 return true;
774 return false;
777 /* Decide if inlining NODE would reduce unit size by eliminating
778 the offline copy of function.
779 When COLD is true the cold calls are considered, too. */
781 static bool
782 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
784 struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
785 bool has_hot_call = false;
787 /* Does it have callers? */
788 if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
789 return false;
790 /* Already inlined? */
791 if (function->global.inlined_to)
792 return false;
793 if (cgraph_function_or_thunk_node (node, NULL) != node)
794 return false;
795 /* Inlining into all callers would increase size? */
796 if (estimate_growth (node) > 0)
797 return false;
798 /* All inlines must be possible. */
799 if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
800 return false;
801 if (!cold && !has_hot_call)
802 return false;
803 return true;
806 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
808 /* Return the relative time improvement for inlining EDGE in the range
809 1...RELATIVE_TIME_BENEFIT_RANGE. */
811 static inline int
812 relative_time_benefit (struct inline_summary *callee_info,
813 struct cgraph_edge *edge,
814 int edge_time)
816 gcov_type relbenefit;
817 gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
818 gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
820 /* Inlining into extern inline function is not a win. */
821 if (DECL_EXTERNAL (edge->caller->global.inlined_to
822 ? edge->caller->global.inlined_to->decl
823 : edge->caller->decl))
824 return 1;
826 /* Watch overflows. */
827 gcc_checking_assert (uninlined_call_time >= 0);
828 gcc_checking_assert (inlined_call_time >= 0);
829 gcc_checking_assert (uninlined_call_time >= inlined_call_time);
831 /* Compute relative time benefit, i.e. how much the call becomes faster.
832 ??? perhaps computing how much the caller+callee together become faster
833 would lead to more realistic results. */
834 if (!uninlined_call_time)
835 uninlined_call_time = 1;
836 relbenefit =
837 RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
838 uninlined_call_time);
839 relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
840 gcc_checking_assert (relbenefit >= 0);
841 relbenefit = MAX (relbenefit, 1);
842 return relbenefit;
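/* Continuing the illustrative call-time numbers: an uninlined time of 1200
   against an inlined time of 1140 gives a relative benefit of
   60 * RELATIVE_TIME_BENEFIT_RANGE / 1200, i.e. roughly 5% of the range,
   clamped below by 1.  */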
846 /* A cost model driving the inlining heuristics in such a way that the edges with
847 the smallest badness are inlined first. After each inlining is performed
848 the costs of all caller edges of the nodes affected are recomputed so the
849 metrics may accurately depend on values such as the number of inlinable callers
850 of the function or the function body size. */
852 static int
853 edge_badness (struct cgraph_edge *edge, bool dump)
855 gcov_type badness;
856 int growth, edge_time;
857 struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
858 NULL);
859 struct inline_summary *callee_info = inline_summary (callee);
860 inline_hints hints;
862 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
863 return INT_MIN;
865 growth = estimate_edge_growth (edge);
866 edge_time = estimate_edge_time (edge);
867 hints = estimate_edge_hints (edge);
868 gcc_checking_assert (edge_time >= 0);
869 gcc_checking_assert (edge_time <= callee_info->time);
870 gcc_checking_assert (growth <= callee_info->size);
872 if (dump)
874 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
875 xstrdup (edge->caller->name ()),
876 edge->caller->order,
877 xstrdup (callee->name ()),
878 edge->callee->order);
879 fprintf (dump_file, " size growth %i, time %i ",
880 growth,
881 edge_time);
882 dump_inline_hints (dump_file, hints);
883 if (big_speedup_p (edge))
884 fprintf (dump_file, " big_speedup");
885 fprintf (dump_file, "\n");
888 /* Always prefer inlining saving code size. */
889 if (growth <= 0)
891 badness = INT_MIN / 2 + growth;
892 if (dump)
893 fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness,
894 growth);
897 /* When profiling is available, compute badness as:
899 relative_edge_count * relative_time_benefit
900 goodness = -------------------------------------------
901 growth_f_caller
902 badness = -goodness
904 The fraction is upside down, because for edge counts and time benefits
905 the bounds are known. Edge growth is essentially unlimited. */
907 else if (max_count)
909 sreal tmp, relbenefit_real, growth_real;
910 int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
911 /* Capping edge->count to max_count. edge->count can be larger than
912 max_count if an inline adds new edges which increase max_count
913 after max_count is computed. */
914 gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
916 sreal_init (&relbenefit_real, relbenefit, 0);
917 sreal_init (&growth_real, growth, 0);
919 /* relative_edge_count. */
920 sreal_init (&tmp, edge_count, 0);
921 sreal_div (&tmp, &tmp, &max_count_real);
923 /* relative_time_benefit. */
924 sreal_mul (&tmp, &tmp, &relbenefit_real);
925 sreal_div (&tmp, &tmp, &max_relbenefit_real);
927 /* growth_f_caller. */
928 sreal_mul (&tmp, &tmp, &half_int_min_real);
929 sreal_div (&tmp, &tmp, &growth_real);
931 badness = -1 * sreal_to_int (&tmp);
933 if (dump)
935 fprintf (dump_file,
936 " %i (relative %f): profile info. Relative count %f%s"
937 " * Relative benefit %f\n",
938 (int) badness, (double) badness / INT_MIN,
939 (double) edge_count / max_count,
940 edge->count > max_count ? " (capped to max_count)" : "",
941 relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
945 /* When a function-local profile is available, compute badness as:
947 relative_time_benefit
948 goodness = ---------------------------------
949 growth_of_caller * overall_growth
951 badness = - goodness
953 compensated by the inline hints. */
955 else if (flag_guess_branch_prob)
957 badness = (relative_time_benefit (callee_info, edge, edge_time)
958 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
959 badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
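/* Rough worked example with hypothetical inputs: INT_MIN / 16 /
   RELATIVE_TIME_BENEFIT_RANGE evaluates to -4, so a relative benefit of
   RELATIVE_TIME_BENEFIT_RANGE / 20 yields an initial badness of about -6.7
   million; dividing by a growth of 30 and an overall growth of 50 leaves
   roughly -4473, which the hint multipliers below may scale further.  More
   negative values are inlined earlier.  */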
960 gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
961 if ((hints & (INLINE_HINT_indirect_call
962 | INLINE_HINT_loop_iterations
963 | INLINE_HINT_array_index
964 | INLINE_HINT_loop_stride))
965 || callee_info->growth <= 0)
966 badness *= 8;
967 if (hints & (INLINE_HINT_same_scc))
968 badness /= 16;
969 else if (hints & (INLINE_HINT_in_scc))
970 badness /= 8;
971 else if (hints & (INLINE_HINT_cross_module))
972 badness /= 2;
973 gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
974 if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
975 badness *= 16;
976 if (dump)
978 fprintf (dump_file,
979 " %i: guessed profile. frequency %f,"
980 " benefit %f%%, time w/o inlining %i, time w inlining %i"
981 " overall growth %i (current) %i (original)\n",
982 (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
983 relative_time_benefit (callee_info, edge, edge_time) * 100.0
984 / RELATIVE_TIME_BENEFIT_RANGE,
985 (int)compute_uninlined_call_time (callee_info, edge),
986 (int)compute_inlined_call_time (edge, edge_time),
987 estimate_growth (callee),
988 callee_info->growth);
991 /* When the function-local profile is not available or it does not give
992 useful information (i.e. the frequency is zero), base the cost on
993 the loop nest and overall size growth, so we optimize for the overall number
994 of functions fully inlined in the program. */
995 else
997 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
998 badness = growth * 256;
1000 /* Decrease badness if call is nested. */
1001 if (badness > 0)
1002 badness >>= nest;
1003 else
1005 badness <<= nest;
1007 if (dump)
1008 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
1009 nest);
1012 /* Ensure that we did not overflow in all the fixed point math above. */
1013 gcc_assert (badness >= INT_MIN);
1014 gcc_assert (badness <= INT_MAX - 1);
1015 /* Make recursive inlining happen always after other inlining is done. */
1016 if (cgraph_edge_recursive_p (edge))
1017 return badness + 1;
1018 else
1019 return badness;
1022 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1023 static inline void
1024 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1026 int badness = edge_badness (edge, false);
1027 if (edge->aux)
1029 fibnode_t n = (fibnode_t) edge->aux;
1030 gcc_checking_assert (n->data == edge);
1032 /* fibheap_replace_key only decreases the keys.
1033 When we increase the key we do not update the heap
1034 and instead re-insert the element once it becomes
1035 a minimum of the heap. */
1036 if (badness < n->key)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1040 fprintf (dump_file,
1041 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
1042 xstrdup (edge->caller->name ()),
1043 edge->caller->order,
1044 xstrdup (edge->callee->name ()),
1045 edge->callee->order,
1046 (int)n->key,
1047 badness);
1049 fibheap_replace_key (heap, n, badness);
1050 gcc_checking_assert (n->key == badness);
1053 else
1055 if (dump_file && (dump_flags & TDF_DETAILS))
1057 fprintf (dump_file,
1058 " enqueuing call %s/%i -> %s/%i, badness %i\n",
1059 xstrdup (edge->caller->name ()),
1060 edge->caller->order,
1061 xstrdup (edge->callee->name ()),
1062 edge->callee->order,
1063 badness);
1065 edge->aux = fibheap_insert (heap, badness, edge);
1070 /* NODE was inlined.
1071 All caller edges need to be reset because
1072 size estimates change. Similarly callees need to be reset
1073 because a better context may be known. */
1075 static void
1076 reset_edge_caches (struct cgraph_node *node)
1078 struct cgraph_edge *edge;
1079 struct cgraph_edge *e = node->callees;
1080 struct cgraph_node *where = node;
1081 int i;
1082 struct ipa_ref *ref;
1084 if (where->global.inlined_to)
1085 where = where->global.inlined_to;
1087 /* WHERE body size has changed, the cached growth is invalid. */
1088 reset_node_growth_cache (where);
1090 for (edge = where->callers; edge; edge = edge->next_caller)
1091 if (edge->inline_failed)
1092 reset_edge_growth_cache (edge);
1093 for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
1094 i, ref); i++)
1095 if (ref->use == IPA_REF_ALIAS)
1096 reset_edge_caches (ipa_ref_referring_node (ref));
1098 if (!e)
1099 return;
1101 while (true)
1102 if (!e->inline_failed && e->callee->callees)
1103 e = e->callee->callees;
1104 else
1106 if (e->inline_failed)
1107 reset_edge_growth_cache (e);
1108 if (e->next_callee)
1109 e = e->next_callee;
1110 else
1114 if (e->caller == node)
1115 return;
1116 e = e->caller->callers;
1118 while (!e->next_callee);
1119 e = e->next_callee;
1124 /* Recompute HEAP nodes for each caller of NODE.
1125 UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1126 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1127 it is inlinable. Otherwise check all edges. */
1129 static void
1130 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1131 bitmap updated_nodes,
1132 struct cgraph_edge *check_inlinablity_for)
1134 struct cgraph_edge *edge;
1135 int i;
1136 struct ipa_ref *ref;
1138 if ((!node->alias && !inline_summary (node)->inlinable)
1139 || node->global.inlined_to)
1140 return;
1141 if (!bitmap_set_bit (updated_nodes, node->uid))
1142 return;
1144 for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
1145 i, ref); i++)
1146 if (ref->use == IPA_REF_ALIAS)
1148 struct cgraph_node *alias = ipa_ref_referring_node (ref);
1149 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1152 for (edge = node->callers; edge; edge = edge->next_caller)
1153 if (edge->inline_failed)
1155 if (!check_inlinablity_for
1156 || check_inlinablity_for == edge)
1158 if (can_inline_edge_p (edge, false)
1159 && want_inline_small_function_p (edge, false))
1160 update_edge_key (heap, edge);
1161 else if (edge->aux)
1163 report_inline_failed_reason (edge);
1164 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1165 edge->aux = NULL;
1168 else if (edge->aux)
1169 update_edge_key (heap, edge);
1173 /* Recompute HEAP nodes for each uninlined call in NODE.
1174 This is used when we know that edge badnesses are going only to increase
1175 (we introduced new call site) and thus all we need is to insert newly
1176 created edges into heap. */
1178 static void
1179 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1180 bitmap updated_nodes)
1182 struct cgraph_edge *e = node->callees;
1184 if (!e)
1185 return;
1186 while (true)
1187 if (!e->inline_failed && e->callee->callees)
1188 e = e->callee->callees;
1189 else
1191 enum availability avail;
1192 struct cgraph_node *callee;
1193 /* We do not reset the callee growth cache here. Since we added a new call,
1194 growth could only have increased and consequently the badness metric
1195 doesn't need updating. */
1196 if (e->inline_failed
1197 && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1198 && inline_summary (callee)->inlinable
1199 && avail >= AVAIL_AVAILABLE
1200 && !bitmap_bit_p (updated_nodes, callee->uid))
1202 if (can_inline_edge_p (e, false)
1203 && want_inline_small_function_p (e, false))
1204 update_edge_key (heap, e);
1205 else if (e->aux)
1207 report_inline_failed_reason (e);
1208 fibheap_delete_node (heap, (fibnode_t) e->aux);
1209 e->aux = NULL;
1212 if (e->next_callee)
1213 e = e->next_callee;
1214 else
1218 if (e->caller == node)
1219 return;
1220 e = e->caller->callers;
1222 while (!e->next_callee);
1223 e = e->next_callee;
1228 /* Enqueue all recursive calls from NODE into priority queue depending on
1229 how likely we want to recursively inline the call. */
1231 static void
1232 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1233 fibheap_t heap)
1235 struct cgraph_edge *e;
1236 enum availability avail;
1238 for (e = where->callees; e; e = e->next_callee)
1239 if (e->callee == node
1240 || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1241 && avail > AVAIL_OVERWRITABLE))
1243 /* When profile feedback is available, prioritize by expected number
1244 of calls. */
1245 fibheap_insert (heap,
1246 !max_count ? -e->frequency
1247 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1248 e);
1250 for (e = where->callees; e; e = e->next_callee)
1251 if (!e->inline_failed)
1252 lookup_recursive_calls (node, e->callee, heap);
1255 /* Decide on recursive inlining: in case the function has recursive calls,
1256 inline until the body size reaches the given limit. If any new indirect edges
1257 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1258 is NULL. */
1260 static bool
1261 recursive_inlining (struct cgraph_edge *edge,
1262 vec<cgraph_edge_p> *new_edges)
1264 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1265 fibheap_t heap;
1266 struct cgraph_node *node;
1267 struct cgraph_edge *e;
1268 struct cgraph_node *master_clone = NULL, *next;
1269 int depth = 0;
1270 int n = 0;
1272 node = edge->caller;
1273 if (node->global.inlined_to)
1274 node = node->global.inlined_to;
1276 if (DECL_DECLARED_INLINE_P (node->decl))
1277 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1279 /* Make sure that function is small enough to be considered for inlining. */
1280 if (estimate_size_after_inlining (node, edge) >= limit)
1281 return false;
1282 heap = fibheap_new ();
1283 lookup_recursive_calls (node, node, heap);
1284 if (fibheap_empty (heap))
1286 fibheap_delete (heap);
1287 return false;
1290 if (dump_file)
1291 fprintf (dump_file,
1292 " Performing recursive inlining on %s\n",
1293 node->name ());
1295 /* Do the inlining and update the list of recursive calls during the process. */
1296 while (!fibheap_empty (heap))
1298 struct cgraph_edge *curr
1299 = (struct cgraph_edge *) fibheap_extract_min (heap);
1300 struct cgraph_node *cnode, *dest = curr->callee;
1302 if (!can_inline_edge_p (curr, true))
1303 continue;
1305 /* MASTER_CLONE is produced in the case we already started modifying
1306 the function. Be sure to redirect the edge to the original body before
1307 estimating growth, otherwise we will be seeing growth after inlining
1308 the already modified body. */
1309 if (master_clone)
1311 cgraph_redirect_edge_callee (curr, master_clone);
1312 reset_edge_growth_cache (curr);
1315 if (estimate_size_after_inlining (node, curr) > limit)
1317 cgraph_redirect_edge_callee (curr, dest);
1318 reset_edge_growth_cache (curr);
1319 break;
1322 depth = 1;
1323 for (cnode = curr->caller;
1324 cnode->global.inlined_to; cnode = cnode->callers->caller)
1325 if (node->decl
1326 == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
1327 depth++;
1329 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1331 cgraph_redirect_edge_callee (curr, dest);
1332 reset_edge_growth_cache (curr);
1333 continue;
1336 if (dump_file)
1338 fprintf (dump_file,
1339 " Inlining call of depth %i", depth);
1340 if (node->count)
1342 fprintf (dump_file, " called approx. %.2f times per call",
1343 (double)curr->count / node->count);
1345 fprintf (dump_file, "\n");
1347 if (!master_clone)
1349 /* We need original clone to copy around. */
1350 master_clone = cgraph_clone_node (node, node->decl,
1351 node->count, CGRAPH_FREQ_BASE,
1352 false, vNULL, true, NULL);
1353 for (e = master_clone->callees; e; e = e->next_callee)
1354 if (!e->inline_failed)
1355 clone_inlined_nodes (e, true, false, NULL);
1356 cgraph_redirect_edge_callee (curr, master_clone);
1357 reset_edge_growth_cache (curr);
1360 inline_call (curr, false, new_edges, &overall_size, true);
1361 lookup_recursive_calls (node, curr->callee, heap);
1362 n++;
1365 if (!fibheap_empty (heap) && dump_file)
1366 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1367 fibheap_delete (heap);
1369 if (!master_clone)
1370 return false;
1372 if (dump_file)
1373 fprintf (dump_file,
1374 "\n Inlined %i times, "
1375 "body grown from size %i to %i, time %i to %i\n", n,
1376 inline_summary (master_clone)->size, inline_summary (node)->size,
1377 inline_summary (master_clone)->time, inline_summary (node)->time);
1379 /* Remove the master clone we used for inlining. We rely on the fact that clones inlined
1380 into the master clone get queued just before the master clone, so we don't
1381 need recursion. */
1382 for (node = cgraph_first_function (); node != master_clone;
1383 node = next)
1385 next = cgraph_next_function (node);
1386 if (node->global.inlined_to == master_clone)
1387 cgraph_remove_node (node);
1389 cgraph_remove_node (master_clone);
1390 return true;
1394 /* Given whole compilation unit estimate of INSNS, compute how large we can
1395 allow the unit to grow. */
1397 static int
1398 compute_max_insns (int insns)
1400 int max_insns = insns;
1401 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1402 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1404 return ((HOST_WIDEST_INT) max_insns
1405 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
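/* Worked example (parameter defaults only assumed): with --param
   large-unit-insns of 10000 and --param inline-unit-growth of 30, a unit of
   4000 insns is first rounded up to 10000 and may then grow to
   10000 * 130 / 100 = 13000 insns, while a unit of 20000 insns may grow to
   26000.  */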
1409 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1411 static void
1412 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1414 while (new_edges.length () > 0)
1416 struct cgraph_edge *edge = new_edges.pop ();
1418 gcc_assert (!edge->aux);
1419 if (edge->inline_failed
1420 && can_inline_edge_p (edge, true)
1421 && want_inline_small_function_p (edge, true))
1422 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1426 /* Remove EDGE from the fibheap. */
1428 static void
1429 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1431 if (e->callee)
1432 reset_node_growth_cache (e->callee);
1433 if (e->aux)
1435 fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1436 e->aux = NULL;
1440 /* Return true if speculation of edge E seems useful.
1441 If ANTICIPATE_INLINING is true, be conservative and hope that E
1442 may get inlined. */
1444 bool
1445 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1447 enum availability avail;
1448 struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1449 struct cgraph_edge *direct, *indirect;
1450 struct ipa_ref *ref;
1452 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1454 if (!cgraph_maybe_hot_edge_p (e))
1455 return false;
1457 /* See if IP optimizations found something potentially useful about the
1458 function. For now we look only for CONST/PURE flags. Almost everything
1459 else we propagate is useless. */
1460 if (avail >= AVAIL_AVAILABLE)
1462 int ecf_flags = flags_from_decl_or_type (target->decl);
1463 if (ecf_flags & ECF_CONST)
1465 cgraph_speculative_call_info (e, direct, indirect, ref);
1466 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1467 return true;
1469 else if (ecf_flags & ECF_PURE)
1471 cgraph_speculative_call_info (e, direct, indirect, ref);
1472 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1473 return true;
1476 /* If we did not manage to inline the function nor redirect
1477 to an ipa-cp clone (which is seen by having the local flag set),
1478 it is probably pointless to inline it unless the hardware is missing an
1479 indirect call predictor. */
1480 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1481 return false;
1482 /* For overwritable targets there is not much to do. */
1483 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1484 return false;
1485 /* OK, speculation seems interesting. */
1486 return true;
1489 /* We know that EDGE is not going to be inlined.
1490 See if we can remove speculation. */
1492 static void
1493 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1495 if (edge->speculative && !speculation_useful_p (edge, false))
1497 struct cgraph_node *node = edge->caller;
1498 struct cgraph_node *where = node->global.inlined_to
1499 ? node->global.inlined_to : node;
1500 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1502 cgraph_resolve_speculation (edge, NULL);
1503 reset_edge_caches (where);
1504 inline_update_overall_summary (where);
1505 update_caller_keys (edge_heap, where,
1506 updated_nodes, NULL);
1507 update_callee_keys (edge_heap, where,
1508 updated_nodes);
1509 BITMAP_FREE (updated_nodes);
1513 /* We use a greedy algorithm for inlining small functions:
1514 all inline candidates are put into a prioritized heap ordered by
1515 increasing badness.
1517 The inlining of small functions is bounded by the unit growth parameters. */
1519 static void
1520 inline_small_functions (void)
1522 struct cgraph_node *node;
1523 struct cgraph_edge *edge;
1524 fibheap_t edge_heap = fibheap_new ();
1525 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1526 int min_size, max_size;
1527 vec<cgraph_edge_p> new_indirect_edges = vNULL;
1528 int initial_size = 0;
1529 struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1530 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1532 if (flag_indirect_inlining)
1533 new_indirect_edges.create (8);
1535 edge_removal_hook_holder
1536 = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1538 /* Compute overall unit size and other global parameters used by badness
1539 metrics. */
1541 max_count = 0;
1542 ipa_reduced_postorder (order, true, true, NULL);
1543 free (order);
1545 FOR_EACH_DEFINED_FUNCTION (node)
1546 if (!node->global.inlined_to)
1548 if (cgraph_function_with_gimple_body_p (node)
1549 || node->thunk.thunk_p)
1551 struct inline_summary *info = inline_summary (node);
1552 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1554 if (!DECL_EXTERNAL (node->decl))
1555 initial_size += info->size;
1556 info->growth = estimate_growth (node);
1557 if (dfs && dfs->next_cycle)
1559 struct cgraph_node *n2;
1560 int id = dfs->scc_no + 1;
1561 for (n2 = node; n2;
1562 n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1564 struct inline_summary *info2 = inline_summary (n2);
1565 if (info2->scc_no)
1566 break;
1567 info2->scc_no = id;
1572 for (edge = node->callers; edge; edge = edge->next_caller)
1573 if (max_count < edge->count)
1574 max_count = edge->count;
1576 sreal_init (&max_count_real, max_count, 0);
1577 sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1578 sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1579 ipa_free_postorder_info ();
1580 initialize_growth_caches ();
1582 if (dump_file)
1583 fprintf (dump_file,
1584 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1585 initial_size);
1587 overall_size = initial_size;
1588 max_size = compute_max_insns (overall_size);
1589 min_size = overall_size;
1591 /* Populate the heap with all edges we might inline. */
1593 FOR_EACH_DEFINED_FUNCTION (node)
1595 bool update = false;
1596 struct cgraph_edge *next;
1598 if (dump_file)
1599 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1600 node->name (), node->order);
1602 for (edge = node->callees; edge; edge = next)
1604 next = edge->next_callee;
1605 if (edge->inline_failed
1606 && !edge->aux
1607 && can_inline_edge_p (edge, true)
1608 && want_inline_small_function_p (edge, true)
1609 && edge->inline_failed)
1611 gcc_assert (!edge->aux);
1612 update_edge_key (edge_heap, edge);
1614 if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1616 cgraph_resolve_speculation (edge, NULL);
1617 update = true;
1620 if (update)
1622 struct cgraph_node *where = node->global.inlined_to
1623 ? node->global.inlined_to : node;
1624 inline_update_overall_summary (where);
1625 reset_node_growth_cache (where);
1626 reset_edge_caches (where);
1627 update_caller_keys (edge_heap, where,
1628 updated_nodes, NULL);
1629 bitmap_clear (updated_nodes);
1633 gcc_assert (in_lto_p
1634 || !max_count
1635 || (profile_info && flag_branch_probabilities));
1637 while (!fibheap_empty (edge_heap))
1639 int old_size = overall_size;
1640 struct cgraph_node *where, *callee;
1641 int badness = fibheap_min_key (edge_heap);
1642 int current_badness;
1643 int cached_badness;
1644 int growth;
1646 edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1647 gcc_assert (edge->aux);
1648 edge->aux = NULL;
1649 if (!edge->inline_failed)
1650 continue;
1652 /* Be sure that the caches are kept consistent.
1653 We cannot make this ENABLE_CHECKING-only because it causes different
1654 updates of the fibheap queue. */
1655 cached_badness = edge_badness (edge, false);
1656 reset_edge_growth_cache (edge);
1657 reset_node_growth_cache (edge->callee);
1659 /* When updating the edge costs, we only decrease badness in the keys.
1660 Increases of badness are handled lazily; when we see a key with an
1661 out-of-date value on it, we re-insert it now. */
1662 current_badness = edge_badness (edge, false);
1663 gcc_assert (cached_badness == current_badness);
1664 gcc_assert (current_badness >= badness);
1665 if (current_badness != badness)
1667 edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1668 continue;
1671 if (!can_inline_edge_p (edge, true))
1673 resolve_noninline_speculation (edge_heap, edge);
1674 continue;
1677 callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1678 growth = estimate_edge_growth (edge);
1679 if (dump_file)
1681 fprintf (dump_file,
1682 "\nConsidering %s/%i with %i size\n",
1683 callee->name (), callee->order,
1684 inline_summary (callee)->size);
1685 fprintf (dump_file,
1686 " to be inlined into %s/%i in %s:%i\n"
1687 " Estimated growth after inlined into all is %+i insns.\n"
1688 " Estimated badness is %i, frequency %.2f.\n",
1689 edge->caller->name (), edge->caller->order,
1690 flag_wpa ? "unknown"
1691 : gimple_filename ((const_gimple) edge->call_stmt),
1692 flag_wpa ? -1
1693 : gimple_lineno ((const_gimple) edge->call_stmt),
1694 estimate_growth (callee),
1695 badness,
1696 edge->frequency / (double)CGRAPH_FREQ_BASE);
1697 if (edge->count)
1698 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1699 edge->count);
1700 if (dump_flags & TDF_DETAILS)
1701 edge_badness (edge, true);
1704 if (overall_size + growth > max_size
1705 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1707 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1708 report_inline_failed_reason (edge);
1709 resolve_noninline_speculation (edge_heap, edge);
1710 continue;
1713 if (!want_inline_small_function_p (edge, true))
1715 resolve_noninline_speculation (edge_heap, edge);
1716 continue;
1719 /* The heuristics for inlining small functions work poorly for
1720 recursive calls, where we achieve an effect similar to loop unrolling.
1721 When inlining such an edge seems profitable, leave the decision to the
1722 special-purpose recursive inliner. */
1723 if (cgraph_edge_recursive_p (edge))
1725 where = edge->caller;
1726 if (where->global.inlined_to)
1727 where = where->global.inlined_to;
1728 if (!recursive_inlining (edge,
1729 flag_indirect_inlining
1730 ? &new_indirect_edges : NULL))
1732 edge->inline_failed = CIF_RECURSIVE_INLINING;
1733 resolve_noninline_speculation (edge_heap, edge);
1734 continue;
1736 reset_edge_caches (where);
1737 /* Recursive inliner inlines all recursive calls of the function
1738 at once. Consequently we need to update all callee keys. */
1739 if (flag_indirect_inlining)
1740 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1741 update_callee_keys (edge_heap, where, updated_nodes);
1742 bitmap_clear (updated_nodes);
1744 else
1746 struct cgraph_node *outer_node = NULL;
1747 int depth = 0;
1749 /* Consider the case where the self-recursive function A is inlined into B.
1750 This is a desired optimization in some cases, since it leads to an effect
1751 similar to loop peeling and we might completely optimize out the
1752 recursive call. However we must be extra selective. */
1754 where = edge->caller;
1755 while (where->global.inlined_to)
1757 if (where->decl == callee->decl)
1758 outer_node = where, depth++;
1759 where = where->callers->caller;
1761 if (outer_node
1762 && !want_inline_self_recursive_call_p (edge, outer_node,
1763 true, depth))
1765 edge->inline_failed
1766 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1767 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1768 resolve_noninline_speculation (edge_heap, edge);
1769 continue;
1771 else if (depth && dump_file)
1772 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1774 gcc_checking_assert (!callee->global.inlined_to);
1775 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1776 if (flag_indirect_inlining)
1777 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1779 reset_edge_caches (edge->callee);
1780 reset_node_growth_cache (callee);
1782 update_callee_keys (edge_heap, where, updated_nodes);
1784 where = edge->caller;
1785 if (where->global.inlined_to)
1786 where = where->global.inlined_to;
1788 /* Our profitability metric can depend on local properties
1789 such as the number of inlinable calls and the size of the function body.
1790 After inlining these properties might change for the function we
1791 inlined into (since its body size changed) and for the functions
1792 called by the function we inlined (since the number of their inlinable callers
1793 might change). */
1794 update_caller_keys (edge_heap, where, updated_nodes, NULL);
1795 bitmap_clear (updated_nodes);
1797 if (dump_file)
1799 fprintf (dump_file,
1800 " Inlined into %s which now has time %i and size %i,"
1801 "net change of %+i.\n",
1802 edge->caller->name (),
1803 inline_summary (edge->caller)->time,
1804 inline_summary (edge->caller)->size,
1805 overall_size - old_size);
1807 if (min_size > overall_size)
1809 min_size = overall_size;
1810 max_size = compute_max_insns (min_size);
1812 if (dump_file)
1813 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1817 free_growth_caches ();
1818 new_indirect_edges.release ();
1819 fibheap_delete (edge_heap);
1820 if (dump_file)
1821 fprintf (dump_file,
1822 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1823 initial_size, overall_size,
1824 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1825 BITMAP_FREE (updated_nodes);
1826 cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
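/* A worked example of the growth figure printed above (the numbers are
   made up for illustration): with initial_size == 2000 and
   overall_size == 2300 the dump reads
   "Unit growth for small function inlining: 2000->2300 (15%)",
   since 2300 * 100 / 2000 - 100 == 15.  */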
1829 /* Flatten NODE. Performed both during early inlining and
1830 at IPA inlining time. */
1832 static void
1833 flatten_function (struct cgraph_node *node, bool early)
1835 struct cgraph_edge *e;
1837 /* We shouldn't be called recursively when we are being processed. */
1838 gcc_assert (node->aux == NULL);
1840 node->aux = (void *) node;
1842 for (e = node->callees; e; e = e->next_callee)
1844 struct cgraph_node *orig_callee;
1845 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1847 /* Have we hit a cycle? It is time to give up. */
1848 if (callee->aux)
1850 if (dump_file)
1851 fprintf (dump_file,
1852 "Not inlining %s into %s to avoid cycle.\n",
1853 xstrdup (callee->name ()),
1854 xstrdup (e->caller->name ()));
1855 e->inline_failed = CIF_RECURSIVE_INLINING;
1856 continue;
1859 /* When the edge is already inlined, we just need to recurse into
1860 it in order to fully flatten the leaves. */
1861 if (!e->inline_failed)
1863 flatten_function (callee, early);
1864 continue;
1867 /* The flatten attribute needs to be processed during late inlining. For
1868 extra code quality, however, we also do flattening during early
1869 optimization. */
1870 if (!early
1871 ? !can_inline_edge_p (e, true)
1872 : !can_early_inline_edge_p (e))
1873 continue;
1875 if (cgraph_edge_recursive_p (e))
1877 if (dump_file)
1878 fprintf (dump_file, "Not inlining: recursive call.\n");
1879 continue;
1882 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1883 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1885 if (dump_file)
1886 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1887 continue;
1890 /* Inline the edge and flatten the inline clone. Avoid
1891 recursing through the original node if the node was cloned. */
1892 if (dump_file)
1893 fprintf (dump_file, " Inlining %s into %s.\n",
1894 xstrdup (callee->name ()),
1895 xstrdup (e->caller->name ()));
1896 orig_callee = callee;
1897 inline_call (e, true, NULL, NULL, false);
1898 if (e->callee != orig_callee)
1899 orig_callee->aux = (void *) node;
1900 flatten_function (e->callee, early);
1901 if (e->callee != orig_callee)
1902 orig_callee->aux = NULL;
1905 node->aux = NULL;
1906 if (!node->global.inlined_to)
1907 inline_update_overall_summary (node);
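/* For illustration only (hypothetical user code, not part of this file):
   flatten_function implements the user-visible attribute in

     __attribute__ ((flatten))
     void render (void)
     {
       prepare ();
       draw ();
     }

   where prepare() and draw(), and recursively everything they call, are
   inlined into render()'s body as far as can_inline_edge_p (or, early,
   can_early_inline_edge_p) allows, stopping only at cycles and
   uninlinable edges.  */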
1910 /* Count the number of callers of NODE and store it into DATA (which
1911 points to an int). Worker for cgraph_for_node_and_aliases. */
1913 static bool
1914 sum_callers (struct cgraph_node *node, void *data)
1916 struct cgraph_edge *e;
1917 int *num_calls = (int *)data;
1919 for (e = node->callers; e; e = e->next_caller)
1920 (*num_calls)++;
1921 return false;
1924 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
1925 DATA points to the number of calls originally found, so we avoid
1926 infinite recursion. */
1928 static bool
1929 inline_to_all_callers (struct cgraph_node *node, void *data)
1931 int *num_calls = (int *)data;
1932 while (node->callers && !node->global.inlined_to)
1934 struct cgraph_node *caller = node->callers->caller;
1936 if (dump_file)
1938 fprintf (dump_file,
1939 "\nInlining %s size %i.\n",
1940 node->name (),
1941 inline_summary (node)->size);
1942 fprintf (dump_file,
1943 " Called once from %s %i insns.\n",
1944 node->callers->caller->name (),
1945 inline_summary (node->callers->caller)->size);
1948 inline_call (node->callers, true, NULL, NULL, true);
1949 if (dump_file)
1950 fprintf (dump_file,
1951 " Inlined into %s which now has %i size\n",
1952 caller->name (),
1953 inline_summary (caller)->size);
1954 if (!(*num_calls)--)
1956 if (dump_file)
1957 fprintf (dump_file, "New calls found; giving up.\n");
1958 return true;
1961 return false;
1964 /* Decide on the inlining. We do so in topological order to avoid the
1965 expense of updating data structures. */
1967 static unsigned int
1968 ipa_inline (void)
1970 struct cgraph_node *node;
1971 int nnodes;
1972 struct cgraph_node **order;
1973 int i;
1974 int cold;
1975 bool remove_functions = false;
1977 if (!optimize)
1978 return 0;
1980 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1982 if (in_lto_p && optimize)
1983 ipa_update_after_lto_read ();
1985 if (dump_file)
1986 dump_inline_summaries (dump_file);
1988 nnodes = ipa_reverse_postorder (order);
1990 FOR_EACH_FUNCTION (node)
1991 node->aux = 0;
1993 if (dump_file)
1994 fprintf (dump_file, "\nFlattening functions:\n");
1996 /* In the first pass handle functions to be flattened. Do this with
1997 priority so that none of our later choices will make it impossible. */
1998 for (i = nnodes - 1; i >= 0; i--)
2000 node = order[i];
2002 /* Handle nodes to be flattened.
2003 Ideally when processing callees we stop inlining at the
2004 entry of cycles, possibly cloning that entry point and
2005 trying to flatten it, turning it into a self-recursive
2006 function. */
2007 if (lookup_attribute ("flatten",
2008 DECL_ATTRIBUTES (node->decl)) != NULL)
2010 if (dump_file)
2011 fprintf (dump_file,
2012 "Flattening %s\n", node->name ());
2013 flatten_function (node, false);
2017 inline_small_functions ();
2019 /* Do the first after-inlining removal. We want to remove all "stale" extern
2020 inline functions and virtual functions so we really know what is called once. */
2021 symtab_remove_unreachable_nodes (false, dump_file);
2022 free (order);
2024 /* Inline functions with the property that after inlining into all callers the
2025 code size will shrink because the out-of-line copy is eliminated.
2026 We do this regardless of the callee size as long as function growth limits
2027 are met. */
2028 if (dump_file)
2029 fprintf (dump_file,
2030 "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2032 /* Inlining one function called once has a good chance of preventing the
2033 inlining of another function into the same callee. Ideally we should
2034 work in priority order, but probably inlining hot functions first
2035 is a good cut without the extra pain of maintaining the queue.
2037 ??? This does not really fit the bill perfectly: inlining a function
2038 into its caller often leads to better optimization of the callee due to
2039 increased context for optimization.
2040 For example if the main() function calls a function that outputs help
2041 and then a function that does the main optimization, we should inline
2042 the second with priority even if both calls are cold by themselves.
2044 We probably want to implement a new predicate replacing our use of
2045 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2046 to be hot. */
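/* For illustration only (hypothetical user code, not part of this file):
   a function with a single call site such as

     static int parse_options (int argc, char **argv)
     {
       return argc > 1 && argv[1][0] == '-';
     }

     int main (int argc, char **argv)
     {
       int verbose = parse_options (argc, argv);
       return verbose;
     }

   is inlined here regardless of its size (as long as the growth limits
   are met), because the out-of-line copy of parse_options can then be
   removed and overall unit size does not grow.  */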
2047 for (cold = 0; cold <= 1; cold ++)
2049 FOR_EACH_DEFINED_FUNCTION (node)
2051 struct cgraph_edge *edge, *next;
2052 bool update=false;
2054 for (edge = node->callees; edge; edge = next)
2056 next = edge->next_callee;
2057 if (edge->speculative && !speculation_useful_p (edge, false))
2059 cgraph_resolve_speculation (edge, NULL);
2060 update = true;
2061 remove_functions = true;
2064 if (update)
2066 struct cgraph_node *where = node->global.inlined_to
2067 ? node->global.inlined_to : node;
2068 reset_node_growth_cache (where);
2069 reset_edge_caches (where);
2070 inline_update_overall_summary (where);
2072 if (flag_inline_functions_called_once
2073 && want_inline_function_to_all_callers_p (node, cold))
2075 int num_calls = 0;
2076 cgraph_for_node_and_aliases (node, sum_callers,
2077 &num_calls, true);
2078 cgraph_for_node_and_aliases (node, inline_to_all_callers,
2079 &num_calls, true);
2080 remove_functions = true;
2085 /* Free ipa-prop structures if they are no longer needed. */
2086 if (optimize)
2087 ipa_free_all_structures_after_iinln ();
2089 if (dump_file)
2090 fprintf (dump_file,
2091 "\nInlined %i calls, eliminated %i functions\n\n",
2092 ncalls_inlined, nfunctions_inlined);
2094 if (dump_file)
2095 dump_inline_summaries (dump_file);
2096 /* In WPA we use inline summaries for the partitioning process. */
2097 if (!flag_wpa)
2098 inline_free_summary ();
2099 return remove_functions ? TODO_remove_functions : 0;
2102 /* Inline always-inline function calls in NODE. */
2104 static bool
2105 inline_always_inline_functions (struct cgraph_node *node)
2107 struct cgraph_edge *e;
2108 bool inlined = false;
2110 for (e = node->callees; e; e = e->next_callee)
2112 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2113 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2114 continue;
2116 if (cgraph_edge_recursive_p (e))
2118 if (dump_file)
2119 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2120 e->callee->name ());
2121 e->inline_failed = CIF_RECURSIVE_INLINING;
2122 continue;
2125 if (!can_early_inline_edge_p (e))
2127 /* Set inlined to true if the callee is marked "always_inline" but
2128 is not inlinable. This will allow flagging an error later in
2129 expand_call_inline in tree-inline.c. */
2130 if (lookup_attribute ("always_inline",
2131 DECL_ATTRIBUTES (callee->decl)) != NULL)
2132 inlined = true;
2133 continue;
2136 if (dump_file)
2137 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2138 xstrdup (e->callee->name ()),
2139 xstrdup (e->caller->name ()));
2140 inline_call (e, true, NULL, NULL, false);
2141 inlined = true;
2143 if (inlined)
2144 inline_update_overall_summary (node);
2146 return inlined;
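/* For illustration only (hypothetical user code, not part of this file):

     static inline __attribute__ ((always_inline))
     int clamp (int v, int lo, int hi)
     {
       return v < lo ? lo : v > hi ? hi : v;
     }

   Every call to clamp() is handled by the loop above, even when not
   optimizing; when an edge cannot be early-inlined we still mark the
   function as inlined so that expand_call_inline can diagnose the
   failure later.  */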
2149 /* Decide on early inlining of small functions called by NODE and perform
2150 the profitable ones. */
2152 static bool
2153 early_inline_small_functions (struct cgraph_node *node)
2155 struct cgraph_edge *e;
2156 bool inlined = false;
2158 for (e = node->callees; e; e = e->next_callee)
2160 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2161 if (!inline_summary (callee)->inlinable
2162 || !e->inline_failed)
2163 continue;
2165 /* Do not consider functions not declared inline. */
2166 if (!DECL_DECLARED_INLINE_P (callee->decl)
2167 && !flag_inline_small_functions
2168 && !flag_inline_functions)
2169 continue;
2171 if (dump_file)
2172 fprintf (dump_file, "Considering inline candidate %s.\n",
2173 callee->name ());
2175 if (!can_early_inline_edge_p (e))
2176 continue;
2178 if (cgraph_edge_recursive_p (e))
2180 if (dump_file)
2181 fprintf (dump_file, " Not inlining: recursive call.\n");
2182 continue;
2185 if (!want_early_inline_function_p (e))
2186 continue;
2188 if (dump_file)
2189 fprintf (dump_file, " Inlining %s into %s.\n",
2190 xstrdup (callee->name ()),
2191 xstrdup (e->caller->name ()));
2192 inline_call (e, true, NULL, NULL, true);
2193 inlined = true;
2196 return inlined;
2199 /* Do inlining of small functions. Doing so early helps profiling and other
2200 passes to be somewhat more effective and avoids some code duplication in
2201 the later real inlining pass for testcases with very many function calls. */
2202 static unsigned int
2203 early_inliner (void)
2205 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2206 struct cgraph_edge *edge;
2207 unsigned int todo = 0;
2208 int iterations = 0;
2209 bool inlined = false;
2211 if (seen_error ())
2212 return 0;
2214 /* Do nothing if data structures for the ipa-inliner are already computed.
2215 This happens when some pass decides to construct a new function and
2216 cgraph_add_new_function calls lowering passes and early optimization on
2217 it. This may confuse us when the early inliner decides to inline a call
2218 to a function clone, because function clones don't have a parameter list
2219 in ipa-prop matching their signature. */
2220 if (ipa_node_params_vector.exists ())
2221 return 0;
2223 #ifdef ENABLE_CHECKING
2224 verify_cgraph_node (node);
2225 #endif
2226 ipa_remove_all_references (&node->ref_list);
2228 /* Even when not optimizing or not inlining, inline always-inline
2229 functions. */
2230 inlined = inline_always_inline_functions (node);
2232 if (!optimize
2233 || flag_no_inline
2234 || !flag_early_inlining
2235 /* Never inline regular functions into always-inline functions
2236 during incremental inlining. This is unfortunate, as functions calling
2237 always-inline functions will get less optimized, but at the
2238 same time inlining functions that call an always-inline
2239 function into an always-inline function might introduce
2240 cycles of edges to be always inlined in the callgraph.
2242 We might want to be smarter and just avoid this type of inlining. */
2243 || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2245 else if (lookup_attribute ("flatten",
2246 DECL_ATTRIBUTES (node->decl)) != NULL)
2248 /* When the function is marked to be flattened, recursively inline
2249 all calls in it. */
2250 if (dump_file)
2251 fprintf (dump_file,
2252 "Flattening %s\n", node->name ());
2253 flatten_function (node, true);
2254 inlined = true;
2256 else
2258 /* We iterate incremental inlining to get trivial cases of indirect
2259 inlining. */
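/* For illustration only (hypothetical user code, not part of this file):

     static int twice (int x) { return 2 * x; }
     static int apply (int (*fn) (int), int x) { return fn (x); }
     int user (int x) { return apply (twice, x); }

   Once the first iteration inlines apply() into user(), the indirect
   call fn (x) becomes the direct call twice (x), which the next
   iteration can inline in turn.  */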
2260 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2261 && early_inline_small_functions (node))
2263 timevar_push (TV_INTEGRATION);
2264 todo |= optimize_inline_calls (current_function_decl);
2266 /* Technically we ought to recompute inline parameters so the new
2267 iteration of the early inliner works as expected. However, the
2268 values are approximately right, and thus we only need to update edge
2269 info that might be cleared out for newly discovered edges. */
2270 for (edge = node->callees; edge; edge = edge->next_callee)
2272 struct inline_edge_summary *es = inline_edge_summary (edge);
2273 es->call_stmt_size
2274 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2275 es->call_stmt_time
2276 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2277 if (edge->callee->decl
2278 && !gimple_check_call_matching_types (
2279 edge->call_stmt, edge->callee->decl, false))
2280 edge->call_stmt_cannot_inline_p = true;
2282 timevar_pop (TV_INTEGRATION);
2283 iterations++;
2284 inlined = false;
2286 if (dump_file)
2287 fprintf (dump_file, "Iterations: %i\n", iterations);
2290 if (inlined)
2292 timevar_push (TV_INTEGRATION);
2293 todo |= optimize_inline_calls (current_function_decl);
2294 timevar_pop (TV_INTEGRATION);
2297 cfun->always_inline_functions_inlined = true;
2299 return todo;
2302 namespace {
2304 const pass_data pass_data_early_inline =
2306 GIMPLE_PASS, /* type */
2307 "einline", /* name */
2308 OPTGROUP_INLINE, /* optinfo_flags */
2309 false, /* has_gate */
2310 true, /* has_execute */
2311 TV_EARLY_INLINING, /* tv_id */
2312 PROP_ssa, /* properties_required */
2313 0, /* properties_provided */
2314 0, /* properties_destroyed */
2315 0, /* todo_flags_start */
2316 0, /* todo_flags_finish */
2319 class pass_early_inline : public gimple_opt_pass
2321 public:
2322 pass_early_inline (gcc::context *ctxt)
2323 : gimple_opt_pass (pass_data_early_inline, ctxt)
2326 /* opt_pass methods: */
2327 unsigned int execute () { return early_inliner (); }
2329 }; // class pass_early_inline
2331 } // anon namespace
2333 gimple_opt_pass *
2334 make_pass_early_inline (gcc::context *ctxt)
2336 return new pass_early_inline (ctxt);
2340 /* When to run IPA inlining. Inlining of always-inline functions
2341 happens during early inlining.
2343 Enable inlining unconditionally, because callgraph redirection
2344 happens here. */
2346 static bool
2347 gate_ipa_inline (void)
2349 return true;
2352 namespace {
2354 const pass_data pass_data_ipa_inline =
2356 IPA_PASS, /* type */
2357 "inline", /* name */
2358 OPTGROUP_INLINE, /* optinfo_flags */
2359 true, /* has_gate */
2360 true, /* has_execute */
2361 TV_IPA_INLINING, /* tv_id */
2362 0, /* properties_required */
2363 0, /* properties_provided */
2364 0, /* properties_destroyed */
2365 TODO_remove_functions, /* todo_flags_start */
2366 ( TODO_dump_symtab ), /* todo_flags_finish */
2369 class pass_ipa_inline : public ipa_opt_pass_d
2371 public:
2372 pass_ipa_inline (gcc::context *ctxt)
2373 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2374 inline_generate_summary, /* generate_summary */
2375 inline_write_summary, /* write_summary */
2376 inline_read_summary, /* read_summary */
2377 NULL, /* write_optimization_summary */
2378 NULL, /* read_optimization_summary */
2379 NULL, /* stmt_fixup */
2380 0, /* function_transform_todo_flags_start */
2381 inline_transform, /* function_transform */
2382 NULL) /* variable_transform */
2385 /* opt_pass methods: */
2386 bool gate () { return gate_ipa_inline (); }
2387 unsigned int execute () { return ipa_inline (); }
2389 }; // class pass_ipa_inline
2391 } // anon namespace
2393 ipa_opt_pass_d *
2394 make_pass_ipa_inline (gcc::context *ctxt)
2396 return new pass_ipa_inline (ctxt);