/* Inlining decision heuristics.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* Inlining decision heuristics

   We separate inlining decisions from the inliner itself and store them
   inside the callgraph as the so-called inline plan.  Refer to the cgraph.c
   documentation for the representation of inline plans in the callgraph.

   There are three major parts of this file:

   cgraph_mark_inline implementation

     This function marks a given call inline and performs the necessary
     modifications of the cgraph (producing the clones and updating the
     overall statistics).

   inlining heuristics limits

     These functions check that a particular inlining is allowed by the
     limits specified by the user (allowed function growth, overall unit
     growth and so on).

   inlining heuristics

     This is the implementation of the IPA pass aiming to get as much
     benefit from inlining as possible while obeying the limits checked
     above.

     The implementation of the particular heuristics is separated from the
     rest of the code to make it easier to replace it with a more
     complicated implementation in the future.  The rest of the inlining
     code acts as a library aimed to modify the callgraph and verify that
     the parameters on code size growth fit.

     To mark a given call inline, use the cgraph_mark_inline function; the
     verification is performed by cgraph_default_inline_p and
     cgraph_check_inline_limits.

     The heuristics implement a simple knapsack-style algorithm ordering
     all functions by their "profitability" (estimated by code size growth)
     and inlining them in priority order.

     cgraph_decide_inlining implements the heuristics taking the whole
     callgraph into account, while cgraph_decide_inlining_incrementally
     considers only one function at a time and is used in
     non-unit-at-a-time mode.

   The inliner itself is split into several passes:

   pass_inline_parameters

     This pass computes local properties of functions that are used by the
     inliner: estimated function body size, whether the function is
     inlinable at all, and stack frame consumption.

     Before executing any of the inliner passes, this local pass has to be
     applied to each function in the callgraph (i.e. run as a subpass of
     some earlier IPA pass).  The results are made out of date by any
     optimization applied on the function body.

   pass_early_inlining

     Simple local inlining pass inlining callees into the current function.
     This pass makes no global whole-compilation-unit analysis, so when it
     is allowed to do inlining that expands code size it might result in
     unbounded growth of the whole unit.

     This is the main inlining pass in non-unit-at-a-time mode.

     With unit-at-a-time the pass is run during conversion into SSA form.
     Only functions already converted into SSA form are inlined, so the
     conversion must happen in topological order on the callgraph (that is
     maintained by the pass manager).  The functions are early optimized
     after inlining, so the early inliner sees the unoptimized function
     itself, but all considered callees are already optimized, allowing it
     to unfold abstraction penalty on C++ effectively and cheaply.

   pass_ipa_early_inlining

     With profiling, early inlining is also necessary to reduce
     instrumentation costs on programs with high abstraction penalty (doing
     many redundant calls).  This can't happen in parallel with early
     optimization and profile instrumentation, because we would end up
     re-instrumenting already instrumented function bodies we brought in
     via inlining.

     To avoid this, this pass is executed as an IPA pass before profiling.
     It is a simple wrapper around pass_early_inlining and ensures the
     first round of inlining.

   pass_ipa_inline

     This is the main pass implementing a simple greedy algorithm to do
     inlining of small functions that results in overall growth of the
     compilation unit, and inlining of functions called once.  The pass
     computes just the so-called inline plan (the representation of the
     inlining to be done in the callgraph) and, unlike early inlining, it
     does not perform the inlining itself.

   pass_apply_inline

     This pass performs the actual inlining according to pass_ipa_inline on
     the given function.  Possibly the function body before inlining is
     saved when it is needed for further inlining later.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
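
/* Illustrative sketch (not part of the original GCC source): the
   "knapsack style" greedy ordering described in the file comment above,
   reduced to its core.  Candidates are ordered by a badness key (here
   simply the estimated code-size growth) and accepted while a
   unit-growth budget allows.  Every name and number below is invented
   for illustration; the real pass uses a fibonacci heap keyed by
   cgraph_edge_badness and recomputes keys after every accepted edge.  */

struct example_candidate
{
  int growth;	/* Estimated unit growth if this edge is inlined.  */
};

static inline int
example_greedy_inline (struct example_candidate *cands, int ncands,
		       int unit_size, int max_unit_size)
{
  int i, j, ninlined = 0;

  /* Order candidates by increasing growth (smallest badness first).  */
  for (i = 1; i < ncands; i++)
    {
      struct example_candidate key = cands[i];
      for (j = i - 1; j >= 0 && cands[j].growth > key.growth; j--)
	cands[j + 1] = cands[j];
      cands[j + 1] = key;
    }

  /* Accept candidates greedily while the unit stays within budget.  */
  for (i = 0; i < ncands; i++)
    if (unit_size + cands[i].growth <= max_unit_size)
      {
	unit_size += cands[i].growth;
	ninlined++;
      }
  return ninlined;
}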
/* Modes the incremental inliner operates in:

   In ALWAYS_INLINE mode, only functions marked always_inline are inlined.
   This mode is used after detecting a cycle during flattening.

   In SIZE mode, only functions that reduce the function body size after
   inlining are inlined; this is used during early inlining.

   In SPEED mode, all small functions are inlined.  This might result in
   unbounded growth of the compilation unit and is used only in
   non-unit-at-a-time mode.

   In ALL mode, everything is inlined.  This is used during flattening.  */
enum inlining_mode {
  INLINE_NONE = 0,
  INLINE_ALWAYS_INLINE,
  INLINE_SIZE,
  INLINE_SPEED,
  INLINE_ALL
};
static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *, enum inlining_mode,
				      int);

/* Statistics we collect about inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int overall_insns;
static gcov_type max_count;

/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
				     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}
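
/* Illustrative note (not from the original source): the estimate above is
   plain arithmetic.  For example, assuming a hypothetical callee WHAT of 50
   insns whose call costs 18 insns (call cost parameter plus argument move
   costs), inlining it twice into a caller TO of 200 insns yields
   (50 - 18) * 2 + 200 = 264 estimated insns.  The helper below restates the
   formula with plain ints so the arithmetic can be checked in isolation;
   the function name and parameters are invented for illustration.  */

static inline int
example_estimate_size_after_inlining (int times, int to_insns,
				      int what_insns, int call_insns)
{
  /* Each inlined copy adds the callee body minus the call overhead that
     the inlining removes.  */
  return (what_insns - call_insns) * times + to_insns;
}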

/* E is expected to be an edge being inlined.  Clone destination node of
   the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating new
   clones or re-using node originally representing out-of-line function call.
   */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
{
  HOST_WIDE_INT peak;
  if (duplicate)
    {
      /* We may eliminate the need for out-of-line copy to be output.
	 In that case just go ahead and re-use it.  */
      if (!e->callee->callers->next_caller
	  && !e->callee->needed
	  && flag_unit_at_a_time)
	{
	  gcc_assert (!e->callee->global.inlined_to);
	  if (DECL_SAVED_TREE (e->callee->decl))
	    overall_insns -= e->callee->global.insns, nfunctions_inlined++;
	  duplicate = false;
	}
      else
	{
	  struct cgraph_node *n;
	  n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
				 update_original);
	  cgraph_redirect_edge_callee (e, n);
	}
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;
  e->callee->global.stack_frame_offset
    = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
  peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
    e->callee->global.inlined_to->global.estimated_stack_size = peak;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate, update_original);
}

/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  */

void
cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;

  if (e->callee->inline_decl)
    cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined && flag_unit_at_a_time)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true, update_original);

  what = e->callee;

  /* Now update size of caller and all functions caller is inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
						       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;
}

/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;
  int times = 0;

  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
	{
	  cgraph_mark_inline_edge (e, true);
	  if (e == edge)
	    edge = next;
	  times++;
	}
    }
  gcc_assert (times);
  return edge;
}

/* Estimate the growth caused by inlining NODE into all of its callers.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;
  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
		 - e->caller->global.insns);

  /* ??? Wrong for self-recursive functions or cases where we decide not to
     inline for different reasons, but it is not a big deal: in that case
     we will keep the body around, but we will also avoid some inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl))
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}

/* Return false when inlining WHAT into TO is not a good idea because it
   would cause too large a growth of function bodies.
   When ONE_ONLY is true, assume that only one call site is going
   to be inlined; otherwise figure out how many call sites in TO
   call WHAT and verify that all of them can be inlined.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
			    const char **reason, bool one_only)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;
  HOST_WIDE_INT stack_size_limit, inlined_stack;

  if (one_only)
    times = 1;
  else
    for (e = to->callees; e; e = e->next_callee)
      if (e->callee == what)
	times++;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  /* When inlining a large function body called once into a small function,
     take the inlined function as base for limiting the growth.  */
  if (to->local.self_insns > what->local.self_insns)
    limit = to->local.self_insns;
  else
    limit = what->local.self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize >= to->global.insns
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
	*reason = N_("--param large-function-growth limit reached");
      return false;
    }

  stack_size_limit = to->local.estimated_self_stack_size;

  stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

  inlined_stack = (to->global.stack_frame_offset
		   + to->local.estimated_self_stack_size
		   + what->global.estimated_stack_size);
  if (inlined_stack > stack_size_limit
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      if (reason)
	*reason = N_("--param large-stack-frame-growth limit reached");
      return false;
    }
  return true;
}
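
/* Illustrative arithmetic (not from the original source): with, say,
   --param large-function-growth=100, a caller of 300 self insns inlining a
   callee of 120 self insns gets limit = 300 + 300 * 100 / 100 = 600, so the
   combined body may roughly double before the check above triggers (and
   even then only once it also exceeds --param large-function-insns).  The
   helper below restates just the growth-limit computation with plain ints;
   its name and parameters are invented for illustration.  */

static inline int
example_function_growth_limit (int to_self_insns, int what_self_insns,
			       int large_function_growth_percent)
{
  int limit = to_self_insns > what_self_insns
	      ? to_self_insns : what_self_insns;
  return limit + limit * large_function_growth_percent / 100;
}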

/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
{
  tree decl = n->decl;

  if (n->inline_decl)
    decl = n->inline_decl;
  if (!DECL_INLINE (decl))
    {
      if (reason)
	*reason = N_("function not inlinable");
      return false;
    }

  if (!DECL_STRUCT_FUNCTION (decl)->cfg)
    {
      if (reason)
	*reason = N_("function body not available");
      return false;
    }

  if (DECL_DECLARED_INLINE_P (decl))
    {
      if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
	{
	  if (reason)
	    *reason = N_("--param max-inline-insns-single limit reached");
	  return false;
	}
    }
  else
    {
      if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
	{
	  if (reason)
	    *reason = N_("--param max-inline-insns-auto limit reached");
	  return false;
	}
    }

  return true;
}

/* Return true when inlining WHAT would create recursive inlining.
   We call recursive inlining all cases where the same function appears
   more than once in a single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
			     struct cgraph_node *what,
			     const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking a recursive function inline has sane semantics and thus we
     should not warn on it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
	       ? N_("recursive inlining") : "");
  return recursive;
}

/* Return true if the call can be hot.  */
static bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
	  <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  return true;
}

/* A cost model driving the inlining heuristics so that the edges with the
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of the nodes affected are recomputed so
   the metrics may accurately depend on values such as the number of
   inlinable callers of the function or the function body size.

   With profiling we use the number of executions of each edge to drive
   the cost.  We should also distinguish hot and cold calls, where cold
   calls are inlined only when code size is overall improved.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  if (max_count)
    {
      int growth =
	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;

      /* Always prefer inlining saving code size.  */
      if (growth <= 0)
	return INT_MIN - growth;
      return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
    }
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      int badness = cgraph_estimate_growth (edge->callee) * 256;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
	badness >>= nest;
      else
	badness <<= nest;

      /* Make recursive inlining happen always after other inlining is done.  */
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
	return badness + 1;
      else
	return badness;
    }
}
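
/* Illustrative arithmetic (not from the original source): without profile
   data, a callee whose estimated overall growth is 40 insns and whose call
   sits in a loop nest of depth 3 gets badness (40 * 256) >> 3 = 1280, so it
   is preferred over the same callee called outside any loop (badness
   10240).  The helper below restates that branch with plain ints; its name
   and parameters are invented for illustration.  */

static inline int
example_badness_without_profile (int estimated_growth, int loop_nest)
{
  int nest = loop_nest < 8 ? loop_nest : 8;
  int badness = estimated_growth * 256;
  return badness > 0 ? badness >> nest : badness << nest;
}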

/* Recompute heap nodes for each caller edge.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes)
{
  struct cgraph_edge *edge;
  const char *failed_reason;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);
  node->global.estimated_growth = INT_MIN;

  if (!node->local.inlinable)
    return;
  /* Prune out edges we won't inline into anymore.  */
  if (!cgraph_default_inline_p (node, &failed_reason))
    {
      for (edge = node->callers; edge; edge = edge->next_caller)
	if (edge->aux)
	  {
	    fibheap_delete_node (heap, edge->aux);
	    edge->aux = NULL;
	    if (edge->inline_failed)
	      edge->inline_failed = failed_reason;
	  }
      return;
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
	int badness = cgraph_edge_badness (edge);
	if (edge->aux)
	  {
	    fibnode_t n = edge->aux;
	    gcc_assert (n->data == edge);
	    if (n->key == badness)
	      continue;

	    /* fibheap_replace_key only increases the keys.  */
	    if (fibheap_replace_key (heap, n, badness))
	      continue;
	    fibheap_delete_node (heap, edge->aux);
	  }
	edge->aux = fibheap_insert (heap, badness, edge);
      }
}

/* Recompute heap nodes for the caller edges of each of the callees.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else if (!e->inline_failed)
      update_callee_keys (heap, e->callee, updated_nodes);
}

/* Enqueue all recursive calls from NODE into the priority queue depending
   on how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
			fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
	/* When profile feedback is available, prioritize by the expected
	   number of calls.  Without profile feedback we maintain a simple
	   queue to order candidates by recursion depth.  */
	fibheap_insert (heap,
			!max_count ? priority++
			: -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
			e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
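
/* Illustrative arithmetic (not from the original source): with profile
   data, the heap key above scales edge counts down into roughly 24 bits so
   they fit an int key; e.g. for max_count = 2^30 the divisor is 64 and an
   edge executed 6400 times gets key -100, keeping hotter recursive calls
   first.  The helper below restates the scaling; its name and parameters
   are invented for illustration and it assumes max_count_seen > 0.  */

static inline int
example_recursive_call_key (long long count, long long max_count_seen)
{
  long long divisor = (max_count_seen + (1 << 24) - 1) / (1 << 24);
  return (int) -(count / divisor);
}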

/* Decide on recursive inlining: in case the function has recursive calls,
   inline until the body size reaches the given limit.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone, *next;
  int depth = 0;
  int n = 0;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Performing recursive inlining on %s\n",
	     cgraph_node_name (node));

  /* We need the original clone to copy around.  */
  master_clone = cgraph_clone_node (node, node->count, 1, false);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true, false);

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap)
	 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
	     <= limit))
    {
      struct cgraph_edge *curr = fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      depth = 1;
      for (cnode = curr->caller;
	   cnode->global.inlined_to; cnode = cnode->callers->caller)
	if (node->decl == curr->callee->decl)
	  depth++;
      if (depth > max_depth)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "   maximal depth reached\n");
	  continue;
	}

      if (max_count)
	{
	  if (!cgraph_maybe_hot_edge_p (curr))
	    {
	      if (dump_file)
		fprintf (dump_file, "   Not inlining cold call\n");
	      continue;
	    }
	  if (curr->count * 100 / node->count < probability)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "   Probability of edge is too small\n");
	      continue;
	    }
	}

      if (dump_file)
	{
	  fprintf (dump_file,
		   "   Inlining call of depth %i", depth);
	  if (node->count)
	    {
	      fprintf (dump_file, " called approx. %.2f times per call",
		       (double)curr->count / node->count);
	    }
	  fprintf (dump_file, "\n");
	}
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr, false);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
	     "\n   Inlined %i times, body grown from %i to %i insns\n", n,
	     master_clone->global.insns, node->global.insns);

  /* Remove the master clone we used for inlining.  We rely on the fact
     that clones inlined into the master clone get queued just before the
     master clone, so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
	cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  /* FIXME: Recursive inlining actually reduces the number of calls to the
     function.  At this point we should probably walk the function and
     inline clones and compensate the counts accordingly.  This probably
     doesn't matter much in practice.  */
  return n > 0;
}

/* Set inline_failed for all callers of given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}

/* Given an estimate INSNS of the whole compilation unit size, compute how
   large we can allow the unit to grow.  */
static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
	  * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
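
/* Illustrative arithmetic (not from the original source): for a unit of
   10000 insns and, say, --param inline-unit-growth=30, the budget above is
   10000 * (100 + 30) / 100 = 13000 insns; units smaller than
   --param large-unit-insns are first rounded up to that size so tiny units
   are not starved.  The helper restates the formula with plain types; its
   name and parameters are invented for illustration.  */

static inline long long
example_max_unit_insns (int insns, int large_unit_insns,
			int inline_unit_growth_percent)
{
  long long max_insns = insns < large_unit_insns ? large_unit_insns : insns;
  return max_insns * (100 + inline_unit_growth_percent) / 100;
}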

/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and the
   estimates are updated after each inlining.

   INLINED and INLINED_CALLEES are just pointers to arrays large enough
   to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  const char *failed_reason;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_insns, max_insns;

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
	  || node->local.disregard_inline_limits)
	continue;
      if (dump_file)
	fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node, &failed_reason))
	{
	  cgraph_set_inline_failed (node, failed_reason);
	  continue;
	}

      for (edge = node->callers; edge; edge = edge->next_caller)
	if (edge->inline_failed)
	  {
	    gcc_assert (!edge->aux);
	    edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
	  }
    }

  max_insns = compute_max_insns (overall_insns);
  min_insns = overall_insns;

  while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

      growth -= edge->caller->global.insns;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nConsidering %s with %i insns\n",
		   cgraph_node_name (edge->callee),
		   edge->callee->global.insns);
	  fprintf (dump_file,
		   " to be inlined into %s\n"
		   " Estimated growth after inlined into all callees is %+i insns.\n"
		   " Estimated badness is %i.\n",
		   cgraph_node_name (edge->caller),
		   cgraph_estimate_growth (edge->callee),
		   cgraph_edge_badness (edge));
	  if (edge->count)
	    fprintf (dump_file, " Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
	}
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
	continue;

      /* When profile info is not available we do not weight in any way the
	 position of the call in the procedure itself.  This means that if
	 a call of function A from function B seems profitable to inline,
	 the recursive call of A in the inline copy of A in B will look
	 profitable too, and we end up inlining until we reach the maximal
	 function growth.  This is not a good idea, so prohibit the
	 recursive inlining.

	 ??? When the frequencies are taken into account we might not need
	 this restriction.  */
      if (!max_count)
	{
	  where = edge->caller;
	  while (where->global.inlined_to)
	    {
	      if (where->decl == edge->callee->decl)
		break;
	      where = where->callers->caller;
	    }
	  if (where->global.inlined_to)
	    {
	      edge->inline_failed
		= (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
	      if (dump_file)
		fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
	      continue;
	    }
	}

      if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
	{
	  if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
					    &edge->inline_failed))
	    {
	      edge->inline_failed =
		N_("call is unlikely");
	      if (dump_file)
		fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
	    }
	  continue;
	}
      if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
	{
	  if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
					    &edge->inline_failed))
	    {
	      if (dump_file)
		fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
	    }
	  continue;
	}
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
				       &edge->inline_failed))
	{
	  where = edge->caller;
	  if (where->global.inlined_to)
	    where = where->global.inlined_to;
	  if (!cgraph_decide_recursive_inlining (where))
	    continue;
	  update_callee_keys (heap, where, updated_nodes);
	}
      else
	{
	  struct cgraph_node *callee;
	  if (!cgraph_check_inline_limits (edge->caller, edge->callee,
					   &edge->inline_failed, true))
	    {
	      if (dump_file)
		fprintf (dump_file, " Not inlining into %s:%s.\n",
			 cgraph_node_name (edge->caller), edge->inline_failed);
	      continue;
	    }
	  callee = edge->callee;
	  cgraph_mark_inline_edge (edge, true);
	  update_callee_keys (heap, callee, updated_nodes);
	}
      where = edge->caller;
      if (where->global.inlined_to)
	where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties
	 such as the number of inlinable calls and the size of the function
	 body.  After inlining these properties might change for the
	 function we inlined into (since its body size changed) and for the
	 functions called by the function we inlined (since the number of
	 their inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
	{
	  fprintf (dump_file,
		   " Inlined into %s which now has %i insns,"
		   "net change of %+i insns.\n",
		   cgraph_node_name (edge->caller),
		   edge->caller->global.insns,
		   overall_insns - old_insns);
	}
      if (min_insns > overall_insns)
	{
	  min_insns = overall_insns;
	  max_insns = compute_max_insns (min_insns);

	  if (dump_file)
	    fprintf (dump_file, "New minimal insns reached: %i\n", min_insns);
	}
    }
  while ((edge = fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
	  && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
					   &edge->inline_failed))
	edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}

/* Decide on the inlining.  We do so in topological order to avoid the
   expense of updating data structures.  */

static unsigned int
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int old_insns = 0;
  int i;
  int initial_insns = 0;

  max_count = 0;
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && (node->needed || node->reachable))
      {
	struct cgraph_edge *e;

	initial_insns += node->local.self_insns;
	gcc_assert (node->local.self_insns == node->global.insns);
	for (e = node->callees; e; e = e->next_callee)
	  if (max_count < e->count)
	    max_count = e->count;
      }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  nnodes = cgraph_postorder (order);

  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on inlining.  Starting with %i insns.\n",
	     initial_insns);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nInlining always_inline functions:\n");

  /* In the first pass mark all always_inline edges.  Do this with a priority
     so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      struct cgraph_edge *e, *next;

      node = order[i];

      /* Handle nodes to be flattened, but don't update overall unit size.  */
      if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Flattening %s\n", cgraph_node_name (node));
	  cgraph_decide_inlining_incrementally (node, INLINE_ALL, 0);
	}

      if (!node->local.disregard_inline_limits)
	continue;
      if (dump_file)
	fprintf (dump_file,
		 "\nConsidering %s %i insns (always inline)\n",
		 cgraph_node_name (node), node->global.insns);
      old_insns = overall_insns;
      for (e = node->callers; e; e = next)
	{
	  next = e->next_caller;
	  if (!e->inline_failed)
	    continue;
	  if (cgraph_recursive_inlining_p (e->caller, e->callee,
					   &e->inline_failed))
	    continue;
	  cgraph_mark_inline_edge (e, true);
	  if (dump_file)
	    fprintf (dump_file,
		     " Inlined into %s which now has %i insns.\n",
		     cgraph_node_name (e->caller),
		     e->caller->global.insns);
	}
      /* Inlining a self-recursive function might introduce new calls to
	 itself that we did not see in the loop above.  Fill in the proper
	 reason why the inlining failed.  */
      for (e = node->callers; e; e = e->next_caller)
	if (e->inline_failed)
	  e->inline_failed = N_("recursive inlining");
      if (dump_file)
	fprintf (dump_file,
		 " Inlined for a net change of %+i insns.\n",
		 overall_insns - old_insns);
    }

  if (!flag_really_no_inline)
    cgraph_decide_inlining_of_small_functions ();

  if (!flag_really_no_inline
      && flag_inline_functions_called_once)
    {
      if (dump_file)
	fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* And finally decide what functions are called once.  */

      for (i = nnodes - 1; i >= 0; i--)
	{
	  node = order[i];

	  if (node->callers && !node->callers->next_caller && !node->needed
	      && node->local.inlinable && node->callers->inline_failed
	      && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file,
			   "\nConsidering %s %i insns.\n",
			   cgraph_node_name (node), node->global.insns);
		  fprintf (dump_file,
			   " Called once from %s %i insns.\n",
			   cgraph_node_name (node->callers->caller),
			   node->callers->caller->global.insns);
		}

	      old_insns = overall_insns;

	      if (cgraph_check_inline_limits (node->callers->caller, node,
					      NULL, false))
		{
		  cgraph_mark_inline (node->callers);
		  if (dump_file)
		    fprintf (dump_file,
			     " Inlined into %s which now has %i insns"
			     " for a net change of %+i insns.\n",
			     cgraph_node_name (node->callers->caller),
			     node->callers->caller->global.insns,
			     overall_insns - old_insns);
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file,
			     " Inline limit reached, not inlined.\n");
		}
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file,
	     "\nInlined %i calls, eliminated %i functions, "
	     "%i insns turned to %i insns.\n\n",
	     ncalls_inlined, nfunctions_inlined, initial_insns,
	     overall_insns);
  free (order);
  return 0;
}

/* Try to inline edge E from the incremental inliner.  MODE specifies the
   mode of the inliner.

   We detect cycles by storing the mode of the inliner into the cgraph_node
   the last time we visited it in the recursion.  In general, when the mode
   is set, we have recursive inlining, but as a special case we want to try
   harder to inline ALWAYS_INLINE functions: consider the callgraph
   a->b->c->b, with a being flattened and b being always_inline.
   Flattening 'a' will collapse a->b->c before hitting the cycle.  To
   accommodate always_inline, we however need to inline a->b->c->b.

   So after hitting the cycle for the first time, we switch into
   ALWAYS_INLINE mode and stop inlining only after hitting an ALWAYS_INLINE
   function while already in ALWAYS_INLINE mode.  */
static bool
try_inline (struct cgraph_edge *e, enum inlining_mode mode, int depth)
{
  struct cgraph_node *callee = e->callee;
  enum inlining_mode callee_mode = (size_t) callee->aux;
  bool always_inline = e->callee->local.disregard_inline_limits;

  /* Have we hit a cycle?  */
  if (callee_mode)
    {
      /* It is the first time we see it, we are not in ALWAYS_INLINE-only
	 mode yet, and the function in question is always_inline.  */
      if (always_inline && mode != INLINE_ALWAYS_INLINE)
	mode = INLINE_ALWAYS_INLINE;
      /* Otherwise it is time to give up.  */
      else
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file,
		       "Not inlining %s into %s to avoid cycle.\n",
		       cgraph_node_name (callee),
		       cgraph_node_name (e->caller));
	    }
	  e->inline_failed = (e->callee->local.disregard_inline_limits
			      ? N_("recursive inlining") : "");
	  return false;
	}
    }

  callee->aux = (void *)(size_t) mode;
  if (dump_file)
    {
      indent_to (dump_file, depth);
      fprintf (dump_file, " Inlining %s into %s.\n",
	       cgraph_node_name (e->callee),
	       cgraph_node_name (e->caller));
    }
  cgraph_mark_inline (e);

  /* In order to fully inline always_inline functions at -O0, we need to
     recurse here, since the inlined functions might not be processed by
     incremental inlining at all yet.

     Also flattening needs to be done recursively.  */

  if (!flag_unit_at_a_time || mode == INLINE_ALL || always_inline)
    cgraph_decide_inlining_incrementally (e->callee, mode, depth + 1);
  callee->aux = (void *)(size_t) callee_mode;
  return true;
}

/* Decide on the inlining.  We do so in topological order to avoid the
   expense of updating data structures.
   DEPTH is the depth of recursion, used only for debug output.  */

static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node, enum inlining_mode mode,
				      int depth)
{
  struct cgraph_edge *e;
  bool inlined = false;
  const char *failed_reason;
  enum inlining_mode old_mode;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif

  old_mode = (size_t)node->aux;

  if (mode != INLINE_ALWAYS_INLINE
      && lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      if (dump_file)
	fprintf (dump_file, " Flattening %s\n", cgraph_node_name (node));
      mode = INLINE_ALL;
    }

  node->aux = (void *)(size_t) mode;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    {
      if (dump_file && e->callee->local.inlinable
	  && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	      != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl))))
	{
	  fprintf (dump_file, "  Ignoring %s: SSA form not computed yet.\n",
		   cgraph_node_name (e->callee));
	}
      if ((e->callee->local.disregard_inline_limits
	   || (mode == INLINE_ALL && e->callee->local.inlinable))
	  && e->inline_failed
	  && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	      == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
	  && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
	  /* ??? It is possible that renaming variable removed the function body
	     in duplicate_decls.  See gcc.c-torture/compile/20011119-2.c  */
	  && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
	{
	  inlined |= try_inline (e, mode, depth);
	}
    }

  /* Now do the automatic inlining.  */
  if (!flag_really_no_inline && mode != INLINE_ALL
      && mode != INLINE_ALWAYS_INLINE)
    for (e = node->callees; e; e = e->next_callee)
      if (e->callee->local.inlinable
	  && e->inline_failed
	  && !e->callee->local.disregard_inline_limits
	  && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
	  && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	      == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
	  && (mode != INLINE_SIZE
	      || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
		  <= e->caller->global.insns))
	  && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
					 false)
	  && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
	{
	  if (cgraph_default_inline_p (e->callee, &failed_reason))
	    inlined |= try_inline (e, mode, depth);
	  else if (!flag_unit_at_a_time)
	    e->inline_failed = failed_reason;
	}
  node->aux = (void *)(size_t) old_mode;
  return inlined;
}

/* When inlining shall be performed.  */
static bool
cgraph_gate_inlining (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_ipa_inline =
{
  "inline",				/* name */
  cgraph_gate_inlining,			/* gate */
  cgraph_decide_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  TODO_remove_functions,		/* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func
  | TODO_remove_functions,		/* todo_flags_finish */
  0					/* letter */
};

/* Because inlining might remove no-longer-reachable nodes, we need to
   keep the array visible to the garbage collector to avoid reading
   collected-out nodes.  */
static int nnodes;
static GTY ((length ("nnodes"))) struct cgraph_node **order;

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function calls.  */
static unsigned int
cgraph_early_inlining (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);
  unsigned int todo = 0;

  if (sorrycount || errorcount)
    return 0;
  if (cgraph_decide_inlining_incrementally (node,
					    flag_unit_at_a_time
					    ? INLINE_SIZE : INLINE_SPEED, 0))
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  return todo;
}

/* When inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
  return flag_inline_trees && flag_early_inlining;
}

struct tree_opt_pass pass_early_inline =
{
  "einline",				/* name */
  cgraph_gate_early_inlining,		/* gate */
  cgraph_early_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func,			/* todo_flags_finish */
  0					/* letter */
};

/* When inlining shall be performed.  */
static bool
cgraph_gate_ipa_early_inlining (void)
{
  return (flag_inline_trees && flag_early_inlining
	  && (flag_branch_probabilities || flag_test_coverage
	      || profile_arc_flag));
}

/* IPA pass wrapper for early inlining pass.  We need to run early inlining
   before tree profiling so we have stand alone IPA pass for doing so.  */
struct tree_opt_pass pass_ipa_early_inline =
{
  "einline_ipa",			/* name */
  cgraph_gate_ipa_early_inlining,	/* gate */
  NULL,					/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_cgraph,			/* todo_flags_finish */
  0					/* letter */
};

/* Compute parameters of functions used by inliner.  */
static unsigned int
compute_inline_parameters (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);

  gcc_assert (!node->global.inlined_to);
  node->local.estimated_self_stack_size = estimated_stack_frame_size ();
  node->global.estimated_stack_size = node->local.estimated_self_stack_size;
  node->global.stack_frame_offset = 0;
  node->local.inlinable = tree_inlinable_function_p (current_function_decl);
  node->local.self_insns = estimate_num_insns (current_function_decl,
					       &eni_inlining_weights);
  if (node->local.inlinable)
    node->local.disregard_inline_limits
      = lang_hooks.tree_inlining.disregard_inline_limits (current_function_decl);
  if (flag_really_no_inline && !node->local.disregard_inline_limits)
    node->local.inlinable = 0;
  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  node->global.insns = node->local.self_insns;
  return 0;
}

/* When inlining shall be performed.  */
static bool
gate_inline_passes (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_inline_parameters =
{
  NULL,					/* name */
  gate_inline_passes,			/* gate */
  compute_inline_parameters,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0,					/* todo_flags_finish */
  0					/* letter */
};

/* Apply inline plan to the function.  */
static unsigned int
apply_inline (void)
{
  unsigned int todo = 0;
  struct cgraph_edge *e;
  struct cgraph_node *node = cgraph_node (current_function_decl);

  /* Even when not optimizing, ensure that always_inline functions get
     inlined.  */
  if (!optimize)
    cgraph_decide_inlining_incrementally (node, INLINE_SPEED, 0);

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (cgraph_preserve_function_body_p (current_function_decl))
    save_inline_function_body (node);

  for (e = node->callees; e; e = e->next_callee)
    if (!e->inline_failed || warn_inline)
      break;
  if (e)
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  /* In non-unit-at-a-time we must mark all referenced functions as needed.  */
  if (!flag_unit_at_a_time)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
	if (e->callee->analyzed)
	  cgraph_mark_needed_node (e->callee);
    }
  return todo | execute_fixup_cfg ();
}

struct tree_opt_pass pass_apply_inline =
{
  "apply_inline",			/* name */
  NULL,					/* gate */
  apply_inline,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_verify_flow
  | TODO_verify_stmts,			/* todo_flags_finish */
  0					/* letter */
};

#include "gt-ipa-inline.h"