1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 /* Inlining decision heuristics
24 We separate inlining decisions from the inliner itself and store it
25 inside callgraph as so called inline plan. Refer to cgraph.c
26 documentation about particular representation of inline plans in the
29 There are three major parts of this file:
31 cgraph_mark_inline implementation
33 This function allows to mark given call inline and performs necessary
34 modifications of cgraph (production of the clones and updating overall
37 inlining heuristics limits
39 These functions allow to check that particular inlining is allowed
40 by the limits specified by user (allowed function growth, overall unit
45 This is implementation of IPA pass aiming to get as much of benefit
46 from inlining obeying the limits checked above.
48 The implementation of particular heuristics is separated from
49 the rest of code to make it easier to replace it with more complicated
50 implementation in the future. The rest of inlining code acts as a
51 library aimed to modify the callgraph and verify that the parameters
52 on code size growth fits.
54 To mark given call inline, use cgraph_mark_inline function, the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
58 The heuristics implements simple knapsack style algorithm ordering
59 all functions by their "profitability" (estimated by code size growth)
60 and inlining them in priority order.
62 cgraph_decide_inlining implements heuristics taking whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used in non-unit-at-a-time mode.
66 The inliner itself is split into several passes:
68 pass_inline_parameters
70 This pass computes local properties of functions that are used by inliner:
71 estimated function body size, whether function is inlinable at all and
72 stack frame consumption.
74 Before executing any of inliner passes, this local pass has to be applied
75 to each function in the callgraph (ie run as subpass of some earlier
76 IPA pass). The results are made out of date by any optimization applied
81 Simple local inlining pass inlining callees into current function. This
82 pass makes no global whole compilation unit analysis and this when allowed
83 to do inlining expanding code size it might result in unbounded growth of
86 This is the main inlining pass in non-unit-at-a-time.
88 With unit-at-a-time the pass is run during conversion into SSA form.
89 Only functions already converted into SSA form are inlined, so the
90 conversion must happen in topological order on the callgraph (that is
91 maintained by pass manager). The functions after inlining are early
92 optimized so the early inliner sees unoptimized function itself, but
93 all considered callees are already optimized allowing it to unfold
94 abstraction penalty on C++ effectivly and cheaply.
96 pass_ipa_early_inlining
98 With profiling, the early inlining is also neccesary to reduce
99 instrumentation costs on program with high abstraction penalty (doing
100 many redundant calls). This can't happen in parallel with early
101 optimization and profile instrumentation, because we would end up
102 re-instrumenting already instrumented function bodies we brought in via
105 To avoid this, this pass is executed as IPA pass before profiling. It is
106 simple wrapper to pass_early_inlining and ensures first inlining.
110 This is the main pass implementing simple greedy algorithm to do inlining
111 of small functions that results in overall growth of compilation unit and
112 inlining of functions called once. The pass compute just so called inline
113 plan (representation of inlining to be done in callgraph) and unlike early
114 inlining it is not performing the inlining itself.
118 This pass performs actual inlining according to pass_ipa_inline on given
119 function. Possible the function body before inlining is saved when it is
120 needed for further inlining later.
125 #include "coretypes.h"
128 #include "tree-inline.h"
129 #include "langhooks.h"
132 #include "diagnostic.h"
137 #include "tree-pass.h"
139 #include "coverage.h"
141 #include "tree-flow.h"
143 /* Mode incremental inliner operate on:
145 In ALWAYS_INLINE only functions marked
146 always_inline are inlined. This mode is used after detecting cycle during
149 In SIZE mode, only functions that reduce function body size after inlining
150 are inlined, this is used during early inlining.
152 In SPEED mode, all small functions are inlined. This might result in
153 unbounded growth of compilation unit and is used only in non-unit-at-a-time
156 in ALL mode, everything is inlined. This is used during flattening. */
159 INLINE_ALWAYS_INLINE
,
165 cgraph_decide_inlining_incrementally (struct cgraph_node
*, enum inlining_mode
,
169 /* Statistics we collect about inlining algorithm. */
170 static int ncalls_inlined
;
171 static int nfunctions_inlined
;
172 static int overall_insns
;
173 static gcov_type max_count
;
175 /* Estimate size of the function after inlining WHAT into TO. */
178 cgraph_estimate_size_after_inlining (int times
, struct cgraph_node
*to
,
179 struct cgraph_node
*what
)
182 tree fndecl
= what
->decl
, arg
;
183 int call_insns
= PARAM_VALUE (PARAM_INLINE_CALL_COST
);
185 for (arg
= DECL_ARGUMENTS (fndecl
); arg
; arg
= TREE_CHAIN (arg
))
186 call_insns
+= estimate_move_cost (TREE_TYPE (arg
));
187 size
= (what
->global
.insns
- call_insns
) * times
+ to
->global
.insns
;
188 gcc_assert (size
>= 0);
192 /* E is expected to be an edge being inlined. Clone destination node of
193 the edge and redirect it to the new clone.
194 DUPLICATE is used for bookkeeping on whether we are actually creating new
195 clones or re-using node originally representing out-of-line function call.
198 cgraph_clone_inlined_nodes (struct cgraph_edge
*e
, bool duplicate
, bool update_original
)
203 /* We may eliminate the need for out-of-line copy to be output.
204 In that case just go ahead and re-use it. */
205 if (!e
->callee
->callers
->next_caller
206 && !e
->callee
->needed
207 && flag_unit_at_a_time
)
209 gcc_assert (!e
->callee
->global
.inlined_to
);
210 if (DECL_SAVED_TREE (e
->callee
->decl
))
211 overall_insns
-= e
->callee
->global
.insns
, nfunctions_inlined
++;
216 struct cgraph_node
*n
;
217 n
= cgraph_clone_node (e
->callee
, e
->count
, e
->loop_nest
,
219 cgraph_redirect_edge_callee (e
, n
);
223 if (e
->caller
->global
.inlined_to
)
224 e
->callee
->global
.inlined_to
= e
->caller
->global
.inlined_to
;
226 e
->callee
->global
.inlined_to
= e
->caller
;
227 e
->callee
->global
.stack_frame_offset
228 = e
->caller
->global
.stack_frame_offset
+ e
->caller
->local
.estimated_self_stack_size
;
229 peak
= e
->callee
->global
.stack_frame_offset
+ e
->callee
->local
.estimated_self_stack_size
;
230 if (e
->callee
->global
.inlined_to
->global
.estimated_stack_size
< peak
)
231 e
->callee
->global
.inlined_to
->global
.estimated_stack_size
= peak
;
233 /* Recursively clone all bodies. */
234 for (e
= e
->callee
->callees
; e
; e
= e
->next_callee
)
235 if (!e
->inline_failed
)
236 cgraph_clone_inlined_nodes (e
, duplicate
, update_original
);
239 /* Mark edge E as inlined and update callgraph accordingly.
240 UPDATE_ORIGINAL specify whether profile of original function should be
244 cgraph_mark_inline_edge (struct cgraph_edge
*e
, bool update_original
)
246 int old_insns
= 0, new_insns
= 0;
247 struct cgraph_node
*to
= NULL
, *what
;
249 if (e
->callee
->inline_decl
)
250 cgraph_redirect_edge_callee (e
, cgraph_node (e
->callee
->inline_decl
));
252 gcc_assert (e
->inline_failed
);
253 e
->inline_failed
= NULL
;
255 if (!e
->callee
->global
.inlined
&& flag_unit_at_a_time
)
256 DECL_POSSIBLY_INLINED (e
->callee
->decl
) = true;
257 e
->callee
->global
.inlined
= true;
259 cgraph_clone_inlined_nodes (e
, true, update_original
);
263 /* Now update size of caller and all functions caller is inlined into. */
264 for (;e
&& !e
->inline_failed
; e
= e
->caller
->callers
)
266 old_insns
= e
->caller
->global
.insns
;
267 new_insns
= cgraph_estimate_size_after_inlining (1, e
->caller
,
269 gcc_assert (new_insns
>= 0);
271 to
->global
.insns
= new_insns
;
273 gcc_assert (what
->global
.inlined_to
== to
);
274 if (new_insns
> old_insns
)
275 overall_insns
+= new_insns
- old_insns
;
279 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
280 Return following unredirected edge in the list of callers
283 static struct cgraph_edge
*
284 cgraph_mark_inline (struct cgraph_edge
*edge
)
286 struct cgraph_node
*to
= edge
->caller
;
287 struct cgraph_node
*what
= edge
->callee
;
288 struct cgraph_edge
*e
, *next
;
291 /* Look for all calls, mark them inline and clone recursively
292 all inlined functions. */
293 for (e
= what
->callers
; e
; e
= next
)
295 next
= e
->next_caller
;
296 if (e
->caller
== to
&& e
->inline_failed
)
298 cgraph_mark_inline_edge (e
, true);
308 /* Estimate the growth caused by inlining NODE into all callees. */
311 cgraph_estimate_growth (struct cgraph_node
*node
)
314 struct cgraph_edge
*e
;
315 if (node
->global
.estimated_growth
!= INT_MIN
)
316 return node
->global
.estimated_growth
;
318 for (e
= node
->callers
; e
; e
= e
->next_caller
)
319 if (e
->inline_failed
)
320 growth
+= (cgraph_estimate_size_after_inlining (1, e
->caller
, node
)
321 - e
->caller
->global
.insns
);
323 /* ??? Wrong for self recursive functions or cases where we decide to not
324 inline for different reasons, but it is not big deal as in that case
325 we will keep the body around, but we will also avoid some inlining. */
326 if (!node
->needed
&& !DECL_EXTERNAL (node
->decl
))
327 growth
-= node
->global
.insns
;
329 node
->global
.estimated_growth
= growth
;
333 /* Return false when inlining WHAT into TO is not good idea
334 as it would cause too large growth of function bodies.
335 When ONE_ONLY is true, assume that only one call site is going
336 to be inlined, otherwise figure out how many call sites in
337 TO calls WHAT and verify that all can be inlined.
341 cgraph_check_inline_limits (struct cgraph_node
*to
, struct cgraph_node
*what
,
342 const char **reason
, bool one_only
)
345 struct cgraph_edge
*e
;
348 HOST_WIDE_INT stack_size_limit
, inlined_stack
;
353 for (e
= to
->callees
; e
; e
= e
->next_callee
)
354 if (e
->callee
== what
)
357 if (to
->global
.inlined_to
)
358 to
= to
->global
.inlined_to
;
360 /* When inlining large function body called once into small function,
361 take the inlined function as base for limiting the growth. */
362 if (to
->local
.self_insns
> what
->local
.self_insns
)
363 limit
= to
->local
.self_insns
;
365 limit
= what
->local
.self_insns
;
367 limit
+= limit
* PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH
) / 100;
369 /* Check the size after inlining against the function limits. But allow
370 the function to shrink if it went over the limits by forced inlining. */
371 newsize
= cgraph_estimate_size_after_inlining (times
, to
, what
);
372 if (newsize
>= to
->global
.insns
373 && newsize
> PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS
)
377 *reason
= N_("--param large-function-growth limit reached");
381 stack_size_limit
= to
->local
.estimated_self_stack_size
;
383 stack_size_limit
+= stack_size_limit
* PARAM_VALUE (PARAM_STACK_FRAME_GROWTH
) / 100;
385 inlined_stack
= (to
->global
.stack_frame_offset
386 + to
->local
.estimated_self_stack_size
387 + what
->global
.estimated_stack_size
);
388 if (inlined_stack
> stack_size_limit
389 && inlined_stack
> PARAM_VALUE (PARAM_LARGE_STACK_FRAME
))
392 *reason
= N_("--param large-stack-frame-growth limit reached");
398 /* Return true when function N is small enough to be inlined. */
401 cgraph_default_inline_p (struct cgraph_node
*n
, const char **reason
)
406 decl
= n
->inline_decl
;
407 if (!DECL_INLINE (decl
))
410 *reason
= N_("function not inlinable");
414 if (!DECL_STRUCT_FUNCTION (decl
)->cfg
)
417 *reason
= N_("function body not available");
421 if (DECL_DECLARED_INLINE_P (decl
))
423 if (n
->global
.insns
>= MAX_INLINE_INSNS_SINGLE
)
426 *reason
= N_("--param max-inline-insns-single limit reached");
432 if (n
->global
.insns
>= MAX_INLINE_INSNS_AUTO
)
435 *reason
= N_("--param max-inline-insns-auto limit reached");
443 /* Return true when inlining WHAT would create recursive inlining.
444 We call recursive inlining all cases where same function appears more than
445 once in the single recursion nest path in the inline graph. */
448 cgraph_recursive_inlining_p (struct cgraph_node
*to
,
449 struct cgraph_node
*what
,
453 if (to
->global
.inlined_to
)
454 recursive
= what
->decl
== to
->global
.inlined_to
->decl
;
456 recursive
= what
->decl
== to
->decl
;
457 /* Marking recursive function inline has sane semantic and thus we should
459 if (recursive
&& reason
)
460 *reason
= (what
->local
.disregard_inline_limits
461 ? N_("recursive inlining") : "");
465 /* Return true if the call can be hot. */
467 cgraph_maybe_hot_edge_p (struct cgraph_edge
*edge
)
469 if (profile_info
&& flag_branch_probabilities
471 <= profile_info
->sum_max
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION
)))
476 /* A cost model driving the inlining heuristics in a way so the edges with
477 smallest badness are inlined first. After each inlining is performed
478 the costs of all caller edges of nodes affected are recomputed so the
479 metrics may accurately depend on values such as number of inlinable callers
480 of the function or function body size.
482 With profiling we use number of executions of each edge to drive the cost.
483 We also should distinguish hot and cold calls where the cold calls are
484 inlined into only when code size is overall improved.
488 cgraph_edge_badness (struct cgraph_edge
*edge
)
493 cgraph_estimate_size_after_inlining (1, edge
->caller
, edge
->callee
);
494 growth
-= edge
->caller
->global
.insns
;
496 /* Always prefer inlining saving code size. */
498 return INT_MIN
- growth
;
499 return ((int)((double)edge
->count
* INT_MIN
/ max_count
)) / growth
;
503 int nest
= MIN (edge
->loop_nest
, 8);
504 int badness
= cgraph_estimate_growth (edge
->callee
) * 256;
506 /* Decrease badness if call is nested. */
512 /* Make recursive inlining happen always after other inlining is done. */
513 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
, NULL
))
520 /* Recompute heap nodes for each of caller edge. */
523 update_caller_keys (fibheap_t heap
, struct cgraph_node
*node
,
524 bitmap updated_nodes
)
526 struct cgraph_edge
*edge
;
527 const char *failed_reason
;
529 if (!node
->local
.inlinable
|| node
->local
.disregard_inline_limits
530 || node
->global
.inlined_to
)
532 if (bitmap_bit_p (updated_nodes
, node
->uid
))
534 bitmap_set_bit (updated_nodes
, node
->uid
);
535 node
->global
.estimated_growth
= INT_MIN
;
537 if (!node
->local
.inlinable
)
539 /* Prune out edges we won't inline into anymore. */
540 if (!cgraph_default_inline_p (node
, &failed_reason
))
542 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
545 fibheap_delete_node (heap
, edge
->aux
);
547 if (edge
->inline_failed
)
548 edge
->inline_failed
= failed_reason
;
553 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
554 if (edge
->inline_failed
)
556 int badness
= cgraph_edge_badness (edge
);
559 fibnode_t n
= edge
->aux
;
560 gcc_assert (n
->data
== edge
);
561 if (n
->key
== badness
)
564 /* fibheap_replace_key only increase the keys. */
565 if (fibheap_replace_key (heap
, n
, badness
))
567 fibheap_delete_node (heap
, edge
->aux
);
569 edge
->aux
= fibheap_insert (heap
, badness
, edge
);
573 /* Recompute heap nodes for each of caller edges of each of callees. */
576 update_callee_keys (fibheap_t heap
, struct cgraph_node
*node
,
577 bitmap updated_nodes
)
579 struct cgraph_edge
*e
;
580 node
->global
.estimated_growth
= INT_MIN
;
582 for (e
= node
->callees
; e
; e
= e
->next_callee
)
583 if (e
->inline_failed
)
584 update_caller_keys (heap
, e
->callee
, updated_nodes
);
585 else if (!e
->inline_failed
)
586 update_callee_keys (heap
, e
->callee
, updated_nodes
);
589 /* Enqueue all recursive calls from NODE into priority queue depending on
590 how likely we want to recursively inline the call. */
593 lookup_recursive_calls (struct cgraph_node
*node
, struct cgraph_node
*where
,
597 struct cgraph_edge
*e
;
598 for (e
= where
->callees
; e
; e
= e
->next_callee
)
599 if (e
->callee
== node
)
601 /* When profile feedback is available, prioritize by expected number
602 of calls. Without profile feedback we maintain simple queue
603 to order candidates via recursive depths. */
604 fibheap_insert (heap
,
605 !max_count
? priority
++
606 : -(e
->count
/ ((max_count
+ (1<<24) - 1) / (1<<24))),
609 for (e
= where
->callees
; e
; e
= e
->next_callee
)
610 if (!e
->inline_failed
)
611 lookup_recursive_calls (node
, e
->callee
, heap
);
614 /* Decide on recursive inlining: in the case function has recursive calls,
615 inline until body size reaches given argument. */
618 cgraph_decide_recursive_inlining (struct cgraph_node
*node
)
620 int limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO
);
621 int max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO
);
622 int probability
= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY
);
624 struct cgraph_edge
*e
;
625 struct cgraph_node
*master_clone
, *next
;
629 if (DECL_DECLARED_INLINE_P (node
->decl
))
631 limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE
);
632 max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH
);
635 /* Make sure that function is small enough to be considered for inlining. */
637 || cgraph_estimate_size_after_inlining (1, node
, node
) >= limit
)
639 heap
= fibheap_new ();
640 lookup_recursive_calls (node
, node
, heap
);
641 if (fibheap_empty (heap
))
643 fibheap_delete (heap
);
649 " Performing recursive inlining on %s\n",
650 cgraph_node_name (node
));
652 /* We need original clone to copy around. */
653 master_clone
= cgraph_clone_node (node
, node
->count
, 1, false);
654 master_clone
->needed
= true;
655 for (e
= master_clone
->callees
; e
; e
= e
->next_callee
)
656 if (!e
->inline_failed
)
657 cgraph_clone_inlined_nodes (e
, true, false);
659 /* Do the inlining and update list of recursive call during process. */
660 while (!fibheap_empty (heap
)
661 && (cgraph_estimate_size_after_inlining (1, node
, master_clone
)
664 struct cgraph_edge
*curr
= fibheap_extract_min (heap
);
665 struct cgraph_node
*cnode
;
668 for (cnode
= curr
->caller
;
669 cnode
->global
.inlined_to
; cnode
= cnode
->callers
->caller
)
670 if (node
->decl
== curr
->callee
->decl
)
672 if (depth
> max_depth
)
676 " maxmal depth reached\n");
682 if (!cgraph_maybe_hot_edge_p (curr
))
685 fprintf (dump_file
, " Not inlining cold call\n");
688 if (curr
->count
* 100 / node
->count
< probability
)
692 " Probability of edge is too small\n");
700 " Inlining call of depth %i", depth
);
703 fprintf (dump_file
, " called approx. %.2f times per call",
704 (double)curr
->count
/ node
->count
);
706 fprintf (dump_file
, "\n");
708 cgraph_redirect_edge_callee (curr
, master_clone
);
709 cgraph_mark_inline_edge (curr
, false);
710 lookup_recursive_calls (node
, curr
->callee
, heap
);
713 if (!fibheap_empty (heap
) && dump_file
)
714 fprintf (dump_file
, " Recursive inlining growth limit met.\n");
716 fibheap_delete (heap
);
719 "\n Inlined %i times, body grown from %i to %i insns\n", n
,
720 master_clone
->global
.insns
, node
->global
.insns
);
722 /* Remove master clone we used for inlining. We rely that clones inlined
723 into master clone gets queued just before master clone so we don't
725 for (node
= cgraph_nodes
; node
!= master_clone
;
729 if (node
->global
.inlined_to
== master_clone
)
730 cgraph_remove_node (node
);
732 cgraph_remove_node (master_clone
);
733 /* FIXME: Recursive inlining actually reduces number of calls of the
734 function. At this place we should probably walk the function and
735 inline clones and compensate the counts accordingly. This probably
736 doesn't matter much in practice. */
740 /* Set inline_failed for all callers of given function to REASON. */
743 cgraph_set_inline_failed (struct cgraph_node
*node
, const char *reason
)
745 struct cgraph_edge
*e
;
748 fprintf (dump_file
, "Inlining failed: %s\n", reason
);
749 for (e
= node
->callers
; e
; e
= e
->next_caller
)
750 if (e
->inline_failed
)
751 e
->inline_failed
= reason
;
754 /* Given whole compilation unit esitmate of INSNS, compute how large we can
755 allow the unit to grow. */
757 compute_max_insns (int insns
)
759 int max_insns
= insns
;
760 if (max_insns
< PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
))
761 max_insns
= PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
);
763 return ((HOST_WIDEST_INT
) max_insns
764 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH
)) / 100);
767 /* We use greedy algorithm for inlining of small functions:
768 All inline candidates are put into prioritized heap based on estimated
769 growth of the overall number of instructions and then update the estimates.
771 INLINED and INLINED_CALEES are just pointers to arrays large enough
772 to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
775 cgraph_decide_inlining_of_small_functions (void)
777 struct cgraph_node
*node
;
778 struct cgraph_edge
*edge
;
779 const char *failed_reason
;
780 fibheap_t heap
= fibheap_new ();
781 bitmap updated_nodes
= BITMAP_ALLOC (NULL
);
782 int min_insns
, max_insns
;
785 fprintf (dump_file
, "\nDeciding on smaller functions:\n");
787 /* Put all inline candidates into the heap. */
789 for (node
= cgraph_nodes
; node
; node
= node
->next
)
791 if (!node
->local
.inlinable
|| !node
->callers
792 || node
->local
.disregard_inline_limits
)
795 fprintf (dump_file
, "Considering inline candidate %s.\n", cgraph_node_name (node
));
797 node
->global
.estimated_growth
= INT_MIN
;
798 if (!cgraph_default_inline_p (node
, &failed_reason
))
800 cgraph_set_inline_failed (node
, failed_reason
);
804 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
805 if (edge
->inline_failed
)
807 gcc_assert (!edge
->aux
);
808 edge
->aux
= fibheap_insert (heap
, cgraph_edge_badness (edge
), edge
);
812 max_insns
= compute_max_insns (overall_insns
);
813 min_insns
= overall_insns
;
815 while (overall_insns
<= max_insns
&& (edge
= fibheap_extract_min (heap
)))
817 int old_insns
= overall_insns
;
818 struct cgraph_node
*where
;
820 cgraph_estimate_size_after_inlining (1, edge
->caller
, edge
->callee
);
822 growth
-= edge
->caller
->global
.insns
;
827 "\nConsidering %s with %i insns\n",
828 cgraph_node_name (edge
->callee
),
829 edge
->callee
->global
.insns
);
831 " to be inlined into %s\n"
832 " Estimated growth after inlined into all callees is %+i insns.\n"
833 " Estimated badness is %i.\n",
834 cgraph_node_name (edge
->caller
),
835 cgraph_estimate_growth (edge
->callee
),
836 cgraph_edge_badness (edge
));
838 fprintf (dump_file
," Called "HOST_WIDEST_INT_PRINT_DEC
"x\n", edge
->count
);
840 gcc_assert (edge
->aux
);
842 if (!edge
->inline_failed
)
845 /* When not having profile info ready we don't weight by any way the
846 position of call in procedure itself. This means if call of
847 function A from function B seems profitable to inline, the recursive
848 call of function A in inline copy of A in B will look profitable too
849 and we end up inlining until reaching maximal function growth. This
850 is not good idea so prohibit the recursive inlining.
852 ??? When the frequencies are taken into account we might not need this
856 where
= edge
->caller
;
857 while (where
->global
.inlined_to
)
859 if (where
->decl
== edge
->callee
->decl
)
861 where
= where
->callers
->caller
;
863 if (where
->global
.inlined_to
)
866 = (edge
->callee
->local
.disregard_inline_limits
? N_("recursive inlining") : "");
868 fprintf (dump_file
, " inline_failed:Recursive inlining performed only for function itself.\n");
873 if (!cgraph_maybe_hot_edge_p (edge
) && growth
> 0)
875 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
876 &edge
->inline_failed
))
878 edge
->inline_failed
=
879 N_("call is unlikely");
881 fprintf (dump_file
, " inline_failed:%s.\n", edge
->inline_failed
);
885 if (!cgraph_default_inline_p (edge
->callee
, &edge
->inline_failed
))
887 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
888 &edge
->inline_failed
))
891 fprintf (dump_file
, " inline_failed:%s.\n", edge
->inline_failed
);
895 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
896 &edge
->inline_failed
))
898 where
= edge
->caller
;
899 if (where
->global
.inlined_to
)
900 where
= where
->global
.inlined_to
;
901 if (!cgraph_decide_recursive_inlining (where
))
903 update_callee_keys (heap
, where
, updated_nodes
);
907 struct cgraph_node
*callee
;
908 if (!cgraph_check_inline_limits (edge
->caller
, edge
->callee
,
909 &edge
->inline_failed
, true))
912 fprintf (dump_file
, " Not inlining into %s:%s.\n",
913 cgraph_node_name (edge
->caller
), edge
->inline_failed
);
916 callee
= edge
->callee
;
917 cgraph_mark_inline_edge (edge
, true);
918 update_callee_keys (heap
, callee
, updated_nodes
);
920 where
= edge
->caller
;
921 if (where
->global
.inlined_to
)
922 where
= where
->global
.inlined_to
;
924 /* Our profitability metric can depend on local properties
925 such as number of inlinable calls and size of the function body.
926 After inlining these properties might change for the function we
927 inlined into (since it's body size changed) and for the functions
928 called by function we inlined (since number of it inlinable callers
930 update_caller_keys (heap
, where
, updated_nodes
);
931 bitmap_clear (updated_nodes
);
936 " Inlined into %s which now has %i insns,"
937 "net change of %+i insns.\n",
938 cgraph_node_name (edge
->caller
),
939 edge
->caller
->global
.insns
,
940 overall_insns
- old_insns
);
942 if (min_insns
> overall_insns
)
944 min_insns
= overall_insns
;
945 max_insns
= compute_max_insns (min_insns
);
948 fprintf (dump_file
, "New minimal insns reached: %i\n", min_insns
);
951 while ((edge
= fibheap_extract_min (heap
)) != NULL
)
953 gcc_assert (edge
->aux
);
955 if (!edge
->callee
->local
.disregard_inline_limits
&& edge
->inline_failed
956 && !cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
957 &edge
->inline_failed
))
958 edge
->inline_failed
= N_("--param inline-unit-growth limit reached");
960 fibheap_delete (heap
);
961 BITMAP_FREE (updated_nodes
);
964 /* Decide on the inlining. We do so in the topological order to avoid
965 expenses on updating data structures. */
968 cgraph_decide_inlining (void)
970 struct cgraph_node
*node
;
972 struct cgraph_node
**order
=
973 XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
976 int initial_insns
= 0;
979 for (node
= cgraph_nodes
; node
; node
= node
->next
)
980 if (node
->analyzed
&& (node
->needed
|| node
->reachable
))
982 struct cgraph_edge
*e
;
984 initial_insns
+= node
->local
.self_insns
;
985 gcc_assert (node
->local
.self_insns
== node
->global
.insns
);
986 for (e
= node
->callees
; e
; e
= e
->next_callee
)
987 if (max_count
< e
->count
)
988 max_count
= e
->count
;
990 overall_insns
= initial_insns
;
991 gcc_assert (!max_count
|| (profile_info
&& flag_branch_probabilities
));
993 nnodes
= cgraph_postorder (order
);
997 "\nDeciding on inlining. Starting with %i insns.\n",
1000 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1004 fprintf (dump_file
, "\nInlining always_inline functions:\n");
1006 /* In the first pass mark all always_inline edges. Do this with a priority
1007 so none of our later choices will make this impossible. */
1008 for (i
= nnodes
- 1; i
>= 0; i
--)
1010 struct cgraph_edge
*e
, *next
;
1014 /* Handle nodes to be flattened, but don't update overall unit size. */
1015 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1019 "Flattening %s\n", cgraph_node_name (node
));
1020 cgraph_decide_inlining_incrementally (node
, INLINE_ALL
, 0);
1023 if (!node
->local
.disregard_inline_limits
)
1027 "\nConsidering %s %i insns (always inline)\n",
1028 cgraph_node_name (node
), node
->global
.insns
);
1029 old_insns
= overall_insns
;
1030 for (e
= node
->callers
; e
; e
= next
)
1032 next
= e
->next_caller
;
1033 if (!e
->inline_failed
)
1035 if (cgraph_recursive_inlining_p (e
->caller
, e
->callee
,
1038 cgraph_mark_inline_edge (e
, true);
1041 " Inlined into %s which now has %i insns.\n",
1042 cgraph_node_name (e
->caller
),
1043 e
->caller
->global
.insns
);
1045 /* Inlining self recursive function might introduce new calls to
1046 thsemselves we didn't see in the loop above. Fill in the proper
1047 reason why inline failed. */
1048 for (e
= node
->callers
; e
; e
= e
->next_caller
)
1049 if (e
->inline_failed
)
1050 e
->inline_failed
= N_("recursive inlining");
1053 " Inlined for a net change of %+i insns.\n",
1054 overall_insns
- old_insns
);
1057 if (!flag_really_no_inline
)
1058 cgraph_decide_inlining_of_small_functions ();
1060 if (!flag_really_no_inline
1061 && flag_inline_functions_called_once
)
1064 fprintf (dump_file
, "\nDeciding on functions called once:\n");
1066 /* And finally decide what functions are called once. */
1068 for (i
= nnodes
- 1; i
>= 0; i
--)
1072 if (node
->callers
&& !node
->callers
->next_caller
&& !node
->needed
1073 && node
->local
.inlinable
&& node
->callers
->inline_failed
1074 && !DECL_EXTERNAL (node
->decl
) && !DECL_COMDAT (node
->decl
))
1079 "\nConsidering %s %i insns.\n",
1080 cgraph_node_name (node
), node
->global
.insns
);
1082 " Called once from %s %i insns.\n",
1083 cgraph_node_name (node
->callers
->caller
),
1084 node
->callers
->caller
->global
.insns
);
1087 old_insns
= overall_insns
;
1089 if (cgraph_check_inline_limits (node
->callers
->caller
, node
,
1092 cgraph_mark_inline (node
->callers
);
1095 " Inlined into %s which now has %i insns"
1096 " for a net change of %+i insns.\n",
1097 cgraph_node_name (node
->callers
->caller
),
1098 node
->callers
->caller
->global
.insns
,
1099 overall_insns
- old_insns
);
1105 " Inline limit reached, not inlined.\n");
1113 "\nInlined %i calls, eliminated %i functions, "
1114 "%i insns turned to %i insns.\n\n",
1115 ncalls_inlined
, nfunctions_inlined
, initial_insns
,
1121 /* Try to inline edge E from incremental inliner. MODE specifies mode
1124 We are detecting cycles by storing mode of inliner into cgraph_node last
1125 time we visited it in the recursion. In general when mode is set, we have
1126 recursive inlining, but as an special case, we want to try harder inline
1127 ALWAYS_INLINE functions: consider callgraph a->b->c->b, with a being
1128 flatten, b being always inline. Flattening 'a' will collapse
1129 a->b->c before hitting cycle. To accomondate always inline, we however
1130 need to inline a->b->c->b.
1132 So after hitting cycle first time, we switch into ALWAYS_INLINE mode and
1133 stop inlining only after hitting ALWAYS_INLINE in ALWAY_INLINE mode. */
1135 try_inline (struct cgraph_edge
*e
, enum inlining_mode mode
, int depth
)
1137 struct cgraph_node
*callee
= e
->callee
;
1138 enum inlining_mode callee_mode
= (size_t) callee
->aux
;
1139 bool always_inline
= e
->callee
->local
.disregard_inline_limits
;
1141 /* We've hit cycle? */
1144 /* It is first time we see it and we are not in ALWAY_INLINE only
1145 mode yet. and the function in question is always_inline. */
1146 if (always_inline
&& mode
!= INLINE_ALWAYS_INLINE
)
1147 mode
= INLINE_ALWAYS_INLINE
;
1148 /* Otheriwse it is time to give up. */
1153 indent_to (dump_file
, depth
);
1155 "Not inlining %s into %s to avoid cycle.\n",
1156 cgraph_node_name (callee
),
1157 cgraph_node_name (e
->caller
));
1159 e
->inline_failed
= (e
->callee
->local
.disregard_inline_limits
1160 ? N_("recursive inlining") : "");
1165 callee
->aux
= (void *)(size_t) mode
;
1168 indent_to (dump_file
, depth
);
1169 fprintf (dump_file
, " Inlining %s into %s.\n",
1170 cgraph_node_name (e
->callee
),
1171 cgraph_node_name (e
->caller
));
1173 cgraph_mark_inline (e
);
1175 /* In order to fully inline always_inline functions at -O0, we need to
1176 recurse here, since the inlined functions might not be processed by
1177 incremental inlining at all yet.
1179 Also flattening needs to be done recursively. */
1181 if (!flag_unit_at_a_time
|| mode
== INLINE_ALL
|| always_inline
)
1182 cgraph_decide_inlining_incrementally (e
->callee
, mode
, depth
+ 1);
1183 callee
->aux
= (void *)(size_t) callee_mode
;
1187 /* Decide on the inlining. We do so in the topological order to avoid
1188 expenses on updating data structures.
1189 DEPTH is depth of recursion, used only for debug output. */
1192 cgraph_decide_inlining_incrementally (struct cgraph_node
*node
, enum inlining_mode mode
,
1195 struct cgraph_edge
*e
;
1196 bool inlined
= false;
1197 const char *failed_reason
;
1198 enum inlining_mode old_mode
;
1200 #ifdef ENABLE_CHECKING
1201 verify_cgraph_node (node
);
1204 old_mode
= (size_t)node
->aux
;
1206 if (mode
!= INLINE_ALWAYS_INLINE
1207 && lookup_attribute ("flatten", DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1210 fprintf (dump_file
, " Flattening %s\n", cgraph_node_name (node
));
1214 node
->aux
= (void *)(size_t) mode
;
1216 /* First of all look for always inline functions. */
1217 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1219 if (dump_file
&& e
->callee
->local
.inlinable
1220 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1221 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
))))
1223 fprintf (dump_file
, " Ignoring %s: SSA form not computed yet.\n",
1224 cgraph_node_name (e
->callee
));
1226 if ((e
->callee
->local
.disregard_inline_limits
1227 || (mode
== INLINE_ALL
&& e
->callee
->local
.inlinable
))
1229 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1230 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1231 && !cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
)
1232 /* ??? It is possible that renaming variable removed the function body
1233 in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
1234 && (DECL_SAVED_TREE (e
->callee
->decl
) || e
->callee
->inline_decl
))
1236 inlined
|= try_inline (e
, mode
, depth
);
1240 /* Now do the automatic inlining. */
1241 if (!flag_really_no_inline
&& mode
!= INLINE_ALL
1242 && mode
!= INLINE_ALWAYS_INLINE
)
1243 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1244 if (e
->callee
->local
.inlinable
1246 && !e
->callee
->local
.disregard_inline_limits
1247 && !cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
)
1248 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1249 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1250 && (mode
!= INLINE_SIZE
1251 || (cgraph_estimate_size_after_inlining (1, e
->caller
, e
->callee
)
1252 <= e
->caller
->global
.insns
))
1253 && cgraph_check_inline_limits (node
, e
->callee
, &e
->inline_failed
,
1255 && (DECL_SAVED_TREE (e
->callee
->decl
) || e
->callee
->inline_decl
))
1257 if (cgraph_default_inline_p (e
->callee
, &failed_reason
))
1258 inlined
|= try_inline (e
, mode
, depth
);
1259 else if (!flag_unit_at_a_time
)
1260 e
->inline_failed
= failed_reason
;
1262 node
->aux
= (void *)(size_t) old_mode
;
1266 /* When inlining shall be performed. */
1268 cgraph_gate_inlining (void)
1270 return flag_inline_trees
;
1273 struct tree_opt_pass pass_ipa_inline
=
1275 "inline", /* name */
1276 cgraph_gate_inlining
, /* gate */
1277 cgraph_decide_inlining
, /* execute */
1280 0, /* static_pass_number */
1281 TV_INLINE_HEURISTICS
, /* tv_id */
1282 0, /* properties_required */
1283 PROP_cfg
, /* properties_provided */
1284 0, /* properties_destroyed */
1285 TODO_remove_functions
, /* todo_flags_finish */
1286 TODO_dump_cgraph
| TODO_dump_func
1287 | TODO_remove_functions
, /* todo_flags_finish */
1291 /* Because inlining might remove no-longer reachable nodes, we need to
1292 keep the array visible to garbage collector to avoid reading collected
1295 static GTY ((length ("nnodes"))) struct cgraph_node
**order
;
1297 /* Do inlining of small functions. Doing so early helps profiling and other
1298 passes to be somewhat more effective and avoids some code duplication in
1299 later real inlining pass for testcases with very many function calls. */
1301 cgraph_early_inlining (void)
1303 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1304 unsigned int todo
= 0;
1306 if (sorrycount
|| errorcount
)
1308 if (cgraph_decide_inlining_incrementally (node
,
1310 ? INLINE_SIZE
: INLINE_SPEED
, 0))
1312 timevar_push (TV_INTEGRATION
);
1313 todo
= optimize_inline_calls (current_function_decl
);
1314 timevar_pop (TV_INTEGRATION
);
1319 /* When inlining shall be performed. */
1321 cgraph_gate_early_inlining (void)
1323 return flag_inline_trees
&& flag_early_inlining
;
1326 struct tree_opt_pass pass_early_inline
=
1328 "einline", /* name */
1329 cgraph_gate_early_inlining
, /* gate */
1330 cgraph_early_inlining
, /* execute */
1333 0, /* static_pass_number */
1334 TV_INLINE_HEURISTICS
, /* tv_id */
1335 0, /* properties_required */
1336 PROP_cfg
, /* properties_provided */
1337 0, /* properties_destroyed */
1338 0, /* todo_flags_start */
1339 TODO_dump_func
, /* todo_flags_finish */
1343 /* When inlining shall be performed. */
1345 cgraph_gate_ipa_early_inlining (void)
1347 return (flag_inline_trees
&& flag_early_inlining
1348 && (flag_branch_probabilities
|| flag_test_coverage
1349 || profile_arc_flag
));
1352 /* IPA pass wrapper for early inlining pass. We need to run early inlining
1353 before tree profiling so we have stand alone IPA pass for doing so. */
1354 struct tree_opt_pass pass_ipa_early_inline
=
1356 "einline_ipa", /* name */
1357 cgraph_gate_ipa_early_inlining
, /* gate */
1361 0, /* static_pass_number */
1362 TV_INLINE_HEURISTICS
, /* tv_id */
1363 0, /* properties_required */
1364 PROP_cfg
, /* properties_provided */
1365 0, /* properties_destroyed */
1366 0, /* todo_flags_start */
1367 TODO_dump_cgraph
, /* todo_flags_finish */
1371 /* Compute parameters of functions used by inliner. */
1373 compute_inline_parameters (void)
1375 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1377 gcc_assert (!node
->global
.inlined_to
);
1378 node
->local
.estimated_self_stack_size
= estimated_stack_frame_size ();
1379 node
->global
.estimated_stack_size
= node
->local
.estimated_self_stack_size
;
1380 node
->global
.stack_frame_offset
= 0;
1381 node
->local
.inlinable
= tree_inlinable_function_p (current_function_decl
);
1382 node
->local
.self_insns
= estimate_num_insns (current_function_decl
);
1383 if (node
->local
.inlinable
)
1384 node
->local
.disregard_inline_limits
1385 = lang_hooks
.tree_inlining
.disregard_inline_limits (current_function_decl
);
1386 if (flag_really_no_inline
&& !node
->local
.disregard_inline_limits
)
1387 node
->local
.inlinable
= 0;
1388 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
1389 node
->global
.insns
= node
->local
.self_insns
;
1393 /* When inlining shall be performed. */
1395 gate_inline_passes (void)
1397 return flag_inline_trees
;
1400 struct tree_opt_pass pass_inline_parameters
=
1403 gate_inline_passes
, /* gate */
1404 compute_inline_parameters
, /* execute */
1407 0, /* static_pass_number */
1408 TV_INLINE_HEURISTICS
, /* tv_id */
1409 0, /* properties_required */
1410 PROP_cfg
, /* properties_provided */
1411 0, /* properties_destroyed */
1412 0, /* todo_flags_start */
1413 0, /* todo_flags_finish */
1417 /* Apply inline plan to the function. */
1421 unsigned int todo
= 0;
1422 struct cgraph_edge
*e
;
1423 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1425 /* Even when not optimizing, ensure that always_inline functions get inlined.
1428 cgraph_decide_inlining_incrementally (node
, INLINE_SPEED
, 0);
1430 /* We might need the body of this function so that we can expand
1431 it inline somewhere else. */
1432 if (cgraph_preserve_function_body_p (current_function_decl
))
1433 save_inline_function_body (node
);
1435 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1436 if (!e
->inline_failed
|| warn_inline
)
1440 timevar_push (TV_INTEGRATION
);
1441 todo
= optimize_inline_calls (current_function_decl
);
1442 timevar_pop (TV_INTEGRATION
);
1444 /* In non-unit-at-a-time we must mark all referenced functions as needed. */
1445 if (!flag_unit_at_a_time
)
1447 struct cgraph_edge
*e
;
1448 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1449 if (e
->callee
->analyzed
)
1450 cgraph_mark_needed_node (e
->callee
);
1452 return todo
| execute_fixup_cfg ();
1455 struct tree_opt_pass pass_apply_inline
=
1457 "apply_inline", /* name */
1459 apply_inline
, /* execute */
1462 0, /* static_pass_number */
1463 TV_INLINE_HEURISTICS
, /* tv_id */
1464 0, /* properties_required */
1465 PROP_cfg
, /* properties_provided */
1466 0, /* properties_destroyed */
1467 0, /* todo_flags_start */
1468 TODO_dump_func
| TODO_verify_flow
1469 | TODO_verify_stmts
, /* todo_flags_finish */
1473 #include "gt-ipa-inline.h"