/* Inlining decision heuristics.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* Inlining decision heuristics

   We separate inlining decisions from the inliner itself and store them
   inside the callgraph as a so-called inline plan.  Refer to the cgraph.c
   documentation for the particular representation of inline plans in the
   callgraph.

   There are three major parts of this file:

   cgraph_mark_inline implementation

     This function marks a given call inline and performs the necessary
     modifications of the cgraph (production of the clones and updating of
     the overall statistics).

   inlining heuristics limits

     These functions check that a particular inlining is allowed by the
     limits specified by the user (allowed function growth, overall unit
     growth and so on).

   inlining heuristics

     This is the implementation of the IPA pass aiming to get as much
     benefit from inlining as possible while obeying the limits checked
     above.

     The implementation of the particular heuristics is separated from
     the rest of the code to make it easier to replace it with a more
     complicated implementation in the future.  The rest of the inlining
     code acts as a library aimed to modify the callgraph and to verify
     that the constraints on code size growth are met.

     To mark a given call inline, use the cgraph_mark_inline function; the
     verification is performed by cgraph_default_inline_p and
     cgraph_check_inline_limits.

     The heuristics implement a simple knapsack-style algorithm ordering
     all functions by their "profitability" (estimated by code size growth)
     and inlining them in priority order.

     cgraph_decide_inlining implements the heuristics taking the whole
     callgraph into account, while cgraph_decide_inlining_incrementally
     considers only one function at a time and is used in
     non-unit-at-a-time mode.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
#include "ggc.h"

/* Statistics we collect about inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int initial_insns;
static int overall_insns;
/* Overall unit size limit the inliner may grow the program to; computed in
   cgraph_decide_inlining and consumed by the small-function heuristics.  */
static int max_insns;

static gcov_type max_count;

/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
                                     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}

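/* For intuition, a worked example with hypothetical numbers (not taken from
   any particular target): with PARAM_INLINE_CALL_COST at 16 and two
   arguments whose moves cost one insn each, inlining a 40-insn body once
   into a 100-insn caller estimates (40 - 18) * 1 + 100 = 122 insns; the
   eliminated call overhead is credited back before the body is added.  */
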
/* E is expected to be an edge being inlined.  Clone destination node of
   the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating new
   clones or re-using node originally representing out-of-line function call.
   */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
{
  if (duplicate)
    {
      /* We may eliminate the need for out-of-line copy to be output.
         In that case just go ahead and re-use it.  */
      if (!e->callee->callers->next_caller
          && !e->callee->needed
          && flag_unit_at_a_time)
        {
          gcc_assert (!e->callee->global.inlined_to);
          if (DECL_SAVED_TREE (e->callee->decl))
            overall_insns -= e->callee->global.insns, nfunctions_inlined++;
          duplicate = false;
        }
      else
        {
          struct cgraph_node *n;
          n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
                                 update_original);
          cgraph_redirect_edge_callee (e, n);
        }
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate, update_original);
}

/* Mark edge E as inlined and update callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  */

void
cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;

  if (e->callee->inline_decl)
    cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined && flag_unit_at_a_time)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true, update_original);

  what = e->callee;

  /* Now update size of caller and all functions caller is inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
                                                       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;
}

/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;

  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
        {
          cgraph_mark_inline_edge (e, true);
          if (e == edge)
            edge = next;
        }
    }

  return edge;
}

/* Estimate the growth caused by inlining NODE into all callees.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;

  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
                 - e->caller->global.insns);

  /* ??? Wrong for self recursive functions or cases where we decide to not
     inline for different reasons, but it is not a big deal as in that case
     we will keep the body around, but we will also avoid some inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl))
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}

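/* Note that the estimate above can come out negative: when the function is
   not needed externally, inlining it into every caller lets the offline
   copy disappear, so its own body size is subtracted.  A negative value
   therefore means "inlining this function everywhere shrinks the unit",
   which the badness metric below rewards.  */
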
/* Return false when inlining WHAT into TO is not a good idea
   as it would cause too large growth of the function bodies.
   When ONE_ONLY is true, assume that only one call site is going
   to be inlined, otherwise figure out how many call sites in
   TO call WHAT and verify that all can be inlined.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
                            const char **reason, bool one_only)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;

  if (one_only)
    times = 1;
  else
    for (e = to->callees; e; e = e->next_callee)
      if (e->callee == what)
        times++;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  /* When inlining large function body called once into small function,
     take the inlined function as base for limiting the growth.  */
  if (to->local.self_insns > what->local.self_insns)
    limit = to->local.self_insns;
  else
    limit = what->local.self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize >= to->global.insns
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
        *reason = N_("--param large-function-growth limit reached");
      return false;
    }
  return true;
}

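/* Worked example of the limit above (hypothetical sizes): inlining a
   200-insn callee into a 1000-insn caller with
   --param large-function-growth=100 allows the caller to grow up to
   1000 + 1000 = 2000 insns, so an estimated result of roughly 1200 insns
   passes; a size is only rejected when it also exceeds
   --param large-function-insns and does not shrink the caller.  */
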
/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
{
  tree decl = n->decl;

  if (n->inline_decl)
    decl = n->inline_decl;
  if (!DECL_INLINE (decl))
    {
      if (reason)
        *reason = N_("function not inlinable");
      return false;
    }

  if (!DECL_STRUCT_FUNCTION (decl)->cfg)
    {
      if (reason)
        *reason = N_("function body not available");
      return false;
    }

  if (DECL_DECLARED_INLINE_P (decl))
    {
      if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-single limit reached");
          return false;
        }
    }
  else
    {
      if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-auto limit reached");
          return false;
        }
    }

  return true;
}

/* Return true when inlining WHAT would create recursive inlining.
   We call recursive inlining all cases where the same function appears more
   than once in the single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
                             struct cgraph_node *what,
                             const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking recursive function inline has sane semantic and thus we should
     not warn on it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
               ? N_("recursive inlining") : "");
  return recursive;
}

/* Return true if the call can be hot.  */
static bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
          <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  return true;
}

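/* With profile feedback the test above treats an edge as cold when its
   execution count is at most sum_max divided by the hot-bb-count-fraction
   parameter; e.g. (hypothetical numbers) with sum_max of 1000000 and a
   fraction of 10000, edges executed 100 times or fewer are considered cold
   and are inlined only when doing so does not grow code.  */
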
/* A cost model driving the inlining heuristics in a way so that the edges
   with the smallest badness are inlined first.  After each inlining is
   performed the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body size.

   With profiling we use the number of executions of each edge to drive the
   cost.  We should also distinguish hot and cold calls, where the cold
   calls are inlined only when the overall code size improves.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  if (max_count)
    {
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;

      /* Always prefer inlining saving code size.  */
      if (growth <= 0)
        return INT_MIN - growth;
      return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
    }
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      int badness = cgraph_estimate_growth (edge->callee) * 256;

      /* Decrease badness if call is nested.  */
      badness >>= nest;

      /* Make recursive inlining happen always after other inlining is done.  */
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
        return badness + 1;
      else
        return badness;
    }
}

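/* The resulting order: with profile feedback badness is roughly
   -count / growth scaled into the int range, so frequently executed calls
   with a small size cost come out most negative and are extracted from the
   min-heap first.  Without feedback, a callee whose estimated unit growth
   is (hypothetically) -3 insns gets badness -768 before the nesting
   adjustment and is preferred over one growing the unit by +5 insns
   (badness +1280).  */
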
/* Recompute heap nodes for each caller edge of NODE.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *edge;
  const char *failed_reason;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);
  node->global.estimated_growth = INT_MIN;

  if (!node->local.inlinable)
    return;
  /* Prune out edges we won't inline into anymore.  */
  if (!cgraph_default_inline_p (node, &failed_reason))
    {
      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->aux)
          {
            fibheap_delete_node (heap, edge->aux);
            edge->aux = NULL;
            if (edge->inline_failed)
              edge->inline_failed = failed_reason;
          }
      return;
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        int badness = cgraph_edge_badness (edge);
        if (edge->aux)
          {
            fibnode_t n = edge->aux;
            gcc_assert (n->data == edge);
            if (n->key == badness)
              continue;

            /* fibheap_replace_key only increases the keys.  */
            if (fibheap_replace_key (heap, n, badness))
              continue;
            fibheap_delete_node (heap, edge->aux);
          }
        edge->aux = fibheap_insert (heap, badness, edge);
      }
}

/* Recompute heap nodes for the caller edges of each callee of NODE.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else if (!e->inline_failed)
      update_callee_keys (heap, e->callee, updated_nodes);
}

/* Enqueue all recursive calls from NODE into the priority queue HEAP, keyed
   by how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
        /* When profile feedback is available, prioritize by expected number
           of calls.  Without profile feedback we maintain simple queue
           to order candidates via recursive depths.  */
        fibheap_insert (heap,
                        !max_count ? priority++
                        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
                        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}

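/* The profile-driven key above divides each edge count by roughly
   max_count / 2^24, squeezing even 64-bit gcov counters into the int key
   space of the fibheap while preserving their relative order; the negation
   makes the most frequently executed recursive call the minimum element.  */
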
/* Find callgraph nodes closing a circle in the graph.  The
   resulting hashtab can be used to avoid walking the circles.
   Uses the cgraph nodes ->aux field which needs to be zero
   before and will be zero after operation.  */

static void
cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
{
  struct cgraph_edge *e;

  if (node->aux)
    {
      void **slot;
      slot = htab_find_slot (cycles, node, INSERT);
      if (!*slot)
        {
          if (dump_file)
            fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
          *slot = node;
        }
      return;
    }

  node->aux = node;
  for (e = node->callees; e; e = e->next_callee)
    cgraph_find_cycles (e->callee, cycles);
  node->aux = 0;
}

/* Apply the inline plan recorded in the callgraph: expand all calls marked
   inline and recompute the sizes of the affected functions.  */

static void
cgraph_apply_inline_plan (void)
{
  struct cgraph_node *node;
  struct cgraph_node **order =
    xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
  int order_pos = 0, new_order_pos = 0;
  int i;

  timevar_push (TV_INTEGRATION);
  order_pos = cgraph_postorder (order);
  gcc_assert (order_pos == cgraph_n_nodes);

  /* Garbage collector may remove inline clones we eliminate during
     optimization.  So we must be sure to not reference them.  */
  for (i = 0; i < order_pos; i++)
    if (!order[i]->global.inlined_to)
      order[new_order_pos++] = order[i];

  /* Initialize the default bitmap obstack.  */
  bitmap_obstack_initialize (NULL);

  for (i = 0; i < new_order_pos; i++)
    {
      struct cgraph_edge *e;

      node = order[i];
      for (e = node->callees; e; e = e->next_callee)
        if (!e->inline_failed || warn_inline)
          break;
      if (e)
        {
          if (cgraph_preserve_function_body_p (node->decl, true))
            save_inline_function_body (node);
          push_cfun (DECL_STRUCT_FUNCTION (node->decl));
          tree_register_cfg_hooks ();
          current_function_decl = node->decl;
          optimize_inline_calls (node->decl, false);
          free_dominance_info (CDI_DOMINATORS);
          free_dominance_info (CDI_POST_DOMINATORS);
          node->local.self_insns = node->global.insns;
          current_function_decl = NULL;
          pop_cfun ();
        }
    }
  free (order);
  timevar_pop (TV_INTEGRATION);
}

/* Leafify the cgraph node.  We have to be careful in recursing
   as to not run endlessly in circles of the callgraph.
   We do so by using a hashtab of cycle entering nodes as generated
   by cgraph_find_cycles.  */

static void
cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
{
  struct cgraph_edge *e;

  for (e = node->callees; e; e = e->next_callee)
    {
      /* Inline call, if possible, and recurse.  Be sure we are not
         entering callgraph circles here.  */
      if (e->inline_failed
          && e->callee->local.inlinable
          && !cgraph_recursive_inlining_p (node, e->callee,
                                           &e->inline_failed)
          && !htab_find (cycles, e->callee))
        {
          if (dump_file)
            fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
          cgraph_mark_inline_edge (e, true);
          cgraph_flatten_node (e->callee, cycles);
        }
      else if (dump_file)
        fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
    }
}

/* Decide on recursive inlining: in the case the function has recursive
   calls, inline until the body size reaches the given limit.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone, *next;
  int depth = 0;
  int n = 0;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that function is small enough to be considered for inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             cgraph_node_name (node));

  /* We need original clone to copy around.  */
  master_clone = cgraph_clone_node (node, node->count, 1, false);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true, false);

  /* Do the inlining and update list of recursive call during process.  */
  while (!fibheap_empty (heap)
         && (cgraph_estimate_size_after_inlining (1, node, master_clone)
             <= limit))
    {
      struct cgraph_edge *curr = fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl == curr->callee->decl)
          depth++;
      if (depth > max_depth)
        {
          if (dump_file)
            fprintf (dump_file,
                     "   maximal depth reached\n");
          continue;
        }

      if (max_count)
        {
          if (!cgraph_maybe_hot_edge_p (curr))
            {
              if (dump_file)
                fprintf (dump_file, "   Not inlining cold call\n");
              continue;
            }
          if (curr->count * 100 / node->count < probability)
            {
              if (dump_file)
                fprintf (dump_file,
                         "   Probability of edge is too small\n");
              continue;
            }
        }

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count)
            fprintf (dump_file, " called approx. %.2f times per call",
                     (double)curr->count / node->count);
          fprintf (dump_file, "\n");
        }
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr, false);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, body grown from %i to %i insns\n", n,
             master_clone->global.insns, node->global.insns);

  /* Remove master clone we used for inlining.  We rely on the fact that
     clones inlined into the master clone get queued just before the master
     clone so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  /* FIXME: Recursive inlining actually reduces number of calls of the
     function.  At this place we should probably walk the function and
     inline clones and compensate the counts accordingly.  This probably
     doesn't matter much in practice.  */
  return true;
}

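/* A concrete illustration with hypothetical parameter values: for a
   self-recursive function of 50 insns declared inline, with
   max-inline-insns-recursive at 450 and max-inline-recursive-depth at 8,
   the loop above keeps inlining the most promising recursive call sites
   into the master clone until the clone would exceed 450 insns or a call
   site sits deeper than 8 recursive levels, effectively unrolling the
   recursion.  */
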
/* Set inline_failed for all callers of given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}

/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and the
   estimates are updated after each inlining.

   INLINED and INLINED_CALLEES are just pointers to arrays large enough
   to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  const char *failed_reason;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
          || node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node, &failed_reason))
        {
          cgraph_set_inline_failed (node, failed_reason);
          continue;
        }

      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->inline_failed)
          {
            gcc_assert (!edge->aux);
            edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
          }
    }
  while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

      growth -= edge->caller->global.insns;

      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i insns\n",
                   cgraph_node_name (edge->callee),
                   edge->callee->global.insns);
          fprintf (dump_file,
                   " to be inlined into %s\n"
                   " Estimated growth after inlined into all callees is %+i insns.\n"
                   " Estimated badness is %i.\n",
                   cgraph_node_name (edge->caller),
                   cgraph_estimate_growth (edge->callee),
                   cgraph_edge_badness (edge));
          if (edge->count)
            fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
        }
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
        continue;

      /* When not having profile info ready we don't weight by any way the
         position of call in procedure itself.  This means if call of
         function A from function B seems profitable to inline, the recursive
         call of function A in inline copy of A in B will look profitable too
         and we end up inlining until reaching maximal function growth.  This
         is not a good idea so prohibit the recursive inlining.

         ??? When the frequencies are taken into account we might not need this
         restriction.  */
      if (!max_count)
        {
          where = edge->caller;
          while (where->global.inlined_to)
            {
              if (where->decl == edge->callee->decl)
                break;
              where = where->callers->caller;
            }
          if (where->global.inlined_to)
            {
              edge->inline_failed
                = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
              if (dump_file)
                fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
              continue;
            }
        }

      if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              edge->inline_failed =
                N_("call is unlikely");
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                       &edge->inline_failed))
        {
          where = edge->caller;
          if (where->global.inlined_to)
            where = where->global.inlined_to;
          if (!cgraph_decide_recursive_inlining (where))
            continue;
          update_callee_keys (heap, where, updated_nodes);
        }
      else
        {
          struct cgraph_node *callee;
          if (!cgraph_check_inline_limits (edge->caller, edge->callee,
                                           &edge->inline_failed, true))
            {
              if (dump_file)
                fprintf (dump_file, " Not inlining into %s:%s.\n",
                         cgraph_node_name (edge->caller), edge->inline_failed);
              continue;
            }
          callee = edge->callee;
          cgraph_mark_inline_edge (edge, true);
          update_callee_keys (heap, callee, updated_nodes);
        }
      where = edge->caller;
      if (where->global.inlined_to)
        where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties
         such as the number of inlinable calls and the size of the function
         body.  After inlining these properties might change for the
         function we inlined into (since its body size changed) and for the
         functions called by the function we inlined (since the number of
         their inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
        fprintf (dump_file,
                 " Inlined into %s which now has %i insns,"
                 " net change of %+i insns.\n",
                 cgraph_node_name (edge->caller),
                 edge->caller->global.insns,
                 overall_insns - old_insns);
    }
  while ((edge = fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
          && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                           &edge->inline_failed))
        edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}

/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int old_insns = 0;
  int i;

  timevar_push (TV_INLINE_HEURISTICS);
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && (node->needed || node->reachable))
      {
        struct cgraph_edge *e;

        /* At the moment, no IPA passes change function bodies before inlining.
           Save some time by not recomputing function body sizes if early
           inlining already did so.  */
        if (!flag_early_inlining)
          node->local.self_insns = node->global.insns
            = estimate_num_insns (node->decl);

        initial_insns += node->local.self_insns;
        gcc_assert (node->local.self_insns == node->global.insns);
        for (e = node->callees; e; e = e->next_callee)
          if (max_count < e->count)
            max_count = e->count;
      }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  max_insns = overall_insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  max_insns = ((HOST_WIDEST_INT) max_insns
               * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
= cgraph_postorder (order
);
992 "\nDeciding on inlining. Starting with %i insns.\n",
995 for (node
= cgraph_nodes
; node
; node
= node
->next
)
999 fprintf (dump_file
, "\nInlining always_inline functions:\n");
1001 /* In the first pass mark all always_inline edges. Do this with a priority
1002 so none of our later choices will make this impossible. */
1003 for (i
= nnodes
- 1; i
>= 0; i
--)
1005 struct cgraph_edge
*e
, *next
;
1009 /* Handle nodes to be flattened, but don't update overall unit size. */
1010 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1012 int old_overall_insns
= overall_insns
;
1016 "Leafifying %s\n", cgraph_node_name (node
));
1017 cycles
= htab_create (7, htab_hash_pointer
, htab_eq_pointer
, NULL
);
1018 cgraph_find_cycles (node
, cycles
);
1019 cgraph_flatten_node (node
, cycles
);
1020 htab_delete (cycles
);
1021 overall_insns
= old_overall_insns
;
1022 /* We don't need to consider always_inline functions inside the flattened
1023 function anymore. */
1027 if (!node
->local
.disregard_inline_limits
)
1031 "\nConsidering %s %i insns (always inline)\n",
1032 cgraph_node_name (node
), node
->global
.insns
);
1033 old_insns
= overall_insns
;
1034 for (e
= node
->callers
; e
; e
= next
)
1036 next
= e
->next_caller
;
1037 if (!e
->inline_failed
)
1039 if (cgraph_recursive_inlining_p (e
->caller
, e
->callee
,
1042 cgraph_mark_inline_edge (e
, true);
1045 " Inlined into %s which now has %i insns.\n",
1046 cgraph_node_name (e
->caller
),
1047 e
->caller
->global
.insns
);
1051 " Inlined for a net change of %+i insns.\n",
1052 overall_insns
- old_insns
);
1055 if (!flag_really_no_inline
)
1056 cgraph_decide_inlining_of_small_functions ();
1058 if (!flag_really_no_inline
1059 && flag_inline_functions_called_once
)
1062 fprintf (dump_file
, "\nDeciding on functions called once:\n");
1064 /* And finally decide what functions are called once. */
1066 for (i
= nnodes
- 1; i
>= 0; i
--)
1070 if (node
->callers
&& !node
->callers
->next_caller
&& !node
->needed
1071 && node
->local
.inlinable
&& node
->callers
->inline_failed
1072 && !DECL_EXTERNAL (node
->decl
) && !DECL_COMDAT (node
->decl
))
1075 struct cgraph_node
*node1
;
1077 /* Verify that we won't duplicate the caller. */
1078 for (node1
= node
->callers
->caller
;
1079 node1
->callers
&& !node1
->callers
->inline_failed
1080 && ok
; node1
= node1
->callers
->caller
)
1081 if (node1
->callers
->next_caller
|| node1
->needed
)
1088 "\nConsidering %s %i insns.\n",
1089 cgraph_node_name (node
), node
->global
.insns
);
1091 " Called once from %s %i insns.\n",
1092 cgraph_node_name (node
->callers
->caller
),
1093 node
->callers
->caller
->global
.insns
);
1096 old_insns
= overall_insns
;
1098 if (cgraph_check_inline_limits (node
->callers
->caller
, node
,
1101 cgraph_mark_inline (node
->callers
);
1104 " Inlined into %s which now has %i insns"
1105 " for a net change of %+i insns.\n",
1106 cgraph_node_name (node
->callers
->caller
),
1107 node
->callers
->caller
->global
.insns
,
1108 overall_insns
- old_insns
);
1114 " Inline limit reached, not inlined.\n");
1121 cgraph_remove_unreachable_nodes (false, dump_file
);
1122 cgraph_apply_inline_plan ();
1123 cgraph_remove_unreachable_nodes (false, dump_file
);
1127 "\nInlined %i calls, eliminated %i functions, "
1128 "%i insns turned to %i insns.\n\n",
1129 ncalls_inlined
, nfunctions_inlined
, initial_insns
,
1132 timevar_pop (TV_INLINE_HEURISTICS
);
/* Decide on the inlining of calls within a single function NODE; used in
   non-unit-at-a-time mode and by the early inliner (EARLY is then true).  */

static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
{
  struct cgraph_edge *e;
  bool inlined = false;
  const char *failed_reason;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->local.disregard_inline_limits
        && e->inline_failed
        && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
        /* ??? It is possible that renaming variable removed the function body
           in duplicate_decls.  See gcc.c-torture/compile/20011119-2.c  */
        && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
      {
        if (dump_file && early)
          {
            fprintf (dump_file, "  Early inlining %s",
                     cgraph_node_name (e->callee));
            fprintf (dump_file, " into %s\n", cgraph_node_name (node));
          }
        cgraph_mark_inline (e);
        inlined = true;
      }

  /* Now do the automatic inlining.  */
  if (!flag_really_no_inline)
    for (e = node->callees; e; e = e->next_callee)
      if (e->callee->local.inlinable
          && e->inline_failed
          && !e->callee->local.disregard_inline_limits
          && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
          && (!early
              || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
                  <= e->caller->global.insns))
          && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
                                         false)
          && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
        {
          if (cgraph_default_inline_p (e->callee, &failed_reason))
            {
              if (dump_file && early)
                {
                  fprintf (dump_file, "  Early inlining %s",
                           cgraph_node_name (e->callee));
                  fprintf (dump_file, " into %s\n", cgraph_node_name (node));
                }
              cgraph_mark_inline (e);
              inlined = true;
            }
          else if (!early)
            e->inline_failed = failed_reason;
        }
  if (inlined || (warn_inline && !early))
    {
      /* Initialize the default bitmap obstack.  */
      bitmap_obstack_initialize (NULL);
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      tree_register_cfg_hooks ();
      current_function_decl = node->decl;
      optimize_inline_calls (current_function_decl, early);
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
      node->local.self_insns = node->global.insns;
      current_function_decl = NULL;
      pop_cfun ();
    }
  return inlined;
}

/* When inlining shall be performed.  */
static bool
cgraph_gate_inlining (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_ipa_inline =
{
  "inline",				/* name */
  cgraph_gate_inlining,			/* gate */
  cgraph_decide_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INTEGRATION,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func,	/* todo_flags_finish */
  0					/* letter */
};

/* Because inlining might remove no-longer reachable nodes, we need to
   keep the array visible to garbage collector to avoid reading collected
   out nodes.  */
static int nnodes;
static GTY ((length ("nnodes"))) struct cgraph_node **order;

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.  */
static unsigned int
cgraph_early_inlining (void)
{
  struct cgraph_node *node;
  int i;

  if (sorrycount || errorcount)
    return 0;
#ifdef ENABLE_CHECKING
  for (node = cgraph_nodes; node; node = node->next)
    gcc_assert (!node->aux);
#endif

  order = ggc_alloc (sizeof (*order) * cgraph_n_nodes);
  nnodes = cgraph_postorder (order);
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];
      if (node->analyzed && (node->needed || node->reachable))
        node->local.self_insns = node->global.insns
          = estimate_num_insns (node->decl);
    }
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];
      if (node->analyzed && node->local.inlinable
          && (node->needed || node->reachable)
          && node->callers)
        {
          if (cgraph_decide_inlining_incrementally (node, true))
            ggc_collect ();
        }
    }
  cgraph_remove_unreachable_nodes (true, dump_file);
#ifdef ENABLE_CHECKING
  for (node = cgraph_nodes; node; node = node->next)
    gcc_assert (!node->global.inlined_to);
#endif
  order = NULL;
  nnodes = 0;
  return 0;
}

/* When inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
  return flag_inline_trees && flag_early_inlining;
}

struct tree_opt_pass pass_early_ipa_inline =
{
  "einline",				/* name */
  cgraph_gate_early_inlining,		/* gate */
  cgraph_early_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INTEGRATION,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func,	/* todo_flags_finish */
  0					/* letter */
};

#include "gt-ipa-inline.h"