1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Inlining decision heuristics
24 We separate inlining decisions from the inliner itself and store it
25 inside callgraph as so called inline plan. Refer to cgraph.c
26 documentation about particular representation of inline plans in the
29 There are three major parts of this file:
31 cgraph_mark_inline_edge implementation
33 This function allows to mark given call inline and performs necessary
34 modifications of cgraph (production of the clones and updating overall
37 inlining heuristics limits
39 These functions allow to check that particular inlining is allowed
40 by the limits specified by user (allowed function growth, overall unit
45 This is implementation of IPA pass aiming to get as much of benefit
46 from inlining obeying the limits checked above.
48 The implementation of particular heuristics is separated from
49 the rest of code to make it easier to replace it with more complicated
50 implementation in the future. The rest of inlining code acts as a
51 library aimed to modify the callgraph and verify that the parameters
52 on code size growth fits.
54 To mark given call inline, use cgraph_mark_inline function, the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
58 The heuristics implements simple knapsack style algorithm ordering
59 all functions by their "profitability" (estimated by code size growth)
60 and inlining them in priority order.
62 cgraph_decide_inlining implements heuristics taking whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used by early inliner.
66 The inliner itself is split into several passes:
68 pass_inline_parameters
70 This pass computes local properties of functions that are used by inliner:
71 estimated function body size, whether function is inlinable at all and
72 stack frame consumption.
74 Before executing any of inliner passes, this local pass has to be applied
75 to each function in the callgraph (ie run as subpass of some earlier
76 IPA pass). The results are made out of date by any optimization applied
81 Simple local inlining pass inlining callees into current function. This
82 pass makes no global whole compilation unit analysis and this when allowed
83 to do inlining expanding code size it might result in unbounded growth of
86 The pass is run during conversion into SSA form. Only functions already
87 converted into SSA form are inlined, so the conversion must happen in
88 topological order on the callgraph (that is maintained by pass manager).
89 The functions after inlining are early optimized so the early inliner sees
90 unoptimized function itself, but all considered callees are already
91 optimized allowing it to unfold abstraction penalty on C++ effectively and
96 This is the main pass implementing simple greedy algorithm to do inlining
97 of small functions that results in overall growth of compilation unit and
98 inlining of functions called once. The pass compute just so called inline
99 plan (representation of inlining to be done in callgraph) and unlike early
100 inlining it is not performing the inlining itself.
105 #include "coretypes.h"
108 #include "tree-inline.h"
109 #include "langhooks.h"
112 #include "diagnostic.h"
113 #include "gimple-pretty-print.h"
118 #include "tree-pass.h"
120 #include "coverage.h"
122 #include "tree-flow.h"
124 #include "ipa-prop.h"
127 #define MAX_TIME 1000000000
129 /* Mode incremental inliner operate on:
131 In ALWAYS_INLINE only functions marked
132 always_inline are inlined. This mode is used after detecting cycle during
135 In SIZE mode, only functions that reduce function body size after inlining
136 are inlined, this is used during early inlining.
   In ALL mode, everything is inlined.  This is used during flattening.  */
141 INLINE_ALWAYS_INLINE
,
142 INLINE_SIZE_NORECURSIVE
,
148 cgraph_decide_inlining_incrementally (struct cgraph_node
*, enum inlining_mode
);
149 static void cgraph_flatten (struct cgraph_node
*node
);
152 /* Statistics we collect about inlining algorithm. */
153 static int ncalls_inlined
;
154 static int nfunctions_inlined
;
155 static int overall_size
;
156 static gcov_type max_count
, max_benefit
;
158 /* Holders of ipa cgraph hooks: */
159 static struct cgraph_node_hook_list
*function_insertion_hook_holder
;
161 static inline struct inline_summary
*
162 inline_summary (struct cgraph_node
*node
)
164 return &node
->local
.inline_summary
;
167 /* Estimate self time of the function after inlining WHAT into TO. */
170 cgraph_estimate_time_after_inlining (int frequency
, struct cgraph_node
*to
,
171 struct cgraph_node
*what
)
173 gcov_type time
= (((gcov_type
)what
->global
.time
174 - inline_summary (what
)->time_inlining_benefit
)
175 * frequency
+ CGRAPH_FREQ_BASE
/ 2) / CGRAPH_FREQ_BASE
184 /* Estimate self size of the function after inlining WHAT into TO. */
187 cgraph_estimate_size_after_inlining (struct cgraph_node
*to
,
188 struct cgraph_node
*what
)
190 int size
= ((what
->global
.size
- inline_summary (what
)->size_inlining_benefit
)
192 gcc_assert (size
>= 0);
196 /* Scale frequency of NODE edges by FREQ_SCALE and increase loop nest
200 update_noncloned_frequencies (struct cgraph_node
*node
,
201 int freq_scale
, int nest
)
203 struct cgraph_edge
*e
;
205 /* We do not want to ignore high loop nest after freq drops to 0. */
208 for (e
= node
->callees
; e
; e
= e
->next_callee
)
210 e
->loop_nest
+= nest
;
211 e
->frequency
= e
->frequency
* (gcov_type
) freq_scale
/ CGRAPH_FREQ_BASE
;
212 if (e
->frequency
> CGRAPH_FREQ_MAX
)
213 e
->frequency
= CGRAPH_FREQ_MAX
;
214 if (!e
->inline_failed
)
215 update_noncloned_frequencies (e
->callee
, freq_scale
, nest
);
219 /* E is expected to be an edge being inlined. Clone destination node of
220 the edge and redirect it to the new clone.
221 DUPLICATE is used for bookkeeping on whether we are actually creating new
222 clones or re-using node originally representing out-of-line function call.
225 cgraph_clone_inlined_nodes (struct cgraph_edge
*e
, bool duplicate
,
226 bool update_original
)
232 /* We may eliminate the need for out-of-line copy to be output.
233 In that case just go ahead and re-use it. */
234 if (!e
->callee
->callers
->next_caller
235 /* Recursive inlining never wants the master clone to be overwritten. */
237 /* FIXME: When address is taken of DECL_EXTERNAL function we still can remove its
238 offline copy, but we would need to keep unanalyzed node in the callgraph so
239 references can point to it. */
240 && !e
->callee
->address_taken
241 && cgraph_can_remove_if_no_direct_calls_p (e
->callee
)
242 /* Inlining might enable more devirtualizing, so we want to remove
243 those only after all devirtualizable virtual calls are processed.
244 Lacking may edges in callgraph we just preserve them post
246 && (!DECL_VIRTUAL_P (e
->callee
->decl
)
247 || (!DECL_COMDAT (e
->callee
->decl
) && !DECL_EXTERNAL (e
->callee
->decl
)))
248 /* Don't reuse if more than one function shares a comdat group.
249 If the other function(s) are needed, we need to emit even
250 this function out of line. */
251 && !e
->callee
->same_comdat_group
252 && !cgraph_new_nodes
)
254 gcc_assert (!e
->callee
->global
.inlined_to
);
255 if (e
->callee
->analyzed
&& !DECL_EXTERNAL (e
->callee
->decl
))
257 overall_size
-= e
->callee
->global
.size
;
258 nfunctions_inlined
++;
261 e
->callee
->local
.externally_visible
= false;
262 update_noncloned_frequencies (e
->callee
, e
->frequency
, e
->loop_nest
);
266 struct cgraph_node
*n
;
267 n
= cgraph_clone_node (e
->callee
, e
->callee
->decl
,
268 e
->count
, e
->frequency
, e
->loop_nest
,
269 update_original
, NULL
);
270 cgraph_redirect_edge_callee (e
, n
);
274 if (e
->caller
->global
.inlined_to
)
275 e
->callee
->global
.inlined_to
= e
->caller
->global
.inlined_to
;
277 e
->callee
->global
.inlined_to
= e
->caller
;
278 e
->callee
->global
.stack_frame_offset
279 = e
->caller
->global
.stack_frame_offset
280 + inline_summary (e
->caller
)->estimated_self_stack_size
;
281 peak
= e
->callee
->global
.stack_frame_offset
282 + inline_summary (e
->callee
)->estimated_self_stack_size
;
283 if (e
->callee
->global
.inlined_to
->global
.estimated_stack_size
< peak
)
284 e
->callee
->global
.inlined_to
->global
.estimated_stack_size
= peak
;
285 cgraph_propagate_frequency (e
->callee
);
287 /* Recursively clone all bodies. */
288 for (e
= e
->callee
->callees
; e
; e
= e
->next_callee
)
289 if (!e
->inline_failed
)
290 cgraph_clone_inlined_nodes (e
, duplicate
, update_original
);
293 /* Mark edge E as inlined and update callgraph accordingly. UPDATE_ORIGINAL
294 specify whether profile of original function should be updated. If any new
295 indirect edges are discovered in the process, add them to NEW_EDGES, unless
296 it is NULL. Return true iff any new callgraph edges were discovered as a
297 result of inlining. */
300 cgraph_mark_inline_edge (struct cgraph_edge
*e
, bool update_original
,
301 VEC (cgraph_edge_p
, heap
) **new_edges
)
303 int old_size
= 0, new_size
= 0;
304 struct cgraph_node
*to
= NULL
, *what
;
305 struct cgraph_edge
*curr
= e
;
308 /* Don't inline inlined edges. */
309 gcc_assert (e
->inline_failed
);
310 /* Don't even think of inlining inline clone. */
311 gcc_assert (!e
->callee
->global
.inlined_to
);
313 e
->inline_failed
= CIF_OK
;
314 DECL_POSSIBLY_INLINED (e
->callee
->decl
) = true;
316 cgraph_clone_inlined_nodes (e
, true, update_original
);
321 /* Now update size of caller and all functions caller is inlined into. */
322 for (;e
&& !e
->inline_failed
; e
= e
->caller
->callers
)
325 old_size
= e
->caller
->global
.size
;
326 new_size
= cgraph_estimate_size_after_inlining (to
, what
);
327 to
->global
.size
= new_size
;
328 to
->global
.time
= cgraph_estimate_time_after_inlining (freq
, to
, what
);
330 gcc_assert (what
->global
.inlined_to
== to
);
331 if (new_size
> old_size
)
332 overall_size
+= new_size
- old_size
;
  /* FIXME: We should remove the optimize check after we ensure we never run
     IPA passes when not optimizing.  */
337 if (flag_indirect_inlining
&& optimize
)
338 return ipa_propagate_indirect_call_infos (curr
, new_edges
);
/* Estimate the growth caused by inlining NODE into all callers.  */
346 cgraph_estimate_growth (struct cgraph_node
*node
)
349 struct cgraph_edge
*e
;
350 bool self_recursive
= false;
352 if (node
->global
.estimated_growth
!= INT_MIN
)
353 return node
->global
.estimated_growth
;
355 for (e
= node
->callers
; e
; e
= e
->next_caller
)
357 if (e
->caller
== node
)
358 self_recursive
= true;
359 if (e
->inline_failed
)
360 growth
+= (cgraph_estimate_size_after_inlining (e
->caller
, node
)
361 - e
->caller
->global
.size
);
364 /* ??? Wrong for non-trivially self recursive functions or cases where
365 we decide to not inline for different reasons, but it is not big deal
366 as in that case we will keep the body around, but we will also avoid
368 if (cgraph_will_be_removed_from_program_if_no_direct_calls (node
)
369 && !DECL_EXTERNAL (node
->decl
) && !self_recursive
)
370 growth
-= node
->global
.size
;
371 /* COMDAT functions are very often not shared across multiple units since they
372 come from various template instantiations. Take this into account. */
373 else if (DECL_COMDAT (node
->decl
) && !self_recursive
374 && cgraph_can_remove_if_no_direct_calls_p (node
))
375 growth
-= (node
->global
.size
376 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY
)) + 50) / 100;
378 node
->global
.estimated_growth
= growth
;
382 /* Return false when inlining WHAT into TO is not good idea
383 as it would cause too large growth of function bodies.
384 When ONE_ONLY is true, assume that only one call site is going
385 to be inlined, otherwise figure out how many call sites in
386 TO calls WHAT and verify that all can be inlined.
390 cgraph_check_inline_limits (struct cgraph_node
*to
, struct cgraph_node
*what
,
391 cgraph_inline_failed_t
*reason
)
395 HOST_WIDE_INT stack_size_limit
, inlined_stack
;
397 if (to
->global
.inlined_to
)
398 to
= to
->global
.inlined_to
;
400 /* When inlining large function body called once into small function,
401 take the inlined function as base for limiting the growth. */
402 if (inline_summary (to
)->self_size
> inline_summary(what
)->self_size
)
403 limit
= inline_summary (to
)->self_size
;
405 limit
= inline_summary (what
)->self_size
;
407 limit
+= limit
* PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH
) / 100;
409 /* Check the size after inlining against the function limits. But allow
410 the function to shrink if it went over the limits by forced inlining. */
411 newsize
= cgraph_estimate_size_after_inlining (to
, what
);
412 if (newsize
>= to
->global
.size
413 && newsize
> PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS
)
417 *reason
= CIF_LARGE_FUNCTION_GROWTH_LIMIT
;
421 stack_size_limit
= inline_summary (to
)->estimated_self_stack_size
;
423 stack_size_limit
+= stack_size_limit
* PARAM_VALUE (PARAM_STACK_FRAME_GROWTH
) / 100;
425 inlined_stack
= (to
->global
.stack_frame_offset
426 + inline_summary (to
)->estimated_self_stack_size
427 + what
->global
.estimated_stack_size
);
428 if (inlined_stack
> stack_size_limit
429 && inlined_stack
> PARAM_VALUE (PARAM_LARGE_STACK_FRAME
))
432 *reason
= CIF_LARGE_STACK_FRAME_GROWTH_LIMIT
;
438 /* Return true when function N is small enough to be inlined. */
441 cgraph_default_inline_p (struct cgraph_node
*n
, cgraph_inline_failed_t
*reason
)
445 if (n
->local
.disregard_inline_limits
)
448 if (!flag_inline_small_functions
&& !DECL_DECLARED_INLINE_P (decl
))
451 *reason
= CIF_FUNCTION_NOT_INLINE_CANDIDATE
;
457 *reason
= CIF_BODY_NOT_AVAILABLE
;
460 if (cgraph_function_body_availability (n
) <= AVAIL_OVERWRITABLE
)
463 *reason
= CIF_OVERWRITABLE
;
468 if (DECL_DECLARED_INLINE_P (decl
))
470 if (n
->global
.size
>= MAX_INLINE_INSNS_SINGLE
)
473 *reason
= CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
;
479 if (n
->global
.size
>= MAX_INLINE_INSNS_AUTO
)
482 *reason
= CIF_MAX_INLINE_INSNS_AUTO_LIMIT
;
490 /* Return true when inlining WHAT would create recursive inlining.
491 We call recursive inlining all cases where same function appears more than
492 once in the single recursion nest path in the inline graph. */
495 cgraph_recursive_inlining_p (struct cgraph_node
*to
,
496 struct cgraph_node
*what
,
497 cgraph_inline_failed_t
*reason
)
500 if (to
->global
.inlined_to
)
501 recursive
= what
->decl
== to
->global
.inlined_to
->decl
;
503 recursive
= what
->decl
== to
->decl
;
504 /* Marking recursive function inline has sane semantic and thus we should
506 if (recursive
&& reason
)
507 *reason
= (what
->local
.disregard_inline_limits
508 ? CIF_RECURSIVE_INLINING
: CIF_UNSPECIFIED
);
512 /* A cost model driving the inlining heuristics in a way so the edges with
513 smallest badness are inlined first. After each inlining is performed
514 the costs of all caller edges of nodes affected are recomputed so the
515 metrics may accurately depend on values such as number of inlinable callers
516 of the function or function body size. */
519 cgraph_edge_badness (struct cgraph_edge
*edge
, bool dump
)
523 (cgraph_estimate_size_after_inlining (edge
->caller
, edge
->callee
)
524 - edge
->caller
->global
.size
);
526 if (edge
->callee
->local
.disregard_inline_limits
)
531 fprintf (dump_file
, " Badness calculcation for %s -> %s\n",
532 cgraph_node_name (edge
->caller
),
533 cgraph_node_name (edge
->callee
));
534 fprintf (dump_file
, " growth %i, time %i-%i, size %i-%i\n",
536 edge
->callee
->global
.time
,
537 inline_summary (edge
->callee
)->time_inlining_benefit
,
538 edge
->callee
->global
.size
,
539 inline_summary (edge
->callee
)->size_inlining_benefit
);
542 /* Always prefer inlining saving code size. */
545 badness
= INT_MIN
- growth
;
547 fprintf (dump_file
, " %i: Growth %i < 0\n", (int) badness
,
551 /* When profiling is available, base priorities -(#calls / growth).
552 So we optimize for overall number of "executed" inlined calls. */
557 ((double) edge
->count
* INT_MIN
/ max_count
/ (max_benefit
+ 1)) *
558 (inline_summary (edge
->callee
)->time_inlining_benefit
+ 1)) / growth
;
562 " %i (relative %f): profile info. Relative count %f"
563 " * Relative benefit %f\n",
564 (int) badness
, (double) badness
/ INT_MIN
,
565 (double) edge
->count
/ max_count
,
566 (double) (inline_summary (edge
->callee
)->
567 time_inlining_benefit
+ 1) / (max_benefit
+ 1));
571 /* When function local profile is available, base priorities on
572 growth / frequency, so we optimize for overall frequency of inlined
573 calls. This is not too accurate since while the call might be frequent
574 within function, the function itself is infrequent.
576 Other objective to optimize for is number of different calls inlined.
577 We add the estimated growth after inlining all functions to bias the
578 priorities slightly in this direction (so fewer times called functions
579 of the same size gets priority). */
580 else if (flag_guess_branch_prob
)
582 int div
= edge
->frequency
* 100 / CGRAPH_FREQ_BASE
+ 1;
585 badness
= growth
* 10000;
587 MIN (100 * inline_summary (edge
->callee
)->time_inlining_benefit
/
588 (edge
->callee
->global
.time
+ 1) +1, 100);
592 /* Decrease badness if call is nested. */
593 /* Compress the range so we don't overflow. */
595 div
= 10000 + ceil_log2 (div
) - 8;
600 growth_for_all
= cgraph_estimate_growth (edge
->callee
);
601 badness
+= growth_for_all
;
602 if (badness
> INT_MAX
)
607 " %i: guessed profile. frequency %i, overall growth %i,"
608 " benefit %i%%, divisor %i\n",
609 (int) badness
, edge
->frequency
, growth_for_all
, benefitperc
, div
);
612 /* When function local profile is not available or it does not give
613 useful information (ie frequency is zero), base the cost on
614 loop nest and overall size growth, so we optimize for overall number
615 of functions fully inlined in program. */
618 int nest
= MIN (edge
->loop_nest
, 8);
619 badness
= cgraph_estimate_growth (edge
->callee
) * 256;
621 /* Decrease badness if call is nested. */
629 fprintf (dump_file
, " %i: no profile. nest %i\n", (int) badness
,
633 /* Ensure that we did not overflow in all the fixed point math above. */
634 gcc_assert (badness
>= INT_MIN
);
635 gcc_assert (badness
<= INT_MAX
- 1);
636 /* Make recursive inlining happen always after other inlining is done. */
637 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
, NULL
))
643 /* Recompute badness of EDGE and update its key in HEAP if needed. */
645 update_edge_key (fibheap_t heap
, struct cgraph_edge
*edge
)
647 int badness
= cgraph_edge_badness (edge
, false);
650 fibnode_t n
= (fibnode_t
) edge
->aux
;
651 gcc_checking_assert (n
->data
== edge
);
653 /* fibheap_replace_key only decrease the keys.
654 When we increase the key we do not update heap
655 and instead re-insert the element once it becomes
657 if (badness
< n
->key
)
659 fibheap_replace_key (heap
, n
, badness
);
660 gcc_checking_assert (n
->key
== badness
);
664 edge
->aux
= fibheap_insert (heap
, badness
, edge
);
667 /* Recompute heap nodes for each of caller edge. */
670 update_caller_keys (fibheap_t heap
, struct cgraph_node
*node
,
671 bitmap updated_nodes
)
673 struct cgraph_edge
*edge
;
674 cgraph_inline_failed_t failed_reason
;
676 if (!node
->local
.inlinable
677 || cgraph_function_body_availability (node
) <= AVAIL_OVERWRITABLE
678 || node
->global
.inlined_to
)
680 if (!bitmap_set_bit (updated_nodes
, node
->uid
))
682 node
->global
.estimated_growth
= INT_MIN
;
684 /* See if there is something to do. */
685 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
686 if (edge
->inline_failed
)
690 /* Prune out edges we won't inline into anymore. */
691 if (!cgraph_default_inline_p (node
, &failed_reason
))
693 for (; edge
; edge
= edge
->next_caller
)
696 fibheap_delete_node (heap
, (fibnode_t
) edge
->aux
);
698 if (edge
->inline_failed
)
699 edge
->inline_failed
= failed_reason
;
704 for (; edge
; edge
= edge
->next_caller
)
705 if (edge
->inline_failed
)
706 update_edge_key (heap
, edge
);
709 /* Recompute heap nodes for each uninlined call.
710 This is used when we know that edge badnesses are going only to increase
711 (we introduced new call site) and thus all we need is to insert newly
712 created edges into heap. */
715 update_callee_keys (fibheap_t heap
, struct cgraph_node
*node
,
716 bitmap updated_nodes
)
718 struct cgraph_edge
*e
= node
->callees
;
719 node
->global
.estimated_growth
= INT_MIN
;
724 if (!e
->inline_failed
&& e
->callee
->callees
)
725 e
= e
->callee
->callees
;
729 && e
->callee
->local
.inlinable
730 && cgraph_function_body_availability (e
->callee
) >= AVAIL_AVAILABLE
731 && !bitmap_bit_p (updated_nodes
, e
->callee
->uid
))
733 node
->global
.estimated_growth
= INT_MIN
;
734 /* If function becomes uninlinable, we need to remove it from the heap. */
735 if (!cgraph_default_inline_p (e
->callee
, &e
->inline_failed
))
736 update_caller_keys (heap
, e
->callee
, updated_nodes
);
738 /* Otherwise update just edge E. */
739 update_edge_key (heap
, e
);
747 if (e
->caller
== node
)
749 e
= e
->caller
->callers
;
751 while (!e
->next_callee
);
757 /* Recompute heap nodes for each of caller edges of each of callees.
758 Walk recursively into all inline clones. */
761 update_all_callee_keys (fibheap_t heap
, struct cgraph_node
*node
,
762 bitmap updated_nodes
)
764 struct cgraph_edge
*e
= node
->callees
;
765 node
->global
.estimated_growth
= INT_MIN
;
770 if (!e
->inline_failed
&& e
->callee
->callees
)
771 e
= e
->callee
->callees
;
774 if (e
->inline_failed
)
775 update_caller_keys (heap
, e
->callee
, updated_nodes
);
782 if (e
->caller
== node
)
784 e
= e
->caller
->callers
;
786 while (!e
->next_callee
);
792 /* Enqueue all recursive calls from NODE into priority queue depending on
793 how likely we want to recursively inline the call. */
796 lookup_recursive_calls (struct cgraph_node
*node
, struct cgraph_node
*where
,
800 struct cgraph_edge
*e
;
801 for (e
= where
->callees
; e
; e
= e
->next_callee
)
802 if (e
->callee
== node
)
804 /* When profile feedback is available, prioritize by expected number
805 of calls. Without profile feedback we maintain simple queue
806 to order candidates via recursive depths. */
807 fibheap_insert (heap
,
808 !max_count
? priority
++
809 : -(e
->count
/ ((max_count
+ (1<<24) - 1) / (1<<24))),
812 for (e
= where
->callees
; e
; e
= e
->next_callee
)
813 if (!e
->inline_failed
)
814 lookup_recursive_calls (node
, e
->callee
, heap
);
817 /* Decide on recursive inlining: in the case function has recursive calls,
818 inline until body size reaches given argument. If any new indirect edges
819 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
823 cgraph_decide_recursive_inlining (struct cgraph_node
*node
,
824 VEC (cgraph_edge_p
, heap
) **new_edges
)
826 int limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO
);
827 int max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO
);
828 int probability
= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY
);
830 struct cgraph_edge
*e
;
831 struct cgraph_node
*master_clone
, *next
;
835 /* It does not make sense to recursively inline always-inline functions
836 as we are going to sorry() on the remaining calls anyway. */
837 if (node
->local
.disregard_inline_limits
838 && lookup_attribute ("always_inline", DECL_ATTRIBUTES (node
->decl
)))
841 if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node
->decl
))
842 || (!flag_inline_functions
&& !DECL_DECLARED_INLINE_P (node
->decl
)))
845 if (DECL_DECLARED_INLINE_P (node
->decl
))
847 limit
= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE
);
848 max_depth
= PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH
);
851 /* Make sure that function is small enough to be considered for inlining. */
853 || cgraph_estimate_size_after_inlining (node
, node
) >= limit
)
855 heap
= fibheap_new ();
856 lookup_recursive_calls (node
, node
, heap
);
857 if (fibheap_empty (heap
))
859 fibheap_delete (heap
);
865 " Performing recursive inlining on %s\n",
866 cgraph_node_name (node
));
868 /* We need original clone to copy around. */
869 master_clone
= cgraph_clone_node (node
, node
->decl
,
870 node
->count
, CGRAPH_FREQ_BASE
, 1,
872 for (e
= master_clone
->callees
; e
; e
= e
->next_callee
)
873 if (!e
->inline_failed
)
874 cgraph_clone_inlined_nodes (e
, true, false);
876 /* Do the inlining and update list of recursive call during process. */
877 while (!fibheap_empty (heap
)
878 && (cgraph_estimate_size_after_inlining (node
, master_clone
)
881 struct cgraph_edge
*curr
882 = (struct cgraph_edge
*) fibheap_extract_min (heap
);
883 struct cgraph_node
*cnode
;
886 for (cnode
= curr
->caller
;
887 cnode
->global
.inlined_to
; cnode
= cnode
->callers
->caller
)
888 if (node
->decl
== curr
->callee
->decl
)
890 if (depth
> max_depth
)
894 " maximal depth reached\n");
900 if (!cgraph_maybe_hot_edge_p (curr
))
903 fprintf (dump_file
, " Not inlining cold call\n");
906 if (curr
->count
* 100 / node
->count
< probability
)
910 " Probability of edge is too small\n");
918 " Inlining call of depth %i", depth
);
921 fprintf (dump_file
, " called approx. %.2f times per call",
922 (double)curr
->count
/ node
->count
);
924 fprintf (dump_file
, "\n");
926 cgraph_redirect_edge_callee (curr
, master_clone
);
927 cgraph_mark_inline_edge (curr
, false, new_edges
);
928 lookup_recursive_calls (node
, curr
->callee
, heap
);
931 if (!fibheap_empty (heap
) && dump_file
)
932 fprintf (dump_file
, " Recursive inlining growth limit met.\n");
934 fibheap_delete (heap
);
937 "\n Inlined %i times, body grown from size %i to %i, time %i to %i\n", n
,
938 master_clone
->global
.size
, node
->global
.size
,
939 master_clone
->global
.time
, node
->global
.time
);
941 /* Remove master clone we used for inlining. We rely that clones inlined
942 into master clone gets queued just before master clone so we don't
944 for (node
= cgraph_nodes
; node
!= master_clone
;
948 if (node
->global
.inlined_to
== master_clone
)
949 cgraph_remove_node (node
);
951 cgraph_remove_node (master_clone
);
952 /* FIXME: Recursive inlining actually reduces number of calls of the
953 function. At this place we should probably walk the function and
954 inline clones and compensate the counts accordingly. This probably
955 doesn't matter much in practice. */
959 /* Set inline_failed for all callers of given function to REASON. */
962 cgraph_set_inline_failed (struct cgraph_node
*node
,
963 cgraph_inline_failed_t reason
)
965 struct cgraph_edge
*e
;
968 fprintf (dump_file
, "Inlining failed: %s\n",
969 cgraph_inline_failed_string (reason
));
970 for (e
= node
->callers
; e
; e
= e
->next_caller
)
971 if (e
->inline_failed
)
972 e
->inline_failed
= reason
;
975 /* Given whole compilation unit estimate of INSNS, compute how large we can
976 allow the unit to grow. */
978 compute_max_insns (int insns
)
980 int max_insns
= insns
;
981 if (max_insns
< PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
))
982 max_insns
= PARAM_VALUE (PARAM_LARGE_UNIT_INSNS
);
984 return ((HOST_WIDEST_INT
) max_insns
985 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH
)) / 100);
988 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
990 add_new_edges_to_heap (fibheap_t heap
, VEC (cgraph_edge_p
, heap
) *new_edges
)
992 while (VEC_length (cgraph_edge_p
, new_edges
) > 0)
994 struct cgraph_edge
*edge
= VEC_pop (cgraph_edge_p
, new_edges
);
996 gcc_assert (!edge
->aux
);
997 if (edge
->callee
->local
.inlinable
998 && edge
->inline_failed
999 && cgraph_default_inline_p (edge
->callee
, &edge
->inline_failed
))
1000 edge
->aux
= fibheap_insert (heap
, cgraph_edge_badness (edge
, false), edge
);
/* We use greedy algorithm for inlining of small functions:
   All inline candidates are put into prioritized heap based on estimated
   growth of the overall number of instructions and then update the estimates.

   INLINED and INLINED_CALLEES are just pointers to arrays large enough
   to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */
1013 cgraph_decide_inlining_of_small_functions (void)
1015 struct cgraph_node
*node
;
1016 struct cgraph_edge
*edge
;
1017 cgraph_inline_failed_t failed_reason
;
1018 fibheap_t heap
= fibheap_new ();
1019 bitmap updated_nodes
= BITMAP_ALLOC (NULL
);
1020 int min_size
, max_size
;
1021 VEC (cgraph_edge_p
, heap
) *new_indirect_edges
= NULL
;
1023 if (flag_indirect_inlining
)
1024 new_indirect_edges
= VEC_alloc (cgraph_edge_p
, heap
, 8);
1027 fprintf (dump_file
, "\nDeciding on smaller functions:\n");
1029 /* Put all inline candidates into the heap. */
1031 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1033 if (!node
->local
.inlinable
|| !node
->callers
)
1036 fprintf (dump_file
, "Considering inline candidate %s.\n", cgraph_node_name (node
));
1038 node
->global
.estimated_growth
= INT_MIN
;
1039 if (!cgraph_default_inline_p (node
, &failed_reason
))
1041 cgraph_set_inline_failed (node
, failed_reason
);
1045 for (edge
= node
->callers
; edge
; edge
= edge
->next_caller
)
1046 if (edge
->inline_failed
)
1048 gcc_assert (!edge
->aux
);
1049 edge
->aux
= fibheap_insert (heap
, cgraph_edge_badness (edge
, false), edge
);
1053 max_size
= compute_max_insns (overall_size
);
1054 min_size
= overall_size
;
1056 while (overall_size
<= max_size
1057 && !fibheap_empty (heap
))
1059 int old_size
= overall_size
;
1060 struct cgraph_node
*where
, *callee
;
1061 int badness
= fibheap_min_key (heap
);
1062 int current_badness
;
1064 cgraph_inline_failed_t not_good
= CIF_OK
;
1066 edge
= (struct cgraph_edge
*) fibheap_extract_min (heap
);
1067 gcc_assert (edge
->aux
);
1069 if (!edge
->inline_failed
)
1072 /* When updating the edge costs, we only decrease badness in the keys.
1073 When the badness increase, we keep the heap as it is and re-insert
1075 current_badness
= cgraph_edge_badness (edge
, false);
1076 gcc_assert (current_badness
>= badness
);
1077 if (current_badness
!= badness
)
1079 edge
->aux
= fibheap_insert (heap
, current_badness
, edge
);
1083 callee
= edge
->callee
;
1085 growth
= (cgraph_estimate_size_after_inlining (edge
->caller
, edge
->callee
)
1086 - edge
->caller
->global
.size
);
1091 "\nConsidering %s with %i size\n",
1092 cgraph_node_name (edge
->callee
),
1093 edge
->callee
->global
.size
);
1095 " to be inlined into %s in %s:%i\n"
1096 " Estimated growth after inlined into all callees is %+i insns.\n"
1097 " Estimated badness is %i, frequency %.2f.\n",
1098 cgraph_node_name (edge
->caller
),
1099 flag_wpa
? "unknown"
1100 : gimple_filename ((const_gimple
) edge
->call_stmt
),
1101 flag_wpa
? -1 : gimple_lineno ((const_gimple
) edge
->call_stmt
),
1102 cgraph_estimate_growth (edge
->callee
),
1104 edge
->frequency
/ (double)CGRAPH_FREQ_BASE
);
1106 fprintf (dump_file
," Called "HOST_WIDEST_INT_PRINT_DEC
"x\n", edge
->count
);
1107 if (dump_flags
& TDF_DETAILS
)
1108 cgraph_edge_badness (edge
, true);
1111 /* When not having profile info ready we don't weight by any way the
1112 position of call in procedure itself. This means if call of
1113 function A from function B seems profitable to inline, the recursive
1114 call of function A in inline copy of A in B will look profitable too
1115 and we end up inlining until reaching maximal function growth. This
1116 is not good idea so prohibit the recursive inlining.
1118 ??? When the frequencies are taken into account we might not need this
1121 We need to be careful here, in some testcases, e.g. directivec.c in
1122 libcpp, we can estimate self recursive function to have negative growth
1123 for inlining completely.
1127 where
= edge
->caller
;
1128 while (where
->global
.inlined_to
)
1130 if (where
->decl
== edge
->callee
->decl
)
1132 where
= where
->callers
->caller
;
1134 if (where
->global
.inlined_to
)
1137 = (edge
->callee
->local
.disregard_inline_limits
1138 ? CIF_RECURSIVE_INLINING
: CIF_UNSPECIFIED
);
1140 fprintf (dump_file
, " inline_failed:Recursive inlining performed only for function itself.\n");
1145 if (edge
->callee
->local
.disregard_inline_limits
)
1147 else if (!cgraph_maybe_hot_edge_p (edge
))
1148 not_good
= CIF_UNLIKELY_CALL
;
1149 else if (!flag_inline_functions
1150 && !DECL_DECLARED_INLINE_P (edge
->callee
->decl
))
1151 not_good
= CIF_NOT_DECLARED_INLINED
;
1152 else if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION(edge
->caller
->decl
)))
1153 not_good
= CIF_OPTIMIZING_FOR_SIZE
;
1154 if (not_good
&& growth
> 0 && cgraph_estimate_growth (edge
->callee
) > 0)
1156 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
1157 &edge
->inline_failed
))
1159 edge
->inline_failed
= not_good
;
1161 fprintf (dump_file
, " inline_failed:%s.\n",
1162 cgraph_inline_failed_string (edge
->inline_failed
));
1166 if (!cgraph_default_inline_p (edge
->callee
, &edge
->inline_failed
))
1168 if (!cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
1169 &edge
->inline_failed
))
1172 fprintf (dump_file
, " inline_failed:%s.\n",
1173 cgraph_inline_failed_string (edge
->inline_failed
));
1177 if (!tree_can_inline_p (edge
)
1178 || edge
->call_stmt_cannot_inline_p
)
1181 fprintf (dump_file
, " inline_failed:%s.\n",
1182 cgraph_inline_failed_string (edge
->inline_failed
));
1185 if (cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
1186 &edge
->inline_failed
))
1188 where
= edge
->caller
;
1189 if (where
->global
.inlined_to
)
1190 where
= where
->global
.inlined_to
;
1191 if (!cgraph_decide_recursive_inlining (where
,
1192 flag_indirect_inlining
1193 ? &new_indirect_edges
: NULL
))
1195 if (flag_indirect_inlining
)
1196 add_new_edges_to_heap (heap
, new_indirect_edges
);
1197 update_all_callee_keys (heap
, where
, updated_nodes
);
1201 struct cgraph_node
*callee
;
1202 if (!cgraph_check_inline_limits (edge
->caller
, edge
->callee
,
1203 &edge
->inline_failed
))
1206 fprintf (dump_file
, " Not inlining into %s:%s.\n",
1207 cgraph_node_name (edge
->caller
),
1208 cgraph_inline_failed_string (edge
->inline_failed
));
1211 callee
= edge
->callee
;
1212 gcc_checking_assert (!callee
->global
.inlined_to
);
1213 cgraph_mark_inline_edge (edge
, true, &new_indirect_edges
);
1214 if (flag_indirect_inlining
)
1215 add_new_edges_to_heap (heap
, new_indirect_edges
);
1217 /* We inlined last offline copy to the body. This might lead
1218 to callees of function having fewer call sites and thus they
1219 may need updating. */
1220 if (callee
->global
.inlined_to
)
1221 update_all_callee_keys (heap
, callee
, updated_nodes
);
1223 update_callee_keys (heap
, edge
->callee
, updated_nodes
);
1225 where
= edge
->caller
;
1226 if (where
->global
.inlined_to
)
1227 where
= where
->global
.inlined_to
;
1229 /* Our profitability metric can depend on local properties
1230 such as number of inlinable calls and size of the function body.
1231 After inlining these properties might change for the function we
1232 inlined into (since its body size changed) and for the functions
1233 called by function we inlined (since number of its inlinable callers
1235 update_caller_keys (heap
, where
, updated_nodes
);
1237 /* We removed one call of the function we just inlined. If offline
1238 copy is still needed, be sure to update the keys. */
1239 if (callee
!= where
&& !callee
->global
.inlined_to
)
1240 update_caller_keys (heap
, callee
, updated_nodes
);
1241 bitmap_clear (updated_nodes
);
1246 " Inlined into %s which now has size %i and self time %i,"
1247 "net change of %+i.\n",
1248 cgraph_node_name (edge
->caller
),
1249 edge
->caller
->global
.time
,
1250 edge
->caller
->global
.size
,
1251 overall_size
- old_size
);
1253 if (min_size
> overall_size
)
1255 min_size
= overall_size
;
1256 max_size
= compute_max_insns (min_size
);
1259 fprintf (dump_file
, "New minimal size reached: %i\n", min_size
);
1262 while (!fibheap_empty (heap
))
1264 int badness
= fibheap_min_key (heap
);
1266 edge
= (struct cgraph_edge
*) fibheap_extract_min (heap
);
1267 gcc_assert (edge
->aux
);
1269 if (!edge
->inline_failed
)
1271 #ifdef ENABLE_CHECKING
1272 gcc_assert (cgraph_edge_badness (edge
, false) >= badness
);
1277 "\nSkipping %s with %i size\n",
1278 cgraph_node_name (edge
->callee
),
1279 edge
->callee
->global
.size
);
1281 " called by %s in %s:%i\n"
1282 " Estimated growth after inlined into all callees is %+i insns.\n"
1283 " Estimated badness is %i, frequency %.2f.\n",
1284 cgraph_node_name (edge
->caller
),
1285 flag_wpa
? "unknown"
1286 : gimple_filename ((const_gimple
) edge
->call_stmt
),
1287 flag_wpa
? -1 : gimple_lineno ((const_gimple
) edge
->call_stmt
),
1288 cgraph_estimate_growth (edge
->callee
),
1290 edge
->frequency
/ (double)CGRAPH_FREQ_BASE
);
1292 fprintf (dump_file
," Called "HOST_WIDEST_INT_PRINT_DEC
"x\n", edge
->count
);
1293 if (dump_flags
& TDF_DETAILS
)
1294 cgraph_edge_badness (edge
, true);
1296 if (!edge
->callee
->local
.disregard_inline_limits
&& edge
->inline_failed
1297 && !cgraph_recursive_inlining_p (edge
->caller
, edge
->callee
,
1298 &edge
->inline_failed
))
1299 edge
->inline_failed
= CIF_INLINE_UNIT_GROWTH_LIMIT
;
1302 if (new_indirect_edges
)
1303 VEC_free (cgraph_edge_p
, heap
, new_indirect_edges
);
1304 fibheap_delete (heap
);
1305 BITMAP_FREE (updated_nodes
);
1308 /* Flatten NODE from the IPA inliner.

   The callees of NODE are walked; every edge that passes the checks below
   is marked inline with cgraph_mark_inline_edge and the callee is then
   flattened recursively.  Edges that are already inlined are only recursed
   into.  NODE->aux must be NULL on entry and is set to INLINE_ALL to mark
   NODE as being processed.  Rejected edges are reported in the dump
   file.  */
1311 cgraph_flatten (struct cgraph_node
*node
)
1313 struct cgraph_edge
*e
;
1315 /* We shouldn't be called recursively when we are being processed. */
1316 gcc_assert (node
->aux
== NULL
);
1318 node
->aux
= (void *)(size_t) INLINE_ALL
;
1320 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1322 struct cgraph_node
*orig_callee
;
1324 if (e
->call_stmt_cannot_inline_p
)
1327 fprintf (dump_file
, "Not inlining: %s",
1328 cgraph_inline_failed_string (e
->inline_failed
));
1332 if (!e
->callee
->analyzed
)
1336 "Not inlining: Function body not available.\n");
1340 /* We've hit cycle? It is time to give up. */
1345 "Not inlining %s into %s to avoid cycle.\n",
1346 cgraph_node_name (e
->callee
),
1347 cgraph_node_name (e
->caller
));
1348 e
->inline_failed
= CIF_RECURSIVE_INLINING
;
1352 /* When the edge is already inlined, we just need to recurse into
1353 it in order to fully flatten the leaves. */
1354 if (!e
->inline_failed
)
1356 cgraph_flatten (e
->callee
);
1360 if (cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
))
1363 fprintf (dump_file
, "Not inlining: recursive call.\n");
1367 if (!tree_can_inline_p (e
))
1370 fprintf (dump_file
, "Not inlining: %s",
1371 cgraph_inline_failed_string (e
->inline_failed
));
1375 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1376 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1379 fprintf (dump_file
, "Not inlining: SSA form does not match.\n");
1383 /* Inline the edge and flatten the inline clone. Avoid
1384 recursing through the original node if the node was cloned. */
1386 fprintf (dump_file
, " Inlining %s into %s.\n",
1387 cgraph_node_name (e
->callee
),
1388 cgraph_node_name (e
->caller
));
1389 orig_callee
= e
->callee
;
1390 cgraph_mark_inline_edge (e
, true, NULL
);
/* If E->callee differs from ORIG_CALLEE after marking, the original
   callee is flagged INLINE_ALL for the duration of the recursive call
   and its aux field is cleared again afterwards.  */
1391 if (e
->callee
!= orig_callee
)
1392 orig_callee
->aux
= (void *)(size_t) INLINE_ALL
;
1393 cgraph_flatten (e
->callee
);
1394 if (e
->callee
!= orig_callee
)
1395 orig_callee
->aux
= NULL
;
1401 /* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.

   The function insertion hook is removed first.  The initial unit size is
   then accumulated from the global.size of every node that is not
   DECL_EXTERNAL, while max_count and max_benefit are collected over the
   call edges and inline summaries.  Nodes carrying the "flatten"
   attribute are handed to cgraph_flatten, the bulk of the decisions is
   made by cgraph_decide_inlining_of_small_functions and, when
   flag_inline_functions_called_once is set, functions with exactly one
   caller are inlined into that caller subject to
   cgraph_check_inline_limits.  The ipa-prop structures are freed at the
   end when flag_indirect_inlining is set.  */
1405 cgraph_decide_inlining (void)
1407 struct cgraph_node
*node
;
1409 struct cgraph_node
**order
=
1410 XCNEWVEC (struct cgraph_node
*, cgraph_n_nodes
);
1413 int initial_size
= 0;
1415 cgraph_remove_function_insertion_hook (function_insertion_hook_holder
);
1416 if (in_lto_p
&& flag_indirect_inlining
)
1417 ipa_update_after_lto_read ();
1418 if (flag_indirect_inlining
)
1419 ipa_create_all_structures_for_iinln ();
1423 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1426 struct cgraph_edge
*e
;
1428 gcc_assert (inline_summary (node
)->self_size
== node
->global
.size
);
1429 if (!DECL_EXTERNAL (node
->decl
))
1430 initial_size
+= node
->global
.size
;
1431 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1432 if (max_count
< e
->count
)
1433 max_count
= e
->count
;
1434 if (max_benefit
< inline_summary (node
)->time_inlining_benefit
)
1435 max_benefit
= inline_summary (node
)->time_inlining_benefit
;
1437 gcc_assert (in_lto_p
1439 || (profile_info
&& flag_branch_probabilities
));
1440 overall_size
= initial_size
;
1442 nnodes
= cgraph_postorder (order
);
1446 "\nDeciding on inlining. Starting with size %i.\n",
1449 for (node
= cgraph_nodes
; node
; node
= node
->next
)
1453 fprintf (dump_file
, "\nFlattening functions:\n");
1455 /* In the first pass handle functions to be flattened. Do this with
1456 a priority so none of our later choices will make this impossible. */
1457 for (i
= nnodes
- 1; i
>= 0; i
--)
1461 /* Handle nodes to be flattened, but don't update overall unit
1462 size. Calling the incremental inliner here is lame,
1463 a simple worklist should be enough. What should be left
1464 here from the early inliner (if it runs) is cyclic cases.
1465 Ideally when processing callees we stop inlining at the
1466 entry of cycles, possibly cloning that entry point and
1467 try to flatten itself turning it into a self-recursive
1469 if (lookup_attribute ("flatten",
1470 DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1474 "Flattening %s\n", cgraph_node_name (node
));
1475 cgraph_flatten (node
);
1479 cgraph_decide_inlining_of_small_functions ();
1481 if (flag_inline_functions_called_once
)
1484 fprintf (dump_file
, "\nDeciding on functions called once:\n");
1486 /* And finally decide what functions are called once. */
1487 for (i
= nnodes
- 1; i
>= 0; i
--)
1492 && !node
->callers
->next_caller
1493 && !node
->global
.inlined_to
1494 && cgraph_will_be_removed_from_program_if_no_direct_calls (node
)
1495 && node
->local
.inlinable
1496 && cgraph_function_body_availability (node
) >= AVAIL_AVAILABLE
1497 && node
->callers
->inline_failed
1498 && node
->callers
->caller
!= node
1499 && node
->callers
->caller
->global
.inlined_to
!= node
1500 && !node
->callers
->call_stmt_cannot_inline_p
1501 && !DECL_EXTERNAL (node
->decl
))
1503 cgraph_inline_failed_t reason
;
1504 old_size
= overall_size
;
1508 "\nConsidering %s size %i.\n",
1509 cgraph_node_name (node
), node
->global
.size
);
1511 " Called once from %s %i insns.\n",
1512 cgraph_node_name (node
->callers
->caller
),
1513 node
->callers
->caller
->global
.size
);
1516 if (cgraph_check_inline_limits (node
->callers
->caller
, node
,
1519 struct cgraph_node
*caller
= node
->callers
->caller
;
1520 cgraph_mark_inline_edge (node
->callers
, true, NULL
);
1523 " Inlined into %s which now has %i size"
1524 " for a net change of %+i size.\n",
1525 cgraph_node_name (caller
),
1526 caller
->global
.size
,
1527 overall_size
- old_size
);
1533 " Not inlining: %s.\n",
1534 cgraph_inline_failed_string (reason
));
1540 /* Free ipa-prop structures if they are no longer needed. */
1541 if (flag_indirect_inlining
)
1542 ipa_free_all_structures_after_iinln ();
1546 "\nInlined %i calls, eliminated %i functions, "
1547 "size %i turned to %i size.\n\n",
1548 ncalls_inlined
, nfunctions_inlined
, initial_size
,
1554 /* Return true when N is leaf function.  Accept cheap builtins
   in leaf functions: every callee edge of N is tested with
   is_inexpensive_builtin on the callee's decl.  */
1558 leaf_node_p (struct cgraph_node
*n
)
1560 struct cgraph_edge
*e
;
1561 for (e
= n
->callees
; e
; e
= e
->next_callee
)
1562 if (!is_inexpensive_builtin (e
->callee
->decl
))
1567 /* Decide on the inlining of the calls made by NODE from the incremental
   inliner, according to MODE.

   A node carrying the "flatten" attribute is reported as incrementally
   flattened unless MODE is INLINE_ALWAYS_INLINE or
   INLINE_SIZE_NORECURSIVE.  The always-inline candidates among the
   callees are looked at first unless MODE is INLINE_SIZE_NORECURSIVE.
   The automatic inlining of the remaining callees is done only when MODE
   is neither INLINE_ALL nor INLINE_ALWAYS_INLINE and NODE itself does not
   have disregard_inline_limits set.  Edges chosen for inlining are marked
   with cgraph_mark_inline_edge.

   NOTE(review): the result is used as a loop condition by
   cgraph_early_inlining; presumably it is the local INLINED flag --
   confirm.  */
1571 cgraph_decide_inlining_incrementally (struct cgraph_node
*node
,
1572 enum inlining_mode mode
)
1574 struct cgraph_edge
*e
;
1575 bool inlined
= false;
1576 cgraph_inline_failed_t failed_reason
;
1578 #ifdef ENABLE_CHECKING
1579 verify_cgraph_node (node
);
1582 if (mode
!= INLINE_ALWAYS_INLINE
&& mode
!= INLINE_SIZE_NORECURSIVE
1583 && lookup_attribute ("flatten", DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1586 fprintf (dump_file
, "Incrementally flattening %s\n",
1587 cgraph_node_name (node
));
1591 /* First of all look for always inline functions. */
1592 if (mode
!= INLINE_SIZE_NORECURSIVE
)
1593 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1595 if (!e
->callee
->local
.disregard_inline_limits
1596 && (mode
!= INLINE_ALL
|| !e
->callee
->local
.inlinable
))
1600 "Considering to always inline inline candidate %s.\n",
1601 cgraph_node_name (e
->callee
));
1602 if (cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
))
1605 fprintf (dump_file
, "Not inlining: recursive call.\n");
1608 if (!tree_can_inline_p (e
)
1609 || e
->call_stmt_cannot_inline_p
)
1614 cgraph_inline_failed_string (e
->inline_failed
));
1617 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1618 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1621 fprintf (dump_file
, "Not inlining: SSA form does not match.\n");
1624 if (!e
->callee
->analyzed
)
1628 "Not inlining: Function body no longer available.\n");
1633 fprintf (dump_file
, " Inlining %s into %s.\n",
1634 cgraph_node_name (e
->callee
),
1635 cgraph_node_name (e
->caller
));
1636 cgraph_mark_inline_edge (e
, true, NULL
);
1640 /* Now do the automatic inlining. */
1641 if (mode
!= INLINE_ALL
&& mode
!= INLINE_ALWAYS_INLINE
1642 /* Never inline regular functions into always-inline functions
1643 during incremental inlining. */
1644 && !node
->local
.disregard_inline_limits
)
1646 bitmap visited
= BITMAP_ALLOC (NULL
);
1647 for (e
= node
->callees
; e
; e
= e
->next_callee
)
1649 int allowed_growth
= 0;
1650 if (!e
->callee
->local
.inlinable
1651 || !e
->inline_failed
1652 || e
->callee
->local
.disregard_inline_limits
)
1654 /* We are inlining a function to all call-sites in node
1655 or to none. So visit each candidate only once. */
1656 if (!bitmap_set_bit (visited
, e
->callee
->uid
))
1659 fprintf (dump_file
, "Considering inline candidate %s.\n",
1660 cgraph_node_name (e
->callee
));
1661 if (cgraph_recursive_inlining_p (node
, e
->callee
, &e
->inline_failed
))
1664 fprintf (dump_file
, "Not inlining: recursive call.\n");
1667 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node
->decl
))
1668 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e
->callee
->decl
)))
1672 "Not inlining: SSA form does not match.\n");
1676 if (cgraph_maybe_hot_edge_p (e
) && leaf_node_p (e
->callee
)
1677 && optimize_function_for_speed_p (cfun
))
1678 allowed_growth
= PARAM_VALUE (PARAM_EARLY_INLINING_INSNS
);
1680 /* When the function body would grow and inlining the function
1681 won't eliminate the need for offline copy of the function,
1683 if (((mode
== INLINE_SIZE
|| mode
== INLINE_SIZE_NORECURSIVE
)
1684 || (!flag_inline_functions
1685 && !DECL_DECLARED_INLINE_P (e
->callee
->decl
)))
1686 && (cgraph_estimate_size_after_inlining (e
->caller
, e
->callee
)
1687 > e
->caller
->global
.size
+ allowed_growth
)
1688 && cgraph_estimate_growth (e
->callee
) > allowed_growth
)
1692 "Not inlining: code size would grow by %i.\n",
1693 cgraph_estimate_size_after_inlining (e
->caller
,
1695 - e
->caller
->global
.size
);
1698 if (e
->call_stmt_cannot_inline_p
1699 || !tree_can_inline_p (e
))
1703 "Not inlining: call site not inlinable.\n");
1706 if (!e
->callee
->analyzed
)
1710 "Not inlining: Function body no longer available.\n");
1713 if (!cgraph_check_inline_limits (node
, e
->callee
, &e
->inline_failed
))
1716 fprintf (dump_file
, "Not inlining: %s.\n",
1717 cgraph_inline_failed_string (e
->inline_failed
));
1720 if (cgraph_default_inline_p (e
->callee
, &failed_reason
))
1723 fprintf (dump_file
, " Inlining %s into %s.\n",
1724 cgraph_node_name (e
->callee
),
1725 cgraph_node_name (e
->caller
));
1726 cgraph_mark_inline_edge (e
, true, NULL
);
1730 BITMAP_FREE (visited
);
/* File-scope array of cgraph node pointers.  The GTY marker registers it
   with the garbage collector and gives its length as nnodes.  */
1735 /* Because inlining might remove no-longer reachable nodes, we need to
1736 keep the array visible to garbage collector to avoid reading collected
1739 static GTY ((length ("nnodes"))) struct cgraph_node
**order
;
1741 /* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   later real inlining pass for testcases with very many function calls.

   Works on the cgraph node of current_function_decl.  Depending on the
   path taken, the decisions come from
   cgraph_decide_inlining_incrementally (INLINE_ALWAYS_INLINE only when
   early inlining is disabled), from cgraph_flatten for functions carrying
   the "flatten" attribute, or from repeated incremental inlining bounded
   by PARAM_EARLY_INLINER_MAX_ITERATIONS.  Each path is followed by
   optimize_inline_calls under the TV_INTEGRATION timer, with the result
   accumulated in TODO.  cfun->always_inline_functions_inlined is set
   afterwards.  */
1745 cgraph_early_inlining (void)
1747 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
1748 unsigned int todo
= 0;
1756 || !flag_early_inlining
)
1758 /* When not optimizing or not inlining inline only always-inline
1760 cgraph_decide_inlining_incrementally (node
, INLINE_ALWAYS_INLINE
);
1761 timevar_push (TV_INTEGRATION
);
1762 todo
|= optimize_inline_calls (current_function_decl
);
1763 timevar_pop (TV_INTEGRATION
);
1767 if (lookup_attribute ("flatten",
1768 DECL_ATTRIBUTES (node
->decl
)) != NULL
)
1772 "Flattening %s\n", cgraph_node_name (node
));
1773 cgraph_flatten (node
);
1774 timevar_push (TV_INTEGRATION
);
1775 todo
|= optimize_inline_calls (current_function_decl
);
1776 timevar_pop (TV_INTEGRATION
);
1778 /* We iterate incremental inlining to get trivial cases of indirect
1780 while (iterations
< PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS
)
1781 && cgraph_decide_inlining_incrementally (node
,
1783 ? INLINE_SIZE_NORECURSIVE
1786 timevar_push (TV_INTEGRATION
);
1787 todo
|= optimize_inline_calls (current_function_decl
);
1789 timevar_pop (TV_INTEGRATION
);
1792 fprintf (dump_file
, "Iterations: %i\n", iterations
);
1795 cfun
->always_inline_functions_inlined
= true;
/* Pass descriptor of the early inliner.  The pass is named "einline", is
   executed by cgraph_early_inlining, is accounted to TV_INLINE_HEURISTICS
   and requests TODO_dump_func when it finishes.  */
1800 struct gimple_opt_pass pass_early_inline
=
1804 "einline", /* name */
1806 cgraph_early_inlining
, /* execute */
1809 0, /* static_pass_number */
1810 TV_INLINE_HEURISTICS
, /* tv_id */
1811 0, /* properties_required */
1812 0, /* properties_provided */
1813 0, /* properties_destroyed */
1814 0, /* todo_flags_start */
1815 TODO_dump_func
/* todo_flags_finish */
1820 /* See if statement might disappear after inlining.
1821 0 - means not eliminated
1822 1 - half of statements goes away
1823 2 - for sure it is eliminated.
1824 We are not terribly sophisticated, basically looking for simple abstraction
1825 penalty wrappers. */
1828 eliminated_by_inlining_prob (gimple stmt
)
1830 enum gimple_code code
= gimple_code (stmt
);
1836 if (gimple_num_ops (stmt
) != 2)
1839 /* Casts of parameters, loads from parameters passed by reference
1840 and stores to return value or parameters are often free after
1841 inlining due to SRA and further combining.
1842 Assume that half of statements goes away. */
1843 if (gimple_assign_rhs_code (stmt
) == CONVERT_EXPR
1844 || gimple_assign_rhs_code (stmt
) == NOP_EXPR
1845 || gimple_assign_rhs_code (stmt
) == VIEW_CONVERT_EXPR
1846 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1848 tree rhs
= gimple_assign_rhs1 (stmt
);
1849 tree lhs
= gimple_assign_lhs (stmt
);
1850 tree inner_rhs
= rhs
;
1851 tree inner_lhs
= lhs
;
1852 bool rhs_free
= false;
1853 bool lhs_free
= false;
/* Strip component references and MEM_REFs (and ADDR_EXPRs on the right
   hand side) to reach the innermost operand on each side.  */
1855 while (handled_component_p (inner_lhs
)
1856 || TREE_CODE (inner_lhs
) == MEM_REF
)
1857 inner_lhs
= TREE_OPERAND (inner_lhs
, 0);
1858 while (handled_component_p (inner_rhs
)
1859 || TREE_CODE (inner_rhs
) == ADDR_EXPR
1860 || TREE_CODE (inner_rhs
) == MEM_REF
)
1861 inner_rhs
= TREE_OPERAND (inner_rhs
, 0);
1864 if (TREE_CODE (inner_rhs
) == PARM_DECL
1865 || (TREE_CODE (inner_rhs
) == SSA_NAME
1866 && SSA_NAME_IS_DEFAULT_DEF (inner_rhs
)
1867 && TREE_CODE (SSA_NAME_VAR (inner_rhs
)) == PARM_DECL
))
1869 if (rhs_free
&& is_gimple_reg (lhs
))
1871 if (((TREE_CODE (inner_lhs
) == PARM_DECL
1872 || (TREE_CODE (inner_lhs
) == SSA_NAME
1873 && SSA_NAME_IS_DEFAULT_DEF (inner_lhs
)
1874 && TREE_CODE (SSA_NAME_VAR (inner_lhs
)) == PARM_DECL
))
1875 && inner_lhs
!= lhs
)
1876 || TREE_CODE (inner_lhs
) == RESULT_DECL
1877 || (TREE_CODE (inner_lhs
) == SSA_NAME
1878 && TREE_CODE (SSA_NAME_VAR (inner_lhs
)) == RESULT_DECL
))
1881 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1883 if (lhs_free
&& rhs_free
)
1892 /* Compute function body size parameters for NODE.

   Every statement of every basic block of NODE's function is costed with
   estimate_num_insns, once with eni_size_weights and once with
   eni_time_weights.  The statement's contribution to the inlining
   benefits is weighted by eliminated_by_inlining_prob.  The benefits
   additionally include the call cost and the estimate_move_cost of a
   non-void return type and of each non-void argument.  The results are
   stored in the inline summary of NODE as self_time, self_size,
   time_inlining_benefit and size_inlining_benefit; the time benefit is
   capped at MAX_TIME.  */
1895 estimate_function_body_sizes (struct cgraph_node
*node
)
1898 gcov_type time_inlining_benefit
= 0;
1899 /* Estimate static overhead for function prologue/epilogue and alignment. */
1901 /* Benefits are scaled by probability of elimination that is in range
1903 int size_inlining_benefit
= 2 * 2;
1905 gimple_stmt_iterator bsi
;
1906 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1909 tree funtype
= TREE_TYPE (node
->decl
);
1912 fprintf (dump_file
, "Analyzing function body size: %s\n",
1913 cgraph_node_name (node
));
1915 gcc_assert (my_function
&& my_function
->cfg
);
1916 FOR_EACH_BB_FN (bb
, my_function
)
1918 freq
= compute_call_stmt_bb_frequency (node
->decl
, bb
);
1919 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1921 gimple stmt
= gsi_stmt (bsi
);
1922 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
1923 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
1926 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1928 fprintf (dump_file
, " freq:%6i size:%3i time:%3i ",
1929 freq
, this_size
, this_time
);
1930 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1935 prob
= eliminated_by_inlining_prob (stmt
);
1936 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
1937 fprintf (dump_file
, " 50%% will be eliminated by inlining\n");
1938 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
1939 fprintf (dump_file
, " will eliminated by inlining\n");
1940 size_inlining_benefit
+= this_size
* prob
;
1941 time_inlining_benefit
+= this_time
* prob
;
1942 gcc_assert (time
>= 0);
1943 gcc_assert (size
>= 0);
1946 time
= (time
+ CGRAPH_FREQ_BASE
/ 2) / CGRAPH_FREQ_BASE
;
1947 time_inlining_benefit
= ((time_inlining_benefit
+ CGRAPH_FREQ_BASE
)
1948 / (CGRAPH_FREQ_BASE
* 2));
1949 size_inlining_benefit
= (size_inlining_benefit
+ 1) / 2;
1951 fprintf (dump_file
, "Overall function body time: %i-%i size: %i-%i\n",
1952 (int)time
, (int)time_inlining_benefit
,
1953 size
, size_inlining_benefit
);
1954 time_inlining_benefit
+= eni_time_weights
.call_cost
;
1955 size_inlining_benefit
+= eni_size_weights
.call_cost
;
1956 if (!VOID_TYPE_P (TREE_TYPE (funtype
)))
1958 int cost
= estimate_move_cost (TREE_TYPE (funtype
));
1959 time_inlining_benefit
+= cost
;
1960 size_inlining_benefit
+= cost
;
1962 for (arg
= DECL_ARGUMENTS (node
->decl
); arg
; arg
= DECL_CHAIN (arg
))
1963 if (!VOID_TYPE_P (TREE_TYPE (arg
)))
1965 int cost
= estimate_move_cost (TREE_TYPE (arg
));
1966 time_inlining_benefit
+= cost
;
1967 size_inlining_benefit
+= cost
;
1969 if (time_inlining_benefit
> MAX_TIME
)
1970 time_inlining_benefit
= MAX_TIME
;
1971 if (time
> MAX_TIME
)
1973 inline_summary (node
)->self_time
= time
;
1974 inline_summary (node
)->self_size
= size
;
1976 fprintf (dump_file
, "With function call overhead time: %i-%i size: %i-%i\n",
1977 (int)time
, (int)time_inlining_benefit
,
1978 size
, size_inlining_benefit
);
1979 inline_summary (node
)->time_inlining_benefit
= time_inlining_benefit
;
1980 inline_summary (node
)->size_inlining_benefit
= size_inlining_benefit
;
1983 /* Compute parameters of functions used by inliner.

   NODE must not be an inline clone (global.inlined_to is asserted to be
   unset).  The function records the estimated stack size, whether NODE is
   inlinable, whether its signature can change and whether it disregards
   the inline limits.  It then calls estimate_function_body_sizes and
   seeds global.time and global.size from the self time and self size in
   the inline summary.  */
1985 compute_inline_parameters (struct cgraph_node
*node
)
1987 HOST_WIDE_INT self_stack_size
;
1989 gcc_assert (!node
->global
.inlined_to
);
1991 /* Estimate the stack size for the function. But not at -O0
1992 because estimated_stack_frame_size is a quadratic problem. */
1993 self_stack_size
= optimize
? estimated_stack_frame_size (node
->decl
) : 0;
1994 inline_summary (node
)->estimated_self_stack_size
= self_stack_size
;
1995 node
->global
.estimated_stack_size
= self_stack_size
;
1996 node
->global
.stack_frame_offset
= 0;
1998 /* Can this function be inlined at all? */
1999 node
->local
.inlinable
= tree_inlinable_function_p (node
->decl
);
2001 /* Inlinable functions always can change signature. */
2002 if (node
->local
.inlinable
)
2003 node
->local
.can_change_signature
= true;
2006 struct cgraph_edge
*e
;
2008 /* Functions calling __builtin_apply_args cannot change signature. */
2009 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2010 if (DECL_BUILT_IN (e
->callee
->decl
)
2011 && DECL_BUILT_IN_CLASS (e
->callee
->decl
) == BUILT_IN_NORMAL
2012 && DECL_FUNCTION_CODE (e
->callee
->decl
) == BUILT_IN_APPLY_ARGS
)
/* E is NULL here when the walk above ran to the end of the callee list.
   NOTE(review): presumably the walk stops early at the first matching
   callee, which leaves E non-NULL and clears the flag -- confirm.  */
2014 node
->local
.can_change_signature
= !e
;
2016 if (node
->local
.inlinable
&& !node
->local
.disregard_inline_limits
)
2017 node
->local
.disregard_inline_limits
2018 = DECL_DISREGARD_INLINE_LIMITS (node
->decl
);
2019 estimate_function_body_sizes (node
);
2020 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2021 node
->global
.time
= inline_summary (node
)->self_time
;
2022 node
->global
.size
= inline_summary (node
)->self_size
;
2027 /* Compute parameters of functions used by inliner using
   current_function_decl.  This is a parameterless wrapper that looks up
   the cgraph node of current_function_decl and passes it to
   compute_inline_parameters; it serves as the execute hook of
   pass_inline_parameters.  */
2030 compute_inline_parameters_for_current (void)
2032 compute_inline_parameters (cgraph_node (current_function_decl
));
/* Pass descriptor of the "inline_param" pass.  It is executed by
   compute_inline_parameters_for_current, is accounted to
   TV_INLINE_HEURISTICS and sets no TODO flags.  */
2036 struct gimple_opt_pass pass_inline_parameters
=
2040 "inline_param", /* name */
2042 compute_inline_parameters_for_current
,/* execute */
2045 0, /* static_pass_number */
2046 TV_INLINE_HEURISTICS
, /* tv_id */
2047 0, /* properties_required */
2048 0, /* properties_provided */
2049 0, /* properties_destroyed */
2050 0, /* todo_flags_start */
2051 0 /* todo_flags_finish */
2055 /* This function performs intraprocedural analysis in NODE that is required to
   inline indirect calls.  The analysis itself is done by ipa_analyze_node.
   When a dump file is open and TDF_DETAILS is set, the parameters and jump
   functions of NODE are printed to it.  */
2058 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
2060 ipa_analyze_node (node
);
2061 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2063 ipa_print_node_params (dump_file
, node
);
2064 ipa_print_node_jump_functions (dump_file
, node
);
2068 /* Note function body size.  The function context of NODE is pushed with
   push_cfun and current_function_decl is pointed at NODE's decl while the
   inline parameters are computed.  When both flag_indirect_inlining and
   optimize are set, the intraprocedural analysis for indirect inlining is
   run as well.  current_function_decl is reset to NULL afterwards.  */
2070 analyze_function (struct cgraph_node
*node
)
2072 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2073 current_function_decl
= node
->decl
;
2075 compute_inline_parameters (node
);
2076 /* FIXME: We should remove the optimize check after we ensure we never run
2077 IPA passes when not optimizing. */
2078 if (flag_indirect_inlining
&& optimize
)
2079 inline_indirect_intraprocedural_analysis (node
);
2081 current_function_decl
= NULL
;
2085 /* Called when new function is inserted to callgraph late.  NODE is passed
   to analyze_function; DATA is unused.  */
2087 add_new_function (struct cgraph_node
*node
, void *data ATTRIBUTE_UNUSED
)
2089 analyze_function (node
);
2092 /* Generate the inline summary.  add_new_function is registered as the
   function insertion hook and the holder is kept in
   function_insertion_hook_holder.  When flag_indirect_inlining is set,
   the ipa-prop cgraph hooks are registered and the node parameter and
   edge argument structures are created.  The list of cgraph_nodes is
   then walked, calling analyze_function.  */
2094 inline_generate_summary (void)
2096 struct cgraph_node
*node
;
2098 function_insertion_hook_holder
=
2099 cgraph_add_function_insertion_hook (&add_new_function
, NULL
);
2101 if (flag_indirect_inlining
)
2103 ipa_register_cgraph_hooks ();
2104 ipa_check_create_node_params ();
2105 ipa_check_create_edge_args ();
2108 for (node
= cgraph_nodes
; node
; node
= node
->next
)
2110 analyze_function (node
);
2115 /* Apply inline plan to function.  NODE is the function being transformed.
   Its body is saved first when cgraph_preserve_function_body_p says it is
   still needed.  The call statements of all callee edges are then
   redirected and optimize_inline_calls is run on current_function_decl
   under the TV_INTEGRATION timer.  The function is marked in cfun as
   having always-inline functions inlined and as being after inlining.
   The returned value combines the TODO flags from optimize_inline_calls
   with those from execute_fixup_cfg.  */
2117 inline_transform (struct cgraph_node
*node
)
2119 unsigned int todo
= 0;
2120 struct cgraph_edge
*e
;
2121 bool inline_p
= false;
2123 /* FIXME: Currently the passmanager is adding inline transform more than once to some
2124 clones. This needs revisiting after WPA cleanups. */
2125 if (cfun
->after_inlining
)
2128 /* We might need the body of this function so that we can expand
2129 it inline somewhere else. */
2130 if (cgraph_preserve_function_body_p (node
->decl
))
2131 save_inline_function_body (node
);
2133 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2135 cgraph_redirect_edge_call_stmt_to_callee (e
);
2136 if (!e
->inline_failed
|| warn_inline
)
2142 timevar_push (TV_INTEGRATION
);
2143 todo
= optimize_inline_calls (current_function_decl
);
2144 timevar_pop (TV_INTEGRATION
);
2146 cfun
->always_inline_functions_inlined
= true;
2147 cfun
->after_inlining
= true;
2148 return todo
| execute_fixup_cfg ();
/* Summary reader of the inliner.  When flag_indirect_inlining is set, the
   ipa-prop cgraph hooks are registered and the jump functions are read.
   add_new_function is installed as the function insertion hook and the
   holder is kept in function_insertion_hook_holder.  */
2151 /* Read inline summary. Jump functions are shared among ipa-cp
2152 and inliner, so when ipa-cp is active, we don't need to write them
2156 inline_read_summary (void)
2158 if (flag_indirect_inlining
)
2160 ipa_register_cgraph_hooks ();
2162 ipa_prop_read_jump_functions ();
2164 function_insertion_hook_holder
=
2165 cgraph_add_function_insertion_hook (&add_new_function
, NULL
);
2168 /* Write inline summary for node in SET.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice: they are written here only
   when flag_indirect_inlining is set and flag_ipa_cp is not.  VSET is
   unused.  */
2173 inline_write_summary (cgraph_node_set set
,
2174 varpool_node_set vset ATTRIBUTE_UNUSED
)
2176 if (flag_indirect_inlining
&& !flag_ipa_cp
)
2177 ipa_prop_write_jump_functions (set
);
2180 /* When to run IPA inlining.  Inlining of always-inline functions
   happens during early inlining.  This is the gate function of
   pass_ipa_inline.  */
2184 gate_cgraph_decide_inlining (void)
2186 /* ??? We'd like to skip this if not optimizing or not inlining as
2187 all always-inline functions have been processed by early
2188 inlining already. But this at least breaks EH with C++ as
2189 we need to unconditionally run fixup_cfg even at -O0.
2190 So leave it on unconditionally for now. */
/* Pass descriptor of the IPA inliner ("inline").  It is gated by
   gate_cgraph_decide_inlining and executed by cgraph_decide_inlining.
   The summary hooks are inline_generate_summary, inline_write_summary and
   inline_read_summary, and inline_transform is the per-function
   transform.  */
2194 struct ipa_opt_pass_d pass_ipa_inline
=
2198 "inline", /* name */
2199 gate_cgraph_decide_inlining
, /* gate */
2200 cgraph_decide_inlining
, /* execute */
2203 0, /* static_pass_number */
2204 TV_INLINE_HEURISTICS
, /* tv_id */
2205 0, /* properties_required */
2206 0, /* properties_provided */
2207 0, /* properties_destroyed */
2208 TODO_remove_functions
, /* todo_flags_start */
2209 TODO_dump_cgraph
| TODO_dump_func
2210 | TODO_remove_functions
| TODO_ggc_collect
/* todo_flags_finish */
2212 inline_generate_summary
, /* generate_summary */
2213 inline_write_summary
, /* write_summary */
2214 inline_read_summary
, /* read_summary */
2215 NULL
, /* write_optimization_summary */
2216 NULL
, /* read_optimization_summary */
2217 NULL
, /* stmt_fixup */
2219 inline_transform
, /* function_transform */
2220 NULL
, /* variable_transform */
2224 #include "gt-ipa-inline.h"