gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
22 /* Inlining decision heuristics
24 We separate inlining decisions from the inliner itself and store them
25 inside the callgraph as a so-called inline plan. Refer to the cgraph.c
26 documentation for the particular representation of inline plans in the
27 callgraph.
29 There are three major parts of this file:
31 cgraph_mark_inline implementation
33 This function marks the given call inline and performs the necessary
34 modifications of the cgraph (production of the clones and updating of overall
35 statistics).
37 inlining heuristics limits
39 These functions check that a particular inlining is allowed
40 by the limits specified by the user (allowed function growth, overall unit
41 growth and so on).
43 inlining heuristics
45 This is the implementation of the IPA pass that aims to get as much benefit
46 from inlining as possible while obeying the limits checked above.
48 The implementation of the particular heuristics is separated from
49 the rest of the code to make it easier to replace it with a more complicated
50 implementation in the future. The rest of the inlining code acts as a
51 library aimed at modifying the callgraph and verifying that the limits
52 on code size growth are respected.
54 To mark a given call inline, use the cgraph_mark_inline function; the
55 verification is performed by cgraph_default_inline_p and
56 cgraph_check_inline_limits.
58 The heuristics implement a simple knapsack-style algorithm, ordering
59 all functions by their "profitability" (estimated by code size growth)
60 and inlining them in priority order.
62 cgraph_decide_inlining implements heuristics that take the whole callgraph
63 into account, while cgraph_decide_inlining_incrementally considers
64 only one function at a time and is used in non-unit-at-a-time mode.
66 The inliner itself is split into several passes:
68 pass_inline_parameters
70 This pass computes the local properties of functions that are used by the inliner:
71 estimated function body size, whether the function is inlinable at all, and
72 stack frame consumption.
74 Before executing any of the inliner passes, this local pass has to be applied
75 to each function in the callgraph (i.e. run as a subpass of some earlier
76 IPA pass). The results are made out of date by any optimization applied
77 to the function body.
79 pass_early_inlining
81 A simple local inlining pass that inlines callees into the current function. This
82 pass performs no global whole-compilation-unit analysis, so when it is allowed
83 to do inlining that expands code size, it might result in unbounded growth of the
84 whole unit.
86 This is the main inlining pass in non-unit-at-a-time mode.
88 With unit-at-a-time the pass is run during conversion into SSA form.
89 Only functions already converted into SSA form are inlined, so the
90 conversion must happen in topological order on the callgraph (which is
91 maintained by the pass manager). The functions are early-optimized after
92 inlining, so the early inliner sees the unoptimized function itself, but
93 all considered callees are already optimized, allowing it to unfold the
94 abstraction penalty of C++ effectively and cheaply.
96 pass_ipa_early_inlining
98 With profiling, early inlining is also necessary to reduce
99 instrumentation costs on programs with a high abstraction penalty (doing
100 many redundant calls). This can't happen in parallel with early
101 optimization and profile instrumentation, because we would end up
102 re-instrumenting already instrumented function bodies we brought in via
103 inlining.
105 To avoid this, this pass is executed as an IPA pass before profiling. It is
106 a simple wrapper around pass_early_inlining and ensures that inlining happens first.
108 pass_ipa_inline
110 This is the main pass, implementing a simple greedy algorithm for inlining
111 of small functions (which results in overall growth of the compilation unit) and
112 for inlining of functions called once. The pass computes just the so-called inline
113 plan (a representation of the inlining to be done in the callgraph) and, unlike
114 early inlining, it does not perform the inlining itself.
116 pass_apply_inline
118 This pass performs the actual inlining according to pass_ipa_inline on the given
119 function. The function body before inlining is possibly saved when it is
120 needed for further inlining later.  */
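/* A summary sketch of the pass ordering described above (illustrative only;
   reconstructed from the prose in this comment, not from the pass manager):

     pass_inline_parameters   - per function, before any other inliner pass
     pass_ipa_early_inline    - IPA wrapper, only when profile instrumentation is used
     pass_early_inlining      - per function, around the conversion into SSA form
     pass_ipa_inline          - whole callgraph, computes the inline plan
     pass_apply_inline        - per function, carries out the inline plan  */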
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "tree.h"
128 #include "tree-inline.h"
129 #include "langhooks.h"
130 #include "flags.h"
131 #include "cgraph.h"
132 #include "diagnostic.h"
133 #include "timevar.h"
134 #include "params.h"
135 #include "fibheap.h"
136 #include "intl.h"
137 #include "tree-pass.h"
138 #include "hashtab.h"
139 #include "coverage.h"
140 #include "ggc.h"
141 #include "tree-flow.h"
143 /* Statistics we collect about inlining algorithm. */
144 static int ncalls_inlined;
145 static int nfunctions_inlined;
146 static int initial_insns;
147 static int overall_insns;
148 static int max_insns;
149 static gcov_type max_count;
151 /* Estimate size of the function after inlining WHAT into TO. */
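/* For illustration only (hypothetical numbers, not parameter defaults): if TO
   has 100 insns, WHAT has 30 insns, the call costs 16 insns including the
   argument moves, and TIMES is 2, the estimate is (30 - 16) * 2 + 100 = 128.  */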
153 static int
154 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
155 struct cgraph_node *what)
157 int size;
158 tree fndecl = what->decl, arg;
159 int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);
161 for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
162 call_insns += estimate_move_cost (TREE_TYPE (arg));
163 size = (what->global.insns - call_insns) * times + to->global.insns;
164 gcc_assert (size >= 0);
165 return size;
168 /* E is expected to be an edge being inlined. Clone the destination node of
169 the edge and redirect it to the new clone.
170 DUPLICATE is used for bookkeeping on whether we are actually creating new
171 clones or re-using the node originally representing the out-of-line function call. */
173 void
174 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
176 HOST_WIDE_INT peak;
177 if (duplicate)
179 /* We may eliminate the need for out-of-line copy to be output.
180 In that case just go ahead and re-use it. */
181 if (!e->callee->callers->next_caller
182 && !e->callee->needed
183 && flag_unit_at_a_time)
185 gcc_assert (!e->callee->global.inlined_to);
186 if (DECL_SAVED_TREE (e->callee->decl))
187 overall_insns -= e->callee->global.insns, nfunctions_inlined++;
188 duplicate = false;
190 else
192 struct cgraph_node *n;
193 n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
194 update_original);
195 cgraph_redirect_edge_callee (e, n);
199 if (e->caller->global.inlined_to)
200 e->callee->global.inlined_to = e->caller->global.inlined_to;
201 else
202 e->callee->global.inlined_to = e->caller;
203 e->callee->global.stack_frame_offset
204 = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
205 peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
206 if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
207 e->callee->global.inlined_to->global.estimated_stack_size = peak;
209 /* Recursively clone all bodies. */
210 for (e = e->callee->callees; e; e = e->next_callee)
211 if (!e->inline_failed)
212 cgraph_clone_inlined_nodes (e, duplicate, update_original);
215 /* Mark edge E as inlined and update the callgraph accordingly.
216 UPDATE_ORIGINAL specifies whether the profile of the original function should be
217 updated. */
219 void
220 cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
222 int old_insns = 0, new_insns = 0;
223 struct cgraph_node *to = NULL, *what;
225 if (e->callee->inline_decl)
226 cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));
228 gcc_assert (e->inline_failed);
229 e->inline_failed = NULL;
231 if (!e->callee->global.inlined && flag_unit_at_a_time)
232 DECL_POSSIBLY_INLINED (e->callee->decl) = true;
233 e->callee->global.inlined = true;
235 cgraph_clone_inlined_nodes (e, true, update_original);
237 what = e->callee;
239 /* Now update the size of the caller and of all functions the caller is inlined into. */
240 for (;e && !e->inline_failed; e = e->caller->callers)
242 old_insns = e->caller->global.insns;
243 new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
244 what);
245 gcc_assert (new_insns >= 0);
246 to = e->caller;
247 to->global.insns = new_insns;
249 gcc_assert (what->global.inlined_to == to);
250 if (new_insns > old_insns)
251 overall_insns += new_insns - old_insns;
252 ncalls_inlined++;
255 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
256 Return the following unredirected edge in the list of callers
257 of EDGE->CALLEE. */
259 static struct cgraph_edge *
260 cgraph_mark_inline (struct cgraph_edge *edge)
262 struct cgraph_node *to = edge->caller;
263 struct cgraph_node *what = edge->callee;
264 struct cgraph_edge *e, *next;
265 int times = 0;
267 /* Look for all calls, mark them inline and clone recursively
268 all inlined functions. */
269 for (e = what->callers; e; e = next)
271 next = e->next_caller;
272 if (e->caller == to && e->inline_failed)
274 cgraph_mark_inline_edge (e, true);
275 if (e == edge)
276 edge = next;
277 times++;
280 gcc_assert (times);
281 return edge;
284 /* Estimate the growth caused by inlining NODE into all of its callers. */
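/* Illustrative only (hypothetical numbers): if NODE has 20 insns, a call to it
   costs 12 insns, and NODE has three callers, each call site grows by about
   20 - 12 = 8 insns, giving 24 insns of growth; if the offline copy of NODE can
   then be eliminated, its 20 insns are subtracted again, for a net growth of 4.  */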
286 static int
287 cgraph_estimate_growth (struct cgraph_node *node)
289 int growth = 0;
290 struct cgraph_edge *e;
291 if (node->global.estimated_growth != INT_MIN)
292 return node->global.estimated_growth;
294 for (e = node->callers; e; e = e->next_caller)
295 if (e->inline_failed)
296 growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
297 - e->caller->global.insns);
299 /* ??? Wrong for self-recursive functions or cases where we decide not to
300 inline for different reasons, but it is not a big deal as in that case
301 we will keep the body around, but we will also avoid some inlining. */
302 if (!node->needed && !DECL_EXTERNAL (node->decl))
303 growth -= node->global.insns;
305 node->global.estimated_growth = growth;
306 return growth;
309 /* Return false when inlining WHAT into TO is not a good idea
310 as it would cause too large a growth of function bodies.
311 When ONE_ONLY is true, assume that only one call site is going
312 to be inlined, otherwise figure out how many call sites in
313 TO call WHAT and verify that all can be inlined. */
316 static bool
317 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
318 const char **reason, bool one_only)
320 int times = 0;
321 struct cgraph_edge *e;
322 int newsize;
323 int limit;
324 HOST_WIDE_INT stack_size_limit, inlined_stack;
326 if (one_only)
327 times = 1;
328 else
329 for (e = to->callees; e; e = e->next_callee)
330 if (e->callee == what)
331 times++;
333 if (to->global.inlined_to)
334 to = to->global.inlined_to;
336 /* When inlining a large function body called once into a small function,
337 take the inlined function as the base for limiting the growth. */
338 if (to->local.self_insns > what->local.self_insns)
339 limit = to->local.self_insns;
340 else
341 limit = what->local.self_insns;
343 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
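/* For example, with --param large-function-growth=100 (a hypothetical setting,
   not necessarily the default) the statement above doubles the limit:
   limit + limit * 100 / 100 == 2 * limit.  */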
345 /* Check the size after inlining against the function limits. But allow
346 the function to shrink if it went over the limits by forced inlining. */
347 newsize = cgraph_estimate_size_after_inlining (times, to, what);
348 if (newsize >= to->global.insns
349 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
350 && newsize > limit)
352 if (reason)
353 *reason = N_("--param large-function-growth limit reached");
354 return false;
357 stack_size_limit = to->local.estimated_self_stack_size;
359 stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
361 inlined_stack = (to->global.stack_frame_offset
362 + to->local.estimated_self_stack_size
363 + what->global.estimated_stack_size);
364 if (inlined_stack > stack_size_limit
365 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
367 if (reason)
368 *reason = N_("--param large-stack-frame-growth limit reached");
369 return false;
371 return true;
374 /* Return true when function N is small enough to be inlined. */
376 bool
377 cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
379 tree decl = n->decl;
381 if (n->inline_decl)
382 decl = n->inline_decl;
383 if (!DECL_INLINE (decl))
385 if (reason)
386 *reason = N_("function not inlinable");
387 return false;
390 if (!DECL_STRUCT_FUNCTION (decl)->cfg)
392 if (reason)
393 *reason = N_("function body not available");
394 return false;
397 if (DECL_DECLARED_INLINE_P (decl))
399 if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
401 if (reason)
402 *reason = N_("--param max-inline-insns-single limit reached");
403 return false;
406 else
408 if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
410 if (reason)
411 *reason = N_("--param max-inline-insns-auto limit reached");
412 return false;
416 return true;
419 /* Return true when inlining WHAT would create recursive inlining.
420 We call recursive inlining all cases where the same function appears more than
421 once in a single recursion nest path in the inline graph. */
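/* For example, inlining A into a copy of B that has itself been inlined into A
   makes A appear twice on the recursion nest path, so it is flagged here.  */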
423 static bool
424 cgraph_recursive_inlining_p (struct cgraph_node *to,
425 struct cgraph_node *what,
426 const char **reason)
428 bool recursive;
429 if (to->global.inlined_to)
430 recursive = what->decl == to->global.inlined_to->decl;
431 else
432 recursive = what->decl == to->decl;
433 /* Marking a recursive function inline has sane semantics and thus we should
434 not warn about it. */
435 if (recursive && reason)
436 *reason = (what->local.disregard_inline_limits
437 ? N_("recursive inlining") : "");
438 return recursive;
441 /* Return true if the call can be hot. */
442 static bool
443 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
445 if (profile_info && flag_branch_probabilities
446 && (edge->count
447 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
448 return false;
449 return true;
452 /* A cost model driving the inlining heuristics in such a way that the edges with
453 the smallest badness are inlined first. After each inlining is performed
454 the costs of all caller edges of the affected nodes are recomputed, so the
455 metrics may accurately depend on values such as the number of inlinable callers
456 of the function or the function body size.
458 With profiling we use the number of executions of each edge to drive the cost.
459 We should also distinguish hot and cold calls, where the cold calls are
460 inlined only when code size is improved overall. */
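/* Informal summary of the computation below (a reading of the code, not an
   authoritative formula): with profile data,
     badness ~= (edge->count / max_count) * INT_MIN / growth,
   so hotter edges and smaller growth produce more negative (better) keys, and
   size-shrinking inlines get the most negative keys of all.  Without profile
   data, badness is the estimated overall growth scaled by 256 and adjusted by
   the loop nesting depth, so nested calls are preferred.  */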
463 static int
464 cgraph_edge_badness (struct cgraph_edge *edge)
466 if (max_count)
468 int growth =
469 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
470 growth -= edge->caller->global.insns;
472 /* Always prefer inlining saving code size. */
473 if (growth <= 0)
474 return INT_MIN - growth;
475 return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
477 else
479 int nest = MIN (edge->loop_nest, 8);
480 int badness = cgraph_estimate_growth (edge->callee) * 256;
482 /* Decrease badness if call is nested. */
483 if (badness > 0)
484 badness >>= nest;
485 else
486 badness <<= nest;
488 /* Make recursive inlining always happen after other inlining is done. */
489 if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
490 return badness + 1;
491 else
492 return badness;
498 /* Recompute the heap nodes for each of NODE's caller edges. */
498 static void
499 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
500 bitmap updated_nodes)
502 struct cgraph_edge *edge;
503 const char *failed_reason;
505 if (!node->local.inlinable || node->local.disregard_inline_limits
506 || node->global.inlined_to)
507 return;
508 if (bitmap_bit_p (updated_nodes, node->uid))
509 return;
510 bitmap_set_bit (updated_nodes, node->uid);
511 node->global.estimated_growth = INT_MIN;
513 if (!node->local.inlinable)
514 return;
515 /* Prune out edges we won't inline into anymore. */
516 if (!cgraph_default_inline_p (node, &failed_reason))
518 for (edge = node->callers; edge; edge = edge->next_caller)
519 if (edge->aux)
521 fibheap_delete_node (heap, edge->aux);
522 edge->aux = NULL;
523 if (edge->inline_failed)
524 edge->inline_failed = failed_reason;
526 return;
529 for (edge = node->callers; edge; edge = edge->next_caller)
530 if (edge->inline_failed)
532 int badness = cgraph_edge_badness (edge);
533 if (edge->aux)
535 fibnode_t n = edge->aux;
536 gcc_assert (n->data == edge);
537 if (n->key == badness)
538 continue;
540 /* fibheap_replace_key can only decrease keys; otherwise delete the node and re-insert it. */
541 if (fibheap_replace_key (heap, n, badness))
542 continue;
543 fibheap_delete_node (heap, edge->aux);
545 edge->aux = fibheap_insert (heap, badness, edge);
551 /* Recompute the heap nodes for the caller edges of each of NODE's callees. */
551 static void
552 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
553 bitmap updated_nodes)
555 struct cgraph_edge *e;
556 node->global.estimated_growth = INT_MIN;
558 for (e = node->callees; e; e = e->next_callee)
559 if (e->inline_failed)
560 update_caller_keys (heap, e->callee, updated_nodes);
561 else if (!e->inline_failed)
562 update_callee_keys (heap, e->callee, updated_nodes);
565 /* Enqueue all recursive calls from NODE into the priority queue, ordered by
566 how much we want to recursively inline the call. */
568 static void
569 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
570 fibheap_t heap)
572 static int priority;
573 struct cgraph_edge *e;
574 for (e = where->callees; e; e = e->next_callee)
575 if (e->callee == node)
577 /* When profile feedback is available, prioritize by the expected number
578 of calls. Without profile feedback we maintain a simple queue
579 to order candidates by recursion depth. */
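/* The divisor below is ceil (max_count / 2^24), which scales edge counts into
   roughly the range [0, 2^24]; negating them makes hotter calls extract first
   from the min-heap.  */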
580 fibheap_insert (heap,
581 !max_count ? priority++
582 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
585 for (e = where->callees; e; e = e->next_callee)
586 if (!e->inline_failed)
587 lookup_recursive_calls (node, e->callee, heap);
590 /* Find callgraph nodes closing a circle in the graph. The
591 resulting hashtab can be used to avoid walking the circles.
592 Uses the cgraph nodes' ->aux field, which needs to be zero
593 before and will be zero after the operation. */
595 static void
596 cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
598 struct cgraph_edge *e;
600 if (node->aux)
602 void **slot;
603 slot = htab_find_slot (cycles, node, INSERT);
604 if (!*slot)
606 if (dump_file)
607 fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
608 *slot = node;
610 return;
613 node->aux = node;
614 for (e = node->callees; e; e = e->next_callee)
615 cgraph_find_cycles (e->callee, cycles);
616 node->aux = 0;
619 /* Flatten the cgraph node. We have to be careful in recursing
620 so as not to run endlessly in circles of the callgraph.
621 We do so by using a hashtab of cycle-entering nodes as generated
622 by cgraph_find_cycles. */
624 static void
625 cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
627 struct cgraph_edge *e;
629 for (e = node->callees; e; e = e->next_callee)
631 /* Inline call, if possible, and recurse. Be sure we are not
632 entering callgraph circles here. */
633 if (e->inline_failed
634 && e->callee->local.inlinable
635 && !cgraph_recursive_inlining_p (node, e->callee,
636 &e->inline_failed)
637 && !htab_find (cycles, e->callee))
639 if (dump_file)
640 fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
641 cgraph_mark_inline_edge (e, true);
642 cgraph_flatten_node (e->callee, cycles);
644 else if (dump_file)
645 fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
649 /* Decide on recursive inlining: in case the function has recursive calls,
650 inline until the body size reaches the given limit. */
652 static bool
653 cgraph_decide_recursive_inlining (struct cgraph_node *node)
655 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
656 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
657 int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
658 fibheap_t heap;
659 struct cgraph_edge *e;
660 struct cgraph_node *master_clone, *next;
661 int depth = 0;
662 int n = 0;
664 if (DECL_DECLARED_INLINE_P (node->decl))
666 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
667 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
670 /* Make sure that function is small enough to be considered for inlining. */
671 if (!max_depth
672 || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
673 return false;
674 heap = fibheap_new ();
675 lookup_recursive_calls (node, node, heap);
676 if (fibheap_empty (heap))
678 fibheap_delete (heap);
679 return false;
682 if (dump_file)
683 fprintf (dump_file,
684 " Performing recursive inlining on %s\n",
685 cgraph_node_name (node));
687 /* We need original clone to copy around. */
688 master_clone = cgraph_clone_node (node, node->count, 1, false);
689 master_clone->needed = true;
690 for (e = master_clone->callees; e; e = e->next_callee)
691 if (!e->inline_failed)
692 cgraph_clone_inlined_nodes (e, true, false);
694 /* Do the inlining and update the list of recursive calls during the process. */
695 while (!fibheap_empty (heap)
696 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
697 <= limit))
699 struct cgraph_edge *curr = fibheap_extract_min (heap);
700 struct cgraph_node *cnode;
702 depth = 1;
703 for (cnode = curr->caller;
704 cnode->global.inlined_to; cnode = cnode->callers->caller)
705 if (node->decl == curr->callee->decl)
706 depth++;
707 if (depth > max_depth)
709 if (dump_file)
710 fprintf (dump_file,
711 " maximal depth reached\n");
712 continue;
715 if (max_count)
717 if (!cgraph_maybe_hot_edge_p (curr))
719 if (dump_file)
720 fprintf (dump_file, " Not inlining cold call\n");
721 continue;
723 if (curr->count * 100 / node->count < probability)
725 if (dump_file)
726 fprintf (dump_file,
727 " Probability of edge is too small\n");
728 continue;
732 if (dump_file)
734 fprintf (dump_file,
735 " Inlining call of depth %i", depth);
736 if (node->count)
738 fprintf (dump_file, " called approx. %.2f times per call",
739 (double)curr->count / node->count);
741 fprintf (dump_file, "\n");
743 cgraph_redirect_edge_callee (curr, master_clone);
744 cgraph_mark_inline_edge (curr, false);
745 lookup_recursive_calls (node, curr->callee, heap);
746 n++;
748 if (!fibheap_empty (heap) && dump_file)
749 fprintf (dump_file, " Recursive inlining growth limit met.\n");
751 fibheap_delete (heap);
752 if (dump_file)
753 fprintf (dump_file,
754 "\n Inlined %i times, body grown from %i to %i insns\n", n,
755 master_clone->global.insns, node->global.insns);
757 /* Remove the master clone we used for inlining. We rely on the fact that clones
758 inlined into the master clone get queued just before the master clone, so we don't
759 need recursion. */
760 for (node = cgraph_nodes; node != master_clone;
761 node = next)
763 next = node->next;
764 if (node->global.inlined_to == master_clone)
765 cgraph_remove_node (node);
767 cgraph_remove_node (master_clone);
768 /* FIXME: Recursive inlining actually reduces the number of calls to the
769 function. At this point we should probably walk the function and its
770 inline clones and compensate the counts accordingly. This probably
771 doesn't matter much in practice. */
772 return n > 0;
777 /* Set inline_failed for all callers of the given function to REASON. */
777 static void
778 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
780 struct cgraph_edge *e;
782 if (dump_file)
783 fprintf (dump_file, "Inlining failed: %s\n", reason);
784 for (e = node->callers; e; e = e->next_caller)
785 if (e->inline_failed)
786 e->inline_failed = reason;
789 /* We use a greedy algorithm for inlining of small functions:
790 all inline candidates are put into a prioritized heap based on the estimated
791 growth of the overall number of instructions, and the estimates are then updated.
793 INLINED and INLINED_CALLEES are just pointers to arrays large enough
794 to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
796 static void
797 cgraph_decide_inlining_of_small_functions (void)
799 struct cgraph_node *node;
800 struct cgraph_edge *edge;
801 const char *failed_reason;
802 fibheap_t heap = fibheap_new ();
803 bitmap updated_nodes = BITMAP_ALLOC (NULL);
805 if (dump_file)
806 fprintf (dump_file, "\nDeciding on smaller functions:\n");
808 /* Put all inline candidates into the heap. */
810 for (node = cgraph_nodes; node; node = node->next)
812 if (!node->local.inlinable || !node->callers
813 || node->local.disregard_inline_limits)
814 continue;
815 if (dump_file)
816 fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));
818 node->global.estimated_growth = INT_MIN;
819 if (!cgraph_default_inline_p (node, &failed_reason))
821 cgraph_set_inline_failed (node, failed_reason);
822 continue;
825 for (edge = node->callers; edge; edge = edge->next_caller)
826 if (edge->inline_failed)
828 gcc_assert (!edge->aux);
829 edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
832 while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
834 int old_insns = overall_insns;
835 struct cgraph_node *where;
836 int growth =
837 cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
839 growth -= edge->caller->global.insns;
841 if (dump_file)
843 fprintf (dump_file,
844 "\nConsidering %s with %i insns\n",
845 cgraph_node_name (edge->callee),
846 edge->callee->global.insns);
847 fprintf (dump_file,
848 " to be inlined into %s\n"
849 " Estimated growth after inlined into all callees is %+i insns.\n"
850 " Estimated badness is %i.\n",
851 cgraph_node_name (edge->caller),
852 cgraph_estimate_growth (edge->callee),
853 cgraph_edge_badness (edge));
854 if (edge->count)
855 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
857 gcc_assert (edge->aux);
858 edge->aux = NULL;
859 if (!edge->inline_failed)
860 continue;
862 /* When profile info is not available we do not weight in any way the
863 position of the call within the procedure itself. This means that if a call of
864 function A from function B seems profitable to inline, the recursive
865 call of function A in the inline copy of A in B will look profitable too,
866 and we end up inlining until reaching the maximal function growth. This
867 is not a good idea, so prohibit the recursive inlining.
869 ??? When the frequencies are taken into account we might not need this
870 restriction. */
871 if (!max_count)
873 where = edge->caller;
874 while (where->global.inlined_to)
876 if (where->decl == edge->callee->decl)
877 break;
878 where = where->callers->caller;
880 if (where->global.inlined_to)
882 edge->inline_failed
883 = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
884 if (dump_file)
885 fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
886 continue;
890 if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
892 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
893 &edge->inline_failed))
895 edge->inline_failed =
896 N_("call is unlikely");
897 if (dump_file)
898 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
900 continue;
902 if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
904 if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
905 &edge->inline_failed))
907 if (dump_file)
908 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
910 continue;
912 if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
913 &edge->inline_failed))
915 where = edge->caller;
916 if (where->global.inlined_to)
917 where = where->global.inlined_to;
918 if (!cgraph_decide_recursive_inlining (where))
919 continue;
920 update_callee_keys (heap, where, updated_nodes);
922 else
924 struct cgraph_node *callee;
925 if (!cgraph_check_inline_limits (edge->caller, edge->callee,
926 &edge->inline_failed, true))
928 if (dump_file)
929 fprintf (dump_file, " Not inlining into %s:%s.\n",
930 cgraph_node_name (edge->caller), edge->inline_failed);
931 continue;
933 callee = edge->callee;
934 cgraph_mark_inline_edge (edge, true);
935 update_callee_keys (heap, callee, updated_nodes);
937 where = edge->caller;
938 if (where->global.inlined_to)
939 where = where->global.inlined_to;
941 /* Our profitability metric can depend on local properties
942 such as the number of inlinable calls and the size of the function body.
943 After inlining these properties might change for the function we
944 inlined into (since its body size changed) and for the functions
945 called by the function we inlined (since the number of their inlinable callers
946 might change). */
947 update_caller_keys (heap, where, updated_nodes);
948 bitmap_clear (updated_nodes);
950 if (dump_file)
952 fprintf (dump_file,
953 " Inlined into %s which now has %i insns,"
954 " net change of %+i insns.\n",
955 cgraph_node_name (edge->caller),
956 edge->caller->global.insns,
957 overall_insns - old_insns);
960 while ((edge = fibheap_extract_min (heap)) != NULL)
962 gcc_assert (edge->aux);
963 edge->aux = NULL;
964 if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
965 && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
966 &edge->inline_failed))
967 edge->inline_failed = N_("--param inline-unit-growth limit reached");
969 fibheap_delete (heap);
970 BITMAP_FREE (updated_nodes);
973 /* Decide on the inlining. We do so in topological order to avoid
974 the expense of updating data structures. */
976 static unsigned int
977 cgraph_decide_inlining (void)
979 struct cgraph_node *node;
980 int nnodes;
981 struct cgraph_node **order =
982 XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
983 int old_insns = 0;
984 int i;
986 max_count = 0;
987 for (node = cgraph_nodes; node; node = node->next)
988 if (node->analyzed && (node->needed || node->reachable))
990 struct cgraph_edge *e;
992 initial_insns += node->local.self_insns;
993 gcc_assert (node->local.self_insns == node->global.insns);
994 for (e = node->callees; e; e = e->next_callee)
995 if (max_count < e->count)
996 max_count = e->count;
998 overall_insns = initial_insns;
999 gcc_assert (!max_count || (profile_info && flag_branch_probabilities));
1001 max_insns = overall_insns;
1002 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1003 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1005 max_insns = ((HOST_WIDEST_INT) max_insns
1006 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1008 nnodes = cgraph_postorder (order);
1010 if (dump_file)
1011 fprintf (dump_file,
1012 "\nDeciding on inlining. Starting with %i insns.\n",
1013 initial_insns);
1015 for (node = cgraph_nodes; node; node = node->next)
1016 node->aux = 0;
1018 if (dump_file)
1019 fprintf (dump_file, "\nInlining always_inline functions:\n");
1021 /* In the first pass mark all always_inline edges. Do this with a priority
1022 so none of our later choices will make this impossible. */
1023 for (i = nnodes - 1; i >= 0; i--)
1025 struct cgraph_edge *e, *next;
1027 node = order[i];
1029 /* Handle nodes to be flattened, but don't update overall unit size. */
1030 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
1032 int old_overall_insns = overall_insns;
1033 htab_t cycles;
1034 if (dump_file)
1035 fprintf (dump_file,
1036 "Flattening %s\n", cgraph_node_name (node));
1037 cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
1038 cgraph_find_cycles (node, cycles);
1039 cgraph_flatten_node (node, cycles);
1040 htab_delete (cycles);
1041 overall_insns = old_overall_insns;
1042 /* We don't need to consider always_inline functions inside the flattened
1043 function anymore. */
1044 continue;
1047 if (!node->local.disregard_inline_limits)
1048 continue;
1049 if (dump_file)
1050 fprintf (dump_file,
1051 "\nConsidering %s %i insns (always inline)\n",
1052 cgraph_node_name (node), node->global.insns);
1053 old_insns = overall_insns;
1054 for (e = node->callers; e; e = next)
1056 next = e->next_caller;
1057 if (!e->inline_failed)
1058 continue;
1059 if (cgraph_recursive_inlining_p (e->caller, e->callee,
1060 &e->inline_failed))
1061 continue;
1062 cgraph_mark_inline_edge (e, true);
1063 if (dump_file)
1064 fprintf (dump_file,
1065 " Inlined into %s which now has %i insns.\n",
1066 cgraph_node_name (e->caller),
1067 e->caller->global.insns);
1069 if (dump_file)
1070 fprintf (dump_file,
1071 " Inlined for a net change of %+i insns.\n",
1072 overall_insns - old_insns);
1075 if (!flag_really_no_inline)
1076 cgraph_decide_inlining_of_small_functions ();
1078 if (!flag_really_no_inline
1079 && flag_inline_functions_called_once)
1081 if (dump_file)
1082 fprintf (dump_file, "\nDeciding on functions called once:\n");
1084 /* And finally decide what functions are called once. */
1086 for (i = nnodes - 1; i >= 0; i--)
1088 node = order[i];
1090 if (node->callers && !node->callers->next_caller && !node->needed
1091 && node->local.inlinable && node->callers->inline_failed
1092 && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1094 if (dump_file)
1096 fprintf (dump_file,
1097 "\nConsidering %s %i insns.\n",
1098 cgraph_node_name (node), node->global.insns);
1099 fprintf (dump_file,
1100 " Called once from %s %i insns.\n",
1101 cgraph_node_name (node->callers->caller),
1102 node->callers->caller->global.insns);
1105 old_insns = overall_insns;
1107 if (cgraph_check_inline_limits (node->callers->caller, node,
1108 NULL, false))
1110 cgraph_mark_inline (node->callers);
1111 if (dump_file)
1112 fprintf (dump_file,
1113 " Inlined into %s which now has %i insns"
1114 " for a net change of %+i insns.\n",
1115 cgraph_node_name (node->callers->caller),
1116 node->callers->caller->global.insns,
1117 overall_insns - old_insns);
1119 else
1121 if (dump_file)
1122 fprintf (dump_file,
1123 " Inline limit reached, not inlined.\n");
1129 if (dump_file)
1130 fprintf (dump_file,
1131 "\nInlined %i calls, eliminated %i functions, "
1132 "%i insns turned to %i insns.\n\n",
1133 ncalls_inlined, nfunctions_inlined, initial_insns,
1134 overall_insns);
1135 free (order);
1136 return 0;
1139 /* Decide on inlining in NODE incrementally, considering one callee at a time.
1140 Used by the early inliner and in non-unit-at-a-time mode. */
1142 static unsigned int
1143 cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
1145 struct cgraph_edge *e;
1146 bool inlined = false;
1147 const char *failed_reason;
1148 unsigned int todo = 0;
1150 #ifdef ENABLE_CHECKING
1151 verify_cgraph_node (node);
1152 #endif
1154 /* First of all look for always inline functions. */
1155 for (e = node->callees; e; e = e->next_callee)
1156 if (e->callee->local.disregard_inline_limits
1157 && e->inline_failed
1158 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1159 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1160 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1161 /* ??? It is possible that renaming a variable removed the function body
1162 in duplicate_decls. See gcc.c-torture/compile/20011119-2.c. */
1163 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1165 if (dump_file && early)
1167 fprintf (dump_file, " Early inlining %s",
1168 cgraph_node_name (e->callee));
1169 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1171 cgraph_mark_inline (e);
1172 /* In order to fully inline always_inline functions at -O0, we need to
1173 recurse here, since the inlined functions might not be processed by
1174 incremental inlining at all yet. */
1176 if (!flag_unit_at_a_time)
1177 cgraph_decide_inlining_incrementally (e->callee, early);
1179 inlined = true;
1182 /* Now do the automatic inlining. */
1183 if (!flag_really_no_inline)
1184 for (e = node->callees; e; e = e->next_callee)
1185 if (e->callee->local.inlinable
1186 && e->inline_failed
1187 && !e->callee->local.disregard_inline_limits
1188 && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1189 && (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1190 == gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
1191 && (!early
1192 || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
1193 <= e->caller->global.insns))
1194 && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
1195 false)
1196 && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1198 if (cgraph_default_inline_p (e->callee, &failed_reason))
1200 if (dump_file && early)
1202 fprintf (dump_file, " Early inlining %s",
1203 cgraph_node_name (e->callee));
1204 fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1206 cgraph_mark_inline (e);
1207 inlined = true;
1209 else if (!early)
1210 e->inline_failed = failed_reason;
1212 if (early && inlined && !node->global.inlined_to)
1214 timevar_push (TV_INTEGRATION);
1215 todo = optimize_inline_calls (current_function_decl);
1216 timevar_pop (TV_INTEGRATION);
1218 return todo;
1221 /* When inlining shall be performed. */
1222 static bool
1223 cgraph_gate_inlining (void)
1225 return flag_inline_trees;
1228 struct tree_opt_pass pass_ipa_inline =
1230 "inline", /* name */
1231 cgraph_gate_inlining, /* gate */
1232 cgraph_decide_inlining, /* execute */
1233 NULL, /* sub */
1234 NULL, /* next */
1235 0, /* static_pass_number */
1236 TV_INLINE_HEURISTICS, /* tv_id */
1237 0, /* properties_required */
1238 PROP_cfg, /* properties_provided */
1239 0, /* properties_destroyed */
1240 TODO_remove_functions, /* todo_flags_start */
1241 TODO_dump_cgraph | TODO_dump_func
1242 | TODO_remove_functions, /* todo_flags_finish */
1243 0 /* letter */
1246 /* Because inlining might remove no-longer-reachable nodes, we need to
1247 keep the array visible to the garbage collector to avoid reading
1248 collected-out nodes. */
1249 static int nnodes;
1250 static GTY ((length ("nnodes"))) struct cgraph_node **order;
1252 /* Do inlining of small functions. Doing so early helps profiling and other
1253 passes to be somewhat more effective and avoids some code duplication in
1254 later real inlining pass for testcases with very many function calls. */
1255 static unsigned int
1256 cgraph_early_inlining (void)
1258 struct cgraph_node *node = cgraph_node (current_function_decl);
1260 if (sorrycount || errorcount)
1261 return 0;
1262 return cgraph_decide_inlining_incrementally (node, flag_unit_at_a_time);
1265 /* When inlining shall be performed. */
1266 static bool
1267 cgraph_gate_early_inlining (void)
1269 return flag_inline_trees && flag_early_inlining;
1272 struct tree_opt_pass pass_early_inline =
1274 "einline", /* name */
1275 cgraph_gate_early_inlining, /* gate */
1276 cgraph_early_inlining, /* execute */
1277 NULL, /* sub */
1278 NULL, /* next */
1279 0, /* static_pass_number */
1280 TV_INLINE_HEURISTICS, /* tv_id */
1281 0, /* properties_required */
1282 PROP_cfg, /* properties_provided */
1283 0, /* properties_destroyed */
1284 0, /* todo_flags_start */
1285 TODO_dump_func, /* todo_flags_finish */
1286 0 /* letter */
1289 /* When inlining shall be performed. */
1290 static bool
1291 cgraph_gate_ipa_early_inlining (void)
1293 return (flag_inline_trees && flag_early_inlining
1294 && (flag_branch_probabilities || flag_test_coverage
1295 || profile_arc_flag));
1298 /* IPA pass wrapper for the early inlining pass. We need to run early inlining
1299 before tree profiling, so we have a stand-alone IPA pass for doing so. */
1300 struct tree_opt_pass pass_ipa_early_inline =
1302 "einline_ipa", /* name */
1303 cgraph_gate_ipa_early_inlining, /* gate */
1304 NULL, /* execute */
1305 NULL, /* sub */
1306 NULL, /* next */
1307 0, /* static_pass_number */
1308 TV_INLINE_HEURISTICS, /* tv_id */
1309 0, /* properties_required */
1310 PROP_cfg, /* properties_provided */
1311 0, /* properties_destroyed */
1312 0, /* todo_flags_start */
1313 TODO_dump_cgraph, /* todo_flags_finish */
1314 0 /* letter */
1317 /* Compute parameters of functions used by inliner. */
1318 static unsigned int
1319 compute_inline_parameters (void)
1321 struct cgraph_node *node = cgraph_node (current_function_decl);
1323 gcc_assert (!node->global.inlined_to);
1324 node->local.estimated_self_stack_size = estimated_stack_frame_size ();
1325 node->global.estimated_stack_size = node->local.estimated_self_stack_size;
1326 node->global.stack_frame_offset = 0;
1327 node->local.inlinable = tree_inlinable_function_p (current_function_decl);
1328 node->local.self_insns = estimate_num_insns (current_function_decl);
1329 if (node->local.inlinable)
1330 node->local.disregard_inline_limits
1331 = lang_hooks.tree_inlining.disregard_inline_limits (current_function_decl);
1332 if (flag_really_no_inline && !node->local.disregard_inline_limits)
1333 node->local.inlinable = 0;
1334 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
1335 node->global.insns = node->local.self_insns;
1336 return 0;
1339 /* When inlining shall be performed. */
1340 static bool
1341 gate_inline_passes (void)
1343 return flag_inline_trees;
1346 struct tree_opt_pass pass_inline_parameters =
1348 NULL, /* name */
1349 gate_inline_passes, /* gate */
1350 compute_inline_parameters, /* execute */
1351 NULL, /* sub */
1352 NULL, /* next */
1353 0, /* static_pass_number */
1354 TV_INLINE_HEURISTICS, /* tv_id */
1355 0, /* properties_required */
1356 PROP_cfg, /* properties_provided */
1357 0, /* properties_destroyed */
1358 0, /* todo_flags_start */
1359 0, /* todo_flags_finish */
1360 0 /* letter */
1363 /* Apply inline plan to the function. */
1364 static unsigned int
1365 apply_inline (void)
1367 unsigned int todo = 0;
1368 struct cgraph_edge *e;
1369 struct cgraph_node *node = cgraph_node (current_function_decl);
1371 /* Even when not optimizing, ensure that always_inline functions get inlined.
1373 if (!optimize)
1374 cgraph_decide_inlining_incrementally (node, false);
1376 /* We might need the body of this function so that we can expand
1377 it inline somewhere else. */
1378 if (cgraph_preserve_function_body_p (current_function_decl))
1379 save_inline_function_body (node);
1381 for (e = node->callees; e; e = e->next_callee)
1382 if (!e->inline_failed || warn_inline)
1383 break;
1384 if (e)
1386 timevar_push (TV_INTEGRATION);
1387 todo = optimize_inline_calls (current_function_decl);
1388 timevar_pop (TV_INTEGRATION);
1390 /* In non-unit-at-a-time we must mark all referenced functions as needed. */
1391 if (!flag_unit_at_a_time)
1393 struct cgraph_edge *e;
1394 for (e = node->callees; e; e = e->next_callee)
1395 if (e->callee->analyzed)
1396 cgraph_mark_needed_node (e->callee);
1398 return todo | execute_fixup_cfg ();
1401 struct tree_opt_pass pass_apply_inline =
1403 "apply_inline", /* name */
1404 NULL, /* gate */
1405 apply_inline, /* execute */
1406 NULL, /* sub */
1407 NULL, /* next */
1408 0, /* static_pass_number */
1409 TV_INLINE_HEURISTICS, /* tv_id */
1410 0, /* properties_required */
1411 PROP_cfg, /* properties_provided */
1412 0, /* properties_destroyed */
1413 0, /* todo_flags_start */
1414 TODO_dump_func | TODO_verify_flow
1415 | TODO_verify_stmts, /* todo_flags_finish */
1416 0 /* letter */
1419 #include "gt-ipa-inline.h"