/* ipa-inline-analysis.c — GCC inlining decision heuristics
   (from official-gcc.git, gcc/ipa-inline-analysis.c).  */
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis used by the inliner and other passes limiting code size growth.
23 We estimate for each function
24 - function body size
25 - average function execution time
26 - inlining size benefit (that is how much of function body size
27 and its call sequence is expected to disappear by inlining)
28 - inlining time benefit
29 - function frame size
30 For each call
31 - call statement size and time
33 inline_summary data structures store above information locally (i.e.
34 parameters of the function itself) and globally (i.e. parameters of
35 the function created by applying all the inline decisions already
36 present in the callgraph).
38 We provide access to the inline_summary data structure and
39 basic logic updating the parameters when inlining is performed.
41 The summaries are context sensitive. Context means
42 1) partial assignment of known constant values of operands
43 2) whether function is inlined into the call or not.
44 It is easy to add more variants. To represent function size and time
45 that depends on context (i.e. it is known to be optimized away when
46 context is known either by inlining or from IP-CP and cloning),
47 we use predicates. Predicates are logical formulas in
48 conjunctive-disjunctive form consisting of clauses. Clauses are bitmaps
49 specifying what conditions must be true. Conditions are simple test
50 of the form described above.
52 In order to make predicate (possibly) true, all of its clauses must
53 be (possibly) true. To make clause (possibly) true, one of conditions
54 it mentions must be (possibly) true. There are fixed bounds on
55 number of clauses and conditions and all the manipulation functions
56 are conservative in positive direction. I.e. we may lose precision
57 by thinking that predicate may be true even when it is not.
59 estimate_edge_size and estimate_edge_growth can be used to query
60 function size/time in the given context. inline_merge_summary merges
61 properties of caller and callee after inlining.
63 Finally pass_inline_parameters is exported. This is used to drive
64 computation of function parameters used by the early inliner. IPA
65 inlined performs analysis via its analyze_function method. */
67 #include "config.h"
68 #include "system.h"
69 #include "coretypes.h"
70 #include "backend.h"
71 #include "tree.h"
72 #include "gimple.h"
73 #include "alloc-pool.h"
74 #include "tree-pass.h"
75 #include "ssa.h"
76 #include "tree-streamer.h"
77 #include "cgraph.h"
78 #include "diagnostic.h"
79 #include "fold-const.h"
80 #include "print-tree.h"
81 #include "tree-inline.h"
82 #include "gimple-pretty-print.h"
83 #include "params.h"
84 #include "cfganal.h"
85 #include "gimple-iterator.h"
86 #include "tree-cfg.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "symbol-summary.h"
90 #include "ipa-prop.h"
91 #include "ipa-inline.h"
92 #include "cfgloop.h"
93 #include "tree-scalar-evolution.h"
94 #include "ipa-utils.h"
95 #include "cilk.h"
96 #include "cfgexpand.h"
97 #include "gimplify.h"
/* Number of bits in integer, but we really want to be stable across different
   hosts.  */
#define NUM_CONDITIONS 32

/* Reserved condition slots; dynamic (per-parameter) conditions start at
   predicate_first_dynamic_condition.  */
enum predicate_conditions
{
  predicate_false_condition = 0,
  predicate_not_inlined_condition = 1,
  predicate_first_dynamic_condition = 2
};

/* Special condition code we use to represent test that operand is compile time
   constant.  */
#define IS_NOT_CONSTANT ERROR_MARK
/* Special condition code we use to represent test that operand is not changed
   across invocation of the function.  When operand IS_NOT_CONSTANT it is always
   CHANGED, however i.e. loop invariants can be NOT_CHANGED given percentage
   of executions even when they are not compile time constants.  */
#define CHANGED IDENTIFIER_NODE
119 /* Holders of ipa cgraph hooks: */
120 static struct cgraph_2edge_hook_list *edge_duplication_hook_holder;
121 static struct cgraph_edge_hook_list *edge_removal_hook_holder;
122 static void inline_edge_removal_hook (struct cgraph_edge *, void *);
123 static void inline_edge_duplication_hook (struct cgraph_edge *,
124 struct cgraph_edge *, void *);
126 /* VECtor holding inline summaries.
127 In GGC memory because conditions might point to constant trees. */
128 function_summary <inline_summary *> *inline_summaries;
129 vec<inline_edge_summary_t> inline_edge_summary_vec;
131 /* Cached node/edge growths. */
132 vec<edge_growth_cache_entry> edge_growth_cache;
134 /* Edge predicates goes here. */
135 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
137 /* Return true predicate (tautology).
138 We represent it by empty list of clauses. */
140 static inline struct predicate
141 true_predicate (void)
143 struct predicate p;
144 p.clause[0] = 0;
145 return p;
149 /* Return predicate testing single condition number COND. */
151 static inline struct predicate
152 single_cond_predicate (int cond)
154 struct predicate p;
155 p.clause[0] = 1 << cond;
156 p.clause[1] = 0;
157 return p;
161 /* Return false predicate. First clause require false condition. */
163 static inline struct predicate
164 false_predicate (void)
166 return single_cond_predicate (predicate_false_condition);
170 /* Return true if P is (true). */
172 static inline bool
173 true_predicate_p (struct predicate *p)
175 return !p->clause[0];
179 /* Return true if P is (false). */
181 static inline bool
182 false_predicate_p (struct predicate *p)
184 if (p->clause[0] == (1 << predicate_false_condition))
186 gcc_checking_assert (!p->clause[1]
187 && p->clause[0] == 1 << predicate_false_condition);
188 return true;
190 return false;
194 /* Return predicate that is set true when function is not inlined. */
196 static inline struct predicate
197 not_inlined_predicate (void)
199 return single_cond_predicate (predicate_not_inlined_condition);
202 /* Simple description of whether a memory load or a condition refers to a load
203 from an aggregate and if so, how and where from in the aggregate.
204 Individual fields have the same meaning like fields with the same name in
205 struct condition. */
207 struct agg_position_info
209 HOST_WIDE_INT offset;
210 bool agg_contents;
211 bool by_ref;
214 /* Add condition to condition list SUMMARY. OPERAND_NUM, SIZE, CODE and VAL
215 correspond to fields of condition structure. AGGPOS describes whether the
216 used operand is loaded from an aggregate and where in the aggregate it is.
217 It can be NULL, which means this not a load from an aggregate. */
219 static struct predicate
220 add_condition (struct inline_summary *summary, int operand_num,
221 HOST_WIDE_INT size, struct agg_position_info *aggpos,
222 enum tree_code code, tree val)
224 int i;
225 struct condition *c;
226 struct condition new_cond;
227 HOST_WIDE_INT offset;
228 bool agg_contents, by_ref;
230 if (aggpos)
232 offset = aggpos->offset;
233 agg_contents = aggpos->agg_contents;
234 by_ref = aggpos->by_ref;
236 else
238 offset = 0;
239 agg_contents = false;
240 by_ref = false;
243 gcc_checking_assert (operand_num >= 0);
244 for (i = 0; vec_safe_iterate (summary->conds, i, &c); i++)
246 if (c->operand_num == operand_num
247 && c->size == size
248 && c->code == code
249 && c->val == val
250 && c->agg_contents == agg_contents
251 && (!agg_contents || (c->offset == offset && c->by_ref == by_ref)))
252 return single_cond_predicate (i + predicate_first_dynamic_condition);
254 /* Too many conditions. Give up and return constant true. */
255 if (i == NUM_CONDITIONS - predicate_first_dynamic_condition)
256 return true_predicate ();
258 new_cond.operand_num = operand_num;
259 new_cond.code = code;
260 new_cond.val = val;
261 new_cond.agg_contents = agg_contents;
262 new_cond.by_ref = by_ref;
263 new_cond.offset = offset;
264 new_cond.size = size;
265 vec_safe_push (summary->conds, new_cond);
266 return single_cond_predicate (i + predicate_first_dynamic_condition);
270 /* Add clause CLAUSE into the predicate P. */
272 static inline void
273 add_clause (conditions conditions, struct predicate *p, clause_t clause)
275 int i;
276 int i2;
277 int insert_here = -1;
278 int c1, c2;
280 /* True clause. */
281 if (!clause)
282 return;
284 /* False clause makes the whole predicate false. Kill the other variants. */
285 if (clause == (1 << predicate_false_condition))
287 p->clause[0] = (1 << predicate_false_condition);
288 p->clause[1] = 0;
289 return;
291 if (false_predicate_p (p))
292 return;
294 /* No one should be silly enough to add false into nontrivial clauses. */
295 gcc_checking_assert (!(clause & (1 << predicate_false_condition)));
297 /* Look where to insert the clause. At the same time prune out
298 clauses of P that are implied by the new clause and thus
299 redundant. */
300 for (i = 0, i2 = 0; i <= MAX_CLAUSES; i++)
302 p->clause[i2] = p->clause[i];
304 if (!p->clause[i])
305 break;
307 /* If p->clause[i] implies clause, there is nothing to add. */
308 if ((p->clause[i] & clause) == p->clause[i])
310 /* We had nothing to add, none of clauses should've become
311 redundant. */
312 gcc_checking_assert (i == i2);
313 return;
316 if (p->clause[i] < clause && insert_here < 0)
317 insert_here = i2;
319 /* If clause implies p->clause[i], then p->clause[i] becomes redundant.
320 Otherwise the p->clause[i] has to stay. */
321 if ((p->clause[i] & clause) != clause)
322 i2++;
325 /* Look for clauses that are obviously true. I.e.
326 op0 == 5 || op0 != 5. */
327 for (c1 = predicate_first_dynamic_condition; c1 < NUM_CONDITIONS; c1++)
329 condition *cc1;
330 if (!(clause & (1 << c1)))
331 continue;
332 cc1 = &(*conditions)[c1 - predicate_first_dynamic_condition];
333 /* We have no way to represent !CHANGED and !IS_NOT_CONSTANT
334 and thus there is no point for looking for them. */
335 if (cc1->code == CHANGED || cc1->code == IS_NOT_CONSTANT)
336 continue;
337 for (c2 = c1 + 1; c2 < NUM_CONDITIONS; c2++)
338 if (clause & (1 << c2))
340 condition *cc1 =
341 &(*conditions)[c1 - predicate_first_dynamic_condition];
342 condition *cc2 =
343 &(*conditions)[c2 - predicate_first_dynamic_condition];
344 if (cc1->operand_num == cc2->operand_num
345 && cc1->val == cc2->val
346 && cc2->code != IS_NOT_CONSTANT
347 && cc2->code != CHANGED
348 && cc1->code == invert_tree_comparison (cc2->code,
349 HONOR_NANS (cc1->val)))
350 return;
355 /* We run out of variants. Be conservative in positive direction. */
356 if (i2 == MAX_CLAUSES)
357 return;
358 /* Keep clauses in decreasing order. This makes equivalence testing easy. */
359 p->clause[i2 + 1] = 0;
360 if (insert_here >= 0)
361 for (; i2 > insert_here; i2--)
362 p->clause[i2] = p->clause[i2 - 1];
363 else
364 insert_here = i2;
365 p->clause[insert_here] = clause;
369 /* Return P & P2. */
371 static struct predicate
372 and_predicates (conditions conditions,
373 struct predicate *p, struct predicate *p2)
375 struct predicate out = *p;
376 int i;
378 /* Avoid busy work. */
379 if (false_predicate_p (p2) || true_predicate_p (p))
380 return *p2;
381 if (false_predicate_p (p) || true_predicate_p (p2))
382 return *p;
384 /* See how far predicates match. */
385 for (i = 0; p->clause[i] && p->clause[i] == p2->clause[i]; i++)
387 gcc_checking_assert (i < MAX_CLAUSES);
390 /* Combine the predicates rest. */
391 for (; p2->clause[i]; i++)
393 gcc_checking_assert (i < MAX_CLAUSES);
394 add_clause (conditions, &out, p2->clause[i]);
396 return out;
400 /* Return true if predicates are obviously equal. */
402 static inline bool
403 predicates_equal_p (struct predicate *p, struct predicate *p2)
405 int i;
406 for (i = 0; p->clause[i]; i++)
408 gcc_checking_assert (i < MAX_CLAUSES);
409 gcc_checking_assert (p->clause[i] > p->clause[i + 1]);
410 gcc_checking_assert (!p2->clause[i]
411 || p2->clause[i] > p2->clause[i + 1]);
412 if (p->clause[i] != p2->clause[i])
413 return false;
415 return !p2->clause[i];
419 /* Return P | P2. */
421 static struct predicate
422 or_predicates (conditions conditions,
423 struct predicate *p, struct predicate *p2)
425 struct predicate out = true_predicate ();
426 int i, j;
428 /* Avoid busy work. */
429 if (false_predicate_p (p2) || true_predicate_p (p))
430 return *p;
431 if (false_predicate_p (p) || true_predicate_p (p2))
432 return *p2;
433 if (predicates_equal_p (p, p2))
434 return *p;
436 /* OK, combine the predicates. */
437 for (i = 0; p->clause[i]; i++)
438 for (j = 0; p2->clause[j]; j++)
440 gcc_checking_assert (i < MAX_CLAUSES && j < MAX_CLAUSES);
441 add_clause (conditions, &out, p->clause[i] | p2->clause[j]);
443 return out;
447 /* Having partial truth assignment in POSSIBLE_TRUTHS, return false
448 if predicate P is known to be false. */
450 static bool
451 evaluate_predicate (struct predicate *p, clause_t possible_truths)
453 int i;
455 /* True remains true. */
456 if (true_predicate_p (p))
457 return true;
459 gcc_assert (!(possible_truths & (1 << predicate_false_condition)));
461 /* See if we can find clause we can disprove. */
462 for (i = 0; p->clause[i]; i++)
464 gcc_checking_assert (i < MAX_CLAUSES);
465 if (!(p->clause[i] & possible_truths))
466 return false;
468 return true;
471 /* Return the probability in range 0...REG_BR_PROB_BASE that the predicated
472 instruction will be recomputed per invocation of the inlined call. */
474 static int
475 predicate_probability (conditions conds,
476 struct predicate *p, clause_t possible_truths,
477 vec<inline_param_summary> inline_param_summary)
479 int i;
480 int combined_prob = REG_BR_PROB_BASE;
482 /* True remains true. */
483 if (true_predicate_p (p))
484 return REG_BR_PROB_BASE;
486 if (false_predicate_p (p))
487 return 0;
489 gcc_assert (!(possible_truths & (1 << predicate_false_condition)));
491 /* See if we can find clause we can disprove. */
492 for (i = 0; p->clause[i]; i++)
494 gcc_checking_assert (i < MAX_CLAUSES);
495 if (!(p->clause[i] & possible_truths))
496 return 0;
497 else
499 int this_prob = 0;
500 int i2;
501 if (!inline_param_summary.exists ())
502 return REG_BR_PROB_BASE;
503 for (i2 = 0; i2 < NUM_CONDITIONS; i2++)
504 if ((p->clause[i] & possible_truths) & (1 << i2))
506 if (i2 >= predicate_first_dynamic_condition)
508 condition *c =
509 &(*conds)[i2 - predicate_first_dynamic_condition];
510 if (c->code == CHANGED
511 && (c->operand_num <
512 (int) inline_param_summary.length ()))
514 int iprob =
515 inline_param_summary[c->operand_num].change_prob;
516 this_prob = MAX (this_prob, iprob);
518 else
519 this_prob = REG_BR_PROB_BASE;
521 else
522 this_prob = REG_BR_PROB_BASE;
524 combined_prob = MIN (this_prob, combined_prob);
525 if (!combined_prob)
526 return 0;
529 return combined_prob;
533 /* Dump conditional COND. */
535 static void
536 dump_condition (FILE *f, conditions conditions, int cond)
538 condition *c;
539 if (cond == predicate_false_condition)
540 fprintf (f, "false");
541 else if (cond == predicate_not_inlined_condition)
542 fprintf (f, "not inlined");
543 else
545 c = &(*conditions)[cond - predicate_first_dynamic_condition];
546 fprintf (f, "op%i", c->operand_num);
547 if (c->agg_contents)
548 fprintf (f, "[%soffset: " HOST_WIDE_INT_PRINT_DEC "]",
549 c->by_ref ? "ref " : "", c->offset);
550 if (c->code == IS_NOT_CONSTANT)
552 fprintf (f, " not constant");
553 return;
555 if (c->code == CHANGED)
557 fprintf (f, " changed");
558 return;
560 fprintf (f, " %s ", op_symbol_code (c->code));
561 print_generic_expr (f, c->val, 1);
566 /* Dump clause CLAUSE. */
568 static void
569 dump_clause (FILE *f, conditions conds, clause_t clause)
571 int i;
572 bool found = false;
573 fprintf (f, "(");
574 if (!clause)
575 fprintf (f, "true");
576 for (i = 0; i < NUM_CONDITIONS; i++)
577 if (clause & (1 << i))
579 if (found)
580 fprintf (f, " || ");
581 found = true;
582 dump_condition (f, conds, i);
584 fprintf (f, ")");
588 /* Dump PREDICATE to F. CONDS a vector of conditions used when evauating
589 predicats. When NL is true new line is output at the end of dump. */
591 static void
592 dump_predicate (FILE *f, conditions conds, struct predicate *pred,
593 bool nl = true)
595 int i;
596 if (true_predicate_p (pred))
597 dump_clause (f, conds, 0);
598 else
599 for (i = 0; pred->clause[i]; i++)
601 if (i)
602 fprintf (f, " && ");
603 dump_clause (f, conds, pred->clause[i]);
605 if (nl)
606 fprintf (f, "\n");
610 /* Dump inline hints. */
611 void
612 dump_inline_hints (FILE *f, inline_hints hints)
614 if (!hints)
615 return;
616 fprintf (f, "inline hints:");
617 if (hints & INLINE_HINT_indirect_call)
619 hints &= ~INLINE_HINT_indirect_call;
620 fprintf (f, " indirect_call");
622 if (hints & INLINE_HINT_loop_iterations)
624 hints &= ~INLINE_HINT_loop_iterations;
625 fprintf (f, " loop_iterations");
627 if (hints & INLINE_HINT_loop_stride)
629 hints &= ~INLINE_HINT_loop_stride;
630 fprintf (f, " loop_stride");
632 if (hints & INLINE_HINT_same_scc)
634 hints &= ~INLINE_HINT_same_scc;
635 fprintf (f, " same_scc");
637 if (hints & INLINE_HINT_in_scc)
639 hints &= ~INLINE_HINT_in_scc;
640 fprintf (f, " in_scc");
642 if (hints & INLINE_HINT_cross_module)
644 hints &= ~INLINE_HINT_cross_module;
645 fprintf (f, " cross_module");
647 if (hints & INLINE_HINT_declared_inline)
649 hints &= ~INLINE_HINT_declared_inline;
650 fprintf (f, " declared_inline");
652 if (hints & INLINE_HINT_array_index)
654 hints &= ~INLINE_HINT_array_index;
655 fprintf (f, " array_index");
657 if (hints & INLINE_HINT_known_hot)
659 hints &= ~INLINE_HINT_known_hot;
660 fprintf (f, " known_hot");
662 gcc_assert (!hints);
666 /* Record SIZE and TIME to SUMMARY.
667 The accounted code will be executed when EXEC_PRED is true.
668 When NONCONST_PRED is false the code will evaulate to constant and
669 will get optimized out in specialized clones of the function. */
671 static void
672 account_size_time (struct inline_summary *summary, int size, sreal time,
673 struct predicate *exec_pred,
674 struct predicate *nonconst_pred_ptr)
676 size_time_entry *e;
677 bool found = false;
678 int i;
679 struct predicate nonconst_pred;
681 if (false_predicate_p (exec_pred))
682 return;
684 nonconst_pred = and_predicates (summary->conds, nonconst_pred_ptr, exec_pred);
686 if (false_predicate_p (&nonconst_pred))
687 return;
689 /* We need to create initial empty unconitional clause, but otherwie
690 we don't need to account empty times and sizes. */
691 if (!size && time == 0 && summary->entry)
692 return;
694 gcc_assert (time >= 0);
696 for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
697 if (predicates_equal_p (&e->exec_predicate, exec_pred)
698 && predicates_equal_p (&e->nonconst_predicate, &nonconst_pred))
700 found = true;
701 break;
703 if (i == 256)
705 i = 0;
706 found = true;
707 e = &(*summary->entry)[0];
708 gcc_assert (!e->exec_predicate.clause[0]);
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 fprintf (dump_file,
711 "\t\tReached limit on number of entries, "
712 "ignoring the predicate.");
714 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
716 fprintf (dump_file,
717 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
718 ((double) size) / INLINE_SIZE_SCALE,
719 (time.to_double ()), found ? "" : "new ");
720 dump_predicate (dump_file, summary->conds, exec_pred, 0);
721 if (!predicates_equal_p (exec_pred, &nonconst_pred))
723 fprintf (dump_file, " nonconst:");
724 dump_predicate (dump_file, summary->conds, &nonconst_pred);
726 else
727 fprintf (dump_file, "\n");
729 if (!found)
731 struct size_time_entry new_entry;
732 new_entry.size = size;
733 new_entry.time = time;
734 new_entry.exec_predicate = *exec_pred;
735 new_entry.nonconst_predicate = nonconst_pred;
736 vec_safe_push (summary->entry, new_entry);
738 else
740 e->size += size;
741 e->time += time;
745 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
747 static struct cgraph_edge *
748 redirect_to_unreachable (struct cgraph_edge *e)
750 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
751 struct cgraph_node *target = cgraph_node::get_create
752 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
754 if (e->speculative)
755 e = e->resolve_speculation (target->decl);
756 else if (!e->callee)
757 e->make_direct (target);
758 else
759 e->redirect_callee (target);
760 struct inline_edge_summary *es = inline_edge_summary (e);
761 e->inline_failed = CIF_UNREACHABLE;
762 e->frequency = 0;
763 e->count = 0;
764 es->call_stmt_size = 0;
765 es->call_stmt_time = 0;
766 if (callee)
767 callee->remove_symbol_and_inline_clones ();
768 return e;
771 /* Set predicate for edge E. */
773 static void
774 edge_set_predicate (struct cgraph_edge *e, struct predicate *predicate)
776 /* If the edge is determined to be never executed, redirect it
777 to BUILTIN_UNREACHABLE to save inliner from inlining into it. */
778 if (predicate && false_predicate_p (predicate)
779 /* When handling speculative edges, we need to do the redirection
780 just once. Do it always on the direct edge, so we do not
781 attempt to resolve speculation while duplicating the edge. */
782 && (!e->speculative || e->callee))
783 e = redirect_to_unreachable (e);
785 struct inline_edge_summary *es = inline_edge_summary (e);
786 if (predicate && !true_predicate_p (predicate))
788 if (!es->predicate)
789 es->predicate = edge_predicate_pool.allocate ();
790 *es->predicate = *predicate;
792 else
794 if (es->predicate)
795 edge_predicate_pool.remove (es->predicate);
796 es->predicate = NULL;
800 /* Set predicate for hint *P. */
802 static void
803 set_hint_predicate (struct predicate **p, struct predicate new_predicate)
805 if (false_predicate_p (&new_predicate) || true_predicate_p (&new_predicate))
807 if (*p)
808 edge_predicate_pool.remove (*p);
809 *p = NULL;
811 else
813 if (!*p)
814 *p = edge_predicate_pool.allocate ();
815 **p = new_predicate;
820 /* Compute what conditions may or may not hold given invormation about
821 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
822 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
823 copy when called in a given context. It is a bitmask of conditions. Bit
824 0 means that condition is known to be false, while bit 1 means that condition
825 may or may not be true. These differs - for example NOT_INLINED condition
826 is always false in the second and also builtin_constant_p tests can not use
827 the fact that parameter is indeed a constant.
829 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
830 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
831 Return clause of possible truths. When INLINE_P is true, assume that we are
832 inlining.
834 ERROR_MARK means compile time invariant. */
836 static void
837 evaluate_conditions_for_known_args (struct cgraph_node *node,
838 bool inline_p,
839 vec<tree> known_vals,
840 vec<ipa_agg_jump_function_p>
841 known_aggs,
842 clause_t *ret_clause,
843 clause_t *ret_nonspec_clause)
845 clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition;
846 clause_t nonspec_clause = 1 << predicate_not_inlined_condition;
847 struct inline_summary *info = inline_summaries->get (node);
848 int i;
849 struct condition *c;
851 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
853 tree val;
854 tree res;
856 /* We allow call stmt to have fewer arguments than the callee function
857 (especially for K&R style programs). So bound check here (we assume
858 known_aggs vector, if non-NULL, has the same length as
859 known_vals). */
860 gcc_checking_assert (!known_aggs.exists ()
861 || (known_vals.length () == known_aggs.length ()));
862 if (c->operand_num >= (int) known_vals.length ())
864 clause |= 1 << (i + predicate_first_dynamic_condition);
865 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
866 continue;
869 if (c->agg_contents)
871 struct ipa_agg_jump_function *agg;
873 if (c->code == CHANGED
874 && !c->by_ref
875 && (known_vals[c->operand_num] == error_mark_node))
876 continue;
878 if (known_aggs.exists ())
880 agg = known_aggs[c->operand_num];
881 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
882 c->offset, c->by_ref);
884 else
885 val = NULL_TREE;
887 else
889 val = known_vals[c->operand_num];
890 if (val == error_mark_node && c->code != CHANGED)
891 val = NULL_TREE;
894 if (!val)
896 clause |= 1 << (i + predicate_first_dynamic_condition);
897 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
898 continue;
900 if (c->code == CHANGED)
902 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
903 continue;
906 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
908 clause |= 1 << (i + predicate_first_dynamic_condition);
909 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
910 continue;
912 if (c->code == IS_NOT_CONSTANT)
914 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
915 continue;
918 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
919 res = val
920 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
921 : NULL;
923 if (res && integer_zerop (res))
924 continue;
926 clause |= 1 << (i + predicate_first_dynamic_condition);
927 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
929 *ret_clause = clause;
930 if (ret_nonspec_clause)
931 *ret_nonspec_clause = nonspec_clause;
935 /* Work out what conditions might be true at invocation of E. */
937 static void
938 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
939 clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
940 vec<tree> *known_vals_ptr,
941 vec<ipa_polymorphic_call_context>
942 *known_contexts_ptr,
943 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
945 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
946 struct inline_summary *info = inline_summaries->get (callee);
947 vec<tree> known_vals = vNULL;
948 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
950 if (clause_ptr)
951 *clause_ptr = inline_p ? 0 : 1 << predicate_not_inlined_condition;
952 if (known_vals_ptr)
953 known_vals_ptr->create (0);
954 if (known_contexts_ptr)
955 known_contexts_ptr->create (0);
957 if (ipa_node_params_sum
958 && !e->call_stmt_cannot_inline_p
959 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
961 struct ipa_node_params *parms_info;
962 struct ipa_edge_args *args = IPA_EDGE_REF (e);
963 struct inline_edge_summary *es = inline_edge_summary (e);
964 int i, count = ipa_get_cs_argument_count (args);
966 if (e->caller->global.inlined_to)
967 parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
968 else
969 parms_info = IPA_NODE_REF (e->caller);
971 if (count && (info->conds || known_vals_ptr))
972 known_vals.safe_grow_cleared (count);
973 if (count && (info->conds || known_aggs_ptr))
974 known_aggs.safe_grow_cleared (count);
975 if (count && known_contexts_ptr)
976 known_contexts_ptr->safe_grow_cleared (count);
978 for (i = 0; i < count; i++)
980 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
981 tree cst = ipa_value_from_jfunc (parms_info, jf);
983 if (!cst && e->call_stmt
984 && i < (int)gimple_call_num_args (e->call_stmt))
986 cst = gimple_call_arg (e->call_stmt, i);
987 if (!is_gimple_min_invariant (cst))
988 cst = NULL;
990 if (cst)
992 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
993 if (known_vals.exists ())
994 known_vals[i] = cst;
996 else if (inline_p && !es->param[i].change_prob)
997 known_vals[i] = error_mark_node;
999 if (known_contexts_ptr)
1000 (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
1001 i, jf);
1002 /* TODO: When IPA-CP starts propagating and merging aggregate jump
1003 functions, use its knowledge of the caller too, just like the
1004 scalar case above. */
1005 known_aggs[i] = &jf->agg;
1008 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
1009 && ((clause_ptr && info->conds) || known_vals_ptr))
1011 int i, count = (int)gimple_call_num_args (e->call_stmt);
1013 if (count && (info->conds || known_vals_ptr))
1014 known_vals.safe_grow_cleared (count);
1015 for (i = 0; i < count; i++)
1017 tree cst = gimple_call_arg (e->call_stmt, i);
1018 if (!is_gimple_min_invariant (cst))
1019 cst = NULL;
1020 if (cst)
1021 known_vals[i] = cst;
1025 evaluate_conditions_for_known_args (callee, inline_p,
1026 known_vals, known_aggs, clause_ptr,
1027 nonspec_clause_ptr);
1029 if (known_vals_ptr)
1030 *known_vals_ptr = known_vals;
1031 else
1032 known_vals.release ();
1034 if (known_aggs_ptr)
1035 *known_aggs_ptr = known_aggs;
1036 else
1037 known_aggs.release ();
1041 /* Allocate the inline summary vector or resize it to cover all cgraph nodes. */
1043 static void
1044 inline_summary_alloc (void)
1046 if (!edge_removal_hook_holder)
1047 edge_removal_hook_holder =
1048 symtab->add_edge_removal_hook (&inline_edge_removal_hook, NULL);
1049 if (!edge_duplication_hook_holder)
1050 edge_duplication_hook_holder =
1051 symtab->add_edge_duplication_hook (&inline_edge_duplication_hook, NULL);
1053 if (!inline_summaries)
1054 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
1056 if (inline_edge_summary_vec.length () <= (unsigned) symtab->edges_max_uid)
1057 inline_edge_summary_vec.safe_grow_cleared (symtab->edges_max_uid + 1);
1060 /* We are called multiple time for given function; clear
1061 data from previous run so they are not cumulated. */
1063 static void
1064 reset_inline_edge_summary (struct cgraph_edge *e)
1066 if (e->uid < (int) inline_edge_summary_vec.length ())
1068 struct inline_edge_summary *es = inline_edge_summary (e);
1070 es->call_stmt_size = es->call_stmt_time = 0;
1071 if (es->predicate)
1072 edge_predicate_pool.remove (es->predicate);
1073 es->predicate = NULL;
1074 es->param.release ();
1078 /* We are called multiple time for given function; clear
1079 data from previous run so they are not cumulated. */
1081 static void
1082 reset_inline_summary (struct cgraph_node *node,
1083 inline_summary *info)
1085 struct cgraph_edge *e;
1087 info->self_size = 0;
1088 info->self_time = 0;
1089 info->estimated_stack_size = 0;
1090 info->estimated_self_stack_size = 0;
1091 info->stack_frame_offset = 0;
1092 info->size = 0;
1093 info->time = 0;
1094 info->growth = 0;
1095 info->scc_no = 0;
1096 if (info->loop_iterations)
1098 edge_predicate_pool.remove (info->loop_iterations);
1099 info->loop_iterations = NULL;
1101 if (info->loop_stride)
1103 edge_predicate_pool.remove (info->loop_stride);
1104 info->loop_stride = NULL;
1106 if (info->array_index)
1108 edge_predicate_pool.remove (info->array_index);
1109 info->array_index = NULL;
1111 vec_free (info->conds);
1112 vec_free (info->entry);
1113 for (e = node->callees; e; e = e->next_callee)
1114 reset_inline_edge_summary (e);
1115 for (e = node->indirect_calls; e; e = e->next_callee)
1116 reset_inline_edge_summary (e);
1117 info->fp_expressions = false;
1120 /* Hook that is called by cgraph.c when a node is removed. */
1122 void
1123 inline_summary_t::remove (cgraph_node *node, inline_summary *info)
1125 reset_inline_summary (node, info);
1128 /* Remap predicate P of former function to be predicate of duplicated function.
1129 POSSIBLE_TRUTHS is clause of possible truths in the duplicated node,
1130 INFO is inline summary of the duplicated node.  */
1132 static struct predicate
1133 remap_predicate_after_duplication (struct predicate *p,
1134 clause_t possible_truths,
1135 struct inline_summary *info)
1137 struct predicate new_predicate = true_predicate ();
1138 int j;
1139 for (j = 0; p->clause[j]; j++)
     /* A clause with no remaining possible truths can never be satisfied,
	so the whole conjunction becomes false.  */
1140 if (!(possible_truths & p->clause[j]))
1142 new_predicate = false_predicate ();
1143 break;
1145 else
     /* Otherwise keep only the truths that are still possible.  */
1146 add_clause (info->conds, &new_predicate,
1147 possible_truths & p->clause[j]);
1148 return new_predicate;
1151 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
1152 Additionally care about allocating new memory slot for updated predicate
1153 and set it to NULL when it becomes true or false (and thus uninteresting).
     */
1156 static void
1157 remap_hint_predicate_after_duplication (struct predicate **p,
1158 clause_t possible_truths,
1159 struct inline_summary *info)
1161 struct predicate new_predicate;
1163 if (!*p)
1164 return;
1166 new_predicate = remap_predicate_after_duplication (*p,
1167 possible_truths, info);
1168 /* We do not want to free previous predicate; it is used by node origin.  */
1169 *p = NULL;
1170 set_hint_predicate (p, new_predicate);
1174 /* Hook that is called by cgraph.c when a node is duplicated.
     Copies SRC's summary into DST's INFO.  When DST is a clone with
     parameter replacements, re-evaluate all predicates under the known
     constant arguments and drop size/time entries and edge predicates
     that became false.  */
1175 void
1176 inline_summary_t::duplicate (cgraph_node *src,
1177 cgraph_node *dst,
1178 inline_summary *,
1179 inline_summary *info)
1181 inline_summary_alloc ();
1182 memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
1183 /* TODO: as an optimization, we may avoid copying conditions
1184 that are known to be false or true.  */
1185 info->conds = vec_safe_copy (info->conds);
1187 /* When there are any replacements in the function body, see if we can figure
1188 out that something was optimized out.  */
1189 if (ipa_node_params_sum && dst->clone.tree_map)
1191 vec<size_time_entry, va_gc> *entry = info->entry;
1192 /* Use SRC parm info since it may not be copied yet.  */
1193 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
1194 vec<tree> known_vals = vNULL;
1195 int count = ipa_get_param_count (parms_info);
1196 int i, j;
1197 clause_t possible_truths;
1198 struct predicate true_pred = true_predicate ();
1199 size_time_entry *e;
1200 int optimized_out_size = 0;
1201 bool inlined_to_p = false;
1202 struct cgraph_edge *edge, *next;
1204 info->entry = 0;
1205 known_vals.safe_grow_cleared (count);
     /* Collect the replacement value (if any) for each parameter.  */
1206 for (i = 0; i < count; i++)
1208 struct ipa_replace_map *r;
1210 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
1212 if (((!r->old_tree && r->parm_num == i)
1213 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
1214 && r->replace_p && !r->ref_p)
1216 known_vals[i] = r->new_tree;
1217 break;
1221 evaluate_conditions_for_known_args (dst, false,
1222 known_vals,
1223 vNULL,
1224 &possible_truths,
1225 /* We are going to specialize,
1226 so ignore nonspec truths.  */
1227 NULL);
1228 known_vals.release ();
1230 account_size_time (info, 0, 0, &true_pred, &true_pred);
1232 /* Remap size_time vectors.
1233 Simplify the predicate by pruning out alternatives that are known
1234 to be false.
1235 TODO: as an optimization, we can also eliminate conditions known
1236 to be true.  */
1237 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
1239 struct predicate new_exec_pred;
1240 struct predicate new_nonconst_pred;
1241 new_exec_pred = remap_predicate_after_duplication (&e->exec_predicate,
1242 possible_truths,
1243 info);
1244 new_nonconst_pred
1245 = remap_predicate_after_duplication (&e->nonconst_predicate,
1246 possible_truths,
1247 info);
1248 if (false_predicate_p (&new_exec_pred)
1249 || false_predicate_p (&new_nonconst_pred))
1250 optimized_out_size += e->size;
1251 else
1252 account_size_time (info, e->size, e->time, &new_exec_pred,
1253 &new_nonconst_pred);
1256 /* Remap edge predicates with the same simplification as above.
1257 Also copy constantness arrays.  */
1258 for (edge = dst->callees; edge; edge = next)
1260 struct predicate new_predicate;
1261 struct inline_edge_summary *es = inline_edge_summary (edge);
1262 next = edge->next_callee;
1264 if (!edge->inline_failed)
1265 inlined_to_p = true;
1266 if (!es->predicate)
1267 continue;
1268 new_predicate = remap_predicate_after_duplication (es->predicate,
1269 possible_truths,
1270 info);
1271 if (false_predicate_p (&new_predicate)
1272 && !false_predicate_p (es->predicate))
1273 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
1274 edge_set_predicate (edge, &new_predicate);
1277 /* Remap indirect edge predicates with the same simplification as above.
1278 Also copy constantness arrays.  */
1279 for (edge = dst->indirect_calls; edge; edge = next)
1281 struct predicate new_predicate;
1282 struct inline_edge_summary *es = inline_edge_summary (edge);
1283 next = edge->next_callee;
1285 gcc_checking_assert (edge->inline_failed);
1286 if (!es->predicate)
1287 continue;
1288 new_predicate = remap_predicate_after_duplication (es->predicate,
1289 possible_truths,
1290 info);
1291 if (false_predicate_p (&new_predicate)
1292 && !false_predicate_p (es->predicate))
1293 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
1294 edge_set_predicate (edge, &new_predicate);
1296 remap_hint_predicate_after_duplication (&info->loop_iterations,
1297 possible_truths, info);
1298 remap_hint_predicate_after_duplication (&info->loop_stride,
1299 possible_truths, info);
1300 remap_hint_predicate_after_duplication (&info->array_index,
1301 possible_truths, info);
1303 /* If inliner or someone after inliner will ever start producing
1304 non-trivial clones, we will get trouble with lack of information
1305 about updating self sizes, because size vectors already contain
1306 sizes of the callees.  */
1307 gcc_assert (!inlined_to_p || !optimized_out_size);
1309 else
     /* No replacements: a plain copy.  The hint predicates must still be
	re-allocated so DST does not share pool memory with SRC.  */
1311 info->entry = vec_safe_copy (info->entry);
1312 if (info->loop_iterations)
1314 predicate p = *info->loop_iterations;
1315 info->loop_iterations = NULL;
1316 set_hint_predicate (&info->loop_iterations, p);
1318 if (info->loop_stride)
1320 predicate p = *info->loop_stride;
1321 info->loop_stride = NULL;
1322 set_hint_predicate (&info->loop_stride, p);
1324 if (info->array_index)
1326 predicate p = *info->array_index;
1327 info->array_index = NULL;
1328 set_hint_predicate (&info->array_index, p);
1331 if (!dst->global.inlined_to)
1332 inline_update_overall_summary (dst);
1336 /* Hook that is called by cgraph.c when an edge is duplicated.
     Copies SRC's edge summary to DST, deep-copying the predicate and the
     param vector.  When an indirect edge is turned into a direct one,
     adjust the call cost accordingly.  */
1338 static void
1339 inline_edge_duplication_hook (struct cgraph_edge *src,
1340 struct cgraph_edge *dst,
1341 ATTRIBUTE_UNUSED void *data)
1343 struct inline_edge_summary *info;
1344 struct inline_edge_summary *srcinfo;
1345 inline_summary_alloc ();
1346 info = inline_edge_summary (dst);
1347 srcinfo = inline_edge_summary (src);
1348 memcpy (info, srcinfo, sizeof (struct inline_edge_summary));
     /* The memcpy aliased the predicate pointer; clear it and install a
	fresh copy so DST owns its own pool allocation.  */
1349 info->predicate = NULL;
1350 edge_set_predicate (dst, srcinfo->predicate);
1351 info->param = srcinfo->param.copy ();
1352 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
     /* Direct calls are cheaper than indirect ones; subtract the
	difference from the inherited cost.  */
1354 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
1355 - eni_size_weights.call_cost);
1356 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
1357 - eni_time_weights.call_cost);
1362 /* Keep edge cache consistent across edge removal.  */
1364 static void
1365 inline_edge_removal_hook (struct cgraph_edge *edge,
1366 void *data ATTRIBUTE_UNUSED)
1368 if (edge_growth_cache.exists ())
1369 reset_edge_growth_cache (edge);
1370 reset_inline_edge_summary (edge);
1374 /* Initialize growth caches, sized by the current maximal edge uid.  */
1376 void
1377 initialize_growth_caches (void)
1379 if (symtab->edges_max_uid)
1380 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
1384 /* Free growth caches.  */
1386 void
1387 free_growth_caches (void)
1389 edge_growth_cache.release ();
1393 /* Dump edge summaries associated to NODE and recursively to all clones.
1394 Indent by INDENT.  INFO supplies the conditions used when printing
     edge predicates.  */
1396 static void
1397 dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node,
1398 struct inline_summary *info)
1400 struct cgraph_edge *edge;
1402 for (edge = node->callees; edge; edge = edge->next_callee)
1403 struct inline_edge_summary *es = inline_edge_summary (edge);
1404 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1405 int i;
1407 fprintf (f,
1408 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
1409 " time: %2i callee size:%2i stack:%2i",
1410 indent, "", callee->name (), callee->order,
1411 !edge->inline_failed
1412 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1413 indent, "", es->loop_depth, edge->frequency,
1414 es->call_stmt_size, es->call_stmt_time,
1415 (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
1416 (int) inline_summaries->get (callee)->estimated_stack_size);
1418 if (es->predicate)
1420 fprintf (f, " predicate: ");
1421 dump_predicate (f, info->conds, es->predicate);
1423 else
1424 fprintf (f, "\n");
1425 if (es->param.exists ())
1426 for (i = 0; i < (int) es->param.length (); i++)
1428 int prob = es->param[i].change_prob;
     /* Probability 0 is reserved for compile-time invariants; see
	param_change_prob.  */
1430 if (!prob)
1431 fprintf (f, "%*s op%i is compile time invariant\n",
1432 indent + 2, "", i);
1433 else if (prob != REG_BR_PROB_BASE)
1434 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1435 prob * 100.0 / REG_BR_PROB_BASE);
     /* For inlined edges, recurse into the callee body.  */
1437 if (!edge->inline_failed)
1439 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
1440 " callee size %i\n",
1441 indent + 2, "",
1442 (int) inline_summaries->get (callee)->stack_frame_offset,
1443 (int) inline_summaries->get (callee)->estimated_self_stack_size,
1444 (int) inline_summaries->get (callee)->estimated_stack_size);
1445 dump_inline_edge_summary (f, indent + 2, callee, info);
1448 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1450 struct inline_edge_summary *es = inline_edge_summary (edge);
1451 fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
1452 " time: %2i",
1453 indent, "",
1454 es->loop_depth,
1455 edge->frequency, es->call_stmt_size, es->call_stmt_time);
1456 if (es->predicate)
1458 fprintf (f, "predicate: ");
1459 dump_predicate (f, info->conds, es->predicate);
1461 else
1462 fprintf (f, "\n");
     /* Dump inline summary of NODE to F.  */
1467 void
1468 dump_inline_summary (FILE *f, struct cgraph_node *node)
1470 if (node->definition)
1472 struct inline_summary *s = inline_summaries->get (node);
1473 size_time_entry *e;
1474 int i;
1475 fprintf (f, "Inline summary for %s/%i", node->name (),
1476 node->order);
1477 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1478 fprintf (f, " always_inline");
1479 if (s->inlinable)
1480 fprintf (f, " inlinable");
1481 if (s->contains_cilk_spawn)
1482 fprintf (f, " contains_cilk_spawn");
1483 if (s->fp_expressions)
1484 fprintf (f, " fp_expression");
1485 fprintf (f, "\n self time: %f\n", s->self_time.to_double ());
1486 fprintf (f, " global time: %f\n", s->time.to_double ());
1487 fprintf (f, " self size: %i\n", s->self_size);
1488 fprintf (f, " global size: %i\n", s->size);
1489 fprintf (f, " min size: %i\n", s->min_size);
1490 fprintf (f, " self stack: %i\n",
1491 (int) s->estimated_self_stack_size);
1492 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1493 if (s->growth)
1494 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1495 if (s->scc_no)
1496 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
     /* Dump each size/time entry with its execution and nonconstantness
	predicates.  */
1497 for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
1499 fprintf (f, " size:%f, time:%f",
1500 (double) e->size / INLINE_SIZE_SCALE,
1501 e->time.to_double ());
1502 if (!true_predicate_p (&e->exec_predicate))
1504 fprintf (f, ", executed if:");
1505 dump_predicate (f, s->conds, &e->exec_predicate, 0);
1507 if (!predicates_equal_p (&e->exec_predicate,
1508 &e->nonconst_predicate))
1510 fprintf (f, ", nonconst if:");
1511 dump_predicate (f, s->conds, &e->nonconst_predicate, 0);
1513 fprintf (f, "\n");
1515 if (s->loop_iterations)
1517 fprintf (f, " loop iterations:");
1518 dump_predicate (f, s->conds, s->loop_iterations);
1520 if (s->loop_stride)
1522 fprintf (f, " loop stride:");
1523 dump_predicate (f, s->conds, s->loop_stride);
1525 if (s->array_index)
1527 fprintf (f, " array index:");
1528 dump_predicate (f, s->conds, s->array_index);
1530 fprintf (f, " calls:\n");
1531 dump_inline_edge_summary (f, 4, node, s);
1532 fprintf (f, "\n");
     /* Dump inline summary of NODE to stderr; for use from the debugger.  */
1536 DEBUG_FUNCTION void
1537 debug_inline_summary (struct cgraph_node *node)
1539 dump_inline_summary (stderr, node);
     /* Dump inline summaries of all defined functions (skipping bodies
	already accounted to their inline host) to F.  */
1542 void
1543 dump_inline_summaries (FILE *f)
1545 struct cgraph_node *node;
1547 FOR_EACH_DEFINED_FUNCTION (node)
1548 if (!node->global.inlined_to)
1549 dump_inline_summary (f, node);
1552 /* Give initial reasons why inlining would fail on EDGE.  This gets either
1553 nullified or usually overwritten by more precise reasons later.  */
1555 void
1556 initialize_inline_failed (struct cgraph_edge *e)
1558 struct cgraph_node *callee = e->callee;
     /* An already recorded CIF_FINAL_ERROR reason is definitive; keep it.  */
1560 if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
1561 && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
1563 else if (e->indirect_unknown_callee)
1564 e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
1565 else if (!callee->definition)
1566 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
1567 else if (callee->local.redefined_extern_inline)
1568 e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
1569 else
1570 e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
1571 gcc_checking_assert (!e->call_stmt_cannot_inline_p
1572 || cgraph_inline_failed_type (e->inline_failed)
1573 == CIF_FINAL_ERROR);
1576 /* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
1577 boolean variable pointed to by DATA.  Returning true stops the walk
     after the first modification is found.  */
1579 static bool
1580 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1581 void *data)
1583 bool *b = (bool *) data;
1584 *b = true;
1585 return true;
1588 /* If OP refers to value of function parameter, return the corresponding
1589 parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
1590 PARM_DECL) will be stored to *SIZE_P in that case too.  */
1592 static tree
1593 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1595 /* SSA_NAME referring to parm default def?  */
1596 if (TREE_CODE (op) == SSA_NAME
1597 && SSA_NAME_IS_DEFAULT_DEF (op)
1598 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1600 if (size_p)
1601 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1602 return SSA_NAME_VAR (op);
1604 /* Non-SSA parm reference?  */
1605 if (TREE_CODE (op) == PARM_DECL)
1607 bool modified = false;
1609 ao_ref refd;
1610 ao_ref_init (&refd, op);
     /* Walk the virtual definitions reaching STMT to verify the parameter
	was not written to since function entry.  */
1611 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
1612 NULL);
1613 if (!modified)
1615 if (size_p)
1616 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1617 return op;
1620 return NULL_TREE;
1623 /* If OP refers to value of function parameter, return the corresponding
1624 parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
1625 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1626 stored to *SIZE_P in that case too.  */
1628 static tree
1629 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1631 tree res = unmodified_parm_1 (stmt, op, size_p);
1632 if (res)
1633 return res;
     /* Follow a single-rhs copy back to its source and retry.  */
1635 if (TREE_CODE (op) == SSA_NAME
1636 && !SSA_NAME_IS_DEFAULT_DEF (op)
1637 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1638 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1639 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1640 size_p);
1641 return NULL_TREE;
1644 /* If OP refers to a value of a function parameter or value loaded from an
1645 aggregate passed to a parameter (either by value or reference), return TRUE
1646 and store the number of the parameter to *INDEX_P, the access size into
1647 *SIZE_P, and information whether and how it has been loaded from an
1648 aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
1649 statement in which OP is used or loaded.  */
1651 static bool
1652 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1653 gimple *stmt, tree op, int *index_p,
1654 HOST_WIDE_INT *size_p,
1655 struct agg_position_info *aggpos)
1657 tree res = unmodified_parm_1 (stmt, op, size_p);
1659 gcc_checking_assert (aggpos);
1660 if (res)
     /* Direct (non-aggregate) use of a parameter.  */
1662 *index_p = ipa_get_param_decl_index (fbi->info, res);
1663 if (*index_p < 0)
1664 return false;
1665 aggpos->agg_contents = false;
1666 aggpos->by_ref = false;
1667 return true;
     /* Otherwise follow an SSA copy chain to a reference expression.  */
1670 if (TREE_CODE (op) == SSA_NAME)
1672 if (SSA_NAME_IS_DEFAULT_DEF (op)
1673 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1674 return false;
1675 stmt = SSA_NAME_DEF_STMT (op);
1676 op = gimple_assign_rhs1 (stmt);
1677 if (!REFERENCE_CLASS_P (op))
1678 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1679 aggpos);
1682 aggpos->agg_contents = true;
1683 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1684 stmt, op, index_p, &aggpos->offset,
1685 size_p, &aggpos->by_ref);
1688 /* See if statement might disappear after inlining.
1689 0 - means not eliminated
1690 1 - half of statements goes away
1691 2 - for sure it is eliminated.
1692 We are not terribly sophisticated, basically looking for simple abstraction
1693 penalty wrappers.  */
1695 static int
1696 eliminated_by_inlining_prob (gimple *stmt)
1698 enum gimple_code code = gimple_code (stmt);
1699 enum tree_code rhs_code;
1701 if (!optimize)
1702 return 0;
1704 switch (code)
1706 case GIMPLE_RETURN:
1707 return 2;
1708 case GIMPLE_ASSIGN:
1709 if (gimple_num_ops (stmt) != 2)
1710 return 0;
1712 rhs_code = gimple_assign_rhs_code (stmt);
1714 /* Casts of parameters, loads from parameters passed by reference
1715 and stores to return value or parameters are often free after
1716 inlining due to SRA and further combining.
1717 Assume that half of statements goes away.  */
1718 if (CONVERT_EXPR_CODE_P (rhs_code)
1719 || rhs_code == VIEW_CONVERT_EXPR
1720 || rhs_code == ADDR_EXPR
1721 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1723 tree rhs = gimple_assign_rhs1 (stmt);
1724 tree lhs = gimple_assign_lhs (stmt);
1725 tree inner_rhs = get_base_address (rhs);
1726 tree inner_lhs = get_base_address (lhs);
1727 bool rhs_free = false;
1728 bool lhs_free = false;
1730 if (!inner_rhs)
1731 inner_rhs = rhs;
1732 if (!inner_lhs)
1733 inner_lhs = lhs;
1735 /* Reads of parameter are expected to be free.  */
1736 if (unmodified_parm (stmt, inner_rhs, NULL))
1737 rhs_free = true;
1738 /* Match expressions of form &this->field.  Those will most likely
1739 combine with something upstream after inlining.  */
1740 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1742 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1743 if (TREE_CODE (op) == PARM_DECL)
1744 rhs_free = true;
1745 else if (TREE_CODE (op) == MEM_REF
1746 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1747 rhs_free = true;
1750 /* When parameter is not SSA register because its address is taken
1751 and it is just copied into one, the statement will be completely
1752 free after inlining (we will copy propagate backward).  */
1753 if (rhs_free && is_gimple_reg (lhs))
1754 return 2;
1756 /* Reads of parameters passed by reference
1757 expected to be free (i.e. optimized out after inlining).  */
1758 if (TREE_CODE (inner_rhs) == MEM_REF
1759 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1760 rhs_free = true;
1762 /* Copying parameter passed by reference into gimple register is
1763 probably also going to copy propagate, but we can't be quite
1764 sure.  */
1765 if (rhs_free && is_gimple_reg (lhs))
1766 lhs_free = true;
1768 /* Writes to parameters, parameters passed by value and return value
1769 (either directly or passed via invisible reference) are free.
1771 TODO: We ought to handle testcase like
1772 struct a {int a,b;};
1773 struct a
1774 returnstruct (void)
1776 struct a a ={1,2};
1777 return a;
1780 This translate into:
1782 returnstruct ()
1784 int a$b;
1785 int a$a;
1786 struct a a;
1787 struct a D.2739;
1789 <bb 2>:
1790 D.2739.a = 1;
1791 D.2739.b = 2;
1792 return D.2739;
1795 For that we either need to copy ipa-split logic detecting writes
1796 to return value.  */
1797 if (TREE_CODE (inner_lhs) == PARM_DECL
1798 || TREE_CODE (inner_lhs) == RESULT_DECL
1799 || (TREE_CODE (inner_lhs) == MEM_REF
1800 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1801 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1802 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1803 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1804 (inner_lhs,
1805 0))) == RESULT_DECL))))
1806 lhs_free = true;
1807 if (lhs_free
1808 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1809 rhs_free = true;
1810 if (lhs_free && rhs_free)
1811 return 1;
1813 return 0;
1814 default:
1815 return 0;
1820 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1821 predicates to the CFG edges.  */
1823 static void
1824 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1825 struct inline_summary *summary,
1826 basic_block bb)
1828 gimple *last;
1829 tree op;
1830 int index;
1831 HOST_WIDE_INT size;
1832 struct agg_position_info aggpos;
1833 enum tree_code code, inverted_code;
1834 edge e;
1835 edge_iterator ei;
1836 gimple *set_stmt;
1837 tree op2;
1839 last = last_stmt (bb);
1840 if (!last || gimple_code (last) != GIMPLE_COND)
1841 return;
1842 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1843 return;
1844 op = gimple_cond_lhs (last);
1845 /* TODO: handle conditionals like
1846 var = op0 < 4;
1847 if (var != 0).  */
1848 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1850 code = gimple_cond_code (last);
1851 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1853 FOR_EACH_EDGE (e, ei, bb->succs)
1855 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1856 ? code : inverted_code);
1857 /* invert_tree_comparison will return ERROR_MARK on FP
1858 comparisons that are not EQ/NE instead of returning proper
1859 unordered one.  Be sure it is not confused with NON_CONSTANT.  */
1860 if (this_code != ERROR_MARK)
1862 struct predicate p
1863 = add_condition (summary, index, size, &aggpos, this_code,
1864 unshare_expr_without_location
1865 (gimple_cond_rhs (last)));
1866 e->aux = edge_predicate_pool.allocate ();
1867 *(struct predicate *) e->aux = p;
1872 if (TREE_CODE (op) != SSA_NAME)
1873 return;
1874 /* Special case
1875 if (builtin_constant_p (op))
1876 constant_code
1877 else
1878 nonconstant_code.
1879 Here we can predicate nonconstant_code.  We can't
1880 really handle constant_code since we have no predicate
1881 for this and also the constant code is not known to be
1882 optimized away when inliner doesn't see operand is constant.
1883 Other optimizers might think otherwise.  */
1884 if (gimple_cond_code (last) != NE_EXPR
1885 || !integer_zerop (gimple_cond_rhs (last)))
1886 return;
1887 set_stmt = SSA_NAME_DEF_STMT (op);
1888 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1889 || gimple_call_num_args (set_stmt) != 1)
1890 return;
1891 op2 = gimple_call_arg (set_stmt, 0);
1892 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1893 &aggpos))
1894 return;
     /* The FALSE edge is taken when __builtin_constant_p evaluated to 0,
	i.e. when the argument is not a compile time constant.  */
1895 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1897 struct predicate p = add_condition (summary, index, size, &aggpos,
1898 IS_NOT_CONSTANT, NULL_TREE);
1899 e->aux = edge_predicate_pool.allocate ();
1900 *(struct predicate *) e->aux = p;
1905 /* If BB ends by a switch we can turn into predicates, attach corresponding
1906 predicates to the CFG edges.  */
1908 static void
1909 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1910 struct inline_summary *summary,
1911 basic_block bb)
1913 gimple *lastg;
1914 tree op;
1915 int index;
1916 HOST_WIDE_INT size;
1917 struct agg_position_info aggpos;
1918 edge e;
1919 edge_iterator ei;
1920 size_t n;
1921 size_t case_idx;
1923 lastg = last_stmt (bb);
1924 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1925 return;
1926 gswitch *last = as_a <gswitch *> (lastg);
1927 op = gimple_switch_index (last);
1928 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1929 return;
     /* Start with false on every edge; each case label below ORs in the
	condition under which its target is reached.  */
1931 FOR_EACH_EDGE (e, ei, bb->succs)
1933 e->aux = edge_predicate_pool.allocate ();
1934 *(struct predicate *) e->aux = false_predicate ();
1936 n = gimple_switch_num_labels (last);
1937 for (case_idx = 0; case_idx < n; ++case_idx)
1939 tree cl = gimple_switch_label (last, case_idx);
1940 tree min, max;
1941 struct predicate p;
1943 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1944 min = CASE_LOW (cl);
1945 max = CASE_HIGH (cl);
1947 /* For default we might want to construct predicate that none
1948 of cases is met, but it is bit hard to do not having negations
1949 of conditionals handy.  */
1950 if (!min && !max)
1951 p = true_predicate ();
1952 else if (!max)
1953 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1954 unshare_expr_without_location (min));
1955 else
     /* A case range [min, max] is the conjunction of two comparisons.  */
1957 struct predicate p1, p2;
1958 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1959 unshare_expr_without_location (min));
1960 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1961 unshare_expr_without_location (max));
1962 p = and_predicates (summary->conds, &p1, &p2);
1964 *(struct predicate *) e->aux
1965 = or_predicates (summary->conds, &p, (struct predicate *) e->aux);
1970 /* For each BB in NODE attach to its AUX pointer predicate under
1971 which it is executable.  */
1973 static void
1974 compute_bb_predicates (struct ipa_func_body_info *fbi,
1975 struct cgraph_node *node,
1976 struct inline_summary *summary)
1978 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1979 bool done = false;
1980 basic_block bb;
1982 FOR_EACH_BB_FN (bb, my_function)
1984 set_cond_stmt_execution_predicate (fbi, summary, bb);
1985 set_switch_stmt_execution_predicate (fbi, summary, bb);
1988 /* Entry block is always executable.  */
1989 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1990 = edge_predicate_pool.allocate ();
1991 *(struct predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1992 = true_predicate ();
1994 /* A simple dataflow propagation of predicates forward in the CFG.
1995 TODO: work in reverse postorder.  */
1996 while (!done)
1998 done = true;
1999 FOR_EACH_BB_FN (bb, my_function)
2001 struct predicate p = false_predicate ();
2002 edge e;
2003 edge_iterator ei;
     /* A block executes when any predecessor executes and its edge
	condition also holds: OR over predecessors of (pred AND edge).  */
2004 FOR_EACH_EDGE (e, ei, bb->preds)
2006 if (e->src->aux)
2008 struct predicate this_bb_predicate
2009 = *(struct predicate *) e->src->aux;
2010 if (e->aux)
2011 this_bb_predicate
2012 = and_predicates (summary->conds, &this_bb_predicate,
2013 (struct predicate *) e->aux);
2014 p = or_predicates (summary->conds, &p, &this_bb_predicate);
2015 if (true_predicate_p (&p))
2016 break;
2019 if (false_predicate_p (&p))
2020 gcc_assert (!bb->aux);
2021 else
2023 if (!bb->aux)
2025 done = false;
2026 bb->aux = edge_predicate_pool.allocate ();
2027 *((struct predicate *) bb->aux) = p;
2029 else if (!predicates_equal_p (&p, (struct predicate *) bb->aux))
2031 /* This OR operation is needed to ensure monotone data flow
2032 in the case we hit the limit on number of clauses and the
2033 and/or operations above give approximate answers.  */
2034 p = or_predicates (summary->conds, &p, (struct predicate *)bb->aux);
2035 if (!predicates_equal_p (&p, (struct predicate *) bb->aux))
2037 done = false;
2038 *((struct predicate *) bb->aux) = p;
2047 /* We keep info about constantness of SSA names.  */
2049 typedef struct predicate predicate_t;
2050 /* Return predicate specifying when the STMT might have result that is not
2051 a compile time constant.  */
2053 static struct predicate
2054 will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
2055 struct inline_summary *summary,
2056 tree expr,
2057 vec<predicate_t> nonconstant_names)
2059 tree parm;
2060 int index;
2061 HOST_WIDE_INT size;
     /* Unary operations preserve constantness of the operand.  */
2063 while (UNARY_CLASS_P (expr))
2064 expr = TREE_OPERAND (expr, 0);
2066 parm = unmodified_parm (NULL, expr, &size);
2067 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
2068 return add_condition (summary, index, size, NULL, CHANGED, NULL_TREE);
2069 if (is_gimple_min_invariant (expr))
2070 return false_predicate ();
2071 if (TREE_CODE (expr) == SSA_NAME)
2072 return nonconstant_names[SSA_NAME_VERSION (expr)];
     /* A binary expression is nonconstant when either operand is.  */
2073 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
2075 struct predicate p1 = will_be_nonconstant_expr_predicate
2076 (info, summary, TREE_OPERAND (expr, 0),
2077 nonconstant_names);
2078 struct predicate p2;
2079 if (true_predicate_p (&p1))
2080 return p1;
2081 p2 = will_be_nonconstant_expr_predicate (info, summary,
2082 TREE_OPERAND (expr, 1),
2083 nonconstant_names);
2084 return or_predicates (summary->conds, &p1, &p2);
2086 else if (TREE_CODE (expr) == COND_EXPR)
2088 struct predicate p1 = will_be_nonconstant_expr_predicate
2089 (info, summary, TREE_OPERAND (expr, 0),
2090 nonconstant_names);
2091 struct predicate p2;
2092 if (true_predicate_p (&p1))
2093 return p1;
2094 p2 = will_be_nonconstant_expr_predicate (info, summary,
2095 TREE_OPERAND (expr, 1),
2096 nonconstant_names);
2097 if (true_predicate_p (&p2))
2098 return p2;
2099 p1 = or_predicates (summary->conds, &p1, &p2);
2100 p2 = will_be_nonconstant_expr_predicate (info, summary,
2101 TREE_OPERAND (expr, 2),
2102 nonconstant_names);
2103 return or_predicates (summary->conds, &p1, &p2);
2105 else
2107 debug_tree (expr);
2108 gcc_unreachable ();
2110 return false_predicate ();
2114 /* Return predicate specifying when the STMT might have result that is not
2115 a compile time constant.  */
2117 static struct predicate
2118 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2119 struct inline_summary *summary,
2120 gimple *stmt,
2121 vec<predicate_t> nonconstant_names)
2123 struct predicate p = true_predicate ();
2124 ssa_op_iter iter;
2125 tree use;
2126 struct predicate op_non_const;
2127 bool is_load;
2128 int base_index;
2129 HOST_WIDE_INT size;
2130 struct agg_position_info aggpos;
2132 /* What statements might be optimized away
2133 when their arguments are constant.  */
2134 if (gimple_code (stmt) != GIMPLE_ASSIGN
2135 && gimple_code (stmt) != GIMPLE_COND
2136 && gimple_code (stmt) != GIMPLE_SWITCH
2137 && (gimple_code (stmt) != GIMPLE_CALL
2138 || !(gimple_call_flags (stmt) & ECF_CONST)))
2139 return p;
2141 /* Stores will stay anyway.  */
2142 if (gimple_store_p (stmt))
2143 return p;
2145 is_load = gimple_assign_load_p (stmt);
2147 /* Loads can be optimized when the value is known.  */
2148 if (is_load)
2150 tree op;
2151 gcc_assert (gimple_assign_single_p (stmt));
2152 op = gimple_assign_rhs1 (stmt);
2153 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
2154 &aggpos))
2155 return p;
2157 else
2158 base_index = -1;
2160 /* See if we understand all operands before we start
2161 adding conditionals.  */
2162 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2164 tree parm = unmodified_parm (stmt, use, NULL);
2165 /* For arguments we can build a condition.  */
2166 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2167 continue;
2168 if (TREE_CODE (use) != SSA_NAME)
2169 return p;
2170 /* If we know when operand is constant,
2171 we still can say something useful.  */
2172 if (!true_predicate_p (&nonconstant_names[SSA_NAME_VERSION (use)]))
2173 continue;
2174 return p;
     /* Start from the load's own nonconstantness (or false), then OR in
	each operand's predicate.  */
2177 if (is_load)
2178 op_non_const =
2179 add_condition (summary, base_index, size, &aggpos, CHANGED, NULL);
2180 else
2181 op_non_const = false_predicate ();
2182 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2184 HOST_WIDE_INT size;
2185 tree parm = unmodified_parm (stmt, use, &size);
2186 int index;
2188 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2190 if (index != base_index)
2191 p = add_condition (summary, index, size, NULL, CHANGED, NULL_TREE);
2192 else
2193 continue;
2195 else
2196 p = nonconstant_names[SSA_NAME_VERSION (use)];
2197 op_non_const = or_predicates (summary->conds, &p, &op_non_const);
     /* Record the result for the defined SSA name so later statements can
	reuse it.  */
2199 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2200 && gimple_op (stmt, 0)
2201 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2202 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2203 = op_non_const;
2204 return op_non_const;
/* Closure for the record_modified callback: tracks which basic blocks may
   modify the value we are analyzing, ignoring the analyzed statement's own
   definition.  */
struct record_modified_bb_info
{
  /* Set of indices of basic blocks in which the value may be set.  */
  bitmap bb_set;
  /* Statement being analyzed; its own vdef is not counted as a
     modification.  */
  gimple *stmt;
};
2213 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
2214 probability how often it changes between USE_BB.
2215 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
2216 is in different loop nest, we can do better.
2217 This is all just estimate. In theory we look for minimal cut separating
2218 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2219 anyway. */
2221 static basic_block
2222 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2224 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2225 if (l && l->header->frequency < init_bb->frequency)
2226 return l->header;
2227 return init_bb;
2230 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2231 set except for info->stmt. */
2233 static bool
2234 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2236 struct record_modified_bb_info *info =
2237 (struct record_modified_bb_info *) data;
2238 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2239 return false;
2240 bitmap_set_bit (info->bb_set,
2241 SSA_NAME_IS_DEFAULT_DEF (vdef)
2242 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2243 : get_minimal_bb
2244 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2245 gimple_bb (info->stmt))->index);
2246 return false;
/* Return probability (based on REG_BR_PROB_BASE) that the I-th parameter
   of call STMT will change since the last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  /* Look through WITH_SIZE_EXPR wrappers to the actual value.  */
  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of the value to change (i.e. when the init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when the call is executed N times more
     often than the statement defining the value, we take the frequency
     1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      /* Zero BB frequency gives us no scaling information; assume the
	 worst.  */
      if (!bb->frequency)
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	init_freq = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->frequency;

      /* Avoid returning 0 (reserved for invariants) from the scale below.  */
      if (!init_freq)
	init_freq = 1;
      if (init_freq < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory operand: walk aliased vdefs to find all blocks that may
	 modify it and scale by the cheapest of them.  */
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      /* A foldable constructor means the value is a compile time
	 invariant.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->frequency)
	return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			  NULL);
      /* If the value may be modified in the call's own block, it can change
	 on every invocation.  */
      if (bitmap_bit_p (info.bb_set, bb->index))
	{
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      /* Assume that every memory is initialized at entry.
	 TODO: Can we easily determine if the value is always defined
	 and thus we may skip the entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
	max = 1;

      /* The cheapest modifying block bounds the change frequency.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);

      BITMAP_FREE (info.bb_set);
      if (max < bb->frequency)
	return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
      else
	return REG_BR_PROB_BASE;
    }
}
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
			      inline_summary *summary, basic_block bb,
			      struct predicate *p,
			      vec<predicate_t> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* A single-predecessor block is trivially dominated by its source;
     the PHI (degenerate) is known whenever the block executes.  */
  if (single_pred_p (bb))
    {
      *p = false_predicate ();
      return true;
    }

  /* All predecessors must be (possibly empty-forwarded) successors of one
     common block FIRST_BB holding the controlling condition.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  /* Forwarder block: look through it to its unique predecessor.  */
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  /* The diamond head must end in a conditional with an IP-invariant RHS.  */
  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  /* An always-true predicate carries no useful information.  */
  if (true_predicate_p (p))
    return false;
  else
    return true;
}
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
			  struct predicate *p,
			  vec<predicate_t> nonconstant_names)
{
  unsigned i;

  /* The PHI result is non-constant whenever any non-invariant argument
     is; OR the argument predicates into *P.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = or_predicates (summary->conds, p,
			      &nonconstant_names[SSA_NAME_VERSION (arg)]);
	  /* Once always-true, further ORs cannot change the result.  */
	  if (true_predicate_p (p))
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      dump_predicate (dump_file, summary->conds, p);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
2433 /* Return predicate specifying when array index in access OP becomes non-constant. */
2435 static struct predicate
2436 array_index_predicate (inline_summary *info,
2437 vec< predicate_t> nonconstant_names, tree op)
2439 struct predicate p = false_predicate ();
2440 while (handled_component_p (op))
2442 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
2444 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2445 p = or_predicates (info->conds, &p,
2446 &nonconstant_names[SSA_NAME_VERSION
2447 (TREE_OPERAND (op, 1))]);
2449 op = TREE_OPERAND (op, 0);
2451 return p;
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.

   Returns the GIMPLE_COND consuming the builtin's result, or NULL when
   the pattern is not present in BB.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Chase ARG through conversions back to its defining statement;
	     MATCH is set when the origin is a comparison.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  /* Comparison found; pattern matches.  */
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  /* Conversions are looked through; keep chasing.  */
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The builtin's result must feed exactly one GIMPLE_COND.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
/* Return true when the basic block contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not
   necessary on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  /* Scan the block backwards from the end.  */
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The block must end in a RESX.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Everything before (apart from debug stmts and labels) must be a
     clobber.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2574 /* Return true if STMT compute a floating point expression that may be affected
2575 by -ffast-math and similar flags. */
2577 static bool
2578 fp_expression_p (gimple *stmt)
2580 ssa_op_iter i;
2581 tree op;
2583 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2584 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2585 return true;
2586 return false;
/* Compute function body size parameters for NODE.
   When EARLY is true, we compute only simple summaries without
   non-trivial predicates to drive the early inliner.

   Fills in NODE's inline summary: self size/time, per-statement
   size_time entries guarded by predicates, edge predicates, and the
   array_index / loop_iterations / loop_stride hint predicates.  */

static void
estimate_function_body_sizes (struct cgraph_node *node, bool early)
{
  sreal time = 0;
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = 2;
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  int freq;
  struct inline_summary *info = inline_summaries->get (node);
  struct predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate_t> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  predicate array_index = true_predicate ();
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  info->conds = NULL;
  info->entry = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
	  fbi.param_count = count_formal_params(node->decl);
	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length ());
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list.  */
  bb_predicate = true_predicate ();
  account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);

  bb_predicate = not_inlined_predicate ();
  account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
		     &bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info);
  /* Walk blocks in reverse post-order so predicates of dominators are
     computed before their dominated blocks.  */
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = compute_call_stmt_bb_frequency (node->decl, bb);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(struct predicate *) bb->aux;
	  else
	    bb_predicate = false_predicate ();
	}
      else
	bb_predicate = true_predicate ();

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  dump_predicate (dump_file, info->conds, &bb_predicate);
	}

      /* Compute predicates for the PHI results in this block.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  struct predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      /* The diamond-head predicate is shared by all PHIs of the
		 block; compute it once on the first PHI.  */
	      if (first_phi
		  && !phi_result_unknown_predicate (fbi.info, info, bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0, 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  struct predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       ((double) freq) / CGRAPH_FREQ_BASE, this_size,
		       this_time);
	    }

	  /* Track SSA-name array indices in loads and stores for the
	     array_index inline hint.  */
	  if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
	    {
	      struct predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_assign_rhs1 (stmt));
	      if (!false_predicate_p (&this_array_index))
		array_index =
		  and_predicates (info->conds, &array_index,
				  &this_array_index);
	    }
	  if (gimple_store_p (stmt) && nonconstant_names.exists ())
	    {
	      struct predicate this_array_index;
	      this_array_index =
		array_index_predicate (info, nonconstant_names,
				       gimple_get_lhs (stmt));
	      if (!false_predicate_p (&this_array_index))
		array_index =
		  and_predicates (info->conds, &array_index,
				  &this_array_index);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      struct inline_edge_summary *es = inline_edge_summary (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  struct predicate false_p = false_predicate ();
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  /* Estimate, for each argument, the probability that it
		     changes between invocations of the call.  */
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true_predicate ();
	  if (this_time || this_size)
	    {
	      this_time *= freq;

	      prob = eliminated_by_inlining_prob (stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      struct predicate p = and_predicates (info->conds, &bb_predicate,
						   &will_be_nonconstant);

	      /* We can ignore statement when we proved it is never going
		 to happen, but we can not do that for call statements
		 because edges are accounted specially.  */

	      if (!false_predicate_p (is_gimple_call (stmt)
				      ? &bb_predicate : &p))
		{
		  time += this_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      struct predicate ip = not_inlined_predicate ();
		      ip = and_predicates (info->conds, &ip, &bb_predicate);
		      account_size_time (info, this_size * prob,
					 (sreal)(this_time * prob)
					 / (CGRAPH_FREQ_BASE * 2), &ip,
					 &p);
		    }
		  if (prob != 2)
		    account_size_time (info, this_size * (2 - prob),
				       (sreal)(this_time * (2 - prob))
				       / (CGRAPH_FREQ_BASE * 2),
				       &bb_predicate,
				       &p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}

	      gcc_assert (time >= 0);
	      gcc_assert (size >= 0);
	    }
	}
    }
  set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
  time = time / CGRAPH_FREQ_BASE;
  free (order);

  if (nonconstant_names.exists () && !early)
    {
      struct loop *loop;
      predicate loop_iterations = true_predicate ();
      predicate loop_stride = true_predicate ();

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      /* Compute predicates under which each loop's iteration count
	 remains unknown.  */
      FOR_EACH_LOOP (loop, 0)
	{
	  vec<edge> exits;
	  edge ex;
	  unsigned int j;
	  struct tree_niter_desc niter_desc;
	  bb_predicate = *(struct predicate *) loop->header->aux;

	  exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	      {
		predicate will_be_nonconstant
		  = will_be_nonconstant_expr_predicate (fbi.info, info,
							niter_desc.niter,
							nonconstant_names);
		if (!true_predicate_p (&will_be_nonconstant))
		  will_be_nonconstant = and_predicates (info->conds,
							&bb_predicate,
							&will_be_nonconstant);
		if (!true_predicate_p (&will_be_nonconstant)
		    && !false_predicate_p (&will_be_nonconstant))
		  /* This is slightly imprecise.  We may want to represent each
		     loop with independent predicate.  */
		  loop_iterations =
		    and_predicates (info->conds, &loop_iterations,
				    &will_be_nonconstant);
	      }
	  exits.release ();
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  basic_block *body = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      bb_predicate = *(struct predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only induction variables with non-invariant step are
		     interesting for the loop_stride hint.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (fbi.info, info,
							  iv.step,
							  nonconstant_names);
		  if (!true_predicate_p (&will_be_nonconstant))
		    will_be_nonconstant
		      = and_predicates (info->conds, &bb_predicate,
					&will_be_nonconstant);
		  if (!true_predicate_p (&will_be_nonconstant)
		      && !false_predicate_p (&will_be_nonconstant))
		    /* This is slightly imprecise.  We may want to represent
		       each loop with independent predicate.  */
		    loop_stride = and_predicates (info->conds, &loop_stride,
						  &will_be_nonconstant);
		}
	    }
	  free (body);
	}
      set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
			  loop_iterations);
      set_hint_predicate (&inline_summaries->get (node)->loop_stride,
			  loop_stride);
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates stored in the aux
     fields.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  inline_summaries->get (node)->self_time = time;
  inline_summaries->get (node)->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  inline_update_overall_summary (node);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      dump_inline_summary (dump_file, node);
    }
}
/* Compute parameters of functions used by inliner.
   EARLY is true when we compute parameters for the early inliner.

   Resets and refills NODE's inline summary: stack size estimates,
   inlinability, signature-change ability, and (via
   estimate_function_body_sizes) size/time data.  */

void
compute_inline_parameters (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;
  struct inline_summary *info;

  gcc_assert (!node->global.inlined_to);

  inline_summary_alloc ();

  info = inline_summaries->get (node);
  reset_inline_summary (node, info);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;
  info->stack_frame_offset = 0;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no body to analyze; account a fixed call cost.  */
      struct inline_edge_summary *es = inline_edge_summary (node->callees);
      struct predicate t = true_predicate ();

      node->local.can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      account_size_time (info, INLINE_SIZE_SCALE * 2,
			 2, &t, &t);
      t = not_inlined_predicate ();
      account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
      inline_update_overall_summary (node);
      info->self_size = info->size;
      info->self_time = info->time;
      /* We can not inline instrumentation clones.  */
      if (node->thunk.add_pointer_bounds_args)
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_CHKP;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
	node->local.can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->local.can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply can not change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (DECL_BUILT_IN (cdecl)
		      && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
		      && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
			  || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
		    break;
		}
	      node->local.can_change_signature = !e;
	    }
	}
      /* Functions called by instrumentation thunk can't change signature
	 because instrumentation thunk modification is not supported.  */
      if (node->local.can_change_signature)
	for (e = node->callers; e; e = e->next_caller)
	  if (e->caller->thunk.thunk_p
	      && e->caller->thunk.add_pointer_bounds_args)
	    {
	      node->local.can_change_signature = false;
	      break;
	    }
      estimate_function_body_sizes (node, early);
      pop_cfun ();
    }
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  info->time = info->self_time;
  info->size = info->self_size;
  info->stack_frame_offset = 0;
  info->estimated_stack_size = info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     inline_update_overall_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  inline_update_overall_summary (node);
  gcc_assert (!(info->time - info->self_time).to_int ()
	      && info->size == info->self_size);
}
3133 /* Compute parameters of functions used by inliner using
3134 current_function_decl. */
3136 static unsigned int
3137 compute_inline_parameters_for_current (void)
3139 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
3140 return 0;
namespace {

/* Descriptor for the GIMPLE pass computing inline parameters.  */
const pass_data pass_data_inline_parameters =
{
  GIMPLE_PASS, /* type */
  "inline_param", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass wrapper invoking compute_inline_parameters on the current
   function.  */
class pass_inline_parameters : public gimple_opt_pass
{
public:
  pass_inline_parameters (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_inline_parameters, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_inline_parameters_for_current ();
    }

}; // class pass_inline_parameters

} // anon namespace
/* Factory function for the inline-parameters pass, called by the pass
   manager.  Caller owns the returned object.  */
gimple_opt_pass *
make_pass_inline_parameters (gcc::context *ctxt)
{
  return new pass_inline_parameters (ctxt);
}
/* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
   KNOWN_CONTEXTS and KNOWN_AGGS.

   Adjusts *SIZE and *TIME in place by the indirect-vs-direct call cost
   difference and returns true when the devirtualized target is known,
   available and inlinable.  */

static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  tree target;
  struct cgraph_node *callee;
  struct inline_summary *isummary;
  enum availability avail;
  bool speculative;

  /* With no context information there is nothing to devirtualize from.  */
  if (!known_vals.exists () && !known_contexts.exists ())
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
					 known_aggs, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = inline_summaries->get (callee);
  return isummary->inlinable;
}
/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
   handle edge E with probability PROB.
   Set HINTS if edge may be devirtualized.
   KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
   site.  */

static inline void
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
			     sreal *time,
			     int prob,
			     vec<tree> known_vals,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_jump_function_p> known_aggs,
			     inline_hints *hints)
{
  struct inline_edge_summary *es = inline_edge_summary (e);
  int call_size = es->call_stmt_size;
  int call_time = es->call_stmt_time;
  int cur_size;

  /* estimate_edge_devirt_benefit may shrink call_size/call_time when the
     indirect call can be resolved to a direct one.  */
  if (!e->callee
      && estimate_edge_devirt_benefit (e, &call_size, &call_time,
				       known_vals, known_contexts, known_aggs)
      && hints && e->maybe_hot_p ())
    *hints |= INLINE_HINT_indirect_call;
  cur_size = call_size * INLINE_SIZE_SCALE;
  *size += cur_size;
  if (min_size)
    *min_size += cur_size;
  /* Scale time by edge frequency and, when meaningful, by PROB.  */
  if (prob == REG_BR_PROB_BASE)
    *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
  else
    *time += ((sreal)call_time) * (prob * e->frequency)
	      / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
}
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   describe context of the call site.

   Recurses into already-inlined callees so their calls are accounted as
   part of NODE.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      inline_hints *hints,
			      clause_t possible_truths,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      /* Edges created after the summary was computed have no entry.  */
      if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
	continue;

      struct inline_edge_summary *es = inline_edge_summary (e);

      /* Do not care about zero sized builtins.  */
      if (e->inline_failed && !es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      if (!es->predicate
	  || evaluate_predicate (es->predicate, possible_truths))
	{
	  if (e->inline_failed)
	    {
	      /* Predicates of calls shall not use NOT_CHANGED codes,
		 so we do not need to compute probabilities.  */
	      estimate_edge_size_and_time (e, size,
					   es->predicate ? NULL : min_size,
					   time, REG_BR_PROB_BASE,
					   known_vals, known_contexts,
					   known_aggs, hints);
	    }
	  else
	    /* Already inlined edge: account its callee's calls instead.  */
	    estimate_calls_size_and_time (e->callee, size, min_size, time,
					  hints,
					  possible_truths,
					  known_vals, known_contexts,
					  known_aggs);
	}
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
	continue;

      struct inline_edge_summary *es = inline_edge_summary (e);
      if (!es->predicate
	  || evaluate_predicate (es->predicate, possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, REG_BR_PROB_BASE,
				     known_vals, known_contexts, known_aggs,
				     hints);
    }
}
3327 /* Estimate size and time needed to execute NODE assuming
3328 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3329 information about NODE's arguments.  If non-NULL use also probability
3330 information present in INLINE_PARAM_SUMMARY vector.
3331 Additionally determine hints determined by the context.  Finally compute
3332 minimal size needed for the call that is independent of the call context and
3333 can be used for fast estimates.  Return the values in RET_SIZE,
3334 RET_MIN_SIZE, RET_TIME and RET_HINTS.  */
3336 static void
3337 estimate_node_size_and_time (struct cgraph_node *node,
3338 clause_t possible_truths,
3339 clause_t nonspec_possible_truths,
3340 vec<tree> known_vals,
3341 vec<ipa_polymorphic_call_context> known_contexts,
3342 vec<ipa_agg_jump_function_p> known_aggs,
3343 int *ret_size, int *ret_min_size,
3344 sreal *ret_time,
3345 sreal *ret_nonspecialized_time,
3346 inline_hints *ret_hints,
3347 vec<inline_param_summary>
3348 inline_param_summary)
3350 struct inline_summary *info = inline_summaries->get (node);
3351 size_time_entry *e;
3352 int size = 0;
3353 sreal time = 0;
3354 int min_size = 0;
3355 inline_hints hints = 0;
3356 int i;
/* Dump conditions that were proven false in this context.  */
3358 if (dump_file && (dump_flags & TDF_DETAILS))
3360 bool found = false;
3361 fprintf (dump_file, " Estimating body: %s/%i\n"
3362 " Known to be false: ", node->name (),
3363 node->order);
3365 for (i = predicate_not_inlined_condition;
3366 i < (predicate_first_dynamic_condition
3367 + (int) vec_safe_length (info->conds)); i++)
3368 if (!(possible_truths & (1 << i)))
3370 if (found)
3371 fprintf (dump_file, ", ");
3372 found = true;
3373 dump_condition (dump_file, info->conds, i);
/* First account calls, then walk the size/time table.  */
3377 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
3378 known_vals, known_contexts, known_aggs);
3379 sreal nonspecialized_time = time;
3381 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3383 bool nonconst = evaluate_predicate (&e->nonconst_predicate,
3384 possible_truths);
3385 bool exec = evaluate_predicate (&e->exec_predicate,
3386 nonspec_possible_truths);
3387 gcc_assert (!nonconst || exec);
3388 if (exec)
3390 gcc_checking_assert (e->time >= 0);
3391 gcc_checking_assert (time >= 0);
3393 /* We compute specialized size only because size of nonspecialized
3394 copy is context independent.
3396 The difference between nonspecialized execution and specialized is
3397 that nonspecialized is not going to have optimized out computations
3398 known to be constant in a specialized setting.  */
3399 if (nonconst)
3400 size += e->size;
3401 nonspecialized_time += e->time;
3402 if (!nonconst)
3404 else if (!inline_param_summary.exists ())
3406 if (nonconst)
3407 time += e->time;
3409 else
/* With per-parameter change probabilities, scale time by the
   probability the predicate holds.  */
3411 int prob = predicate_probability (info->conds,
3412 &e->nonconst_predicate,
3413 possible_truths,
3414 inline_param_summary);
3415 gcc_checking_assert (prob >= 0);
3416 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3417 time += e->time * prob / REG_BR_PROB_BASE;
3419 gcc_checking_assert (time >= 0);
/* Entry 0 is unconditional by construction; its size is the
   context-independent minimum.  */
3422 gcc_checking_assert (true_predicate_p (&(*info->entry)[0].exec_predicate));
3423 gcc_checking_assert (true_predicate_p (&(*info->entry)[0].nonconst_predicate));
3424 min_size = (*info->entry)[0].size;
3425 gcc_checking_assert (size >= 0);
3426 gcc_checking_assert (time >= 0);
3427 /* nonspecialized_time should be always bigger than specialized time.
3428 Roundoff issues however may get into the way.  */
3429 gcc_checking_assert ((nonspecialized_time - time) >= -1);
3431 /* Roundoff issues may make specialized time bigger than nonspecialized
3432 time.  We do not really want that to happen because some heuristics
3433 may get confused by seeing negative speedups.  */
3434 if (time > nonspecialized_time)
3435 time = nonspecialized_time;
3437 if (info->loop_iterations
3438 && !evaluate_predicate (info->loop_iterations, possible_truths))
3439 hints |= INLINE_HINT_loop_iterations;
3440 if (info->loop_stride
3441 && !evaluate_predicate (info->loop_stride, possible_truths))
3442 hints |= INLINE_HINT_loop_stride;
3443 if (info->array_index
3444 && !evaluate_predicate (info->array_index, possible_truths))
3445 hints |= INLINE_HINT_array_index;
3446 if (info->scc_no)
3447 hints |= INLINE_HINT_in_scc;
3448 if (DECL_DECLARED_INLINE_P (node->decl))
3449 hints |= INLINE_HINT_declared_inline;
/* Scale back from the internal fixed-point representation.  */
3451 size = RDIV (size, INLINE_SIZE_SCALE);
3452 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
3454 if (dump_file && (dump_flags & TDF_DETAILS))
3455 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3456 time.to_double (), nonspecialized_time.to_double ());
3457 if (ret_time)
3458 *ret_time = time;
3459 if (ret_nonspecialized_time)
3460 *ret_nonspecialized_time = nonspecialized_time;
3461 if (ret_size)
3462 *ret_size = size;
3463 if (ret_min_size)
3464 *ret_min_size = min_size;
3465 if (ret_hints)
3466 *ret_hints = hints;
3467 return;
3471 /* Estimate size and time needed to execute callee of EDGE assuming that
3472 parameters known to be constant at caller of EDGE are propagated.
3473 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3474 and types for parameters.  Results are returned via RET_SIZE, RET_TIME,
     RET_NONSPEC_TIME and HINTS.  */
3476 void
3477 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3478 vec<tree> known_vals,
3479 vec<ipa_polymorphic_call_context>
3480 known_contexts,
3481 vec<ipa_agg_jump_function_p> known_aggs,
3482 int *ret_size, sreal *ret_time,
3483 sreal *ret_nonspec_time,
3484 inline_hints *hints)
3486 clause_t clause, nonspec_clause;
/* Derive the truth clauses from the known argument values, then
   delegate to the generic estimator.  */
3488 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
3489 &clause, &nonspec_clause);
3490 estimate_node_size_and_time (node, clause, nonspec_clause,
3491 known_vals, known_contexts,
3492 known_aggs, ret_size, NULL, ret_time,
3493 ret_nonspec_time, hints, vNULL);
3496 /* Translate all conditions from callee representation into caller
3497 representation and symbolically evaluate predicate P into new predicate.
3499 INFO is inline_summary of function we are adding predicate into, CALLEE_INFO
3500 is summary of function predicate P is from.  OPERAND_MAP is array giving
3501 callee formal IDs the caller formal IDs.  POSSIBLE_TRUTHS is the clause of all
3502 callee conditions that may be true in caller context.  TOPLEV_PREDICATE is
3503 predicate under which callee is executed.  OFFSET_MAP is an array of
3504 offsets that need to be added to conditions, negative offset means that
3505 conditions relying on values passed by reference have to be discarded
3506 because they might not be preserved (and should be considered offset zero
3507 for other purposes).  */
3509 static struct predicate
3510 remap_predicate (struct inline_summary *info,
3511 struct inline_summary *callee_info,
3512 struct predicate *p,
3513 vec<int> operand_map,
3514 vec<int> offset_map,
3515 clause_t possible_truths, struct predicate *toplev_predicate)
3517 int i;
3518 struct predicate out = true_predicate ();
3520 /* True predicate is easy.  */
3521 if (true_predicate_p (p))
3522 return *toplev_predicate;
/* P is a conjunction of clauses; remap each clause and AND them into OUT.  */
3523 for (i = 0; p->clause[i]; i++)
3525 clause_t clause = p->clause[i];
3526 int cond;
3527 struct predicate clause_predicate = false_predicate ();
3529 gcc_assert (i < MAX_CLAUSES);
3531 for (cond = 0; cond < NUM_CONDITIONS; cond++)
3532 /* Do we have condition we can't disprove?  */
3533 if (clause & possible_truths & (1 << cond))
3535 struct predicate cond_predicate;
3536 /* Work out if the condition can translate to predicate in the
3537 inlined function.  */
3538 if (cond >= predicate_first_dynamic_condition)
3540 struct condition *c;
3542 c = &(*callee_info->conds)[cond
3544 predicate_first_dynamic_condition];
3545 /* See if we can remap condition operand to caller's operand.
3546 Otherwise give up.  */
3547 if (!operand_map.exists ()
3548 || (int) operand_map.length () <= c->operand_num
3549 || operand_map[c->operand_num] == -1
3550 /* TODO: For non-aggregate conditions, adding an offset is
3551 basically an arithmetic jump function processing which
3552 we should support in future.  */
3553 || ((!c->agg_contents || !c->by_ref)
3554 && offset_map[c->operand_num] > 0)
3555 || (c->agg_contents && c->by_ref
3556 && offset_map[c->operand_num] < 0))
3557 cond_predicate = true_predicate ();
3558 else
3560 struct agg_position_info ap;
3561 HOST_WIDE_INT offset_delta = offset_map[c->operand_num];
3562 if (offset_delta < 0)
3564 gcc_checking_assert (!c->agg_contents || !c->by_ref);
3565 offset_delta = 0;
3567 gcc_assert (!c->agg_contents
3568 || c->by_ref || offset_delta == 0);
3569 ap.offset = c->offset + offset_delta;
3570 ap.agg_contents = c->agg_contents;
3571 ap.by_ref = c->by_ref;
3572 cond_predicate = add_condition (info,
3573 operand_map[c->operand_num],
3574 c->size, &ap, c->code,
3575 c->val);
3578 /* Fixed conditions remains same, construct single
3579 condition predicate.  */
3580 else
3582 cond_predicate.clause[0] = 1 << cond;
3583 cond_predicate.clause[1] = 0;
3585 clause_predicate = or_predicates (info->conds, &clause_predicate,
3586 &cond_predicate);
3588 out = and_predicates (info->conds, &out, &clause_predicate);
/* The callee body only runs under TOPLEV_PREDICATE; conjoin it.  */
3590 return and_predicates (info->conds, &out, toplev_predicate);
3594 /* Update summary information of inline clones after inlining.
3595 Compute peak stack usage.  Recurses over NODE's inlined callees,
     adding DEPTH to the loop depth of their call edges.  */
3597 static void
3598 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3600 struct cgraph_edge *e;
3601 struct inline_summary *callee_info = inline_summaries->get (node);
3602 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
3603 HOST_WIDE_INT peak;
/* The callee's frame is placed right after the caller's frame.  */
3605 callee_info->stack_frame_offset
3606 = caller_info->stack_frame_offset
3607 + caller_info->estimated_self_stack_size;
3608 peak = callee_info->stack_frame_offset
3609 + callee_info->estimated_self_stack_size;
3610 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
3611 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
3612 ipa_propagate_frequency (node);
3613 for (e = node->callees; e; e = e->next_callee)
3615 if (!e->inline_failed)
3616 inline_update_callee_summaries (e->callee, depth);
3617 inline_edge_summary (e)->loop_depth += depth;
3619 for (e = node->indirect_calls; e; e = e->next_callee)
3620 inline_edge_summary (e)->loop_depth += depth;
3623 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3624 When function A is inlined in B and A calls C with parameter that
3625 changes with probability PROB1 and C is known to be passthrough
3626 of argument of B that changes with probability PROB2, the probability
3627 of change is now PROB1*PROB2.  */
3629 static void
3630 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3631 struct cgraph_edge *edge)
3633 if (ipa_node_params_sum)
3635 int i;
3636 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3637 struct inline_edge_summary *es = inline_edge_summary (edge);
3638 struct inline_edge_summary *inlined_es
3639 = inline_edge_summary (inlined_edge);
3641 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3643 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3644 if (jfunc->type == IPA_JF_PASS_THROUGH
3645 || jfunc->type == IPA_JF_ANCESTOR)
3647 int id = jfunc->type == IPA_JF_PASS_THROUGH
3648 ? ipa_get_jf_pass_through_formal_id (jfunc)
3649 : ipa_get_jf_ancestor_formal_id (jfunc);
3650 if (id < (int) inlined_es->param.length ())
3652 int prob1 = es->param[i].change_prob;
3653 int prob2 = inlined_es->param[id].change_prob;
3654 int prob = combine_probabilities (prob1, prob2);
/* Avoid rounding a nonzero combined probability down to zero.  */
3656 if (prob1 && prob2 && !prob)
3657 prob = 1;
3659 es->param[i].change_prob = prob;
3666 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3668 Remap predicates of callees of NODE.  Rest of arguments match
3669 remap_predicate.
3671 Also update change probabilities.  Recurses into callees that were
     themselves already inlined into NODE.  */
3673 static void
3674 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3675 struct cgraph_node *node,
3676 struct inline_summary *info,
3677 struct inline_summary *callee_info,
3678 vec<int> operand_map,
3679 vec<int> offset_map,
3680 clause_t possible_truths,
3681 struct predicate *toplev_predicate)
3683 struct cgraph_edge *e, *next;
3684 for (e = node->callees; e; e = next)
3686 struct inline_edge_summary *es = inline_edge_summary (e);
3687 struct predicate p;
/* edge_set_predicate may remove E from the list; fetch next first.  */
3688 next = e->next_callee;
3690 if (e->inline_failed)
3692 remap_edge_change_prob (inlined_edge, e);
3694 if (es->predicate)
3696 p = remap_predicate (info, callee_info,
3697 es->predicate, operand_map, offset_map,
3698 possible_truths, toplev_predicate);
3699 edge_set_predicate (e, &p);
3701 else
3702 edge_set_predicate (e, toplev_predicate);
3704 else
3705 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3706 operand_map, offset_map, possible_truths,
3707 toplev_predicate);
3709 for (e = node->indirect_calls; e; e = next)
3711 struct inline_edge_summary *es = inline_edge_summary (e);
3712 struct predicate p;
3713 next = e->next_callee;
3715 remap_edge_change_prob (inlined_edge, e);
3716 if (es->predicate)
3718 p = remap_predicate (info, callee_info,
3719 es->predicate, operand_map, offset_map,
3720 possible_truths, toplev_predicate);
3721 edge_set_predicate (e, &p);
3723 else
3724 edge_set_predicate (e, toplev_predicate);
3728 /* Same as remap_predicate, but set result into hint *HINT.  */
3730 static void
3731 remap_hint_predicate (struct inline_summary *info,
3732 struct inline_summary *callee_info,
3733 struct predicate **hint,
3734 vec<int> operand_map,
3735 vec<int> offset_map,
3736 clause_t possible_truths,
3737 struct predicate *toplev_predicate)
3739 predicate p;
3741 if (!*hint)
3742 return;
3743 p = remap_predicate (info, callee_info,
3744 *hint,
3745 operand_map, offset_map,
3746 possible_truths, toplev_predicate);
/* Trivially true/false predicates carry no hint information.  */
3747 if (!false_predicate_p (&p) && !true_predicate_p (&p))
/* NOTE(review): this branch looks dead — *HINT was checked non-NULL
   above and nothing here clears it; the else arm always runs.  Confirm
   before simplifying.  */
3749 if (!*hint)
3750 set_hint_predicate (hint, p);
3751 else
3752 **hint = and_predicates (info->conds, *hint, &p);
3756 /* We inlined EDGE.  Update summary of the function we inlined into.
     Remaps the callee's size/time entries, edge predicates and hint
     predicates into the caller's summary and releases per-edge data
     that is no longer maintained.  */
3758 void
3759 inline_merge_summary (struct cgraph_edge *edge)
3761 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3762 struct cgraph_node *to = (edge->caller->global.inlined_to
3763 ? edge->caller->global.inlined_to : edge->caller);
3764 struct inline_summary *info = inline_summaries->get (to);
3765 clause_t clause = 0; /* not_inline is known to be false.  */
3766 size_time_entry *e;
3767 vec<int> operand_map = vNULL;
3768 vec<int> offset_map = vNULL;
3769 int i;
3770 struct predicate toplev_predicate;
3771 struct predicate true_p = true_predicate ();
3772 struct inline_edge_summary *es = inline_edge_summary (edge);
3774 if (es->predicate)
3775 toplev_predicate = *es->predicate;
3776 else
3777 toplev_predicate = true_predicate ();
3779 info->fp_expressions |= callee_info->fp_expressions;
3781 if (callee_info->conds)
3782 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
/* Build maps translating callee formal parameter IDs (and aggregate
   offsets) to the caller's formals, from the edge's jump functions.  */
3783 if (ipa_node_params_sum && callee_info->conds)
3785 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3786 int count = ipa_get_cs_argument_count (args);
3787 int i;
3789 if (count)
3791 operand_map.safe_grow_cleared (count);
3792 offset_map.safe_grow_cleared (count);
3794 for (i = 0; i < count; i++)
3796 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3797 int map = -1;
3799 /* TODO: handle non-NOPs when merging.  */
3800 if (jfunc->type == IPA_JF_PASS_THROUGH)
3802 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3803 map = ipa_get_jf_pass_through_formal_id (jfunc);
3804 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3805 offset_map[i] = -1;
3807 else if (jfunc->type == IPA_JF_ANCESTOR)
3809 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3810 if (offset >= 0 && offset < INT_MAX)
3812 map = ipa_get_jf_ancestor_formal_id (jfunc);
3813 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3814 offset = -1;
3815 offset_map[i] = offset;
3818 operand_map[i] = map;
3819 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
/* Merge each callee size/time entry into the caller under the remapped
   predicates, scaling time by edge frequency and change probability.  */
3822 for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
3824 struct predicate p = remap_predicate (info, callee_info,
3825 &e->exec_predicate, operand_map,
3826 offset_map, clause,
3827 &toplev_predicate);
3828 struct predicate nonconstp
3829 = remap_predicate (info, callee_info,
3830 &e->nonconst_predicate, operand_map,
3831 offset_map, clause,
3832 &toplev_predicate);
3833 if (!false_predicate_p (&p) && !false_predicate_p (&nonconstp))
3835 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3836 int prob = predicate_probability (callee_info->conds,
3837 &e->nonconst_predicate,
3838 clause, es->param);
3839 add_time = add_time * prob / REG_BR_PROB_BASE;
3840 if (prob != REG_BR_PROB_BASE
3841 && dump_file && (dump_flags & TDF_DETAILS))
3843 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3844 (double) prob / REG_BR_PROB_BASE);
3846 account_size_time (info, e->size, add_time, &p, &nonconstp);
3849 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3850 offset_map, clause, &toplev_predicate);
3851 remap_hint_predicate (info, callee_info,
3852 &callee_info->loop_iterations,
3853 operand_map, offset_map, clause, &toplev_predicate);
3854 remap_hint_predicate (info, callee_info,
3855 &callee_info->loop_stride,
3856 operand_map, offset_map, clause, &toplev_predicate);
3857 remap_hint_predicate (info, callee_info,
3858 &callee_info->array_index,
3859 operand_map, offset_map, clause, &toplev_predicate);
3861 inline_update_callee_summaries (edge->callee,
3862 inline_edge_summary (edge)->loop_depth);
3864 /* We do not maintain predicates of inlined edges, free it.  */
3865 edge_set_predicate (edge, &true_p);
3866 /* Similarly remove param summaries.  */
3867 es->param.release ();
3868 operand_map.release ();
3869 offset_map.release ();
3872 /* For performance reasons inline_merge_summary is not updating overall size
3873 and time.  Recompute it.  */
3875 void
3876 inline_update_overall_summary (struct cgraph_node *node)
3878 struct inline_summary *info = inline_summaries->get (node);
3879 size_time_entry *e;
3880 int i;
/* Sum all size/time entries unconditionally, then add call costs
   assuming every condition except "false" may hold.  */
3882 info->size = 0;
3883 info->time = 0;
3884 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3886 info->size += e->size;
3887 info->time += e->time;
3889 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3890 &info->time, NULL,
3891 ~(clause_t) (1 << predicate_false_condition),
3892 vNULL, vNULL, vNULL);
/* Round back from the INLINE_SIZE_SCALE fixed-point representation.  */
3893 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3896 /* Return hints derived from EDGE.  */
3898 simple_edge_hints (struct cgraph_edge *edge)
3900 int hints = 0;
3901 struct cgraph_node *to = (edge->caller->global.inlined_to
3902 ? edge->caller->global.inlined_to : edge->caller);
3903 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
/* Caller and callee in the same strongly connected component (but not
   directly recursive) — inlining may break the cycle.  */
3904 if (inline_summaries->get (to)->scc_no
3905 && inline_summaries->get (to)->scc_no
3906 == inline_summaries->get (callee)->scc_no
3907 && !edge->recursive_p ())
3908 hints |= INLINE_HINT_same_scc;
3910 if (callee->lto_file_data && edge->caller->lto_file_data
3911 && edge->caller->lto_file_data != callee->lto_file_data
3912 && !callee->merged_comdat && !callee->icf_merged)
3913 hints |= INLINE_HINT_cross_module;
3915 return hints;
3918 /* Estimate the time cost for the caller when inlining EDGE.
3919 Only to be called via estimate_edge_time, that handles the
3920 caching mechanism.
3922 When caching, also update the cache entry.  Compute both time and
3923 size, since we always need both metrics eventually.  */
3925 sreal
3926 do_estimate_edge_time (struct cgraph_edge *edge)
3928 sreal time, nonspec_time;
3929 int size;
3930 inline_hints hints;
3931 struct cgraph_node *callee;
3932 clause_t clause, nonspec_clause;
3933 vec<tree> known_vals;
3934 vec<ipa_polymorphic_call_context> known_contexts;
3935 vec<ipa_agg_jump_function_p> known_aggs;
3936 struct inline_edge_summary *es = inline_edge_summary (edge);
3937 int min_size;
3939 callee = edge->callee->ultimate_alias_target ();
3941 gcc_checking_assert (edge->inline_failed);
3942 evaluate_properties_for_edge (edge, true,
3943 &clause, &nonspec_clause, &known_vals,
3944 &known_contexts, &known_aggs);
3945 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3946 known_contexts, known_aggs, &size, &min_size,
3947 &time, &nonspec_time, &hints, es->param);
3949 /* When we have profile feedback, we can quite safely identify hot
3950 edges and for those we disable size limits.  Don't do that when
3951 probability that caller will call the callee is low however, since it
3952 may hurt optimization of the caller's hot path.  */
3953 if (edge->count && edge->maybe_hot_p ()
3954 && (edge->count * 2
3955 > (edge->caller->global.inlined_to
3956 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3957 hints |= INLINE_HINT_known_hot;
3959 known_vals.release ();
3960 known_contexts.release ();
3961 known_aggs.release ();
3962 gcc_checking_assert (size >= 0);
3963 gcc_checking_assert (time >= 0);
3965 /* When caching, update the cache entry.  */
3966 if (edge_growth_cache.exists ())
3968 inline_summaries->get (edge->callee)->min_size = min_size;
3969 if ((int) edge_growth_cache.length () <= edge->uid)
3970 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3971 edge_growth_cache[edge->uid].time = time;
3972 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
/* Size and hints are stored biased so that 0 means "not cached".  */
3974 edge_growth_cache[edge->uid].size = size + (size >= 0);
3975 hints |= simple_edge_hints (edge);
3976 edge_growth_cache[edge->uid].hints = hints + 1;
3978 return time;
3982 /* Return estimated callee growth after inlining EDGE.
3983 Only to be called via estimate_edge_size.  */
3986 do_estimate_edge_size (struct cgraph_edge *edge)
3988 int size;
3989 struct cgraph_node *callee;
3990 clause_t clause, nonspec_clause;
3991 vec<tree> known_vals;
3992 vec<ipa_polymorphic_call_context> known_contexts;
3993 vec<ipa_agg_jump_function_p> known_aggs;
3995 /* When we do caching, use do_estimate_edge_time to populate the entry.  */
3997 if (edge_growth_cache.exists ())
3999 do_estimate_edge_time (edge);
4000 size = edge_growth_cache[edge->uid].size;
4001 gcc_checking_assert (size);
/* Undo the +1 bias used so that a cached 0 is distinguishable.  */
4002 return size - (size > 0);
4005 callee = edge->callee->ultimate_alias_target ();
4007 /* Early inliner runs without caching, go ahead and do the dirty work.  */
4008 gcc_checking_assert (edge->inline_failed);
4009 evaluate_properties_for_edge (edge, true,
4010 &clause, &nonspec_clause,
4011 &known_vals, &known_contexts,
4012 &known_aggs);
4013 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
4014 known_contexts, known_aggs, &size, NULL, NULL,
4015 NULL, NULL, vNULL);
4016 known_vals.release ();
4017 known_contexts.release ();
4018 known_aggs.release ();
4019 return size;
4023 /* Estimate the growth of the caller when inlining EDGE.
4024 Only to be called via estimate_edge_hints.  */
4026 inline_hints
4027 do_estimate_edge_hints (struct cgraph_edge *edge)
4029 inline_hints hints;
4030 struct cgraph_node *callee;
4031 clause_t clause, nonspec_clause;
4032 vec<tree> known_vals;
4033 vec<ipa_polymorphic_call_context> known_contexts;
4034 vec<ipa_agg_jump_function_p> known_aggs;
4036 /* When we do caching, use do_estimate_edge_time to populate the entry.  */
4038 if (edge_growth_cache.exists ())
4040 do_estimate_edge_time (edge);
4041 hints = edge_growth_cache[edge->uid].hints;
4042 gcc_checking_assert (hints);
/* Undo the +1 bias used so that cached 0 hints are distinguishable.  */
4043 return hints - 1;
4046 callee = edge->callee->ultimate_alias_target ();
4048 /* Early inliner runs without caching, go ahead and do the dirty work.  */
4049 gcc_checking_assert (edge->inline_failed);
4050 evaluate_properties_for_edge (edge, true,
4051 &clause, &nonspec_clause,
4052 &known_vals, &known_contexts,
4053 &known_aggs);
4054 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
4055 known_contexts, known_aggs, NULL, NULL,
4056 NULL, NULL, &hints, vNULL);
4057 known_vals.release ();
4058 known_contexts.release ();
4059 known_aggs.release ();
4060 hints |= simple_edge_hints (edge);
4061 return hints;
4064 /* Estimate the size of NODE after inlining EDGE which should be an
4065 edge to either NODE or a call inlined into NODE.  If the edge's
     predicate is known false the call never executes and size is
     unchanged.  */
4068 estimate_size_after_inlining (struct cgraph_node *node,
4069 struct cgraph_edge *edge)
4071 struct inline_edge_summary *es = inline_edge_summary (edge);
4072 if (!es->predicate || !false_predicate_p (es->predicate))
4074 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
4075 gcc_assert (size >= 0);
4076 return size;
4078 return inline_summaries->get (node)->size;
/* Accumulator passed to do_estimate_growth_1 when walking callers.  */
4082 struct growth_data
4084 struct cgraph_node *node;   /* Node whose growth is being estimated.  */
4085 bool self_recursive;        /* Set when a recursive caller edge is seen.  */
4086 bool uninlinable;           /* Set when some edge cannot be inlined.  */
4087 int growth;                 /* Accumulated growth over all caller edges.  */
4091 /* Worker for do_estimate_growth.  Collect growth for all callers.
     DATA points to a growth_data accumulator.  Returns false so the
     symbol-table walk continues over all aliases.  */
4093 static bool
4094 do_estimate_growth_1 (struct cgraph_node *node, void *data)
4096 struct cgraph_edge *e;
4097 struct growth_data *d = (struct growth_data *) data;
4099 for (e = node->callers; e; e = e->next_caller)
4101 gcc_checking_assert (e->inline_failed);
4103 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4105 d->uninlinable = true;
4106 continue;
4109 if (e->recursive_p ())
4111 d->self_recursive = true;
4112 continue;
4114 d->growth += estimate_edge_growth (e);
4116 return false;
4120 /* Estimate the growth caused by inlining NODE into all callees.  */
4123 estimate_growth (struct cgraph_node *node)
4125 struct growth_data d = { node, false, false, 0 };
4126 struct inline_summary *info = inline_summaries->get (node);
4128 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
4130 /* For self recursive functions the growth estimation really should be
4131 infinity.  We don't want to return very large values because the growth
4132 plays various roles in badness computation fractions.  Be sure to not
4133 return zero or negative growths.  */
4134 if (d.self_recursive)
4135 d.growth = d.growth < info->size ? info->size : d.growth;
4136 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
4138 else
/* The function body may disappear when all calls are inlined.  */
4140 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
4141 d.growth -= info->size;
4142 /* COMDAT functions are very often not shared across multiple units
4143 since they come from various template instantiations.
4144 Take this into account.  */
4145 else if (DECL_COMDAT (node->decl)
4146 && node->can_remove_if_no_direct_calls_p ())
4147 d.growth -= (info->size
4148 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
4149 + 50) / 100;
4152 return d.growth;
4155 /* Verify if there are fewer than MAX_CALLERS.  Returns true (stop the
     walk) when the limit is hit, an uninlinable caller is found, or NODE
     cannot be removed anyway; decrements *MAX_CALLERS as it goes and
     recurses into aliases.  */
4157 static bool
4158 check_callers (cgraph_node *node, int *max_callers)
4160 ipa_ref *ref;
4162 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
4163 return true;
4165 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
4167 (*max_callers)--;
4168 if (!*max_callers
4169 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4170 return true;
4173 FOR_EACH_ALIAS (node, ref)
4174 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
4175 return true;
4177 return false;
4181 /* Make cheap estimation if growth of NODE is likely positive knowing
4182 EDGE_GROWTH of one particular edge.
4183 We assume that most of other edges will have similar growth
4184 and skip computation if there are too many callers.  */
4186 bool
4187 growth_likely_positive (struct cgraph_node *node,
4188 int edge_growth)
4190 int max_callers;
4191 struct cgraph_edge *e;
4192 gcc_checking_assert (edge_growth > 0);
4194 /* First quickly check if NODE is removable at all.  */
4195 if (DECL_EXTERNAL (node->decl))
4196 return true;
4197 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
4198 || node->address_taken)
4199 return true;
/* Caller-count threshold past which the body cannot pay for itself,
   assuming each edge grows roughly like EDGE_GROWTH.  */
4201 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
4203 for (e = node->callers; e; e = e->next_caller)
4205 max_callers--;
4206 if (!max_callers
4207 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4208 return true;
4211 ipa_ref *ref;
4212 FOR_EACH_ALIAS (node, ref)
4213 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
4214 return true;
4216 /* Unlike for functions called once, we play unsafe with
4217 COMDATs.  We can allow that since we know functions
4218 in consideration are small (and thus risk is small) and
4219 moreover grow estimates already accounts that COMDAT
4220 functions may or may not disappear when eliminated from
4221 current unit.  With good probability making aggressive
4222 choice in all units is going to make overall program
4223 smaller.  */
4224 if (DECL_COMDAT (node->decl))
4226 if (!node->can_remove_if_no_direct_calls_p ())
4227 return true;
4229 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
4230 return true;
/* Cheap checks inconclusive — fall back to the full estimate.  */
4232 return estimate_growth (node) > 0;
4236 /* This function performs intraprocedural analysis in NODE that is required to
4237 inline indirect calls.  */
4239 static void
4240 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4242 ipa_analyze_node (node);
4243 if (dump_file && (dump_flags & TDF_DETAILS))
4245 ipa_print_node_params (dump_file, node);
4246 ipa_print_node_jump_functions (dump_file, node);
4251 /* Note function body size.  Computes inline parameters for NODE; when
     not optimizing, marks all of NODE's call edges as not inlinable.  */
4253 void
4254 inline_analyze_function (struct cgraph_node *node)
4256 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4258 if (dump_file)
4259 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
4260 node->name (), node->order);
4261 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4262 inline_indirect_intraprocedural_analysis (node);
4263 compute_inline_parameters (node, false);
4264 if (!optimize)
4266 struct cgraph_edge *e;
4267 for (e = node->callees; e; e = e->next_callee)
4268 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4269 for (e = node->indirect_calls; e; e = e->next_callee)
4270 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4273 pop_cfun ();
4277 /* Called when new function is inserted to callgraph late.  Analyze the
     body so the new node has an inline summary.  */
4279 void
4280 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
4282 inline_analyze_function (node);
4285 /* Note function body size.  Entry point analyzing all defined,
     non-alias functions for inlining.  */
4287 void
4288 inline_generate_summary (void)
4290 struct cgraph_node *node;
4292 FOR_EACH_DEFINED_FUNCTION (node)
4293 if (DECL_STRUCT_FUNCTION (node->decl))
4294 node->local.versionable = tree_versionable_function_p (node->decl);
4296 /* When not optimizing, do not bother to analyze.  Inlining is still done
4297 because edge redirection needs to happen there.  */
4298 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
4299 return;
4301 if (!inline_summaries)
4302 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
4304 inline_summaries->enable_insertion_hook ();
4306 ipa_register_cgraph_hooks ();
4307 inline_free_summary ();
4309 FOR_EACH_DEFINED_FUNCTION (node)
4310 if (!node->alias)
4311 inline_analyze_function (node);
4315 /* Read predicate from IB.  Clauses are streamed as uhwi values
     terminated by a zero clause.  */
4317 static struct predicate
4318 read_predicate (struct lto_input_block *ib)
4320 struct predicate out;
4321 clause_t clause;
4322 int k = 0;
4326 gcc_assert (k <= MAX_CLAUSES);
4327 clause = out.clause[k++] = streamer_read_uhwi (ib);
4329 while (clause);
4331 /* Zero-initialize the remaining clauses in OUT.  */
4332 while (k <= MAX_CLAUSES)
4333 out.clause[k++] = 0;
4335 return out;
/* Read inline summary for edge E from IB.  Must consume exactly what
   write_inline_edge_summary emitted, in the same order.  */

static void
read_inline_edge_summary (struct lto_input_block *ib, struct cgraph_edge *e)
{
  struct inline_edge_summary *es = inline_edge_summary (e);
  struct predicate p;
  int length, i;

  es->call_stmt_size = streamer_read_uhwi (ib);
  es->call_stmt_time = streamer_read_uhwi (ib);
  es->loop_depth = streamer_read_uhwi (ib);
  p = read_predicate (ib);
  edge_set_predicate (e, &p);
  /* Per-parameter change probabilities; a zero length means none were
     streamed for this edge.  */
  length = streamer_read_uhwi (ib);
  if (length)
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
}
/* Stream in inline summaries from the section DATA (of length LEN) of
   FILE_DATA.  The layout read here must stay in exact sync with the
   layout produced by inline_write_summary.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  /* Leading count of per-function records.  */
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct inline_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = inline_summaries->get (node);

      /* Self and global values start out identical; inlining decisions
	 later update the global ones.  */
      info->estimated_stack_size
	= info->estimated_self_stack_size = streamer_read_uhwi (&ib);
      info->size = info->self_size = streamer_read_uhwi (&ib);
      info->time = info->self_time = sreal::stream_in (&ib);

      bp = streamer_read_bitpack (&ib);
      info->inlinable = bp_unpack_value (&bp, 1);
      info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
      info->fp_expressions = bp_unpack_value (&bp, 1);

      /* Conditions referenced by the predicates below.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  /* Offset is streamed only for aggregate-content conditions.  */
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  vec_safe_push (info->conds, c);
	}
      /* Predicated size/time entries.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info->entry);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate = read_predicate (&ib);
	  e.nonconst_predicate = read_predicate (&ib);

	  vec_safe_push (info->entry, e);
	}

      /* Inlining-hint predicates.  */
      p = read_predicate (&ib);
      set_hint_predicate (&info->loop_iterations, p);
      p = read_predicate (&ib);
      set_hint_predicate (&info->loop_stride, p);
      p = read_predicate (&ib);
      set_hint_predicate (&info->array_index, p);
      /* Edge summaries follow, callees first, then indirect calls —
	 the same order they were written.  */
      for (e = node->callees; e; e = e->next_callee)
	read_inline_edge_summary (&ib, e);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_inline_edge_summary (&ib, e);
    }

  lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to write them
   twice.  */

void
inline_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  inline_summary_alloc ();

  /* Process every input file's inline-summary section.  */
  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data = lto_get_section_data (file_data,
					       LTO_section_inline_summary,
					       NULL, &len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with different version of compiler or different flags than the WPA
	   unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  if (optimize)
    {
      ipa_register_cgraph_hooks ();
      /* When ipa-cp is active it streams jump functions itself.  */
      if (!flag_ipa_cp)
	ipa_prop_read_jump_functions ();
    }

  gcc_assert (inline_summaries);
  /* Keep summaries of late-inserted functions up to date.  */
  inline_summaries->enable_insertion_hook ();
}
4499 /* Write predicate P to OB. */
4501 static void
4502 write_predicate (struct output_block *ob, struct predicate *p)
4504 int j;
4505 if (p)
4506 for (j = 0; p->clause[j]; j++)
4508 gcc_assert (j < MAX_CLAUSES);
4509 streamer_write_uhwi (ob, p->clause[j]);
4511 streamer_write_uhwi (ob, 0);
4515 /* Write inline summary for edge E to OB. */
4517 static void
4518 write_inline_edge_summary (struct output_block *ob, struct cgraph_edge *e)
4520 struct inline_edge_summary *es = inline_edge_summary (e);
4521 int i;
4523 streamer_write_uhwi (ob, es->call_stmt_size);
4524 streamer_write_uhwi (ob, es->call_stmt_time);
4525 streamer_write_uhwi (ob, es->loop_depth);
4526 write_predicate (ob, es->predicate);
4527 streamer_write_uhwi (ob, es->param.length ());
4528 for (i = 0; i < (int) es->param.length (); i++)
4529 streamer_write_uhwi (ob, es->param[i].change_prob);
/* Write inline summary for node in SET.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.
   The layout written here must stay in exact sync with the layout
   consumed by inline_read_section.  */

void
inline_write_summary (void)
{
  struct output_block *ob = create_output_block (LTO_section_inline_summary);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;
  int i;

  /* First pass: count the defined, non-alias nodes so the reader knows
     how many records follow.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Second pass: stream one record per node, using the same filter.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	{
	  struct inline_summary *info = inline_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  /* NOTE(review): written with streamer_write_hwi but read back via
	     streamer_read_uhwi in inline_read_section — confirm the values
	     are always non-negative so the mismatch is harmless.  */
	  streamer_write_hwi (ob, info->estimated_self_stack_size);
	  streamer_write_hwi (ob, info->self_size);
	  info->self_time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->contains_cilk_spawn, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  /* Conditions referenced by the predicates streamed below.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->size);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      /* Offset is streamed only for aggregate contents; the reader
		 checks the same flag.  */
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	    }
	  /* Predicated size/time entries.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->entry));
	  for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      write_predicate (ob, &e->exec_predicate);
	      write_predicate (ob, &e->nonconst_predicate);
	    }
	  /* Inlining-hint predicates.  */
	  write_predicate (ob, info->loop_iterations);
	  write_predicate (ob, info->loop_stride);
	  write_predicate (ob, info->array_index);
	  /* Edge summaries: callees first, then indirect calls — the order
	     the reader walks them.  */
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_inline_edge_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_inline_edge_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  if (optimize && !flag_ipa_cp)
    ipa_prop_write_jump_functions ();
}
/* Release inline summary: unregister the edge hooks, reset per-node
   summaries and free all summary storage.  Safe to call when no
   summaries were ever computed (returns early in that case).  */

void
inline_free_summary (void)
{
  struct cgraph_node *node;
  if (edge_removal_hook_holder)
    symtab->remove_edge_removal_hook (edge_removal_hook_holder);
  edge_removal_hook_holder = NULL;
  if (edge_duplication_hook_holder)
    symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
  edge_duplication_hook_holder = NULL;
  /* If edge summaries were never allocated there is nothing further to
     free.  */
  if (!inline_edge_summary_vec.exists ())
    return;
  /* Reset each node's summary before releasing the container that owns
     them.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      reset_inline_summary (node, inline_summaries->get (node));
  inline_summaries->release ();
  inline_summaries = NULL;
  inline_edge_summary_vec.release ();
  edge_predicate_pool.release ();
}