PR tree-optimization/78496
[official-gcc.git] / gcc / ipa-inline-analysis.c
blob929de8e9791a9b91d54096e134116e0be4f15d82
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis used by the inliner and other passes limiting code size growth.
23 We estimate for each function
24 - function body size
25 - average function execution time
26 - inlining size benefit (that is how much of function body size
27 and its call sequence is expected to disappear by inlining)
28 - inlining time benefit
29 - function frame size
30 For each call
31 - call statement size and time
33 inline_summary data structures store above information locally (i.e.
34 parameters of the function itself) and globally (i.e. parameters of
35 the function created by applying all the inline decisions already
36 present in the callgraph).
38 We provide access to the inline_summary data structure and
39 basic logic updating the parameters when inlining is performed.
41 The summaries are context sensitive. Context means
42 1) partial assignment of known constant values of operands
43 2) whether function is inlined into the call or not.
44 It is easy to add more variants. To represent function size and time
45 that depends on context (i.e. it is known to be optimized away when
46 context is known either by inlining or from IP-CP and cloning),
47 we use predicates. Predicates are logical formulas in
48 conjunctive-disjunctive form consisting of clauses. Clauses are bitmaps
49 specifying what conditions must be true. Conditions are simple test
50 of the form described above.
52 In order to make predicate (possibly) true, all of its clauses must
53 be (possibly) true. To make clause (possibly) true, one of conditions
54 it mentions must be (possibly) true. There are fixed bounds on
55 number of clauses and conditions and all the manipulation functions
56 are conservative in positive direction. I.e. we may lose precision
57 by thinking that predicate may be true even when it is not.
59 estimate_edge_size and estimate_edge_growth can be used to query
60 function size/time in the given context. inline_merge_summary merges
61 properties of caller and callee after inlining.
63 Finally pass_inline_parameters is exported. This is used to drive
64 computation of function parameters used by the early inliner. IPA
65 inliner performs analysis via its analyze_function method. */
67 #include "config.h"
68 #include "system.h"
69 #include "coretypes.h"
70 #include "backend.h"
71 #include "tree.h"
72 #include "gimple.h"
73 #include "alloc-pool.h"
74 #include "tree-pass.h"
75 #include "ssa.h"
76 #include "tree-streamer.h"
77 #include "cgraph.h"
78 #include "diagnostic.h"
79 #include "fold-const.h"
80 #include "print-tree.h"
81 #include "tree-inline.h"
82 #include "gimple-pretty-print.h"
83 #include "params.h"
84 #include "cfganal.h"
85 #include "gimple-iterator.h"
86 #include "tree-cfg.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "symbol-summary.h"
90 #include "ipa-prop.h"
91 #include "ipa-inline.h"
92 #include "cfgloop.h"
93 #include "tree-scalar-evolution.h"
94 #include "ipa-utils.h"
95 #include "cilk.h"
96 #include "cfgexpand.h"
97 #include "gimplify.h"
99 /* Number of bits in integer, but we really want to be stable across different
100 hosts. */
101 #define NUM_CONDITIONS 32
103 enum predicate_conditions
105 predicate_false_condition = 0,
106 predicate_not_inlined_condition = 1,
107 predicate_first_dynamic_condition = 2
110 /* Special condition code we use to represent test that operand is compile time
111 constant. */
112 #define IS_NOT_CONSTANT ERROR_MARK
113 /* Special condition code we use to represent test that operand is not changed
114 across invocation of the function. When operand IS_NOT_CONSTANT it is always
115 CHANGED, however i.e. loop invariants can be NOT_CHANGED given percentage
116 of executions even when they are not compile time constants. */
117 #define CHANGED IDENTIFIER_NODE
119 /* Holders of ipa cgraph hooks: */
120 static struct cgraph_2edge_hook_list *edge_duplication_hook_holder;
121 static struct cgraph_edge_hook_list *edge_removal_hook_holder;
122 static void inline_edge_removal_hook (struct cgraph_edge *, void *);
123 static void inline_edge_duplication_hook (struct cgraph_edge *,
124 struct cgraph_edge *, void *);
126 /* VECtor holding inline summaries.
127 In GGC memory because conditions might point to constant trees. */
128 function_summary <inline_summary *> *inline_summaries;
129 vec<inline_edge_summary_t> inline_edge_summary_vec;
131 /* Cached node/edge growths. */
132 vec<edge_growth_cache_entry> edge_growth_cache;
134 /* Edge predicates goes here. */
135 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
137 /* Return true predicate (tautology).
138 We represent it by empty list of clauses. */
140 static inline struct predicate
141 true_predicate (void)
143 struct predicate p;
144 p.clause[0] = 0;
145 return p;
149 /* Return predicate testing single condition number COND. */
151 static inline struct predicate
152 single_cond_predicate (int cond)
154 struct predicate p;
155 p.clause[0] = 1 << cond;
156 p.clause[1] = 0;
157 return p;
161 /* Return false predicate. First clause require false condition. */
163 static inline struct predicate
164 false_predicate (void)
166 return single_cond_predicate (predicate_false_condition);
170 /* Return true if P is (true). */
172 static inline bool
173 true_predicate_p (struct predicate *p)
175 return !p->clause[0];
179 /* Return true if P is (false). */
181 static inline bool
182 false_predicate_p (struct predicate *p)
184 if (p->clause[0] == (1 << predicate_false_condition))
186 gcc_checking_assert (!p->clause[1]
187 && p->clause[0] == 1 << predicate_false_condition);
188 return true;
190 return false;
194 /* Return predicate that is set true when function is not inlined. */
196 static inline struct predicate
197 not_inlined_predicate (void)
199 return single_cond_predicate (predicate_not_inlined_condition);
202 /* Simple description of whether a memory load or a condition refers to a load
203 from an aggregate and if so, how and where from in the aggregate.
204 Individual fields have the same meaning like fields with the same name in
205 struct condition. */
207 struct agg_position_info
209 HOST_WIDE_INT offset;
210 bool agg_contents;
211 bool by_ref;
214 /* Add condition to condition list SUMMARY. OPERAND_NUM, SIZE, CODE and VAL
215 correspond to fields of condition structure. AGGPOS describes whether the
216 used operand is loaded from an aggregate and where in the aggregate it is.
217 It can be NULL, which means this not a load from an aggregate. */
219 static struct predicate
220 add_condition (struct inline_summary *summary, int operand_num,
221 HOST_WIDE_INT size, struct agg_position_info *aggpos,
222 enum tree_code code, tree val)
224 int i;
225 struct condition *c;
226 struct condition new_cond;
227 HOST_WIDE_INT offset;
228 bool agg_contents, by_ref;
230 if (aggpos)
232 offset = aggpos->offset;
233 agg_contents = aggpos->agg_contents;
234 by_ref = aggpos->by_ref;
236 else
238 offset = 0;
239 agg_contents = false;
240 by_ref = false;
243 gcc_checking_assert (operand_num >= 0);
244 for (i = 0; vec_safe_iterate (summary->conds, i, &c); i++)
246 if (c->operand_num == operand_num
247 && c->size == size
248 && c->code == code
249 && c->val == val
250 && c->agg_contents == agg_contents
251 && (!agg_contents || (c->offset == offset && c->by_ref == by_ref)))
252 return single_cond_predicate (i + predicate_first_dynamic_condition);
254 /* Too many conditions. Give up and return constant true. */
255 if (i == NUM_CONDITIONS - predicate_first_dynamic_condition)
256 return true_predicate ();
258 new_cond.operand_num = operand_num;
259 new_cond.code = code;
260 new_cond.val = val;
261 new_cond.agg_contents = agg_contents;
262 new_cond.by_ref = by_ref;
263 new_cond.offset = offset;
264 new_cond.size = size;
265 vec_safe_push (summary->conds, new_cond);
266 return single_cond_predicate (i + predicate_first_dynamic_condition);
270 /* Add clause CLAUSE into the predicate P. */
272 static inline void
273 add_clause (conditions conditions, struct predicate *p, clause_t clause)
275 int i;
276 int i2;
277 int insert_here = -1;
278 int c1, c2;
280 /* True clause. */
281 if (!clause)
282 return;
284 /* False clause makes the whole predicate false. Kill the other variants. */
285 if (clause == (1 << predicate_false_condition))
287 p->clause[0] = (1 << predicate_false_condition);
288 p->clause[1] = 0;
289 return;
291 if (false_predicate_p (p))
292 return;
294 /* No one should be silly enough to add false into nontrivial clauses. */
295 gcc_checking_assert (!(clause & (1 << predicate_false_condition)));
297 /* Look where to insert the clause. At the same time prune out
298 clauses of P that are implied by the new clause and thus
299 redundant. */
300 for (i = 0, i2 = 0; i <= MAX_CLAUSES; i++)
302 p->clause[i2] = p->clause[i];
304 if (!p->clause[i])
305 break;
307 /* If p->clause[i] implies clause, there is nothing to add. */
308 if ((p->clause[i] & clause) == p->clause[i])
310 /* We had nothing to add, none of clauses should've become
311 redundant. */
312 gcc_checking_assert (i == i2);
313 return;
316 if (p->clause[i] < clause && insert_here < 0)
317 insert_here = i2;
319 /* If clause implies p->clause[i], then p->clause[i] becomes redundant.
320 Otherwise the p->clause[i] has to stay. */
321 if ((p->clause[i] & clause) != clause)
322 i2++;
325 /* Look for clauses that are obviously true. I.e.
326 op0 == 5 || op0 != 5. */
327 for (c1 = predicate_first_dynamic_condition; c1 < NUM_CONDITIONS; c1++)
329 condition *cc1;
330 if (!(clause & (1 << c1)))
331 continue;
332 cc1 = &(*conditions)[c1 - predicate_first_dynamic_condition];
333 /* We have no way to represent !CHANGED and !IS_NOT_CONSTANT
334 and thus there is no point for looking for them. */
335 if (cc1->code == CHANGED || cc1->code == IS_NOT_CONSTANT)
336 continue;
337 for (c2 = c1 + 1; c2 < NUM_CONDITIONS; c2++)
338 if (clause & (1 << c2))
340 condition *cc1 =
341 &(*conditions)[c1 - predicate_first_dynamic_condition];
342 condition *cc2 =
343 &(*conditions)[c2 - predicate_first_dynamic_condition];
344 if (cc1->operand_num == cc2->operand_num
345 && cc1->val == cc2->val
346 && cc2->code != IS_NOT_CONSTANT
347 && cc2->code != CHANGED
348 && cc1->code == invert_tree_comparison (cc2->code,
349 HONOR_NANS (cc1->val)))
350 return;
355 /* We run out of variants. Be conservative in positive direction. */
356 if (i2 == MAX_CLAUSES)
357 return;
358 /* Keep clauses in decreasing order. This makes equivalence testing easy. */
359 p->clause[i2 + 1] = 0;
360 if (insert_here >= 0)
361 for (; i2 > insert_here; i2--)
362 p->clause[i2] = p->clause[i2 - 1];
363 else
364 insert_here = i2;
365 p->clause[insert_here] = clause;
369 /* Return P & P2. */
371 static struct predicate
372 and_predicates (conditions conditions,
373 struct predicate *p, struct predicate *p2)
375 struct predicate out = *p;
376 int i;
378 /* Avoid busy work. */
379 if (false_predicate_p (p2) || true_predicate_p (p))
380 return *p2;
381 if (false_predicate_p (p) || true_predicate_p (p2))
382 return *p;
384 /* See how far predicates match. */
385 for (i = 0; p->clause[i] && p->clause[i] == p2->clause[i]; i++)
387 gcc_checking_assert (i < MAX_CLAUSES);
390 /* Combine the predicates rest. */
391 for (; p2->clause[i]; i++)
393 gcc_checking_assert (i < MAX_CLAUSES);
394 add_clause (conditions, &out, p2->clause[i]);
396 return out;
400 /* Return true if predicates are obviously equal. */
402 static inline bool
403 predicates_equal_p (struct predicate *p, struct predicate *p2)
405 int i;
406 for (i = 0; p->clause[i]; i++)
408 gcc_checking_assert (i < MAX_CLAUSES);
409 gcc_checking_assert (p->clause[i] > p->clause[i + 1]);
410 gcc_checking_assert (!p2->clause[i]
411 || p2->clause[i] > p2->clause[i + 1]);
412 if (p->clause[i] != p2->clause[i])
413 return false;
415 return !p2->clause[i];
419 /* Return P | P2. */
421 static struct predicate
422 or_predicates (conditions conditions,
423 struct predicate *p, struct predicate *p2)
425 struct predicate out = true_predicate ();
426 int i, j;
428 /* Avoid busy work. */
429 if (false_predicate_p (p2) || true_predicate_p (p))
430 return *p;
431 if (false_predicate_p (p) || true_predicate_p (p2))
432 return *p2;
433 if (predicates_equal_p (p, p2))
434 return *p;
436 /* OK, combine the predicates. */
437 for (i = 0; p->clause[i]; i++)
438 for (j = 0; p2->clause[j]; j++)
440 gcc_checking_assert (i < MAX_CLAUSES && j < MAX_CLAUSES);
441 add_clause (conditions, &out, p->clause[i] | p2->clause[j]);
443 return out;
447 /* Having partial truth assignment in POSSIBLE_TRUTHS, return false
448 if predicate P is known to be false. */
450 static bool
451 evaluate_predicate (struct predicate *p, clause_t possible_truths)
453 int i;
455 /* True remains true. */
456 if (true_predicate_p (p))
457 return true;
459 gcc_assert (!(possible_truths & (1 << predicate_false_condition)));
461 /* See if we can find clause we can disprove. */
462 for (i = 0; p->clause[i]; i++)
464 gcc_checking_assert (i < MAX_CLAUSES);
465 if (!(p->clause[i] & possible_truths))
466 return false;
468 return true;
471 /* Return the probability in range 0...REG_BR_PROB_BASE that the predicated
472 instruction will be recomputed per invocation of the inlined call. */
474 static int
475 predicate_probability (conditions conds,
476 struct predicate *p, clause_t possible_truths,
477 vec<inline_param_summary> inline_param_summary)
479 int i;
480 int combined_prob = REG_BR_PROB_BASE;
482 /* True remains true. */
483 if (true_predicate_p (p))
484 return REG_BR_PROB_BASE;
486 if (false_predicate_p (p))
487 return 0;
489 gcc_assert (!(possible_truths & (1 << predicate_false_condition)));
491 /* See if we can find clause we can disprove. */
492 for (i = 0; p->clause[i]; i++)
494 gcc_checking_assert (i < MAX_CLAUSES);
495 if (!(p->clause[i] & possible_truths))
496 return 0;
497 else
499 int this_prob = 0;
500 int i2;
501 if (!inline_param_summary.exists ())
502 return REG_BR_PROB_BASE;
503 for (i2 = 0; i2 < NUM_CONDITIONS; i2++)
504 if ((p->clause[i] & possible_truths) & (1 << i2))
506 if (i2 >= predicate_first_dynamic_condition)
508 condition *c =
509 &(*conds)[i2 - predicate_first_dynamic_condition];
510 if (c->code == CHANGED
511 && (c->operand_num <
512 (int) inline_param_summary.length ()))
514 int iprob =
515 inline_param_summary[c->operand_num].change_prob;
516 this_prob = MAX (this_prob, iprob);
518 else
519 this_prob = REG_BR_PROB_BASE;
521 else
522 this_prob = REG_BR_PROB_BASE;
524 combined_prob = MIN (this_prob, combined_prob);
525 if (!combined_prob)
526 return 0;
529 return combined_prob;
533 /* Dump conditional COND. */
535 static void
536 dump_condition (FILE *f, conditions conditions, int cond)
538 condition *c;
539 if (cond == predicate_false_condition)
540 fprintf (f, "false");
541 else if (cond == predicate_not_inlined_condition)
542 fprintf (f, "not inlined");
543 else
545 c = &(*conditions)[cond - predicate_first_dynamic_condition];
546 fprintf (f, "op%i", c->operand_num);
547 if (c->agg_contents)
548 fprintf (f, "[%soffset: " HOST_WIDE_INT_PRINT_DEC "]",
549 c->by_ref ? "ref " : "", c->offset);
550 if (c->code == IS_NOT_CONSTANT)
552 fprintf (f, " not constant");
553 return;
555 if (c->code == CHANGED)
557 fprintf (f, " changed");
558 return;
560 fprintf (f, " %s ", op_symbol_code (c->code));
561 print_generic_expr (f, c->val, 1);
566 /* Dump clause CLAUSE. */
568 static void
569 dump_clause (FILE *f, conditions conds, clause_t clause)
571 int i;
572 bool found = false;
573 fprintf (f, "(");
574 if (!clause)
575 fprintf (f, "true");
576 for (i = 0; i < NUM_CONDITIONS; i++)
577 if (clause & (1 << i))
579 if (found)
580 fprintf (f, " || ");
581 found = true;
582 dump_condition (f, conds, i);
584 fprintf (f, ")");
588 /* Dump PREDICATE to F. CONDS a vector of conditions used when evauating
589 predicats. When NL is true new line is output at the end of dump. */
591 static void
592 dump_predicate (FILE *f, conditions conds, struct predicate *pred,
593 bool nl = true)
595 int i;
596 if (true_predicate_p (pred))
597 dump_clause (f, conds, 0);
598 else
599 for (i = 0; pred->clause[i]; i++)
601 if (i)
602 fprintf (f, " && ");
603 dump_clause (f, conds, pred->clause[i]);
605 if (nl)
606 fprintf (f, "\n");
610 /* Dump inline hints. */
611 void
612 dump_inline_hints (FILE *f, inline_hints hints)
614 if (!hints)
615 return;
616 fprintf (f, "inline hints:");
617 if (hints & INLINE_HINT_indirect_call)
619 hints &= ~INLINE_HINT_indirect_call;
620 fprintf (f, " indirect_call");
622 if (hints & INLINE_HINT_loop_iterations)
624 hints &= ~INLINE_HINT_loop_iterations;
625 fprintf (f, " loop_iterations");
627 if (hints & INLINE_HINT_loop_stride)
629 hints &= ~INLINE_HINT_loop_stride;
630 fprintf (f, " loop_stride");
632 if (hints & INLINE_HINT_same_scc)
634 hints &= ~INLINE_HINT_same_scc;
635 fprintf (f, " same_scc");
637 if (hints & INLINE_HINT_in_scc)
639 hints &= ~INLINE_HINT_in_scc;
640 fprintf (f, " in_scc");
642 if (hints & INLINE_HINT_cross_module)
644 hints &= ~INLINE_HINT_cross_module;
645 fprintf (f, " cross_module");
647 if (hints & INLINE_HINT_declared_inline)
649 hints &= ~INLINE_HINT_declared_inline;
650 fprintf (f, " declared_inline");
652 if (hints & INLINE_HINT_array_index)
654 hints &= ~INLINE_HINT_array_index;
655 fprintf (f, " array_index");
657 if (hints & INLINE_HINT_known_hot)
659 hints &= ~INLINE_HINT_known_hot;
660 fprintf (f, " known_hot");
662 gcc_assert (!hints);
666 /* Record SIZE and TIME to SUMMARY.
667 The accounted code will be executed when EXEC_PRED is true.
668 When NONCONST_PRED is false the code will evaulate to constant and
669 will get optimized out in specialized clones of the function. */
671 static void
672 account_size_time (struct inline_summary *summary, int size, sreal time,
673 struct predicate *exec_pred,
674 struct predicate *nonconst_pred_ptr)
676 size_time_entry *e;
677 bool found = false;
678 int i;
679 struct predicate nonconst_pred;
681 if (false_predicate_p (exec_pred))
682 return;
684 nonconst_pred = and_predicates (summary->conds, nonconst_pred_ptr, exec_pred);
686 if (false_predicate_p (&nonconst_pred))
687 return;
689 /* We need to create initial empty unconitional clause, but otherwie
690 we don't need to account empty times and sizes. */
691 if (!size && time == 0 && summary->entry)
692 return;
694 gcc_assert (time >= 0);
696 for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
697 if (predicates_equal_p (&e->exec_predicate, exec_pred)
698 && predicates_equal_p (&e->nonconst_predicate, &nonconst_pred))
700 found = true;
701 break;
703 if (i == 256)
705 i = 0;
706 found = true;
707 e = &(*summary->entry)[0];
708 gcc_assert (!e->exec_predicate.clause[0]);
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 fprintf (dump_file,
711 "\t\tReached limit on number of entries, "
712 "ignoring the predicate.");
714 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
716 fprintf (dump_file,
717 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
718 ((double) size) / INLINE_SIZE_SCALE,
719 (time.to_double ()), found ? "" : "new ");
720 dump_predicate (dump_file, summary->conds, exec_pred, 0);
721 if (!predicates_equal_p (exec_pred, &nonconst_pred))
723 fprintf (dump_file, " nonconst:");
724 dump_predicate (dump_file, summary->conds, &nonconst_pred);
726 else
727 fprintf (dump_file, "\n");
729 if (!found)
731 struct size_time_entry new_entry;
732 new_entry.size = size;
733 new_entry.time = time;
734 new_entry.exec_predicate = *exec_pred;
735 new_entry.nonconst_predicate = nonconst_pred;
736 vec_safe_push (summary->entry, new_entry);
738 else
740 e->size += size;
741 e->time += time;
745 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
747 static struct cgraph_edge *
748 redirect_to_unreachable (struct cgraph_edge *e)
750 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
751 struct cgraph_node *target = cgraph_node::get_create
752 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
754 if (e->speculative)
755 e = e->resolve_speculation (target->decl);
756 else if (!e->callee)
757 e->make_direct (target);
758 else
759 e->redirect_callee (target);
760 struct inline_edge_summary *es = inline_edge_summary (e);
761 e->inline_failed = CIF_UNREACHABLE;
762 e->frequency = 0;
763 e->count = 0;
764 es->call_stmt_size = 0;
765 es->call_stmt_time = 0;
766 if (callee)
767 callee->remove_symbol_and_inline_clones ();
768 return e;
771 /* Set predicate for edge E. */
773 static void
774 edge_set_predicate (struct cgraph_edge *e, struct predicate *predicate)
776 /* If the edge is determined to be never executed, redirect it
777 to BUILTIN_UNREACHABLE to save inliner from inlining into it. */
778 if (predicate && false_predicate_p (predicate)
779 /* When handling speculative edges, we need to do the redirection
780 just once. Do it always on the direct edge, so we do not
781 attempt to resolve speculation while duplicating the edge. */
782 && (!e->speculative || e->callee))
783 e = redirect_to_unreachable (e);
785 struct inline_edge_summary *es = inline_edge_summary (e);
786 if (predicate && !true_predicate_p (predicate))
788 if (!es->predicate)
789 es->predicate = edge_predicate_pool.allocate ();
790 *es->predicate = *predicate;
792 else
794 if (es->predicate)
795 edge_predicate_pool.remove (es->predicate);
796 es->predicate = NULL;
800 /* Set predicate for hint *P. */
802 static void
803 set_hint_predicate (struct predicate **p, struct predicate new_predicate)
805 if (false_predicate_p (&new_predicate) || true_predicate_p (&new_predicate))
807 if (*p)
808 edge_predicate_pool.remove (*p);
809 *p = NULL;
811 else
813 if (!*p)
814 *p = edge_predicate_pool.allocate ();
815 **p = new_predicate;
820 /* Compute what conditions may or may not hold given invormation about
821 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
822 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
823 copy when called in a given context. It is a bitmask of conditions. Bit
824 0 means that condition is known to be false, while bit 1 means that condition
825 may or may not be true. These differs - for example NOT_INLINED condition
826 is always false in the second and also builtin_constant_p tests can not use
827 the fact that parameter is indeed a constant.
829 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
830 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
831 Return clause of possible truths. When INLINE_P is true, assume that we are
832 inlining.
834 ERROR_MARK means compile time invariant. */
836 static void
837 evaluate_conditions_for_known_args (struct cgraph_node *node,
838 bool inline_p,
839 vec<tree> known_vals,
840 vec<ipa_agg_jump_function_p>
841 known_aggs,
842 clause_t *ret_clause,
843 clause_t *ret_nonspec_clause)
845 clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition;
846 clause_t nonspec_clause = 1 << predicate_not_inlined_condition;
847 struct inline_summary *info = inline_summaries->get (node);
848 int i;
849 struct condition *c;
851 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
853 tree val;
854 tree res;
856 /* We allow call stmt to have fewer arguments than the callee function
857 (especially for K&R style programs). So bound check here (we assume
858 known_aggs vector, if non-NULL, has the same length as
859 known_vals). */
860 gcc_checking_assert (!known_aggs.exists ()
861 || (known_vals.length () == known_aggs.length ()));
862 if (c->operand_num >= (int) known_vals.length ())
864 clause |= 1 << (i + predicate_first_dynamic_condition);
865 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
866 continue;
869 if (c->agg_contents)
871 struct ipa_agg_jump_function *agg;
873 if (c->code == CHANGED
874 && !c->by_ref
875 && (known_vals[c->operand_num] == error_mark_node))
876 continue;
878 if (known_aggs.exists ())
880 agg = known_aggs[c->operand_num];
881 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
882 c->offset, c->by_ref);
884 else
885 val = NULL_TREE;
887 else
889 val = known_vals[c->operand_num];
890 if (val == error_mark_node && c->code != CHANGED)
891 val = NULL_TREE;
894 if (!val)
896 clause |= 1 << (i + predicate_first_dynamic_condition);
897 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
898 continue;
900 if (c->code == CHANGED)
902 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
903 continue;
906 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
908 clause |= 1 << (i + predicate_first_dynamic_condition);
909 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
910 continue;
912 if (c->code == IS_NOT_CONSTANT)
914 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
915 continue;
918 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
919 res = val
920 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
921 : NULL;
923 if (res && integer_zerop (res))
924 continue;
926 clause |= 1 << (i + predicate_first_dynamic_condition);
927 nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
929 *ret_clause = clause;
930 if (ret_nonspec_clause)
931 *ret_nonspec_clause = nonspec_clause;
935 /* Work out what conditions might be true at invocation of E. */
937 static void
938 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
939 clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
940 vec<tree> *known_vals_ptr,
941 vec<ipa_polymorphic_call_context>
942 *known_contexts_ptr,
943 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
945 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
946 struct inline_summary *info = inline_summaries->get (callee);
947 vec<tree> known_vals = vNULL;
948 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
950 if (clause_ptr)
951 *clause_ptr = inline_p ? 0 : 1 << predicate_not_inlined_condition;
952 if (known_vals_ptr)
953 known_vals_ptr->create (0);
954 if (known_contexts_ptr)
955 known_contexts_ptr->create (0);
957 if (ipa_node_params_sum
958 && !e->call_stmt_cannot_inline_p
959 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
961 struct ipa_node_params *parms_info;
962 struct ipa_edge_args *args = IPA_EDGE_REF (e);
963 struct inline_edge_summary *es = inline_edge_summary (e);
964 int i, count = ipa_get_cs_argument_count (args);
966 if (e->caller->global.inlined_to)
967 parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
968 else
969 parms_info = IPA_NODE_REF (e->caller);
971 if (count && (info->conds || known_vals_ptr))
972 known_vals.safe_grow_cleared (count);
973 if (count && (info->conds || known_aggs_ptr))
974 known_aggs.safe_grow_cleared (count);
975 if (count && known_contexts_ptr)
976 known_contexts_ptr->safe_grow_cleared (count);
978 for (i = 0; i < count; i++)
980 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
981 tree cst = ipa_value_from_jfunc (parms_info, jf);
983 if (!cst && e->call_stmt
984 && i < (int)gimple_call_num_args (e->call_stmt))
986 cst = gimple_call_arg (e->call_stmt, i);
987 if (!is_gimple_min_invariant (cst))
988 cst = NULL;
990 if (cst)
992 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
993 if (known_vals.exists ())
994 known_vals[i] = cst;
996 else if (inline_p && !es->param[i].change_prob)
997 known_vals[i] = error_mark_node;
999 if (known_contexts_ptr)
1000 (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
1001 i, jf);
1002 /* TODO: When IPA-CP starts propagating and merging aggregate jump
1003 functions, use its knowledge of the caller too, just like the
1004 scalar case above. */
1005 known_aggs[i] = &jf->agg;
1008 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
1009 && ((clause_ptr && info->conds) || known_vals_ptr))
1011 int i, count = (int)gimple_call_num_args (e->call_stmt);
1013 if (count && (info->conds || known_vals_ptr))
1014 known_vals.safe_grow_cleared (count);
1015 for (i = 0; i < count; i++)
1017 tree cst = gimple_call_arg (e->call_stmt, i);
1018 if (!is_gimple_min_invariant (cst))
1019 cst = NULL;
1020 if (cst)
1021 known_vals[i] = cst;
1025 evaluate_conditions_for_known_args (callee, inline_p,
1026 known_vals, known_aggs, clause_ptr,
1027 nonspec_clause_ptr);
1029 if (known_vals_ptr)
1030 *known_vals_ptr = known_vals;
1031 else
1032 known_vals.release ();
1034 if (known_aggs_ptr)
1035 *known_aggs_ptr = known_aggs;
1036 else
1037 known_aggs.release ();
1041 /* Allocate the inline summary vector or resize it to cover all cgraph nodes. */
1043 static void
1044 inline_summary_alloc (void)
1046 if (!edge_removal_hook_holder)
1047 edge_removal_hook_holder =
1048 symtab->add_edge_removal_hook (&inline_edge_removal_hook, NULL);
1049 if (!edge_duplication_hook_holder)
1050 edge_duplication_hook_holder =
1051 symtab->add_edge_duplication_hook (&inline_edge_duplication_hook, NULL);
1053 if (!inline_summaries)
1054 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
1056 if (inline_edge_summary_vec.length () <= (unsigned) symtab->edges_max_uid)
1057 inline_edge_summary_vec.safe_grow_cleared (symtab->edges_max_uid + 1);
1060 /* We are called multiple time for given function; clear
1061 data from previous run so they are not cumulated. */
1063 static void
1064 reset_inline_edge_summary (struct cgraph_edge *e)
1066 if (e->uid < (int) inline_edge_summary_vec.length ())
1068 struct inline_edge_summary *es = inline_edge_summary (e);
1070 es->call_stmt_size = es->call_stmt_time = 0;
1071 if (es->predicate)
1072 edge_predicate_pool.remove (es->predicate);
1073 es->predicate = NULL;
1074 es->param.release ();
1078 /* We are called multiple time for given function; clear
1079 data from previous run so they are not cumulated. */
1081 static void
1082 reset_inline_summary (struct cgraph_node *node,
1083 inline_summary *info)
1085 struct cgraph_edge *e;
1087 info->self_size = 0;
1088 info->self_time = 0;
1089 info->estimated_stack_size = 0;
1090 info->estimated_self_stack_size = 0;
1091 info->stack_frame_offset = 0;
1092 info->size = 0;
1093 info->time = 0;
1094 info->growth = 0;
1095 info->scc_no = 0;
1096 if (info->loop_iterations)
1098 edge_predicate_pool.remove (info->loop_iterations);
1099 info->loop_iterations = NULL;
1101 if (info->loop_stride)
1103 edge_predicate_pool.remove (info->loop_stride);
1104 info->loop_stride = NULL;
1106 if (info->array_index)
1108 edge_predicate_pool.remove (info->array_index);
1109 info->array_index = NULL;
1111 vec_free (info->conds);
1112 vec_free (info->entry);
1113 for (e = node->callees; e; e = e->next_callee)
1114 reset_inline_edge_summary (e);
1115 for (e = node->indirect_calls; e; e = e->next_callee)
1116 reset_inline_edge_summary (e);
1117 info->fp_expressions = false;
1120 /* Hook that is called by cgraph.c when a node is removed. */
1122 void
1123 inline_summary_t::remove (cgraph_node *node, inline_summary *info)
1125 reset_inline_summary (node, info);
1128 /* Remap predicate P of former function to be predicate of duplicated function.
1129 POSSIBLE_TRUTHS is clause of possible truths in the duplicated node,
1130 INFO is inline summary of the duplicated node. */
1132 static struct predicate
1133 remap_predicate_after_duplication (struct predicate *p,
1134 clause_t possible_truths,
1135 struct inline_summary *info)
1137 struct predicate new_predicate = true_predicate ();
1138 int j;
1139 for (j = 0; p->clause[j]; j++)
1140 if (!(possible_truths & p->clause[j]))
1142 new_predicate = false_predicate ();
1143 break;
1145 else
1146 add_clause (info->conds, &new_predicate,
1147 possible_truths & p->clause[j]);
1148 return new_predicate;
1151 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
1152 Additionally care about allocating new memory slot for updated predicate
1153 and set it to NULL when it becomes true or false (and thus uninteresting).
1156 static void
1157 remap_hint_predicate_after_duplication (struct predicate **p,
1158 clause_t possible_truths,
1159 struct inline_summary *info)
1161 struct predicate new_predicate;
1163 if (!*p)
1164 return;
1166 new_predicate = remap_predicate_after_duplication (*p,
1167 possible_truths, info);
1168 /* We do not want to free previous predicate; it is used by node origin. */
1169 *p = NULL;
1170 set_hint_predicate (p, new_predicate);
1174 /* Hook that is called by cgraph.c when a node is duplicated. */
/* The duplicate DST may carry parameter replacements (dst->clone.tree_map);
   in that case predicates that become false under the known argument values
   are dropped and their size is credited as optimized out.  Otherwise the
   summary is a plain deep copy of SRC's.  */
1175 void
1176 inline_summary_t::duplicate (cgraph_node *src,
1177 cgraph_node *dst,
1178 inline_summary *,
1179 inline_summary *info)
1181 inline_summary_alloc ();
1182 memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
1183 /* TODO: as an optimization, we may avoid copying conditions
1184 that are known to be false or true. */
1185 info->conds = vec_safe_copy (info->conds);
1187 /* When there are any replacements in the function body, see if we can figure
1188 out that something was optimized out. */
1189 if (ipa_node_params_sum && dst->clone.tree_map)
1191 vec<size_time_entry, va_gc> *entry = info->entry;
1192 /* Use SRC parm info since it may not be copied yet. */
1193 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
1194 vec<tree> known_vals = vNULL;
1195 int count = ipa_get_param_count (parms_info);
1196 int i, j;
1197 clause_t possible_truths;
1198 struct predicate true_pred = true_predicate ();
1199 size_time_entry *e;
1200 int optimized_out_size = 0;
1201 bool inlined_to_p = false;
1202 struct cgraph_edge *edge, *next;
1204 info->entry = 0;
/* Collect the replacement value for each parameter index; non-value
   (ref_p) and non-replacing entries are deliberately skipped.  */
1205 known_vals.safe_grow_cleared (count);
1206 for (i = 0; i < count; i++)
1208 struct ipa_replace_map *r;
1210 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
1212 if (((!r->old_tree && r->parm_num == i)
1213 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
1214 && r->replace_p && !r->ref_p)
1216 known_vals[i] = r->new_tree;
1217 break;
1221 evaluate_conditions_for_known_args (dst, false,
1222 known_vals,
1223 vNULL,
1224 &possible_truths,
1225 /* We are going to specialize,
1226 so ignore nonspec truths. */
1227 NULL);
1228 known_vals.release ();
/* Rebuild the size/time table from scratch; the always-true entry is
   re-added first.  */
1230 account_size_time (info, 0, 0, &true_pred, &true_pred);
1232 /* Remap size_time vectors.
1233 Simplify the predicate by pruning out alternatives that are known
1234 to be false.
1235 TODO: as an optimization, we can also eliminate conditions known
1236 to be true. */
1237 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
1239 struct predicate new_exec_pred;
1240 struct predicate new_nonconst_pred;
1241 new_exec_pred = remap_predicate_after_duplication (&e->exec_predicate,
1242 possible_truths,
1243 info);
1244 new_nonconst_pred
1245 = remap_predicate_after_duplication (&e->nonconst_predicate,
1246 possible_truths,
1247 info);
1248 if (false_predicate_p (&new_exec_pred)
1249 || false_predicate_p (&new_nonconst_pred))
1250 optimized_out_size += e->size;
1251 else
1252 account_size_time (info, e->size, e->time, &new_exec_pred,
1253 &new_nonconst_pred);
1256 /* Remap edge predicates with the same simplification as above.
1257 Also copy constantness arrays. */
1258 for (edge = dst->callees; edge; edge = next)
1260 struct predicate new_predicate;
1261 struct inline_edge_summary *es = inline_edge_summary (edge);
/* edge_set_predicate may remove EDGE, so fetch the successor first.  */
1262 next = edge->next_callee;
1264 if (!edge->inline_failed)
1265 inlined_to_p = true;
1266 if (!es->predicate)
1267 continue;
1268 new_predicate = remap_predicate_after_duplication (es->predicate,
1269 possible_truths,
1270 info);
1271 if (false_predicate_p (&new_predicate)
1272 && !false_predicate_p (es->predicate))
1273 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
1274 edge_set_predicate (edge, &new_predicate);
1277 /* Remap indirect edge predicates with the same simplification as above.
1278 Also copy constantness arrays. */
1279 for (edge = dst->indirect_calls; edge; edge = next)
1281 struct predicate new_predicate;
1282 struct inline_edge_summary *es = inline_edge_summary (edge);
1283 next = edge->next_callee;
1285 gcc_checking_assert (edge->inline_failed);
1286 if (!es->predicate)
1287 continue;
1288 new_predicate = remap_predicate_after_duplication (es->predicate,
1289 possible_truths,
1290 info);
1291 if (false_predicate_p (&new_predicate)
1292 && !false_predicate_p (es->predicate))
1293 optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
1294 edge_set_predicate (edge, &new_predicate);
1296 remap_hint_predicate_after_duplication (&info->loop_iterations,
1297 possible_truths, info);
1298 remap_hint_predicate_after_duplication (&info->loop_stride,
1299 possible_truths, info);
1300 remap_hint_predicate_after_duplication (&info->array_index,
1301 possible_truths, info);
1303 /* If inliner or someone after inliner will ever start producing
1304 non-trivial clones, we will get trouble with lack of information
1305 about updating self sizes, because size vectors already contains
1306 sizes of the callees. */
1307 gcc_assert (!inlined_to_p || !optimized_out_size);
/* No replacements known: make a plain copy, but give the clone its own
   pool slots for the hint predicates instead of sharing SRC's.  */
1309 else
1311 info->entry = vec_safe_copy (info->entry);
1312 if (info->loop_iterations)
1314 predicate p = *info->loop_iterations;
1315 info->loop_iterations = NULL;
1316 set_hint_predicate (&info->loop_iterations, p);
1318 if (info->loop_stride)
1320 predicate p = *info->loop_stride;
1321 info->loop_stride = NULL;
1322 set_hint_predicate (&info->loop_stride, p);
1324 if (info->array_index)
1326 predicate p = *info->array_index;
1327 info->array_index = NULL;
1328 set_hint_predicate (&info->array_index, p);
1331 if (!dst->global.inlined_to)
1332 inline_update_overall_summary (dst);
1336 /* Hook that is called by cgraph.c when a node is duplicated. */
1338 static void
1339 inline_edge_duplication_hook (struct cgraph_edge *src,
1340 struct cgraph_edge *dst,
1341 ATTRIBUTE_UNUSED void *data)
1343 struct inline_edge_summary *info;
1344 struct inline_edge_summary *srcinfo;
1345 inline_summary_alloc ();
1346 info = inline_edge_summary (dst);
1347 srcinfo = inline_edge_summary (src);
1348 memcpy (info, srcinfo, sizeof (struct inline_edge_summary));
1349 info->predicate = NULL;
1350 edge_set_predicate (dst, srcinfo->predicate);
1351 info->param = srcinfo->param.copy ();
1352 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
1354 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
1355 - eni_size_weights.call_cost);
1356 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
1357 - eni_time_weights.call_cost);
1362 /* Keep edge cache consistent across edge removal. */
1364 static void
1365 inline_edge_removal_hook (struct cgraph_edge *edge,
1366 void *data ATTRIBUTE_UNUSED)
1368 if (edge_growth_cache.exists ())
1369 reset_edge_growth_cache (edge);
1370 reset_inline_edge_summary (edge);
1374 /* Initialize growth caches. */
1376 void
1377 initialize_growth_caches (void)
1379 if (symtab->edges_max_uid)
1380 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
1384 /* Free growth caches. */
1386 void
1387 free_growth_caches (void)
/* Releasing the vector discards all cached per-edge growth estimates.  */
1389 edge_growth_cache.release ();
1393 /* Dump edge summaries associated to NODE and recursively to all clones.
1394 Indent by INDENT. */
/* INFO provides the condition vector used to pretty-print predicates;
   inlined callees are dumped recursively with increased indentation.  */
1396 static void
1397 dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node,
1398 struct inline_summary *info)
1400 struct cgraph_edge *edge;
1401 for (edge = node->callees; edge; edge = edge->next_callee)
1403 struct inline_edge_summary *es = inline_edge_summary (edge);
1404 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1405 int i;
1407 fprintf (f,
1408 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
1409 " time: %2i callee size:%2i stack:%2i",
1410 indent, "", callee->name (), callee->order,
1411 !edge->inline_failed
1412 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1413 indent, "", es->loop_depth, edge->frequency,
1414 es->call_stmt_size, es->call_stmt_time,
1415 (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
1416 (int) inline_summaries->get (callee)->estimated_stack_size);
1418 if (es->predicate)
1420 fprintf (f, " predicate: ");
1421 dump_predicate (f, info->conds, es->predicate);
1423 else
1424 fprintf (f, "\n");
/* Per-argument change probabilities: 0 means compile time invariant.  */
1425 if (es->param.exists ())
1426 for (i = 0; i < (int) es->param.length (); i++)
1428 int prob = es->param[i].change_prob;
1430 if (!prob)
1431 fprintf (f, "%*s op%i is compile time invariant\n",
1432 indent + 2, "", i);
1433 else if (prob != REG_BR_PROB_BASE)
1434 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1435 prob * 100.0 / REG_BR_PROB_BASE);
/* For already-inlined edges also dump the callee body recursively.  */
1437 if (!edge->inline_failed)
1439 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
1440 " callee size %i\n",
1441 indent + 2, "",
1442 (int) inline_summaries->get (callee)->stack_frame_offset,
1443 (int) inline_summaries->get (callee)->estimated_self_stack_size,
1444 (int) inline_summaries->get (callee)->estimated_stack_size);
1445 dump_inline_edge_summary (f, indent + 2, callee, info);
1448 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1450 struct inline_edge_summary *es = inline_edge_summary (edge);
1451 fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
1452 " time: %2i",
1453 indent, "",
1454 es->loop_depth,
1455 edge->frequency, es->call_stmt_size, es->call_stmt_time);
1456 if (es->predicate)
1458 fprintf (f, "predicate: ");
1459 dump_predicate (f, info->conds, es->predicate);
1461 else
1462 fprintf (f, "\n");
/* Dump the inline summary of NODE to F.  Only defined functions are
   dumped; flags, sizes/times, the size_time table with its predicates,
   hint predicates and all call edges are printed.  */
1467 void
1468 dump_inline_summary (FILE *f, struct cgraph_node *node)
1470 if (node->definition)
1472 struct inline_summary *s = inline_summaries->get (node);
1473 size_time_entry *e;
1474 int i;
1475 fprintf (f, "Inline summary for %s/%i", node->name (),
1476 node->order);
1477 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1478 fprintf (f, " always_inline");
1479 if (s->inlinable)
1480 fprintf (f, " inlinable");
1481 if (s->contains_cilk_spawn)
1482 fprintf (f, " contains_cilk_spawn");
1483 if (s->fp_expressions)
1484 fprintf (f, " fp_expression");
1485 fprintf (f, "\n self time: %f\n", s->self_time.to_double ());
1486 fprintf (f, " global time: %f\n", s->time.to_double ());
1487 fprintf (f, " self size: %i\n", s->self_size);
1488 fprintf (f, " global size: %i\n", s->size);
1489 fprintf (f, " min size: %i\n", s->min_size);
1490 fprintf (f, " self stack: %i\n",
1491 (int) s->estimated_self_stack_size);
1492 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1493 if (s->growth)
1494 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1495 if (s->scc_no)
1496 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
/* Each entry carries a size/time contribution guarded by predicates.  */
1497 for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
1499 fprintf (f, " size:%f, time:%f",
1500 (double) e->size / INLINE_SIZE_SCALE,
1501 e->time.to_double ());
1502 if (!true_predicate_p (&e->exec_predicate))
1504 fprintf (f, ", executed if:");
1505 dump_predicate (f, s->conds, &e->exec_predicate, 0)
1507 if (!predicates_equal_p (&e->exec_predicate,
1508 &e->nonconst_predicate))
1510 fprintf (f, ", nonconst if:");
1511 dump_predicate (f, s->conds, &e->nonconst_predicate, 0);
1513 fprintf (f, "\n");
1515 if (s->loop_iterations)
1517 fprintf (f, " loop iterations:");
1518 dump_predicate (f, s->conds, s->loop_iterations);
1520 if (s->loop_stride)
1522 fprintf (f, " loop stride:");
1523 dump_predicate (f, s->conds, s->loop_stride);
1525 if (s->array_index)
1527 fprintf (f, " array index:");
1528 dump_predicate (f, s->conds, s->array_index);
1530 fprintf (f, " calls:\n");
1531 dump_inline_edge_summary (f, 4, node, s);
1532 fprintf (f, "\n");
/* Convenience wrapper for use from the debugger: dump NODE's inline
   summary to stderr.  */
1536 DEBUG_FUNCTION void
1537 debug_inline_summary (struct cgraph_node *node)
1539 dump_inline_summary (stderr, node);
/* Dump inline summaries of all defined functions to F.  Functions
   already inlined elsewhere are skipped here; they are printed
   recursively from the node they were inlined to.  */
1542 void
1543 dump_inline_summaries (FILE *f)
1545 struct cgraph_node *node;
1547 FOR_EACH_DEFINED_FUNCTION (node)
1548 if (!node->global.inlined_to)
1549 dump_inline_summary (f, node);
1552 /* Give initial reasons why inlining would fail on EDGE. This gets either
1553 nullified or usually overwritten by more precise reasons later. */
1555 void
1556 initialize_inline_failed (struct cgraph_edge *e)
1558 struct cgraph_node *callee = e->callee;
1560 if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
1561 && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
1563 else if (e->indirect_unknown_callee)
1564 e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
1565 else if (!callee->definition)
1566 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
1567 else if (callee->local.redefined_extern_inline)
1568 e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
1569 else
1570 e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
1571 gcc_checking_assert (!e->call_stmt_cannot_inline_p
1572 || cgraph_inline_failed_type (e->inline_failed)
1573 == CIF_FINAL_ERROR);
1576 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1577 boolean variable pointed to by DATA. */
1579 static bool
1580 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1581 void *data)
1583 bool *b = (bool *) data;
1584 *b = true;
1585 return true;
1588 /* If OP refers to value of function parameter, return the corresponding
1589 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1590 PARM_DECL) will be stored to *SIZE_P in that case too. */
1592 static tree
1593 unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1595 /* SSA_NAME referring to parm default def? */
1596 if (TREE_CODE (op) == SSA_NAME
1597 && SSA_NAME_IS_DEFAULT_DEF (op)
1598 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1600 if (size_p)
1601 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1602 return SSA_NAME_VAR (op);
1604 /* Non-SSA parm reference? */
1605 if (TREE_CODE (op) == PARM_DECL)
1607 bool modified = false;
1609 ao_ref refd;
1610 ao_ref_init (&refd, op);
1611 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
1612 NULL);
1613 if (!modified)
1615 if (size_p)
1616 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1617 return op;
1620 return NULL_TREE;
1623 /* If OP refers to value of function parameter, return the corresponding
1624 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1625 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1626 stored to *SIZE_P in that case too. */
1628 static tree
1629 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1631 tree res = unmodified_parm_1 (stmt, op, size_p);
1632 if (res)
1633 return res;
1635 if (TREE_CODE (op) == SSA_NAME
1636 && !SSA_NAME_IS_DEFAULT_DEF (op)
1637 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1638 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1639 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1640 size_p);
1641 return NULL_TREE;
1644 /* If OP refers to a value of a function parameter or value loaded from an
1645 aggregate passed to a parameter (either by value or reference), return TRUE
1646 and store the number of the parameter to *INDEX_P, the access size into
1647 *SIZE_P, and information whether and how it has been loaded from an
1648 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1649 statement in which OP is used or loaded. */
1651 static bool
1652 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1653 gimple *stmt, tree op, int *index_p,
1654 HOST_WIDE_INT *size_p,
1655 struct agg_position_info *aggpos)
1657 tree res = unmodified_parm_1 (stmt, op, size_p);
1659 gcc_checking_assert (aggpos);
1660 if (res)
1662 *index_p = ipa_get_param_decl_index (fbi->info, res);
1663 if (*index_p < 0)
1664 return false;
1665 aggpos->agg_contents = false;
1666 aggpos->by_ref = false;
1667 return true;
1670 if (TREE_CODE (op) == SSA_NAME)
1672 if (SSA_NAME_IS_DEFAULT_DEF (op)
1673 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1674 return false;
1675 stmt = SSA_NAME_DEF_STMT (op);
1676 op = gimple_assign_rhs1 (stmt);
1677 if (!REFERENCE_CLASS_P (op))
1678 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1679 aggpos);
1682 aggpos->agg_contents = true;
1683 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1684 stmt, op, index_p, &aggpos->offset,
1685 size_p, &aggpos->by_ref);
1688 /* See if statement might disappear after inlining.
1689 0 - means not eliminated
1690 1 - half of statements goes away
1691 2 - for sure it is eliminated.
1692 We are not terribly sophisticated, basically looking for simple abstraction
1693 penalty wrappers. */
/* NOTE(review): only GIMPLE_RETURN and single-operand GIMPLE_ASSIGNs are
   ever considered; everything else falls through to "not eliminated".  */
1695 static int
1696 eliminated_by_inlining_prob (gimple *stmt)
1698 enum gimple_code code = gimple_code (stmt);
1699 enum tree_code rhs_code;
1701 if (!optimize)
1702 return 0;
1704 switch (code)
1706 case GIMPLE_RETURN:
1707 return 2;
1708 case GIMPLE_ASSIGN:
1709 if (gimple_num_ops (stmt) != 2)
1710 return 0;
1712 rhs_code = gimple_assign_rhs_code (stmt);
1714 /* Casts of parameters, loads from parameters passed by reference
1715 and stores to return value or parameters are often free after
1716 inlining due to SRA and further combining.
1717 Assume that half of statements goes away. */
1718 if (CONVERT_EXPR_CODE_P (rhs_code)
1719 || rhs_code == VIEW_CONVERT_EXPR
1720 || rhs_code == ADDR_EXPR
1721 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1723 tree rhs = gimple_assign_rhs1 (stmt);
1724 tree lhs = gimple_assign_lhs (stmt);
1725 tree inner_rhs = get_base_address (rhs);
1726 tree inner_lhs = get_base_address (lhs);
1727 bool rhs_free = false;
1728 bool lhs_free = false;
1730 if (!inner_rhs)
1731 inner_rhs = rhs;
1732 if (!inner_lhs)
1733 inner_lhs = lhs;
1735 /* Reads of parameter are expected to be free. */
1736 if (unmodified_parm (stmt, inner_rhs, NULL))
1737 rhs_free = true;
1738 /* Match expressions of form &this->field. Those will most likely
1739 combine with something upstream after inlining. */
1740 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1742 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1743 if (TREE_CODE (op) == PARM_DECL)
1744 rhs_free = true;
1745 else if (TREE_CODE (op) == MEM_REF
1746 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1747 rhs_free = true;
1750 /* When parameter is not SSA register because its address is taken
1751 and it is just copied into one, the statement will be completely
1752 free after inlining (we will copy propagate backward). */
1753 if (rhs_free && is_gimple_reg (lhs))
1754 return 2;
1756 /* Reads of parameters passed by reference
1757 expected to be free (i.e. optimized out after inlining). */
1758 if (TREE_CODE (inner_rhs) == MEM_REF
1759 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1760 rhs_free = true;
1762 /* Copying parameter passed by reference into gimple register is
1763 probably also going to copy propagate, but we can't be quite
1764 sure. */
1765 if (rhs_free && is_gimple_reg (lhs))
1766 lhs_free = true;
1768 /* Writes to parameters, parameters passed by value and return value
1769 (either directly or passed via invisible reference) are free.
1771 TODO: We ought to handle testcase like
1772 struct a {int a,b;};
1773 struct a
1774 retrurnsturct (void)
1776 struct a a ={1,2};
1777 return a;
1780 This translate into:
1782 retrurnsturct ()
1784 int a$b;
1785 int a$a;
1786 struct a a;
1787 struct a D.2739;
1789 <bb 2>:
1790 D.2739.a = 1;
1791 D.2739.b = 2;
1792 return D.2739;
1795 For that we either need to copy ipa-split logic detecting writes
1796 to return value. */
1797 if (TREE_CODE (inner_lhs) == PARM_DECL
1798 || TREE_CODE (inner_lhs) == RESULT_DECL
1799 || (TREE_CODE (inner_lhs) == MEM_REF
1800 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1801 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1802 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1803 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1804 (inner_lhs,
1805 0))) == RESULT_DECL))))
1806 lhs_free = true;
/* A free destination plus an invariant or register source means the
   whole copy is likely combined away; report "half goes away".  */
1807 if (lhs_free
1808 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1809 rhs_free = true;
1810 if (lhs_free && rhs_free)
1811 return 1;
1813 return 0;
1814 default:
1815 return 0;
1820 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1821 predicates to the CFG edges. */
/* Two patterns are recognized: a direct comparison of a parameter (or a
   value loaded from a parameter aggregate) against an IP-invariant RHS,
   and the __builtin_constant_p (param) idiom.  */
1823 static void
1824 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1825 struct inline_summary *summary,
1826 basic_block bb)
1828 gimple *last;
1829 tree op;
1830 int index;
1831 HOST_WIDE_INT size;
1832 struct agg_position_info aggpos;
1833 enum tree_code code, inverted_code;
1834 edge e;
1835 edge_iterator ei;
1836 gimple *set_stmt;
1837 tree op2;
1839 last = last_stmt (bb);
1840 if (!last || gimple_code (last) != GIMPLE_COND)
1841 return;
1842 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1843 return;
1844 op = gimple_cond_lhs (last);
1845 /* TODO: handle conditionals like
1846 var = op0 < 4;
1847 if (var != 0). */
1848 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1850 code = gimple_cond_code (last);
1851 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
/* The false edge gets the inverted comparison code.  */
1853 FOR_EACH_EDGE (e, ei, bb->succs)
1855 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1856 ? code : inverted_code);
1857 /* invert_tree_comparison will return ERROR_MARK on FP
1858 comparisons that are not EQ/NE instead of returning proper
1859 unordered one. Be sure it is not confused with NON_CONSTANT. */
1860 if (this_code != ERROR_MARK)
1862 struct predicate p
1863 = add_condition (summary, index, size, &aggpos, this_code,
1864 unshare_expr_without_location
1865 (gimple_cond_rhs (last)));
1866 e->aux = edge_predicate_pool.allocate ();
1867 *(struct predicate *) e->aux = p;
1872 if (TREE_CODE (op) != SSA_NAME)
1873 return;
1874 /* Special case
1875 if (builtin_constant_p (op))
1876 constant_code
1877 else
1878 nonconstant_code.
1879 Here we can predicate nonconstant_code. We can't
1880 really handle constant_code since we have no predicate
1881 for this and also the constant code is not known to be
1882 optimized away when inliner doesn't see operand is constant.
1883 Other optimizers might think otherwise. */
1884 if (gimple_cond_code (last) != NE_EXPR
1885 || !integer_zerop (gimple_cond_rhs (last)))
1886 return;
1887 set_stmt = SSA_NAME_DEF_STMT (op);
1888 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1889 || gimple_call_num_args (set_stmt) != 1)
1890 return;
1891 op2 = gimple_call_arg (set_stmt, 0);
1892 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1893 &aggpos))
1894 return;
/* Only the false edge (the "nonconstant" branch) gets a predicate.  */
1895 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1897 struct predicate p = add_condition (summary, index, size, &aggpos,
1898 IS_NOT_CONSTANT, NULL_TREE);
1899 e->aux = edge_predicate_pool.allocate ();
1900 *(struct predicate *) e->aux = p;
1905 /* If BB ends by a switch we can turn into predicates, attach corresponding
1906 predicates to the CFG edges. */
/* Every successor edge starts as false and accumulates (by OR) the range
   predicates of all case labels that lead to it.  */
1908 static void
1909 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1910 struct inline_summary *summary,
1911 basic_block bb)
1913 gimple *lastg;
1914 tree op;
1915 int index;
1916 HOST_WIDE_INT size;
1917 struct agg_position_info aggpos;
1918 edge e;
1919 edge_iterator ei;
1920 size_t n;
1921 size_t case_idx;
1923 lastg = last_stmt (bb);
1924 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1925 return;
1926 gswitch *last = as_a <gswitch *> (lastg);
1927 op = gimple_switch_index (last);
/* Only switches over a parameter (or parameter aggregate load) are
   predictable from the caller's side.  */
1928 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1929 return;
1931 FOR_EACH_EDGE (e, ei, bb->succs)
1933 e->aux = edge_predicate_pool.allocate ();
1934 *(struct predicate *) e->aux = false_predicate ();
1936 n = gimple_switch_num_labels (last);
1937 for (case_idx = 0; case_idx < n; ++case_idx)
1939 tree cl = gimple_switch_label (last, case_idx);
1940 tree min, max;
1941 struct predicate p;
1943 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1944 min = CASE_LOW (cl);
1945 max = CASE_HIGH (cl);
1947 /* For default we might want to construct predicate that none
1948 of cases is met, but it is bit hard to do not having negations
1949 of conditionals handy. */
1950 if (!min && !max)
1951 p = true_predicate ();
/* A single-value case becomes an equality test; a range becomes the
   conjunction of its lower and upper bound tests.  */
1952 else if (!max)
1953 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1954 unshare_expr_without_location (min));
1955 else
1957 struct predicate p1, p2;
1958 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1959 unshare_expr_without_location (min));
1960 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1961 unshare_expr_without_location (max));
1962 p = and_predicates (summary->conds, &p1, &p2);
1964 *(struct predicate *) e->aux
1965 = or_predicates (summary->conds, &p, (struct predicate *) e->aux);
1970 /* For each BB in NODE attach to its AUX pointer predicate under
1971 which it is executable. */
/* This is an iterate-to-fixpoint forward dataflow: a block's predicate is
   the OR over predecessors of (predecessor predicate AND edge predicate).
   A NULL aux means "not yet known reachable" (i.e. false).  */
1973 static void
1974 compute_bb_predicates (struct ipa_func_body_info *fbi,
1975 struct cgraph_node *node,
1976 struct inline_summary *summary)
1978 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1979 bool done = false;
1980 basic_block bb;
1982 FOR_EACH_BB_FN (bb, my_function)
1984 set_cond_stmt_execution_predicate (fbi, summary, bb)
1985 set_switch_stmt_execution_predicate (fbi, summary, bb);
1988 /* Entry block is always executable. */
1989 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1990 = edge_predicate_pool.allocate ();
1991 *(struct predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1992 = true_predicate ();
1994 /* A simple dataflow propagation of predicates forward in the CFG.
1995 TODO: work in reverse postorder. */
1996 while (!done)
1998 done = true;
1999 FOR_EACH_BB_FN (bb, my_function)
2001 struct predicate p = false_predicate ();
2002 edge e;
2003 edge_iterator ei;
2004 FOR_EACH_EDGE (e, ei, bb->preds)
2006 if (e->src->aux)
2008 struct predicate this_bb_predicate
2009 = *(struct predicate *) e->src->aux;
2010 if (e->aux)
2011 this_bb_predicate
2012 = and_predicates (summary->conds, &this_bb_predicate,
2013 (struct predicate *) e->aux);
2014 p = or_predicates (summary->conds, &p, &this_bb_predicate);
/* Once the accumulated predicate is true it cannot grow further.  */
2015 if (true_predicate_p (&p))
2016 break;
2019 if (false_predicate_p (&p))
2020 gcc_assert (!bb->aux);
2021 else
2023 if (!bb->aux)
2025 done = false;
2026 bb->aux = edge_predicate_pool.allocate ();
2027 *((struct predicate *) bb->aux) = p;
2029 else if (!predicates_equal_p (&p, (struct predicate *) bb->aux))
2031 /* This OR operation is needed to ensure monotonous data flow
2032 in the case we hit the limit on number of clauses and the
2033 and/or operations above give approximate answers. */
2034 p = or_predicates (summary->conds, &p, (struct predicate *)bb->aux);
2035 if (!predicates_equal_p (&p, (struct predicate *) bb->aux))
2037 done = false;
2038 *((struct predicate *) bb->aux) = p;
2047 /* We keep info about constantness of SSA names. */
2049 typedef struct predicate predicate_t;
2050 /* Return predicate specifying when the STMT might have result that is not
2051 a compile time constant. */
/* Recurses structurally over EXPR; NONCONSTANT_NAMES is indexed by
   SSA_NAME_VERSION and holds the predicate already computed for each
   SSA name.  */
2053 static struct predicate
2054 will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
2055 struct inline_summary *summary,
2056 tree expr,
2057 vec<predicate_t> nonconstant_names)
2059 tree parm;
2060 int index;
2061 HOST_WIDE_INT size;
/* Unary wrappers (casts etc.) do not change constantness; look through
   them.  */
2063 while (UNARY_CLASS_P (expr))
2064 expr = TREE_OPERAND (expr, 0);
2066 parm = unmodified_parm (NULL, expr, &size);
2067 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
2068 return add_condition (summary, index, size, NULL, CHANGED, NULL_TREE);
2069 if (is_gimple_min_invariant (expr))
2070 return false_predicate ();
2071 if (TREE_CODE (expr) == SSA_NAME)
2072 return nonconstant_names[SSA_NAME_VERSION (expr)];
/* For compound expressions the result is nonconstant whenever any
   operand is, hence the ORs below.  */
2073 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
2075 struct predicate p1 = will_be_nonconstant_expr_predicate
2076 (info, summary, TREE_OPERAND (expr, 0),
2077 nonconstant_names);
2078 struct predicate p2;
2079 if (true_predicate_p (&p1))
2080 return p1;
2081 p2 = will_be_nonconstant_expr_predicate (info, summary,
2082 TREE_OPERAND (expr, 1),
2083 nonconstant_names);
2084 return or_predicates (summary->conds, &p1, &p2);
2086 else if (TREE_CODE (expr) == COND_EXPR)
2088 struct predicate p1 = will_be_nonconstant_expr_predicate
2089 (info, summary, TREE_OPERAND (expr, 0),
2090 nonconstant_names);
2091 struct predicate p2;
2092 if (true_predicate_p (&p1))
2093 return p1;
2094 p2 = will_be_nonconstant_expr_predicate (info, summary,
2095 TREE_OPERAND (expr, 1),
2096 nonconstant_names);
2097 if (true_predicate_p (&p2))
2098 return p2;
2099 p1 = or_predicates (summary->conds, &p1, &p2);
2100 p2 = will_be_nonconstant_expr_predicate (info, summary,
2101 TREE_OPERAND (expr, 2),
2102 nonconstant_names);
2103 return or_predicates (summary->conds, &p1, &p2);
2105 else
2107 debug_tree (expr);
2108 gcc_unreachable ();
2110 return false_predicate ();
2114 /* Return predicate specifying when the STMT might have result that is not
2115 a compile time constant. */
/* As a side effect, records the computed predicate into
   NONCONSTANT_NAMES for the SSA name STMT defines (if any), so later
   statements can reuse it.  */
2117 static struct predicate
2118 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2119 struct inline_summary *summary,
2120 gimple *stmt,
2121 vec<predicate_t> nonconstant_names)
2123 struct predicate p = true_predicate ();
2124 ssa_op_iter iter;
2125 tree use;
2126 struct predicate op_non_const;
2127 bool is_load;
2128 int base_index;
2129 HOST_WIDE_INT size;
2130 struct agg_position_info aggpos;
2132 /* What statements might be optimized away
2133 when their arguments are constant. */
2134 if (gimple_code (stmt) != GIMPLE_ASSIGN
2135 && gimple_code (stmt) != GIMPLE_COND
2136 && gimple_code (stmt) != GIMPLE_SWITCH
2137 && (gimple_code (stmt) != GIMPLE_CALL
2138 || !(gimple_call_flags (stmt) & ECF_CONST)))
2139 return p;
2141 /* Stores will stay anyway. */
2142 if (gimple_store_p (stmt))
2143 return p;
2145 is_load = gimple_assign_load_p (stmt);
2147 /* Loads can be optimized when the value is known. */
2148 if (is_load)
2150 tree op;
2151 gcc_assert (gimple_assign_single_p (stmt));
2152 op = gimple_assign_rhs1 (stmt);
2153 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
2154 &aggpos))
2155 return p;
2157 else
2158 base_index = -1;
2160 /* See if we understand all operands before we start
2161 adding conditionals. */
/* Any operand that is neither a tracked parameter nor an SSA name with a
   known constantness predicate makes the whole result unconditionally
   nonconstant (predicate P is still true here).  */
2162 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2164 tree parm = unmodified_parm (stmt, use, NULL);
2165 /* For arguments we can build a condition. */
2166 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2167 continue;
2168 if (TREE_CODE (use) != SSA_NAME)
2169 return p;
2170 /* If we know when operand is constant,
2171 we still can say something useful. */
2172 if (!true_predicate_p (&nonconstant_names[SSA_NAME_VERSION (use)]))
2173 continue;
2174 return p;
2177 if (is_load)
2178 op_non_const =
2179 add_condition (summary, base_index, size, &aggpos, CHANGED, NULL);
2180 else
2181 op_non_const = false_predicate ();
/* OR together the nonconstantness predicates of all operands.  */
2182 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2184 HOST_WIDE_INT size;
2185 tree parm = unmodified_parm (stmt, use, &size);
2186 int index;
2188 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2190 if (index != base_index)
2191 p = add_condition (summary, index, size, NULL, CHANGED, NULL_TREE);
2192 else
2193 continue;
2195 else
2196 p = nonconstant_names[SSA_NAME_VERSION (use)];
2197 op_non_const = or_predicates (summary->conds, &p, &op_non_const);
2199 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2200 && gimple_op (stmt, 0)
2201 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2202 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2203 = op_non_const;
2204 return op_non_const;
2207 struct record_modified_bb_info
2209 bitmap bb_set;
2210 gimple *stmt;
2213 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
2214 probability how often it changes between USE_BB.
2215 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
2216 is in different loop nest, we can do better.
2217 This is all just estimate. In theory we look for minimal cut separating
2218 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2219 anyway. */
2221 static basic_block
2222 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2224 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2225 if (l && l->header->frequency < init_bb->frequency)
2226 return l->header;
2227 return init_bb;
2230 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2231 set except for info->stmt. */
2233 static bool
2234 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2236 struct record_modified_bb_info *info =
2237 (struct record_modified_bb_info *) data;
2238 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2239 return false;
2240 bitmap_set_bit (info->bb_set,
2241 SSA_NAME_IS_DEFAULT_DEF (vdef)
2242 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2243 : get_minimal_bb
2244 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2245 gimple_bb (info->stmt))->index);
2246 return false;
2249 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2250 will change since last invocation of STMT.
2252 Value 0 is reserved for compile time invariants.
2253 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2254 ought to be REG_BR_PROB_BASE / estimated_iters. */
2256 static int
2257 param_change_prob (gimple *stmt, int i)
2259 tree op = gimple_call_arg (stmt, i);
2260 basic_block bb = gimple_bb (stmt);
2262 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2263 op = TREE_OPERAND (op, 0);
2265 tree base = get_base_address (op);
2267 /* Global invariants never change. */
2268 if (is_gimple_min_invariant (base))
2269 return 0;
2271 /* We would have to do non-trivial analysis to really work out what
2272 is the probability of value to change (i.e. when init statement
2273 is in a sibling loop of the call).
2275 We do an conservative estimate: when call is executed N times more often
2276 than the statement defining value, we take the frequency 1/N. */
2277 if (TREE_CODE (base) == SSA_NAME)
2279 int init_freq;
2281 if (!bb->frequency)
2282 return REG_BR_PROB_BASE;
2284 if (SSA_NAME_IS_DEFAULT_DEF (base))
2285 init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
2286 else
2287 init_freq = get_minimal_bb
2288 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2289 gimple_bb (stmt))->frequency;
2291 if (!init_freq)
2292 init_freq = 1;
2293 if (init_freq < bb->frequency)
2294 return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
2295 else
2296 return REG_BR_PROB_BASE;
2298 else
2300 ao_ref refd;
2301 int max;
2302 struct record_modified_bb_info info;
2303 bitmap_iterator bi;
2304 unsigned index;
2305 tree init = ctor_for_folding (base);
2307 if (init != error_mark_node)
2308 return 0;
2309 if (!bb->frequency)
2310 return REG_BR_PROB_BASE;
2311 ao_ref_init (&refd, op);
2312 info.stmt = stmt;
2313 info.bb_set = BITMAP_ALLOC (NULL);
2314 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2315 NULL);
2316 if (bitmap_bit_p (info.bb_set, bb->index))
2318 BITMAP_FREE (info.bb_set);
2319 return REG_BR_PROB_BASE;
2322 /* Assume that every memory is initialized at entry.
2323 TODO: Can we easilly determine if value is always defined
2324 and thus we may skip entry block? */
2325 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
2326 max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
2327 else
2328 max = 1;
2330 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2331 max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);
2333 BITMAP_FREE (info.bb_set);
2334 if (max < bb->frequency)
2335 return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
2336 else
2337 return REG_BR_PROB_BASE;
2341 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2342 sub-graph and if the predicate the condition depends on is known. If so,
2343 return true and store the pointer the predicate in *P. */
2345 static bool
2346 phi_result_unknown_predicate (struct ipa_node_params *info,
2347 inline_summary *summary, basic_block bb,
2348 struct predicate *p,
2349 vec<predicate_t> nonconstant_names)
2351 edge e;
2352 edge_iterator ei;
2353 basic_block first_bb = NULL;
2354 gimple *stmt;
2356 if (single_pred_p (bb))
2358 *p = false_predicate ();
2359 return true;
2362 FOR_EACH_EDGE (e, ei, bb->preds)
2364 if (single_succ_p (e->src))
2366 if (!single_pred_p (e->src))
2367 return false;
2368 if (!first_bb)
2369 first_bb = single_pred (e->src);
2370 else if (single_pred (e->src) != first_bb)
2371 return false;
2373 else
2375 if (!first_bb)
2376 first_bb = e->src;
2377 else if (e->src != first_bb)
2378 return false;
2382 if (!first_bb)
2383 return false;
2385 stmt = last_stmt (first_bb);
2386 if (!stmt
2387 || gimple_code (stmt) != GIMPLE_COND
2388 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2389 return false;
2391 *p = will_be_nonconstant_expr_predicate (info, summary,
2392 gimple_cond_lhs (stmt),
2393 nonconstant_names);
2394 if (true_predicate_p (p))
2395 return false;
2396 else
2397 return true;
2400 /* Given a PHI statement in a function described by inline properties SUMMARY
2401 and *P being the predicate describing whether the selected PHI argument is
2402 known, store a predicate for the result of the PHI statement into
2403 NONCONSTANT_NAMES, if possible. */
2405 static void
2406 predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
2407 struct predicate *p,
2408 vec<predicate_t> nonconstant_names)
2410 unsigned i;
2412 for (i = 0; i < gimple_phi_num_args (phi); i++)
2414 tree arg = gimple_phi_arg (phi, i)->def;
2415 if (!is_gimple_min_invariant (arg))
2417 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2418 *p = or_predicates (summary->conds, p,
2419 &nonconstant_names[SSA_NAME_VERSION (arg)]);
2420 if (true_predicate_p (p))
2421 return;
2425 if (dump_file && (dump_flags & TDF_DETAILS))
2427 fprintf (dump_file, "\t\tphi predicate: ");
2428 dump_predicate (dump_file, summary->conds, p);
2430 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2433 /* Return predicate specifying when array index in access OP becomes non-constant. */
2435 static struct predicate
2436 array_index_predicate (inline_summary *info,
2437 vec< predicate_t> nonconstant_names, tree op)
2439 struct predicate p = false_predicate ();
2440 while (handled_component_p (op))
2442 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
2444 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2445 p = or_predicates (info->conds, &p,
2446 &nonconstant_names[SSA_NAME_VERSION
2447 (TREE_OPERAND (op, 1))]);
2449 op = TREE_OPERAND (op, 0);
2451 return p;
2454 /* For a typical usage of __builtin_expect (a<b, 1), we
2455 may introduce an extra relation stmt:
2456 With the builtin, we have
2457 t1 = a <= b;
2458 t2 = (long int) t1;
2459 t3 = __builtin_expect (t2, 1);
2460 if (t3 != 0)
2461 goto ...
2462 Without the builtin, we have
2463 if (a<=b)
2464 goto...
2465 This affects the size/time estimation and may have
2466 an impact on the earlier inlining.
2467 Here find this pattern and fix it up later. */
2469 static gimple *
2470 find_foldable_builtin_expect (basic_block bb)
2472 gimple_stmt_iterator bsi;
2474 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2476 gimple *stmt = gsi_stmt (bsi);
2477 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2478 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2480 tree var = gimple_call_lhs (stmt);
2481 tree arg = gimple_call_arg (stmt, 0);
2482 use_operand_p use_p;
2483 gimple *use_stmt;
2484 bool match = false;
2485 bool done = false;
2487 if (!var || !arg)
2488 continue;
2489 gcc_assert (TREE_CODE (var) == SSA_NAME);
2491 while (TREE_CODE (arg) == SSA_NAME)
2493 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2494 if (!is_gimple_assign (stmt_tmp))
2495 break;
2496 switch (gimple_assign_rhs_code (stmt_tmp))
2498 case LT_EXPR:
2499 case LE_EXPR:
2500 case GT_EXPR:
2501 case GE_EXPR:
2502 case EQ_EXPR:
2503 case NE_EXPR:
2504 match = true;
2505 done = true;
2506 break;
2507 CASE_CONVERT:
2508 break;
2509 default:
2510 done = true;
2511 break;
2513 if (done)
2514 break;
2515 arg = gimple_assign_rhs1 (stmt_tmp);
2518 if (match && single_imm_use (var, &use_p, &use_stmt)
2519 && gimple_code (use_stmt) == GIMPLE_COND)
2520 return use_stmt;
2523 return NULL;
2526 /* Return true when the basic blocks contains only clobbers followed by RESX.
2527 Such BBs are kept around to make removal of dead stores possible with
2528 presence of EH and will be optimized out by optimize_clobbers later in the
2529 game.
2531 NEED_EH is used to recurse in case the clobber has non-EH predecestors
2532 that can be clobber only, too.. When it is false, the RESX is not necessary
2533 on the end of basic block. */
2535 static bool
2536 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2538 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2539 edge_iterator ei;
2540 edge e;
2542 if (need_eh)
2544 if (gsi_end_p (gsi))
2545 return false;
2546 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2547 return false;
2548 gsi_prev (&gsi);
2550 else if (!single_succ_p (bb))
2551 return false;
2553 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2555 gimple *stmt = gsi_stmt (gsi);
2556 if (is_gimple_debug (stmt))
2557 continue;
2558 if (gimple_clobber_p (stmt))
2559 continue;
2560 if (gimple_code (stmt) == GIMPLE_LABEL)
2561 break;
2562 return false;
2565 /* See if all predecestors are either throws or clobber only BBs. */
2566 FOR_EACH_EDGE (e, ei, bb->preds)
2567 if (!(e->flags & EDGE_EH)
2568 && !clobber_only_eh_bb_p (e->src, false))
2569 return false;
2571 return true;
2574 /* Return true if STMT compute a floating point expression that may be affected
2575 by -ffast-math and similar flags. */
2577 static bool
2578 fp_expression_p (gimple *stmt)
2580 ssa_op_iter i;
2581 tree op;
2583 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2584 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2585 return true;
2586 return false;
2589 /* Compute function body size parameters for NODE.
2590 When EARLY is true, we compute only simple summaries without
2591 non-trivial predicates to drive the early inliner. */
2593 static void
2594 estimate_function_body_sizes (struct cgraph_node *node, bool early)
2596 sreal time = 0;
2597 /* Estimate static overhead for function prologue/epilogue and alignment. */
2598 int size = 2;
2599 /* Benefits are scaled by probability of elimination that is in range
2600 <0,2>. */
2601 basic_block bb;
2602 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2603 int freq;
2604 struct inline_summary *info = inline_summaries->get (node);
2605 struct predicate bb_predicate;
2606 struct ipa_func_body_info fbi;
2607 vec<predicate_t> nonconstant_names = vNULL;
2608 int nblocks, n;
2609 int *order;
2610 predicate array_index = true_predicate ();
2611 gimple *fix_builtin_expect_stmt;
2613 gcc_assert (my_function && my_function->cfg);
2614 gcc_assert (cfun == my_function);
2616 memset(&fbi, 0, sizeof(fbi));
2617 info->conds = NULL;
2618 info->entry = NULL;
2620 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2621 so we can produce proper inline hints.
2623 When optimizing and analyzing for early inliner, initialize node params
2624 so we can produce correct BB predicates. */
2626 if (opt_for_fn (node->decl, optimize))
2628 calculate_dominance_info (CDI_DOMINATORS);
2629 if (!early)
2630 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2631 else
2633 ipa_check_create_node_params ();
2634 ipa_initialize_node_params (node);
2637 if (ipa_node_params_sum)
2639 fbi.node = node;
2640 fbi.info = IPA_NODE_REF (node);
2641 fbi.bb_infos = vNULL;
2642 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2643 fbi.param_count = count_formal_params(node->decl);
2644 nonconstant_names.safe_grow_cleared
2645 (SSANAMES (my_function)->length ());
2649 if (dump_file)
2650 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2651 node->name ());
2653 /* When we run into maximal number of entries, we assign everything to the
2654 constant truth case. Be sure to have it in list. */
2655 bb_predicate = true_predicate ();
2656 account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);
2658 bb_predicate = not_inlined_predicate ();
2659 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
2660 &bb_predicate);
2662 if (fbi.info)
2663 compute_bb_predicates (&fbi, node, info);
2664 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2665 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2666 for (n = 0; n < nblocks; n++)
2668 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2669 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2670 if (clobber_only_eh_bb_p (bb))
2672 if (dump_file && (dump_flags & TDF_DETAILS))
2673 fprintf (dump_file, "\n Ignoring BB %i;"
2674 " it will be optimized away by cleanup_clobbers\n",
2675 bb->index);
2676 continue;
2679 /* TODO: Obviously predicates can be propagated down across CFG. */
2680 if (fbi.info)
2682 if (bb->aux)
2683 bb_predicate = *(struct predicate *) bb->aux;
2684 else
2685 bb_predicate = false_predicate ();
2687 else
2688 bb_predicate = true_predicate ();
2690 if (dump_file && (dump_flags & TDF_DETAILS))
2692 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2693 dump_predicate (dump_file, info->conds, &bb_predicate);
2696 if (fbi.info && nonconstant_names.exists ())
2698 struct predicate phi_predicate;
2699 bool first_phi = true;
2701 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2702 gsi_next (&bsi))
2704 if (first_phi
2705 && !phi_result_unknown_predicate (fbi.info, info, bb,
2706 &phi_predicate,
2707 nonconstant_names))
2708 break;
2709 first_phi = false;
2710 if (dump_file && (dump_flags & TDF_DETAILS))
2712 fprintf (dump_file, " ");
2713 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0, 0);
2715 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2716 nonconstant_names);
2720 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2722 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2723 gsi_next (&bsi))
2725 gimple *stmt = gsi_stmt (bsi);
2726 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2727 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2728 int prob;
2729 struct predicate will_be_nonconstant;
2731 /* This relation stmt should be folded after we remove
2732 buildin_expect call. Adjust the cost here. */
2733 if (stmt == fix_builtin_expect_stmt)
2735 this_size--;
2736 this_time--;
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, " ");
2742 print_gimple_stmt (dump_file, stmt, 0, 0);
2743 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2744 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2745 this_time);
2748 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2750 struct predicate this_array_index;
2751 this_array_index =
2752 array_index_predicate (info, nonconstant_names,
2753 gimple_assign_rhs1 (stmt));
2754 if (!false_predicate_p (&this_array_index))
2755 array_index =
2756 and_predicates (info->conds, &array_index,
2757 &this_array_index);
2759 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2761 struct predicate this_array_index;
2762 this_array_index =
2763 array_index_predicate (info, nonconstant_names,
2764 gimple_get_lhs (stmt));
2765 if (!false_predicate_p (&this_array_index))
2766 array_index =
2767 and_predicates (info->conds, &array_index,
2768 &this_array_index);
2772 if (is_gimple_call (stmt)
2773 && !gimple_call_internal_p (stmt))
2775 struct cgraph_edge *edge = node->get_edge (stmt);
2776 struct inline_edge_summary *es = inline_edge_summary (edge);
2778 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2779 resolved as constant. We however don't want to optimize
2780 out the cgraph edges. */
2781 if (nonconstant_names.exists ()
2782 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2783 && gimple_call_lhs (stmt)
2784 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2786 struct predicate false_p = false_predicate ();
2787 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2788 = false_p;
2790 if (ipa_node_params_sum)
2792 int count = gimple_call_num_args (stmt);
2793 int i;
2795 if (count)
2796 es->param.safe_grow_cleared (count);
2797 for (i = 0; i < count; i++)
2799 int prob = param_change_prob (stmt, i);
2800 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2801 es->param[i].change_prob = prob;
2805 es->call_stmt_size = this_size;
2806 es->call_stmt_time = this_time;
2807 es->loop_depth = bb_loop_depth (bb);
2808 edge_set_predicate (edge, &bb_predicate);
2811 /* TODO: When conditional jump or swithc is known to be constant, but
2812 we did not translate it into the predicates, we really can account
2813 just maximum of the possible paths. */
2814 if (fbi.info)
2815 will_be_nonconstant
2816 = will_be_nonconstant_predicate (&fbi, info,
2817 stmt, nonconstant_names);
2818 else
2819 will_be_nonconstant = true_predicate ();
2820 if (this_time || this_size)
2822 this_time *= freq;
2824 prob = eliminated_by_inlining_prob (stmt);
2825 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2826 fprintf (dump_file,
2827 "\t\t50%% will be eliminated by inlining\n");
2828 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2829 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2831 struct predicate p = and_predicates (info->conds, &bb_predicate,
2832 &will_be_nonconstant);
2834 /* We can ignore statement when we proved it is never going
2835 to happen, but we can not do that for call statements
2836 because edges are accounted specially. */
2838 if (!false_predicate_p (is_gimple_call (stmt)
2839 ? &bb_predicate : &p))
2841 time += this_time;
2842 size += this_size;
2845 /* We account everything but the calls. Calls have their own
2846 size/time info attached to cgraph edges. This is necessary
2847 in order to make the cost disappear after inlining. */
2848 if (!is_gimple_call (stmt))
2850 if (prob)
2852 struct predicate ip = not_inlined_predicate ();
2853 ip = and_predicates (info->conds, &ip, &bb_predicate);
2854 account_size_time (info, this_size * prob,
2855 (sreal)(this_time * prob)
2856 / (CGRAPH_FREQ_BASE * 2), &ip,
2857 &p);
2859 if (prob != 2)
2860 account_size_time (info, this_size * (2 - prob),
2861 (sreal)(this_time * (2 - prob))
2862 / (CGRAPH_FREQ_BASE * 2),
2863 &bb_predicate,
2864 &p);
2867 if (!info->fp_expressions && fp_expression_p (stmt))
2869 info->fp_expressions = true;
2870 if (dump_file)
2871 fprintf (dump_file, " fp_expression set\n");
2874 gcc_assert (time >= 0);
2875 gcc_assert (size >= 0);
2879 set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
2880 time = time / CGRAPH_FREQ_BASE;
2881 free (order);
2883 if (nonconstant_names.exists () && !early)
2885 struct loop *loop;
2886 predicate loop_iterations = true_predicate ();
2887 predicate loop_stride = true_predicate ();
2889 if (dump_file && (dump_flags & TDF_DETAILS))
2890 flow_loops_dump (dump_file, NULL, 0);
2891 scev_initialize ();
2892 FOR_EACH_LOOP (loop, 0)
2894 vec<edge> exits;
2895 edge ex;
2896 unsigned int j;
2897 struct tree_niter_desc niter_desc;
2898 bb_predicate = *(struct predicate *) loop->header->aux;
2900 exits = get_loop_exit_edges (loop);
2901 FOR_EACH_VEC_ELT (exits, j, ex)
2902 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2903 && !is_gimple_min_invariant (niter_desc.niter))
2905 predicate will_be_nonconstant
2906 = will_be_nonconstant_expr_predicate (fbi.info, info,
2907 niter_desc.niter,
2908 nonconstant_names);
2909 if (!true_predicate_p (&will_be_nonconstant))
2910 will_be_nonconstant = and_predicates (info->conds,
2911 &bb_predicate,
2912 &will_be_nonconstant);
2913 if (!true_predicate_p (&will_be_nonconstant)
2914 && !false_predicate_p (&will_be_nonconstant))
2915 /* This is slightly inprecise. We may want to represent each
2916 loop with independent predicate. */
2917 loop_iterations =
2918 and_predicates (info->conds, &loop_iterations,
2919 &will_be_nonconstant);
2921 exits.release ();
2924 /* To avoid quadratic behavior we analyze stride predicates only
2925 with respect to the containing loop. Thus we simply iterate
2926 over all defs in the outermost loop body. */
2927 for (loop = loops_for_fn (cfun)->tree_root->inner;
2928 loop != NULL; loop = loop->next)
2930 basic_block *body = get_loop_body (loop);
2931 for (unsigned i = 0; i < loop->num_nodes; i++)
2933 gimple_stmt_iterator gsi;
2934 bb_predicate = *(struct predicate *) body[i]->aux;
2935 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2936 gsi_next (&gsi))
2938 gimple *stmt = gsi_stmt (gsi);
2940 if (!is_gimple_assign (stmt))
2941 continue;
2943 tree def = gimple_assign_lhs (stmt);
2944 if (TREE_CODE (def) != SSA_NAME)
2945 continue;
2947 affine_iv iv;
2948 if (!simple_iv (loop_containing_stmt (stmt),
2949 loop_containing_stmt (stmt),
2950 def, &iv, true)
2951 || is_gimple_min_invariant (iv.step))
2952 continue;
2954 predicate will_be_nonconstant
2955 = will_be_nonconstant_expr_predicate (fbi.info, info,
2956 iv.step,
2957 nonconstant_names);
2958 if (!true_predicate_p (&will_be_nonconstant))
2959 will_be_nonconstant
2960 = and_predicates (info->conds, &bb_predicate,
2961 &will_be_nonconstant);
2962 if (!true_predicate_p (&will_be_nonconstant)
2963 && !false_predicate_p (&will_be_nonconstant))
2964 /* This is slightly inprecise. We may want to represent
2965 each loop with independent predicate. */
2966 loop_stride = and_predicates (info->conds, &loop_stride,
2967 &will_be_nonconstant);
2970 free (body);
2972 set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
2973 loop_iterations);
2974 set_hint_predicate (&inline_summaries->get (node)->loop_stride,
2975 loop_stride);
2976 scev_finalize ();
2978 FOR_ALL_BB_FN (bb, my_function)
2980 edge e;
2981 edge_iterator ei;
2983 if (bb->aux)
2984 edge_predicate_pool.remove ((predicate *)bb->aux);
2985 bb->aux = NULL;
2986 FOR_EACH_EDGE (e, ei, bb->succs)
2988 if (e->aux)
2989 edge_predicate_pool.remove ((predicate *) e->aux);
2990 e->aux = NULL;
2993 inline_summaries->get (node)->self_time = time;
2994 inline_summaries->get (node)->self_size = size;
2995 nonconstant_names.release ();
2996 ipa_release_body_info (&fbi);
2997 if (opt_for_fn (node->decl, optimize))
2999 if (!early)
3000 loop_optimizer_finalize ();
3001 else if (!ipa_edge_args_vector)
3002 ipa_free_all_node_params ();
3003 free_dominance_info (CDI_DOMINATORS);
3005 if (dump_file)
3007 fprintf (dump_file, "\n");
3008 dump_inline_summary (dump_file, node);
3013 /* Compute parameters of functions used by inliner.
3014 EARLY is true when we compute parameters for the early inliner */
3016 void
3017 compute_inline_parameters (struct cgraph_node *node, bool early)
3019 HOST_WIDE_INT self_stack_size;
3020 struct cgraph_edge *e;
3021 struct inline_summary *info;
3023 gcc_assert (!node->global.inlined_to);
3025 inline_summary_alloc ();
3027 info = inline_summaries->get (node);
3028 reset_inline_summary (node, info);
3030 /* Estimate the stack size for the function if we're optimizing. */
3031 self_stack_size = optimize && !node->thunk.thunk_p
3032 ? estimated_stack_frame_size (node) : 0;
3033 info->estimated_self_stack_size = self_stack_size;
3034 info->estimated_stack_size = self_stack_size;
3035 info->stack_frame_offset = 0;
3037 if (node->thunk.thunk_p)
3039 struct inline_edge_summary *es = inline_edge_summary (node->callees);
3040 struct predicate t = true_predicate ();
3042 node->local.can_change_signature = false;
3043 es->call_stmt_size = eni_size_weights.call_cost;
3044 es->call_stmt_time = eni_time_weights.call_cost;
3045 account_size_time (info, INLINE_SIZE_SCALE * 2,
3046 2, &t, &t);
3047 t = not_inlined_predicate ();
3048 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
3049 inline_update_overall_summary (node);
3050 info->self_size = info->size;
3051 info->self_time = info->time;
3052 /* We can not inline instrumentation clones. */
3053 if (node->thunk.add_pointer_bounds_args)
3055 info->inlinable = false;
3056 node->callees->inline_failed = CIF_CHKP;
3058 else
3059 info->inlinable = true;
3061 else
3063 /* Even is_gimple_min_invariant rely on current_function_decl. */
3064 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3066 /* Can this function be inlined at all? */
3067 if (!opt_for_fn (node->decl, optimize)
3068 && !lookup_attribute ("always_inline",
3069 DECL_ATTRIBUTES (node->decl)))
3070 info->inlinable = false;
3071 else
3072 info->inlinable = tree_inlinable_function_p (node->decl);
3074 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
3076 /* Type attributes can use parameter indices to describe them. */
3077 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
3078 node->local.can_change_signature = false;
3079 else
3081 /* Otherwise, inlinable functions always can change signature. */
3082 if (info->inlinable)
3083 node->local.can_change_signature = true;
3084 else
3086 /* Functions calling builtin_apply can not change signature. */
3087 for (e = node->callees; e; e = e->next_callee)
3089 tree cdecl = e->callee->decl;
3090 if (DECL_BUILT_IN (cdecl)
3091 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
3092 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
3093 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
3094 break;
3096 node->local.can_change_signature = !e;
3099 /* Functions called by instrumentation thunk can't change signature
3100 because instrumentation thunk modification is not supported. */
3101 if (node->local.can_change_signature)
3102 for (e = node->callers; e; e = e->next_caller)
3103 if (e->caller->thunk.thunk_p
3104 && e->caller->thunk.add_pointer_bounds_args)
3106 node->local.can_change_signature = false;
3107 break;
3109 estimate_function_body_sizes (node, early);
3110 pop_cfun ();
3112 for (e = node->callees; e; e = e->next_callee)
3113 if (e->callee->comdat_local_p ())
3114 break;
3115 node->calls_comdat_local = (e != NULL);
3117 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3118 info->time = info->self_time;
3119 info->size = info->self_size;
3120 info->stack_frame_offset = 0;
3121 info->estimated_stack_size = info->estimated_self_stack_size;
3122 if (flag_checking)
3124 inline_update_overall_summary (node);
3125 gcc_assert (!(info->time - info->self_time).to_int ()
3126 && info->size == info->self_size);
3131 /* Compute parameters of functions used by inliner using
3132 current_function_decl. */
3134 static unsigned int
3135 compute_inline_parameters_for_current (void)
3137 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
3138 return 0;
3141 namespace {
3143 const pass_data pass_data_inline_parameters =
3145 GIMPLE_PASS, /* type */
3146 "inline_param", /* name */
3147 OPTGROUP_INLINE, /* optinfo_flags */
3148 TV_INLINE_PARAMETERS, /* tv_id */
3149 0, /* properties_required */
3150 0, /* properties_provided */
3151 0, /* properties_destroyed */
3152 0, /* todo_flags_start */
3153 0, /* todo_flags_finish */
3156 class pass_inline_parameters : public gimple_opt_pass
3158 public:
3159 pass_inline_parameters (gcc::context *ctxt)
3160 : gimple_opt_pass (pass_data_inline_parameters, ctxt)
3163 /* opt_pass methods: */
3164 opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
3165 virtual unsigned int execute (function *)
3167 return compute_inline_parameters_for_current ();
3170 }; // class pass_inline_parameters
3172 } // anon namespace
3174 gimple_opt_pass *
3175 make_pass_inline_parameters (gcc::context *ctxt)
3177 return new pass_inline_parameters (ctxt);
3181 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
3182 KNOWN_CONTEXTS and KNOWN_AGGS. */
3184 static bool
3185 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3186 int *size, int *time,
3187 vec<tree> known_vals,
3188 vec<ipa_polymorphic_call_context> known_contexts,
3189 vec<ipa_agg_jump_function_p> known_aggs)
3191 tree target;
3192 struct cgraph_node *callee;
3193 struct inline_summary *isummary;
3194 enum availability avail;
3195 bool speculative;
3197 if (!known_vals.exists () && !known_contexts.exists ())
3198 return false;
3199 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3200 return false;
3202 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
3203 known_aggs, &speculative);
3204 if (!target || speculative)
3205 return false;
3207 /* Account for difference in cost between indirect and direct calls. */
3208 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3209 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3210 gcc_checking_assert (*time >= 0);
3211 gcc_checking_assert (*size >= 0);
3213 callee = cgraph_node::get (target);
3214 if (!callee || !callee->definition)
3215 return false;
3216 callee = callee->function_symbol (&avail);
3217 if (avail < AVAIL_AVAILABLE)
3218 return false;
3219 isummary = inline_summaries->get (callee);
3220 return isummary->inlinable;
3223 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3224 handle edge E with probability PROB.
3225 Set HINTS if edge may be devirtualized.
3226 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
3227 site. */
3229 static inline void
3230 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3231 sreal *time,
3232 int prob,
3233 vec<tree> known_vals,
3234 vec<ipa_polymorphic_call_context> known_contexts,
3235 vec<ipa_agg_jump_function_p> known_aggs,
3236 inline_hints *hints)
3238 struct inline_edge_summary *es = inline_edge_summary (e);
3239 int call_size = es->call_stmt_size;
3240 int call_time = es->call_stmt_time;
3241 int cur_size;
3242 if (!e->callee
3243 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
3244 known_vals, known_contexts, known_aggs)
3245 && hints && e->maybe_hot_p ())
3246 *hints |= INLINE_HINT_indirect_call;
3247 cur_size = call_size * INLINE_SIZE_SCALE;
3248 *size += cur_size;
3249 if (min_size)
3250 *min_size += cur_size;
3251 if (prob == REG_BR_PROB_BASE)
3252 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
3253 else
3254 *time += ((sreal)call_time) * (prob * e->frequency)
3255 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
3260 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3261 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3262 describe context of the call site. */
3264 static void
3265 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3266 int *min_size, sreal *time,
3267 inline_hints *hints,
3268 clause_t possible_truths,
3269 vec<tree> known_vals,
3270 vec<ipa_polymorphic_call_context> known_contexts,
3271 vec<ipa_agg_jump_function_p> known_aggs)
3273 struct cgraph_edge *e;
/* Walk direct calls; already-inlined edges are recursed into so the
   whole inlined body is accounted.  */
3274 for (e = node->callees; e; e = e->next_callee)
/* Skip edges with no summary allocated yet.  */
3276 if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
3277 continue;
3279 struct inline_edge_summary *es = inline_edge_summary (e);
3281 /* Do not care about zero sized builtins. */
3282 if (e->inline_failed && !es->call_stmt_size)
3284 gcc_checking_assert (!es->call_stmt_time);
3285 continue;
/* Account the edge only when its predicate may still be true in the
   context described by POSSIBLE_TRUTHS (no predicate == always true).  */
3287 if (!es->predicate
3288 || evaluate_predicate (es->predicate, possible_truths))
3290 if (e->inline_failed)
3292 /* Predicates of calls shall not use NOT_CHANGED codes,
3293 so we do not need to compute probabilities. */
3294 estimate_edge_size_and_time (e, size,
3295 es->predicate ? NULL : min_size,
3296 time, REG_BR_PROB_BASE,
3297 known_vals, known_contexts,
3298 known_aggs, hints);
/* Inlined edge: account callee's calls instead of the call itself.  */
3300 else
3301 estimate_calls_size_and_time (e->callee, size, min_size, time,
3302 hints,
3303 possible_truths,
3304 known_vals, known_contexts,
3305 known_aggs);
/* Indirect calls are never inlined edges, so no recursion here.  */
3308 for (e = node->indirect_calls; e; e = e->next_callee)
3310 if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
3311 continue;
3313 struct inline_edge_summary *es = inline_edge_summary (e);
3314 if (!es->predicate
3315 || evaluate_predicate (es->predicate, possible_truths))
3316 estimate_edge_size_and_time (e, size,
3317 es->predicate ? NULL : min_size,
3318 time, REG_BR_PROB_BASE,
3319 known_vals, known_contexts, known_aggs,
3320 hints);
3325 /* Estimate size and time needed to execute NODE assuming
3326 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3327 information about NODE's arguments. If non-NULL use also probability
3328 information present in INLINE_PARAM_SUMMARY vector.
3329 Additionally determine hints implied by the context. Finally compute
3330 minimal size needed for the call that is independent of the call context and
3331 can be used for fast estimates. Return the values in RET_SIZE,
3332 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3334 static void
3335 estimate_node_size_and_time (struct cgraph_node *node,
3336 clause_t possible_truths,
3337 clause_t nonspec_possible_truths,
3338 vec<tree> known_vals,
3339 vec<ipa_polymorphic_call_context> known_contexts,
3340 vec<ipa_agg_jump_function_p> known_aggs,
3341 int *ret_size, int *ret_min_size,
3342 sreal *ret_time,
3343 sreal *ret_nonspecialized_time,
3344 inline_hints *ret_hints,
3345 vec<inline_param_summary>
3346 inline_param_summary)
3348 struct inline_summary *info = inline_summaries->get (node);
3349 size_time_entry *e;
3350 int size = 0;
3351 sreal time = 0;
3352 int min_size = 0;
3353 inline_hints hints = 0;
3354 int i;
/* Dump which conditions are known to be false in this context.  */
3356 if (dump_file && (dump_flags & TDF_DETAILS))
3358 bool found = false;
3359 fprintf (dump_file, " Estimating body: %s/%i\n"
3360 " Known to be false: ", node->name (),
3361 node->order);
3363 for (i = predicate_not_inlined_condition;
3364 i < (predicate_first_dynamic_condition
3365 + (int) vec_safe_length (info->conds)); i++)
3366 if (!(possible_truths & (1 << i)))
3368 if (found)
3369 fprintf (dump_file, ", ");
3370 found = true;
3371 dump_condition (dump_file, info->conds, i);
/* Start from the cost of all (reachable) calls in the body.  */
3375 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
3376 known_vals, known_contexts, known_aggs);
3377 sreal nonspecialized_time = time;
/* Accumulate size/time entries whose predicates may hold.  EXEC gates
   the nonspecialized accounting, NONCONST the specialized one.  */
3379 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3381 bool nonconst = evaluate_predicate (&e->nonconst_predicate,
3382 possible_truths);
3383 bool exec = evaluate_predicate (&e->exec_predicate,
3384 nonspec_possible_truths);
3385 gcc_assert (!nonconst || exec);
3386 if (exec)
3388 gcc_checking_assert (e->time >= 0);
3389 gcc_checking_assert (time >= 0);
3391 /* We compute specialized size only because size of nonspecialized
3392 copy is context independent.
3394 The difference between nonspecialized execution and specialized is
3395 that nonspecialized is not going to have optimized out computations
3396 known to be constant in a specialized setting. */
3397 if (nonconst)
3398 size += e->size;
3399 nonspecialized_time += e->time;
3400 if (!nonconst)
3402 else if (!inline_param_summary.exists ())
3404 if (nonconst)
3405 time += e->time;
/* With per-parameter summaries, scale by the probability that the
   entry's nonconst predicate actually holds.  */
3407 else
3409 int prob = predicate_probability (info->conds,
3410 &e->nonconst_predicate,
3411 possible_truths,
3412 inline_param_summary);
3413 gcc_checking_assert (prob >= 0);
3414 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3415 time += e->time * prob / REG_BR_PROB_BASE;
3417 gcc_checking_assert (time >= 0);
/* Entry 0 is unconditional (always executed), so its size is the
   context-independent minimum.  */
3420 gcc_checking_assert (true_predicate_p (&(*info->entry)[0].exec_predicate));
3421 gcc_checking_assert (true_predicate_p (&(*info->entry)[0].nonconst_predicate));
3422 min_size = (*info->entry)[0].size;
3423 gcc_checking_assert (size >= 0);
3424 gcc_checking_assert (time >= 0);
3425 /* nonspecialized_time should be always bigger than specialized time.
3426 Roundoff issues however may get into the way. */
3427 gcc_checking_assert ((nonspecialized_time - time) >= -1);
3429 /* Roundoff issues may make specialized time bigger than nonspecialized
3430 time. We do not really want that to happen because some heuristics
3431 may get confused by seeing negative speedups. */
3432 if (time > nonspecialized_time)
3433 time = nonspecialized_time;
/* Hints: a loop bound/stride/array index that becomes known in this
   context, SCC membership, and declared-inline status.  */
3435 if (info->loop_iterations
3436 && !evaluate_predicate (info->loop_iterations, possible_truths))
3437 hints |= INLINE_HINT_loop_iterations;
3438 if (info->loop_stride
3439 && !evaluate_predicate (info->loop_stride, possible_truths))
3440 hints |= INLINE_HINT_loop_stride;
3441 if (info->array_index
3442 && !evaluate_predicate (info->array_index, possible_truths))
3443 hints |= INLINE_HINT_array_index;
3444 if (info->scc_no)
3445 hints |= INLINE_HINT_in_scc;
3446 if (DECL_DECLARED_INLINE_P (node->decl))
3447 hints |= INLINE_HINT_declared_inline;
/* Scale back from the internal fixed-point size representation.  */
3449 size = RDIV (size, INLINE_SIZE_SCALE);
3450 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3453 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3454 time.to_double (), nonspecialized_time.to_double ());
3455 if (ret_time)
3456 *ret_time = time;
3457 if (ret_nonspecialized_time)
3458 *ret_nonspecialized_time = nonspecialized_time;
3459 if (ret_size)
3460 *ret_size = size;
3461 if (ret_min_size)
3462 *ret_min_size = min_size;
3463 if (ret_hints)
3464 *ret_hints = hints;
3465 return;
3469 /* Estimate size and time needed to execute callee of EDGE assuming that
3470 parameters known to be constant at caller of EDGE are propagated.
3471 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3472 and types for parameters. */
3474 void
3475 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3476 vec<tree> known_vals,
3477 vec<ipa_polymorphic_call_context>
3478 known_contexts,
3479 vec<ipa_agg_jump_function_p> known_aggs,
3480 int *ret_size, sreal *ret_time,
3481 inline_hints *hints)
3483 clause_t clause, nonspec_clause;
3484 sreal nonspec_time;
3486 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
3487 &clause, &nonspec_clause);
3488 estimate_node_size_and_time (node, clause, nonspec_clause,
3489 known_vals, known_contexts,
3490 known_aggs, ret_size, NULL, ret_time,
3491 &nonspec_time, hints, vNULL);
3494 /* Translate all conditions from callee representation into caller
3495 representation and symbolically evaluate predicate P into new predicate.
3497 INFO is inline_summary of function we are adding predicate into, CALLEE_INFO
3498 is summary of function predicate P is from. OPERAND_MAP is array giving
3499 callee formal IDs the caller formal IDs. POSSIBLE_TRUTHS is a clause of all
3500 callee conditions that may be true in caller context. TOPLEV_PREDICATE is
3501 predicate under which callee is executed. OFFSET_MAP is an array of
3502 offsets that need to be added to conditions, negative offset means that
3503 conditions relying on values passed by reference have to be discarded
3504 because they might not be preserved (and should be considered offset zero
3505 for other purposes). */
3507 static struct predicate
3508 remap_predicate (struct inline_summary *info,
3509 struct inline_summary *callee_info,
3510 struct predicate *p,
3511 vec<int> operand_map,
3512 vec<int> offset_map,
3513 clause_t possible_truths, struct predicate *toplev_predicate)
3515 int i;
3516 struct predicate out = true_predicate ();
3518 /* True predicate is easy. */
3519 if (true_predicate_p (p))
3520 return *toplev_predicate;
/* P is a conjunction of clauses; remap each clause and AND them into OUT.  */
3521 for (i = 0; p->clause[i]; i++)
3523 clause_t clause = p->clause[i];
3524 int cond;
3525 struct predicate clause_predicate = false_predicate ();
3527 gcc_assert (i < MAX_CLAUSES);
3529 for (cond = 0; cond < NUM_CONDITIONS; cond++)
3530 /* Do we have condition we can't disprove? */
3531 if (clause & possible_truths & (1 << cond))
3533 struct predicate cond_predicate;
3534 /* Work out if the condition can translate to predicate in the
3535 inlined function. */
3536 if (cond >= predicate_first_dynamic_condition)
3538 struct condition *c;
/* NOTE(review): a "-" operator line between these two lines was lost in
   extraction; the index is cond minus predicate_first_dynamic_condition.  */
3540 c = &(*callee_info->conds)[cond
3542 predicate_first_dynamic_condition];
3543 /* See if we can remap condition operand to caller's operand.
3544 Otherwise give up. */
3545 if (!operand_map.exists ()
3546 || (int) operand_map.length () <= c->operand_num
3547 || operand_map[c->operand_num] == -1
3548 /* TODO: For non-aggregate conditions, adding an offset is
3549 basically an arithmetic jump function processing which
3550 we should support in future. */
3551 || ((!c->agg_contents || !c->by_ref)
3552 && offset_map[c->operand_num] > 0)
3553 || (c->agg_contents && c->by_ref
3554 && offset_map[c->operand_num] < 0))
/* Unmappable condition: conservatively assume it may be true.  */
3555 cond_predicate = true_predicate ();
3556 else
3558 struct agg_position_info ap;
3559 HOST_WIDE_INT offset_delta = offset_map[c->operand_num];
3560 if (offset_delta < 0)
3562 gcc_checking_assert (!c->agg_contents || !c->by_ref);
3563 offset_delta = 0;
3565 gcc_assert (!c->agg_contents
3566 || c->by_ref || offset_delta == 0);
3567 ap.offset = c->offset + offset_delta;
3568 ap.agg_contents = c->agg_contents;
3569 ap.by_ref = c->by_ref;
3570 cond_predicate = add_condition (info,
3571 operand_map[c->operand_num],
3572 c->size, &ap, c->code,
3573 c->val);
3576 /* Fixed conditions remains same, construct single
3577 condition predicate. */
3578 else
3580 cond_predicate.clause[0] = 1 << cond;
3581 cond_predicate.clause[1] = 0;
3583 clause_predicate = or_predicates (info->conds, &clause_predicate,
3584 &cond_predicate);
3586 out = and_predicates (info->conds, &out, &clause_predicate);
/* Finally conjunct with the predicate guarding the inlined call itself.  */
3588 return and_predicates (info->conds, &out, toplev_predicate);
3592 /* Update summary information of inline clones after inlining.
3593 Compute peak stack usage. */
3595 static void
3596 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3598 struct cgraph_edge *e;
3599 struct inline_summary *callee_info = inline_summaries->get (node);
3600 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
3601 HOST_WIDE_INT peak;
/* The inlined body's frame sits on top of the caller's frame.  */
3603 callee_info->stack_frame_offset
3604 = caller_info->stack_frame_offset
3605 + caller_info->estimated_self_stack_size;
3606 peak = callee_info->stack_frame_offset
3607 + callee_info->estimated_self_stack_size;
/* Track the peak stack usage on the root of the inline tree.  */
3608 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
3609 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
3610 ipa_propagate_frequency (node);
/* Recurse into already-inlined callees and shift loop depths of all
   outgoing edges by the depth of the call site we were inlined into.  */
3611 for (e = node->callees; e; e = e->next_callee)
3613 if (!e->inline_failed)
3614 inline_update_callee_summaries (e->callee, depth)
3615 inline_edge_summary (e)->loop_depth += depth;
3617 for (e = node->indirect_calls; e; e = e->next_callee)
3618 inline_edge_summary (e)->loop_depth += depth;
3621 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3622 When function A is inlined in B and A calls C with parameter that
3623 changes with probability PROB1 and C is known to be passthrough
3624 of argument of B that changes with probability PROB2, the probability
3625 of change is now PROB1*PROB2. */
3627 static void
3628 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3629 struct cgraph_edge *edge)
/* Only possible when IPA parameter analysis ran.  */
3631 if (ipa_node_params_sum)
3633 int i;
3634 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3635 struct inline_edge_summary *es = inline_edge_summary (edge);
3636 struct inline_edge_summary *inlined_es
3637 = inline_edge_summary (inlined_edge);
3639 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3641 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
/* Only pass-through and ancestor jump functions forward a caller
   argument whose change probability we can combine.  */
3642 if (jfunc->type == IPA_JF_PASS_THROUGH
3643 || jfunc->type == IPA_JF_ANCESTOR)
3645 int id = jfunc->type == IPA_JF_PASS_THROUGH
3646 ? ipa_get_jf_pass_through_formal_id (jfunc)
3647 : ipa_get_jf_ancestor_formal_id (jfunc);
3648 if (id < (int) inlined_es->param.length ())
3650 int prob1 = es->param[i].change_prob;
3651 int prob2 = inlined_es->param[id].change_prob;
3652 int prob = combine_probabilities (prob1, prob2);
/* Avoid rounding two nonzero probabilities down to "never changes".  */
3654 if (prob1 && prob2 && !prob)
3655 prob = 1;
3657 es->param[i].change_prob = prob;
3664 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3666 Remap predicates of callees of NODE. Rest of arguments match
3667 remap_predicate.
3669 Also update change probabilities. */
3671 static void
3672 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3673 struct cgraph_node *node,
3674 struct inline_summary *info,
3675 struct inline_summary *callee_info,
3676 vec<int> operand_map,
3677 vec<int> offset_map,
3678 clause_t possible_truths,
3679 struct predicate *toplev_predicate)
3681 struct cgraph_edge *e, *next;
/* NEXT is cached because edge_set_predicate may remove E from the list
   (a false predicate can kill the edge).  */
3682 for (e = node->callees; e; e = next)
3684 struct inline_edge_summary *es = inline_edge_summary (e);
3685 struct predicate p;
3686 next = e->next_callee;
3688 if (e->inline_failed)
3690 remap_edge_change_prob (inlined_edge, e);
3692 if (es->predicate)
3694 p = remap_predicate (info, callee_info,
3695 es->predicate, operand_map, offset_map,
3696 possible_truths, toplev_predicate);
3697 edge_set_predicate (e, &p);
/* An edge with no predicate is guarded only by the predicate of the
   inlined call itself.  */
3699 else
3700 edge_set_predicate (e, toplev_predicate);
/* Already-inlined callee: recurse into its body.  */
3702 else
3703 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3704 operand_map, offset_map, possible_truths,
3705 toplev_predicate);
3707 for (e = node->indirect_calls; e; e = next)
3709 struct inline_edge_summary *es = inline_edge_summary (e);
3710 struct predicate p;
3711 next = e->next_callee;
3713 remap_edge_change_prob (inlined_edge, e);
3714 if (es->predicate)
3716 p = remap_predicate (info, callee_info,
3717 es->predicate, operand_map, offset_map,
3718 possible_truths, toplev_predicate);
3719 edge_set_predicate (e, &p);
3721 else
3722 edge_set_predicate (e, toplev_predicate);
3726 /* Same as remap_predicate, but set result into hint *HINT. */
3728 static void
3729 remap_hint_predicate (struct inline_summary *info,
3730 struct inline_summary *callee_info,
3731 struct predicate **hint,
3732 vec<int> operand_map,
3733 vec<int> offset_map,
3734 clause_t possible_truths,
3735 struct predicate *toplev_predicate)
3737 predicate p;
3739 if (!*hint)
3740 return;
3741 p = remap_predicate (info, callee_info,
3742 *hint,
3743 operand_map, offset_map,
3744 possible_truths, toplev_predicate);
3745 if (!false_predicate_p (&p) && !true_predicate_p (&p))
3747 if (!*hint)
3748 set_hint_predicate (hint, p);
3749 else
3750 **hint = and_predicates (info->conds, *hint, &p);
3754 /* We inlined EDGE. Update summary of the function we inlined into. */
3756 void
3757 inline_merge_summary (struct cgraph_edge *edge)
3759 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3760 struct cgraph_node *to = (edge->caller->global.inlined_to
3761 ? edge->caller->global.inlined_to : edge->caller);
3762 struct inline_summary *info = inline_summaries->get (to);
3763 clause_t clause = 0; /* not_inline is known to be false. */
3764 size_time_entry *e;
3765 vec<int> operand_map = vNULL;
3766 vec<int> offset_map = vNULL;
3767 int i;
3768 struct predicate toplev_predicate;
3769 struct predicate true_p = true_predicate ();
3770 struct inline_edge_summary *es = inline_edge_summary (edge);
/* Everything merged from the callee is guarded by the predicate of the
   call site being inlined.  */
3772 if (es->predicate)
3773 toplev_predicate = *es->predicate;
3774 else
3775 toplev_predicate = true_predicate ();
3777 info->fp_expressions |= callee_info->fp_expressions;
3779 if (callee_info->conds)
3780 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
/* Build maps translating callee formal parameter IDs (and aggregate
   offsets) into caller terms, from the edge's jump functions.  */
3781 if (ipa_node_params_sum && callee_info->conds)
3783 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3784 int count = ipa_get_cs_argument_count (args);
3785 int i;
3787 if (count)
3789 operand_map.safe_grow_cleared (count);
3790 offset_map.safe_grow_cleared (count);
3792 for (i = 0; i < count; i++)
3794 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3795 int map = -1;
3797 /* TODO: handle non-NOPs when merging. */
3798 if (jfunc->type == IPA_JF_PASS_THROUGH)
3800 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3801 map = ipa_get_jf_pass_through_formal_id (jfunc);
3802 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3803 offset_map[i] = -1;
3805 else if (jfunc->type == IPA_JF_ANCESTOR)
3807 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3808 if (offset >= 0 && offset < INT_MAX)
3810 map = ipa_get_jf_ancestor_formal_id (jfunc);
3811 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3812 offset = -1;
3813 offset_map[i] = offset;
3816 operand_map[i] = map;
3817 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
/* Merge callee size/time entries, remapped into caller terms and scaled
   by call frequency and the probability the entry executes.  */
3820 for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
3822 struct predicate p = remap_predicate (info, callee_info,
3823 &e->exec_predicate, operand_map,
3824 offset_map, clause,
3825 &toplev_predicate);
3826 struct predicate nonconstp
3827 = remap_predicate (info, callee_info,
3828 &e->nonconst_predicate, operand_map,
3829 offset_map, clause,
3830 &toplev_predicate);
3831 if (!false_predicate_p (&p) && !false_predicate_p (&nonconstp))
3833 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3834 int prob = predicate_probability (callee_info->conds,
3835 &e->nonconst_predicate,
3836 clause, es->param);
3837 add_time = add_time * prob / REG_BR_PROB_BASE;
3838 if (prob != REG_BR_PROB_BASE
3839 && dump_file && (dump_flags & TDF_DETAILS))
3841 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3842 (double) prob / REG_BR_PROB_BASE);
3844 account_size_time (info, e->size, add_time, &p, &nonconstp);
/* Remap edge predicates/probabilities and hint predicates of the
   callee into the merged summary.  */
3847 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3848 offset_map, clause, &toplev_predicate);
3849 remap_hint_predicate (info, callee_info,
3850 &callee_info->loop_iterations,
3851 operand_map, offset_map, clause, &toplev_predicate);
3852 remap_hint_predicate (info, callee_info,
3853 &callee_info->loop_stride,
3854 operand_map, offset_map, clause, &toplev_predicate);
3855 remap_hint_predicate (info, callee_info,
3856 &callee_info->array_index,
3857 operand_map, offset_map, clause, &toplev_predicate);
3859 inline_update_callee_summaries (edge->callee,
3860 inline_edge_summary (edge)->loop_depth);
3862 /* We do not maintain predicates of inlined edges, free it. */
3863 edge_set_predicate (edge, &true_p);
3864 /* Similarly remove param summaries. */
3865 es->param.release ();
3866 operand_map.release ();
3867 offset_map.release ();
3870 /* For performance reasons inline_merge_summary is not updating overall size
3871 and time. Recompute it. */
3873 void
3874 inline_update_overall_summary (struct cgraph_node *node)
3876 struct inline_summary *info = inline_summaries->get (node);
3877 size_time_entry *e;
3878 int i;
3880 info->size = 0;
3881 info->time = 0;
3882 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3884 info->size += e->size;
3885 info->time += e->time;
3887 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3888 &info->time, NULL,
3889 ~(clause_t) (1 << predicate_false_condition),
3890 vNULL, vNULL, vNULL);
3891 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3894 /* Return hints derived from EDGE. */
3896 simple_edge_hints (struct cgraph_edge *edge)
3898 int hints = 0;
3899 struct cgraph_node *to = (edge->caller->global.inlined_to
3900 ? edge->caller->global.inlined_to : edge->caller);
3901 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
3902 if (inline_summaries->get (to)->scc_no
3903 && inline_summaries->get (to)->scc_no
3904 == inline_summaries->get (callee)->scc_no
3905 && !edge->recursive_p ())
3906 hints |= INLINE_HINT_same_scc;
3908 if (callee->lto_file_data && edge->caller->lto_file_data
3909 && edge->caller->lto_file_data != callee->lto_file_data
3910 && !callee->merged_comdat && !callee->icf_merged)
3911 hints |= INLINE_HINT_cross_module;
3913 return hints;
3916 /* Estimate the time cost for the caller when inlining EDGE.
3917 Only to be called via estimate_edge_time, that handles the
3918 caching mechanism.
3920 When caching, also update the cache entry. Compute both time and
3921 size, since we always need both metrics eventually. */
3923 sreal
3924 do_estimate_edge_time (struct cgraph_edge *edge)
3926 sreal time, nonspec_time;
3927 int size;
3928 inline_hints hints;
3929 struct cgraph_node *callee;
3930 clause_t clause, nonspec_clause;
3931 vec<tree> known_vals;
3932 vec<ipa_polymorphic_call_context> known_contexts;
3933 vec<ipa_agg_jump_function_p> known_aggs;
3934 struct inline_edge_summary *es = inline_edge_summary (edge);
3935 int min_size;
3937 callee = edge->callee->ultimate_alias_target ();
3939 gcc_checking_assert (edge->inline_failed);
/* Gather what is known about the arguments at this call site and
   estimate the callee body in that context.  */
3940 evaluate_properties_for_edge (edge, true,
3941 &clause, &nonspec_clause, &known_vals,
3942 &known_contexts, &known_aggs);
3943 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3944 known_contexts, known_aggs, &size, &min_size,
3945 &time, &nonspec_time, &hints, es->param);
3947 /* When we have profile feedback, we can quite safely identify hot
3948 edges and for those we disable size limits. Don't do that when
3949 probability that caller will call the callee is low however, since it
3950 may hurt optimization of the caller's hot path. */
3951 if (edge->count && edge->maybe_hot_p ()
3952 && (edge->count * 2
3953 > (edge->caller->global.inlined_to
3954 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3955 hints |= INLINE_HINT_known_hot;
3957 known_vals.release ();
3958 known_contexts.release ();
3959 known_aggs.release ();
3960 gcc_checking_assert (size >= 0);
3961 gcc_checking_assert (time >= 0);
3963 /* When caching, update the cache entry. */
3964 if (edge_growth_cache.exists ())
3966 inline_summaries->get (edge->callee)->min_size = min_size;
3967 if ((int) edge_growth_cache.length () <= edge->uid)
3968 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3969 edge_growth_cache[edge->uid].time = time;
3970 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
/* Size and hints are stored biased by one so that a zero entry means
   "not computed yet"; do_estimate_edge_size/hints undo the bias.  */
3972 edge_growth_cache[edge->uid].size = size + (size >= 0);
3973 hints |= simple_edge_hints (edge);
3974 edge_growth_cache[edge->uid].hints = hints + 1;
3976 return time;
3980 /* Return estimated callee growth after inlining EDGE.
3981 Only to be called via estimate_edge_size. */
3984 do_estimate_edge_size (struct cgraph_edge *edge)
3986 int size;
3987 struct cgraph_node *callee;
3988 clause_t clause, nonspec_clause;
3989 vec<tree> known_vals;
3990 vec<ipa_polymorphic_call_context> known_contexts;
3991 vec<ipa_agg_jump_function_p> known_aggs;
3993 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3995 if (edge_growth_cache.exists ())
3997 do_estimate_edge_time (edge);
3998 size = edge_growth_cache[edge->uid].size;
3999 gcc_checking_assert (size);
4000 return size - (size > 0);
4003 callee = edge->callee->ultimate_alias_target ();
4005 /* Early inliner runs without caching, go ahead and do the dirty work. */
4006 gcc_checking_assert (edge->inline_failed);
4007 evaluate_properties_for_edge (edge, true,
4008 &clause, &nonspec_clause,
4009 &known_vals, &known_contexts,
4010 &known_aggs);
4011 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
4012 known_contexts, known_aggs, &size, NULL, NULL,
4013 NULL, NULL, vNULL);
4014 known_vals.release ();
4015 known_contexts.release ();
4016 known_aggs.release ();
4017 return size;
4021 /* Estimate the growth of the caller when inlining EDGE.
4022 Only to be called via estimate_edge_size. */
4024 inline_hints
4025 do_estimate_edge_hints (struct cgraph_edge *edge)
4027 inline_hints hints;
4028 struct cgraph_node *callee;
4029 clause_t clause, nonspec_clause;
4030 vec<tree> known_vals;
4031 vec<ipa_polymorphic_call_context> known_contexts;
4032 vec<ipa_agg_jump_function_p> known_aggs;
4034 /* When we do caching, use do_estimate_edge_time to populate the entry. */
4036 if (edge_growth_cache.exists ())
4038 do_estimate_edge_time (edge);
4039 hints = edge_growth_cache[edge->uid].hints;
4040 gcc_checking_assert (hints);
/* Cached hints are stored biased by one ("0" means unset); undo it.  */
4041 return hints - 1;
4044 callee = edge->callee->ultimate_alias_target ();
4046 /* Early inliner runs without caching, go ahead and do the dirty work. */
4047 gcc_checking_assert (edge->inline_failed);
4048 evaluate_properties_for_edge (edge, true,
4049 &clause, &nonspec_clause,
4050 &known_vals, &known_contexts,
4051 &known_aggs);
/* Only the hints output is requested here; size/time are discarded.  */
4052 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
4053 known_contexts, known_aggs, NULL, NULL,
4054 NULL, NULL, &hints, vNULL);
4055 known_vals.release ();
4056 known_contexts.release ();
4057 known_aggs.release ();
4058 hints |= simple_edge_hints (edge);
4059 return hints;
4062 /* Estimate the size of NODE after inlining EDGE which should be an
4063 edge to either NODE or a call inlined into NODE. */
4066 estimate_size_after_inlining (struct cgraph_node *node,
4067 struct cgraph_edge *edge)
4069 struct inline_edge_summary *es = inline_edge_summary (edge);
4070 if (!es->predicate || !false_predicate_p (es->predicate))
4072 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
4073 gcc_assert (size >= 0);
4074 return size;
4076 return inline_summaries->get (node)->size;
/* Bookkeeping for do_estimate_growth_1 walking all callers of NODE.  */
4080 struct growth_data
4082 struct cgraph_node *node;
/* Set when a recursive caller edge was seen.  */
4083 bool self_recursive;
/* Set when some caller edge can never be inlined (CIF_FINAL_ERROR).  */
4084 bool uninlinable;
/* Accumulated estimated growth over all caller edges.  */
4085 int growth;
4089 /* Worker for do_estimate_growth. Collect growth for all callers. */
4091 static bool
4092 do_estimate_growth_1 (struct cgraph_node *node, void *data)
4094 struct cgraph_edge *e;
4095 struct growth_data *d = (struct growth_data *) data;
4097 for (e = node->callers; e; e = e->next_caller)
4099 gcc_checking_assert (e->inline_failed);
4101 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4103 d->uninlinable = true;
4104 continue;
4107 if (e->recursive_p ())
4109 d->self_recursive = true;
4110 continue;
4112 d->growth += estimate_edge_growth (e);
4114 return false;
4118 /* Estimate the growth caused by inlining NODE into all callees. */
4121 estimate_growth (struct cgraph_node *node)
4123 struct growth_data d = { node, false, false, 0 };
4124 struct inline_summary *info = inline_summaries->get (node);
/* Sum growth over all caller edges of NODE and its aliases.  */
4126 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
4128 /* For self recursive functions the growth estimation really should be
4129 infinity. We don't want to return very large values because the growth
4130 plays various roles in badness computation fractions. Be sure to not
4131 return zero or negative growths. */
4132 if (d.self_recursive)
4133 d.growth = d.growth < info->size ? info->size : d.growth;
/* External or uninlinable nodes keep their offline copy: no size credit.
   NOTE(review): the empty ";" arm of this else-if was dropped by the
   blob extraction.  */
4134 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
4136 else
/* Otherwise credit removal of the offline copy when all calls go away.  */
4138 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
4139 d.growth -= info->size;
4140 /* COMDAT functions are very often not shared across multiple units
4141 since they come from various template instantiations.
4142 Take this into account. */
4143 else if (DECL_COMDAT (node->decl)
4144 && node->can_remove_if_no_direct_calls_p ())
4145 d.growth -= (info->size
4146 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
4147 + 50) / 100;
4150 return d.growth;
4153 /* Verify if there are fewer than MAX_CALLERS. */
4155 static bool
4156 check_callers (cgraph_node *node, int *max_callers)
4158 ipa_ref *ref;
4160 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
4161 return true;
4163 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
4165 (*max_callers)--;
4166 if (!*max_callers
4167 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4168 return true;
4171 FOR_EACH_ALIAS (node, ref)
4172 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
4173 return true;
4175 return false;
4179 /* Make cheap estimation if growth of NODE is likely positive knowing
4180 EDGE_GROWTH of one particular edge.
4181 We assume that most of other edges will have similar growth
4182 and skip computation if there are too many callers. */
4184 bool
4185 growth_likely_positive (struct cgraph_node *node,
4186 int edge_growth)
4188 int max_callers;
4189 struct cgraph_edge *e;
4190 gcc_checking_assert (edge_growth > 0);
4192 /* First quickly check if NODE is removable at all. */
4193 if (DECL_EXTERNAL (node->decl))
4194 return true;
4195 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
4196 || node->address_taken)
4197 return true;
/* Growth stays positive if inlining into all callers costs more than
   removing the offline body saves; bound the caller walk accordingly.  */
4199 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
4201 for (e = node->callers; e; e = e->next_caller)
4203 max_callers--;
4204 if (!max_callers
4205 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
4206 return true;
/* Aliases share the same caller budget.  */
4209 ipa_ref *ref;
4210 FOR_EACH_ALIAS (node, ref)
4211 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
4212 return true;
4214 /* Unlike for functions called once, we play unsafe with
4215 COMDATs. We can allow that since we know functions
4216 in consideration are small (and thus risk is small) and
4217 moreover grow estimates already accounts that COMDAT
4218 functions may or may not disappear when eliminated from
4219 current unit. With good probability making aggressive
4220 choice in all units is going to make overall program
4221 smaller. */
4222 if (DECL_COMDAT (node->decl))
4224 if (!node->can_remove_if_no_direct_calls_p ())
4225 return true;
4227 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
4228 return true;
/* Few enough callers: do the precise computation.  */
4230 return estimate_growth (node) > 0;
4234 /* This function performs intraprocedural analysis in NODE that is required to
4235 inline indirect calls. */
4237 static void
4238 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4240 ipa_analyze_node (node);
4241 if (dump_file && (dump_flags & TDF_DETAILS))
4243 ipa_print_node_params (dump_file, node);
4244 ipa_print_node_jump_functions (dump_file, node);
4249 /* Note function body size. */
4251 void
4252 inline_analyze_function (struct cgraph_node *node)
/* Work inside NODE's function context.  */
4254 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4256 if (dump_file)
4257 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
4258 node->name (), node->order);
/* IPA analysis of indirect calls only makes sense for optimized,
   non-thunk bodies.  */
4259 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4260 inline_indirect_intraprocedural_analysis (node);
4261 compute_inline_parameters (node, false);
/* Without optimization, mark all outgoing edges uninlinable.  */
4262 if (!optimize)
4264 struct cgraph_edge *e;
4265 for (e = node->callees; e; e = e->next_callee)
4266 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4267 for (e = node->indirect_calls; e; e = e->next_callee)
4268 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4271 pop_cfun ();
4275 /* Called when new function is inserted to callgraph late. */
4277 void
4278 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
/* A function inserted late is analyzed from scratch; the preallocated
   summary argument is intentionally unused.  */
4280 inline_analyze_function (node);
4283 /* Note function body size. */
4285 void
4286 inline_generate_summary (void)
4288 struct cgraph_node *node;
/* Record versionability of all defined functions up front.  */
4290 FOR_EACH_DEFINED_FUNCTION (node)
4291 if (DECL_STRUCT_FUNCTION (node->decl))
4292 node->local.versionable = tree_versionable_function_p (node->decl);
4294 /* When not optimizing, do not bother to analyze. Inlining is still done
4295 because edge redirection needs to happen there. */
4296 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
4297 return;
4299 if (!inline_summaries)
4300 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
/* Analyze late-inserted functions automatically.  */
4302 inline_summaries->enable_insertion_hook ();
4304 ipa_register_cgraph_hooks ();
/* Drop any stale summaries before recomputing.  */
4305 inline_free_summary ();
4307 FOR_EACH_DEFINED_FUNCTION (node)
4308 if (!node->alias)
4309 inline_analyze_function (node);
4313 /* Read predicate from IB. */
4315 static struct predicate
4316 read_predicate (struct lto_input_block *ib)
4318 struct predicate out;
4319 clause_t clause;
4320 int k = 0;
4324 gcc_assert (k <= MAX_CLAUSES);
4325 clause = out.clause[k++] = streamer_read_uhwi (ib);
4327 while (clause);
4329 /* Zero-initialize the remaining clauses in OUT. */
4330 while (k <= MAX_CLAUSES)
4331 out.clause[k++] = 0;
4333 return out;
4337 /* Write inline summary for edge E to OB. */
4339 static void
4340 read_inline_edge_summary (struct lto_input_block *ib, struct cgraph_edge *e)
4342 struct inline_edge_summary *es = inline_edge_summary (e);
4343 struct predicate p;
4344 int length, i;
4346 es->call_stmt_size = streamer_read_uhwi (ib);
4347 es->call_stmt_time = streamer_read_uhwi (ib);
4348 es->loop_depth = streamer_read_uhwi (ib);
4349 p = read_predicate (ib);
4350 edge_set_predicate (e, &p);
4351 length = streamer_read_uhwi (ib);
4352 if (length)
4354 es->param.safe_grow_cleared (length);
4355 for (i = 0; i < length; i++)
4356 es->param[i].change_prob = streamer_read_uhwi (ib);
4361 /* Stream in inline summaries from the section. */
4363 static void
4364 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4365 size_t len)
4367 const struct lto_function_header *header =
4368 (const struct lto_function_header *) data;
4369 const int cfg_offset = sizeof (struct lto_function_header);
4370 const int main_offset = cfg_offset + header->cfg_size;
4371 const int string_offset = main_offset + header->main_size;
4372 struct data_in *data_in;
4373 unsigned int i, count2, j;
4374 unsigned int f_count;
4376 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4377 file_data->mode_table);
4379 data_in =
4380 lto_data_in_create (file_data, (const char *) data + string_offset,
4381 header->string_size, vNULL);
4382 f_count = streamer_read_uhwi (&ib);
4383 for (i = 0; i < f_count; i++)
4385 unsigned int index;
4386 struct cgraph_node *node;
4387 struct inline_summary *info;
4388 lto_symtab_encoder_t encoder;
4389 struct bitpack_d bp;
4390 struct cgraph_edge *e;
4391 predicate p;
4393 index = streamer_read_uhwi (&ib);
4394 encoder = file_data->symtab_node_encoder;
4395 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4396 index));
4397 info = inline_summaries->get (node);
4399 info->estimated_stack_size
4400 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
4401 info->size = info->self_size = streamer_read_uhwi (&ib);
4402 info->time = info->self_time = sreal::stream_in (&ib);
4404 bp = streamer_read_bitpack (&ib);
4405 info->inlinable = bp_unpack_value (&bp, 1);
4406 info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
4407 info->fp_expressions = bp_unpack_value (&bp, 1);
4409 count2 = streamer_read_uhwi (&ib);
4410 gcc_assert (!info->conds);
4411 for (j = 0; j < count2; j++)
4413 struct condition c;
4414 c.operand_num = streamer_read_uhwi (&ib);
4415 c.size = streamer_read_uhwi (&ib);
4416 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4417 c.val = stream_read_tree (&ib, data_in);
4418 bp = streamer_read_bitpack (&ib);
4419 c.agg_contents = bp_unpack_value (&bp, 1);
4420 c.by_ref = bp_unpack_value (&bp, 1);
4421 if (c.agg_contents)
4422 c.offset = streamer_read_uhwi (&ib);
4423 vec_safe_push (info->conds, c);
4425 count2 = streamer_read_uhwi (&ib);
4426 gcc_assert (!info->entry);
4427 for (j = 0; j < count2; j++)
4429 struct size_time_entry e;
4431 e.size = streamer_read_uhwi (&ib);
4432 e.time = sreal::stream_in (&ib);
4433 e.exec_predicate = read_predicate (&ib);
4434 e.nonconst_predicate = read_predicate (&ib);
4436 vec_safe_push (info->entry, e);
4439 p = read_predicate (&ib);
4440 set_hint_predicate (&info->loop_iterations, p);
4441 p = read_predicate (&ib);
4442 set_hint_predicate (&info->loop_stride, p);
4443 p = read_predicate (&ib);
4444 set_hint_predicate (&info->array_index, p);
4445 for (e = node->callees; e; e = e->next_callee)
4446 read_inline_edge_summary (&ib, e);
4447 for (e = node->indirect_calls; e; e = e->next_callee)
4448 read_inline_edge_summary (&ib, e);
4451 lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
4452 len);
4453 lto_data_in_delete (data_in);
4457 /* Read inline summary. Jump functions are shared among ipa-cp
4458 and inliner, so when ipa-cp is active, we don't need to write them
4459 twice. */
4461 void
4462 inline_read_summary (void)
4464 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4465 struct lto_file_decl_data *file_data;
4466 unsigned int j = 0;
4468 inline_summary_alloc ();
4470 while ((file_data = file_data_vec[j++]))
4472 size_t len;
4473 const char *data = lto_get_section_data (file_data,
4474 LTO_section_inline_summary,
4475 NULL, &len);
4476 if (data)
4477 inline_read_section (file_data, data, len);
4478 else
4479 /* Fatal error here. We do not want to support compiling ltrans units
4480 with different version of compiler or different flags than the WPA
4481 unit, so this should never happen. */
4482 fatal_error (input_location,
4483 "ipa inline summary is missing in input file");
4485 if (optimize)
4487 ipa_register_cgraph_hooks ();
4488 if (!flag_ipa_cp)
4489 ipa_prop_read_jump_functions ();
4492 gcc_assert (inline_summaries);
4493 inline_summaries->enable_insertion_hook ();
4497 /* Write predicate P to OB. */
4499 static void
4500 write_predicate (struct output_block *ob, struct predicate *p)
4502 int j;
4503 if (p)
4504 for (j = 0; p->clause[j]; j++)
4506 gcc_assert (j < MAX_CLAUSES);
4507 streamer_write_uhwi (ob, p->clause[j]);
4509 streamer_write_uhwi (ob, 0);
4513 /* Write inline summary for edge E to OB. */
4515 static void
4516 write_inline_edge_summary (struct output_block *ob, struct cgraph_edge *e)
4518 struct inline_edge_summary *es = inline_edge_summary (e);
4519 int i;
4521 streamer_write_uhwi (ob, es->call_stmt_size);
4522 streamer_write_uhwi (ob, es->call_stmt_time);
4523 streamer_write_uhwi (ob, es->loop_depth);
4524 write_predicate (ob, es->predicate);
4525 streamer_write_uhwi (ob, es->param.length ());
4526 for (i = 0; i < (int) es->param.length (); i++)
4527 streamer_write_uhwi (ob, es->param[i].change_prob);
4531 /* Write inline summary for node in SET.
4532 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4533 active, we don't need to write them twice. */
4535 void
4536 inline_write_summary (void)
4538 struct output_block *ob = create_output_block (LTO_section_inline_summary);
4539 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4540 unsigned int count = 0;
4541 int i;
4543 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4545 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4546 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4547 if (cnode && cnode->definition && !cnode->alias)
4548 count++;
4550 streamer_write_uhwi (ob, count);
4552 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4554 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4555 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4556 if (cnode && cnode->definition && !cnode->alias)
4558 struct inline_summary *info = inline_summaries->get (cnode);
4559 struct bitpack_d bp;
4560 struct cgraph_edge *edge;
4561 int i;
4562 size_time_entry *e;
4563 struct condition *c;
4565 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4566 streamer_write_hwi (ob, info->estimated_self_stack_size);
4567 streamer_write_hwi (ob, info->self_size);
4568 info->self_time.stream_out (ob);
4569 bp = bitpack_create (ob->main_stream);
4570 bp_pack_value (&bp, info->inlinable, 1);
4571 bp_pack_value (&bp, info->contains_cilk_spawn, 1);
4572 bp_pack_value (&bp, info->fp_expressions, 1);
4573 streamer_write_bitpack (&bp);
4574 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4575 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4577 streamer_write_uhwi (ob, c->operand_num);
4578 streamer_write_uhwi (ob, c->size);
4579 streamer_write_uhwi (ob, c->code);
4580 stream_write_tree (ob, c->val, true);
4581 bp = bitpack_create (ob->main_stream);
4582 bp_pack_value (&bp, c->agg_contents, 1);
4583 bp_pack_value (&bp, c->by_ref, 1);
4584 streamer_write_bitpack (&bp);
4585 if (c->agg_contents)
4586 streamer_write_uhwi (ob, c->offset);
4588 streamer_write_uhwi (ob, vec_safe_length (info->entry));
4589 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
4591 streamer_write_uhwi (ob, e->size);
4592 e->time.stream_out (ob);
4593 write_predicate (ob, &e->exec_predicate);
4594 write_predicate (ob, &e->nonconst_predicate);
4596 write_predicate (ob, info->loop_iterations);
4597 write_predicate (ob, info->loop_stride);
4598 write_predicate (ob, info->array_index);
4599 for (edge = cnode->callees; edge; edge = edge->next_callee)
4600 write_inline_edge_summary (ob, edge);
4601 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4602 write_inline_edge_summary (ob, edge);
4605 streamer_write_char_stream (ob->main_stream, 0);
4606 produce_asm (ob, NULL);
4607 destroy_output_block (ob);
4609 if (optimize && !flag_ipa_cp)
4610 ipa_prop_write_jump_functions ();
4614 /* Release inline summary. */
4616 void
4617 inline_free_summary (void)
4619 struct cgraph_node *node;
4620 if (edge_removal_hook_holder)
4621 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
4622 edge_removal_hook_holder = NULL;
4623 if (edge_duplication_hook_holder)
4624 symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
4625 edge_duplication_hook_holder = NULL;
4626 if (!inline_edge_summary_vec.exists ())
4627 return;
4628 FOR_EACH_DEFINED_FUNCTION (node)
4629 if (!node->alias)
4630 reset_inline_summary (node, inline_summaries->get (node));
4631 inline_summaries->release ();
4632 inline_summaries = NULL;
4633 inline_edge_summary_vec.release ();
4634 edge_predicate_pool.release ();