ada: Fix internal error on instantiation with private component type
[official-gcc.git] / gcc / ipa-fnsummary.cc
blobf1244da291ca51713e4934b1ef6b9306137990b9
1 /* Function summary pass.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "target.h"
60 #include "tree.h"
61 #include "gimple.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
64 #include "ssa.h"
65 #include "tree-streamer.h"
66 #include "cgraph.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
72 #include "cfganal.h"
73 #include "gimple-iterator.h"
74 #include "tree-cfg.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
78 #include "ipa-prop.h"
79 #include "ipa-fnsummary.h"
80 #include "cfgloop.h"
81 #include "tree-scalar-evolution.h"
82 #include "ipa-utils.h"
83 #include "cfgexpand.h"
84 #include "gimplify.h"
85 #include "stringpool.h"
86 #include "attribs.h"
87 #include "tree-into-ssa.h"
88 #include "symtab-clones.h"
89 #include "gimple-range.h"
90 #include "tree-dfa.h"
92 /* Summaries. */
/* Per-function summary; kept in GC memory (va_gc).  */
93 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
/* Per-function size summary; heap allocated (va_heap).  */
94 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
/* Per-call-edge summary; heap allocated (va_heap).  */
95 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
97 /* Edge predicates go here. */
/* Pool from which predicates attached to edges and hints are allocated
   (see edge_set_predicate and set_hint_predicate below).  */
98 static object_allocator<ipa_predicate> edge_predicate_pool ("edge predicates");
101 /* Dump IPA hints. */
102 void
103 ipa_dump_hints (FILE *f, ipa_hints hints)
105 if (!hints)
106 return;
107 fprintf (f, "IPA hints:");
108 if (hints & INLINE_HINT_indirect_call)
110 hints &= ~INLINE_HINT_indirect_call;
111 fprintf (f, " indirect_call");
113 if (hints & INLINE_HINT_loop_iterations)
115 hints &= ~INLINE_HINT_loop_iterations;
116 fprintf (f, " loop_iterations");
118 if (hints & INLINE_HINT_loop_stride)
120 hints &= ~INLINE_HINT_loop_stride;
121 fprintf (f, " loop_stride");
123 if (hints & INLINE_HINT_same_scc)
125 hints &= ~INLINE_HINT_same_scc;
126 fprintf (f, " same_scc");
128 if (hints & INLINE_HINT_in_scc)
130 hints &= ~INLINE_HINT_in_scc;
131 fprintf (f, " in_scc");
133 if (hints & INLINE_HINT_cross_module)
135 hints &= ~INLINE_HINT_cross_module;
136 fprintf (f, " cross_module");
138 if (hints & INLINE_HINT_declared_inline)
140 hints &= ~INLINE_HINT_declared_inline;
141 fprintf (f, " declared_inline");
143 if (hints & INLINE_HINT_known_hot)
145 hints &= ~INLINE_HINT_known_hot;
146 fprintf (f, " known_hot");
148 if (hints & INLINE_HINT_builtin_constant_p)
150 hints &= ~INLINE_HINT_builtin_constant_p;
151 fprintf (f, " builtin_constant_p");
153 gcc_assert (!hints);
157 /* Record SIZE and TIME to SUMMARY.
158 The accounted code will be executed when EXEC_PRED is true.
159 When NONCONST_PRED is false the code will evaluate to constant and
160 will get optimized out in specialized clones of the function.
161 If CALL is true account to call_size_time_table rather than
162 size_time_table. */
164 void
165 ipa_fn_summary::account_size_time (int size, sreal time,
166 const ipa_predicate &exec_pred,
167 const ipa_predicate &nonconst_pred_in,
168 bool call)
170 size_time_entry *e;
171 bool found = false;
172 int i;
173 ipa_predicate nonconst_pred;
/* Entries for calls and for ordinary code live in separate tables.  */
174 vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;
/* Code guarded by an always-false predicate never executes; nothing to
   account.  */
176 if (exec_pred == false)
177 return;
/* Code can only be non-constant when it is executed at all, so conjoin
   the two predicates.  */
179 nonconst_pred = nonconst_pred_in & exec_pred;
181 if (nonconst_pred == false)
182 return;
184 /* We need to create initial empty unconditional clause, but otherwise
185 we don't need to account empty times and sizes. */
186 if (!size && time == 0 && table->length ())
187 return;
189 /* Only for calls we are unaccounting what we previously recorded. */
190 gcc_checking_assert (time >= 0 || call);
/* Look for an existing entry with exactly the same predicate pair so we
   can merge into it.  */
192 for (i = 0; table->iterate (i, &e); i++)
193 if (e->exec_predicate == exec_pred
194 && e->nonconst_predicate == nonconst_pred)
196 found = true;
197 break;
/* When the table is full, fold the contribution into the first
   (unconditional) entry instead of growing further.  */
199 if (i == max_size_time_table_size)
201 i = 0;
202 found = true;
203 e = &(*table)[0];
204 if (dump_file && (dump_flags & TDF_DETAILS))
205 fprintf (dump_file,
206 "\t\tReached limit on number of entries, "
207 "ignoring the predicate.");
209 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
211 fprintf (dump_file,
212 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
213 ((double) size) / ipa_fn_summary::size_scale,
214 (time.to_double ()), found ? "" : "new ")
215 exec_pred.dump (dump_file, conds, 0);
216 if (exec_pred != nonconst_pred)
218 fprintf (dump_file, " nonconst:");
219 nonconst_pred.dump (dump_file, conds);
221 else
222 fprintf (dump_file, "\n");
/* Either append a fresh entry or merge into the one located above.  */
224 if (!found)
226 class size_time_entry new_entry;
227 new_entry.size = size;
228 new_entry.time = time;
229 new_entry.exec_predicate = exec_pred;
230 new_entry.nonconst_predicate = nonconst_pred;
231 if (call)
232 call_size_time_table.safe_push (new_entry);
233 else
234 size_time_table.safe_push (new_entry);
236 else
238 e->size += size;
239 e->time += time;
240 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
241 /* Tolerate small roundoff issues. */
242 if (e->time < 0)
243 e->time = 0;
247 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
249 static struct cgraph_edge *
250 redirect_to_unreachable (struct cgraph_edge *e)
252 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
253 struct cgraph_node *target
254 = cgraph_node::get_create (builtin_decl_unreachable ());
256 if (e->speculative)
257 e = cgraph_edge::resolve_speculation (e, target->decl);
258 else if (!e->callee)
259 e = cgraph_edge::make_direct (e, target);
260 else
261 e->redirect_callee (target);
262 class ipa_call_summary *es = ipa_call_summaries->get (e);
263 e->inline_failed = CIF_UNREACHABLE;
264 e->count = profile_count::zero ();
265 es->call_stmt_size = 0;
266 es->call_stmt_time = 0;
267 if (callee)
268 callee->remove_symbol_and_inline_clones ();
269 return e;
272 /* Set predicate for edge E. */
274 static void
275 edge_set_predicate (struct cgraph_edge *e, ipa_predicate *predicate)
277 /* If the edge is determined to be never executed, redirect it
278 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
279 be optimized out. */
280 if (predicate && *predicate == false
281 /* When handling speculative edges, we need to do the redirection
282 just once. Do it always on the direct edge, so we do not
283 attempt to resolve speculation while duplicating the edge. */
284 && (!e->speculative || e->callee))
285 e = redirect_to_unreachable (e);
287 class ipa_call_summary *es = ipa_call_summaries->get (e);
288 if (predicate && *predicate != true)
290 if (!es->predicate)
291 es->predicate = edge_predicate_pool.allocate ();
292 *es->predicate = *predicate;
294 else
296 if (es->predicate)
297 edge_predicate_pool.remove (es->predicate);
298 es->predicate = NULL;
302 /* Set predicate for hint *P. */
304 static void
305 set_hint_predicate (ipa_predicate **p, ipa_predicate new_predicate)
307 if (new_predicate == false || new_predicate == true)
309 if (*p)
310 edge_predicate_pool.remove (*p);
311 *p = NULL;
313 else
315 if (!*p)
316 *p = edge_predicate_pool.allocate ();
317 **p = new_predicate;
321 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
322 Otherwise add a new item to the vector with this predicate and frerq equal
323 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
324 in which case the function does nothing. */
326 static void
327 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
328 const ipa_predicate &new_predicate, sreal add_freq,
329 unsigned max_num_predicates)
331 if (new_predicate == false || new_predicate == true)
332 return;
333 ipa_freqcounting_predicate *f;
334 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
335 if (new_predicate == f->predicate)
337 f->freq += add_freq;
338 return;
340 if (vec_safe_length (*v) >= max_num_predicates)
341 /* Too many different predicates to account for. */
342 return;
344 ipa_freqcounting_predicate fcp;
345 fcp.predicate = NULL;
346 set_hint_predicate (&fcp.predicate, new_predicate);
347 fcp.freq = add_freq;
348 vec_safe_push (*v, fcp);
349 return;
352 /* Compute what conditions may or may not hold given information about
353 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
354 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
355 copy when called in a given context. It is a bitmask of conditions. Bit
356 0 means that condition is known to be false, while bit 1 means that condition
357 may or may not be true. These differ - for example NOT_INLINED condition
358 is always false in the second and also builtin_constant_p tests cannot use
359 the fact that parameter is indeed a constant.
361 When INLINE_P is true, assume that we are inlining. AVAL contains known
362 information about argument values. The function does not modify its content
363 and so AVALs could also be of type ipa_call_arg_values but so far all
364 callers work with the auto version and so we avoid the conversion for
365 convenience.
367 ERROR_MARK value of an argument means compile time invariant. */
369 static void
370 evaluate_conditions_for_known_args (struct cgraph_node *node,
371 bool inline_p,
372 ipa_auto_call_arg_values *avals,
373 clause_t *ret_clause,
374 clause_t *ret_nonspec_clause,
375 ipa_call_summary *es)
/* NOT_INLINED is known false only in the specialized (inlined) copy;
   the nonspec clause always keeps it possibly-true.  */
377 clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
378 clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
379 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
380 int i;
381 struct condition *c;
/* Walk every condition of NODE's summary; condition I corresponds to
   clause bit I + first_dynamic_condition.  */
383 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
385 tree val = NULL;
386 tree res;
387 int j;
388 struct expr_eval_op *op;
/* not_sra_candidate is decided from the call summary's per-parameter
   flags rather than from argument values.  */
390 if (c->code == ipa_predicate::not_sra_candidate)
392 if (!inline_p
393 || !es
394 || (int)es->param.length () <= c->operand_num
395 || !es->param[c->operand_num].points_to_possible_sra_candidate)
396 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
397 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
398 continue;
/* For aggregate contents, look the value up by offset, first in a
   known constructor, then in the collected aggregate values.  */
401 if (c->agg_contents)
403 if (c->code == ipa_predicate::changed
404 && !c->by_ref
405 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
406 continue;
408 if (tree sval = avals->safe_sval_at (c->operand_num))
409 val = ipa_find_agg_cst_from_init (sval, c->offset, c->by_ref);
410 if (!val)
412 ipa_argagg_value_list avs (avals);
413 val = avs.get_value (c->operand_num, c->offset / BITS_PER_UNIT,
414 c->by_ref);
416 else
/* Scalar argument; error_mark_node denotes a compile time
   invariant, which only the "changed" code may consume.  */
419 val = avals->safe_sval_at (c->operand_num);
420 if (val && val == error_mark_node
421 && c->code != ipa_predicate::changed)
422 val = NULL_TREE;
/* With no known value, "changed"/"is_not_constant" conditions hold in
   both clauses.  */
425 if (!val
426 && (c->code == ipa_predicate::changed
427 || c->code == ipa_predicate::is_not_constant))
429 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
430 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
431 continue;
/* A known value satisfies these only in the nonspecialized copy.  */
433 if (c->code == ipa_predicate::changed)
435 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
436 continue;
439 if (c->code == ipa_predicate::is_not_constant)
441 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
442 continue;
/* Try to fold the condition to a constant: view-convert the value to
   the condition's type, apply the recorded side operations, then
   compare against c->val.  */
445 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
447 if (c->type != TREE_TYPE (val))
448 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
449 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
451 if (!val)
452 break;
453 if (!op->val[0])
454 val = fold_unary (op->code, op->type, val);
455 else if (!op->val[1])
456 val = fold_binary (op->code, op->type,
457 op->index ? op->val[0] : val,
458 op->index ? val : op->val[0]);
459 else if (op->index == 0)
460 val = fold_ternary (op->code, op->type,
461 val, op->val[0], op->val[1]);
462 else if (op->index == 1)
463 val = fold_ternary (op->code, op->type,
464 op->val[0], val, op->val[1]);
465 else if (op->index == 2)
466 val = fold_ternary (op->code, op->type,
467 op->val[0], op->val[1], val);
468 else
469 val = NULL_TREE;
472 res = val
473 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
474 : NULL;
/* Folded to false: condition provably does not hold, leave both
   clause bits clear.  Folded to true: set both bits.  */
476 if (res && integer_zerop (res))
477 continue;
478 if (res && integer_onep (res))
480 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
481 nonspec_clause
482 |= 1 << (i + ipa_predicate::first_dynamic_condition);
483 continue;
/* No constant verdict; retry using the known value range of the
   argument, propagated through the same side operations.  */
486 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
487 && !c->agg_contents
488 && (!val || TREE_CODE (val) != INTEGER_CST))
490 Value_Range vr (avals->m_known_value_ranges[c->operand_num]);
491 if (!vr.undefined_p ()
492 && !vr.varying_p ()
493 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
495 if (!useless_type_conversion_p (c->type, vr.type ()))
496 range_cast (vr, c->type);
498 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
500 if (vr.varying_p () || vr.undefined_p ())
501 break;
503 Value_Range res (op->type);
504 if (!op->val[0])
506 Value_Range varying (op->type);
507 varying.set_varying (op->type);
508 range_op_handler handler (op->code);
509 if (!handler
510 || !res.supports_type_p (op->type)
511 || !handler.fold_range (res, op->type, vr, varying))
512 res.set_varying (op->type);
514 else if (!op->val[1])
516 Value_Range op0 (op->type);
517 range_op_handler handler (op->code);
519 ipa_range_set_and_normalize (op0, op->val[0]);
521 if (!handler
522 || !res.supports_type_p (op->type)
523 || !handler.fold_range (res, op->type,
524 op->index ? op0 : vr,
525 op->index ? vr : op0))
526 res.set_varying (op->type);
528 else
529 res.set_varying (op->type);
530 vr = res;
/* Finally compare the propagated range against c->val; a zero result
   range means the condition is provably false.  */
532 if (!vr.varying_p () && !vr.undefined_p ())
534 int_range<2> res;
535 Value_Range val_vr (TREE_TYPE (c->val));
536 range_op_handler handler (c->code);
538 ipa_range_set_and_normalize (val_vr, c->val);
540 if (!handler
541 || !val_vr.supports_type_p (TREE_TYPE (c->val))
542 || !handler.fold_range (res, boolean_type_node, vr, val_vr))
543 res.set_varying (boolean_type_node);
545 if (res.zero_p ())
546 continue;
/* Could not disprove the condition; it may hold in both copies.  */
551 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
552 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
554 *ret_clause = clause;
555 if (ret_nonspec_clause)
556 *ret_nonspec_clause = nonspec_clause;
559 /* Return true if VRP will be exectued on the function.
560 We do not want to anticipate optimizations that will not happen.
562 FIXME: This can be confused with -fdisable and debug counters and thus
563 it should not be used for correctness (only to make heuristics work).
564 This means that inliner should do its own optimizations of expressions
565 that it predicts to be constant so wrong code can not be triggered by
566 builtin_constant_p. */
568 static bool
569 vrp_will_run_p (struct cgraph_node *node)
571 return (opt_for_fn (node->decl, optimize)
572 && !opt_for_fn (node->decl, optimize_debug)
573 && opt_for_fn (node->decl, flag_tree_vrp));
576 /* Similarly about FRE. */
578 static bool
579 fre_will_run_p (struct cgraph_node *node)
581 return (opt_for_fn (node->decl, optimize)
582 && !opt_for_fn (node->decl, optimize_debug)
583 && opt_for_fn (node->decl, flag_tree_fre));
586 /* Work out what conditions might be true at invocation of E.
587 Compute costs for inlined edge if INLINE_P is true.
589 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
590 (if non-NULL) conditions evaluated for nonspecialized clone called
591 in a given context.
593 Vectors in AVALS will be populated with useful known information about
594 argument values - information not known to have any uses will be omitted -
595 except for m_known_contexts which will only be calculated if
596 COMPUTE_CONTEXTS is true. */
598 void
599 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
600 clause_t *clause_ptr,
601 clause_t *nonspec_clause_ptr,
602 ipa_auto_call_arg_values *avals,
603 bool compute_contexts)
605 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
606 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
607 class ipa_edge_args *args;
608 class ipa_call_summary *es = NULL;
/* Default result: only the NOT_INLINED condition (for non-inline).  */
610 if (clause_ptr)
611 *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
/* Preferred path: jump functions are available for the edge.  */
613 if (ipa_node_params_sum
614 && !e->call_stmt_cannot_inline_p
615 && (info->conds || compute_contexts)
616 && (args = ipa_edge_args_sum->get (e)) != NULL)
618 struct cgraph_node *caller;
619 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
620 int i, count = ipa_get_cs_argument_count (args);
621 es = ipa_call_summaries->get (e);
623 if (count)
/* Use the eventual inline root as caller so its parameter info
   applies to the jump functions.  */
625 if (e->caller->inlined_to)
626 caller = e->caller->inlined_to;
627 else
628 caller = e->caller;
629 caller_parms_info = ipa_node_params_sum->get (caller);
630 callee_pi = ipa_node_params_sum->get (callee);
632 /* Watch for thunks. */
633 if (callee_pi)
634 /* Watch for variadic functions. */
635 count = MIN (count, ipa_get_param_count (callee_pi));
/* Collect known data per parameter, skipping parameters not used by
   predicates or indirect calls.  */
638 if (callee_pi)
639 for (i = 0; i < count; i++)
641 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
643 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
644 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
646 /* Determine if we know constant value of the parameter. */
647 tree type = ipa_get_type (callee_pi, i);
648 tree cst = ipa_value_from_jfunc (caller_parms_info, jf, type);
/* Fall back to the literal argument on the call statement.  */
650 if (!cst && e->call_stmt
651 && i < (int)gimple_call_num_args (e->call_stmt))
653 cst = gimple_call_arg (e->call_stmt, i);
654 if (!is_gimple_min_invariant (cst))
655 cst = NULL;
657 if (cst)
659 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO)
660 if (!avals->m_known_vals.length ())
661 avals->m_known_vals.safe_grow_cleared (count, true);
662 avals->m_known_vals[i] = cst;
/* error_mark_node marks a compile time invariant (zero change
   probability) for the inlined copy.  */
664 else if (inline_p && !es->param[i].change_prob)
666 if (!avals->m_known_vals.length ())
667 avals->m_known_vals.safe_grow_cleared (count, true);
668 avals->m_known_vals[i] = error_mark_node;
671 /* If we failed to get simple constant, try value range. */
672 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
673 && vrp_will_run_p (caller)
674 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
676 Value_Range vr (type);
678 ipa_value_range_from_jfunc (vr, caller_parms_info, e, jf, type);
679 if (!vr.undefined_p () && !vr.varying_p ())
/* Lazily build the range vector; safe_grow does not
   construct elements, so placement-new them first.  */
681 if (!avals->m_known_value_ranges.length ())
683 avals->m_known_value_ranges.safe_grow (count, true);
684 for (int i = 0; i < count; ++i)
685 new (&avals->m_known_value_ranges[i])
686 Value_Range ();
688 avals->m_known_value_ranges[i] = vr;
692 /* Determine known aggregate values. */
693 if (fre_will_run_p (caller))
694 ipa_push_agg_values_from_jfunc (caller_parms_info,
695 caller, &jf->agg, i,
696 &avals->m_known_aggs);
699 /* For calls used in polymorphic calls we further determine
700 polymorphic call context. */
701 if (compute_contexts
702 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
704 ipa_polymorphic_call_context
705 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
706 if (!ctx.useless_p ())
708 if (!avals->m_known_contexts.length ())
709 avals->m_known_contexts.safe_grow_cleared (count, true);
710 avals->m_known_contexts[i]
711 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
715 else
716 gcc_assert (!count || callee->thunk);
/* Fallback path: no jump functions; read invariant arguments straight
   off the call statement.  */
718 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
720 int i, count = (int)gimple_call_num_args (e->call_stmt);
722 for (i = 0; i < count; i++)
724 tree cst = gimple_call_arg (e->call_stmt, i);
725 if (!is_gimple_min_invariant (cst))
726 cst = NULL;
727 if (cst)
729 if (!avals->m_known_vals.length ())
730 avals->m_known_vals.safe_grow_cleared (count, true);
731 avals->m_known_vals[i] = cst;
/* Turn collected values into the clause bitmasks.  */
736 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
737 nonspec_clause_ptr, es);
741 /* Allocate the function summary. */
743 static void
744 ipa_fn_summary_alloc (void)
746 gcc_checking_assert (!ipa_fn_summaries);
747 ipa_size_summaries = new ipa_size_summary_t (symtab);
748 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
749 ipa_call_summaries = new ipa_call_summary_t (symtab);
752 ipa_call_summary::~ipa_call_summary ()
754 if (predicate)
755 edge_predicate_pool.remove (predicate);
757 param.release ();
760 ipa_fn_summary::~ipa_fn_summary ()
762 unsigned len = vec_safe_length (loop_iterations);
763 for (unsigned i = 0; i < len; i++)
764 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
765 len = vec_safe_length (loop_strides);
766 for (unsigned i = 0; i < len; i++)
767 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
768 vec_free (conds);
769 call_size_time_table.release ();
770 vec_free (loop_iterations);
771 vec_free (loop_strides);
772 builtin_constant_p_parms.release ();
775 void
776 ipa_fn_summary_t::remove_callees (cgraph_node *node)
778 cgraph_edge *e;
779 for (e = node->callees; e; e = e->next_callee)
780 ipa_call_summaries->remove (e);
781 for (e = node->indirect_calls; e; e = e->next_callee)
782 ipa_call_summaries->remove (e);
785 /* Duplicate predicates in loop hint vector, allocating memory for them and
786 remove and deallocate any uninteresting (true or false) ones. Return the
787 result. */
789 static vec<ipa_freqcounting_predicate, va_gc> *
790 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
791 clause_t possible_truths)
793 if (vec_safe_length (v) == 0)
794 return NULL;
796 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
797 int len = res->length();
798 for (int i = len - 1; i >= 0; i--)
800 ipa_predicate new_predicate
801 = (*res)[i].predicate->remap_after_duplication (possible_truths);
802 /* We do not want to free previous predicate; it is used by node
803 origin. */
804 (*res)[i].predicate = NULL;
805 set_hint_predicate (&(*res)[i].predicate, new_predicate);
807 if (!(*res)[i].predicate)
808 res->unordered_remove (i);
811 return res;
815 /* Hook that is called by cgraph.cc when a node is duplicated. */
816 void
817 ipa_fn_summary_t::duplicate (cgraph_node *src,
818 cgraph_node *dst,
819 ipa_fn_summary *src_info,
820 ipa_fn_summary *info)
/* Start from a shallow copy of the source summary.  */
822 new (info) ipa_fn_summary (*src_info);
823 /* TODO: as an optimization, we may avoid copying conditions
824 that are known to be false or true. */
825 info->conds = vec_safe_copy (info->conds);
827 clone_info *cinfo = clone_info::get (dst);
828 /* When there are any replacements in the function body, see if we can figure
829 out that something was optimized out. */
830 if (ipa_node_params_sum && cinfo && cinfo->tree_map)
832 /* Use SRC parm info since it may not be copied yet. */
833 ipa_node_params *parms_info = ipa_node_params_sum->get (src);
834 ipa_auto_call_arg_values avals;
835 int count = ipa_get_param_count (parms_info);
836 int i, j;
837 clause_t possible_truths;
838 ipa_predicate true_pred = true;
839 size_time_entry *e;
840 int optimized_out_size = 0;
841 bool inlined_to_p = false;
842 struct cgraph_edge *edge, *next;
844 info->size_time_table.release ();
/* Gather the clone's known parameter replacements and evaluate which
   conditions can still hold under them.  */
845 avals.m_known_vals.safe_grow_cleared (count, true);
846 for (i = 0; i < count; i++)
848 struct ipa_replace_map *r;
850 for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
852 if (r->parm_num == i)
854 avals.m_known_vals[i] = r->new_tree;
855 break;
859 evaluate_conditions_for_known_args (dst, false,
860 &avals,
861 &possible_truths,
862 /* We are going to specialize,
863 so ignore nonspec truths. */
864 NULL,
865 NULL);
/* Re-create the initial unconditional entry.  */
867 info->account_size_time (0, 0, true_pred, true_pred);
869 /* Remap size_time vectors.
870 Simplify the predicate by pruning out alternatives that are known
871 to be false.
872 TODO: as an optimization, we can also eliminate conditions known
873 to be true. */
874 for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
876 ipa_predicate new_exec_pred;
877 ipa_predicate new_nonconst_pred;
878 new_exec_pred = e->exec_predicate.remap_after_duplication
879 (possible_truths);
880 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
881 (possible_truths);
882 if (new_exec_pred == false || new_nonconst_pred == false)
883 optimized_out_size += e->size;
884 else
885 info->account_size_time (e->size, e->time, new_exec_pred,
886 new_nonconst_pred);
889 /* Remap edge predicates with the same simplification as above.
890 Also copy constantness arrays. */
891 for (edge = dst->callees; edge; edge = next)
893 ipa_predicate new_predicate;
894 class ipa_call_summary *es = ipa_call_summaries->get (edge);
/* edge_set_predicate may redirect/replace the edge; remember the next
   one beforehand.  */
895 next = edge->next_callee;
897 if (!edge->inline_failed)
898 inlined_to_p = true;
899 if (!es->predicate)
900 continue;
901 new_predicate = es->predicate->remap_after_duplication
902 (possible_truths);
903 if (new_predicate == false && *es->predicate != false)
904 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
905 edge_set_predicate (edge, &new_predicate);
908 /* Remap indirect edge predicates with the same simplification as above.
909 Also copy constantness arrays. */
910 for (edge = dst->indirect_calls; edge; edge = next)
912 ipa_predicate new_predicate;
913 class ipa_call_summary *es = ipa_call_summaries->get (edge);
914 next = edge->next_callee;
916 gcc_checking_assert (edge->inline_failed);
917 if (!es->predicate)
918 continue;
919 new_predicate = es->predicate->remap_after_duplication
920 (possible_truths);
921 if (new_predicate == false && *es->predicate != false)
922 optimized_out_size
923 += es->call_stmt_size * ipa_fn_summary::size_scale;
924 edge_set_predicate (edge, &new_predicate);
926 info->loop_iterations
927 = remap_freqcounting_preds_after_dup (info->loop_iterations,
928 possible_truths);
929 info->loop_strides
930 = remap_freqcounting_preds_after_dup (info->loop_strides,
931 possible_truths);
/* Keep only builtin_constant_p parameters that did not become known
   constants in this clone.  */
932 if (info->builtin_constant_p_parms.length())
934 vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
935 int ip;
936 info->builtin_constant_p_parms = vNULL;
937 for (i = 0; parms.iterate (i, &ip); i++)
938 if (!avals.m_known_vals[ip])
939 info->builtin_constant_p_parms.safe_push (ip);
942 /* If inliner or someone after inliner will ever start producing
943 non-trivial clones, we will get trouble with lack of information
944 about updating self sizes, because size vectors already contain
945 sizes of the callees. */
946 gcc_assert (!inlined_to_p || !optimized_out_size);
948 else
/* No replacements: plain deep copy of the vectors, plus fresh
   pool-allocated copies of the hint predicates (the shallow copy
   above shared them with SRC).  */
950 info->size_time_table = src_info->size_time_table.copy ();
951 info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
952 info->loop_strides = vec_safe_copy (info->loop_strides);
954 info->builtin_constant_p_parms
955 = info->builtin_constant_p_parms.copy ();
957 ipa_freqcounting_predicate *f;
958 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
960 ipa_predicate p = *f->predicate;
961 f->predicate = NULL;
962 set_hint_predicate (&f->predicate, p);
964 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
966 ipa_predicate p = *f->predicate;
967 f->predicate = NULL;
968 set_hint_predicate (&f->predicate, p);
971 if (!dst->inlined_to)
972 ipa_update_overall_fn_summary (dst);
976 /* Hook that is called by cgraph.cc when a node is duplicated. */
978 void
979 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
980 struct cgraph_edge *dst,
981 class ipa_call_summary *srcinfo,
982 class ipa_call_summary *info)
984 new (info) ipa_call_summary (*srcinfo);
985 info->predicate = NULL;
986 edge_set_predicate (dst, srcinfo->predicate);
987 info->param = srcinfo->param.copy ();
988 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
990 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
991 - eni_size_weights.call_cost);
992 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
993 - eni_time_weights.call_cost);
997 /* Dump edge summaries associated to NODE and recursively to all clones.
998 Indent by INDENT. */
1000 static void
1001 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
1002 class ipa_fn_summary *info)
1004 struct cgraph_edge *edge;
/* Direct calls first; recurse into inlined callees with extra indent.  */
1005 for (edge = node->callees; edge; edge = edge->next_callee)
1007 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1008 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1009 int i;
1011 fprintf (f,
1012 "%*s%s %s\n%*s freq:%4.2f",
1013 indent, "", callee->dump_name (),
1014 !edge->inline_failed
1015 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1016 indent, "", edge->sreal_frequency ().to_double ());
1018 if (cross_module_call_p (edge))
1019 fprintf (f, " cross module");
1021 if (es)
1022 fprintf (f, " loop depth:%2i size:%2i time: %2i",
1023 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
1025 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
1026 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1027 if (s != NULL)
1028 fprintf (f, " callee size:%2i stack:%2i",
1029 (int) (ss->size / ipa_fn_summary::size_scale),
1030 (int) s->estimated_stack_size);
/* Predicate dump ends the line itself; otherwise terminate it here.  */
1032 if (es && es->predicate)
1034 fprintf (f, " predicate: ");
1035 es->predicate->dump (f, info->conds);
1037 else
1038 fprintf (f, "\n");
/* Per-argument properties recorded in the call summary.  */
1039 if (es && es->param.exists ())
1040 for (i = 0; i < (int) es->param.length (); i++)
1042 int prob = es->param[i].change_prob;
1044 if (!prob)
1045 fprintf (f, "%*s op%i is compile time invariant\n",
1046 indent + 2, "", i);
1047 else if (prob != REG_BR_PROB_BASE)
1048 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1049 prob * 100.0 / REG_BR_PROB_BASE);
1050 if (es->param[i].points_to_local_or_readonly_memory)
1051 fprintf (f, "%*s op%i points to local or readonly memory\n",
1052 indent + 2, "", i);
1053 if (es->param[i].points_to_possible_sra_candidate)
1054 fprintf (f, "%*s op%i points to possible sra candidate\n",
1055 indent + 2, "", i);
/* For inlined edges, show frame info and recurse into the callee.  */
1057 if (!edge->inline_failed)
1059 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1060 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1061 indent + 2, "",
1062 (int) ipa_get_stack_frame_offset (callee),
1063 (int) ss->estimated_self_stack_size);
1064 dump_ipa_call_summary (f, indent + 2, callee, info);
/* Then the indirect calls of NODE.  */
1067 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1069 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1070 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1071 " time: %2i",
1072 indent, "",
1073 es->loop_depth,
1074 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1075 es->call_stmt_time);
1076 if (es->predicate)
1078 fprintf (f, "predicate: ");
1079 es->predicate->dump (f, info->conds);
1081 else
1082 fprintf (f, "\n");
/* Dump the IPA function summary of NODE to F: flags, size/time/stack
   estimates, the predicate-guarded size/time table, loop iteration and
   stride estimates, and all call edges.  */
1087 void
1088 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1090 if (node->definition)
1092 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1093 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1094 if (s != NULL)
1096 size_time_entry *e;
1097 int i;
1098 fprintf (f, "IPA function summary for %s", node->dump_name ());
1099 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1100 fprintf (f, " always_inline");
1101 if (s->inlinable)
1102 fprintf (f, " inlinable");
1103 if (s->fp_expressions)
1104 fprintf (f, " fp_expression");
/* Indices of parameters tested by __builtin_constant_p, if any.  */
1105 if (s->builtin_constant_p_parms.length ())
1107 fprintf (f, " builtin_constant_p_parms");
1108 for (unsigned int i = 0;
1109 i < s->builtin_constant_p_parms.length (); i++)
1110 fprintf (f, " %i", s->builtin_constant_p_parms[i]);
1112 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1113 fprintf (f, " self size: %i\n", ss->self_size);
1114 fprintf (f, " global size: %i\n", ss->size);
1115 fprintf (f, " min size: %i\n", s->min_size);
1116 fprintf (f, " self stack: %i\n",
1117 (int) ss->estimated_self_stack_size);
1118 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1119 if (s->growth)
1120 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1121 if (s->scc_no)
1122 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
/* Each size/time entry is guarded by an execution and a nonconstness
   predicate; dump them only when non-trivial.  */
1123 for (i = 0; s->size_time_table.iterate (i, &e); i++)
1125 fprintf (f, " size:%f, time:%f",
1126 (double) e->size / ipa_fn_summary::size_scale,
1127 e->time.to_double ());
1128 if (e->exec_predicate != true)
1130 fprintf (f, ", executed if:");
1131 e->exec_predicate.dump (f, s->conds, 0);
1133 if (e->exec_predicate != e->nonconst_predicate)
1135 fprintf (f, ", nonconst if:");
1136 e->nonconst_predicate.dump (f, s->conds, 0);
1138 fprintf (f, "\n");
/* Predicated loop iteration and stride frequency estimates.  */
1140 ipa_freqcounting_predicate *fcp;
1141 bool first_fcp = true;
1142 for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
1144 if (first_fcp)
1146 fprintf (f, " loop iterations:");
1147 first_fcp = false;
1149 fprintf (f, " %3.2f for ", fcp->freq.to_double ());
1150 fcp->predicate->dump (f, s->conds);
1152 first_fcp = true;
1153 for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
1155 if (first_fcp)
1157 fprintf (f, " loop strides:");
1158 first_fcp = false;
1160 fprintf (f, " %3.2f for :", fcp->freq.to_double ());
1161 fcp->predicate->dump (f, s->conds);
/* Finally dump call edges, recursing into inlined callees.  */
1163 fprintf (f, " calls:\n");
1164 dump_ipa_call_summary (f, 4, node, s);
1165 fprintf (f, "\n");
1166 if (s->target_info)
1167 fprintf (f, " target_info: %x\n", s->target_info);
1169 else
1170 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
/* Dump the function summary of NODE to stderr.  Marked DEBUG_FUNCTION so
   it is kept available as a convenience entry point (e.g. from a
   debugger).  */
1174 DEBUG_FUNCTION void
1175 ipa_debug_fn_summary (struct cgraph_node *node)
1177 ipa_dump_fn_summary (stderr, node);
/* Dump summaries of all defined functions to F.  Functions already
   inlined into another node are skipped here; their summaries appear in
   the recursive call-summary dump of the function they were inlined
   into.  */
1180 void
1181 ipa_dump_fn_summaries (FILE *f)
1183 struct cgraph_node *node;
1185 FOR_EACH_DEFINED_FUNCTION (node)
1186 if (!node->inlined_to)
1187 ipa_dump_fn_summary (f, node);
1190 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1191 boolean variable pointed to by DATA. */
1193 static bool
1194 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1195 void *data)
1197 bool *b = (bool *) data;
1198 *b = true;
1199 return true;
1202 /* If OP refers to value of function parameter, return the corresponding
1203 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1204 PARM_DECL) will be stored to *SIZE_P in that case too. */
/* Returns NULL_TREE when OP is neither case, when the parameter may have
   been modified before STMT, or when the alias-walk budget is spent.  */
1206 static tree
1207 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1208 poly_int64 *size_p)
1210 /* SSA_NAME referring to parm default def? */
1211 if (TREE_CODE (op) == SSA_NAME
1212 && SSA_NAME_IS_DEFAULT_DEF (op)
1213 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1215 if (size_p)
1216 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)))
1217 return SSA_NAME_VAR (op);
1219 /* Non-SSA parm reference? */
1220 if (TREE_CODE (op) == PARM_DECL
1221 && fbi->aa_walk_budget > 0)
1223 bool modified = false;
1225 ao_ref refd;
1226 ao_ref_init (&refd, op);
/* Walk the virtual definitions reaching STMT; mark_modified flags
   MODIFIED if any of them may clobber the parameter.  */
1227 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1228 mark_modified, &modified, NULL, NULL,
1229 fbi->aa_walk_budget);
1230 if (walked < 0)
/* Per-function alias-walk budget exhausted: disable further walks
   and conservatively give up on this operand.  */
1232 fbi->aa_walk_budget = 0;
1233 return NULL_TREE;
/* Charge the walk against the remaining budget.  */
1235 fbi->aa_walk_budget -= walked;
1236 if (!modified)
1238 if (size_p)
1239 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1240 return op;
1243 return NULL_TREE;
1246 /* If OP refers to value of function parameter, return the corresponding
1247 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1248 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1249 stored to *SIZE_P in that case too. */
1251 static tree
1252 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1253 poly_int64 *size_p)
1255 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1256 if (res)
1257 return res;
1259 if (TREE_CODE (op) == SSA_NAME
1260 && !SSA_NAME_IS_DEFAULT_DEF (op)
1261 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1262 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1263 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1264 size_p);
1265 return NULL_TREE;
1268 /* If OP refers to a value of a function parameter or value loaded from an
1269 aggregate passed to a parameter (either by value or reference), return TRUE
1270 and store the number of the parameter to *INDEX_P, the access size into
1271 *SIZE_P, and information whether and how it has been loaded from an
1272 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1273 statement in which OP is used or loaded. */
1275 static bool
1276 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1277 gimple *stmt, tree op, int *index_p,
1278 poly_int64 *size_p,
1279 struct agg_position_info *aggpos)
1281 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1283 gcc_checking_assert (aggpos);
1284 if (res)
/* Direct (scalar) use of an unmodified parameter.  */
1286 *index_p = ipa_get_param_decl_index (fbi->info, res);
1287 if (*index_p < 0)
1288 return false;
1289 aggpos->agg_contents = false;
1290 aggpos->by_ref = false;
1291 return true;
/* Otherwise look through a single SSA copy and retry, unless the copied
   value is a memory reference, which is handled as an aggregate load
   below.  */
1294 if (TREE_CODE (op) == SSA_NAME)
1296 if (SSA_NAME_IS_DEFAULT_DEF (op)
1297 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1298 return false;
1299 stmt = SSA_NAME_DEF_STMT (op);
1300 op = gimple_assign_rhs1 (stmt);
1301 if (!REFERENCE_CLASS_P (op))
1302 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1303 aggpos);
/* OP is a memory reference: check whether it is a load from an aggregate
   passed to a parameter, filling in offset/by_ref info.  */
1306 aggpos->agg_contents = true;
1307 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1308 stmt, op, index_p, &aggpos->offset,
1309 size_p, &aggpos->by_ref);
1312 /* If stmt is simple load or store of value pointed to by a function parmaeter,
1313 return its index. */
1315 static int
1316 load_or_store_of_ptr_parameter (ipa_func_body_info *fbi, gimple *stmt)
1318 if (!optimize)
1319 return -1;
1320 gassign *assign = dyn_cast <gassign *> (stmt);
1321 if (!assign)
1322 return -1;
1323 tree param;
1324 if (gimple_assign_load_p (stmt))
1325 param = gimple_assign_rhs1 (stmt);
1326 else if (gimple_store_p (stmt))
1327 param = gimple_assign_lhs (stmt);
1328 else
1329 return -1;
1330 tree base = get_base_address (param);
1331 if (TREE_CODE (base) != MEM_REF
1332 || TREE_CODE (TREE_OPERAND (base, 0)) != SSA_NAME
1333 || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0)))
1334 return -1;
1335 tree p = SSA_NAME_VAR (TREE_OPERAND (base, 0));
1336 if (TREE_CODE (p) != PARM_DECL)
1337 return -1;
1338 return ipa_get_param_decl_index (fbi->info, p);
1341 /* See if statement might disappear after inlining.
1342 0 - means not eliminated
1343 1 - half of statements goes away
1344 2 - for sure it is eliminated.
1345 We are not terribly sophisticated, basically looking for simple abstraction
1346 penalty wrappers. */
1348 static int
1349 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1351 enum gimple_code code = gimple_code (stmt);
1352 enum tree_code rhs_code;
1354 if (!optimize)
1355 return 0;
1357 switch (code)
1359 case GIMPLE_RETURN:
1360 return 2;
1361 case GIMPLE_ASSIGN:
/* Only simple assignments (lhs plus a single rhs operand) qualify.  */
1362 if (gimple_num_ops (stmt) != 2)
1363 return 0;
1365 rhs_code = gimple_assign_rhs_code (stmt);
1367 /* Casts of parameters, loads from parameters passed by reference
1368 and stores to return value or parameters are often free after
1369 inlining due to SRA and further combining.
1370 Assume that half of statements goes away. */
1371 if (CONVERT_EXPR_CODE_P (rhs_code)
1372 || rhs_code == VIEW_CONVERT_EXPR
1373 || rhs_code == ADDR_EXPR
1374 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1376 tree rhs = gimple_assign_rhs1 (stmt);
1377 tree lhs = gimple_assign_lhs (stmt);
1378 tree inner_rhs = get_base_address (rhs);
1379 tree inner_lhs = get_base_address (lhs);
1380 bool rhs_free = false;
1381 bool lhs_free = false;
/* get_base_address may return NULL for non-memory operands; fall back
   to the operands themselves.  */
1383 if (!inner_rhs)
1384 inner_rhs = rhs;
1385 if (!inner_lhs)
1386 inner_lhs = lhs;
1388 /* Reads of parameter are expected to be free. */
1389 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1390 rhs_free = true;
1391 /* Match expressions of form &this->field. Those will most likely
1392 combine with something upstream after inlining. */
1393 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1395 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1396 if (TREE_CODE (op) == PARM_DECL)
1397 rhs_free = true;
1398 else if (TREE_CODE (op) == MEM_REF
1399 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1400 NULL))
1401 rhs_free = true;
1404 /* When parameter is not SSA register because its address is taken
1405 and it is just copied into one, the statement will be completely
1406 free after inlining (we will copy propagate backward). */
1407 if (rhs_free && is_gimple_reg (lhs))
1408 return 2;
1410 /* Reads of parameters passed by reference
1411 expected to be free (i.e. optimized out after inlining). */
1412 if (TREE_CODE (inner_rhs) == MEM_REF
1413 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1414 rhs_free = true;
1416 /* Copying parameter passed by reference into gimple register is
1417 probably also going to copy propagate, but we can't be quite
1418 sure. */
1419 if (rhs_free && is_gimple_reg (lhs))
1420 lhs_free = true;
1422 /* Writes to parameters, parameters passed by value and return value
1423 (either directly or passed via invisible reference) are free.
1425 TODO: We ought to handle testcase like
1426 struct a {int a,b;};
1427 struct a
1428 returnstruct (void)
1430 struct a a ={1,2};
1431 return a;
1434 This translate into:
1436 returnstruct ()
1438 int a$b;
1439 int a$a;
1440 struct a a;
1441 struct a D.2739;
1443 <bb 2>:
1444 D.2739.a = 1;
1445 D.2739.b = 2;
1446 return D.2739;
1449 For that we either need to copy ipa-split logic detecting writes
1450 to return value. */
1451 if (TREE_CODE (inner_lhs) == PARM_DECL
1452 || TREE_CODE (inner_lhs) == RESULT_DECL
1453 || (TREE_CODE (inner_lhs) == MEM_REF
1454 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1455 NULL)
1456 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1457 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1458 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1459 (inner_lhs,
1460 0))) == RESULT_DECL))))
1461 lhs_free = true;
/* Storing a register or constant into a free lhs is also free.  */
1462 if (lhs_free
1463 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1464 rhs_free = true;
1465 if (lhs_free && rhs_free)
1466 return 1;
1468 return 0;
1469 default:
1470 return 0;
1474 /* Analyze EXPR if it represents a series of simple operations performed on
1475 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1476 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1477 Type of the parameter or load from an aggregate via the parameter is
1478 stored in *TYPE_P. Operations on the parameter are recorded to
1479 PARAM_OPS_P if it is not NULL. */
1481 static bool
1482 decompose_param_expr (struct ipa_func_body_info *fbi,
1483 gimple *stmt, tree expr,
1484 int *index_p, tree *type_p,
1485 struct agg_position_info *aggpos,
1486 expr_eval_ops *param_ops_p = NULL)
/* Bound the walk by --param ipa-max-param-expr-ops so predicates stay
   cheap to evaluate.  */
1488 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1489 int op_count = 0;
1491 if (param_ops_p)
1492 *param_ops_p = NULL;
/* Walk the SSA definition chain from EXPR towards a parameter, recording
   each simple operation on the way.  */
1494 while (true)
1496 expr_eval_op eval_op;
1497 unsigned rhs_count;
1498 unsigned cst_count = 0;
1500 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1501 aggpos))
1503 tree type = TREE_TYPE (expr);
1505 if (aggpos->agg_contents)
1507 /* Stop if containing bit-field. */
1508 if (TREE_CODE (expr) == BIT_FIELD_REF
1509 || contains_bitfld_component_ref_p (expr))
1510 break;
1513 *type_p = type;
1514 return true;
1517 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1518 break;
1519 stmt = SSA_NAME_DEF_STMT (expr);
/* Look through calls known to return one of their arguments
   (ERF_RETURNS_ARG) and continue from that argument.  */
1521 if (gcall *call = dyn_cast <gcall *> (stmt))
1523 int flags = gimple_call_return_flags (call);
1524 if (!(flags & ERF_RETURNS_ARG))
1525 goto fail;
1526 int arg = flags & ERF_RETURN_ARG_MASK;
1527 if (arg >= (int)gimple_call_num_args (call))
1528 goto fail;
1529 expr = gimple_call_arg (stmt, arg);
1530 continue;
1533 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1534 break;
1536 switch (gimple_assign_rhs_class (stmt))
1538 case GIMPLE_SINGLE_RHS:
1539 expr = gimple_assign_rhs1 (stmt);
1540 continue;
1542 case GIMPLE_UNARY_RHS:
1543 rhs_count = 1;
1544 break;
1546 case GIMPLE_BINARY_RHS:
1547 rhs_count = 2;
1548 break;
1550 case GIMPLE_TERNARY_RHS:
1551 rhs_count = 3;
1552 break;
1554 default:
1555 goto fail;
1558 /* Stop if expression is too complex. */
1559 if (op_count++ == op_limit)
1560 break;
1562 if (param_ops_p)
1564 eval_op.code = gimple_assign_rhs_code (stmt);
1565 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1566 eval_op.val[0] = NULL_TREE;
1567 eval_op.val[1] = NULL_TREE;
/* Exactly one rhs operand may be non-constant: record the constants
   in eval_op.val and remember which position carries the tracked
   value.  An all-constant rhs means the result does not depend on a
   parameter, so give up.  */
1570 expr = NULL_TREE;
1571 for (unsigned i = 0; i < rhs_count; i++)
1573 tree op = gimple_op (stmt, i + 1);
1575 gcc_assert (op && !TYPE_P (op));
1576 if (is_gimple_ip_invariant (op))
1578 if (++cst_count == rhs_count)
1579 goto fail;
1581 eval_op.val[cst_count - 1] = op;
1583 else if (!expr)
1585 /* Found a non-constant operand, and record its index in rhs
1586 operands. */
1587 eval_op.index = i;
1588 expr = op;
1590 else
1592 /* Found more than one non-constant operands. */
1593 goto fail;
1597 if (param_ops_p)
/* Prepend so the recorded operations end up ordered from the
   parameter outward, i.e. in evaluation order.  */
1598 vec_safe_insert (*param_ops_p, 0, eval_op);
1601 /* Failed to decompose, free resource and return. */
1602 fail:
1603 if (param_ops_p)
1604 vec_free (*param_ops_p);
1606 return false;
1609 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1611 static void
1612 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1614 int ip;
1616 /* Avoid duplicates. */
1617 for (unsigned int i = 0;
1618 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1619 if (ip == parm)
1620 return;
1621 summary->builtin_constant_p_parms.safe_push (parm);
1624 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1625 predicates to the CFG edges. */
1627 static void
1628 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1629 class ipa_fn_summary *summary,
1630 class ipa_node_params *params_summary,
1631 basic_block bb)
1633 tree op, op2;
1634 int index;
1635 struct agg_position_info aggpos;
1636 enum tree_code code, inverted_code;
1637 edge e;
1638 edge_iterator ei;
1639 gimple *set_stmt;
1640 tree param_type;
1641 expr_eval_ops param_ops;
/* Only GIMPLE_COND terminators with an IP-invariant rhs are handled.  */
1643 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1644 if (!last)
1645 return;
1646 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1647 return;
1648 op = gimple_cond_lhs (last);
/* Case 1: the condition tests an expression of a parameter; attach the
   (possibly inverted) comparison as the predicate of each successor.  */
1650 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1651 &param_ops))
1653 code = gimple_cond_code (last);
1654 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1656 FOR_EACH_EDGE (e, ei, bb->succs)
1658 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1659 ? code : inverted_code);
1660 /* invert_tree_comparison will return ERROR_MARK on FP
1661 comparisons that are not EQ/NE instead of returning proper
1662 unordered one. Be sure it is not confused with NON_CONSTANT.
1664 And if the edge's target is the final block of diamond CFG graph
1665 of this conditional statement, we do not need to compute
1666 predicate for the edge because the final block's predicate must
1667 be at least as that of the first block of the statement. */
1668 if (this_code != ERROR_MARK
1669 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1671 ipa_predicate p
1672 = add_condition (summary, params_summary, index,
1673 param_type, &aggpos,
1674 this_code, gimple_cond_rhs (last), param_ops);
1675 e->aux = edge_predicate_pool.allocate ();
1676 *(ipa_predicate *) e->aux = p;
1679 vec_free (param_ops);
1680 return;
1683 if (TREE_CODE (op) != SSA_NAME)
1684 return;
1685 /* Special case
1686 if (builtin_constant_p (op))
1687 constant_code
1688 else
1689 nonconstant_code.
1690 Here we can predicate nonconstant_code. We can't
1691 really handle constant_code since we have no predicate
1692 for this and also the constant code is not known to be
1693 optimized away when inliner doesn't see operand is constant.
1694 Other optimizers might think otherwise. */
1695 if (gimple_cond_code (last) != NE_EXPR
1696 || !integer_zerop (gimple_cond_rhs (last)))
1697 return;
1698 set_stmt = SSA_NAME_DEF_STMT (op);
1699 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1700 || gimple_call_num_args (set_stmt) != 1)
1701 return;
1702 op2 = gimple_call_arg (set_stmt, 0);
1703 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1704 return;
/* Only record the parameter when tested by value, not via a reference.  */
1705 if (!aggpos.by_ref)
1706 add_builtin_constant_p_parm (summary, index);
/* Attach the is_not_constant predicate to the false (nonconstant)
   edge(s).  */
1707 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1709 ipa_predicate p = add_condition (summary, params_summary, index,
1710 param_type, &aggpos,
1711 ipa_predicate::is_not_constant, NULL_TREE);
1712 e->aux = edge_predicate_pool.allocate ();
1713 *(ipa_predicate *) e->aux = p;
1718 /* If BB ends by a switch we can turn into predicates, attach corresponding
1719 predicates to the CFG edges. */
1721 static void
1722 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1723 class ipa_fn_summary *summary,
1724 class ipa_node_params *params_summary,
1725 basic_block bb)
1727 tree op;
1728 int index;
1729 struct agg_position_info aggpos;
1730 edge e;
1731 edge_iterator ei;
1732 size_t n;
1733 size_t case_idx;
1734 tree param_type;
1735 expr_eval_ops param_ops;
1737 gswitch *last = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb));
1738 if (!last)
1739 return;
/* Only switches over an expression of a parameter are interesting.  */
1740 op = gimple_switch_index (last);
1741 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1742 &param_ops))
1743 return;
1745 auto_vec<std::pair<tree, tree> > ranges;
1746 tree type = TREE_TYPE (op);
/* Limit on case-range endpoints used when building the default-case
   predicate; beyond it that predicate would get too complicated.  */
1747 int bound_limit = opt_for_fn (fbi->node->decl,
1748 param_ipa_max_switch_predicate_bounds);
1749 int bound_count = 0;
1750 // This can safely be an integer range, as switches can only hold
1751 // integers.
1752 int_range<2> vr;
1754 get_range_query (cfun)->range_of_expr (vr, op);
1755 if (vr.undefined_p ())
1756 vr.set_varying (TREE_TYPE (op));
1757 tree vr_min, vr_max;
1758 // TODO: This entire function could use a rewrite to use the irange
1759 // API, instead of trying to recreate its intersection/union logic.
1760 // Any use of get_legacy_range() is a serious code smell.
1761 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
1762 wide_int vr_wmin = wi::to_wide (vr_min);
1763 wide_int vr_wmax = wi::to_wide (vr_max);
/* Start every outgoing edge with predicate FALSE; case handling below
   ORs the matching conditions in.  */
1765 FOR_EACH_EDGE (e, ei, bb->succs)
1767 e->aux = edge_predicate_pool.allocate ();
1768 *(ipa_predicate *) e->aux = false;
1771 e = gimple_switch_edge (cfun, last, 0);
1772 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1773 default case if its target basic block is in convergence point of all
1774 switch cases, which can be determined by checking whether it
1775 post-dominates the switch statement. */
1776 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1777 bound_count = INT_MAX;
1779 n = gimple_switch_num_labels (last);
1780 for (case_idx = 1; case_idx < n; ++case_idx)
1782 tree cl = gimple_switch_label (last, case_idx);
1783 tree min = CASE_LOW (cl);
1784 tree max = CASE_HIGH (cl);
1785 ipa_predicate p;
1787 e = gimple_switch_edge (cfun, last, case_idx);
1789 /* The case value might not have same type as switch expression,
1790 extend the value based on the expression type. */
1791 if (TREE_TYPE (min) != type)
1792 min = wide_int_to_tree (type, wi::to_wide (min));
1794 if (!max)
1795 max = min;
1796 else if (TREE_TYPE (max) != type)
1797 max = wide_int_to_tree (type, wi::to_wide (max));
1799 /* The case's target basic block is in convergence point of all switch
1800 cases, its predicate should be at least as that of the switch
1801 statement. */
1802 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1803 p = true;
1804 else if (min == max)
1805 p = add_condition (summary, params_summary, index, param_type,
1806 &aggpos, EQ_EXPR, min, param_ops);
1807 else
/* A range case becomes min <= x && x <= max.  */
1809 ipa_predicate p1, p2;
1810 p1 = add_condition (summary, params_summary, index, param_type,
1811 &aggpos, GE_EXPR, min, param_ops);
1812 p2 = add_condition (summary, params_summary,index, param_type,
1813 &aggpos, LE_EXPR, max, param_ops);
1814 p = p1 & p2;
1816 *(ipa_predicate *) e->aux
1817 = p.or_with (summary->conds, *(ipa_predicate *) e->aux);
1819 /* If there are too many disjoint case ranges, predicate for default
1820 case might become too complicated. So add a limit here. */
1821 if (bound_count > bound_limit)
1822 continue;
1824 bool new_range = true;
1826 if (!ranges.is_empty ())
1828 wide_int curr_wmin = wi::to_wide (min);
1829 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1831 /* Merge case ranges if they are continuous. */
1832 if (curr_wmin == last_wmax + 1)
1833 new_range = false;
1834 else if (vr_type == VR_ANTI_RANGE)
1836 /* If two disjoint case ranges can be connected by anti-range
1837 of switch index, combine them to one range. */
1838 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1839 vr_type = VR_UNDEFINED;
1840 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1841 new_range = false;
1845 /* Create/extend a case range. And we count endpoints of range set,
1846 this number nearly equals to number of conditions that we will create
1847 for predicate of default case. */
1848 if (new_range)
1850 bound_count += (min == max) ? 1 : 2;
1851 ranges.safe_push (std::make_pair (min, max));
1853 else
1855 bound_count += (ranges.last ().first == ranges.last ().second);
1856 ranges.last ().second = max;
1860 e = gimple_switch_edge (cfun, last, 0);
/* Too many range endpoints: give the default edge the trivial TRUE
   predicate instead of an overly complex one.  */
1861 if (bound_count > bound_limit)
1863 *(ipa_predicate *) e->aux = true;
1864 vec_free (param_ops);
1865 return;
1868 ipa_predicate p_seg = true;
1869 ipa_predicate p_all = false;
/* Without a known range, fall back to the full span of the type.  */
1871 if (vr_type != VR_RANGE)
1873 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1874 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1877 /* Construct predicate to represent default range set that is negation of
1878 all case ranges. Case range is classified as containing single/non-single
1879 values. Suppose a piece of case ranges in the following.
1881 [D1...D2] [S1] ... [Sn] [D3...D4]
1883 To represent default case's range sets between two non-single value
1884 case ranges (From D2 to D3), we construct predicate as:
1886 D2 < x < D3 && x != S1 && ... && x != Sn
1888 for (size_t i = 0; i < ranges.length (); i++)
1890 tree min = ranges[i].first;
1891 tree max = ranges[i].second;
1893 if (min == max)
1894 p_seg &= add_condition (summary, params_summary, index,
1895 param_type, &aggpos, NE_EXPR,
1896 min, param_ops);
1897 else
1899 /* Do not create sub-predicate for range that is beyond low bound
1900 of switch index. */
1901 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1903 p_seg &= add_condition (summary, params_summary, index,
1904 param_type, &aggpos,
1905 LT_EXPR, min, param_ops);
1906 p_all = p_all.or_with (summary->conds, p_seg);
1909 /* Do not create sub-predicate for range that is beyond up bound
1910 of switch index. */
1911 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1913 p_seg = false;
1914 break;
1917 p_seg = add_condition (summary, params_summary, index,
1918 param_type, &aggpos, GT_EXPR,
1919 max, param_ops);
1923 p_all = p_all.or_with (summary->conds, p_seg);
1924 *(ipa_predicate *) e->aux
1925 = p_all.or_with (summary->conds, *(ipa_predicate *) e->aux);
1927 vec_free (param_ops);
1931 /* For each BB in NODE attach to its AUX pointer predicate under
1932 which it is executable. */
1934 static void
1935 compute_bb_predicates (struct ipa_func_body_info *fbi,
1936 struct cgraph_node *node,
1937 class ipa_fn_summary *summary,
1938 class ipa_node_params *params_summary)
1940 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1941 bool done = false;
1942 basic_block bb;
/* First attach predicates to the outgoing edges of conditionals and
   switches.  */
1944 FOR_EACH_BB_FN (bb, my_function)
1946 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1947 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1950 /* Entry block is always executable. */
1951 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1952 = edge_predicate_pool.allocate ();
1953 *(ipa_predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1955 /* A simple dataflow propagation of predicates forward in the CFG.
1956 TODO: work in reverse postorder. */
1957 while (!done)
1959 done = true;
1960 FOR_EACH_BB_FN (bb, my_function)
/* BB is executable when any predecessor is, under the predecessor's
   predicate ANDed with the connecting edge's predicate.  */
1962 ipa_predicate p = false;
1963 edge e;
1964 edge_iterator ei;
1965 FOR_EACH_EDGE (e, ei, bb->preds)
1967 if (e->src->aux)
1969 ipa_predicate this_bb_predicate
1970 = *(ipa_predicate *) e->src->aux;
1971 if (e->aux)
1972 this_bb_predicate &= (*(ipa_predicate *) e->aux);
1973 p = p.or_with (summary->conds, this_bb_predicate);
/* TRUE cannot grow any further; skip the remaining edges.  */
1974 if (p == true)
1975 break;
1978 if (p != false)
1980 basic_block pdom_bb;
1982 if (!bb->aux)
1984 done = false;
1985 bb->aux = edge_predicate_pool.allocate ();
1986 *((ipa_predicate *) bb->aux) = p;
1988 else if (p != *(ipa_predicate *) bb->aux)
1990 /* This OR operation is needed to ensure monotonous data flow
1991 in the case we hit the limit on number of clauses and the
1992 and/or operations above give approximate answers. */
1993 p = p.or_with (summary->conds, *(ipa_predicate *)bb->aux);
1994 if (p != *(ipa_predicate *)bb->aux)
1996 done = false;
1997 *((ipa_predicate *)bb->aux) = p;
2001 /* For switch/if statement, we can OR-combine predicates of all
2002 its cases/branches to get predicate for basic block in their
2003 convergence point, but sometimes this will generate very
2004 complicated predicate. Actually, we can get simplified
2005 predicate in another way by using the fact that predicate
2006 for a basic block must also hold true for its post dominators.
2007 To be specific, basic block in convergence point of
2008 conditional statement should include predicate of the
2009 statement. */
2010 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
2011 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
2013 else if (!pdom_bb->aux)
2015 done = false;
2016 pdom_bb->aux = edge_predicate_pool.allocate ();
2017 *((ipa_predicate *)pdom_bb->aux) = p;
2019 else if (p != *(ipa_predicate *)pdom_bb->aux)
2021 p = p.or_with (summary->conds,
2022 *(ipa_predicate *)pdom_bb->aux);
2023 if (p != *(ipa_predicate *)pdom_bb->aux)
2025 done = false;
2026 *((ipa_predicate *)pdom_bb->aux) = p;
2035 /* Return predicate specifying when the STMT might have result that is not
2036 a compile time constant. */
/* EXPR is the expression analyzed; NONCONSTANT_NAMES maps SSA name
   versions to the predicates already computed for them.  A "true"
   result means the value may always be nonconstant.  */
2038 static ipa_predicate
2039 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
2040 class ipa_fn_summary *summary,
2041 class ipa_node_params *params_summary,
2042 tree expr,
2043 vec<ipa_predicate> nonconstant_names)
2045 tree parm;
2046 int index;
/* Look through unary operations; they do not change constancy.  */
2048 while (UNARY_CLASS_P (expr))
2049 expr = TREE_OPERAND (expr, 0);
2051 parm = unmodified_parm (fbi, NULL, expr, NULL);
/* A parameter is nonconstant exactly when the argument changes.  */
2052 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2053 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
2054 ipa_predicate::changed, NULL_TREE);
2055 if (is_gimple_min_invariant (expr))
2056 return false;
2057 if (TREE_CODE (expr) == SSA_NAME)
2058 return nonconstant_names[SSA_NAME_VERSION (expr)];
/* Binary/comparison result is nonconstant when either operand is.  */
2059 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
2061 ipa_predicate p1
2062 = will_be_nonconstant_expr_predicate (fbi, summary,
2063 params_summary,
2064 TREE_OPERAND (expr, 0),
2065 nonconstant_names);
2066 if (p1 == true)
2067 return p1;
2069 ipa_predicate p2
2070 = will_be_nonconstant_expr_predicate (fbi, summary,
2071 params_summary,
2072 TREE_OPERAND (expr, 1),
2073 nonconstant_names);
2074 return p1.or_with (summary->conds, p2);
2076 else if (TREE_CODE (expr) == COND_EXPR)
2078 ipa_predicate p1
2079 = will_be_nonconstant_expr_predicate (fbi, summary,
2080 params_summary,
2081 TREE_OPERAND (expr, 0),
2082 nonconstant_names);
2083 if (p1 == true)
2084 return p1;
2086 ipa_predicate p2
2087 = will_be_nonconstant_expr_predicate (fbi, summary,
2088 params_summary,
2089 TREE_OPERAND (expr, 1),
2090 nonconstant_names);
2091 if (p2 == true)
2092 return p2;
2093 p1 = p1.or_with (summary->conds, p2);
2094 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
2095 params_summary,
2096 TREE_OPERAND (expr, 2),
2097 nonconstant_names);
2098 return p2.or_with (summary->conds, p1);
/* Calls are conservatively treated as always possibly nonconstant.  */
2100 else if (TREE_CODE (expr) == CALL_EXPR)
2101 return true;
2102 else
2104 debug_tree (expr);
2105 gcc_unreachable ();
2110 /* Return predicate specifying when the STMT might have result that is not
2111 a compile time constant. */
/* NONCONSTANT_NAMES maps SSA name versions to their predicates; the lhs's
   entry is updated as a side effect.  */
2113 static ipa_predicate
2114 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2115 class ipa_fn_summary *summary,
2116 class ipa_node_params *params_summary,
2117 gimple *stmt,
2118 vec<ipa_predicate> nonconstant_names)
2120 ipa_predicate p = true;
2121 ssa_op_iter iter;
2122 tree use;
2123 tree param_type = NULL_TREE;
2124 ipa_predicate op_non_const;
2125 bool is_load;
2126 int base_index;
2127 struct agg_position_info aggpos;
2129 /* What statements might be optimized away
2130 when their arguments are constant. */
2131 if (gimple_code (stmt) != GIMPLE_ASSIGN
2132 && gimple_code (stmt) != GIMPLE_COND
2133 && gimple_code (stmt) != GIMPLE_SWITCH
2134 && (gimple_code (stmt) != GIMPLE_CALL
2135 || !(gimple_call_flags (stmt) & ECF_CONST)))
2136 return p;
2138 /* Stores will stay anyway. */
2139 if (gimple_store_p (stmt))
2140 return p;
2142 is_load = gimple_assign_load_p (stmt);
2144 /* Loads can be optimized when the value is known. */
2145 if (is_load)
2147 tree op = gimple_assign_rhs1 (stmt);
/* Only loads decomposable into a parameter access are tracked.  */
2148 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
2149 &aggpos))
2150 return p;
2152 else
2153 base_index = -1;
2155 /* See if we understand all operands before we start
2156 adding conditionals. */
2157 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2159 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2160 /* For arguments we can build a condition. */
2161 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2162 continue;
2163 if (TREE_CODE (use) != SSA_NAME)
2164 return p;
2165 /* If we know when operand is constant,
2166 we still can say something useful. */
2167 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
2168 continue;
2169 return p;
/* All operands understood: combine their predicates with OR.  */
2172 if (is_load)
2173 op_non_const =
2174 add_condition (summary, params_summary,
2175 base_index, param_type, &aggpos,
2176 ipa_predicate::changed, NULL_TREE);
2177 else
2178 op_non_const = false;
2179 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2181 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2182 int index;
2184 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
/* The base of a tracked load is already covered above.  */
2186 if (index != base_index)
2187 p = add_condition (summary, params_summary, index,
2188 TREE_TYPE (parm), NULL,
2189 ipa_predicate::changed, NULL_TREE);
2190 else
2191 continue;
2193 else
2194 p = nonconstant_names[SSA_NAME_VERSION (use)];
2195 op_non_const = p.or_with (summary->conds, op_non_const);
/* Remember the predicate for the lhs SSA name so later statements can
   reuse it.  */
2197 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2198 && gimple_op (stmt, 0)
2199 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2200 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2201 = op_non_const;
2202 return op_non_const;
/* Bookkeeping shared with the record_modified callback of
   walk_aliased_vdefs.  */

struct record_modified_bb_info
{
  /* The parameter value whose modifications are tracked (for dumps).  */
  tree op;
  /* Set of indices of basic blocks where OP may be modified.  */
  bitmap bb_set;
  /* The statement using OP; its own vdef is ignored.  */
  gimple *stmt;
};
2212 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2213 probability how often it changes between USE_BB.
2214 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2215 is in different loop nest, we can do better.
2216 This is all just estimate. In theory we look for minimal cut separating
2217 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2218 anyway. */
2220 static basic_block
2221 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2223 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2224 if (l && l->header->count < init_bb->count)
2225 return l->header;
2226 return init_bb;
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  DATA points to a record_modified_bb_info.
   Always returns false so the walk visits every aliased vdef.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* The using statement itself does not count as a modification.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers only mark end of life, not a real store.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  /* Default defs are attributed to the entry block; otherwise use the
     least-frequent block on the path per get_minimal_bb.  */
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			(gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			 gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, " Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
		 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
		  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  return false;
}
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.

   FBI carries the alias-walk budget, which is decremented by the number of
   vdefs visited (and zeroed when the walk aborts).  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		       (gimple_bb (SSA_NAME_DEF_STMT (base)),
			gimple_bb (stmt))->count;

      /* Scale the defining block's count relative to the use; clamp to at
	 least 1 so the result is never confused with "invariant".  */
      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory case: walk aliased vdefs to find the blocks that may store
	 into BASE and take the most frequent one.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* A foldable constructor means the value is a compile time
	 invariant.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, " Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked > 0)
	fbi->aa_walk_budget -= walked;
      /* Give up when the walk ran out of budget or the value may be set in
	 the very block that uses it.  */
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (walked < 0)
	    fbi->aa_walk_budget = 0;
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, " Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, " Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, " Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.

   Returns false when the CFG shape does not match or the controlling
   condition cannot be expressed as a predicate.  */

static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      ipa_predicate *p,
			      vec<ipa_predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;

  /* A single predecessor means no join; the PHI is trivially known.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Verify that every predecessor is either the condition block itself or
     a forwarder whose sole predecessor is the condition block, and that
     all of them agree on one such block FIRST_BB.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }
  if (!first_bb)
    return false;

  /* FIRST_BB must end in a conditional with an IP-invariant RHS.  */
  gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (first_bb));
  if (!stmt
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  /* An always-nonconstant condition gives us no usable information.  */
  if (*p == true)
    return false;
  else
    return true;
}
2432 /* Given a PHI statement in a function described by inline properties SUMMARY
2433 and *P being the predicate describing whether the selected PHI argument is
2434 known, store a predicate for the result of the PHI statement into
2435 NONCONSTANT_NAMES, if possible. */
2437 static void
2438 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2439 ipa_predicate *p,
2440 vec<ipa_predicate> nonconstant_names)
2442 unsigned i;
2444 for (i = 0; i < gimple_phi_num_args (phi); i++)
2446 tree arg = gimple_phi_arg (phi, i)->def;
2447 if (!is_gimple_min_invariant (arg))
2449 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2450 *p = p->or_with (summary->conds,
2451 nonconstant_names[SSA_NAME_VERSION (arg)]);
2452 if (*p == true)
2453 return;
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2459 fprintf (dump_file, "\t\tphi predicate: ");
2460 p->dump (dump_file, summary->conds);
2462 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.

   Returns the GIMPLE_COND consuming the __builtin_expect result when the
   pattern matches, NULL otherwise.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk the def chain of the first argument through conversions,
	     looking for the comparison feeding the builtin.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  /* Found the comparison; stop walking.  */
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  /* Look through conversions.  */
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The builtin result must feed exactly one GIMPLE_COND.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too..  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The block must end with a GIMPLE_RESX.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Scan backwards: only debug statements, clobbers and a leading label
     are allowed; anything else disqualifies the block.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2586 /* Return true if STMT compute a floating point expression that may be affected
2587 by -ffast-math and similar flags. */
2589 static bool
2590 fp_expression_p (gimple *stmt)
2592 ssa_op_iter i;
2593 tree op;
2595 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2596 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2597 return true;
2598 return false;
2601 /* Return true if T references memory location that is local
2602 for the function (that means, dead after return) or read-only. */
2604 bool
2605 refs_local_or_readonly_memory_p (tree t)
2607 /* Non-escaping memory is fine. */
2608 t = get_base_address (t);
2609 if ((TREE_CODE (t) == MEM_REF
2610 || TREE_CODE (t) == TARGET_MEM_REF))
2611 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2613 /* Automatic variables are fine. */
2614 if (DECL_P (t)
2615 && auto_var_in_fn_p (t, current_function_decl))
2616 return true;
2618 /* Read-only variables are fine. */
2619 if (DECL_P (t) && TREE_READONLY (t))
2620 return true;
2622 return false;
2625 /* Return true if T is a pointer pointing to memory location that is local
2626 for the function (that means, dead after return) or read-only. */
2628 bool
2629 points_to_local_or_readonly_memory_p (tree t)
2631 /* See if memory location is clearly invalid. */
2632 if (integer_zerop (t))
2633 return flag_delete_null_pointer_checks;
2634 if (TREE_CODE (t) == SSA_NAME)
2636 /* For IPA passes we can consinder accesses to return slot local
2637 even if it is not local in the sense that memory is dead by
2638 the end of founction.
2639 The outer function will see a store in the call assignment
2640 and thus this will do right thing for all uses of this
2641 function in the current IPA passes (modref, pure/const discovery
2642 and inlining heuristics). */
2643 if (DECL_RESULT (current_function_decl)
2644 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
2645 && t == ssa_default_def (cfun, DECL_RESULT (current_function_decl)))
2646 return true;
2647 return !ptr_deref_may_alias_global_p (t, false);
2649 if (TREE_CODE (t) == ADDR_EXPR)
2650 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2651 return false;
2654 /* Return true if T is a pointer pointing to memory location that is possible
2655 sra candidate if all functions it is passed to are inlined. */
2657 static bool
2658 points_to_possible_sra_candidate_p (tree t)
2660 if (TREE_CODE (t) != ADDR_EXPR)
2661 return false;
2663 t = get_base_address (TREE_OPERAND (t, 0));
2665 /* Automatic variables are fine. */
2666 if (DECL_P (t)
2667 && auto_var_in_fn_p (t, current_function_decl))
2668 return true;
2669 return false;
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.

   Walks all basic blocks and statements of NODE's body, building the
   context-sensitive size/time table of its ipa_fn_summary, per-call-edge
   summaries, and (for late analysis) loop iteration/stride predicates.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = opt_for_fn (node->decl, param_uninlined_function_insns);
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  ipa_node_params *params_summary
    = early ? NULL : ipa_node_params_sum->get (node);
  ipa_predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<ipa_predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  /* Start from a clean summary.  */
  memset(&fbi, 0, sizeof(fbi));
  vec_free (info->conds);
  info->conds = NULL;
  info->size_time_table.release ();
  info->call_size_time_table.release ();

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = ipa_node_params_sum->get (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length (), true);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->dump_name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list.  */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the fixed per-function overhead under the not-inlined
     predicate.  */
  bb_predicate = ipa_predicate::not_inlined ();
  info->account_size_time (opt_for_fn (node->decl,
				       param_uninlined_function_insns)
			   * ipa_fn_summary::size_scale,
			   opt_for_fn (node->decl,
				       param_uninlined_function_time),
			   bb_predicate,
			   bb_predicate);

  /* Only look for target information for inlinable functions.  */
  bool scan_for_target_info =
    info->inlinable
    && targetm.target_option.need_ipa_fn_target_info (node->decl,
						      info->target_info);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      /* FREQ is the block's execution count relative to function entry.  */
      freq = bb->count.to_sreal_scale (entry_count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(ipa_predicate *)bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute predicates for PHI results; the diamond-shape test is done
	 once per block, before the first PHI.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  ipa_predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  ipa_predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     __builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  ipa_predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count, true);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		      es->param[i].points_to_local_or_readonly_memory
			= points_to_local_or_readonly_memory_p
			    (gimple_call_arg (stmt, i));
		      es->param[i].points_to_possible_sra_candidate
			= points_to_possible_sra_candidate_p
			    (gimple_call_arg (stmt, i));
		    }
		}
	      /* We cannot setup VLA parameters during inlining.  */
	      for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
		if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
		  {
		    edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
		    break;
		  }
	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      if (edge->speculative)
		{
		  cgraph_edge *indirect
		    = edge->speculative_call_indirect_edge ();
		  ipa_call_summary *es2
		    = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);

		  /* Edge is the first direct call.
		     create and duplicate call summaries for multiple
		     speculative call targets.  */
		  for (cgraph_edge *direct
			 = edge->next_speculative_call_target ();
		       direct;
		       direct = direct->next_speculative_call_target ())
		    {
		      ipa_call_summary *es3
			= ipa_call_summaries->get_create (direct);
		      ipa_call_summaries->duplicate (edge, direct,
						     es, es3);
		    }
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;
	      /* PROB in <0,2> is the scaled probability the statement is
		 eliminated by inlining.  */
	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      ipa_predicate p = bb_predicate & will_be_nonconstant;
	      int parm = load_or_store_of_ptr_parameter (&fbi, stmt);
	      ipa_predicate sra_predicate = true;
	      if (parm != -1)
		sra_predicate &= add_condition (info, params_summary, parm,
						ptr_type_node, NULL,
						ipa_predicate::not_sra_candidate, NULL, 0);

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      ipa_predicate ip
			= bb_predicate & ipa_predicate::not_inlined () & sra_predicate;
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate & sra_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}
	    }

	  /* For target specific information, we want to scan all statements
	     rather than those statements with non-zero weights, to avoid
	     missing to scan something interesting for target information,
	     such as: internal function calls.  */
	  if (scan_for_target_info)
	    scan_for_target_info =
	      targetm.target_option.update_ipa_fn_target_info
		(info->target_info, stmt);

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    ipa_predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }
	}
    }
  free (order);

  /* Late (non-early) analysis: derive loop iteration and stride
     predicates using SCEV.  */
  if (nonconstant_names.exists () && !early)
    {
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      unsigned max_loop_predicates = opt_for_fn (node->decl,
						 param_ipa_max_loop_predicates);

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      for (auto loop : loops_list (cfun, 0))
	{
	  ipa_predicate loop_iterations = true;
	  sreal header_freq;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  /* Unreachable loop headers have no predicate; skip them.  */
	  if (!loop->header->aux)
	    continue;

	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);

	  bb_predicate = *(ipa_predicate *)loop->header->aux;
	  auto_vec<edge> exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	      {
		ipa_predicate will_be_nonconstant
		  = will_be_nonconstant_expr_predicate (&fbi, info,
							params_summary,
							niter_desc.niter,
							nonconstant_names);
		if (will_be_nonconstant != true)
		  will_be_nonconstant = bb_predicate & will_be_nonconstant;
		if (will_be_nonconstant != true
		    && will_be_nonconstant != false)
		  loop_iterations &= will_be_nonconstant;
	      }
	  add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
				      phdr_freq, max_loop_predicates);
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  ipa_predicate loop_stride = true;
	  basic_block *body = get_loop_body (loop);
	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      if (!body[i]->aux)
		continue;

	      bb_predicate = *(ipa_predicate *)body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only IVs with a non-invariant step contribute.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  ipa_predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  add_freqcounting_predicate (&s->loop_strides, loop_stride,
				      phdr_freq, max_loop_predicates);
	  free (body);
	}
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates stored in AUX fields.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((ipa_predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((ipa_predicate *)e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
3168 /* Compute function summary.
3169 EARLY is true when we compute parameters during early opts. */
3171 void
3172 compute_fn_summary (struct cgraph_node *node, bool early)
3174 HOST_WIDE_INT self_stack_size;
3175 struct cgraph_edge *e;
3177 gcc_assert (!node->inlined_to);
3179 if (!ipa_fn_summaries)
3180 ipa_fn_summary_alloc ();
3182 /* Create a new ipa_fn_summary. */
3183 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
3184 ipa_fn_summaries->remove (node);
3185 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
3186 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
3188 /* Estimate the stack size for the function if we're optimizing. */
3189 self_stack_size = optimize && !node->thunk
3190 ? estimated_stack_frame_size (node) : 0;
3191 size_info->estimated_self_stack_size = self_stack_size;
3192 info->estimated_stack_size = self_stack_size;
3194 if (node->thunk)
3196 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
3197 ipa_predicate t = true;
3199 node->can_change_signature = false;
3200 es->call_stmt_size = eni_size_weights.call_cost;
3201 es->call_stmt_time = eni_time_weights.call_cost;
3202 info->account_size_time (ipa_fn_summary::size_scale
3203 * opt_for_fn (node->decl,
3204 param_uninlined_function_thunk_insns),
3205 opt_for_fn (node->decl,
3206 param_uninlined_function_thunk_time), t, t);
3207 t = ipa_predicate::not_inlined ();
3208 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
3209 ipa_update_overall_fn_summary (node);
3210 size_info->self_size = size_info->size;
3211 if (stdarg_p (TREE_TYPE (node->decl)))
3213 info->inlinable = false;
3214 node->callees->inline_failed = CIF_VARIADIC_THUNK;
3216 else
3217 info->inlinable = true;
3219 else
3221 /* Even is_gimple_min_invariant rely on current_function_decl. */
3222 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3224 /* During IPA profile merging we may be called w/o virtual SSA form
3225 built. */
3226 update_ssa (TODO_update_ssa_only_virtuals);
3228 /* Can this function be inlined at all? */
3229 if (!opt_for_fn (node->decl, optimize)
3230 && !lookup_attribute ("always_inline",
3231 DECL_ATTRIBUTES (node->decl)))
3232 info->inlinable = false;
3233 else
3234 info->inlinable = tree_inlinable_function_p (node->decl);
3236 bool no_signature = false;
3237 /* Type attributes can use parameter indices to describe them.
3238 Special case fn spec since we can safely preserve them in
3239 modref summaries. */
3240 for (tree list = TYPE_ATTRIBUTES (TREE_TYPE (node->decl));
3241 list && !no_signature; list = TREE_CHAIN (list))
3242 if (!ipa_param_adjustments::type_attribute_allowed_p
3243 (get_attribute_name (list)))
3245 if (dump_file)
3247 fprintf (dump_file, "No signature change:"
3248 " function type has unhandled attribute %s.\n",
3249 IDENTIFIER_POINTER (get_attribute_name (list)));
3251 no_signature = true;
3253 for (tree parm = DECL_ARGUMENTS (node->decl);
3254 parm && !no_signature; parm = DECL_CHAIN (parm))
3255 if (variably_modified_type_p (TREE_TYPE (parm), node->decl))
3257 if (dump_file)
3259 fprintf (dump_file, "No signature change:"
3260 " has parameter with variably modified type.\n");
3262 no_signature = true;
3265 /* Likewise for #pragma omp declare simd functions or functions
3266 with simd attribute. */
3267 if (no_signature
3268 || lookup_attribute ("omp declare simd",
3269 DECL_ATTRIBUTES (node->decl)))
3270 node->can_change_signature = false;
3271 else
3273 /* Otherwise, inlinable functions always can change signature. */
3274 if (info->inlinable)
3275 node->can_change_signature = true;
3276 else
3278 /* Functions calling builtin_apply cannot change signature. */
3279 for (e = node->callees; e; e = e->next_callee)
3281 tree cdecl = e->callee->decl;
3282 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS,
3283 BUILT_IN_VA_START))
3284 break;
3286 node->can_change_signature = !e;
3289 analyze_function_body (node, early);
3290 pop_cfun ();
3293 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3294 size_info->size = size_info->self_size;
3295 info->estimated_stack_size = size_info->estimated_self_stack_size;
3297 /* Code above should compute exactly the same result as
3298 ipa_update_overall_fn_summary except for case when speculative
3299 edges are present since these are accounted to size but not
3300 self_size. Do not compare time since different order the roundoff
3301 errors result in slight changes. */
3302 ipa_update_overall_fn_summary (node);
3303 if (flag_checking)
3305 for (e = node->indirect_calls; e; e = e->next_callee)
3306 if (e->speculative)
3307 break;
3308 gcc_assert (e || size_info->size == size_info->self_size);
3313 /* Compute parameters of functions used by inliner using
3314 current_function_decl. */
3316 static unsigned int
3317 compute_fn_summary_for_current (void)
3319 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3320 return 0;
3323 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3324 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3325 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3327 static bool
3328 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3329 int *size, int *time,
3330 ipa_call_arg_values *avals)
3332 tree target;
3333 struct cgraph_node *callee;
3334 class ipa_fn_summary *isummary;
3335 enum availability avail;
3336 bool speculative;
3338 if (!avals
3339 || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
3340 return false;
3341 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3342 return false;
3344 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3345 if (!target || speculative)
3346 return false;
3348 /* Account for difference in cost between indirect and direct calls. */
3349 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3350 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3351 gcc_checking_assert (*time >= 0);
3352 gcc_checking_assert (*size >= 0);
3354 callee = cgraph_node::get (target);
3355 if (!callee || !callee->definition)
3356 return false;
3357 callee = callee->function_symbol (&avail);
3358 if (avail < AVAIL_AVAILABLE)
3359 return false;
3360 isummary = ipa_fn_summaries->get (callee);
3361 if (isummary == NULL)
3362 return false;
3364 return isummary->inlinable;
3367 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3368 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3369 devirtualized. AVALS, if non-NULL, describes the context of the call site
3370 as far as values of parameters are concerened. */
3372 static inline void
3373 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3374 sreal *time, ipa_call_arg_values *avals,
3375 ipa_hints *hints)
3377 class ipa_call_summary *es = ipa_call_summaries->get (e);
3378 int call_size = es->call_stmt_size;
3379 int call_time = es->call_stmt_time;
3380 int cur_size;
3382 if (!e->callee && hints && e->maybe_hot_p ()
3383 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3384 *hints |= INLINE_HINT_indirect_call;
3385 cur_size = call_size * ipa_fn_summary::size_scale;
3386 *size += cur_size;
3387 if (min_size)
3388 *min_size += cur_size;
3389 if (time)
3390 *time += ((sreal)call_time) * e->sreal_frequency ();
3394 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3395 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3396 site.
3398 Helper for estimate_calls_size_and_time which does the same but
3399 (in most cases) faster. */
3401 static void
3402 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3403 int *min_size, sreal *time,
3404 ipa_hints *hints,
3405 clause_t possible_truths,
3406 ipa_call_arg_values *avals)
3408 struct cgraph_edge *e;
3409 for (e = node->callees; e; e = e->next_callee)
3411 if (!e->inline_failed)
3413 gcc_checking_assert (!ipa_call_summaries->get (e));
3414 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3415 hints, possible_truths, avals);
3417 continue;
3419 class ipa_call_summary *es = ipa_call_summaries->get (e);
3421 /* Do not care about zero sized builtins. */
3422 if (!es->call_stmt_size)
3424 gcc_checking_assert (!es->call_stmt_time);
3425 continue;
3427 if (!es->predicate
3428 || es->predicate->evaluate (possible_truths))
3430 /* Predicates of calls shall not use NOT_CHANGED codes,
3431 so we do not need to compute probabilities. */
3432 estimate_edge_size_and_time (e, size,
3433 es->predicate ? NULL : min_size,
3434 time, avals, hints);
3437 for (e = node->indirect_calls; e; e = e->next_callee)
3439 class ipa_call_summary *es = ipa_call_summaries->get (e);
3440 if (!es->predicate
3441 || es->predicate->evaluate (possible_truths))
3442 estimate_edge_size_and_time (e, size,
3443 es->predicate ? NULL : min_size,
3444 time, avals, hints);
3448 /* Populate sum->call_size_time_table for edges from NODE. */
3450 static void
3451 summarize_calls_size_and_time (struct cgraph_node *node,
3452 ipa_fn_summary *sum)
3454 struct cgraph_edge *e;
3455 for (e = node->callees; e; e = e->next_callee)
3457 if (!e->inline_failed)
3459 gcc_checking_assert (!ipa_call_summaries->get (e));
3460 summarize_calls_size_and_time (e->callee, sum);
3461 continue;
3463 int size = 0;
3464 sreal time = 0;
3466 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3468 ipa_predicate pred = true;
3469 class ipa_call_summary *es = ipa_call_summaries->get (e);
3471 if (es->predicate)
3472 pred = *es->predicate;
3473 sum->account_size_time (size, time, pred, pred, true);
3475 for (e = node->indirect_calls; e; e = e->next_callee)
3477 int size = 0;
3478 sreal time = 0;
3480 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3481 ipa_predicate pred = true;
3482 class ipa_call_summary *es = ipa_call_summaries->get (e);
3484 if (es->predicate)
3485 pred = *es->predicate;
3486 sum->account_size_time (size, time, pred, pred, true);
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
   context of the call site.

   There are two strategies: a fast one using the precomputed
   call_size_time_table, and a slow one walking every edge
   (estimate_calls_size_and_time_1).  The table cannot be used when the
   context may change the outcome for some edge, so several conditions
   below force the slow path.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      ipa_call_arg_values *avals)
{
  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
  bool use_table = true;

  gcc_assert (node->callees || node->indirect_calls);

  /* During early inlining we do not calculate info for very
     large functions and thus there is no need for producing
     summaries.  */
  if (!ipa_node_params_sum)
    use_table = false;
  /* Do not calculate summaries for simple wrappers; it is waste
     of memory.  */
  else if (node->callees && node->indirect_calls
	   && node->callees->inline_failed && !node->callees->next_callee)
    use_table = false;
  /* If there is an indirect edge that may be optimized, we need
     to go the slow way.  */
  else if (avals && hints
	   && (avals->m_known_vals.length ()
	       || avals->m_known_contexts.length ()
	       || avals->m_known_aggs.length ()))
    {
      ipa_node_params *params_summary = ipa_node_params_sum->get (node);
      unsigned int nargs = params_summary
			   ? ipa_get_param_count (params_summary) : 0;

      /* The table is unusable if any known value/context/aggregate is
	 attached to a parameter that feeds an indirect or polymorphic
	 call — devirtualization could change edge costs.  */
      for (unsigned int i = 0; i < nargs && use_table; i++)
	{
	  if (ipa_is_param_used_by_indirect_call (params_summary, i)
	      && (avals->safe_sval_at (i)
		  || (ipa_argagg_value_list (avals).value_for_index_p (i))))
	    use_table = false;
	  else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
		   && (avals->m_known_contexts.length () > i
		       && !avals->m_known_contexts[i].useless_p ()))
	    use_table = false;
	}
    }

  /* Fast path is via the call size time table.  */
  if (use_table)
    {
      /* Build summary if it is absent.  */
      if (!sum->call_size_time_table.length ())
	{
	  /* Entry 0 is always the unconditional part; seed it so the
	     min_size lookup below is valid.  */
	  ipa_predicate true_pred = true;
	  sum->account_size_time (0, 0, true_pred, true_pred, true);
	  summarize_calls_size_and_time (node, sum);
	}

      int old_size = *size;
      sreal old_time = time ? *time : 0;

      if (min_size)
	*min_size += sum->call_size_time_table[0].size;

      unsigned int i;
      size_time_entry *e;

      /* Walk the table and account sizes and times.  */
      for (i = 0; sum->call_size_time_table.iterate (i, &e);
	   i++)
	if (e->exec_predicate.evaluate (possible_truths))
	  {
	    *size += e->size;
	    if (time)
	      *time += e->time;
	  }

      /* Be careful and see if both methods agree.  */
      if ((flag_checking || dump_file)
	  /* Do not try to sanity check when we know we lost some
	     precision.  */
	  && sum->call_size_time_table.length ()
	     < ipa_fn_summary::max_size_time_table_size)
	{
	  estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
					  possible_truths, avals);
	  /* Sizes must match exactly; times may differ by roundoff, so
	     only warn in the dump when they diverge by more than 1.  */
	  gcc_assert (*size == old_size);
	  if (time && (*time - old_time > 1 || *time - old_time < -1)
	      && dump_file)
	    fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
		     old_time.to_double (),
		     time->to_double ());
	}
    }
  /* Slow path by walking all edges.  */
  else
    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
				    possible_truths, avals);
}
/* Main constructor for ipa call context.  Memory allocation of ARG_VALUES
   is owned by the caller.  INLINE_PARAM_SUMMARY is also owned by the
   caller.  */

ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
				    clause_t nonspec_possible_truths,
				    vec<inline_param_summary>
				    inline_param_summary,
				    ipa_auto_call_arg_values *arg_values)
: m_node (node), m_possible_truths (possible_truths),
  m_nonspec_possible_truths (nonspec_possible_truths),
  m_inline_param_summary (inline_param_summary),
  /* m_avals only references ARG_VALUES; nothing is copied, so the
     context must not outlive the caller-owned vectors (use
     ipa_cached_call_context::duplicate_from for an owning copy).  */
  m_avals (arg_values)
{
}
/* Set THIS to be a duplicate of CTX.  Copy all relevant info.

   Each vector in CTX is deep-copied only when it contains at least one
   entry that can influence the summary evaluation (i.e. one attached to
   a parameter actually used by predicates, indirect calls or
   polymorphic calls); otherwise the corresponding member stays vNULL.
   The copies are owned by THIS and must be freed via release ().  */

void
ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
{
  m_node = ctx.m_node;
  m_possible_truths = ctx.m_possible_truths;
  m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  m_inline_param_summary = vNULL;
  /* Copy the info only if there is at least one useful entry.  */
  if (ctx.m_inline_param_summary.exists ())
    {
      unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
	    && !ctx.m_inline_param_summary[i].useless_p ())
	  {
	    m_inline_param_summary
	      = ctx.m_inline_param_summary.copy ();
	    break;
	  }
    }
  m_avals.m_known_vals = vNULL;
  if (ctx.m_avals.m_known_vals.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && ctx.m_avals.m_known_vals[i])
	  {
	    m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
	    break;
	  }
    }

  m_avals.m_known_contexts = vNULL;
  if (ctx.m_avals.m_known_contexts.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
	    && !ctx.m_avals.m_known_contexts[i].useless_p ())
	  {
	    m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
	    break;
	  }
    }

  m_avals.m_known_aggs = vNULL;
  if (ctx.m_avals.m_known_aggs.exists ())
    {
      /* Aggregate values are indexed by parameter; copy only if some
	 value belongs to a parameter used by an indirect call.  */
      const ipa_argagg_value_list avl (&ctx.m_avals);
      for (unsigned int i = 0; i < nargs; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && avl.value_for_index_p (i))
	  {
	    m_avals.m_known_aggs = ctx.m_avals.m_known_aggs.copy ();
	    break;
	  }
    }

  /* Value ranges are not preserved in cached contexts.  */
  m_avals.m_known_value_ranges = vNULL;
}
3680 /* Release memory used by known_vals/contexts/aggs vectors. and
3681 inline_param_summary. */
3683 void
3684 ipa_cached_call_context::release ()
3686 /* See if context is initialized at first place. */
3687 if (!m_node)
3688 return;
3689 m_avals.m_known_aggs.release ();
3690 m_avals.m_known_vals.release ();
3691 m_avals.m_known_contexts.release ();
3692 m_inline_param_summary.release ();
/* Return true if CTX describes the same call context as THIS.

   Only information that can affect summary evaluation is compared:
   entries attached to parameters not used by predicates, indirect
   calls or polymorphic calls are ignored, as are missing/useless
   entries that match a missing/useless entry on the other side.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  /* Out-of-range and useless entries are treated the same; they
	     are equal exactly when the other side is also absent or
	     useless.  */
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
		 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
	    {
	      if (i < ctx.m_avals.m_known_vals.length ()
		  && ctx.m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_vals.length ()
	      || !ctx.m_avals.m_known_vals[i])
	    {
	      if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  /* Known values are compared by pointer identity here — trees
	     are shared, so equal constants are the same node.  */
	  if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
	    return false;
	}
    }
  if (m_avals.m_known_contexts.exists ()
      || ctx.m_avals.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_contexts.length ()
	      || m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_avals.m_known_contexts.length ()
		  && !ctx.m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_contexts.length ()
	      || ctx.m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_avals.m_known_contexts.length ()
		  && !m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_contexts[i].equal_to
		 (ctx.m_avals.m_known_contexts[i]))
	    return false;
	}
    }
  if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
    {
      /* Merge-walk both aggregate value lists in parallel.  NOTE(review):
	 this relies on m_known_aggs being sorted by ascending parameter
	 index — confirm against the producers of these vectors.  */
      unsigned i = 0, j = 0;
      while (i < m_avals.m_known_aggs.length ()
	     || j < ctx.m_avals.m_known_aggs.length ())
	{
	  if (i >= m_avals.m_known_aggs.length ())
	    {
	      int idx2 = ctx.m_avals.m_known_aggs[j].index;
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
		return false;
	      j++;
	      continue;
	    }
	  if (j >= ctx.m_avals.m_known_aggs.length ())
	    {
	      int idx1 = m_avals.m_known_aggs[i].index;
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
		return false;
	      i++;
	      continue;
	    }

	  int idx1 = m_avals.m_known_aggs[i].index;
	  int idx2 = ctx.m_avals.m_known_aggs[j].index;
	  /* An entry present on one side only matters when its parameter
	     is used by an indirect call.  */
	  if (idx1 < idx2)
	    {
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
		return false;
	      i++;
	      continue;
	    }
	  if (idx1 > idx2)
	    {
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
		return false;
	      j++;
	      continue;
	    }
	  if (!ipa_is_param_used_by_indirect_call (params_summary, idx1))
	    {
	      i++;
	      j++;
	      continue;
	    }

	  if ((m_avals.m_known_aggs[i].unit_offset
	       != ctx.m_avals.m_known_aggs[j].unit_offset)
	      || (m_avals.m_known_aggs[i].by_ref
		  != ctx.m_avals.m_known_aggs[j].by_ref)
	      || !operand_equal_p (m_avals.m_known_aggs[i].value,
				   ctx.m_avals.m_known_aggs[j].value))
	    return false;
	  i++;
	  j++;
	}
    }
  return true;
}
/* Fill in the selected fields in ESTIMATES with value estimated for call in
   this context.  Always compute size and min_size.  Only compute time and
   nonspecialized_time if EST_TIMES is true.  Only compute hints if EST_HINTS
   is true.  */

void
ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
					  bool est_times, bool est_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  sreal loops_with_known_iterations = 0;
  sreal loops_with_known_strides = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s\n"
	       "   Known to be false: ", m_node->dump_name ());

      /* Dump every condition bit that is known false in this context.  */
      for (i = ipa_predicate::not_inlined_condition;
	   i < (ipa_predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* Account calls first; this also fills in devirtualization hints.  */
  if (m_node->callees || m_node->indirect_calls)
    estimate_calls_size_and_time (m_node, &size, &min_size,
				  est_times ? &time : NULL,
				  est_hints ? &hints : NULL, m_possible_truths,
				  &m_avals);

  sreal nonspecialized_time = time;

  /* Entry 0 of size_time_table is the unconditional portion of the
     body and always contributes to min_size.  */
  min_size += info->size_time_table[0].size;
  for (i = 0; info->size_time_table.iterate (i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
	{
	  bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!est_times)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      if (nonconst)
		time += e->time;
	    }
	  else
	    {
	      /* Scale the entry's time by the probability that its
		 predicate actually holds given how often parameters
		 change.  */
	      int prob = e->nonconst_predicate.probability
					       (info->conds, m_possible_truths,
						m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      if (prob == REG_BR_PROB_BASE)
		time += e->time;
	      else
		time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
	}
    }
  gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
  gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (est_hints)
    {
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;
      if (info->builtin_constant_p_parms.length ()
	  && DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_builtin_constant_p;

      /* A loop bound/stride predicate that evaluates to false in this
	 context means the bound/stride would become known after
	 specialization — a reason to prefer inlining.  */
      ipa_freqcounting_predicate *fcp;
      for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_iterations;
	    loops_with_known_iterations += fcp->freq;
	  }
      estimates->loops_with_known_iterations = loops_with_known_iterations;

      for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_stride;
	    loops_with_known_strides += fcp->freq;
	  }
      estimates->loops_with_known_strides = loops_with_known_strides;
    }

  /* Sizes were accumulated in size_scale units; convert back.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n   size:%i", (int) size);
      if (est_times)
	fprintf (dump_file, " time:%f nonspec time:%f",
		 time.to_double (), nonspecialized_time.to_double ());
      if (est_hints)
	fprintf (dump_file, " loops with known iterations:%f "
		 "known strides:%f", loops_with_known_iterations.to_double (),
		 loops_with_known_strides.to_double ());
      fprintf (dump_file, "\n");
    }
  if (est_times)
    {
      estimates->time = time;
      estimates->nonspecialized_time = nonspecialized_time;
    }
  estimates->size = size;
  estimates->min_size = min_size;
  if (est_hints)
    estimates->hints = hints;
  return;
}
4011 /* Estimate size and time needed to execute callee of EDGE assuming that
4012 parameters known to be constant at caller of EDGE are propagated.
4013 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
4014 and types for parameters. */
4016 void
4017 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
4018 ipa_auto_call_arg_values *avals,
4019 ipa_call_estimates *estimates)
4021 clause_t clause, nonspec_clause;
4023 evaluate_conditions_for_known_args (node, false, avals, &clause,
4024 &nonspec_clause, NULL);
4025 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
4026 ctx.estimate_size_and_time (estimates);
4029 /* Return stack frame offset where frame of NODE is supposed to start inside
4030 of the function it is inlined to.
4031 Return 0 for functions that are not inlined. */
4033 HOST_WIDE_INT
4034 ipa_get_stack_frame_offset (struct cgraph_node *node)
4036 HOST_WIDE_INT offset = 0;
4037 if (!node->inlined_to)
4038 return 0;
4039 node = node->callers->caller;
4040 while (true)
4042 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
4043 if (!node->inlined_to)
4044 return offset;
4045 node = node->callers->caller;
4050 /* Update summary information of inline clones after inlining.
4051 Compute peak stack usage. */
4053 static void
4054 inline_update_callee_summaries (struct cgraph_node *node, int depth)
4056 struct cgraph_edge *e;
4058 ipa_propagate_frequency (node);
4059 for (e = node->callees; e; e = e->next_callee)
4061 if (!e->inline_failed)
4062 inline_update_callee_summaries (e->callee, depth);
4063 else
4064 ipa_call_summaries->get (e)->loop_depth += depth;
4066 for (e = node->indirect_calls; e; e = e->next_callee)
4067 ipa_call_summaries->get (e)->loop_depth += depth;
4070 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
4071 INLINED_EDGE has been inlined.
4073 When function A is inlined in B and A calls C with parameter that
4074 changes with probability PROB1 and C is known to be passthrough
4075 of argument if B that change with probability PROB2, the probability
4076 of change is now PROB1*PROB2. */
4078 static void
4079 remap_edge_params (struct cgraph_edge *inlined_edge,
4080 struct cgraph_edge *edge)
4082 if (ipa_node_params_sum)
4084 int i;
4085 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4086 if (!args)
4087 return;
4088 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4089 class ipa_call_summary *inlined_es
4090 = ipa_call_summaries->get (inlined_edge);
4092 if (es->param.length () == 0)
4093 return;
4095 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
4097 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4098 if (jfunc->type == IPA_JF_PASS_THROUGH
4099 || jfunc->type == IPA_JF_ANCESTOR)
4101 int id = jfunc->type == IPA_JF_PASS_THROUGH
4102 ? ipa_get_jf_pass_through_formal_id (jfunc)
4103 : ipa_get_jf_ancestor_formal_id (jfunc);
4104 if (id < (int) inlined_es->param.length ())
4106 int prob1 = es->param[i].change_prob;
4107 int prob2 = inlined_es->param[id].change_prob;
4108 int prob = combine_probabilities (prob1, prob2);
4110 if (prob1 && prob2 && !prob)
4111 prob = 1;
4113 es->param[i].change_prob = prob;
4115 if (inlined_es
4116 ->param[id].points_to_local_or_readonly_memory)
4117 es->param[i].points_to_local_or_readonly_memory = true;
4118 if (inlined_es
4119 ->param[id].points_to_possible_sra_candidate)
4120 es->param[i].points_to_possible_sra_candidate = true;
4122 if (!es->param[i].points_to_local_or_readonly_memory
4123 && jfunc->type == IPA_JF_CONST
4124 && points_to_local_or_readonly_memory_p
4125 (ipa_get_jf_constant (jfunc)))
4126 es->param[i].points_to_local_or_readonly_memory = true;
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      const vec<int> &operand_map,
		      const vec<HOST_WIDE_INT> &offset_map,
		      clause_t possible_truths,
		      ipa_predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  /* NEXT is cached because edge_set_predicate may remove E from the
     list (e.g. when the predicate becomes false).  */
  for (e = node->callees; e; e = next)
    {
      ipa_predicate p;
      next = e->next_callee;

      if (e->inline_failed)
	{
	  class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_params (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* Edges without a predicate inherit the predicate guarding
	       the inlined call itself.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* Already-inlined edges: recurse into the inlined body.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      ipa_predicate p;
      next = e->next_callee;

      remap_edge_params (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
/* Run remap_after_inlining on each predicate in V.  */

static void
remap_freqcounting_predicate (class ipa_fn_summary *info,
			      class ipa_node_params *params_summary,
			      class ipa_fn_summary *callee_info,
			      vec<ipa_freqcounting_predicate, va_gc> *v,
			      const vec<int> &operand_map,
			      const vec<HOST_WIDE_INT> &offset_map,
			      clause_t possible_truths,
			      ipa_predicate *toplev_predicate)

{
  ipa_freqcounting_predicate *fcp;
  for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
    {
      ipa_predicate p
	= fcp->predicate->remap_after_inlining (info, params_summary,
						callee_info, operand_map,
						offset_map, possible_truths,
						*toplev_predicate);
      /* Only nontrivial results are conjoined into the stored predicate:
	 a trivially true remap adds no information, and a trivially
	 false one is ignored here, leaving the old predicate intact.  */
      if (p != false && p != true)
	*fcp->predicate &= p;
    }
}
4225 /* We inlined EDGE. Update summary of the function we inlined into. */
4227 void
4228 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
4230 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
4231 struct cgraph_node *to = (edge->caller->inlined_to
4232 ? edge->caller->inlined_to : edge->caller);
4233 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
4234 clause_t clause = 0; /* not_inline is known to be false. */
4235 size_time_entry *e;
4236 auto_vec<int, 8> operand_map;
4237 auto_vec<HOST_WIDE_INT, 8> offset_map;
4238 int i;
4239 ipa_predicate toplev_predicate;
4240 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4241 ipa_node_params *params_summary = (ipa_node_params_sum
4242 ? ipa_node_params_sum->get (to) : NULL);
4244 if (es->predicate)
4245 toplev_predicate = *es->predicate;
4246 else
4247 toplev_predicate = true;
4249 info->fp_expressions |= callee_info->fp_expressions;
4250 info->target_info |= callee_info->target_info;
4252 if (callee_info->conds)
4254 ipa_auto_call_arg_values avals;
4255 evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
4257 if (ipa_node_params_sum && callee_info->conds)
4259 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4260 int count = args ? ipa_get_cs_argument_count (args) : 0;
4261 int i;
4263 if (count)
4265 operand_map.safe_grow_cleared (count, true);
4266 offset_map.safe_grow_cleared (count, true);
4268 for (i = 0; i < count; i++)
4270 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4271 int map = -1;
4273 /* TODO: handle non-NOPs when merging. */
4274 if (jfunc->type == IPA_JF_PASS_THROUGH)
4276 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4277 map = ipa_get_jf_pass_through_formal_id (jfunc);
4278 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
4279 offset_map[i] = -1;
4281 else if (jfunc->type == IPA_JF_ANCESTOR)
4283 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
4284 if (offset >= 0 && offset < INT_MAX)
4286 map = ipa_get_jf_ancestor_formal_id (jfunc);
4287 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
4288 offset = -1;
4289 offset_map[i] = offset;
4292 operand_map[i] = map;
4293 gcc_assert (map < ipa_get_param_count (params_summary));
4296 int ip;
4297 for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
4298 if (ip < count && operand_map[ip] >= 0)
4299 add_builtin_constant_p_parm (info, operand_map[ip]);
4301 sreal freq = edge->sreal_frequency ();
4302 for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
4304 ipa_predicate p;
4305 p = e->exec_predicate.remap_after_inlining
4306 (info, params_summary,
4307 callee_info, operand_map,
4308 offset_map, clause,
4309 toplev_predicate);
4310 ipa_predicate nonconstp;
4311 nonconstp = e->nonconst_predicate.remap_after_inlining
4312 (info, params_summary,
4313 callee_info, operand_map,
4314 offset_map, clause,
4315 toplev_predicate);
4316 if (p != false && nonconstp != false)
4318 sreal add_time = ((sreal)e->time * freq);
4319 int prob = e->nonconst_predicate.probability (callee_info->conds,
4320 clause, es->param);
4321 if (prob != REG_BR_PROB_BASE)
4322 add_time = add_time * prob / REG_BR_PROB_BASE;
4323 if (prob != REG_BR_PROB_BASE
4324 && dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
4327 (double) prob / REG_BR_PROB_BASE);
4329 info->account_size_time (e->size, add_time, p, nonconstp);
4332 remap_edge_summaries (edge, edge->callee, info, params_summary,
4333 callee_info, operand_map,
4334 offset_map, clause, &toplev_predicate);
4335 remap_freqcounting_predicate (info, params_summary, callee_info,
4336 info->loop_iterations, operand_map,
4337 offset_map, clause, &toplev_predicate);
4338 remap_freqcounting_predicate (info, params_summary, callee_info,
4339 info->loop_strides, operand_map,
4340 offset_map, clause, &toplev_predicate);
4342 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4343 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4345 if (info->estimated_stack_size < peak)
4346 info->estimated_stack_size = peak;
4348 inline_update_callee_summaries (edge->callee, es->loop_depth);
4349 if (info->call_size_time_table.length ())
4351 int edge_size = 0;
4352 sreal edge_time = 0;
4354 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
4355 /* Unaccount size and time of the optimized out call. */
4356 info->account_size_time (-edge_size, -edge_time,
4357 es->predicate ? *es->predicate : true,
4358 es->predicate ? *es->predicate : true,
4359 true);
4360 /* Account new calls. */
4361 summarize_calls_size_and_time (edge->callee, info);
4364 /* Free summaries that are not maintained for inline clones/edges. */
4365 ipa_call_summaries->remove (edge);
4366 ipa_fn_summaries->remove (edge->callee);
4367 ipa_remove_from_growth_caches (edge);
4370 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4371 overall size and time. Recompute it.
4372 If RESET is true also recompute call_time_size_table. */
4374 void
4375 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4377 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4378 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4379 size_time_entry *e;
4380 int i;
/* Sum size and time over every size_time_table entry.  */
4382 size_info->size = 0;
4383 info->time = 0;
4384 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4386 size_info->size += e->size;
4387 info->time += e->time;
/* min_size starts from entry 0; this indexing assumes the table is never
   empty here — entry 0 appears to be the unconditionally accounted part.
   TODO(review): confirm the non-empty invariant against the builders.  */
4389 info->min_size = info->size_time_table[0].size;
4390 if (reset)
4391 info->call_size_time_table.release ();
/* Add the size/time of outgoing calls.  The clause only excludes the
   statically-false condition, i.e. all conditional code is counted.  */
4392 if (node->callees || node->indirect_calls)
4393 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4394 &info->time, NULL,
4395 ~(clause_t) (1 << ipa_predicate::false_condition),
4396 NULL);
/* Internal accounting is scaled by ipa_fn_summary::size_scale; convert
   the accumulated values back with rounding division.  */
4397 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4398 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4402 /* This function performs intraprocedural analysis in NODE that is required to
4403 inline indirect calls. */
4405 static void
4406 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4408 ipa_analyze_node (node);
4409 if (dump_file && (dump_flags & TDF_DETAILS))
4411 ipa_print_node_params (dump_file, node);
4412 ipa_print_node_jump_functions (dump_file, node);
4417 /* Note function body size. */
4419 void
4420 inline_analyze_function (struct cgraph_node *node)
4422 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4424 if (dump_file)
4425 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4426 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4427 inline_indirect_intraprocedural_analysis (node);
4428 compute_fn_summary (node, false);
4429 if (!optimize)
4431 struct cgraph_edge *e;
4432 for (e = node->callees; e; e = e->next_callee)
4433 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4434 for (e = node->indirect_calls; e; e = e->next_callee)
4435 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4438 pop_cfun ();
4442 /* Summary insertion hook: called when a new function is inserted into the
   callgraph late; (re)analyze it so it gets a fn summary like the rest.  */
4444 void
4445 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4447 inline_analyze_function (node);
4450 /* Note function body size. */
4452 static void
4453 ipa_fn_summary_generate (void)
4455 struct cgraph_node *node;
4457 FOR_EACH_DEFINED_FUNCTION (node)
4458 if (DECL_STRUCT_FUNCTION (node->decl))
4459 node->versionable = tree_versionable_function_p (node->decl);
4461 ipa_fn_summary_alloc ();
4463 ipa_fn_summaries->enable_insertion_hook ();
4465 ipa_register_cgraph_hooks ();
4467 FOR_EACH_DEFINED_FUNCTION (node)
4468 if (!node->alias
4469 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4470 || opt_for_fn (node->decl, optimize)))
4471 inline_analyze_function (node);
4475 /* Read inline summary for edge E from IB.  PREVAILS is true when E's caller
   prevails and the data should actually be stored; when false, everything is
   still read (and discarded) so the stream position stays in sync.  The read
   order below must exactly mirror write_ipa_call_summary.  */
4477 static void
4478 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4479 bool prevails)
4481 class ipa_call_summary *es = prevails
4482 ? ipa_call_summaries->get_create (e) : NULL;
4483 ipa_predicate p;
4484 int length, i;
4486 int size = streamer_read_uhwi (ib);
4487 int time = streamer_read_uhwi (ib);
4488 int depth = streamer_read_uhwi (ib);
4490 if (es)
4492 es->call_stmt_size = size;
4493 es->call_stmt_time = time;
4494 es->loop_depth = depth;
/* The flag must be unpacked even when there is no summary to store it in,
   to keep the bitpack consumed.  */
4497 bitpack_d bp = streamer_read_bitpack (ib);
4498 if (es)
4499 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4500 else
4501 bp_unpack_value (&bp, 1);
4503 p.stream_in (ib);
4504 if (es)
4505 edge_set_predicate (e, &p);
4506 length = streamer_read_uhwi (ib);
/* Per-parameter data is kept only when this edge may end up expanded in
   this translation unit, or targets a builtin (see comment below);
   otherwise the records are read and dropped.  */
4507 if (length && es
4508 && (e->possibly_call_in_translation_unit_p ()
4509 /* Also stream in jump functions to builtins in hope that they
4510 will get fnspecs. */
4511 || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
4513 es->param.safe_grow_cleared (length, true);
4514 for (i = 0; i < length; i++)
4516 es->param[i].change_prob = streamer_read_uhwi (ib);
4517 bitpack_d bp = streamer_read_bitpack (ib);
4518 es->param[i].points_to_local_or_readonly_memory
4519 = bp_unpack_value (&bp, 1);
4520 es->param[i].points_to_possible_sra_candidate
4521 = bp_unpack_value (&bp, 1);
4524 else
/* Skip over per-parameter records that are not being kept.  */
4526 for (i = 0; i < length; i++)
4528 streamer_read_uhwi (ib);
4529 streamer_read_uhwi (ib);
4535 /* Stream in inline summaries from the section.  DATA/LEN is the raw section
   contents of FILE_DATA; the read order must exactly mirror
   ipa_fn_summary_write.  */
4537 static void
4538 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4539 size_t len)
4541 const struct lto_function_header *header =
4542 (const struct lto_function_header *) data;
4543 const int cfg_offset = sizeof (struct lto_function_header);
4544 const int main_offset = cfg_offset + header->cfg_size;
4545 const int string_offset = main_offset + header->main_size;
4546 class data_in *data_in;
4547 unsigned int i, count2, j;
4548 unsigned int f_count;
4550 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4551 file_data);
4553 data_in =
4554 lto_data_in_create (file_data, (const char *) data + string_offset,
4555 header->string_size, vNULL);
/* First word is the number of per-function records that follow.  */
4556 f_count = streamer_read_uhwi (&ib);
4557 for (i = 0; i < f_count; i++)
4559 unsigned int index;
4560 struct cgraph_node *node;
4561 class ipa_fn_summary *info;
4562 class ipa_node_params *params_summary;
4563 class ipa_size_summary *size_info;
4564 lto_symtab_encoder_t encoder;
4565 struct bitpack_d bp;
4566 struct cgraph_edge *e;
4567 ipa_predicate p;
4569 index = streamer_read_uhwi (&ib);
4570 encoder = file_data->symtab_node_encoder;
4571 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4572 index));
/* For non-prevailing nodes all summary pointers stay NULL; the record is
   still fully decoded below to keep the stream in sync.  */
4573 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4574 params_summary = node->prevailing_p ()
4575 ? ipa_node_params_sum->get (node) : NULL;
4576 size_info = node->prevailing_p ()
4577 ? ipa_size_summaries->get_create (node) : NULL;
4579 int stack_size = streamer_read_uhwi (&ib);
4580 int size = streamer_read_uhwi (&ib);
4581 sreal time = sreal::stream_in (&ib);
4583 if (info)
4585 info->estimated_stack_size
4586 = size_info->estimated_self_stack_size = stack_size;
4587 size_info->size = size_info->self_size = size;
4588 info->time = time;
/* Flags: inlinable and fp_expressions, followed (except in the offload
   stream) by target_info.  */
4591 bp = streamer_read_bitpack (&ib);
4592 if (info)
4594 info->inlinable = bp_unpack_value (&bp, 1);
4595 info->fp_expressions = bp_unpack_value (&bp, 1);
4596 if (!lto_stream_offload_p)
4597 info->target_info = streamer_read_uhwi (&ib);
4599 else
4601 bp_unpack_value (&bp, 1);
4602 bp_unpack_value (&bp, 1);
4603 if (!lto_stream_offload_p)
4604 streamer_read_uhwi (&ib);
/* Conditions used by predicates, each possibly followed by a chain of
   expression-evaluation operations.  */
4607 count2 = streamer_read_uhwi (&ib);
4608 gcc_assert (!info || !info->conds);
4609 if (info)
4610 vec_safe_reserve_exact (info->conds, count2);
4611 for (j = 0; j < count2; j++)
4613 struct condition c;
4614 unsigned int k, count3;
4615 c.operand_num = streamer_read_uhwi (&ib);
4616 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4617 c.type = stream_read_tree (&ib, data_in);
4618 c.val = stream_read_tree (&ib, data_in);
4619 bp = streamer_read_bitpack (&ib);
4620 c.agg_contents = bp_unpack_value (&bp, 1);
4621 c.by_ref = bp_unpack_value (&bp, 1);
/* Offset is streamed only for aggregate-content conditions; matches the
   writer side.  */
4622 if (c.agg_contents)
4623 c.offset = streamer_read_uhwi (&ib);
4624 count3 = streamer_read_uhwi (&ib);
4625 c.param_ops = NULL;
4626 if (info)
4627 vec_safe_reserve_exact (c.param_ops, count3);
4628 if (params_summary)
4629 ipa_set_param_used_by_ipa_predicates
4630 (params_summary, c.operand_num, true);
4631 for (k = 0; k < count3; k++)
4633 struct expr_eval_op op;
4634 enum gimple_rhs_class rhs_class;
4635 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4636 op.type = stream_read_tree (&ib, data_in);
/* The number of streamed operands depends on the RHS class of the
   operation code.  */
4637 switch (rhs_class = get_gimple_rhs_class (op.code))
4639 case GIMPLE_UNARY_RHS:
4640 op.index = 0;
4641 op.val[0] = NULL_TREE;
4642 op.val[1] = NULL_TREE;
4643 break;
4645 case GIMPLE_BINARY_RHS:
4646 case GIMPLE_TERNARY_RHS:
4647 bp = streamer_read_bitpack (&ib);
4648 op.index = bp_unpack_value (&bp, 2);
4649 op.val[0] = stream_read_tree (&ib, data_in);
4650 if (rhs_class == GIMPLE_BINARY_RHS)
4651 op.val[1] = NULL_TREE;
4652 else
4653 op.val[1] = stream_read_tree (&ib, data_in);
4654 break;
/* Any other RHS class cannot have been written; bail out hard.  */
4656 default:
4657 fatal_error (UNKNOWN_LOCATION,
4658 "invalid fnsummary in LTO stream");
4660 if (info)
4661 c.param_ops->quick_push (op);
4663 if (info)
4664 info->conds->quick_push (c);
/* size_time_table entries.  */
4666 count2 = streamer_read_uhwi (&ib);
4667 gcc_assert (!info || !info->size_time_table.length ());
4668 if (info && count2)
4669 info->size_time_table.reserve_exact (count2);
4670 for (j = 0; j < count2; j++)
4672 class size_time_entry e;
4674 e.size = streamer_read_uhwi (&ib);
4675 e.time = sreal::stream_in (&ib);
4676 e.exec_predicate.stream_in (&ib);
4677 e.nonconst_predicate.stream_in (&ib);
4679 if (info)
4680 info->size_time_table.quick_push (e);
/* Frequency-counting predicates: first loop_iterations, then
   loop_strides, in the same shape.  */
4683 count2 = streamer_read_uhwi (&ib);
4684 for (j = 0; j < count2; j++)
4686 p.stream_in (&ib);
4687 sreal fcp_freq = sreal::stream_in (&ib);
4688 if (info)
4690 ipa_freqcounting_predicate fcp;
4691 fcp.predicate = NULL;
4692 set_hint_predicate (&fcp.predicate, p);
4693 fcp.freq = fcp_freq;
4694 vec_safe_push (info->loop_iterations, fcp);
4697 count2 = streamer_read_uhwi (&ib);
4698 for (j = 0; j < count2; j++)
4700 p.stream_in (&ib);
4701 sreal fcp_freq = sreal::stream_in (&ib);
4702 if (info)
4704 ipa_freqcounting_predicate fcp;
4705 fcp.predicate = NULL;
4706 set_hint_predicate (&fcp.predicate, p);
4707 fcp.freq = fcp_freq;
4708 vec_safe_push (info->loop_strides, fcp);
/* Indices of parameters tested with __builtin_constant_p.  */
4711 count2 = streamer_read_uhwi (&ib);
4712 if (info && count2)
4713 info->builtin_constant_p_parms.reserve_exact (count2);
4714 for (j = 0; j < count2; j++)
4716 int parm = streamer_read_uhwi (&ib);
4717 if (info)
4718 info->builtin_constant_p_parms.quick_push (parm);
/* Finally the per-edge summaries, direct edges first, matching the write
   order.  */
4720 for (e = node->callees; e; e = e->next_callee)
4721 read_ipa_call_summary (&ib, e, info != NULL);
4722 for (e = node->indirect_calls; e; e = e->next_callee)
4723 read_ipa_call_summary (&ib, e, info != NULL);
4726 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4727 len);
4728 lto_data_in_delete (data_in);
4732 /* Read inline summary. Jump functions are shared among ipa-cp
4733 and inliner, so when ipa-cp is active, we don't need to write them
4734 twice. */
4736 static void
4737 ipa_fn_summary_read (void)
4739 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4740 struct lto_file_decl_data *file_data;
4741 unsigned int j = 0;
4743 ipa_prop_read_jump_functions ();
4744 ipa_fn_summary_alloc ();
4746 while ((file_data = file_data_vec[j++]))
4748 size_t len;
4749 const char *data
4750 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4751 &len);
4752 if (data)
4753 inline_read_section (file_data, data, len);
4754 else
4755 /* Fatal error here. We do not want to support compiling ltrans units
4756 with different version of compiler or different flags than the WPA
4757 unit, so this should never happen. */
4758 fatal_error (input_location,
4759 "ipa inline summary is missing in input file");
4761 ipa_register_cgraph_hooks ();
4763 gcc_assert (ipa_fn_summaries);
4764 ipa_fn_summaries->enable_insertion_hook ();
4768 /* Write inline summary for edge E to OB.  The write order here must be
   mirrored exactly by read_ipa_call_summary.  */
4770 static void
4771 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4773 class ipa_call_summary *es = ipa_call_summaries->get (e);
4774 int i;
4776 streamer_write_uhwi (ob, es->call_stmt_size);
4777 streamer_write_uhwi (ob, es->call_stmt_time);
4778 streamer_write_uhwi (ob, es->loop_depth);
4780 bitpack_d bp = bitpack_create (ob->main_stream);
4781 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4782 streamer_write_bitpack (&bp);
/* A missing predicate is encoded as 0 — the reader's predicate
   stream_in understands that marker.  */
4784 if (es->predicate)
4785 es->predicate->stream_out (ob);
4786 else
4787 streamer_write_uhwi (ob, 0);
/* Per-parameter records: change probability plus two packed flags.  */
4788 streamer_write_uhwi (ob, es->param.length ());
4789 for (i = 0; i < (int) es->param.length (); i++)
4791 streamer_write_uhwi (ob, es->param[i].change_prob);
4792 bp = bitpack_create (ob->main_stream);
4793 bp_pack_value (&bp, es->param[i].points_to_local_or_readonly_memory, 1);
4794 bp_pack_value (&bp, es->param[i].points_to_possible_sra_candidate, 1);
4795 streamer_write_bitpack (&bp);
4800 /* Write inline summary for node in SET.
4801 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4802 active, we don't need to write them twice.
   The write order must be mirrored exactly by inline_read_section.  */
4804 static void
4805 ipa_fn_summary_write (void)
4807 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4808 lto_symtab_encoder_iterator lsei;
4809 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4810 unsigned int count = 0;
/* First pass: count records so the reader knows how many to expect.  */
4812 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4813 lsei_next_function_in_partition (&lsei))
4815 cgraph_node *cnode = lsei_cgraph_node (lsei);
4816 if (cnode->definition && !cnode->alias)
4817 count++;
4819 streamer_write_uhwi (ob, count);
/* Second pass: emit one record per defined non-alias function, using the
   same filter as the counting loop above.  */
4821 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4822 lsei_next_function_in_partition (&lsei))
4824 cgraph_node *cnode = lsei_cgraph_node (lsei);
4825 if (cnode->definition && !cnode->alias)
4827 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4828 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4829 struct bitpack_d bp;
4830 struct cgraph_edge *edge;
4831 int i;
4832 size_time_entry *e;
4833 struct condition *c;
4835 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4836 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4837 streamer_write_hwi (ob, size_info->self_size);
4838 info->time.stream_out (ob);
4839 bp = bitpack_create (ob->main_stream);
4840 bp_pack_value (&bp, info->inlinable, 1);
4841 bp_pack_value (&bp, info->fp_expressions, 1);
4842 streamer_write_bitpack (&bp);
/* target_info is omitted from the offload stream; the reader skips it
   under the same condition.  */
4843 if (!lto_stream_offload_p)
4844 streamer_write_uhwi (ob, info->target_info);
/* Conditions and their expression-evaluation operation chains.  */
4845 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4846 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4848 int j;
4849 struct expr_eval_op *op;
4851 streamer_write_uhwi (ob, c->operand_num);
4852 streamer_write_uhwi (ob, c->code);
4853 stream_write_tree (ob, c->type, true);
4854 stream_write_tree (ob, c->val, true);
4855 bp = bitpack_create (ob->main_stream);
4856 bp_pack_value (&bp, c->agg_contents, 1);
4857 bp_pack_value (&bp, c->by_ref, 1);
4858 streamer_write_bitpack (&bp);
4859 if (c->agg_contents)
4860 streamer_write_uhwi (ob, c->offset);
4861 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4862 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4864 streamer_write_uhwi (ob, op->code);
4865 stream_write_tree (ob, op->type, true);
/* Operands are only written when val[0] is set; the reader infers the
   operand count from the RHS class of op->code instead.  */
4866 if (op->val[0])
4868 bp = bitpack_create (ob->main_stream);
4869 bp_pack_value (&bp, op->index, 2);
4870 streamer_write_bitpack (&bp);
4871 stream_write_tree (ob, op->val[0], true);
4872 if (op->val[1])
4873 stream_write_tree (ob, op->val[1], true);
/* size_time_table entries.  */
4877 streamer_write_uhwi (ob, info->size_time_table.length ());
4878 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4880 streamer_write_uhwi (ob, e->size);
4881 e->time.stream_out (ob);
4882 e->exec_predicate.stream_out (ob);
4883 e->nonconst_predicate.stream_out (ob);
/* Frequency-counting predicates: loop_iterations then loop_strides.  */
4885 ipa_freqcounting_predicate *fcp;
4886 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4887 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4889 fcp->predicate->stream_out (ob);
4890 fcp->freq.stream_out (ob);
4892 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4893 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4895 fcp->predicate->stream_out (ob);
4896 fcp->freq.stream_out (ob);
/* Parameters tested with __builtin_constant_p.  */
4898 streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
4899 int ip;
4900 for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
4901 i++)
4902 streamer_write_uhwi (ob, ip);
/* Per-edge summaries, direct edges first — matching the read order.  */
4903 for (edge = cnode->callees; edge; edge = edge->next_callee)
4904 write_ipa_call_summary (ob, edge);
4905 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4906 write_ipa_call_summary (ob, edge);
4909 streamer_write_char_stream (ob->main_stream, 0);
4910 produce_asm (ob, NULL);
4911 destroy_output_block (ob);
4913 ipa_prop_write_jump_functions ();
4917 /* Release function summary. */
4919 void
4920 ipa_free_fn_summary (void)
4922 if (!ipa_call_summaries)
4923 return;
4924 ggc_delete (ipa_fn_summaries);
4925 ipa_fn_summaries = NULL;
4926 delete ipa_call_summaries;
4927 ipa_call_summaries = NULL;
4928 edge_predicate_pool.release ();
4929 /* During IPA this is one of largest datastructures to release. */
4930 if (flag_wpa)
4931 ggc_trim ();
4934 /* Release function summary. */
4936 void
4937 ipa_free_size_summary (void)
4939 if (!ipa_size_summaries)
4940 return;
4941 delete ipa_size_summaries;
4942 ipa_size_summaries = NULL;
/* GIMPLE pass "local-fnsummary": computes the summary for the function
   currently being compiled via compute_fn_summary_for_current.  It is
   clonable so the pass manager can schedule it more than once.  */
4945 namespace {
4947 const pass_data pass_data_local_fn_summary =
4949 GIMPLE_PASS, /* type */
4950 "local-fnsummary", /* name */
4951 OPTGROUP_INLINE, /* optinfo_flags */
4952 TV_INLINE_PARAMETERS, /* tv_id */
4953 0, /* properties_required */
4954 0, /* properties_provided */
4955 0, /* properties_destroyed */
4956 0, /* todo_flags_start */
4957 0, /* todo_flags_finish */
4960 class pass_local_fn_summary : public gimple_opt_pass
4962 public:
4963 pass_local_fn_summary (gcc::context *ctxt)
4964 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4967 /* opt_pass methods: */
4968 opt_pass * clone () final override
4970 return new pass_local_fn_summary (m_ctxt);
4972 unsigned int execute (function *) final override
4974 return compute_fn_summary_for_current ();
4977 }; // class pass_local_fn_summary
4979 } // anon namespace
/* Factory used by the pass manager to instantiate the pass.  */
4981 gimple_opt_pass *
4982 make_pass_local_fn_summary (gcc::context *ctxt)
4984 return new pass_local_fn_summary (ctxt);
4988 /* Simple IPA pass "free-fnsummary": releases the function summaries (and
   the no-longer-needed ipa-prop structures); the size summaries are kept
   during WPA.  */
4990 namespace {
4992 const pass_data pass_data_ipa_free_fn_summary =
4994 SIMPLE_IPA_PASS, /* type */
4995 "free-fnsummary", /* name */
4996 OPTGROUP_NONE, /* optinfo_flags */
4997 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4998 0, /* properties_required */
4999 0, /* properties_provided */
5000 0, /* properties_destroyed */
5001 0, /* todo_flags_start */
5002 0, /* todo_flags_finish */
5005 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
5007 public:
5008 pass_ipa_free_fn_summary (gcc::context *ctxt)
5009 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
5010 small_p (false)
5013 /* opt_pass methods: */
5014 opt_pass *clone () final override
5016 return new pass_ipa_free_fn_summary (m_ctxt);
/* Pass parameter 0 selects the "small" variant; note that small_p is not
   consulted anywhere in this pass — presumably kept for the pass-pipeline
   definition.  TODO(review): confirm against passes.def.  */
5018 void set_pass_param (unsigned int n, bool param) final override
5020 gcc_assert (n == 0);
5021 small_p = param;
5023 bool gate (function *) final override { return true; }
5024 unsigned int execute (function *) final override
5026 ipa_free_fn_summary ();
5027 /* Free ipa-prop structures if they are no longer needed. */
5028 ipa_free_all_structures_after_iinln ();
/* Size summaries survive WPA; ltrans still needs them.  */
5029 if (!flag_wpa)
5030 ipa_free_size_summary ();
5031 return 0;
5034 private:
5035 bool small_p;
5036 }; // class pass_ipa_free_fn_summary
5038 } // anon namespace
/* Factory used by the pass manager to instantiate the pass.  */
5040 simple_ipa_opt_pass *
5041 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
5043 return new pass_ipa_free_fn_summary (ctxt);
/* IPA pass "fnsummary": generation, LTO writing and reading of function
   summaries are done through the hooks passed to ipa_opt_pass_d below;
   execute itself is a no-op.  */
5046 namespace {
5048 const pass_data pass_data_ipa_fn_summary =
5050 IPA_PASS, /* type */
5051 "fnsummary", /* name */
5052 OPTGROUP_INLINE, /* optinfo_flags */
5053 TV_IPA_FNSUMMARY, /* tv_id */
5054 0, /* properties_required */
5055 0, /* properties_provided */
5056 0, /* properties_destroyed */
5057 0, /* todo_flags_start */
5058 ( TODO_dump_symtab ), /* todo_flags_finish */
5061 class pass_ipa_fn_summary : public ipa_opt_pass_d
5063 public:
5064 pass_ipa_fn_summary (gcc::context *ctxt)
5065 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
5066 ipa_fn_summary_generate, /* generate_summary */
5067 ipa_fn_summary_write, /* write_summary */
5068 ipa_fn_summary_read, /* read_summary */
5069 NULL, /* write_optimization_summary */
5070 NULL, /* read_optimization_summary */
5071 NULL, /* stmt_fixup */
5072 0, /* function_transform_todo_flags_start */
5073 NULL, /* function_transform */
5074 NULL) /* variable_transform */
5077 /* opt_pass methods: */
5078 unsigned int execute (function *) final override { return 0; }
5080 }; // class pass_ipa_fn_summary
5082 } // anon namespace
/* Factory used by the pass manager to instantiate the pass.  */
5084 ipa_opt_pass_d *
5085 make_pass_ipa_fn_summary (gcc::context *ctxt)
5087 return new pass_ipa_fn_summary (ctxt);
5090 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
5091 within the same process. For use by toplev::finalize. */
5093 void
5094 ipa_fnsummary_cc_finalize (void)
5096 ipa_free_fn_summary ();