tree-optimization/113126 - vector extension compare optimization
[official-gcc.git] / gcc / ipa-fnsummary.cc
blob74c9b4e1d1e5676f33162dee373a104fc11b5887
1 /* Function summary pass.
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "target.h"
60 #include "tree.h"
61 #include "gimple.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
64 #include "ssa.h"
65 #include "tree-streamer.h"
66 #include "cgraph.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
72 #include "cfganal.h"
73 #include "gimple-iterator.h"
74 #include "tree-cfg.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
78 #include "ipa-prop.h"
79 #include "ipa-fnsummary.h"
80 #include "cfgloop.h"
81 #include "tree-scalar-evolution.h"
82 #include "ipa-utils.h"
83 #include "cfgexpand.h"
84 #include "gimplify.h"
85 #include "stringpool.h"
86 #include "attribs.h"
87 #include "tree-into-ssa.h"
88 #include "symtab-clones.h"
89 #include "gimple-range.h"
90 #include "tree-dfa.h"
92 /* Summaries. */
93 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
94 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
95 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
97 /* Edge predicates goes here. */
98 static object_allocator<ipa_predicate> edge_predicate_pool ("edge predicates");
101 /* Dump IPA hints. */
102 void
103 ipa_dump_hints (FILE *f, ipa_hints hints)
105 if (!hints)
106 return;
107 fprintf (f, "IPA hints:");
108 if (hints & INLINE_HINT_indirect_call)
110 hints &= ~INLINE_HINT_indirect_call;
111 fprintf (f, " indirect_call");
113 if (hints & INLINE_HINT_loop_iterations)
115 hints &= ~INLINE_HINT_loop_iterations;
116 fprintf (f, " loop_iterations");
118 if (hints & INLINE_HINT_loop_stride)
120 hints &= ~INLINE_HINT_loop_stride;
121 fprintf (f, " loop_stride");
123 if (hints & INLINE_HINT_same_scc)
125 hints &= ~INLINE_HINT_same_scc;
126 fprintf (f, " same_scc");
128 if (hints & INLINE_HINT_in_scc)
130 hints &= ~INLINE_HINT_in_scc;
131 fprintf (f, " in_scc");
133 if (hints & INLINE_HINT_cross_module)
135 hints &= ~INLINE_HINT_cross_module;
136 fprintf (f, " cross_module");
138 if (hints & INLINE_HINT_declared_inline)
140 hints &= ~INLINE_HINT_declared_inline;
141 fprintf (f, " declared_inline");
143 if (hints & INLINE_HINT_known_hot)
145 hints &= ~INLINE_HINT_known_hot;
146 fprintf (f, " known_hot");
148 if (hints & INLINE_HINT_builtin_constant_p)
150 hints &= ~INLINE_HINT_builtin_constant_p;
151 fprintf (f, " builtin_constant_p");
153 gcc_assert (!hints);
157 /* Record SIZE and TIME to SUMMARY.
158 The accounted code will be executed when EXEC_PRED is true.
159 When NONCONST_PRED is false the code will evaluate to constant and
160 will get optimized out in specialized clones of the function.
161 If CALL is true account to call_size_time_table rather than
162 size_time_table. */
164 void
165 ipa_fn_summary::account_size_time (int size, sreal time,
166 const ipa_predicate &exec_pred,
167 const ipa_predicate &nonconst_pred_in,
168 bool call)
170 size_time_entry *e;
171 bool found = false;
172 int i;
173 ipa_predicate nonconst_pred;
174 vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;
176 if (exec_pred == false)
177 return;
179 nonconst_pred = nonconst_pred_in & exec_pred;
181 if (nonconst_pred == false)
182 return;
184 /* We need to create initial empty unconditional clause, but otherwise
185 we don't need to account empty times and sizes. */
186 if (!size && time == 0 && table->length ())
187 return;
189 /* Only for calls we are unaccounting what we previously recorded. */
190 gcc_checking_assert (time >= 0 || call);
192 for (i = 0; table->iterate (i, &e); i++)
193 if (e->exec_predicate == exec_pred
194 && e->nonconst_predicate == nonconst_pred)
196 found = true;
197 break;
199 if (i == max_size_time_table_size)
201 i = 0;
202 found = true;
203 e = &(*table)[0];
204 if (dump_file && (dump_flags & TDF_DETAILS))
205 fprintf (dump_file,
206 "\t\tReached limit on number of entries, "
207 "ignoring the predicate.");
209 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
211 fprintf (dump_file,
212 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
213 ((double) size) / ipa_fn_summary::size_scale,
214 (time.to_double ()), found ? "" : "new ");
215 exec_pred.dump (dump_file, conds, 0);
216 if (exec_pred != nonconst_pred)
218 fprintf (dump_file, " nonconst:");
219 nonconst_pred.dump (dump_file, conds);
221 else
222 fprintf (dump_file, "\n");
224 if (!found)
226 class size_time_entry new_entry;
227 new_entry.size = size;
228 new_entry.time = time;
229 new_entry.exec_predicate = exec_pred;
230 new_entry.nonconst_predicate = nonconst_pred;
231 if (call)
232 call_size_time_table.safe_push (new_entry);
233 else
234 size_time_table.safe_push (new_entry);
236 else
238 e->size += size;
239 e->time += time;
240 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
241 /* Tolerate small roundoff issues. */
242 if (e->time < 0)
243 e->time = 0;
247 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
249 static struct cgraph_edge *
250 redirect_to_unreachable (struct cgraph_edge *e)
252 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
253 struct cgraph_node *target
254 = cgraph_node::get_create (builtin_decl_unreachable ());
256 if (e->speculative)
257 e = cgraph_edge::resolve_speculation (e, target->decl);
258 else if (!e->callee)
259 e = cgraph_edge::make_direct (e, target);
260 else
261 e->redirect_callee (target);
262 class ipa_call_summary *es = ipa_call_summaries->get (e);
263 e->inline_failed = CIF_UNREACHABLE;
264 e->count = profile_count::zero ();
265 es->call_stmt_size = 0;
266 es->call_stmt_time = 0;
267 if (callee)
268 callee->remove_symbol_and_inline_clones ();
269 return e;
272 /* Set predicate for edge E. */
274 static void
275 edge_set_predicate (struct cgraph_edge *e, ipa_predicate *predicate)
277 /* If the edge is determined to be never executed, redirect it
278 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
279 be optimized out. */
280 if (predicate && *predicate == false
281 /* When handling speculative edges, we need to do the redirection
282 just once. Do it always on the direct edge, so we do not
283 attempt to resolve speculation while duplicating the edge. */
284 && (!e->speculative || e->callee))
285 e = redirect_to_unreachable (e);
287 class ipa_call_summary *es = ipa_call_summaries->get (e);
288 if (predicate && *predicate != true)
290 if (!es->predicate)
291 es->predicate = edge_predicate_pool.allocate ();
292 *es->predicate = *predicate;
294 else
296 if (es->predicate)
297 edge_predicate_pool.remove (es->predicate);
298 es->predicate = NULL;
302 /* Set predicate for hint *P. */
304 static void
305 set_hint_predicate (ipa_predicate **p, ipa_predicate new_predicate)
307 if (new_predicate == false || new_predicate == true)
309 if (*p)
310 edge_predicate_pool.remove (*p);
311 *p = NULL;
313 else
315 if (!*p)
316 *p = edge_predicate_pool.allocate ();
317 **p = new_predicate;
321 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
322 Otherwise add a new item to the vector with this predicate and frerq equal
323 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
324 in which case the function does nothing. */
326 static void
327 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
328 const ipa_predicate &new_predicate, sreal add_freq,
329 unsigned max_num_predicates)
331 if (new_predicate == false || new_predicate == true)
332 return;
333 ipa_freqcounting_predicate *f;
334 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
335 if (new_predicate == f->predicate)
337 f->freq += add_freq;
338 return;
340 if (vec_safe_length (*v) >= max_num_predicates)
341 /* Too many different predicates to account for. */
342 return;
344 ipa_freqcounting_predicate fcp;
345 fcp.predicate = NULL;
346 set_hint_predicate (&fcp.predicate, new_predicate);
347 fcp.freq = add_freq;
348 vec_safe_push (*v, fcp);
349 return;
352 /* Compute what conditions may or may not hold given information about
353 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
354 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
355 copy when called in a given context. It is a bitmask of conditions. Bit
356 0 means that condition is known to be false, while bit 1 means that condition
357 may or may not be true. These differs - for example NOT_INLINED condition
358 is always false in the second and also builtin_constant_p tests cannot use
359 the fact that parameter is indeed a constant.
361 When INLINE_P is true, assume that we are inlining. AVAL contains known
362 information about argument values. The function does not modify its content
363 and so AVALs could also be of type ipa_call_arg_values but so far all
364 callers work with the auto version and so we avoid the conversion for
365 convenience.
367 ERROR_MARK value of an argument means compile time invariant. */
369 static void
370 evaluate_conditions_for_known_args (struct cgraph_node *node,
371 bool inline_p,
372 ipa_auto_call_arg_values *avals,
373 clause_t *ret_clause,
374 clause_t *ret_nonspec_clause,
375 ipa_call_summary *es)
377 clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
378 clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
379 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
380 int i;
381 struct condition *c;
383 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
385 tree val = NULL;
386 tree res;
387 int j;
388 struct expr_eval_op *op;
390 if (c->code == ipa_predicate::not_sra_candidate)
392 if (!inline_p
393 || !es
394 || (int)es->param.length () <= c->operand_num
395 || !es->param[c->operand_num].points_to_possible_sra_candidate)
396 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
397 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
398 continue;
401 if (c->agg_contents)
403 if (c->code == ipa_predicate::changed
404 && !c->by_ref
405 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
406 continue;
408 if (tree sval = avals->safe_sval_at (c->operand_num))
409 val = ipa_find_agg_cst_from_init (sval, c->offset, c->by_ref);
410 if (!val)
412 ipa_argagg_value_list avs (avals);
413 val = avs.get_value (c->operand_num, c->offset / BITS_PER_UNIT,
414 c->by_ref);
417 else
419 val = avals->safe_sval_at (c->operand_num);
420 if (val && val == error_mark_node
421 && c->code != ipa_predicate::changed)
422 val = NULL_TREE;
425 if (!val
426 && (c->code == ipa_predicate::changed
427 || c->code == ipa_predicate::is_not_constant))
429 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
430 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
431 continue;
433 if (c->code == ipa_predicate::changed)
435 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
436 continue;
439 if (c->code == ipa_predicate::is_not_constant)
441 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
442 continue;
445 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
447 if (c->type != TREE_TYPE (val))
448 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
449 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
451 if (!val)
452 break;
453 if (!op->val[0])
454 val = fold_unary (op->code, op->type, val);
455 else if (!op->val[1])
456 val = fold_binary (op->code, op->type,
457 op->index ? op->val[0] : val,
458 op->index ? val : op->val[0]);
459 else if (op->index == 0)
460 val = fold_ternary (op->code, op->type,
461 val, op->val[0], op->val[1]);
462 else if (op->index == 1)
463 val = fold_ternary (op->code, op->type,
464 op->val[0], val, op->val[1]);
465 else if (op->index == 2)
466 val = fold_ternary (op->code, op->type,
467 op->val[0], op->val[1], val);
468 else
469 val = NULL_TREE;
472 res = val
473 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
474 : NULL;
476 if (res && integer_zerop (res))
477 continue;
478 if (res && integer_onep (res))
480 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
481 nonspec_clause
482 |= 1 << (i + ipa_predicate::first_dynamic_condition);
483 continue;
486 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
487 && !c->agg_contents
488 && (!val || TREE_CODE (val) != INTEGER_CST))
490 Value_Range vr (avals->m_known_value_ranges[c->operand_num]);
491 if (!vr.undefined_p ()
492 && !vr.varying_p ()
493 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
495 if (!useless_type_conversion_p (c->type, vr.type ()))
496 range_cast (vr, c->type);
498 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
500 if (vr.varying_p () || vr.undefined_p ())
501 break;
503 Value_Range res (op->type);
504 if (!op->val[0])
506 Value_Range varying (op->type);
507 varying.set_varying (op->type);
508 range_op_handler handler (op->code);
509 if (!handler
510 || !res.supports_type_p (op->type)
511 || !handler.fold_range (res, op->type, vr, varying))
512 res.set_varying (op->type);
514 else if (!op->val[1])
516 Value_Range op0 (op->type);
517 range_op_handler handler (op->code);
519 ipa_range_set_and_normalize (op0, op->val[0]);
521 if (!handler
522 || !res.supports_type_p (op->type)
523 || !handler.fold_range (res, op->type,
524 op->index ? op0 : vr,
525 op->index ? vr : op0))
526 res.set_varying (op->type);
528 else
529 res.set_varying (op->type);
530 vr = res;
532 if (!vr.varying_p () && !vr.undefined_p ())
534 int_range<2> res;
535 Value_Range val_vr (TREE_TYPE (c->val));
536 range_op_handler handler (c->code);
538 ipa_range_set_and_normalize (val_vr, c->val);
540 if (!handler
541 || !val_vr.supports_type_p (TREE_TYPE (c->val))
542 || !handler.fold_range (res, boolean_type_node, vr, val_vr))
543 res.set_varying (boolean_type_node);
545 if (res.zero_p ())
546 continue;
551 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
552 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
554 *ret_clause = clause;
555 if (ret_nonspec_clause)
556 *ret_nonspec_clause = nonspec_clause;
559 /* Return true if VRP will be exectued on the function.
560 We do not want to anticipate optimizations that will not happen.
562 FIXME: This can be confused with -fdisable and debug counters and thus
563 it should not be used for correctness (only to make heuristics work).
564 This means that inliner should do its own optimizations of expressions
565 that it predicts to be constant so wrong code can not be triggered by
566 builtin_constant_p. */
568 static bool
569 vrp_will_run_p (struct cgraph_node *node)
571 return (opt_for_fn (node->decl, optimize)
572 && !opt_for_fn (node->decl, optimize_debug)
573 && opt_for_fn (node->decl, flag_tree_vrp));
576 /* Similarly about FRE. */
578 static bool
579 fre_will_run_p (struct cgraph_node *node)
581 return (opt_for_fn (node->decl, optimize)
582 && !opt_for_fn (node->decl, optimize_debug)
583 && opt_for_fn (node->decl, flag_tree_fre));
586 /* Work out what conditions might be true at invocation of E.
587 Compute costs for inlined edge if INLINE_P is true.
589 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
590 (if non-NULL) conditions evaluated for nonspecialized clone called
591 in a given context.
593 Vectors in AVALS will be populated with useful known information about
594 argument values - information not known to have any uses will be omitted -
595 except for m_known_contexts which will only be calculated if
596 COMPUTE_CONTEXTS is true. */
598 void
599 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
600 clause_t *clause_ptr,
601 clause_t *nonspec_clause_ptr,
602 ipa_auto_call_arg_values *avals,
603 bool compute_contexts)
605 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
606 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
607 class ipa_edge_args *args;
608 class ipa_call_summary *es = NULL;
610 if (clause_ptr)
611 *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
613 if (ipa_node_params_sum
614 && !e->call_stmt_cannot_inline_p
615 && (info->conds || compute_contexts)
616 && (args = ipa_edge_args_sum->get (e)) != NULL)
618 struct cgraph_node *caller;
619 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
620 int i, count = ipa_get_cs_argument_count (args);
621 es = ipa_call_summaries->get (e);
623 if (count)
625 if (e->caller->inlined_to)
626 caller = e->caller->inlined_to;
627 else
628 caller = e->caller;
629 caller_parms_info = ipa_node_params_sum->get (caller);
630 callee_pi = ipa_node_params_sum->get (callee);
632 /* Watch for thunks. */
633 if (callee_pi)
634 /* Watch for variadic functions. */
635 count = MIN (count, ipa_get_param_count (callee_pi));
638 if (callee_pi)
639 for (i = 0; i < count; i++)
641 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
643 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
644 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
646 /* Determine if we know constant value of the parameter. */
647 tree type = ipa_get_type (callee_pi, i);
648 tree cst = ipa_value_from_jfunc (caller_parms_info, jf, type);
650 if (!cst && e->call_stmt
651 && i < (int)gimple_call_num_args (e->call_stmt))
653 cst = gimple_call_arg (e->call_stmt, i);
654 if (!is_gimple_min_invariant (cst))
655 cst = NULL;
657 if (cst)
659 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
660 if (!avals->m_known_vals.length ())
661 avals->m_known_vals.safe_grow_cleared (count, true);
662 avals->m_known_vals[i] = cst;
664 else if (inline_p && !es->param[i].change_prob)
666 if (!avals->m_known_vals.length ())
667 avals->m_known_vals.safe_grow_cleared (count, true);
668 avals->m_known_vals[i] = error_mark_node;
671 /* If we failed to get simple constant, try value range. */
672 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
673 && vrp_will_run_p (caller)
674 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
676 Value_Range vr (type);
678 ipa_value_range_from_jfunc (vr, caller_parms_info, e, jf, type);
679 if (!vr.undefined_p () && !vr.varying_p ())
681 if (!avals->m_known_value_ranges.length ())
682 avals->m_known_value_ranges.safe_grow_cleared (count,
683 true);
684 avals->m_known_value_ranges[i] = vr;
688 /* Determine known aggregate values. */
689 if (fre_will_run_p (caller))
690 ipa_push_agg_values_from_jfunc (caller_parms_info,
691 caller, &jf->agg, i,
692 &avals->m_known_aggs);
695 /* For calls used in polymorphic calls we further determine
696 polymorphic call context. */
697 if (compute_contexts
698 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
700 ipa_polymorphic_call_context
701 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
702 if (!ctx.useless_p ())
704 if (!avals->m_known_contexts.length ())
705 avals->m_known_contexts.safe_grow_cleared (count, true);
706 avals->m_known_contexts[i]
707 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
711 else
712 gcc_assert (!count || callee->thunk);
714 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
716 int i, count = (int)gimple_call_num_args (e->call_stmt);
718 for (i = 0; i < count; i++)
720 tree cst = gimple_call_arg (e->call_stmt, i);
721 if (!is_gimple_min_invariant (cst))
722 cst = NULL;
723 if (cst)
725 if (!avals->m_known_vals.length ())
726 avals->m_known_vals.safe_grow_cleared (count, true);
727 avals->m_known_vals[i] = cst;
732 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
733 nonspec_clause_ptr, es);
737 /* Allocate the function summary. */
739 static void
740 ipa_fn_summary_alloc (void)
742 gcc_checking_assert (!ipa_fn_summaries);
743 ipa_size_summaries = new ipa_size_summary_t (symtab);
744 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
745 ipa_call_summaries = new ipa_call_summary_t (symtab);
748 ipa_call_summary::~ipa_call_summary ()
750 if (predicate)
751 edge_predicate_pool.remove (predicate);
753 param.release ();
756 ipa_fn_summary::~ipa_fn_summary ()
758 unsigned len = vec_safe_length (loop_iterations);
759 for (unsigned i = 0; i < len; i++)
760 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
761 len = vec_safe_length (loop_strides);
762 for (unsigned i = 0; i < len; i++)
763 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
764 vec_free (conds);
765 call_size_time_table.release ();
766 vec_free (loop_iterations);
767 vec_free (loop_strides);
768 builtin_constant_p_parms.release ();
771 void
772 ipa_fn_summary_t::remove_callees (cgraph_node *node)
774 cgraph_edge *e;
775 for (e = node->callees; e; e = e->next_callee)
776 ipa_call_summaries->remove (e);
777 for (e = node->indirect_calls; e; e = e->next_callee)
778 ipa_call_summaries->remove (e);
781 /* Duplicate predicates in loop hint vector, allocating memory for them and
782 remove and deallocate any uninteresting (true or false) ones. Return the
783 result. */
785 static vec<ipa_freqcounting_predicate, va_gc> *
786 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
787 clause_t possible_truths)
789 if (vec_safe_length (v) == 0)
790 return NULL;
792 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
793 int len = res->length();
794 for (int i = len - 1; i >= 0; i--)
796 ipa_predicate new_predicate
797 = (*res)[i].predicate->remap_after_duplication (possible_truths);
798 /* We do not want to free previous predicate; it is used by node
799 origin. */
800 (*res)[i].predicate = NULL;
801 set_hint_predicate (&(*res)[i].predicate, new_predicate);
803 if (!(*res)[i].predicate)
804 res->unordered_remove (i);
807 return res;
811 /* Hook that is called by cgraph.cc when a node is duplicated. */
812 void
813 ipa_fn_summary_t::duplicate (cgraph_node *src,
814 cgraph_node *dst,
815 ipa_fn_summary *src_info,
816 ipa_fn_summary *info)
818 new (info) ipa_fn_summary (*src_info);
819 /* TODO: as an optimization, we may avoid copying conditions
820 that are known to be false or true. */
821 info->conds = vec_safe_copy (info->conds);
823 clone_info *cinfo = clone_info::get (dst);
824 /* When there are any replacements in the function body, see if we can figure
825 out that something was optimized out. */
826 if (ipa_node_params_sum && cinfo && cinfo->tree_map)
828 /* Use SRC parm info since it may not be copied yet. */
829 ipa_node_params *parms_info = ipa_node_params_sum->get (src);
830 ipa_auto_call_arg_values avals;
831 int count = ipa_get_param_count (parms_info);
832 int i, j;
833 clause_t possible_truths;
834 ipa_predicate true_pred = true;
835 size_time_entry *e;
836 int optimized_out_size = 0;
837 bool inlined_to_p = false;
838 struct cgraph_edge *edge, *next;
840 info->size_time_table.release ();
841 avals.m_known_vals.safe_grow_cleared (count, true);
842 for (i = 0; i < count; i++)
844 struct ipa_replace_map *r;
846 for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
848 if (r->parm_num == i)
850 avals.m_known_vals[i] = r->new_tree;
851 break;
855 evaluate_conditions_for_known_args (dst, false,
856 &avals,
857 &possible_truths,
858 /* We are going to specialize,
859 so ignore nonspec truths. */
860 NULL,
861 NULL);
863 info->account_size_time (0, 0, true_pred, true_pred);
865 /* Remap size_time vectors.
866 Simplify the predicate by pruning out alternatives that are known
867 to be false.
868 TODO: as on optimization, we can also eliminate conditions known
869 to be true. */
870 for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
872 ipa_predicate new_exec_pred;
873 ipa_predicate new_nonconst_pred;
874 new_exec_pred = e->exec_predicate.remap_after_duplication
875 (possible_truths);
876 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
877 (possible_truths);
878 if (new_exec_pred == false || new_nonconst_pred == false)
879 optimized_out_size += e->size;
880 else
881 info->account_size_time (e->size, e->time, new_exec_pred,
882 new_nonconst_pred);
885 /* Remap edge predicates with the same simplification as above.
886 Also copy constantness arrays. */
887 for (edge = dst->callees; edge; edge = next)
889 ipa_predicate new_predicate;
890 class ipa_call_summary *es = ipa_call_summaries->get (edge);
891 next = edge->next_callee;
893 if (!edge->inline_failed)
894 inlined_to_p = true;
895 if (!es->predicate)
896 continue;
897 new_predicate = es->predicate->remap_after_duplication
898 (possible_truths);
899 if (new_predicate == false && *es->predicate != false)
900 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
901 edge_set_predicate (edge, &new_predicate);
904 /* Remap indirect edge predicates with the same simplification as above.
905 Also copy constantness arrays. */
906 for (edge = dst->indirect_calls; edge; edge = next)
908 ipa_predicate new_predicate;
909 class ipa_call_summary *es = ipa_call_summaries->get (edge);
910 next = edge->next_callee;
912 gcc_checking_assert (edge->inline_failed);
913 if (!es->predicate)
914 continue;
915 new_predicate = es->predicate->remap_after_duplication
916 (possible_truths);
917 if (new_predicate == false && *es->predicate != false)
918 optimized_out_size
919 += es->call_stmt_size * ipa_fn_summary::size_scale;
920 edge_set_predicate (edge, &new_predicate);
922 info->loop_iterations
923 = remap_freqcounting_preds_after_dup (info->loop_iterations,
924 possible_truths);
925 info->loop_strides
926 = remap_freqcounting_preds_after_dup (info->loop_strides,
927 possible_truths);
928 if (info->builtin_constant_p_parms.length())
930 vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
931 int ip;
932 info->builtin_constant_p_parms = vNULL;
933 for (i = 0; parms.iterate (i, &ip); i++)
934 if (!avals.m_known_vals[ip])
935 info->builtin_constant_p_parms.safe_push (ip);
938 /* If inliner or someone after inliner will ever start producing
939 non-trivial clones, we will get trouble with lack of information
940 about updating self sizes, because size vectors already contains
941 sizes of the callees. */
942 gcc_assert (!inlined_to_p || !optimized_out_size);
944 else
946 info->size_time_table = src_info->size_time_table.copy ();
947 info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
948 info->loop_strides = vec_safe_copy (info->loop_strides);
950 info->builtin_constant_p_parms
951 = info->builtin_constant_p_parms.copy ();
953 ipa_freqcounting_predicate *f;
954 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
956 ipa_predicate p = *f->predicate;
957 f->predicate = NULL;
958 set_hint_predicate (&f->predicate, p);
960 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
962 ipa_predicate p = *f->predicate;
963 f->predicate = NULL;
964 set_hint_predicate (&f->predicate, p);
967 if (!dst->inlined_to)
968 ipa_update_overall_fn_summary (dst);
972 /* Hook that is called by cgraph.cc when a node is duplicated. */
974 void
975 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
976 struct cgraph_edge *dst,
977 class ipa_call_summary *srcinfo,
978 class ipa_call_summary *info)
980 new (info) ipa_call_summary (*srcinfo);
981 info->predicate = NULL;
982 edge_set_predicate (dst, srcinfo->predicate);
983 info->param = srcinfo->param.copy ();
984 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
986 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
987 - eni_size_weights.call_cost);
988 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
989 - eni_time_weights.call_cost);
993 /* Dump edge summaries associated to NODE and recursively to all clones.
994 Indent by INDENT. */
996 static void
997 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
998 class ipa_fn_summary *info)
1000 struct cgraph_edge *edge;
1001 for (edge = node->callees; edge; edge = edge->next_callee)
1003 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1004 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1005 int i;
1007 fprintf (f,
1008 "%*s%s %s\n%*s freq:%4.2f",
1009 indent, "", callee->dump_name (),
1010 !edge->inline_failed
1011 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1012 indent, "", edge->sreal_frequency ().to_double ());
1014 if (cross_module_call_p (edge))
1015 fprintf (f, " cross module");
1017 if (es)
1018 fprintf (f, " loop depth:%2i size:%2i time: %2i",
1019 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
1021 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
1022 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1023 if (s != NULL)
1024 fprintf (f, " callee size:%2i stack:%2i",
1025 (int) (ss->size / ipa_fn_summary::size_scale),
1026 (int) s->estimated_stack_size);
1028 if (es && es->predicate)
1030 fprintf (f, " predicate: ");
1031 es->predicate->dump (f, info->conds);
1033 else
1034 fprintf (f, "\n");
1035 if (es && es->param.exists ())
1036 for (i = 0; i < (int) es->param.length (); i++)
1038 int prob = es->param[i].change_prob;
1040 if (!prob)
1041 fprintf (f, "%*s op%i is compile time invariant\n",
1042 indent + 2, "", i);
1043 else if (prob != REG_BR_PROB_BASE)
1044 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1045 prob * 100.0 / REG_BR_PROB_BASE);
1046 if (es->param[i].points_to_local_or_readonly_memory)
1047 fprintf (f, "%*s op%i points to local or readonly memory\n",
1048 indent + 2, "", i);
1049 if (es->param[i].points_to_possible_sra_candidate)
1050 fprintf (f, "%*s op%i points to possible sra candidate\n",
1051 indent + 2, "", i);
1053 if (!edge->inline_failed)
1055 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1056 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1057 indent + 2, "",
1058 (int) ipa_get_stack_frame_offset (callee),
1059 (int) ss->estimated_self_stack_size);
1060 dump_ipa_call_summary (f, indent + 2, callee, info);
1063 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1065 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1066 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1067 " time: %2i",
1068 indent, "",
1069 es->loop_depth,
1070 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1071 es->call_stmt_time);
1072 if (es->predicate)
1074 fprintf (f, "predicate: ");
1075 es->predicate->dump (f, info->conds);
1077 else
1078 fprintf (f, "\n");
1083 void
1084 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1086 if (node->definition)
1088 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1089 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1090 if (s != NULL)
1092 size_time_entry *e;
1093 int i;
1094 fprintf (f, "IPA function summary for %s", node->dump_name ());
1095 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1096 fprintf (f, " always_inline");
1097 if (s->inlinable)
1098 fprintf (f, " inlinable");
1099 if (s->fp_expressions)
1100 fprintf (f, " fp_expression");
1101 if (s->builtin_constant_p_parms.length ())
1103 fprintf (f, " builtin_constant_p_parms");
1104 for (unsigned int i = 0;
1105 i < s->builtin_constant_p_parms.length (); i++)
1106 fprintf (f, " %i", s->builtin_constant_p_parms[i]);
1108 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1109 fprintf (f, " self size: %i\n", ss->self_size);
1110 fprintf (f, " global size: %i\n", ss->size);
1111 fprintf (f, " min size: %i\n", s->min_size);
1112 fprintf (f, " self stack: %i\n",
1113 (int) ss->estimated_self_stack_size);
1114 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1115 if (s->growth)
1116 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1117 if (s->scc_no)
1118 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
1119 for (i = 0; s->size_time_table.iterate (i, &e); i++)
1121 fprintf (f, " size:%f, time:%f",
1122 (double) e->size / ipa_fn_summary::size_scale,
1123 e->time.to_double ());
1124 if (e->exec_predicate != true)
1126 fprintf (f, ", executed if:");
1127 e->exec_predicate.dump (f, s->conds, 0);
1129 if (e->exec_predicate != e->nonconst_predicate)
1131 fprintf (f, ", nonconst if:");
1132 e->nonconst_predicate.dump (f, s->conds, 0);
1134 fprintf (f, "\n");
1136 ipa_freqcounting_predicate *fcp;
1137 bool first_fcp = true;
1138 for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
1140 if (first_fcp)
1142 fprintf (f, " loop iterations:");
1143 first_fcp = false;
1145 fprintf (f, " %3.2f for ", fcp->freq.to_double ());
1146 fcp->predicate->dump (f, s->conds);
1148 first_fcp = true;
1149 for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
1151 if (first_fcp)
1153 fprintf (f, " loop strides:");
1154 first_fcp = false;
1156 fprintf (f, " %3.2f for :", fcp->freq.to_double ());
1157 fcp->predicate->dump (f, s->conds);
1159 fprintf (f, " calls:\n");
1160 dump_ipa_call_summary (f, 4, node, s);
1161 fprintf (f, "\n");
1162 if (s->target_info)
1163 fprintf (f, " target_info: %x\n", s->target_info);
1165 else
1166 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
1170 DEBUG_FUNCTION void
1171 ipa_debug_fn_summary (struct cgraph_node *node)
1173 ipa_dump_fn_summary (stderr, node);
1176 void
1177 ipa_dump_fn_summaries (FILE *f)
1179 struct cgraph_node *node;
1181 FOR_EACH_DEFINED_FUNCTION (node)
1182 if (!node->inlined_to)
1183 ipa_dump_fn_summary (f, node);
1186 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1187 boolean variable pointed to by DATA. */
1189 static bool
1190 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1191 void *data)
1193 bool *b = (bool *) data;
1194 *b = true;
1195 return true;
1198 /* If OP refers to value of function parameter, return the corresponding
1199 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1200 PARM_DECL) will be stored to *SIZE_P in that case too. */
1202 static tree
1203 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1204 poly_int64 *size_p)
1206 /* SSA_NAME referring to parm default def? */
1207 if (TREE_CODE (op) == SSA_NAME
1208 && SSA_NAME_IS_DEFAULT_DEF (op)
1209 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1211 if (size_p)
1212 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1213 return SSA_NAME_VAR (op);
1215 /* Non-SSA parm reference? */
1216 if (TREE_CODE (op) == PARM_DECL
1217 && fbi->aa_walk_budget > 0)
1219 bool modified = false;
1221 ao_ref refd;
1222 ao_ref_init (&refd, op);
1223 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1224 mark_modified, &modified, NULL, NULL,
1225 fbi->aa_walk_budget);
1226 if (walked < 0)
1228 fbi->aa_walk_budget = 0;
1229 return NULL_TREE;
1231 fbi->aa_walk_budget -= walked;
1232 if (!modified)
1234 if (size_p)
1235 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1236 return op;
1239 return NULL_TREE;
1242 /* If OP refers to value of function parameter, return the corresponding
1243 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1244 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1245 stored to *SIZE_P in that case too. */
1247 static tree
1248 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1249 poly_int64 *size_p)
1251 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1252 if (res)
1253 return res;
1255 if (TREE_CODE (op) == SSA_NAME
1256 && !SSA_NAME_IS_DEFAULT_DEF (op)
1257 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1258 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1259 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1260 size_p);
1261 return NULL_TREE;
1264 /* If OP refers to a value of a function parameter or value loaded from an
1265 aggregate passed to a parameter (either by value or reference), return TRUE
1266 and store the number of the parameter to *INDEX_P, the access size into
1267 *SIZE_P, and information whether and how it has been loaded from an
1268 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1269 statement in which OP is used or loaded. */
1271 static bool
1272 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1273 gimple *stmt, tree op, int *index_p,
1274 poly_int64 *size_p,
1275 struct agg_position_info *aggpos)
1277 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1279 gcc_checking_assert (aggpos);
1280 if (res)
1282 *index_p = ipa_get_param_decl_index (fbi->info, res);
1283 if (*index_p < 0)
1284 return false;
1285 aggpos->agg_contents = false;
1286 aggpos->by_ref = false;
1287 return true;
1290 if (TREE_CODE (op) == SSA_NAME)
1292 if (SSA_NAME_IS_DEFAULT_DEF (op)
1293 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1294 return false;
1295 stmt = SSA_NAME_DEF_STMT (op);
1296 op = gimple_assign_rhs1 (stmt);
1297 if (!REFERENCE_CLASS_P (op))
1298 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1299 aggpos);
1302 aggpos->agg_contents = true;
1303 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1304 stmt, op, index_p, &aggpos->offset,
1305 size_p, &aggpos->by_ref);
1308 /* If stmt is simple load or store of value pointed to by a function parmaeter,
1309 return its index. */
1311 static int
1312 load_or_store_of_ptr_parameter (ipa_func_body_info *fbi, gimple *stmt)
1314 if (!optimize)
1315 return -1;
1316 gassign *assign = dyn_cast <gassign *> (stmt);
1317 if (!assign)
1318 return -1;
1319 tree param;
1320 if (gimple_assign_load_p (stmt))
1321 param = gimple_assign_rhs1 (stmt);
1322 else if (gimple_store_p (stmt))
1323 param = gimple_assign_lhs (stmt);
1324 else
1325 return -1;
1326 tree base = get_base_address (param);
1327 if (TREE_CODE (base) != MEM_REF
1328 || TREE_CODE (TREE_OPERAND (base, 0)) != SSA_NAME
1329 || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0)))
1330 return -1;
1331 tree p = SSA_NAME_VAR (TREE_OPERAND (base, 0));
1332 if (TREE_CODE (p) != PARM_DECL)
1333 return -1;
1334 return ipa_get_param_decl_index (fbi->info, p);
1337 /* See if statement might disappear after inlining.
1338 0 - means not eliminated
1339 1 - half of statements goes away
1340 2 - for sure it is eliminated.
1341 We are not terribly sophisticated, basically looking for simple abstraction
1342 penalty wrappers. */
1344 static int
1345 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1347 enum gimple_code code = gimple_code (stmt);
1348 enum tree_code rhs_code;
1350 if (!optimize)
1351 return 0;
1353 switch (code)
1355 case GIMPLE_RETURN:
1356 return 2;
1357 case GIMPLE_ASSIGN:
1358 if (gimple_num_ops (stmt) != 2)
1359 return 0;
1361 rhs_code = gimple_assign_rhs_code (stmt);
1363 /* Casts of parameters, loads from parameters passed by reference
1364 and stores to return value or parameters are often free after
1365 inlining due to SRA and further combining.
1366 Assume that half of statements goes away. */
1367 if (CONVERT_EXPR_CODE_P (rhs_code)
1368 || rhs_code == VIEW_CONVERT_EXPR
1369 || rhs_code == ADDR_EXPR
1370 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1372 tree rhs = gimple_assign_rhs1 (stmt);
1373 tree lhs = gimple_assign_lhs (stmt);
1374 tree inner_rhs = get_base_address (rhs);
1375 tree inner_lhs = get_base_address (lhs);
1376 bool rhs_free = false;
1377 bool lhs_free = false;
1379 if (!inner_rhs)
1380 inner_rhs = rhs;
1381 if (!inner_lhs)
1382 inner_lhs = lhs;
1384 /* Reads of parameter are expected to be free. */
1385 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1386 rhs_free = true;
1387 /* Match expressions of form &this->field. Those will most likely
1388 combine with something upstream after inlining. */
1389 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1391 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1392 if (TREE_CODE (op) == PARM_DECL)
1393 rhs_free = true;
1394 else if (TREE_CODE (op) == MEM_REF
1395 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1396 NULL))
1397 rhs_free = true;
1400 /* When parameter is not SSA register because its address is taken
1401 and it is just copied into one, the statement will be completely
1402 free after inlining (we will copy propagate backward). */
1403 if (rhs_free && is_gimple_reg (lhs))
1404 return 2;
1406 /* Reads of parameters passed by reference
1407 expected to be free (i.e. optimized out after inlining). */
1408 if (TREE_CODE (inner_rhs) == MEM_REF
1409 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1410 rhs_free = true;
1412 /* Copying parameter passed by reference into gimple register is
1413 probably also going to copy propagate, but we can't be quite
1414 sure. */
1415 if (rhs_free && is_gimple_reg (lhs))
1416 lhs_free = true;
1418 /* Writes to parameters, parameters passed by value and return value
1419 (either directly or passed via invisible reference) are free.
1421 TODO: We ought to handle testcase like
1422 struct a {int a,b;};
1423 struct a
1424 returnstruct (void)
1426 struct a a ={1,2};
1427 return a;
1430 This translate into:
1432 returnstruct ()
1434 int a$b;
1435 int a$a;
1436 struct a a;
1437 struct a D.2739;
1439 <bb 2>:
1440 D.2739.a = 1;
1441 D.2739.b = 2;
1442 return D.2739;
1445 For that we either need to copy ipa-split logic detecting writes
1446 to return value. */
1447 if (TREE_CODE (inner_lhs) == PARM_DECL
1448 || TREE_CODE (inner_lhs) == RESULT_DECL
1449 || (TREE_CODE (inner_lhs) == MEM_REF
1450 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1451 NULL)
1452 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1453 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1454 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1455 (inner_lhs,
1456 0))) == RESULT_DECL))))
1457 lhs_free = true;
1458 if (lhs_free
1459 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1460 rhs_free = true;
1461 if (lhs_free && rhs_free)
1462 return 1;
1464 return 0;
1465 default:
1466 return 0;
1470 /* Analyze EXPR if it represents a series of simple operations performed on
1471 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1472 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1473 Type of the parameter or load from an aggregate via the parameter is
1474 stored in *TYPE_P. Operations on the parameter are recorded to
1475 PARAM_OPS_P if it is not NULL. */
1477 static bool
1478 decompose_param_expr (struct ipa_func_body_info *fbi,
1479 gimple *stmt, tree expr,
1480 int *index_p, tree *type_p,
1481 struct agg_position_info *aggpos,
1482 expr_eval_ops *param_ops_p = NULL)
1484 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1485 int op_count = 0;
1487 if (param_ops_p)
1488 *param_ops_p = NULL;
1490 while (true)
1492 expr_eval_op eval_op;
1493 unsigned rhs_count;
1494 unsigned cst_count = 0;
1496 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1497 aggpos))
1499 tree type = TREE_TYPE (expr);
1501 if (aggpos->agg_contents)
1503 /* Stop if containing bit-field. */
1504 if (TREE_CODE (expr) == BIT_FIELD_REF
1505 || contains_bitfld_component_ref_p (expr))
1506 break;
1509 *type_p = type;
1510 return true;
1513 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1514 break;
1515 stmt = SSA_NAME_DEF_STMT (expr);
1517 if (gcall *call = dyn_cast <gcall *> (stmt))
1519 int flags = gimple_call_return_flags (call);
1520 if (!(flags & ERF_RETURNS_ARG))
1521 goto fail;
1522 int arg = flags & ERF_RETURN_ARG_MASK;
1523 if (arg >= (int)gimple_call_num_args (call))
1524 goto fail;
1525 expr = gimple_call_arg (stmt, arg);
1526 continue;
1529 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1530 break;
1532 switch (gimple_assign_rhs_class (stmt))
1534 case GIMPLE_SINGLE_RHS:
1535 expr = gimple_assign_rhs1 (stmt);
1536 continue;
1538 case GIMPLE_UNARY_RHS:
1539 rhs_count = 1;
1540 break;
1542 case GIMPLE_BINARY_RHS:
1543 rhs_count = 2;
1544 break;
1546 case GIMPLE_TERNARY_RHS:
1547 rhs_count = 3;
1548 break;
1550 default:
1551 goto fail;
1554 /* Stop if expression is too complex. */
1555 if (op_count++ == op_limit)
1556 break;
1558 if (param_ops_p)
1560 eval_op.code = gimple_assign_rhs_code (stmt);
1561 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1562 eval_op.val[0] = NULL_TREE;
1563 eval_op.val[1] = NULL_TREE;
1566 expr = NULL_TREE;
1567 for (unsigned i = 0; i < rhs_count; i++)
1569 tree op = gimple_op (stmt, i + 1);
1571 gcc_assert (op && !TYPE_P (op));
1572 if (is_gimple_ip_invariant (op))
1574 if (++cst_count == rhs_count)
1575 goto fail;
1577 eval_op.val[cst_count - 1] = op;
1579 else if (!expr)
1581 /* Found a non-constant operand, and record its index in rhs
1582 operands. */
1583 eval_op.index = i;
1584 expr = op;
1586 else
1588 /* Found more than one non-constant operands. */
1589 goto fail;
1593 if (param_ops_p)
1594 vec_safe_insert (*param_ops_p, 0, eval_op);
1597 /* Failed to decompose, free resource and return. */
1598 fail:
1599 if (param_ops_p)
1600 vec_free (*param_ops_p);
1602 return false;
1605 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1607 static void
1608 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1610 int ip;
1612 /* Avoid duplicates. */
1613 for (unsigned int i = 0;
1614 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1615 if (ip == parm)
1616 return;
1617 summary->builtin_constant_p_parms.safe_push (parm);
1620 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1621 predicates to the CFG edges. */
1623 static void
1624 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1625 class ipa_fn_summary *summary,
1626 class ipa_node_params *params_summary,
1627 basic_block bb)
1629 tree op, op2;
1630 int index;
1631 struct agg_position_info aggpos;
1632 enum tree_code code, inverted_code;
1633 edge e;
1634 edge_iterator ei;
1635 gimple *set_stmt;
1636 tree param_type;
1637 expr_eval_ops param_ops;
1639 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1640 if (!last)
1641 return;
1642 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1643 return;
1644 op = gimple_cond_lhs (last);
1646 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1647 &param_ops))
1649 code = gimple_cond_code (last);
1650 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1652 FOR_EACH_EDGE (e, ei, bb->succs)
1654 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1655 ? code : inverted_code);
1656 /* invert_tree_comparison will return ERROR_MARK on FP
1657 comparisons that are not EQ/NE instead of returning proper
1658 unordered one. Be sure it is not confused with NON_CONSTANT.
1660 And if the edge's target is the final block of diamond CFG graph
1661 of this conditional statement, we do not need to compute
1662 predicate for the edge because the final block's predicate must
1663 be at least as that of the first block of the statement. */
1664 if (this_code != ERROR_MARK
1665 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1667 ipa_predicate p
1668 = add_condition (summary, params_summary, index,
1669 param_type, &aggpos,
1670 this_code, gimple_cond_rhs (last), param_ops);
1671 e->aux = edge_predicate_pool.allocate ();
1672 *(ipa_predicate *) e->aux = p;
1675 vec_free (param_ops);
1676 return;
1679 if (TREE_CODE (op) != SSA_NAME)
1680 return;
1681 /* Special case
1682 if (builtin_constant_p (op))
1683 constant_code
1684 else
1685 nonconstant_code.
1686 Here we can predicate nonconstant_code. We can't
1687 really handle constant_code since we have no predicate
1688 for this and also the constant code is not known to be
1689 optimized away when inliner doesn't see operand is constant.
1690 Other optimizers might think otherwise. */
1691 if (gimple_cond_code (last) != NE_EXPR
1692 || !integer_zerop (gimple_cond_rhs (last)))
1693 return;
1694 set_stmt = SSA_NAME_DEF_STMT (op);
1695 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1696 || gimple_call_num_args (set_stmt) != 1)
1697 return;
1698 op2 = gimple_call_arg (set_stmt, 0);
1699 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1700 return;
1701 if (!aggpos.by_ref)
1702 add_builtin_constant_p_parm (summary, index);
1703 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1705 ipa_predicate p = add_condition (summary, params_summary, index,
1706 param_type, &aggpos,
1707 ipa_predicate::is_not_constant, NULL_TREE);
1708 e->aux = edge_predicate_pool.allocate ();
1709 *(ipa_predicate *) e->aux = p;
1714 /* If BB ends by a switch we can turn into predicates, attach corresponding
1715 predicates to the CFG edges. */
1717 static void
1718 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1719 class ipa_fn_summary *summary,
1720 class ipa_node_params *params_summary,
1721 basic_block bb)
1723 tree op;
1724 int index;
1725 struct agg_position_info aggpos;
1726 edge e;
1727 edge_iterator ei;
1728 size_t n;
1729 size_t case_idx;
1730 tree param_type;
1731 expr_eval_ops param_ops;
1733 gswitch *last = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb));
1734 if (!last)
1735 return;
1736 op = gimple_switch_index (last);
1737 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1738 &param_ops))
1739 return;
1741 auto_vec<std::pair<tree, tree> > ranges;
1742 tree type = TREE_TYPE (op);
1743 int bound_limit = opt_for_fn (fbi->node->decl,
1744 param_ipa_max_switch_predicate_bounds);
1745 int bound_count = 0;
1746 // This can safely be an integer range, as switches can only hold
1747 // integers.
1748 int_range<2> vr;
1750 get_range_query (cfun)->range_of_expr (vr, op);
1751 if (vr.undefined_p ())
1752 vr.set_varying (TREE_TYPE (op));
1753 tree vr_min, vr_max;
1754 // TODO: This entire function could use a rewrite to use the irange
1755 // API, instead of trying to recreate its intersection/union logic.
1756 // Any use of get_legacy_range() is a serious code smell.
1757 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
1758 wide_int vr_wmin = wi::to_wide (vr_min);
1759 wide_int vr_wmax = wi::to_wide (vr_max);
1761 FOR_EACH_EDGE (e, ei, bb->succs)
1763 e->aux = edge_predicate_pool.allocate ();
1764 *(ipa_predicate *) e->aux = false;
1767 e = gimple_switch_edge (cfun, last, 0);
1768 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1769 default case if its target basic block is in convergence point of all
1770 switch cases, which can be determined by checking whether it
1771 post-dominates the switch statement. */
1772 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1773 bound_count = INT_MAX;
1775 n = gimple_switch_num_labels (last);
1776 for (case_idx = 1; case_idx < n; ++case_idx)
1778 tree cl = gimple_switch_label (last, case_idx);
1779 tree min = CASE_LOW (cl);
1780 tree max = CASE_HIGH (cl);
1781 ipa_predicate p;
1783 e = gimple_switch_edge (cfun, last, case_idx);
1785 /* The case value might not have same type as switch expression,
1786 extend the value based on the expression type. */
1787 if (TREE_TYPE (min) != type)
1788 min = wide_int_to_tree (type, wi::to_wide (min));
1790 if (!max)
1791 max = min;
1792 else if (TREE_TYPE (max) != type)
1793 max = wide_int_to_tree (type, wi::to_wide (max));
1795 /* The case's target basic block is in convergence point of all switch
1796 cases, its predicate should be at least as that of the switch
1797 statement. */
1798 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1799 p = true;
1800 else if (min == max)
1801 p = add_condition (summary, params_summary, index, param_type,
1802 &aggpos, EQ_EXPR, min, param_ops);
1803 else
1805 ipa_predicate p1, p2;
1806 p1 = add_condition (summary, params_summary, index, param_type,
1807 &aggpos, GE_EXPR, min, param_ops);
1808 p2 = add_condition (summary, params_summary,index, param_type,
1809 &aggpos, LE_EXPR, max, param_ops);
1810 p = p1 & p2;
1812 *(ipa_predicate *) e->aux
1813 = p.or_with (summary->conds, *(ipa_predicate *) e->aux);
1815 /* If there are too many disjoint case ranges, predicate for default
1816 case might become too complicated. So add a limit here. */
1817 if (bound_count > bound_limit)
1818 continue;
1820 bool new_range = true;
1822 if (!ranges.is_empty ())
1824 wide_int curr_wmin = wi::to_wide (min);
1825 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1827 /* Merge case ranges if they are continuous. */
1828 if (curr_wmin == last_wmax + 1)
1829 new_range = false;
1830 else if (vr_type == VR_ANTI_RANGE)
1832 /* If two disjoint case ranges can be connected by anti-range
1833 of switch index, combine them to one range. */
1834 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1835 vr_type = VR_UNDEFINED;
1836 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1837 new_range = false;
1841 /* Create/extend a case range. And we count endpoints of range set,
1842 this number nearly equals to number of conditions that we will create
1843 for predicate of default case. */
1844 if (new_range)
1846 bound_count += (min == max) ? 1 : 2;
1847 ranges.safe_push (std::make_pair (min, max));
1849 else
1851 bound_count += (ranges.last ().first == ranges.last ().second);
1852 ranges.last ().second = max;
1856 e = gimple_switch_edge (cfun, last, 0);
1857 if (bound_count > bound_limit)
1859 *(ipa_predicate *) e->aux = true;
1860 vec_free (param_ops);
1861 return;
1864 ipa_predicate p_seg = true;
1865 ipa_predicate p_all = false;
1867 if (vr_type != VR_RANGE)
1869 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1870 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1873 /* Construct predicate to represent default range set that is negation of
1874 all case ranges. Case range is classified as containing single/non-single
1875 values. Suppose a piece of case ranges in the following.
1877 [D1...D2] [S1] ... [Sn] [D3...D4]
1879 To represent default case's range sets between two non-single value
1880 case ranges (From D2 to D3), we construct predicate as:
1882 D2 < x < D3 && x != S1 && ... && x != Sn
1884 for (size_t i = 0; i < ranges.length (); i++)
1886 tree min = ranges[i].first;
1887 tree max = ranges[i].second;
1889 if (min == max)
1890 p_seg &= add_condition (summary, params_summary, index,
1891 param_type, &aggpos, NE_EXPR,
1892 min, param_ops);
1893 else
1895 /* Do not create sub-predicate for range that is beyond low bound
1896 of switch index. */
1897 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1899 p_seg &= add_condition (summary, params_summary, index,
1900 param_type, &aggpos,
1901 LT_EXPR, min, param_ops);
1902 p_all = p_all.or_with (summary->conds, p_seg);
1905 /* Do not create sub-predicate for range that is beyond up bound
1906 of switch index. */
1907 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1909 p_seg = false;
1910 break;
1913 p_seg = add_condition (summary, params_summary, index,
1914 param_type, &aggpos, GT_EXPR,
1915 max, param_ops);
1919 p_all = p_all.or_with (summary->conds, p_seg);
1920 *(ipa_predicate *) e->aux
1921 = p_all.or_with (summary->conds, *(ipa_predicate *) e->aux);
1923 vec_free (param_ops);
1927 /* For each BB in NODE attach to its AUX pointer predicate under
1928 which it is executable. */
1930 static void
1931 compute_bb_predicates (struct ipa_func_body_info *fbi,
1932 struct cgraph_node *node,
1933 class ipa_fn_summary *summary,
1934 class ipa_node_params *params_summary)
1936 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1937 bool done = false;
1938 basic_block bb;
1940 FOR_EACH_BB_FN (bb, my_function)
1942 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1943 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1946 /* Entry block is always executable. */
1947 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1948 = edge_predicate_pool.allocate ();
1949 *(ipa_predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1951 /* A simple dataflow propagation of predicates forward in the CFG.
1952 TODO: work in reverse postorder. */
1953 while (!done)
1955 done = true;
1956 FOR_EACH_BB_FN (bb, my_function)
1958 ipa_predicate p = false;
1959 edge e;
1960 edge_iterator ei;
1961 FOR_EACH_EDGE (e, ei, bb->preds)
1963 if (e->src->aux)
1965 ipa_predicate this_bb_predicate
1966 = *(ipa_predicate *) e->src->aux;
1967 if (e->aux)
1968 this_bb_predicate &= (*(ipa_predicate *) e->aux);
1969 p = p.or_with (summary->conds, this_bb_predicate);
1970 if (p == true)
1971 break;
1974 if (p != false)
1976 basic_block pdom_bb;
1978 if (!bb->aux)
1980 done = false;
1981 bb->aux = edge_predicate_pool.allocate ();
1982 *((ipa_predicate *) bb->aux) = p;
1984 else if (p != *(ipa_predicate *) bb->aux)
1986 /* This OR operation is needed to ensure monotonous data flow
1987 in the case we hit the limit on number of clauses and the
1988 and/or operations above give approximate answers. */
1989 p = p.or_with (summary->conds, *(ipa_predicate *)bb->aux);
1990 if (p != *(ipa_predicate *)bb->aux)
1992 done = false;
1993 *((ipa_predicate *)bb->aux) = p;
1997 /* For switch/if statement, we can OR-combine predicates of all
1998 its cases/branches to get predicate for basic block in their
1999 convergence point, but sometimes this will generate very
2000 complicated predicate. Actually, we can get simplified
2001 predicate in another way by using the fact that predicate
2002 for a basic block must also hold true for its post dominators.
2003 To be specific, basic block in convergence point of
2004 conditional statement should include predicate of the
2005 statement. */
2006 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
2007 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
2009 else if (!pdom_bb->aux)
2011 done = false;
2012 pdom_bb->aux = edge_predicate_pool.allocate ();
2013 *((ipa_predicate *)pdom_bb->aux) = p;
2015 else if (p != *(ipa_predicate *)pdom_bb->aux)
2017 p = p.or_with (summary->conds,
2018 *(ipa_predicate *)pdom_bb->aux);
2019 if (p != *(ipa_predicate *)pdom_bb->aux)
2021 done = false;
2022 *((ipa_predicate *)pdom_bb->aux) = p;
2031 /* Return predicate specifying when the STMT might have result that is not
2032 a compile time constant. */
2034 static ipa_predicate
2035 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
2036 class ipa_fn_summary *summary,
2037 class ipa_node_params *params_summary,
2038 tree expr,
2039 vec<ipa_predicate> nonconstant_names)
2041 tree parm;
2042 int index;
2044 while (UNARY_CLASS_P (expr))
2045 expr = TREE_OPERAND (expr, 0);
2047 parm = unmodified_parm (fbi, NULL, expr, NULL);
2048 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2049 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
2050 ipa_predicate::changed, NULL_TREE);
2051 if (is_gimple_min_invariant (expr))
2052 return false;
2053 if (TREE_CODE (expr) == SSA_NAME)
2054 return nonconstant_names[SSA_NAME_VERSION (expr)];
2055 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
2057 ipa_predicate p1
2058 = will_be_nonconstant_expr_predicate (fbi, summary,
2059 params_summary,
2060 TREE_OPERAND (expr, 0),
2061 nonconstant_names);
2062 if (p1 == true)
2063 return p1;
2065 ipa_predicate p2
2066 = will_be_nonconstant_expr_predicate (fbi, summary,
2067 params_summary,
2068 TREE_OPERAND (expr, 1),
2069 nonconstant_names);
2070 return p1.or_with (summary->conds, p2);
2072 else if (TREE_CODE (expr) == COND_EXPR)
2074 ipa_predicate p1
2075 = will_be_nonconstant_expr_predicate (fbi, summary,
2076 params_summary,
2077 TREE_OPERAND (expr, 0),
2078 nonconstant_names);
2079 if (p1 == true)
2080 return p1;
2082 ipa_predicate p2
2083 = will_be_nonconstant_expr_predicate (fbi, summary,
2084 params_summary,
2085 TREE_OPERAND (expr, 1),
2086 nonconstant_names);
2087 if (p2 == true)
2088 return p2;
2089 p1 = p1.or_with (summary->conds, p2);
2090 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
2091 params_summary,
2092 TREE_OPERAND (expr, 2),
2093 nonconstant_names);
2094 return p2.or_with (summary->conds, p1);
2096 else if (TREE_CODE (expr) == CALL_EXPR)
2097 return true;
2098 else
2100 debug_tree (expr);
2101 gcc_unreachable ();
2106 /* Return predicate specifying when the STMT might have result that is not
2107 a compile time constant. */
2109 static ipa_predicate
2110 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2111 class ipa_fn_summary *summary,
2112 class ipa_node_params *params_summary,
2113 gimple *stmt,
2114 vec<ipa_predicate> nonconstant_names)
2116 ipa_predicate p = true;
2117 ssa_op_iter iter;
2118 tree use;
2119 tree param_type = NULL_TREE;
2120 ipa_predicate op_non_const;
2121 bool is_load;
2122 int base_index;
2123 struct agg_position_info aggpos;
2125 /* What statements might be optimized away
2126 when their arguments are constant. */
2127 if (gimple_code (stmt) != GIMPLE_ASSIGN
2128 && gimple_code (stmt) != GIMPLE_COND
2129 && gimple_code (stmt) != GIMPLE_SWITCH
2130 && (gimple_code (stmt) != GIMPLE_CALL
2131 || !(gimple_call_flags (stmt) & ECF_CONST)))
2132 return p;
2134 /* Stores will stay anyway. */
2135 if (gimple_store_p (stmt))
2136 return p;
2138 is_load = gimple_assign_load_p (stmt);
2140 /* Loads can be optimized when the value is known. */
2141 if (is_load)
2143 tree op = gimple_assign_rhs1 (stmt);
2144 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
2145 &aggpos))
2146 return p;
2148 else
2149 base_index = -1;
2151 /* See if we understand all operands before we start
2152 adding conditionals. */
2153 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2155 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2156 /* For arguments we can build a condition. */
2157 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2158 continue;
2159 if (TREE_CODE (use) != SSA_NAME)
2160 return p;
2161 /* If we know when operand is constant,
2162 we still can say something useful. */
2163 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
2164 continue;
2165 return p;
2168 if (is_load)
2169 op_non_const =
2170 add_condition (summary, params_summary,
2171 base_index, param_type, &aggpos,
2172 ipa_predicate::changed, NULL_TREE);
2173 else
2174 op_non_const = false;
2175 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2177 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2178 int index;
2180 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2182 if (index != base_index)
2183 p = add_condition (summary, params_summary, index,
2184 TREE_TYPE (parm), NULL,
2185 ipa_predicate::changed, NULL_TREE);
2186 else
2187 continue;
2189 else
2190 p = nonconstant_names[SSA_NAME_VERSION (use)];
2191 op_non_const = p.or_with (summary->conds, op_non_const);
2193 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2194 && gimple_op (stmt, 0)
2195 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2196 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2197 = op_non_const;
2198 return op_non_const;
2201 struct record_modified_bb_info
2203 tree op;
2204 bitmap bb_set;
2205 gimple *stmt;
2208 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2209 probability how often it changes between USE_BB.
2210 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2211 is in different loop nest, we can do better.
2212 This is all just estimate. In theory we look for minimal cut separating
2213 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2214 anyway. */
2216 static basic_block
2217 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2219 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2220 if (l && l->header->count < init_bb->count)
2221 return l->header;
2222 return init_bb;
2225 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2226 set except for info->stmt. */
2228 static bool
2229 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2231 struct record_modified_bb_info *info =
2232 (struct record_modified_bb_info *) data;
2233 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2234 return false;
2235 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2236 return false;
2237 bitmap_set_bit (info->bb_set,
2238 SSA_NAME_IS_DEFAULT_DEF (vdef)
2239 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2240 : get_minimal_bb
2241 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2242 gimple_bb (info->stmt))->index);
2243 if (dump_file)
2245 fprintf (dump_file, " Param ");
2246 print_generic_expr (dump_file, info->op, TDF_SLIM);
2247 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2248 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2249 get_minimal_bb
2250 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2251 gimple_bb (info->stmt))->index);
2252 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2254 return false;
2257 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2258 will change since last invocation of STMT.
2260 Value 0 is reserved for compile time invariants.
2261 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2262 ought to be REG_BR_PROB_BASE / estimated_iters. */
2264 static int
2265 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2267 tree op = gimple_call_arg (stmt, i);
2268 basic_block bb = gimple_bb (stmt);
2270 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2271 op = TREE_OPERAND (op, 0);
2273 tree base = get_base_address (op);
2275 /* Global invariants never change. */
2276 if (is_gimple_min_invariant (base))
2277 return 0;
2279 /* We would have to do non-trivial analysis to really work out what
2280 is the probability of value to change (i.e. when init statement
2281 is in a sibling loop of the call).
2283 We do an conservative estimate: when call is executed N times more often
2284 than the statement defining value, we take the frequency 1/N. */
2285 if (TREE_CODE (base) == SSA_NAME)
2287 profile_count init_count;
2289 if (!bb->count.nonzero_p ())
2290 return REG_BR_PROB_BASE;
2292 if (SSA_NAME_IS_DEFAULT_DEF (base))
2293 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2294 else
2295 init_count = get_minimal_bb
2296 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2297 gimple_bb (stmt))->count;
2299 if (init_count < bb->count)
2300 return MAX ((init_count.to_sreal_scale (bb->count)
2301 * REG_BR_PROB_BASE).to_int (), 1);
2302 return REG_BR_PROB_BASE;
2304 else
2306 ao_ref refd;
2307 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2308 struct record_modified_bb_info info;
2309 tree init = ctor_for_folding (base);
2311 if (init != error_mark_node)
2312 return 0;
2313 if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
2314 return REG_BR_PROB_BASE;
2315 if (dump_file)
2317 fprintf (dump_file, " Analyzing param change probability of ");
2318 print_generic_expr (dump_file, op, TDF_SLIM);
2319 fprintf (dump_file, "\n");
2321 ao_ref_init (&refd, op);
2322 info.op = op;
2323 info.stmt = stmt;
2324 info.bb_set = BITMAP_ALLOC (NULL);
2325 int walked
2326 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2327 NULL, NULL, fbi->aa_walk_budget);
2328 if (walked > 0)
2329 fbi->aa_walk_budget -= walked;
2330 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2332 if (walked < 0)
2333 fbi->aa_walk_budget = 0;
2334 if (dump_file)
2336 if (walked < 0)
2337 fprintf (dump_file, " Ran out of AA walking budget.\n");
2338 else
2339 fprintf (dump_file, " Set in same BB as used.\n");
2341 BITMAP_FREE (info.bb_set);
2342 return REG_BR_PROB_BASE;
2345 bitmap_iterator bi;
2346 unsigned index;
2347 /* Lookup the most frequent update of the value and believe that
2348 it dominates all the other; precise analysis here is difficult. */
2349 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2350 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2351 if (dump_file)
2353 fprintf (dump_file, " Set with count ");
2354 max.dump (dump_file);
2355 fprintf (dump_file, " and used with count ");
2356 bb->count.dump (dump_file);
2357 fprintf (dump_file, " freq %f\n",
2358 max.to_sreal_scale (bb->count).to_double ());
2361 BITMAP_FREE (info.bb_set);
2362 if (max < bb->count)
2363 return MAX ((max.to_sreal_scale (bb->count)
2364 * REG_BR_PROB_BASE).to_int (), 1);
2365 return REG_BR_PROB_BASE;
2369 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2370 sub-graph and if the predicate the condition depends on is known. If so,
2371 return true and store the pointer the predicate in *P. */
2373 static bool
2374 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2375 ipa_fn_summary *summary,
2376 class ipa_node_params *params_summary,
2377 basic_block bb,
2378 ipa_predicate *p,
2379 vec<ipa_predicate> nonconstant_names)
2381 edge e;
2382 edge_iterator ei;
2383 basic_block first_bb = NULL;
2385 if (single_pred_p (bb))
2387 *p = false;
2388 return true;
2391 FOR_EACH_EDGE (e, ei, bb->preds)
2393 if (single_succ_p (e->src))
2395 if (!single_pred_p (e->src))
2396 return false;
2397 if (!first_bb)
2398 first_bb = single_pred (e->src);
2399 else if (single_pred (e->src) != first_bb)
2400 return false;
2402 else
2404 if (!first_bb)
2405 first_bb = e->src;
2406 else if (e->src != first_bb)
2407 return false;
2411 if (!first_bb)
2412 return false;
2414 gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (first_bb));
2415 if (!stmt
2416 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2417 return false;
2419 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2420 gimple_cond_lhs (stmt),
2421 nonconstant_names);
2422 if (*p == true)
2423 return false;
2424 else
2425 return true;
2428 /* Given a PHI statement in a function described by inline properties SUMMARY
2429 and *P being the predicate describing whether the selected PHI argument is
2430 known, store a predicate for the result of the PHI statement into
2431 NONCONSTANT_NAMES, if possible. */
2433 static void
2434 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2435 ipa_predicate *p,
2436 vec<ipa_predicate> nonconstant_names)
2438 unsigned i;
2440 for (i = 0; i < gimple_phi_num_args (phi); i++)
2442 tree arg = gimple_phi_arg (phi, i)->def;
2443 if (!is_gimple_min_invariant (arg))
2445 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2446 *p = p->or_with (summary->conds,
2447 nonconstant_names[SSA_NAME_VERSION (arg)]);
2448 if (*p == true)
2449 return;
2453 if (dump_file && (dump_flags & TDF_DETAILS))
2455 fprintf (dump_file, "\t\tphi predicate: ");
2456 p->dump (dump_file, summary->conds);
2458 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2461 /* For a typical usage of __builtin_expect (a<b, 1), we
2462 may introduce an extra relation stmt:
2463 With the builtin, we have
2464 t1 = a <= b;
2465 t2 = (long int) t1;
2466 t3 = __builtin_expect (t2, 1);
2467 if (t3 != 0)
2468 goto ...
2469 Without the builtin, we have
2470 if (a<=b)
2471 goto...
2472 This affects the size/time estimation and may have
2473 an impact on the earlier inlining.
2474 Here find this pattern and fix it up later. */
2476 static gimple *
2477 find_foldable_builtin_expect (basic_block bb)
2479 gimple_stmt_iterator bsi;
2481 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2483 gimple *stmt = gsi_stmt (bsi);
2484 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2485 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2486 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2488 tree var = gimple_call_lhs (stmt);
2489 tree arg = gimple_call_arg (stmt, 0);
2490 use_operand_p use_p;
2491 gimple *use_stmt;
2492 bool match = false;
2493 bool done = false;
2495 if (!var || !arg)
2496 continue;
2497 gcc_assert (TREE_CODE (var) == SSA_NAME);
2499 while (TREE_CODE (arg) == SSA_NAME)
2501 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2502 if (!is_gimple_assign (stmt_tmp))
2503 break;
2504 switch (gimple_assign_rhs_code (stmt_tmp))
2506 case LT_EXPR:
2507 case LE_EXPR:
2508 case GT_EXPR:
2509 case GE_EXPR:
2510 case EQ_EXPR:
2511 case NE_EXPR:
2512 match = true;
2513 done = true;
2514 break;
2515 CASE_CONVERT:
2516 break;
2517 default:
2518 done = true;
2519 break;
2521 if (done)
2522 break;
2523 arg = gimple_assign_rhs1 (stmt_tmp);
2526 if (match && single_imm_use (var, &use_p, &use_stmt)
2527 && gimple_code (use_stmt) == GIMPLE_COND)
2528 return use_stmt;
2531 return NULL;
2534 /* Return true when the basic blocks contains only clobbers followed by RESX.
2535 Such BBs are kept around to make removal of dead stores possible with
2536 presence of EH and will be optimized out by optimize_clobbers later in the
2537 game.
2539 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2540 that can be clobber only, too.. When it is false, the RESX is not necessary
2541 on the end of basic block. */
2543 static bool
2544 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2546 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2547 edge_iterator ei;
2548 edge e;
2550 if (need_eh)
2552 if (gsi_end_p (gsi))
2553 return false;
2554 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2555 return false;
2556 gsi_prev (&gsi);
2558 else if (!single_succ_p (bb))
2559 return false;
2561 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2563 gimple *stmt = gsi_stmt (gsi);
2564 if (is_gimple_debug (stmt))
2565 continue;
2566 if (gimple_clobber_p (stmt))
2567 continue;
2568 if (gimple_code (stmt) == GIMPLE_LABEL)
2569 break;
2570 return false;
2573 /* See if all predecessors are either throws or clobber only BBs. */
2574 FOR_EACH_EDGE (e, ei, bb->preds)
2575 if (!(e->flags & EDGE_EH)
2576 && !clobber_only_eh_bb_p (e->src, false))
2577 return false;
2579 return true;
2582 /* Return true if STMT compute a floating point expression that may be affected
2583 by -ffast-math and similar flags. */
2585 static bool
2586 fp_expression_p (gimple *stmt)
2588 ssa_op_iter i;
2589 tree op;
2591 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2592 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2593 return true;
2594 return false;
2597 /* Return true if T references memory location that is local
2598 for the function (that means, dead after return) or read-only. */
2600 bool
2601 refs_local_or_readonly_memory_p (tree t)
2603 /* Non-escaping memory is fine. */
2604 t = get_base_address (t);
2605 if ((TREE_CODE (t) == MEM_REF
2606 || TREE_CODE (t) == TARGET_MEM_REF))
2607 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2609 /* Automatic variables are fine. */
2610 if (DECL_P (t)
2611 && auto_var_in_fn_p (t, current_function_decl))
2612 return true;
2614 /* Read-only variables are fine. */
2615 if (DECL_P (t) && TREE_READONLY (t))
2616 return true;
2618 return false;
2621 /* Return true if T is a pointer pointing to memory location that is local
2622 for the function (that means, dead after return) or read-only. */
2624 bool
2625 points_to_local_or_readonly_memory_p (tree t)
2627 /* See if memory location is clearly invalid. */
2628 if (integer_zerop (t))
2629 return flag_delete_null_pointer_checks;
2630 if (TREE_CODE (t) == SSA_NAME)
2632 /* For IPA passes we can consinder accesses to return slot local
2633 even if it is not local in the sense that memory is dead by
2634 the end of founction.
2635 The outer function will see a store in the call assignment
2636 and thus this will do right thing for all uses of this
2637 function in the current IPA passes (modref, pure/const discovery
2638 and inlining heuristics). */
2639 if (DECL_RESULT (current_function_decl)
2640 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
2641 && t == ssa_default_def (cfun, DECL_RESULT (current_function_decl)))
2642 return true;
2643 return !ptr_deref_may_alias_global_p (t, false);
2645 if (TREE_CODE (t) == ADDR_EXPR)
2646 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2647 return false;
2650 /* Return true if T is a pointer pointing to memory location that is possible
2651 sra candidate if all functions it is passed to are inlined. */
2653 static bool
2654 points_to_possible_sra_candidate_p (tree t)
2656 if (TREE_CODE (t) != ADDR_EXPR)
2657 return false;
2659 t = get_base_address (TREE_OPERAND (t, 0));
2661 /* Automatic variables are fine. */
2662 if (DECL_P (t)
2663 && auto_var_in_fn_p (t, current_function_decl))
2664 return true;
2665 return false;
2668 /* Analyze function body for NODE.
2669 EARLY indicates run from early optimization pipeline. */
2671 static void
2672 analyze_function_body (struct cgraph_node *node, bool early)
2674 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2675 /* Estimate static overhead for function prologue/epilogue and alignment. */
2676 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2677 /* Benefits are scaled by probability of elimination that is in range
2678 <0,2>. */
2679 basic_block bb;
2680 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2681 sreal freq;
2682 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2683 ipa_node_params *params_summary
2684 = early ? NULL : ipa_node_params_sum->get (node);
2685 ipa_predicate bb_predicate;
2686 struct ipa_func_body_info fbi;
2687 vec<ipa_predicate> nonconstant_names = vNULL;
2688 int nblocks, n;
2689 int *order;
2690 gimple *fix_builtin_expect_stmt;
2692 gcc_assert (my_function && my_function->cfg);
2693 gcc_assert (cfun == my_function);
2695 memset(&fbi, 0, sizeof(fbi));
2696 vec_free (info->conds);
2697 info->conds = NULL;
2698 info->size_time_table.release ();
2699 info->call_size_time_table.release ();
2701 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2702 so we can produce proper inline hints.
2704 When optimizing and analyzing for early inliner, initialize node params
2705 so we can produce correct BB predicates. */
2707 if (opt_for_fn (node->decl, optimize))
2709 calculate_dominance_info (CDI_DOMINATORS);
2710 calculate_dominance_info (CDI_POST_DOMINATORS);
2711 if (!early)
2712 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2713 else
2715 ipa_check_create_node_params ();
2716 ipa_initialize_node_params (node);
2719 if (ipa_node_params_sum)
2721 fbi.node = node;
2722 fbi.info = ipa_node_params_sum->get (node);
2723 fbi.bb_infos = vNULL;
2724 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
2725 fbi.param_count = count_formal_params (node->decl);
2726 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2728 nonconstant_names.safe_grow_cleared
2729 (SSANAMES (my_function)->length (), true);
2733 if (dump_file)
2734 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2735 node->dump_name ());
2737 /* When we run into maximal number of entries, we assign everything to the
2738 constant truth case. Be sure to have it in list. */
2739 bb_predicate = true;
2740 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2742 bb_predicate = ipa_predicate::not_inlined ();
2743 info->account_size_time (opt_for_fn (node->decl,
2744 param_uninlined_function_insns)
2745 * ipa_fn_summary::size_scale,
2746 opt_for_fn (node->decl,
2747 param_uninlined_function_time),
2748 bb_predicate,
2749 bb_predicate);
2751 /* Only look for target information for inlinable functions. */
2752 bool scan_for_target_info =
2753 info->inlinable
2754 && targetm.target_option.need_ipa_fn_target_info (node->decl,
2755 info->target_info);
2757 if (fbi.info)
2758 compute_bb_predicates (&fbi, node, info, params_summary);
2759 const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2760 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2761 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2762 for (n = 0; n < nblocks; n++)
2764 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2765 freq = bb->count.to_sreal_scale (entry_count);
2766 if (clobber_only_eh_bb_p (bb))
2768 if (dump_file && (dump_flags & TDF_DETAILS))
2769 fprintf (dump_file, "\n Ignoring BB %i;"
2770 " it will be optimized away by cleanup_clobbers\n",
2771 bb->index);
2772 continue;
2775 /* TODO: Obviously predicates can be propagated down across CFG. */
2776 if (fbi.info)
2778 if (bb->aux)
2779 bb_predicate = *(ipa_predicate *)bb->aux;
2780 else
2781 bb_predicate = false;
2783 else
2784 bb_predicate = true;
2786 if (dump_file && (dump_flags & TDF_DETAILS))
2788 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2789 bb_predicate.dump (dump_file, info->conds);
2792 if (fbi.info && nonconstant_names.exists ())
2794 ipa_predicate phi_predicate;
2795 bool first_phi = true;
2797 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2798 gsi_next (&bsi))
2800 if (first_phi
2801 && !phi_result_unknown_predicate (&fbi, info,
2802 params_summary,
2804 &phi_predicate,
2805 nonconstant_names))
2806 break;
2807 first_phi = false;
2808 if (dump_file && (dump_flags & TDF_DETAILS))
2810 fprintf (dump_file, " ");
2811 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2813 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2814 nonconstant_names);
2818 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2820 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2821 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2823 gimple *stmt = gsi_stmt (bsi);
2824 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2825 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2826 int prob;
2827 ipa_predicate will_be_nonconstant;
2829 /* This relation stmt should be folded after we remove
2830 __builtin_expect call. Adjust the cost here. */
2831 if (stmt == fix_builtin_expect_stmt)
2833 this_size--;
2834 this_time--;
2837 if (dump_file && (dump_flags & TDF_DETAILS))
2839 fprintf (dump_file, " ");
2840 print_gimple_stmt (dump_file, stmt, 0);
2841 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2842 freq.to_double (), this_size,
2843 this_time);
2846 if (is_gimple_call (stmt)
2847 && !gimple_call_internal_p (stmt))
2849 struct cgraph_edge *edge = node->get_edge (stmt);
2850 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2852 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2853 resolved as constant. We however don't want to optimize
2854 out the cgraph edges. */
2855 if (nonconstant_names.exists ()
2856 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2857 && gimple_call_lhs (stmt)
2858 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2860 ipa_predicate false_p = false;
2861 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2862 = false_p;
2864 if (ipa_node_params_sum)
2866 int count = gimple_call_num_args (stmt);
2867 int i;
2869 if (count)
2870 es->param.safe_grow_cleared (count, true);
2871 for (i = 0; i < count; i++)
2873 int prob = param_change_prob (&fbi, stmt, i);
2874 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2875 es->param[i].change_prob = prob;
2876 es->param[i].points_to_local_or_readonly_memory
2877 = points_to_local_or_readonly_memory_p
2878 (gimple_call_arg (stmt, i));
2879 es->param[i].points_to_possible_sra_candidate
2880 = points_to_possible_sra_candidate_p
2881 (gimple_call_arg (stmt, i));
2884 /* We cannot setup VLA parameters during inlining. */
2885 for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
2886 if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
2888 edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
2889 break;
2891 es->call_stmt_size = this_size;
2892 es->call_stmt_time = this_time;
2893 es->loop_depth = bb_loop_depth (bb);
2894 edge_set_predicate (edge, &bb_predicate);
2895 if (edge->speculative)
2897 cgraph_edge *indirect
2898 = edge->speculative_call_indirect_edge ();
2899 ipa_call_summary *es2
2900 = ipa_call_summaries->get_create (indirect);
2901 ipa_call_summaries->duplicate (edge, indirect,
2902 es, es2);
2904 /* Edge is the first direct call.
2905 create and duplicate call summaries for multiple
2906 speculative call targets. */
2907 for (cgraph_edge *direct
2908 = edge->next_speculative_call_target ();
2909 direct;
2910 direct = direct->next_speculative_call_target ())
2912 ipa_call_summary *es3
2913 = ipa_call_summaries->get_create (direct);
2914 ipa_call_summaries->duplicate (edge, direct,
2915 es, es3);
2920 /* TODO: When conditional jump or switch is known to be constant, but
2921 we did not translate it into the predicates, we really can account
2922 just maximum of the possible paths. */
2923 if (fbi.info)
2924 will_be_nonconstant
2925 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2926 stmt, nonconstant_names);
2927 else
2928 will_be_nonconstant = true;
2929 if (this_time || this_size)
2931 sreal final_time = (sreal)this_time * freq;
2932 prob = eliminated_by_inlining_prob (&fbi, stmt);
2933 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2934 fprintf (dump_file,
2935 "\t\t50%% will be eliminated by inlining\n");
2936 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2937 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2939 ipa_predicate p = bb_predicate & will_be_nonconstant;
2940 int parm = load_or_store_of_ptr_parameter (&fbi, stmt);
2941 ipa_predicate sra_predicate = true;
2942 if (parm != -1)
2943 sra_predicate &= add_condition (info, params_summary, parm,
2944 ptr_type_node, NULL,
2945 ipa_predicate::not_sra_candidate, NULL, 0);
2947 /* We can ignore statement when we proved it is never going
2948 to happen, but we cannot do that for call statements
2949 because edges are accounted specially. */
2951 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2953 time += final_time;
2954 size += this_size;
2957 /* We account everything but the calls. Calls have their own
2958 size/time info attached to cgraph edges. This is necessary
2959 in order to make the cost disappear after inlining. */
2960 if (!is_gimple_call (stmt))
2962 if (prob)
2964 ipa_predicate ip
2965 = bb_predicate & ipa_predicate::not_inlined () & sra_predicate;
2966 info->account_size_time (this_size * prob,
2967 (final_time * prob) / 2, ip,
2970 if (prob != 2)
2971 info->account_size_time (this_size * (2 - prob),
2972 (final_time * (2 - prob) / 2),
2973 bb_predicate & sra_predicate,
2977 if (!info->fp_expressions && fp_expression_p (stmt))
2979 info->fp_expressions = true;
2980 if (dump_file)
2981 fprintf (dump_file, " fp_expression set\n");
2985 /* For target specific information, we want to scan all statements
2986 rather than those statements with non-zero weights, to avoid
2987 missing to scan something interesting for target information,
2988 such as: internal function calls. */
2989 if (scan_for_target_info)
2990 scan_for_target_info =
2991 targetm.target_option.update_ipa_fn_target_info
2992 (info->target_info, stmt);
2994 /* Account cost of address calculations in the statements. */
2995 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2997 for (tree op = gimple_op (stmt, i);
2998 op && handled_component_p (op);
2999 op = TREE_OPERAND (op, 0))
3000 if ((TREE_CODE (op) == ARRAY_REF
3001 || TREE_CODE (op) == ARRAY_RANGE_REF)
3002 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
3004 ipa_predicate p = bb_predicate;
3005 if (fbi.info)
3006 p = p & will_be_nonconstant_expr_predicate
3007 (&fbi, info, params_summary,
3008 TREE_OPERAND (op, 1),
3009 nonconstant_names);
3010 if (p != false)
3012 time += freq;
3013 size += 1;
3014 if (dump_file)
3015 fprintf (dump_file,
3016 "\t\tAccounting address calculation.\n");
3017 info->account_size_time (ipa_fn_summary::size_scale,
3018 freq,
3019 bb_predicate,
3027 free (order);
3029 if (nonconstant_names.exists () && !early)
3031 ipa_fn_summary *s = ipa_fn_summaries->get (node);
3032 unsigned max_loop_predicates = opt_for_fn (node->decl,
3033 param_ipa_max_loop_predicates);
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3036 flow_loops_dump (dump_file, NULL, 0);
3037 scev_initialize ();
3038 for (auto loop : loops_list (cfun, 0))
3040 ipa_predicate loop_iterations = true;
3041 sreal header_freq;
3042 edge ex;
3043 unsigned int j;
3044 class tree_niter_desc niter_desc;
3045 if (!loop->header->aux)
3046 continue;
3048 profile_count phdr_count = loop_preheader_edge (loop)->count ();
3049 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
3051 bb_predicate = *(ipa_predicate *)loop->header->aux;
3052 auto_vec<edge> exits = get_loop_exit_edges (loop);
3053 FOR_EACH_VEC_ELT (exits, j, ex)
3054 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
3055 && !is_gimple_min_invariant (niter_desc.niter))
3057 ipa_predicate will_be_nonconstant
3058 = will_be_nonconstant_expr_predicate (&fbi, info,
3059 params_summary,
3060 niter_desc.niter,
3061 nonconstant_names);
3062 if (will_be_nonconstant != true)
3063 will_be_nonconstant = bb_predicate & will_be_nonconstant;
3064 if (will_be_nonconstant != true
3065 && will_be_nonconstant != false)
3066 loop_iterations &= will_be_nonconstant;
3068 add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
3069 phdr_freq, max_loop_predicates);
3072 /* To avoid quadratic behavior we analyze stride predicates only
3073 with respect to the containing loop. Thus we simply iterate
3074 over all defs in the outermost loop body. */
3075 for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
3076 loop != NULL; loop = loop->next)
3078 ipa_predicate loop_stride = true;
3079 basic_block *body = get_loop_body (loop);
3080 profile_count phdr_count = loop_preheader_edge (loop)->count ();
3081 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
3082 for (unsigned i = 0; i < loop->num_nodes; i++)
3084 gimple_stmt_iterator gsi;
3085 if (!body[i]->aux)
3086 continue;
3088 bb_predicate = *(ipa_predicate *)body[i]->aux;
3089 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
3090 gsi_next (&gsi))
3092 gimple *stmt = gsi_stmt (gsi);
3094 if (!is_gimple_assign (stmt))
3095 continue;
3097 tree def = gimple_assign_lhs (stmt);
3098 if (TREE_CODE (def) != SSA_NAME)
3099 continue;
3101 affine_iv iv;
3102 if (!simple_iv (loop_containing_stmt (stmt),
3103 loop_containing_stmt (stmt),
3104 def, &iv, true)
3105 || is_gimple_min_invariant (iv.step))
3106 continue;
3108 ipa_predicate will_be_nonconstant
3109 = will_be_nonconstant_expr_predicate (&fbi, info,
3110 params_summary,
3111 iv.step,
3112 nonconstant_names);
3113 if (will_be_nonconstant != true)
3114 will_be_nonconstant = bb_predicate & will_be_nonconstant;
3115 if (will_be_nonconstant != true
3116 && will_be_nonconstant != false)
3117 loop_stride = loop_stride & will_be_nonconstant;
3120 add_freqcounting_predicate (&s->loop_strides, loop_stride,
3121 phdr_freq, max_loop_predicates);
3122 free (body);
3124 scev_finalize ();
3126 FOR_ALL_BB_FN (bb, my_function)
3128 edge e;
3129 edge_iterator ei;
3131 if (bb->aux)
3132 edge_predicate_pool.remove ((ipa_predicate *)bb->aux);
3133 bb->aux = NULL;
3134 FOR_EACH_EDGE (e, ei, bb->succs)
3136 if (e->aux)
3137 edge_predicate_pool.remove ((ipa_predicate *)e->aux);
3138 e->aux = NULL;
3141 ipa_fn_summary *s = ipa_fn_summaries->get (node);
3142 ipa_size_summary *ss = ipa_size_summaries->get (node);
3143 s->time = time;
3144 ss->self_size = size;
3145 nonconstant_names.release ();
3146 ipa_release_body_info (&fbi);
3147 if (opt_for_fn (node->decl, optimize))
3149 if (!early)
3150 loop_optimizer_finalize ();
3151 else if (!ipa_edge_args_sum)
3152 ipa_free_all_node_params ();
3153 free_dominance_info (CDI_DOMINATORS);
3154 free_dominance_info (CDI_POST_DOMINATORS);
3156 if (dump_file)
3158 fprintf (dump_file, "\n");
3159 ipa_dump_fn_summary (dump_file, node);
3164 /* Compute function summary.
3165 EARLY is true when we compute parameters during early opts. */
3167 void
3168 compute_fn_summary (struct cgraph_node *node, bool early)
3170 HOST_WIDE_INT self_stack_size;
3171 struct cgraph_edge *e;
3173 gcc_assert (!node->inlined_to);
3175 if (!ipa_fn_summaries)
3176 ipa_fn_summary_alloc ();
3178 /* Create a new ipa_fn_summary. */
3179 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
3180 ipa_fn_summaries->remove (node);
3181 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
3182 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
3184 /* Estimate the stack size for the function if we're optimizing. */
3185 self_stack_size = optimize && !node->thunk
3186 ? estimated_stack_frame_size (node) : 0;
3187 size_info->estimated_self_stack_size = self_stack_size;
3188 info->estimated_stack_size = self_stack_size;
3190 if (node->thunk)
3192 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
3193 ipa_predicate t = true;
3195 node->can_change_signature = false;
3196 es->call_stmt_size = eni_size_weights.call_cost;
3197 es->call_stmt_time = eni_time_weights.call_cost;
3198 info->account_size_time (ipa_fn_summary::size_scale
3199 * opt_for_fn (node->decl,
3200 param_uninlined_function_thunk_insns),
3201 opt_for_fn (node->decl,
3202 param_uninlined_function_thunk_time), t, t);
3203 t = ipa_predicate::not_inlined ();
3204 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
3205 ipa_update_overall_fn_summary (node);
3206 size_info->self_size = size_info->size;
3207 if (stdarg_p (TREE_TYPE (node->decl)))
3209 info->inlinable = false;
3210 node->callees->inline_failed = CIF_VARIADIC_THUNK;
3212 else
3213 info->inlinable = true;
3215 else
3217 /* Even is_gimple_min_invariant rely on current_function_decl. */
3218 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3220 /* During IPA profile merging we may be called w/o virtual SSA form
3221 built. */
3222 update_ssa (TODO_update_ssa_only_virtuals);
3224 /* Can this function be inlined at all? */
3225 if (!opt_for_fn (node->decl, optimize)
3226 && !lookup_attribute ("always_inline",
3227 DECL_ATTRIBUTES (node->decl)))
3228 info->inlinable = false;
3229 else
3230 info->inlinable = tree_inlinable_function_p (node->decl);
3232 bool no_signature = false;
3233 /* Type attributes can use parameter indices to describe them.
3234 Special case fn spec since we can safely preserve them in
3235 modref summaries. */
3236 for (tree list = TYPE_ATTRIBUTES (TREE_TYPE (node->decl));
3237 list && !no_signature; list = TREE_CHAIN (list))
3238 if (!ipa_param_adjustments::type_attribute_allowed_p
3239 (get_attribute_name (list)))
3241 if (dump_file)
3243 fprintf (dump_file, "No signature change:"
3244 " function type has unhandled attribute %s.\n",
3245 IDENTIFIER_POINTER (get_attribute_name (list)));
3247 no_signature = true;
3249 for (tree parm = DECL_ARGUMENTS (node->decl);
3250 parm && !no_signature; parm = DECL_CHAIN (parm))
3251 if (variably_modified_type_p (TREE_TYPE (parm), node->decl))
3253 if (dump_file)
3255 fprintf (dump_file, "No signature change:"
3256 " has parameter with variably modified type.\n");
3258 no_signature = true;
3261 /* Likewise for #pragma omp declare simd functions or functions
3262 with simd attribute. */
3263 if (no_signature
3264 || lookup_attribute ("omp declare simd",
3265 DECL_ATTRIBUTES (node->decl)))
3266 node->can_change_signature = false;
3267 else
3269 /* Otherwise, inlinable functions always can change signature. */
3270 if (info->inlinable)
3271 node->can_change_signature = true;
3272 else
3274 /* Functions calling builtin_apply cannot change signature. */
3275 for (e = node->callees; e; e = e->next_callee)
3277 tree cdecl = e->callee->decl;
3278 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS,
3279 BUILT_IN_VA_START))
3280 break;
3282 node->can_change_signature = !e;
3285 analyze_function_body (node, early);
3286 pop_cfun ();
3289 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3290 size_info->size = size_info->self_size;
3291 info->estimated_stack_size = size_info->estimated_self_stack_size;
3293 /* Code above should compute exactly the same result as
3294 ipa_update_overall_fn_summary except for case when speculative
3295 edges are present since these are accounted to size but not
3296 self_size. Do not compare time since different order the roundoff
3297 errors result in slight changes. */
3298 ipa_update_overall_fn_summary (node);
3299 if (flag_checking)
3301 for (e = node->indirect_calls; e; e = e->next_callee)
3302 if (e->speculative)
3303 break;
3304 gcc_assert (e || size_info->size == size_info->self_size);
3309 /* Compute parameters of functions used by inliner using
3310 current_function_decl. */
3312 static unsigned int
3313 compute_fn_summary_for_current (void)
3315 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3316 return 0;
3319 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3320 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3321 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3323 static bool
3324 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3325 int *size, int *time,
3326 ipa_call_arg_values *avals)
3328 tree target;
3329 struct cgraph_node *callee;
3330 class ipa_fn_summary *isummary;
3331 enum availability avail;
3332 bool speculative;
3334 if (!avals
3335 || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
3336 return false;
3337 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3338 return false;
3340 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3341 if (!target || speculative)
3342 return false;
3344 /* Account for difference in cost between indirect and direct calls. */
3345 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3346 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3347 gcc_checking_assert (*time >= 0);
3348 gcc_checking_assert (*size >= 0);
3350 callee = cgraph_node::get (target);
3351 if (!callee || !callee->definition)
3352 return false;
3353 callee = callee->function_symbol (&avail);
3354 if (avail < AVAIL_AVAILABLE)
3355 return false;
3356 isummary = ipa_fn_summaries->get (callee);
3357 if (isummary == NULL)
3358 return false;
3360 return isummary->inlinable;
3363 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3364 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3365 devirtualized. AVALS, if non-NULL, describes the context of the call site
3366 as far as values of parameters are concerened. */
3368 static inline void
3369 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3370 sreal *time, ipa_call_arg_values *avals,
3371 ipa_hints *hints)
3373 class ipa_call_summary *es = ipa_call_summaries->get (e);
3374 int call_size = es->call_stmt_size;
3375 int call_time = es->call_stmt_time;
3376 int cur_size;
3378 if (!e->callee && hints && e->maybe_hot_p ()
3379 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3380 *hints |= INLINE_HINT_indirect_call;
3381 cur_size = call_size * ipa_fn_summary::size_scale;
3382 *size += cur_size;
3383 if (min_size)
3384 *min_size += cur_size;
3385 if (time)
3386 *time += ((sreal)call_time) * e->sreal_frequency ();
3390 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3391 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3392 site.
3394 Helper for estimate_calls_size_and_time which does the same but
3395 (in most cases) faster. */
3397 static void
3398 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3399 int *min_size, sreal *time,
3400 ipa_hints *hints,
3401 clause_t possible_truths,
3402 ipa_call_arg_values *avals)
3404 struct cgraph_edge *e;
3405 for (e = node->callees; e; e = e->next_callee)
3407 if (!e->inline_failed)
3409 gcc_checking_assert (!ipa_call_summaries->get (e));
3410 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3411 hints, possible_truths, avals);
3413 continue;
3415 class ipa_call_summary *es = ipa_call_summaries->get (e);
3417 /* Do not care about zero sized builtins. */
3418 if (!es->call_stmt_size)
3420 gcc_checking_assert (!es->call_stmt_time);
3421 continue;
3423 if (!es->predicate
3424 || es->predicate->evaluate (possible_truths))
3426 /* Predicates of calls shall not use NOT_CHANGED codes,
3427 so we do not need to compute probabilities. */
3428 estimate_edge_size_and_time (e, size,
3429 es->predicate ? NULL : min_size,
3430 time, avals, hints);
3433 for (e = node->indirect_calls; e; e = e->next_callee)
3435 class ipa_call_summary *es = ipa_call_summaries->get (e);
3436 if (!es->predicate
3437 || es->predicate->evaluate (possible_truths))
3438 estimate_edge_size_and_time (e, size,
3439 es->predicate ? NULL : min_size,
3440 time, avals, hints);
3444 /* Populate sum->call_size_time_table for edges from NODE. */
3446 static void
3447 summarize_calls_size_and_time (struct cgraph_node *node,
3448 ipa_fn_summary *sum)
3450 struct cgraph_edge *e;
3451 for (e = node->callees; e; e = e->next_callee)
3453 if (!e->inline_failed)
3455 gcc_checking_assert (!ipa_call_summaries->get (e));
3456 summarize_calls_size_and_time (e->callee, sum);
3457 continue;
3459 int size = 0;
3460 sreal time = 0;
3462 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3464 ipa_predicate pred = true;
3465 class ipa_call_summary *es = ipa_call_summaries->get (e);
3467 if (es->predicate)
3468 pred = *es->predicate;
3469 sum->account_size_time (size, time, pred, pred, true);
3471 for (e = node->indirect_calls; e; e = e->next_callee)
3473 int size = 0;
3474 sreal time = 0;
3476 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3477 ipa_predicate pred = true;
3478 class ipa_call_summary *es = ipa_call_summaries->get (e);
3480 if (es->predicate)
3481 pred = *es->predicate;
3482 sum->account_size_time (size, time, pred, pred, true);
3486 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3487 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3488 context of the call site. */
3490 static void
3491 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3492 int *min_size, sreal *time,
3493 ipa_hints *hints,
3494 clause_t possible_truths,
3495 ipa_call_arg_values *avals)
3497 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3498 bool use_table = true;
3500 gcc_assert (node->callees || node->indirect_calls);
3502 /* During early inlining we do not calculate info for very
3503 large functions and thus there is no need for producing
3504 summaries. */
3505 if (!ipa_node_params_sum)
3506 use_table = false;
3507 /* Do not calculate summaries for simple wrappers; it is waste
3508 of memory. */
3509 else if (node->callees && node->indirect_calls
3510 && node->callees->inline_failed && !node->callees->next_callee)
3511 use_table = false;
3512 /* If there is an indirect edge that may be optimized, we need
3513 to go the slow way. */
3514 else if (avals && hints
3515 && (avals->m_known_vals.length ()
3516 || avals->m_known_contexts.length ()
3517 || avals->m_known_aggs.length ()))
3519 ipa_node_params *params_summary = ipa_node_params_sum->get (node);
3520 unsigned int nargs = params_summary
3521 ? ipa_get_param_count (params_summary) : 0;
3523 for (unsigned int i = 0; i < nargs && use_table; i++)
3525 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3526 && (avals->safe_sval_at (i)
3527 || (ipa_argagg_value_list (avals).value_for_index_p (i))))
3528 use_table = false;
3529 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3530 && (avals->m_known_contexts.length () > i
3531 && !avals->m_known_contexts[i].useless_p ()))
3532 use_table = false;
3536 /* Fast path is via the call size time table. */
3537 if (use_table)
3539 /* Build summary if it is absent. */
3540 if (!sum->call_size_time_table.length ())
3542 ipa_predicate true_pred = true;
3543 sum->account_size_time (0, 0, true_pred, true_pred, true);
3544 summarize_calls_size_and_time (node, sum);
3547 int old_size = *size;
3548 sreal old_time = time ? *time : 0;
3550 if (min_size)
3551 *min_size += sum->call_size_time_table[0].size;
3553 unsigned int i;
3554 size_time_entry *e;
3556 /* Walk the table and account sizes and times. */
3557 for (i = 0; sum->call_size_time_table.iterate (i, &e);
3558 i++)
3559 if (e->exec_predicate.evaluate (possible_truths))
3561 *size += e->size;
3562 if (time)
3563 *time += e->time;
3566 /* Be careful and see if both methods agree. */
3567 if ((flag_checking || dump_file)
3568 /* Do not try to sanity check when we know we lost some
3569 precision. */
3570 && sum->call_size_time_table.length ()
3571 < ipa_fn_summary::max_size_time_table_size)
3573 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3574 possible_truths, avals);
3575 gcc_assert (*size == old_size);
3576 if (time && (*time - old_time > 1 || *time - old_time < -1)
3577 && dump_file)
3578 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3579 old_time.to_double (),
3580 time->to_double ());
3583 /* Slow path by walking all edges. */
3584 else
3585 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3586 possible_truths, avals);
3589 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3590 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3591 caller. */
3593 ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
3594 clause_t nonspec_possible_truths,
3595 vec<inline_param_summary>
3596 inline_param_summary,
3597 ipa_auto_call_arg_values *arg_values)
3598 : m_node (node), m_possible_truths (possible_truths),
3599 m_nonspec_possible_truths (nonspec_possible_truths),
3600 m_inline_param_summary (inline_param_summary),
3601 m_avals (arg_values)
3605 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3607 void
3608 ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
3610 m_node = ctx.m_node;
3611 m_possible_truths = ctx.m_possible_truths;
3612 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3613 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3614 unsigned int nargs = params_summary
3615 ? ipa_get_param_count (params_summary) : 0;
3617 m_inline_param_summary = vNULL;
3618 /* Copy the info only if there is at least one useful entry. */
3619 if (ctx.m_inline_param_summary.exists ())
3621 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3623 for (unsigned int i = 0; i < n; i++)
3624 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3625 && !ctx.m_inline_param_summary[i].useless_p ())
3627 m_inline_param_summary
3628 = ctx.m_inline_param_summary.copy ();
3629 break;
3632 m_avals.m_known_vals = vNULL;
3633 if (ctx.m_avals.m_known_vals.exists ())
3635 unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);
3637 for (unsigned int i = 0; i < n; i++)
3638 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3639 && ctx.m_avals.m_known_vals[i])
3641 m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
3642 break;
3646 m_avals.m_known_contexts = vNULL;
3647 if (ctx.m_avals.m_known_contexts.exists ())
3649 unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);
3651 for (unsigned int i = 0; i < n; i++)
3652 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3653 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3655 m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
3656 break;
3660 m_avals.m_known_aggs = vNULL;
3661 if (ctx.m_avals.m_known_aggs.exists ())
3663 const ipa_argagg_value_list avl (&ctx.m_avals);
3664 for (unsigned int i = 0; i < nargs; i++)
3665 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3666 && avl.value_for_index_p (i))
3668 m_avals.m_known_aggs = ctx.m_avals.m_known_aggs.copy ();
3669 break;
3673 m_avals.m_known_value_ranges = vNULL;
3676 /* Release memory used by known_vals/contexts/aggs vectors. and
3677 inline_param_summary. */
3679 void
3680 ipa_cached_call_context::release ()
3682 /* See if context is initialized at first place. */
3683 if (!m_node)
3684 return;
3685 m_avals.m_known_aggs.release ();
3686 m_avals.m_known_vals.release ();
3687 m_avals.m_known_contexts.release ();
3688 m_inline_param_summary.release ();
3691 /* Return true if CTX describes the same call context as THIS. */
3693 bool
3694 ipa_call_context::equal_to (const ipa_call_context &ctx)
3696 if (m_node != ctx.m_node
3697 || m_possible_truths != ctx.m_possible_truths
3698 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3699 return false;
3701 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3702 unsigned int nargs = params_summary
3703 ? ipa_get_param_count (params_summary) : 0;
3705 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3707 for (unsigned int i = 0; i < nargs; i++)
3709 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3710 continue;
3711 if (i >= m_inline_param_summary.length ()
3712 || m_inline_param_summary[i].useless_p ())
3714 if (i < ctx.m_inline_param_summary.length ()
3715 && !ctx.m_inline_param_summary[i].useless_p ())
3716 return false;
3717 continue;
3719 if (i >= ctx.m_inline_param_summary.length ()
3720 || ctx.m_inline_param_summary[i].useless_p ())
3722 if (i < m_inline_param_summary.length ()
3723 && !m_inline_param_summary[i].useless_p ())
3724 return false;
3725 continue;
3727 if (!m_inline_param_summary[i].equal_to
3728 (ctx.m_inline_param_summary[i]))
3729 return false;
3732 if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
3734 for (unsigned int i = 0; i < nargs; i++)
3736 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3737 continue;
3738 if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
3740 if (i < ctx.m_avals.m_known_vals.length ()
3741 && ctx.m_avals.m_known_vals[i])
3742 return false;
3743 continue;
3745 if (i >= ctx.m_avals.m_known_vals.length ()
3746 || !ctx.m_avals.m_known_vals[i])
3748 if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
3749 return false;
3750 continue;
3752 if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
3753 return false;
3756 if (m_avals.m_known_contexts.exists ()
3757 || ctx.m_avals.m_known_contexts.exists ())
3759 for (unsigned int i = 0; i < nargs; i++)
3761 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3762 continue;
3763 if (i >= m_avals.m_known_contexts.length ()
3764 || m_avals.m_known_contexts[i].useless_p ())
3766 if (i < ctx.m_avals.m_known_contexts.length ()
3767 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3768 return false;
3769 continue;
3771 if (i >= ctx.m_avals.m_known_contexts.length ()
3772 || ctx.m_avals.m_known_contexts[i].useless_p ())
3774 if (i < m_avals.m_known_contexts.length ()
3775 && !m_avals.m_known_contexts[i].useless_p ())
3776 return false;
3777 continue;
3779 if (!m_avals.m_known_contexts[i].equal_to
3780 (ctx.m_avals.m_known_contexts[i]))
3781 return false;
3784 if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
3786 unsigned i = 0, j = 0;
3787 while (i < m_avals.m_known_aggs.length ()
3788 || j < ctx.m_avals.m_known_aggs.length ())
3790 if (i >= m_avals.m_known_aggs.length ())
3792 int idx2 = ctx.m_avals.m_known_aggs[j].index;
3793 if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
3794 return false;
3795 j++;
3796 continue;
3798 if (j >= ctx.m_avals.m_known_aggs.length ())
3800 int idx1 = m_avals.m_known_aggs[i].index;
3801 if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
3802 return false;
3803 i++;
3804 continue;
3807 int idx1 = m_avals.m_known_aggs[i].index;
3808 int idx2 = ctx.m_avals.m_known_aggs[j].index;
3809 if (idx1 < idx2)
3811 if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
3812 return false;
3813 i++;
3814 continue;
3816 if (idx1 > idx2)
3818 if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
3819 return false;
3820 j++;
3821 continue;
3823 if (!ipa_is_param_used_by_indirect_call (params_summary, idx1))
3825 i++;
3826 j++;
3827 continue;
3830 if ((m_avals.m_known_aggs[i].unit_offset
3831 != ctx.m_avals.m_known_aggs[j].unit_offset)
3832 || (m_avals.m_known_aggs[i].by_ref
3833 != ctx.m_avals.m_known_aggs[j].by_ref)
3834 || !operand_equal_p (m_avals.m_known_aggs[i].value,
3835 ctx.m_avals.m_known_aggs[j].value))
3836 return false;
3837 i++;
3838 j++;
3841 return true;
3844 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3845 this context. Always compute size and min_size. Only compute time and
3846 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3847 is true. */
3849 void
3850 ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
3851 bool est_times, bool est_hints)
3853 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3854 size_time_entry *e;
3855 int size = 0;
3856 sreal time = 0;
3857 int min_size = 0;
3858 ipa_hints hints = 0;
3859 sreal loops_with_known_iterations = 0;
3860 sreal loops_with_known_strides = 0;
3861 int i;
3863 if (dump_file && (dump_flags & TDF_DETAILS))
3865 bool found = false;
3866 fprintf (dump_file, " Estimating body: %s\n"
3867 " Known to be false: ", m_node->dump_name ());
3869 for (i = ipa_predicate::not_inlined_condition;
3870 i < (ipa_predicate::first_dynamic_condition
3871 + (int) vec_safe_length (info->conds)); i++)
3872 if (!(m_possible_truths & (1 << i)))
3874 if (found)
3875 fprintf (dump_file, ", ");
3876 found = true;
3877 dump_condition (dump_file, info->conds, i);
3881 if (m_node->callees || m_node->indirect_calls)
3882 estimate_calls_size_and_time (m_node, &size, &min_size,
3883 est_times ? &time : NULL,
3884 est_hints ? &hints : NULL, m_possible_truths,
3885 &m_avals);
3887 sreal nonspecialized_time = time;
3889 min_size += info->size_time_table[0].size;
3890 for (i = 0; info->size_time_table.iterate (i, &e); i++)
3892 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3894 /* Because predicates are conservative, it can happen that nonconst is 1
3895 but exec is 0. */
3896 if (exec)
3898 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3900 gcc_checking_assert (e->time >= 0);
3901 gcc_checking_assert (time >= 0);
3903 /* We compute specialized size only because size of nonspecialized
3904 copy is context independent.
3906 The difference between nonspecialized execution and specialized is
3907 that nonspecialized is not going to have optimized out computations
3908 known to be constant in a specialized setting. */
3909 if (nonconst)
3910 size += e->size;
3911 if (!est_times)
3912 continue;
3913 nonspecialized_time += e->time;
3914 if (!nonconst)
3916 else if (!m_inline_param_summary.exists ())
3918 if (nonconst)
3919 time += e->time;
3921 else
3923 int prob = e->nonconst_predicate.probability
3924 (info->conds, m_possible_truths,
3925 m_inline_param_summary);
3926 gcc_checking_assert (prob >= 0);
3927 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3928 if (prob == REG_BR_PROB_BASE)
3929 time += e->time;
3930 else
3931 time += e->time * prob / REG_BR_PROB_BASE;
3933 gcc_checking_assert (time >= 0);
3936 gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
3937 gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
3938 gcc_checking_assert (min_size >= 0);
3939 gcc_checking_assert (size >= 0);
3940 gcc_checking_assert (time >= 0);
3941 /* nonspecialized_time should be always bigger than specialized time.
3942 Roundoff issues however may get into the way. */
3943 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3945 /* Roundoff issues may make specialized time bigger than nonspecialized
3946 time. We do not really want that to happen because some heuristics
3947 may get confused by seeing negative speedups. */
3948 if (time > nonspecialized_time)
3949 time = nonspecialized_time;
3951 if (est_hints)
3953 if (info->scc_no)
3954 hints |= INLINE_HINT_in_scc;
3955 if (DECL_DECLARED_INLINE_P (m_node->decl))
3956 hints |= INLINE_HINT_declared_inline;
3957 if (info->builtin_constant_p_parms.length ()
3958 && DECL_DECLARED_INLINE_P (m_node->decl))
3959 hints |= INLINE_HINT_builtin_constant_p;
3961 ipa_freqcounting_predicate *fcp;
3962 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
3963 if (!fcp->predicate->evaluate (m_possible_truths))
3965 hints |= INLINE_HINT_loop_iterations;
3966 loops_with_known_iterations += fcp->freq;
3968 estimates->loops_with_known_iterations = loops_with_known_iterations;
3970 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
3971 if (!fcp->predicate->evaluate (m_possible_truths))
3973 hints |= INLINE_HINT_loop_stride;
3974 loops_with_known_strides += fcp->freq;
3976 estimates->loops_with_known_strides = loops_with_known_strides;
3979 size = RDIV (size, ipa_fn_summary::size_scale);
3980 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, "\n size:%i", (int) size);
3985 if (est_times)
3986 fprintf (dump_file, " time:%f nonspec time:%f",
3987 time.to_double (), nonspecialized_time.to_double ());
3988 if (est_hints)
3989 fprintf (dump_file, " loops with known iterations:%f "
3990 "known strides:%f", loops_with_known_iterations.to_double (),
3991 loops_with_known_strides.to_double ());
3992 fprintf (dump_file, "\n");
3994 if (est_times)
3996 estimates->time = time;
3997 estimates->nonspecialized_time = nonspecialized_time;
3999 estimates->size = size;
4000 estimates->min_size = min_size;
4001 if (est_hints)
4002 estimates->hints = hints;
4003 return;
4007 /* Estimate size and time needed to execute callee of EDGE assuming that
4008 parameters known to be constant at caller of EDGE are propagated.
4009 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
4010 and types for parameters. */
4012 void
4013 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
4014 ipa_auto_call_arg_values *avals,
4015 ipa_call_estimates *estimates)
4017 clause_t clause, nonspec_clause;
4019 evaluate_conditions_for_known_args (node, false, avals, &clause,
4020 &nonspec_clause, NULL);
4021 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
4022 ctx.estimate_size_and_time (estimates);
4025 /* Return stack frame offset where frame of NODE is supposed to start inside
4026 of the function it is inlined to.
4027 Return 0 for functions that are not inlined. */
4029 HOST_WIDE_INT
4030 ipa_get_stack_frame_offset (struct cgraph_node *node)
4032 HOST_WIDE_INT offset = 0;
4033 if (!node->inlined_to)
4034 return 0;
4035 node = node->callers->caller;
4036 while (true)
4038 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
4039 if (!node->inlined_to)
4040 return offset;
4041 node = node->callers->caller;
4046 /* Update summary information of inline clones after inlining.
4047 Compute peak stack usage. */
4049 static void
4050 inline_update_callee_summaries (struct cgraph_node *node, int depth)
4052 struct cgraph_edge *e;
4054 ipa_propagate_frequency (node);
4055 for (e = node->callees; e; e = e->next_callee)
4057 if (!e->inline_failed)
4058 inline_update_callee_summaries (e->callee, depth);
4059 else
4060 ipa_call_summaries->get (e)->loop_depth += depth;
4062 for (e = node->indirect_calls; e; e = e->next_callee)
4063 ipa_call_summaries->get (e)->loop_depth += depth;
4066 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
4067 INLINED_EDGE has been inlined.
4069 When function A is inlined in B and A calls C with parameter that
4070 changes with probability PROB1 and C is known to be passthrough
4071 of argument if B that change with probability PROB2, the probability
4072 of change is now PROB1*PROB2. */
4074 static void
4075 remap_edge_params (struct cgraph_edge *inlined_edge,
4076 struct cgraph_edge *edge)
4078 if (ipa_node_params_sum)
4080 int i;
4081 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4082 if (!args)
4083 return;
4084 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4085 class ipa_call_summary *inlined_es
4086 = ipa_call_summaries->get (inlined_edge);
4088 if (es->param.length () == 0)
4089 return;
4091 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
4093 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4094 if (jfunc->type == IPA_JF_PASS_THROUGH
4095 || jfunc->type == IPA_JF_ANCESTOR)
4097 int id = jfunc->type == IPA_JF_PASS_THROUGH
4098 ? ipa_get_jf_pass_through_formal_id (jfunc)
4099 : ipa_get_jf_ancestor_formal_id (jfunc);
4100 if (id < (int) inlined_es->param.length ())
4102 int prob1 = es->param[i].change_prob;
4103 int prob2 = inlined_es->param[id].change_prob;
4104 int prob = combine_probabilities (prob1, prob2);
4106 if (prob1 && prob2 && !prob)
4107 prob = 1;
4109 es->param[i].change_prob = prob;
4111 if (inlined_es
4112 ->param[id].points_to_local_or_readonly_memory)
4113 es->param[i].points_to_local_or_readonly_memory = true;
4114 if (inlined_es
4115 ->param[id].points_to_possible_sra_candidate)
4116 es->param[i].points_to_possible_sra_candidate = true;
4118 if (!es->param[i].points_to_local_or_readonly_memory
4119 && jfunc->type == IPA_JF_CONST
4120 && points_to_local_or_readonly_memory_p
4121 (ipa_get_jf_constant (jfunc)))
4122 es->param[i].points_to_local_or_readonly_memory = true;
4128 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
4130 Remap predicates of callees of NODE. Rest of arguments match
4131 remap_predicate.
4133 Also update change probabilities. */
4135 static void
4136 remap_edge_summaries (struct cgraph_edge *inlined_edge,
4137 struct cgraph_node *node,
4138 class ipa_fn_summary *info,
4139 class ipa_node_params *params_summary,
4140 class ipa_fn_summary *callee_info,
4141 const vec<int> &operand_map,
4142 const vec<HOST_WIDE_INT> &offset_map,
4143 clause_t possible_truths,
4144 ipa_predicate *toplev_predicate)
4146 struct cgraph_edge *e, *next;
4147 for (e = node->callees; e; e = next)
4149 ipa_predicate p;
4150 next = e->next_callee;
4152 if (e->inline_failed)
4154 class ipa_call_summary *es = ipa_call_summaries->get (e);
4155 remap_edge_params (inlined_edge, e);
4157 if (es->predicate)
4159 p = es->predicate->remap_after_inlining
4160 (info, params_summary,
4161 callee_info, operand_map,
4162 offset_map, possible_truths,
4163 *toplev_predicate);
4164 edge_set_predicate (e, &p);
4166 else
4167 edge_set_predicate (e, toplev_predicate);
4169 else
4170 remap_edge_summaries (inlined_edge, e->callee, info,
4171 params_summary, callee_info,
4172 operand_map, offset_map, possible_truths,
4173 toplev_predicate);
4175 for (e = node->indirect_calls; e; e = next)
4177 class ipa_call_summary *es = ipa_call_summaries->get (e);
4178 ipa_predicate p;
4179 next = e->next_callee;
4181 remap_edge_params (inlined_edge, e);
4182 if (es->predicate)
4184 p = es->predicate->remap_after_inlining
4185 (info, params_summary,
4186 callee_info, operand_map, offset_map,
4187 possible_truths, *toplev_predicate);
4188 edge_set_predicate (e, &p);
4190 else
4191 edge_set_predicate (e, toplev_predicate);
4195 /* Run remap_after_inlining on each predicate in V. */
4197 static void
4198 remap_freqcounting_predicate (class ipa_fn_summary *info,
4199 class ipa_node_params *params_summary,
4200 class ipa_fn_summary *callee_info,
4201 vec<ipa_freqcounting_predicate, va_gc> *v,
4202 const vec<int> &operand_map,
4203 const vec<HOST_WIDE_INT> &offset_map,
4204 clause_t possible_truths,
4205 ipa_predicate *toplev_predicate)
4208 ipa_freqcounting_predicate *fcp;
4209 for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
4211 ipa_predicate p
4212 = fcp->predicate->remap_after_inlining (info, params_summary,
4213 callee_info, operand_map,
4214 offset_map, possible_truths,
4215 *toplev_predicate);
4216 if (p != false && p != true)
4217 *fcp->predicate &= p;
4221 /* We inlined EDGE. Update summary of the function we inlined into. */
4223 void
4224 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
4226 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
4227 struct cgraph_node *to = (edge->caller->inlined_to
4228 ? edge->caller->inlined_to : edge->caller);
4229 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
4230 clause_t clause = 0; /* not_inline is known to be false. */
4231 size_time_entry *e;
4232 auto_vec<int, 8> operand_map;
4233 auto_vec<HOST_WIDE_INT, 8> offset_map;
4234 int i;
4235 ipa_predicate toplev_predicate;
4236 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4237 ipa_node_params *params_summary = (ipa_node_params_sum
4238 ? ipa_node_params_sum->get (to) : NULL);
4240 if (es->predicate)
4241 toplev_predicate = *es->predicate;
4242 else
4243 toplev_predicate = true;
4245 info->fp_expressions |= callee_info->fp_expressions;
4246 info->target_info |= callee_info->target_info;
4248 if (callee_info->conds)
4250 ipa_auto_call_arg_values avals;
4251 evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
4253 if (ipa_node_params_sum && callee_info->conds)
4255 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4256 int count = args ? ipa_get_cs_argument_count (args) : 0;
4257 int i;
4259 if (count)
4261 operand_map.safe_grow_cleared (count, true);
4262 offset_map.safe_grow_cleared (count, true);
4264 for (i = 0; i < count; i++)
4266 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4267 int map = -1;
4269 /* TODO: handle non-NOPs when merging. */
4270 if (jfunc->type == IPA_JF_PASS_THROUGH)
4272 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4273 map = ipa_get_jf_pass_through_formal_id (jfunc);
4274 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
4275 offset_map[i] = -1;
4277 else if (jfunc->type == IPA_JF_ANCESTOR)
4279 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
4280 if (offset >= 0 && offset < INT_MAX)
4282 map = ipa_get_jf_ancestor_formal_id (jfunc);
4283 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
4284 offset = -1;
4285 offset_map[i] = offset;
4288 operand_map[i] = map;
4289 gcc_assert (map < ipa_get_param_count (params_summary));
4292 int ip;
4293 for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
4294 if (ip < count && operand_map[ip] >= 0)
4295 add_builtin_constant_p_parm (info, operand_map[ip]);
4297 sreal freq = edge->sreal_frequency ();
4298 for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
4300 ipa_predicate p;
4301 p = e->exec_predicate.remap_after_inlining
4302 (info, params_summary,
4303 callee_info, operand_map,
4304 offset_map, clause,
4305 toplev_predicate);
4306 ipa_predicate nonconstp;
4307 nonconstp = e->nonconst_predicate.remap_after_inlining
4308 (info, params_summary,
4309 callee_info, operand_map,
4310 offset_map, clause,
4311 toplev_predicate);
4312 if (p != false && nonconstp != false)
4314 sreal add_time = ((sreal)e->time * freq);
4315 int prob = e->nonconst_predicate.probability (callee_info->conds,
4316 clause, es->param);
4317 if (prob != REG_BR_PROB_BASE)
4318 add_time = add_time * prob / REG_BR_PROB_BASE;
4319 if (prob != REG_BR_PROB_BASE
4320 && dump_file && (dump_flags & TDF_DETAILS))
4322 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
4323 (double) prob / REG_BR_PROB_BASE);
4325 info->account_size_time (e->size, add_time, p, nonconstp);
4328 remap_edge_summaries (edge, edge->callee, info, params_summary,
4329 callee_info, operand_map,
4330 offset_map, clause, &toplev_predicate);
4331 remap_freqcounting_predicate (info, params_summary, callee_info,
4332 info->loop_iterations, operand_map,
4333 offset_map, clause, &toplev_predicate);
4334 remap_freqcounting_predicate (info, params_summary, callee_info,
4335 info->loop_strides, operand_map,
4336 offset_map, clause, &toplev_predicate);
4338 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4339 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4341 if (info->estimated_stack_size < peak)
4342 info->estimated_stack_size = peak;
4344 inline_update_callee_summaries (edge->callee, es->loop_depth);
4345 if (info->call_size_time_table.length ())
4347 int edge_size = 0;
4348 sreal edge_time = 0;
4350 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
4351 /* Unaccount size and time of the optimized out call. */
4352 info->account_size_time (-edge_size, -edge_time,
4353 es->predicate ? *es->predicate : true,
4354 es->predicate ? *es->predicate : true,
4355 true);
4356 /* Account new calls. */
4357 summarize_calls_size_and_time (edge->callee, info);
4360 /* Free summaries that are not maintained for inline clones/edges. */
4361 ipa_call_summaries->remove (edge);
4362 ipa_fn_summaries->remove (edge->callee);
4363 ipa_remove_from_growth_caches (edge);
4366 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4367 overall size and time. Recompute it.
4368 If RESET is true also recompute call_time_size_table. */
4370 void
4371 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4373 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4374 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4375 size_time_entry *e;
4376 int i;
4378 size_info->size = 0;
4379 info->time = 0;
4380 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4382 size_info->size += e->size;
4383 info->time += e->time;
4385 info->min_size = info->size_time_table[0].size;
4386 if (reset)
4387 info->call_size_time_table.release ();
4388 if (node->callees || node->indirect_calls)
4389 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4390 &info->time, NULL,
4391 ~(clause_t) (1 << ipa_predicate::false_condition),
4392 NULL);
4393 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4394 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4398 /* This function performs intraprocedural analysis in NODE that is required to
4399 inline indirect calls. */
4401 static void
4402 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4404 ipa_analyze_node (node);
4405 if (dump_file && (dump_flags & TDF_DETAILS))
4407 ipa_print_node_params (dump_file, node);
4408 ipa_print_node_jump_functions (dump_file, node);
4413 /* Note function body size. */
4415 void
4416 inline_analyze_function (struct cgraph_node *node)
4418 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4420 if (dump_file)
4421 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4422 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4423 inline_indirect_intraprocedural_analysis (node);
4424 compute_fn_summary (node, false);
4425 if (!optimize)
4427 struct cgraph_edge *e;
4428 for (e = node->callees; e; e = e->next_callee)
4429 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4430 for (e = node->indirect_calls; e; e = e->next_callee)
4431 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4434 pop_cfun ();
4438 /* Called when new function is inserted to callgraph late. */
4440 void
4441 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4443 inline_analyze_function (node);
4446 /* Note function body size. */
4448 static void
4449 ipa_fn_summary_generate (void)
4451 struct cgraph_node *node;
4453 FOR_EACH_DEFINED_FUNCTION (node)
4454 if (DECL_STRUCT_FUNCTION (node->decl))
4455 node->versionable = tree_versionable_function_p (node->decl);
4457 ipa_fn_summary_alloc ();
4459 ipa_fn_summaries->enable_insertion_hook ();
4461 ipa_register_cgraph_hooks ();
4463 FOR_EACH_DEFINED_FUNCTION (node)
4464 if (!node->alias
4465 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4466 || opt_for_fn (node->decl, optimize)))
4467 inline_analyze_function (node);
4471 /* Write inline summary for edge E to OB. */
4473 static void
4474 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4475 bool prevails)
4477 class ipa_call_summary *es = prevails
4478 ? ipa_call_summaries->get_create (e) : NULL;
4479 ipa_predicate p;
4480 int length, i;
4482 int size = streamer_read_uhwi (ib);
4483 int time = streamer_read_uhwi (ib);
4484 int depth = streamer_read_uhwi (ib);
4486 if (es)
4488 es->call_stmt_size = size;
4489 es->call_stmt_time = time;
4490 es->loop_depth = depth;
4493 bitpack_d bp = streamer_read_bitpack (ib);
4494 if (es)
4495 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4496 else
4497 bp_unpack_value (&bp, 1);
4499 p.stream_in (ib);
4500 if (es)
4501 edge_set_predicate (e, &p);
4502 length = streamer_read_uhwi (ib);
4503 if (length && es
4504 && (e->possibly_call_in_translation_unit_p ()
4505 /* Also stream in jump functions to builtins in hope that they
4506 will get fnspecs. */
4507 || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
4509 es->param.safe_grow_cleared (length, true);
4510 for (i = 0; i < length; i++)
4512 es->param[i].change_prob = streamer_read_uhwi (ib);
4513 bitpack_d bp = streamer_read_bitpack (ib);
4514 es->param[i].points_to_local_or_readonly_memory
4515 = bp_unpack_value (&bp, 1);
4516 es->param[i].points_to_possible_sra_candidate
4517 = bp_unpack_value (&bp, 1);
4520 else
4522 for (i = 0; i < length; i++)
4524 streamer_read_uhwi (ib);
4525 streamer_read_uhwi (ib);
4531 /* Stream in inline summaries from the section. */
4533 static void
4534 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4535 size_t len)
4537 const struct lto_function_header *header =
4538 (const struct lto_function_header *) data;
4539 const int cfg_offset = sizeof (struct lto_function_header);
4540 const int main_offset = cfg_offset + header->cfg_size;
4541 const int string_offset = main_offset + header->main_size;
4542 class data_in *data_in;
4543 unsigned int i, count2, j;
4544 unsigned int f_count;
4546 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4547 file_data);
4549 data_in =
4550 lto_data_in_create (file_data, (const char *) data + string_offset,
4551 header->string_size, vNULL);
4552 f_count = streamer_read_uhwi (&ib);
4553 for (i = 0; i < f_count; i++)
4555 unsigned int index;
4556 struct cgraph_node *node;
4557 class ipa_fn_summary *info;
4558 class ipa_node_params *params_summary;
4559 class ipa_size_summary *size_info;
4560 lto_symtab_encoder_t encoder;
4561 struct bitpack_d bp;
4562 struct cgraph_edge *e;
4563 ipa_predicate p;
4565 index = streamer_read_uhwi (&ib);
4566 encoder = file_data->symtab_node_encoder;
4567 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4568 index));
4569 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4570 params_summary = node->prevailing_p ()
4571 ? ipa_node_params_sum->get (node) : NULL;
4572 size_info = node->prevailing_p ()
4573 ? ipa_size_summaries->get_create (node) : NULL;
4575 int stack_size = streamer_read_uhwi (&ib);
4576 int size = streamer_read_uhwi (&ib);
4577 sreal time = sreal::stream_in (&ib);
4579 if (info)
4581 info->estimated_stack_size
4582 = size_info->estimated_self_stack_size = stack_size;
4583 size_info->size = size_info->self_size = size;
4584 info->time = time;
4587 bp = streamer_read_bitpack (&ib);
4588 if (info)
4590 info->inlinable = bp_unpack_value (&bp, 1);
4591 info->fp_expressions = bp_unpack_value (&bp, 1);
4592 if (!lto_stream_offload_p)
4593 info->target_info = streamer_read_uhwi (&ib);
4595 else
4597 bp_unpack_value (&bp, 1);
4598 bp_unpack_value (&bp, 1);
4599 if (!lto_stream_offload_p)
4600 streamer_read_uhwi (&ib);
4603 count2 = streamer_read_uhwi (&ib);
4604 gcc_assert (!info || !info->conds);
4605 if (info)
4606 vec_safe_reserve_exact (info->conds, count2);
4607 for (j = 0; j < count2; j++)
4609 struct condition c;
4610 unsigned int k, count3;
4611 c.operand_num = streamer_read_uhwi (&ib);
4612 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4613 c.type = stream_read_tree (&ib, data_in);
4614 c.val = stream_read_tree (&ib, data_in);
4615 bp = streamer_read_bitpack (&ib);
4616 c.agg_contents = bp_unpack_value (&bp, 1);
4617 c.by_ref = bp_unpack_value (&bp, 1);
4618 if (c.agg_contents)
4619 c.offset = streamer_read_uhwi (&ib);
4620 count3 = streamer_read_uhwi (&ib);
4621 c.param_ops = NULL;
4622 if (info)
4623 vec_safe_reserve_exact (c.param_ops, count3);
4624 if (params_summary)
4625 ipa_set_param_used_by_ipa_predicates
4626 (params_summary, c.operand_num, true);
4627 for (k = 0; k < count3; k++)
4629 struct expr_eval_op op;
4630 enum gimple_rhs_class rhs_class;
4631 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4632 op.type = stream_read_tree (&ib, data_in);
4633 switch (rhs_class = get_gimple_rhs_class (op.code))
4635 case GIMPLE_UNARY_RHS:
4636 op.index = 0;
4637 op.val[0] = NULL_TREE;
4638 op.val[1] = NULL_TREE;
4639 break;
4641 case GIMPLE_BINARY_RHS:
4642 case GIMPLE_TERNARY_RHS:
4643 bp = streamer_read_bitpack (&ib);
4644 op.index = bp_unpack_value (&bp, 2);
4645 op.val[0] = stream_read_tree (&ib, data_in);
4646 if (rhs_class == GIMPLE_BINARY_RHS)
4647 op.val[1] = NULL_TREE;
4648 else
4649 op.val[1] = stream_read_tree (&ib, data_in);
4650 break;
4652 default:
4653 fatal_error (UNKNOWN_LOCATION,
4654 "invalid fnsummary in LTO stream");
4656 if (info)
4657 c.param_ops->quick_push (op);
4659 if (info)
4660 info->conds->quick_push (c);
4662 count2 = streamer_read_uhwi (&ib);
4663 gcc_assert (!info || !info->size_time_table.length ());
4664 if (info && count2)
4665 info->size_time_table.reserve_exact (count2);
4666 for (j = 0; j < count2; j++)
4668 class size_time_entry e;
4670 e.size = streamer_read_uhwi (&ib);
4671 e.time = sreal::stream_in (&ib);
4672 e.exec_predicate.stream_in (&ib);
4673 e.nonconst_predicate.stream_in (&ib);
4675 if (info)
4676 info->size_time_table.quick_push (e);
4679 count2 = streamer_read_uhwi (&ib);
4680 for (j = 0; j < count2; j++)
4682 p.stream_in (&ib);
4683 sreal fcp_freq = sreal::stream_in (&ib);
4684 if (info)
4686 ipa_freqcounting_predicate fcp;
4687 fcp.predicate = NULL;
4688 set_hint_predicate (&fcp.predicate, p);
4689 fcp.freq = fcp_freq;
4690 vec_safe_push (info->loop_iterations, fcp);
4693 count2 = streamer_read_uhwi (&ib);
4694 for (j = 0; j < count2; j++)
4696 p.stream_in (&ib);
4697 sreal fcp_freq = sreal::stream_in (&ib);
4698 if (info)
4700 ipa_freqcounting_predicate fcp;
4701 fcp.predicate = NULL;
4702 set_hint_predicate (&fcp.predicate, p);
4703 fcp.freq = fcp_freq;
4704 vec_safe_push (info->loop_strides, fcp);
4707 count2 = streamer_read_uhwi (&ib);
4708 if (info && count2)
4709 info->builtin_constant_p_parms.reserve_exact (count2);
4710 for (j = 0; j < count2; j++)
4712 int parm = streamer_read_uhwi (&ib);
4713 if (info)
4714 info->builtin_constant_p_parms.quick_push (parm);
4716 for (e = node->callees; e; e = e->next_callee)
4717 read_ipa_call_summary (&ib, e, info != NULL);
4718 for (e = node->indirect_calls; e; e = e->next_callee)
4719 read_ipa_call_summary (&ib, e, info != NULL);
4722 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4723 len);
4724 lto_data_in_delete (data_in);
4728 /* Read inline summary. Jump functions are shared among ipa-cp
4729 and inliner, so when ipa-cp is active, we don't need to write them
4730 twice. */
4732 static void
4733 ipa_fn_summary_read (void)
4735 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4736 struct lto_file_decl_data *file_data;
4737 unsigned int j = 0;
4739 ipa_prop_read_jump_functions ();
4740 ipa_fn_summary_alloc ();
4742 while ((file_data = file_data_vec[j++]))
4744 size_t len;
4745 const char *data
4746 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4747 &len);
4748 if (data)
4749 inline_read_section (file_data, data, len);
4750 else
4751 /* Fatal error here. We do not want to support compiling ltrans units
4752 with different version of compiler or different flags than the WPA
4753 unit, so this should never happen. */
4754 fatal_error (input_location,
4755 "ipa inline summary is missing in input file");
4757 ipa_register_cgraph_hooks ();
4759 gcc_assert (ipa_fn_summaries);
4760 ipa_fn_summaries->enable_insertion_hook ();
4764 /* Write inline summary for edge E to OB. */
4766 static void
4767 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4769 class ipa_call_summary *es = ipa_call_summaries->get (e);
4770 int i;
4772 streamer_write_uhwi (ob, es->call_stmt_size);
4773 streamer_write_uhwi (ob, es->call_stmt_time);
4774 streamer_write_uhwi (ob, es->loop_depth);
4776 bitpack_d bp = bitpack_create (ob->main_stream);
4777 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4778 streamer_write_bitpack (&bp);
4780 if (es->predicate)
4781 es->predicate->stream_out (ob);
4782 else
4783 streamer_write_uhwi (ob, 0);
4784 streamer_write_uhwi (ob, es->param.length ());
4785 for (i = 0; i < (int) es->param.length (); i++)
4787 streamer_write_uhwi (ob, es->param[i].change_prob);
4788 bp = bitpack_create (ob->main_stream);
4789 bp_pack_value (&bp, es->param[i].points_to_local_or_readonly_memory, 1);
4790 bp_pack_value (&bp, es->param[i].points_to_possible_sra_candidate, 1);
4791 streamer_write_bitpack (&bp);
4796 /* Write inline summary for node in SET.
4797 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4798 active, we don't need to write them twice. */
4800 static void
4801 ipa_fn_summary_write (void)
4803 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4804 lto_symtab_encoder_iterator lsei;
4805 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4806 unsigned int count = 0;
4808 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4809 lsei_next_function_in_partition (&lsei))
4811 cgraph_node *cnode = lsei_cgraph_node (lsei);
4812 if (cnode->definition && !cnode->alias)
4813 count++;
4815 streamer_write_uhwi (ob, count);
4817 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4818 lsei_next_function_in_partition (&lsei))
4820 cgraph_node *cnode = lsei_cgraph_node (lsei);
4821 if (cnode->definition && !cnode->alias)
4823 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4824 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4825 struct bitpack_d bp;
4826 struct cgraph_edge *edge;
4827 int i;
4828 size_time_entry *e;
4829 struct condition *c;
4831 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4832 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4833 streamer_write_hwi (ob, size_info->self_size);
4834 info->time.stream_out (ob);
4835 bp = bitpack_create (ob->main_stream);
4836 bp_pack_value (&bp, info->inlinable, 1);
4837 bp_pack_value (&bp, info->fp_expressions, 1);
4838 streamer_write_bitpack (&bp);
4839 if (!lto_stream_offload_p)
4840 streamer_write_uhwi (ob, info->target_info);
4841 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4842 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4844 int j;
4845 struct expr_eval_op *op;
4847 streamer_write_uhwi (ob, c->operand_num);
4848 streamer_write_uhwi (ob, c->code);
4849 stream_write_tree (ob, c->type, true);
4850 stream_write_tree (ob, c->val, true);
4851 bp = bitpack_create (ob->main_stream);
4852 bp_pack_value (&bp, c->agg_contents, 1);
4853 bp_pack_value (&bp, c->by_ref, 1);
4854 streamer_write_bitpack (&bp);
4855 if (c->agg_contents)
4856 streamer_write_uhwi (ob, c->offset);
4857 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4858 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4860 streamer_write_uhwi (ob, op->code);
4861 stream_write_tree (ob, op->type, true);
4862 if (op->val[0])
4864 bp = bitpack_create (ob->main_stream);
4865 bp_pack_value (&bp, op->index, 2);
4866 streamer_write_bitpack (&bp);
4867 stream_write_tree (ob, op->val[0], true);
4868 if (op->val[1])
4869 stream_write_tree (ob, op->val[1], true);
4873 streamer_write_uhwi (ob, info->size_time_table.length ());
4874 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4876 streamer_write_uhwi (ob, e->size);
4877 e->time.stream_out (ob);
4878 e->exec_predicate.stream_out (ob);
4879 e->nonconst_predicate.stream_out (ob);
4881 ipa_freqcounting_predicate *fcp;
4882 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4883 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4885 fcp->predicate->stream_out (ob);
4886 fcp->freq.stream_out (ob);
4888 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4889 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4891 fcp->predicate->stream_out (ob);
4892 fcp->freq.stream_out (ob);
4894 streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
4895 int ip;
4896 for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
4897 i++)
4898 streamer_write_uhwi (ob, ip);
4899 for (edge = cnode->callees; edge; edge = edge->next_callee)
4900 write_ipa_call_summary (ob, edge);
4901 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4902 write_ipa_call_summary (ob, edge);
4905 streamer_write_char_stream (ob->main_stream, 0);
4906 produce_asm (ob, NULL);
4907 destroy_output_block (ob);
4909 ipa_prop_write_jump_functions ();
4913 /* Release function summary. */
4915 void
4916 ipa_free_fn_summary (void)
4918 if (!ipa_call_summaries)
4919 return;
4920 ggc_delete (ipa_fn_summaries);
4921 ipa_fn_summaries = NULL;
4922 delete ipa_call_summaries;
4923 ipa_call_summaries = NULL;
4924 edge_predicate_pool.release ();
4925 /* During IPA this is one of largest datastructures to release. */
4926 if (flag_wpa)
4927 ggc_trim ();
4930 /* Release function summary. */
4932 void
4933 ipa_free_size_summary (void)
4935 if (!ipa_size_summaries)
4936 return;
4937 delete ipa_size_summaries;
4938 ipa_size_summaries = NULL;
4941 namespace {
4943 const pass_data pass_data_local_fn_summary =
4945 GIMPLE_PASS, /* type */
4946 "local-fnsummary", /* name */
4947 OPTGROUP_INLINE, /* optinfo_flags */
4948 TV_INLINE_PARAMETERS, /* tv_id */
4949 0, /* properties_required */
4950 0, /* properties_provided */
4951 0, /* properties_destroyed */
4952 0, /* todo_flags_start */
4953 0, /* todo_flags_finish */
4956 class pass_local_fn_summary : public gimple_opt_pass
4958 public:
4959 pass_local_fn_summary (gcc::context *ctxt)
4960 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4963 /* opt_pass methods: */
4964 opt_pass * clone () final override
4966 return new pass_local_fn_summary (m_ctxt);
4968 unsigned int execute (function *) final override
4970 return compute_fn_summary_for_current ();
4973 }; // class pass_local_fn_summary
4975 } // anon namespace
4977 gimple_opt_pass *
4978 make_pass_local_fn_summary (gcc::context *ctxt)
4980 return new pass_local_fn_summary (ctxt);
4984 /* Free inline summary. */
4986 namespace {
4988 const pass_data pass_data_ipa_free_fn_summary =
4990 SIMPLE_IPA_PASS, /* type */
4991 "free-fnsummary", /* name */
4992 OPTGROUP_NONE, /* optinfo_flags */
4993 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4994 0, /* properties_required */
4995 0, /* properties_provided */
4996 0, /* properties_destroyed */
4997 0, /* todo_flags_start */
4998 0, /* todo_flags_finish */
5001 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
5003 public:
5004 pass_ipa_free_fn_summary (gcc::context *ctxt)
5005 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
5006 small_p (false)
5009 /* opt_pass methods: */
5010 opt_pass *clone () final override
5012 return new pass_ipa_free_fn_summary (m_ctxt);
5014 void set_pass_param (unsigned int n, bool param) final override
5016 gcc_assert (n == 0);
5017 small_p = param;
5019 bool gate (function *) final override { return true; }
5020 unsigned int execute (function *) final override
5022 ipa_free_fn_summary ();
5023 /* Free ipa-prop structures if they are no longer needed. */
5024 ipa_free_all_structures_after_iinln ();
5025 if (!flag_wpa)
5026 ipa_free_size_summary ();
5027 return 0;
5030 private:
5031 bool small_p;
5032 }; // class pass_ipa_free_fn_summary
5034 } // anon namespace
5036 simple_ipa_opt_pass *
5037 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
5039 return new pass_ipa_free_fn_summary (ctxt);
5042 namespace {
5044 const pass_data pass_data_ipa_fn_summary =
5046 IPA_PASS, /* type */
5047 "fnsummary", /* name */
5048 OPTGROUP_INLINE, /* optinfo_flags */
5049 TV_IPA_FNSUMMARY, /* tv_id */
5050 0, /* properties_required */
5051 0, /* properties_provided */
5052 0, /* properties_destroyed */
5053 0, /* todo_flags_start */
5054 ( TODO_dump_symtab ), /* todo_flags_finish */
5057 class pass_ipa_fn_summary : public ipa_opt_pass_d
5059 public:
5060 pass_ipa_fn_summary (gcc::context *ctxt)
5061 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
5062 ipa_fn_summary_generate, /* generate_summary */
5063 ipa_fn_summary_write, /* write_summary */
5064 ipa_fn_summary_read, /* read_summary */
5065 NULL, /* write_optimization_summary */
5066 NULL, /* read_optimization_summary */
5067 NULL, /* stmt_fixup */
5068 0, /* function_transform_todo_flags_start */
5069 NULL, /* function_transform */
5070 NULL) /* variable_transform */
5073 /* opt_pass methods: */
5074 unsigned int execute (function *) final override { return 0; }
5076 }; // class pass_ipa_fn_summary
5078 } // anon namespace
5080 ipa_opt_pass_d *
5081 make_pass_ipa_fn_summary (gcc::context *ctxt)
5083 return new pass_ipa_fn_summary (ctxt);
5086 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
5087 within the same process. For use by toplev::finalize. */
5089 void
5090 ipa_fnsummary_cc_finalize (void)
5092 ipa_free_fn_summary ();
5093 ipa_free_size_summary ();