Default to dwarf version 4 on hppa64-hpux
[official-gcc.git] / gcc / ipa-fnsummary.c
blob3119991940537baca5d0986aab094ff27c7bb683
1 /* Function summary pass.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "gimple.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
63 #include "ssa.h"
64 #include "tree-streamer.h"
65 #include "cgraph.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86 #include "tree-into-ssa.h"
87 #include "symtab-clones.h"
88 #include "gimple-range.h"
90 /* Summaries. */
91 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
92 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
93 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
95 /* Edge predicates goes here. */
96 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
99 /* Dump IPA hints. */
100 void
101 ipa_dump_hints (FILE *f, ipa_hints hints)
103 if (!hints)
104 return;
105 fprintf (f, "IPA hints:");
106 if (hints & INLINE_HINT_indirect_call)
108 hints &= ~INLINE_HINT_indirect_call;
109 fprintf (f, " indirect_call");
111 if (hints & INLINE_HINT_loop_iterations)
113 hints &= ~INLINE_HINT_loop_iterations;
114 fprintf (f, " loop_iterations");
116 if (hints & INLINE_HINT_loop_stride)
118 hints &= ~INLINE_HINT_loop_stride;
119 fprintf (f, " loop_stride");
121 if (hints & INLINE_HINT_same_scc)
123 hints &= ~INLINE_HINT_same_scc;
124 fprintf (f, " same_scc");
126 if (hints & INLINE_HINT_in_scc)
128 hints &= ~INLINE_HINT_in_scc;
129 fprintf (f, " in_scc");
131 if (hints & INLINE_HINT_cross_module)
133 hints &= ~INLINE_HINT_cross_module;
134 fprintf (f, " cross_module");
136 if (hints & INLINE_HINT_declared_inline)
138 hints &= ~INLINE_HINT_declared_inline;
139 fprintf (f, " declared_inline");
141 if (hints & INLINE_HINT_known_hot)
143 hints &= ~INLINE_HINT_known_hot;
144 fprintf (f, " known_hot");
146 if (hints & INLINE_HINT_builtin_constant_p)
148 hints &= ~INLINE_HINT_builtin_constant_p;
149 fprintf (f, " builtin_constant_p");
151 gcc_assert (!hints);
155 /* Record SIZE and TIME to SUMMARY.
156 The accounted code will be executed when EXEC_PRED is true.
157 When NONCONST_PRED is false the code will evaluate to constant and
158 will get optimized out in specialized clones of the function.
159 If CALL is true account to call_size_time_table rather than
160 size_time_table. */
162 void
163 ipa_fn_summary::account_size_time (int size, sreal time,
164 const predicate &exec_pred,
165 const predicate &nonconst_pred_in,
166 bool call)
168 size_time_entry *e;
169 bool found = false;
170 int i;
171 predicate nonconst_pred;
172 vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;
174 if (exec_pred == false)
175 return;
177 nonconst_pred = nonconst_pred_in & exec_pred;
179 if (nonconst_pred == false)
180 return;
182 /* We need to create initial empty unconditional clause, but otherwise
183 we don't need to account empty times and sizes. */
184 if (!size && time == 0 && table->length ())
185 return;
187 /* Only for calls we are unaccounting what we previously recorded. */
188 gcc_checking_assert (time >= 0 || call);
190 for (i = 0; table->iterate (i, &e); i++)
191 if (e->exec_predicate == exec_pred
192 && e->nonconst_predicate == nonconst_pred)
194 found = true;
195 break;
197 if (i == max_size_time_table_size)
199 i = 0;
200 found = true;
201 e = &(*table)[0];
202 if (dump_file && (dump_flags & TDF_DETAILS))
203 fprintf (dump_file,
204 "\t\tReached limit on number of entries, "
205 "ignoring the predicate.");
207 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
209 fprintf (dump_file,
210 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
211 ((double) size) / ipa_fn_summary::size_scale,
212 (time.to_double ()), found ? "" : "new ");
213 exec_pred.dump (dump_file, conds, 0);
214 if (exec_pred != nonconst_pred)
216 fprintf (dump_file, " nonconst:");
217 nonconst_pred.dump (dump_file, conds);
219 else
220 fprintf (dump_file, "\n");
222 if (!found)
224 class size_time_entry new_entry;
225 new_entry.size = size;
226 new_entry.time = time;
227 new_entry.exec_predicate = exec_pred;
228 new_entry.nonconst_predicate = nonconst_pred;
229 if (call)
230 call_size_time_table.safe_push (new_entry);
231 else
232 size_time_table.safe_push (new_entry);
234 else
236 e->size += size;
237 e->time += time;
238 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
239 /* Tolerate small roundoff issues. */
240 if (e->time < 0)
241 e->time = 0;
245 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
247 static struct cgraph_edge *
248 redirect_to_unreachable (struct cgraph_edge *e)
250 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
251 struct cgraph_node *target = cgraph_node::get_create
252 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
254 if (e->speculative)
255 e = cgraph_edge::resolve_speculation (e, target->decl);
256 else if (!e->callee)
257 e = cgraph_edge::make_direct (e, target);
258 else
259 e->redirect_callee (target);
260 class ipa_call_summary *es = ipa_call_summaries->get (e);
261 e->inline_failed = CIF_UNREACHABLE;
262 e->count = profile_count::zero ();
263 es->call_stmt_size = 0;
264 es->call_stmt_time = 0;
265 if (callee)
266 callee->remove_symbol_and_inline_clones ();
267 return e;
270 /* Set predicate for edge E. */
272 static void
273 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
275 /* If the edge is determined to be never executed, redirect it
276 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
277 be optimized out. */
278 if (predicate && *predicate == false
279 /* When handling speculative edges, we need to do the redirection
280 just once. Do it always on the direct edge, so we do not
281 attempt to resolve speculation while duplicating the edge. */
282 && (!e->speculative || e->callee))
283 e = redirect_to_unreachable (e);
285 class ipa_call_summary *es = ipa_call_summaries->get (e);
286 if (predicate && *predicate != true)
288 if (!es->predicate)
289 es->predicate = edge_predicate_pool.allocate ();
290 *es->predicate = *predicate;
292 else
294 if (es->predicate)
295 edge_predicate_pool.remove (es->predicate);
296 es->predicate = NULL;
300 /* Set predicate for hint *P. */
302 static void
303 set_hint_predicate (predicate **p, predicate new_predicate)
305 if (new_predicate == false || new_predicate == true)
307 if (*p)
308 edge_predicate_pool.remove (*p);
309 *p = NULL;
311 else
313 if (!*p)
314 *p = edge_predicate_pool.allocate ();
315 **p = new_predicate;
319 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
320 Otherwise add a new item to the vector with this predicate and frerq equal
321 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
322 in which case the function does nothing. */
324 static void
325 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
326 const predicate &new_predicate, sreal add_freq,
327 unsigned max_num_predicates)
329 if (new_predicate == false || new_predicate == true)
330 return;
331 ipa_freqcounting_predicate *f;
332 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
333 if (new_predicate == f->predicate)
335 f->freq += add_freq;
336 return;
338 if (vec_safe_length (*v) >= max_num_predicates)
339 /* Too many different predicates to account for. */
340 return;
342 ipa_freqcounting_predicate fcp;
343 fcp.predicate = NULL;
344 set_hint_predicate (&fcp.predicate, new_predicate);
345 fcp.freq = add_freq;
346 vec_safe_push (*v, fcp);
347 return;
350 /* Compute what conditions may or may not hold given information about
351 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
352 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
353 copy when called in a given context. It is a bitmask of conditions. Bit
354 0 means that condition is known to be false, while bit 1 means that condition
355 may or may not be true. These differs - for example NOT_INLINED condition
356 is always false in the second and also builtin_constant_p tests cannot use
357 the fact that parameter is indeed a constant.
359 When INLINE_P is true, assume that we are inlining. AVAL contains known
360 information about argument values. The function does not modify its content
361 and so AVALs could also be of type ipa_call_arg_values but so far all
362 callers work with the auto version and so we avoid the conversion for
363 convenience.
365 ERROR_MARK value of an argument means compile time invariant. */
367 static void
368 evaluate_conditions_for_known_args (struct cgraph_node *node,
369 bool inline_p,
370 ipa_auto_call_arg_values *avals,
371 clause_t *ret_clause,
372 clause_t *ret_nonspec_clause)
374 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
375 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
376 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
377 int i;
378 struct condition *c;
380 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
382 tree val = NULL;
383 tree res;
384 int j;
385 struct expr_eval_op *op;
387 /* We allow call stmt to have fewer arguments than the callee function
388 (especially for K&R style programs). So bound check here (we assume
389 m_known_aggs vector is either empty or has the same length as
390 m_known_vals). */
391 gcc_checking_assert (!avals->m_known_aggs.length ()
392 || !avals->m_known_vals.length ()
393 || (avals->m_known_vals.length ()
394 == avals->m_known_aggs.length ()));
396 if (c->agg_contents)
398 if (c->code == predicate::changed
399 && !c->by_ref
400 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
401 continue;
403 if (ipa_agg_value_set *agg = avals->safe_aggval_at (c->operand_num))
405 tree sval = avals->safe_sval_at (c->operand_num);
406 val = ipa_find_agg_cst_for_param (agg, sval, c->offset,
407 c->by_ref);
409 else
410 val = NULL_TREE;
412 else
414 val = avals->safe_sval_at (c->operand_num);
415 if (val && val == error_mark_node && c->code != predicate::changed)
416 val = NULL_TREE;
419 if (!val
420 && (c->code == predicate::changed
421 || c->code == predicate::is_not_constant))
423 clause |= 1 << (i + predicate::first_dynamic_condition);
424 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
425 continue;
427 if (c->code == predicate::changed)
429 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
430 continue;
433 if (c->code == predicate::is_not_constant)
435 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
436 continue;
439 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
441 if (c->type != TREE_TYPE (val))
442 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
443 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
445 if (!val)
446 break;
447 if (!op->val[0])
448 val = fold_unary (op->code, op->type, val);
449 else if (!op->val[1])
450 val = fold_binary (op->code, op->type,
451 op->index ? op->val[0] : val,
452 op->index ? val : op->val[0]);
453 else if (op->index == 0)
454 val = fold_ternary (op->code, op->type,
455 val, op->val[0], op->val[1]);
456 else if (op->index == 1)
457 val = fold_ternary (op->code, op->type,
458 op->val[0], val, op->val[1]);
459 else if (op->index == 2)
460 val = fold_ternary (op->code, op->type,
461 op->val[0], op->val[1], val);
462 else
463 val = NULL_TREE;
466 res = val
467 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
468 : NULL;
470 if (res && integer_zerop (res))
471 continue;
472 if (res && integer_onep (res))
474 clause |= 1 << (i + predicate::first_dynamic_condition);
475 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
476 continue;
479 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
480 && !c->agg_contents
481 && (!val || TREE_CODE (val) != INTEGER_CST))
483 value_range vr = avals->m_known_value_ranges[c->operand_num];
484 if (!vr.undefined_p ()
485 && !vr.varying_p ()
486 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
488 if (!useless_type_conversion_p (c->type, vr.type ()))
490 value_range res;
491 range_fold_unary_expr (&res, NOP_EXPR,
492 c->type, &vr, vr.type ());
493 vr = res;
495 tree type = c->type;
497 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
499 if (vr.varying_p () || vr.undefined_p ())
500 break;
502 value_range res;
503 if (!op->val[0])
504 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
505 else if (!op->val[1])
507 value_range op0 (op->val[0], op->val[0]);
508 range_fold_binary_expr (&res, op->code, op->type,
509 op->index ? &op0 : &vr,
510 op->index ? &vr : &op0);
512 else
513 gcc_unreachable ();
514 type = op->type;
515 vr = res;
517 if (!vr.varying_p () && !vr.undefined_p ())
519 value_range res;
520 value_range val_vr (c->val, c->val);
521 range_fold_binary_expr (&res, c->code, boolean_type_node,
522 &vr,
523 &val_vr);
524 if (res.zero_p ())
525 continue;
530 clause |= 1 << (i + predicate::first_dynamic_condition);
531 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
533 *ret_clause = clause;
534 if (ret_nonspec_clause)
535 *ret_nonspec_clause = nonspec_clause;
538 /* Return true if VRP will be exectued on the function.
539 We do not want to anticipate optimizations that will not happen.
541 FIXME: This can be confused with -fdisable and debug counters and thus
542 it should not be used for correctness (only to make heuristics work).
543 This means that inliner should do its own optimizations of expressions
544 that it predicts to be constant so wrong code can not be triggered by
545 builtin_constant_p. */
547 static bool
548 vrp_will_run_p (struct cgraph_node *node)
550 return (opt_for_fn (node->decl, optimize)
551 && !opt_for_fn (node->decl, optimize_debug)
552 && opt_for_fn (node->decl, flag_tree_vrp));
555 /* Similarly about FRE. */
557 static bool
558 fre_will_run_p (struct cgraph_node *node)
560 return (opt_for_fn (node->decl, optimize)
561 && !opt_for_fn (node->decl, optimize_debug)
562 && opt_for_fn (node->decl, flag_tree_fre));
565 /* Work out what conditions might be true at invocation of E.
566 Compute costs for inlined edge if INLINE_P is true.
568 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
569 (if non-NULL) conditions evaluated for nonspecialized clone called
570 in a given context.
572 Vectors in AVALS will be populated with useful known information about
573 argument values - information not known to have any uses will be omitted -
574 except for m_known_contexts which will only be calculated if
575 COMPUTE_CONTEXTS is true. */
577 void
578 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
579 clause_t *clause_ptr,
580 clause_t *nonspec_clause_ptr,
581 ipa_auto_call_arg_values *avals,
582 bool compute_contexts)
584 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
585 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
586 class ipa_edge_args *args;
588 if (clause_ptr)
589 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
591 if (ipa_node_params_sum
592 && !e->call_stmt_cannot_inline_p
593 && (info->conds || compute_contexts)
594 && (args = ipa_edge_args_sum->get (e)) != NULL)
596 struct cgraph_node *caller;
597 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
598 class ipa_call_summary *es = ipa_call_summaries->get (e);
599 int i, count = ipa_get_cs_argument_count (args);
601 if (count)
603 if (e->caller->inlined_to)
604 caller = e->caller->inlined_to;
605 else
606 caller = e->caller;
607 caller_parms_info = ipa_node_params_sum->get (caller);
608 callee_pi = ipa_node_params_sum->get (callee);
610 /* Watch for thunks. */
611 if (callee_pi)
612 /* Watch for variadic functions. */
613 count = MIN (count, ipa_get_param_count (callee_pi));
616 if (callee_pi)
617 for (i = 0; i < count; i++)
619 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
621 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
622 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
624 /* Determine if we know constant value of the parameter. */
625 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
626 ipa_get_type (callee_pi, i));
628 if (!cst && e->call_stmt
629 && i < (int)gimple_call_num_args (e->call_stmt))
631 cst = gimple_call_arg (e->call_stmt, i);
632 if (!is_gimple_min_invariant (cst))
633 cst = NULL;
635 if (cst)
637 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
638 if (!avals->m_known_vals.length ())
639 avals->m_known_vals.safe_grow_cleared (count, true);
640 avals->m_known_vals[i] = cst;
642 else if (inline_p && !es->param[i].change_prob)
644 if (!avals->m_known_vals.length ())
645 avals->m_known_vals.safe_grow_cleared (count, true);
646 avals->m_known_vals[i] = error_mark_node;
649 /* If we failed to get simple constant, try value range. */
650 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
651 && vrp_will_run_p (caller)
652 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
654 value_range vr
655 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
656 ipa_get_type (callee_pi,
657 i));
658 if (!vr.undefined_p () && !vr.varying_p ())
660 if (!avals->m_known_value_ranges.length ())
662 avals->m_known_value_ranges.safe_grow (count, true);
663 for (int i = 0; i < count; ++i)
664 new (&avals->m_known_value_ranges[i])
665 value_range ();
667 avals->m_known_value_ranges[i] = vr;
671 /* Determine known aggregate values. */
672 if (fre_will_run_p (caller))
674 ipa_agg_value_set agg
675 = ipa_agg_value_set_from_jfunc (caller_parms_info,
676 caller, &jf->agg);
677 if (agg.items.length ())
679 if (!avals->m_known_aggs.length ())
680 avals->m_known_aggs.safe_grow_cleared (count, true);
681 avals->m_known_aggs[i] = agg;
686 /* For calls used in polymorphic calls we further determine
687 polymorphic call context. */
688 if (compute_contexts
689 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
691 ipa_polymorphic_call_context
692 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
693 if (!ctx.useless_p ())
695 if (!avals->m_known_contexts.length ())
696 avals->m_known_contexts.safe_grow_cleared (count, true);
697 avals->m_known_contexts[i]
698 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
702 else
703 gcc_assert (!count || callee->thunk);
705 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
707 int i, count = (int)gimple_call_num_args (e->call_stmt);
709 for (i = 0; i < count; i++)
711 tree cst = gimple_call_arg (e->call_stmt, i);
712 if (!is_gimple_min_invariant (cst))
713 cst = NULL;
714 if (cst)
716 if (!avals->m_known_vals.length ())
717 avals->m_known_vals.safe_grow_cleared (count, true);
718 avals->m_known_vals[i] = cst;
723 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
724 nonspec_clause_ptr);
728 /* Allocate the function summary. */
730 static void
731 ipa_fn_summary_alloc (void)
733 gcc_checking_assert (!ipa_fn_summaries);
734 ipa_size_summaries = new ipa_size_summary_t (symtab);
735 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
736 ipa_call_summaries = new ipa_call_summary_t (symtab);
739 ipa_call_summary::~ipa_call_summary ()
741 if (predicate)
742 edge_predicate_pool.remove (predicate);
744 param.release ();
747 ipa_fn_summary::~ipa_fn_summary ()
749 unsigned len = vec_safe_length (loop_iterations);
750 for (unsigned i = 0; i < len; i++)
751 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
752 len = vec_safe_length (loop_strides);
753 for (unsigned i = 0; i < len; i++)
754 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
755 vec_free (conds);
756 call_size_time_table.release ();
757 vec_free (loop_iterations);
758 vec_free (loop_strides);
759 builtin_constant_p_parms.release ();
762 void
763 ipa_fn_summary_t::remove_callees (cgraph_node *node)
765 cgraph_edge *e;
766 for (e = node->callees; e; e = e->next_callee)
767 ipa_call_summaries->remove (e);
768 for (e = node->indirect_calls; e; e = e->next_callee)
769 ipa_call_summaries->remove (e);
772 /* Duplicate predicates in loop hint vector, allocating memory for them and
773 remove and deallocate any uninteresting (true or false) ones. Return the
774 result. */
776 static vec<ipa_freqcounting_predicate, va_gc> *
777 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
778 clause_t possible_truths)
780 if (vec_safe_length (v) == 0)
781 return NULL;
783 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
784 int len = res->length();
785 for (int i = len - 1; i >= 0; i--)
787 predicate new_predicate
788 = (*res)[i].predicate->remap_after_duplication (possible_truths);
789 /* We do not want to free previous predicate; it is used by node
790 origin. */
791 (*res)[i].predicate = NULL;
792 set_hint_predicate (&(*res)[i].predicate, new_predicate);
794 if (!(*res)[i].predicate)
795 res->unordered_remove (i);
798 return res;
802 /* Hook that is called by cgraph.c when a node is duplicated. */
803 void
804 ipa_fn_summary_t::duplicate (cgraph_node *src,
805 cgraph_node *dst,
806 ipa_fn_summary *src_info,
807 ipa_fn_summary *info)
809 new (info) ipa_fn_summary (*src_info);
810 /* TODO: as an optimization, we may avoid copying conditions
811 that are known to be false or true. */
812 info->conds = vec_safe_copy (info->conds);
814 clone_info *cinfo = clone_info::get (dst);
815 /* When there are any replacements in the function body, see if we can figure
816 out that something was optimized out. */
817 if (ipa_node_params_sum && cinfo && cinfo->tree_map)
819 /* Use SRC parm info since it may not be copied yet. */
820 ipa_node_params *parms_info = ipa_node_params_sum->get (src);
821 ipa_auto_call_arg_values avals;
822 int count = ipa_get_param_count (parms_info);
823 int i, j;
824 clause_t possible_truths;
825 predicate true_pred = true;
826 size_time_entry *e;
827 int optimized_out_size = 0;
828 bool inlined_to_p = false;
829 struct cgraph_edge *edge, *next;
831 info->size_time_table.release ();
832 avals.m_known_vals.safe_grow_cleared (count, true);
833 for (i = 0; i < count; i++)
835 struct ipa_replace_map *r;
837 for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
839 if (r->parm_num == i)
841 avals.m_known_vals[i] = r->new_tree;
842 break;
846 evaluate_conditions_for_known_args (dst, false,
847 &avals,
848 &possible_truths,
849 /* We are going to specialize,
850 so ignore nonspec truths. */
851 NULL);
853 info->account_size_time (0, 0, true_pred, true_pred);
855 /* Remap size_time vectors.
856 Simplify the predicate by pruning out alternatives that are known
857 to be false.
858 TODO: as on optimization, we can also eliminate conditions known
859 to be true. */
860 for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
862 predicate new_exec_pred;
863 predicate new_nonconst_pred;
864 new_exec_pred = e->exec_predicate.remap_after_duplication
865 (possible_truths);
866 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
867 (possible_truths);
868 if (new_exec_pred == false || new_nonconst_pred == false)
869 optimized_out_size += e->size;
870 else
871 info->account_size_time (e->size, e->time, new_exec_pred,
872 new_nonconst_pred);
875 /* Remap edge predicates with the same simplification as above.
876 Also copy constantness arrays. */
877 for (edge = dst->callees; edge; edge = next)
879 predicate new_predicate;
880 class ipa_call_summary *es = ipa_call_summaries->get (edge);
881 next = edge->next_callee;
883 if (!edge->inline_failed)
884 inlined_to_p = true;
885 if (!es->predicate)
886 continue;
887 new_predicate = es->predicate->remap_after_duplication
888 (possible_truths);
889 if (new_predicate == false && *es->predicate != false)
890 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
891 edge_set_predicate (edge, &new_predicate);
894 /* Remap indirect edge predicates with the same simplification as above.
895 Also copy constantness arrays. */
896 for (edge = dst->indirect_calls; edge; edge = next)
898 predicate new_predicate;
899 class ipa_call_summary *es = ipa_call_summaries->get (edge);
900 next = edge->next_callee;
902 gcc_checking_assert (edge->inline_failed);
903 if (!es->predicate)
904 continue;
905 new_predicate = es->predicate->remap_after_duplication
906 (possible_truths);
907 if (new_predicate == false && *es->predicate != false)
908 optimized_out_size
909 += es->call_stmt_size * ipa_fn_summary::size_scale;
910 edge_set_predicate (edge, &new_predicate);
912 info->loop_iterations
913 = remap_freqcounting_preds_after_dup (info->loop_iterations,
914 possible_truths);
915 info->loop_strides
916 = remap_freqcounting_preds_after_dup (info->loop_strides,
917 possible_truths);
918 if (info->builtin_constant_p_parms.length())
920 vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
921 int ip;
922 info->builtin_constant_p_parms = vNULL;
923 for (i = 0; parms.iterate (i, &ip); i++)
924 if (!avals.m_known_vals[ip])
925 info->builtin_constant_p_parms.safe_push (ip);
928 /* If inliner or someone after inliner will ever start producing
929 non-trivial clones, we will get trouble with lack of information
930 about updating self sizes, because size vectors already contains
931 sizes of the callees. */
932 gcc_assert (!inlined_to_p || !optimized_out_size);
934 else
936 info->size_time_table = src_info->size_time_table.copy ();
937 info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
938 info->loop_strides = vec_safe_copy (info->loop_strides);
940 info->builtin_constant_p_parms
941 = info->builtin_constant_p_parms.copy ();
943 ipa_freqcounting_predicate *f;
944 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
946 predicate p = *f->predicate;
947 f->predicate = NULL;
948 set_hint_predicate (&f->predicate, p);
950 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
952 predicate p = *f->predicate;
953 f->predicate = NULL;
954 set_hint_predicate (&f->predicate, p);
957 if (!dst->inlined_to)
958 ipa_update_overall_fn_summary (dst);
962 /* Hook that is called by cgraph.c when a node is duplicated. */
964 void
965 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
966 struct cgraph_edge *dst,
967 class ipa_call_summary *srcinfo,
968 class ipa_call_summary *info)
970 new (info) ipa_call_summary (*srcinfo);
971 info->predicate = NULL;
972 edge_set_predicate (dst, srcinfo->predicate);
973 info->param = srcinfo->param.copy ();
974 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
976 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
977 - eni_size_weights.call_cost);
978 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
979 - eni_time_weights.call_cost);
983 /* Dump edge summaries associated to NODE and recursively to all clones.
984 Indent by INDENT. */
986 static void
987 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
988 class ipa_fn_summary *info)
990 struct cgraph_edge *edge;
991 for (edge = node->callees; edge; edge = edge->next_callee)
993 class ipa_call_summary *es = ipa_call_summaries->get (edge);
994 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
995 int i;
997 fprintf (f,
998 "%*s%s %s\n%*s freq:%4.2f",
999 indent, "", callee->dump_name (),
1000 !edge->inline_failed
1001 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1002 indent, "", edge->sreal_frequency ().to_double ());
1004 if (cross_module_call_p (edge))
1005 fprintf (f, " cross module");
1007 if (es)
1008 fprintf (f, " loop depth:%2i size:%2i time: %2i",
1009 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
1011 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
1012 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1013 if (s != NULL)
1014 fprintf (f, " callee size:%2i stack:%2i",
1015 (int) (ss->size / ipa_fn_summary::size_scale),
1016 (int) s->estimated_stack_size);
1018 if (es && es->predicate)
1020 fprintf (f, " predicate: ");
1021 es->predicate->dump (f, info->conds);
1023 else
1024 fprintf (f, "\n");
1025 if (es && es->param.exists ())
1026 for (i = 0; i < (int) es->param.length (); i++)
1028 int prob = es->param[i].change_prob;
1030 if (!prob)
1031 fprintf (f, "%*s op%i is compile time invariant\n",
1032 indent + 2, "", i);
1033 else if (prob != REG_BR_PROB_BASE)
1034 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1035 prob * 100.0 / REG_BR_PROB_BASE);
1036 if (es->param[i].points_to_local_or_readonly_memory)
1037 fprintf (f, "%*s op%i points to local or readonly memory\n",
1038 indent + 2, "", i);
1040 if (!edge->inline_failed)
1042 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1043 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1044 indent + 2, "",
1045 (int) ipa_get_stack_frame_offset (callee),
1046 (int) ss->estimated_self_stack_size);
1047 dump_ipa_call_summary (f, indent + 2, callee, info);
1050 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1052 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1053 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1054 " time: %2i",
1055 indent, "",
1056 es->loop_depth,
1057 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1058 es->call_stmt_time);
1059 if (es->predicate)
1061 fprintf (f, "predicate: ");
1062 es->predicate->dump (f, info->conds);
1064 else
1065 fprintf (f, "\n");
/* Dump IPA function summary of NODE to file F.  Prints the summary flags,
   the global/self size, time and stack statistics, the size-time table
   together with its execution and non-constant predicates, the loop
   iteration/stride predicates, and finally the summaries of outgoing
   calls.  If NODE has no summary attached, report that instead.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  if (s->builtin_constant_p_parms.length ())
	    {
	      /* Parameters whose constancy is tested via
		 __builtin_constant_p in the body.  */
	      fprintf (f, " builtin_constant_p_parms");
	      for (unsigned int i = 0;
		   i < s->builtin_constant_p_parms.length (); i++)
		fprintf (f, " %i", s->builtin_constant_p_parms[i]);
	    }
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", ss->self_size);
	  fprintf (f, " global size: %i\n", ss->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  /* Size-time table: each entry is guarded by predicates telling
	     when it is executed and when it is not compile-time constant.  */
	  for (i = 0; s->size_time_table.iterate (i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  ipa_freqcounting_predicate *fcp;
	  bool first_fcp = true;
	  /* Print the header lazily so nothing is emitted when the
	     vector is empty.  */
	  for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop iterations:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for ", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop strides:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for :", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  fprintf (f, " calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
/* Convenience wrapper for use from the debugger: dump the IPA function
   summary of NODE to stderr.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
/* Dump IPA function summaries of all defined functions to file F.
   Nodes already inlined into another function are skipped; their
   summaries appear under the node they were inlined to.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}
1171 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1172 boolean variable pointed to by DATA. */
1174 static bool
1175 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1176 void *data)
1178 bool *b = (bool *) data;
1179 *b = true;
1180 return true;
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of
   the PARM_DECL) will be stored to *SIZE_P in that case too.  Returns
   NULL_TREE when OP is not a provably unmodified parameter or when the
   alias-walk budget in FBI has been exhausted.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  Requires an alias walk to prove the
     parameter was not written to before STMT.  */
  if (TREE_CODE (op) == PARM_DECL
      && fbi->aa_walk_budget > 0)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget);
      if (walked < 0)
	{
	  /* Walk aborted: the budget is spent; disable further walks
	     for this function body.  */
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      fbi->aa_walk_budget -= walked;
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
/* If OP refers to value of function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If
   non-NULL, the size of the memory load (or the SSA_NAME of the PARM_DECL)
   will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
		 poly_int64 *size_p)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
  if (res)
    return res;

  /* Follow a single-rhs SSA copy chain back towards the parameter.  */
  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
			    gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
			    size_p);
  return NULL_TREE;
}
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return
   TRUE and store the number of the parameter to *INDEX_P, the access size
   into *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is
   the statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      /* Direct (non-aggregate) use of a parameter.  */
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      /* Look through the defining single assignment.  */
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference; see if it loads from an aggregate passed
     to one of the parameters.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple
   abstraction penalty wrappers.  */

static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements vanish when the function is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	       struct a a ={1,2};
	       return a;
	     }

	     This translate into:

	     returnstruct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	     <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;
	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
/* Analyze EXPR if it represents a series of simple operations performed on
   a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
   AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
   Type of the parameter or load from an aggregate via the parameter is
   stored in *TYPE_P.  Operations on the parameter are recorded to
   PARAM_OPS_P if it is not NULL.  The walk is bounded by
   param_ipa_max_param_expr_ops; each recorded operation may have at most
   one non-constant operand (which is followed further).  */

static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  /* Plain copy: just follow the rhs.  */
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      /* All-constant rhs would have been folded already.  */
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1548 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1550 static void
1551 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1553 int ip;
1555 /* Avoid duplicates. */
1556 for (unsigned int i = 0;
1557 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1558 if (ip == parm)
1559 return;
1560 summary->builtin_constant_p_parms.safe_push (parm);
/* If BB ends by a conditional we can turn into predicates, attach
   corresponding predicates to the CFG edges.  Handles both ordinary
   comparisons of a parameter expression against an IP-invariant and the
   special "if (__builtin_constant_p (param))" pattern.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  if (!aggpos.by_ref)
    add_builtin_constant_p_parm (summary, index);
  /* Only the false edge (the nonconstant path) gets a predicate.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
				   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  Each case edge gets a predicate built from
   its value range; the default edge gets the negation of all case ranges,
   limited by param_ipa_max_switch_predicate_bounds to keep the predicate
   size bounded.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = opt_for_fn (fbi->node->decl,
				param_ipa_max_switch_predicate_bounds);
  int bound_count = 0;
  value_range vr;

  get_range_query (cfun)->range_of_expr (vr, op);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (op));
  value_range_kind vr_type = vr.kind ();
  wide_int vr_wmin = wi::to_wide (vr.min ());
  wide_int vr_wmax = wi::to_wide (vr.max ());

  /* Start every successor edge at "never executed"; case predicates are
     OR-ed in below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      /* Too many bounds: give the default edge the trivially-true
	 predicate instead of an over-complicated one.  */
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

                [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

              D2 < x < D3 && x != S1 && ... && x != Sn  */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  First the conditional/switch edges are
   annotated, then a forward dataflow iteration propagates predicates
   from predecessors (OR over incoming edges, AND with the edge
   predicate) until a fixpoint is reached.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* Once true, further predecessors cannot change it.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  Recurses over the structure of EXPR; for SSA
   names the previously computed predicate in NONCONSTANT_NAMES is used.  */

static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Unary operations preserve (non-)constancy of the operand.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  The result for an SSA lhs is also recorded
   into NONCONSTANT_NAMES so later statements can reuse it.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       predicate::changed, NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Remember the predicate for the SSA lhs so uses of it can pick it up.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
/* Bookkeeping for the record_modified callback of walk_aliased_vdefs.  */
struct record_modified_bb_info
{
  /* The value (parameter) whose modifications are being tracked.  */
  tree op;
  /* Set of indexes of basic blocks where OP may be set.  */
  bitmap bb_set;
  /* The statement whose own definition of OP is to be ignored.  */
  gimple *stmt;
};
2147 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2148 probability how often it changes between USE_BB.
2149 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2150 is in different loop nest, we can do better.
2151 This is all just estimate. In theory we look for minimal cut separating
2152 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2153 anyway. */
2155 static basic_block
2156 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2158 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2159 if (l && l->header->count < init_bb->count)
2160 return l->header;
2161 return init_bb;
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may
   be set except for info->stmt.  Clobbers and the statement itself are
   skipped; default-definition vdefs are attributed to the entry block.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, " Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  /* Returning false continues the walk over remaining vdefs.  */
  return false;
}
2196 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2197 will change since last invocation of STMT.
2199 Value 0 is reserved for compile time invariants.
2200 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2201 ought to be REG_BR_PROB_BASE / estimated_iters. */
static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      /* Scale the ratio of definition count to use count into
	 REG_BR_PROB_BASE, never returning 0 (that value is reserved for
	 compile time invariants).  */
      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* A known constructor means the memory is never modified.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, " Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      /* Collect the set of blocks where the memory may be modified by
	 walking the virtual use-def chain from STMT.  */
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked > 0)
	fbi->aa_walk_budget -= walked;
      /* Give up when the walk ran out of budget or the value may be set in
	 the very block that uses it.  */
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (walked < 0)
	    fbi->aa_walk_budget = 0;
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, " Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, " Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, " Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      /* As in the SSA_NAME case, scale the ratio into REG_BR_PROB_BASE and
	 never return 0 here.  */
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
2308 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2309 sub-graph and if the predicate the condition depends on is known. If so,
2310 return true and store the pointer the predicate in *P. */
static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* With a single predecessor no real selection happens; the false
     predicate means the condition never makes the result unknown.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Verify the diamond/half-diamond shape: every predecessor must either
     be a forwarder whose own single predecessor is FIRST_BB, or be
     FIRST_BB itself.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  /* The controlling statement must be a conditional with an interprocedural
     invariant on its right-hand side.  */
  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  /* Derive the predicate from the left-hand side of the condition; a
     predicate of "true" carries no information, so report failure.  */
  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
2369 /* Given a PHI statement in a function described by inline properties SUMMARY
2370 and *P being the predicate describing whether the selected PHI argument is
2371 known, store a predicate for the result of the PHI statement into
2372 NONCONSTANT_NAMES, if possible. */
static void
predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
			  predicate *p,
			  vec<predicate> nonconstant_names)
{
  unsigned i;

  /* OR in the nonconstantness predicate of every non-invariant argument;
     once the result degenerates to "true" there is nothing to record.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  /* Publish the combined predicate for the PHI result.  */
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
2402 /* For a typical usage of __builtin_expect (a<b, 1), we
2403 may introduce an extra relation stmt:
2404 With the builtin, we have
2405 t1 = a <= b;
2406 t2 = (long int) t1;
2407 t3 = __builtin_expect (t2, 1);
2408 if (t3 != 0)
2409 goto ...
2410 Without the builtin, we have
2411 if (a<=b)
2412 goto...
2413 This affects the size/time estimation and may have
2414 an impact on the earlier inlining.
2415 Here find this pattern and fix it up later. */
static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk the definition chain of the first argument through any
	     conversions, looking for a comparison feeding the builtin.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  /* Found the comparison; this matches the pattern.  */
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  /* Look through conversions.  */
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The builtin result must feed directly into a single conditional
	     jump; return that GIMPLE_COND so its cost can be adjusted.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
/* Return true when the basic block contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible in the
   presence of EH and will be optimized out by optimize_clobbers later in
   the game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber-only too.  When it is false, the RESX is not necessary
   at the end of the basic block.  */
static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The block must end with a GIMPLE_RESX; skip it before scanning
	 the remaining statements.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Scan backwards: only debug statements, clobbers and labels are
     allowed; anything else disqualifies the block.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2523 /* Return true if STMT compute a floating point expression that may be affected
2524 by -ffast-math and similar flags. */
2526 static bool
2527 fp_expression_p (gimple *stmt)
2529 ssa_op_iter i;
2530 tree op;
2532 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2533 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2534 return true;
2535 return false;
2538 /* Return true if T references memory location that is local
2539 for the function (that means, dead after return) or read-only. */
2541 bool
2542 refs_local_or_readonly_memory_p (tree t)
2544 /* Non-escaping memory is fine. */
2545 t = get_base_address (t);
2546 if ((TREE_CODE (t) == MEM_REF
2547 || TREE_CODE (t) == TARGET_MEM_REF))
2548 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2550 /* Automatic variables are fine. */
2551 if (DECL_P (t)
2552 && auto_var_in_fn_p (t, current_function_decl))
2553 return true;
2555 /* Read-only variables are fine. */
2556 if (DECL_P (t) && TREE_READONLY (t))
2557 return true;
2559 return false;
2562 /* Return true if T is a pointer pointing to memory location that is local
2563 for the function (that means, dead after return) or read-only. */
2565 bool
2566 points_to_local_or_readonly_memory_p (tree t)
2568 /* See if memory location is clearly invalid. */
2569 if (integer_zerop (t))
2570 return flag_delete_null_pointer_checks;
2571 if (TREE_CODE (t) == SSA_NAME)
2572 return !ptr_deref_may_alias_global_p (t);
2573 if (TREE_CODE (t) == ADDR_EXPR)
2574 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2575 return false;
2579 /* Analyze function body for NODE.
2580 EARLY indicates run from early optimization pipeline. */
static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
  /* Estimate static overhead for function prologue/epilogue and alignment. */
  int size = opt_for_fn (node->decl, param_uninlined_function_insns);
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  ipa_node_params *params_summary
    = early ? NULL : ipa_node_params_sum->get (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  /* Start from a clean summary; any previously recorded conditions and
     size/time entries are discarded.  */
  memset(&fbi, 0, sizeof(fbi));
  vec_free (info->conds);
  info->conds = NULL;
  info->size_time_table.release ();
  info->call_size_time_table.release ();

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = ipa_node_params_sum->get (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length (), true);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->dump_name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list. */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the static prologue/epilogue overhead under the "not inlined"
     predicate, since it disappears when the body is inlined.  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (opt_for_fn (node->decl,
				       param_uninlined_function_insns)
			   * ipa_fn_summary::size_scale,
			   opt_for_fn (node->decl,
				       param_uninlined_function_time),
			   bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      /* Relative execution frequency of this block w.r.t. function entry.  */
      freq = bb->count.to_sreal_scale (entry_count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute nonconstantness predicates for PHI results.  All PHIs in a
	 block share the same controlling condition, so it is derived once
	 (for the first PHI) and reused.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     __builtin_expect call. Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      /* Record per-argument change probabilities and locality for
		 the call summary.  */
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count, true);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		      es->param[i].points_to_local_or_readonly_memory
			= points_to_local_or_readonly_memory_p
			    (gimple_call_arg (stmt, i));
		    }
		}
	      /* We cannot setup VLA parameters during inlining.  */
	      for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
		if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
		  {
		    edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
		    break;
		  }
	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      if (edge->speculative)
		{
		  cgraph_edge *indirect
		    = edge->speculative_call_indirect_edge ();
		  ipa_call_summary *es2
		    = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);

		  /* Edge is the first direct call.
		     create and duplicate call summaries for multiple
		     speculative call targets.  */
		  for (cgraph_edge *direct
			 = edge->next_speculative_call_target ();
		       direct;
		       direct = direct->next_speculative_call_target ())
		    {
		      ipa_call_summary *es3
			= ipa_call_summaries->get_create (direct);
		      ipa_call_summaries->duplicate (edge, direct,
						     es, es3);
		    }
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      class predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, " fp_expression set\n");
		}
	    }

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }
	}
    }
  free (order);

  if (nonconstant_names.exists () && !early)
    {
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class loop *loop;
      unsigned max_loop_predicates = opt_for_fn (node->decl,
						 param_ipa_max_loop_predicates);

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      /* For each loop, combine the predicates under which the number of
	 iterations of any exit is nonconstant.  */
      for (auto loop : loops_list (cfun, 0))
	{
	  predicate loop_iterations = true;
	  sreal header_freq;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  if (!loop->header->aux)
	    continue;

	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);

	  bb_predicate = *(predicate *) loop->header->aux;
	  auto_vec<edge> exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	      {
		predicate will_be_nonconstant
		  = will_be_nonconstant_expr_predicate (&fbi, info,
							params_summary,
							niter_desc.niter,
							nonconstant_names);
		if (will_be_nonconstant != true)
		  will_be_nonconstant = bb_predicate & will_be_nonconstant;
		if (will_be_nonconstant != true
		    && will_be_nonconstant != false)
		  loop_iterations &= will_be_nonconstant;
	      }
	  add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
				      phdr_freq, max_loop_predicates);
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  predicate loop_stride = true;
	  basic_block *body = get_loop_body (loop);
	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      if (!body[i]->aux)
		continue;

	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  /* Record the conditions under which the IV step is
		     nonconstant.  */
		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  add_freqcounting_predicate (&s->loop_strides, loop_stride,
				      phdr_freq, max_loop_predicates);
	  free (body);
	}
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates allocated by
     compute_bb_predicates.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
3052 /* Compute function summary.
3053 EARLY is true when we compute parameters during early opts. */
void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk)
    {
      /* Thunks have no analyzable body; account fixed per-thunk costs on
	 the single outgoing call edge.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* During IPA profile merging we may be called w/o virtual SSA form
	 built.  */
      update_ssa (TODO_update_ssa_only_virtuals);

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary except for case when speculative
     edges are present since these are accounted to size but not
     self_size. Do not compare time since different order the roundoff
     errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  if (flag_checking)
    {
      /* Verify the invariant above: sizes may differ only when a
	 speculative indirect edge exists.  */
      for (e = node->indirect_calls; e; e = e->next_callee)
	if (e->speculative)
	  break;
      gcc_assert (e || size_info->size == size_info->self_size);
    }
}
3169 /* Compute parameters of functions used by inliner using
3170 current_function_decl. */
3172 static unsigned int
3173 compute_fn_summary_for_current (void)
3175 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3176 return 0;
3179 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3180 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3181 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      ipa_call_arg_values *avals)
{
  tree target;
  struct cgraph_node *callee;
  class ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  /* Without any known argument values or contexts there is nothing to
     devirtualize with.  */
  if (!avals
      || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  target = ipa_get_indirect_edge_target (ie, avals, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  /* The target must resolve to an available definition whose summary says
     it can be inlined; otherwise only the direct-call saving above (already
     applied) is possible.  */
  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  if (isummary == NULL)
    return false;

  return isummary->inlinable;
}
3223 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3224 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3225 devirtualized. AVALS, if non-NULL, describes the context of the call site
3226 as far as values of parameters are concerened. */
3228 static inline void
3229 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3230 sreal *time, ipa_call_arg_values *avals,
3231 ipa_hints *hints)
3233 class ipa_call_summary *es = ipa_call_summaries->get (e);
3234 int call_size = es->call_stmt_size;
3235 int call_time = es->call_stmt_time;
3236 int cur_size;
3238 if (!e->callee && hints && e->maybe_hot_p ()
3239 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3240 *hints |= INLINE_HINT_indirect_call;
3241 cur_size = call_size * ipa_fn_summary::size_scale;
3242 *size += cur_size;
3243 if (min_size)
3244 *min_size += cur_size;
3245 if (time)
3246 *time += ((sreal)call_time) * e->sreal_frequency ();
3250 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3251 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3252 site.
3254 Helper for estimate_calls_size_and_time which does the same but
3255 (in most cases) faster. */
3257 static void
3258 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3259 int *min_size, sreal *time,
3260 ipa_hints *hints,
3261 clause_t possible_truths,
3262 ipa_call_arg_values *avals)
3264 struct cgraph_edge *e;
3265 for (e = node->callees; e; e = e->next_callee)
/* Already-inlined edges carry no call summary of their own; account
   the calls of the inline clone's body by recursing into it.  */
3267 if (!e->inline_failed)
3269 gcc_checking_assert (!ipa_call_summaries->get (e));
3270 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3271 hints, possible_truths, avals);
3273 continue;
3275 class ipa_call_summary *es = ipa_call_summaries->get (e);
3277 /* Do not care about zero sized builtins. */
3278 if (!es->call_stmt_size)
3280 gcc_checking_assert (!es->call_stmt_time);
3281 continue;
/* Account the edge only if its guarding predicate may be true in this
   context.  MIN_SIZE accumulates only unconditional calls, hence NULL
   is passed when the edge has a predicate.  */
3283 if (!es->predicate
3284 || es->predicate->evaluate (possible_truths))
3286 /* Predicates of calls shall not use NOT_CHANGED codes,
3287 so we do not need to compute probabilities. */
3288 estimate_edge_size_and_time (e, size,
3289 es->predicate ? NULL : min_size,
3290 time, avals, hints);
/* Indirect edges are never inlined; account them directly.  */
3293 for (e = node->indirect_calls; e; e = e->next_callee)
3295 class ipa_call_summary *es = ipa_call_summaries->get (e);
3296 if (!es->predicate
3297 || es->predicate->evaluate (possible_truths))
3298 estimate_edge_size_and_time (e, size,
3299 es->predicate ? NULL : min_size,
3300 time, avals, hints);
3304 /* Populate sum->call_size_time_table for edges from NODE. */
3306 static void
3307 summarize_calls_size_and_time (struct cgraph_node *node,
3308 ipa_fn_summary *sum)
3310 struct cgraph_edge *e;
3311 for (e = node->callees; e; e = e->next_callee)
/* Inlined edges have no summaries; fold in the calls of the inline
   clone's body instead.  */
3313 if (!e->inline_failed)
3315 gcc_checking_assert (!ipa_call_summaries->get (e));
3316 summarize_calls_size_and_time (e->callee, sum);
3317 continue;
3319 int size = 0;
3320 sreal time = 0;
3322 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
/* Record the call guarded by its predicate (TRUE when it has none) so
   later queries can evaluate the entry against a clause.  The same
   predicate is used for both the exec and nonconst slots.  */
3324 struct predicate pred = true;
3325 class ipa_call_summary *es = ipa_call_summaries->get (e);
3327 if (es->predicate)
3328 pred = *es->predicate;
3329 sum->account_size_time (size, time, pred, pred, true);
/* Indirect calls are summarized the same way.  */
3331 for (e = node->indirect_calls; e; e = e->next_callee)
3333 int size = 0;
3334 sreal time = 0;
3336 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3337 struct predicate pred = true;
3338 class ipa_call_summary *es = ipa_call_summaries->get (e);
3340 if (es->predicate)
3341 pred = *es->predicate;
3342 sum->account_size_time (size, time, pred, pred, true);
3346 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3347 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3348 context of the call site. */
3350 static void
3351 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3352 int *min_size, sreal *time,
3353 ipa_hints *hints,
3354 clause_t possible_truths,
3355 ipa_call_arg_values *avals)
3357 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3358 bool use_table = true;
3360 gcc_assert (node->callees || node->indirect_calls);
3362 /* During early inlining we do not calculate info for very
3363 large functions and thus there is no need for producing
3364 summaries. */
3365 if (!ipa_node_params_sum)
3366 use_table = false;
3367 /* Do not calculate summaries for simple wrappers; it is waste
3368 of memory. */
3369 else if (node->callees && node->indirect_calls
3370 && node->callees->inline_failed && !node->callees->next_callee)
3371 use_table = false;
3372 /* If there is an indirect edge that may be optimized, we need
3373 to go the slow way. */
3374 else if (avals && hints
3375 && (avals->m_known_vals.length ()
3376 || avals->m_known_contexts.length ()
3377 || avals->m_known_aggs.length ()))
3379 ipa_node_params *params_summary = ipa_node_params_sum->get (node);
3380 unsigned int nargs = params_summary
3381 ? ipa_get_param_count (params_summary) : 0;
/* The cached table ignores known argument values, so it must not be
   used when a value known in this context could enable
   devirtualization of an indirect or polymorphic call.  */
3383 for (unsigned int i = 0; i < nargs && use_table; i++)
3385 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3386 && (avals->safe_sval_at (i)
3387 || (avals->m_known_aggs.length () > i
3388 && avals->m_known_aggs[i].items.length ())))
3389 use_table = false;
3390 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3391 && (avals->m_known_contexts.length () > i
3392 && !avals->m_known_contexts[i].useless_p ()))
3393 use_table = false;
3397 /* Fast path is via the call size time table. */
3398 if (use_table)
3400 /* Build summary if it is absent. */
3401 if (!sum->call_size_time_table.length ())
/* Reserve entry 0 (unconditional predicate) first; it is used to
   derive MIN_SIZE below.  */
3403 predicate true_pred = true;
3404 sum->account_size_time (0, 0, true_pred, true_pred, true);
3405 summarize_calls_size_and_time (node, sum);
3408 int old_size = *size;
3409 sreal old_time = time ? *time : 0;
3411 if (min_size)
3412 *min_size += sum->call_size_time_table[0].size;
3414 unsigned int i;
3415 size_time_entry *e;
3417 /* Walk the table and account sizes and times. */
3418 for (i = 0; sum->call_size_time_table.iterate (i, &e);
3419 i++)
3420 if (e->exec_predicate.evaluate (possible_truths))
3422 *size += e->size;
3423 if (time)
3424 *time += e->time;
3427 /* Be careful and see if both methods agree. */
3428 if ((flag_checking || dump_file)
3429 /* Do not try to sanity check when we know we lost some
3430 precision. */
3431 && sum->call_size_time_table.length ()
3432 < ipa_fn_summary::max_size_time_table_size)
3434 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3435 possible_truths, avals);
3436 gcc_assert (*size == old_size);
/* Sizes must match exactly; times only approximately because sreal
   rounding differs between the two computations.  */
3437 if (time && (*time - old_time > 1 || *time - old_time < -1)
3438 && dump_file)
3439 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3440 old_time.to_double (),
3441 time->to_double ());
3444 /* Slow path by walking all edges. */
3445 else
3446 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3447 possible_truths, avals);
3450 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3451 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3452 caller. */
3454 ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
3455 clause_t nonspec_possible_truths,
3456 vec<inline_param_summary>
3457 inline_param_summary,
3458 ipa_auto_call_arg_values *arg_values)
3459 : m_node (node), m_possible_truths (possible_truths),
3460 m_nonspec_possible_truths (nonspec_possible_truths),
3461 m_inline_param_summary (inline_param_summary),
/* As documented above, ARG_VALUES (and INLINE_PARAM_SUMMARY) remain
   owned by the caller; the context merely borrows them.  */
3462 m_avals (arg_values)
3466 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3468 void
3469 ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
3471 m_node = ctx.m_node;
3472 m_possible_truths = ctx.m_possible_truths;
3473 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3474 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3475 unsigned int nargs = params_summary
3476 ? ipa_get_param_count (params_summary) : 0;
3478 m_inline_param_summary = vNULL;
3479 /* Copy the info only if there is at least one useful entry. */
3480 if (ctx.m_inline_param_summary.exists ())
3482 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3484 for (unsigned int i = 0; i < n; i++)
3485 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3486 && !ctx.m_inline_param_summary[i].useless_p ())
3488 m_inline_param_summary
3489 = ctx.m_inline_param_summary.copy ();
3490 break;
/* Same logic for known scalar values: cache the vector only when some
   value can actually influence an indirect call.  */
3493 m_avals.m_known_vals = vNULL;
3494 if (ctx.m_avals.m_known_vals.exists ())
3496 unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);
3498 for (unsigned int i = 0; i < n; i++)
3499 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3500 && ctx.m_avals.m_known_vals[i])
3502 m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
3503 break;
/* Polymorphic call contexts: keep only if some context is useful for a
   polymorphic call.  */
3507 m_avals.m_known_contexts = vNULL;
3508 if (ctx.m_avals.m_known_contexts.exists ())
3510 unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);
3512 for (unsigned int i = 0; i < n; i++)
3513 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3514 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3516 m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
3517 break;
/* Aggregate values need a deep copy (ipa_copy_agg_values) since they own
   their item vectors; released later in release ().  */
3521 m_avals.m_known_aggs = vNULL;
3522 if (ctx.m_avals.m_known_aggs.exists ())
3524 unsigned int n = MIN (ctx.m_avals.m_known_aggs.length (), nargs);
3526 for (unsigned int i = 0; i < n; i++)
3527 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3528 && !ctx.m_avals.m_known_aggs[i].is_empty ())
3530 m_avals.m_known_aggs
3531 = ipa_copy_agg_values (ctx.m_avals.m_known_aggs);
3532 break;
/* Value ranges are never retained in the cached copy.  */
3536 m_avals.m_known_value_ranges = vNULL;
3539 /* Release memory used by known_vals/contexts/aggs vectors. and
3540 inline_param_summary. */
3542 void
3543 ipa_cached_call_context::release ()
3545 /* See if context is initialized at first place. */
3546 if (!m_node)
3547 return;
/* Unlike a plain ipa_call_context, the cached variant owns deep copies
   of these vectors (made by duplicate_from), so free them here.  */
3548 ipa_release_agg_values (m_avals.m_known_aggs, true);
3549 m_avals.m_known_vals.release ();
3550 m_avals.m_known_contexts.release ();
3551 m_inline_param_summary.release ();
3554 /* Return true if CTX describes the same call context as THIS. */
3556 bool
3557 ipa_call_context::equal_to (const ipa_call_context &ctx)
3559 if (m_node != ctx.m_node
3560 || m_possible_truths != ctx.m_possible_truths
3561 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3562 return false;
3564 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3565 unsigned int nargs = params_summary
3566 ? ipa_get_param_count (params_summary) : 0;
/* For each vector below, an entry that is missing (vector too short) is
   treated as equivalent to a useless/NULL one, and only parameters that
   can actually influence the summary are compared.  */
3568 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3570 for (unsigned int i = 0; i < nargs; i++)
3572 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3573 continue;
3574 if (i >= m_inline_param_summary.length ()
3575 || m_inline_param_summary[i].useless_p ())
3577 if (i < ctx.m_inline_param_summary.length ()
3578 && !ctx.m_inline_param_summary[i].useless_p ())
3579 return false;
3580 continue;
3582 if (i >= ctx.m_inline_param_summary.length ()
3583 || ctx.m_inline_param_summary[i].useless_p ())
3585 if (i < m_inline_param_summary.length ()
3586 && !m_inline_param_summary[i].useless_p ())
3587 return false;
3588 continue;
3590 if (!m_inline_param_summary[i].equal_to
3591 (ctx.m_inline_param_summary[i]))
3592 return false;
3595 if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
3597 for (unsigned int i = 0; i < nargs; i++)
3599 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3600 continue;
3601 if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
3603 if (i < ctx.m_avals.m_known_vals.length ()
3604 && ctx.m_avals.m_known_vals[i])
3605 return false;
3606 continue;
3608 if (i >= ctx.m_avals.m_known_vals.length ()
3609 || !ctx.m_avals.m_known_vals[i])
3611 if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
3612 return false;
3613 continue;
/* Known constant values are compared by pointer identity here; this
   presumably relies on sharing of constant tree nodes -- confirm.  */
3615 if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
3616 return false;
3619 if (m_avals.m_known_contexts.exists ()
3620 || ctx.m_avals.m_known_contexts.exists ())
3622 for (unsigned int i = 0; i < nargs; i++)
3624 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3625 continue;
3626 if (i >= m_avals.m_known_contexts.length ()
3627 || m_avals.m_known_contexts[i].useless_p ())
3629 if (i < ctx.m_avals.m_known_contexts.length ()
3630 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3631 return false;
3632 continue;
3634 if (i >= ctx.m_avals.m_known_contexts.length ()
3635 || ctx.m_avals.m_known_contexts[i].useless_p ())
3637 if (i < m_avals.m_known_contexts.length ()
3638 && !m_avals.m_known_contexts[i].useless_p ())
3639 return false;
3640 continue;
3642 if (!m_avals.m_known_contexts[i].equal_to
3643 (ctx.m_avals.m_known_contexts[i]))
3644 return false;
3647 if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
3649 for (unsigned int i = 0; i < nargs; i++)
3651 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3652 continue;
3653 if (i >= m_avals.m_known_aggs.length ()
3654 || m_avals.m_known_aggs[i].is_empty ())
3656 if (i < ctx.m_avals.m_known_aggs.length ()
3657 && !ctx.m_avals.m_known_aggs[i].is_empty ())
3658 return false;
3659 continue;
3661 if (i >= ctx.m_avals.m_known_aggs.length ()
3662 || ctx.m_avals.m_known_aggs[i].is_empty ())
3664 if (i < m_avals.m_known_aggs.length ()
3665 && !m_avals.m_known_aggs[i].is_empty ())
3666 return false;
3667 continue;
3669 if (!m_avals.m_known_aggs[i].equal_to (ctx.m_avals.m_known_aggs[i]))
3670 return false;
3673 return true;
3676 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3677 this context. Always compute size and min_size. Only compute time and
3678 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3679 is true. */
3681 void
3682 ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
3683 bool est_times, bool est_hints)
3685 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3686 size_time_entry *e;
3687 int size = 0;
3688 sreal time = 0;
3689 int min_size = 0;
3690 ipa_hints hints = 0;
3691 sreal loops_with_known_iterations = 0;
3692 sreal loops_with_known_strides = 0;
3693 int i;
3695 if (dump_file && (dump_flags & TDF_DETAILS))
3697 bool found = false;
3698 fprintf (dump_file, " Estimating body: %s\n"
3699 " Known to be false: ", m_node->dump_name ());
3701 for (i = predicate::not_inlined_condition;
3702 i < (predicate::first_dynamic_condition
3703 + (int) vec_safe_length (info->conds)); i++)
3704 if (!(m_possible_truths & (1 << i)))
3706 if (found)
3707 fprintf (dump_file, ", ");
3708 found = true;
3709 dump_condition (dump_file, info->conds, i);
/* First account all outgoing calls in this context.  */
3713 if (m_node->callees || m_node->indirect_calls)
3714 estimate_calls_size_and_time (m_node, &size, &min_size,
3715 est_times ? &time : NULL,
3716 est_hints ? &hints : NULL, m_possible_truths,
3717 &m_avals);
3719 sreal nonspecialized_time = time;
/* Entry 0 of size_time_table is unconditional (asserted below), so it
   contributes to the minimal size.  */
3721 min_size += info->size_time_table[0].size;
3722 for (i = 0; info->size_time_table.iterate (i, &e); i++)
3724 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3726 /* Because predicates are conservative, it can happen that nonconst is 1
3727 but exec is 0. */
3728 if (exec)
3730 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3732 gcc_checking_assert (e->time >= 0);
3733 gcc_checking_assert (time >= 0);
3735 /* We compute specialized size only because size of nonspecialized
3736 copy is context independent.
3738 The difference between nonspecialized execution and specialized is
3739 that nonspecialized is not going to have optimized out computations
3740 known to be constant in a specialized setting. */
3741 if (nonconst)
3742 size += e->size;
3743 if (!est_times)
3744 continue;
3745 nonspecialized_time += e->time;
3746 if (!nonconst)
3748 else if (!m_inline_param_summary.exists ())
3750 if (nonconst)
3751 time += e->time;
3753 else
/* With per-parameter summaries available, scale the entry's time
   by the probability that its nonconst predicate holds.  */
3755 int prob = e->nonconst_predicate.probability
3756 (info->conds, m_possible_truths,
3757 m_inline_param_summary);
3758 gcc_checking_assert (prob >= 0);
3759 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3760 if (prob == REG_BR_PROB_BASE)
3761 time += e->time;
3762 else
3763 time += e->time * prob / REG_BR_PROB_BASE;
3765 gcc_checking_assert (time >= 0);
3768 gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
3769 gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
3770 gcc_checking_assert (min_size >= 0);
3771 gcc_checking_assert (size >= 0);
3772 gcc_checking_assert (time >= 0);
3773 /* nonspecialized_time should be always bigger than specialized time.
3774 Roundoff issues however may get into the way. */
3775 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3777 /* Roundoff issues may make specialized time bigger than nonspecialized
3778 time. We do not really want that to happen because some heuristics
3779 may get confused by seeing negative speedups. */
3780 if (time > nonspecialized_time)
3781 time = nonspecialized_time;
3783 if (est_hints)
3785 if (info->scc_no)
3786 hints |= INLINE_HINT_in_scc;
3787 if (DECL_DECLARED_INLINE_P (m_node->decl))
3788 hints |= INLINE_HINT_declared_inline;
3789 if (info->builtin_constant_p_parms.length ()
3790 && DECL_DECLARED_INLINE_P (m_node->decl))
3791 hints |= INLINE_HINT_builtin_constant_p;
3793 ipa_freqcounting_predicate *fcp;
/* A loop predicate that evaluates to false in this context means the
   corresponding loop bound/stride becomes known after specialization;
   accumulate the frequency of such loops.  */
3794 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
3795 if (!fcp->predicate->evaluate (m_possible_truths))
3797 hints |= INLINE_HINT_loop_iterations;
3798 loops_with_known_iterations += fcp->freq;
3800 estimates->loops_with_known_iterations = loops_with_known_iterations;
3802 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
3803 if (!fcp->predicate->evaluate (m_possible_truths))
3805 hints |= INLINE_HINT_loop_stride;
3806 loops_with_known_strides += fcp->freq;
3808 estimates->loops_with_known_strides = loops_with_known_strides;
/* Sizes were accumulated scaled; convert back to instruction units.  */
3811 size = RDIV (size, ipa_fn_summary::size_scale);
3812 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3814 if (dump_file && (dump_flags & TDF_DETAILS))
3816 fprintf (dump_file, "\n size:%i", (int) size);
3817 if (est_times)
3818 fprintf (dump_file, " time:%f nonspec time:%f",
3819 time.to_double (), nonspecialized_time.to_double ());
3820 if (est_hints)
3821 fprintf (dump_file, " loops with known iterations:%f "
3822 "known strides:%f", loops_with_known_iterations.to_double (),
3823 loops_with_known_strides.to_double ());
3824 fprintf (dump_file, "\n");
3826 if (est_times)
3828 estimates->time = time;
3829 estimates->nonspecialized_time = nonspecialized_time;
3831 estimates->size = size;
3832 estimates->min_size = min_size;
3833 if (est_hints)
3834 estimates->hints = hints;
3835 return;
3839 /* Estimate size and time needed to execute callee of EDGE assuming that
3840 parameters known to be constant at caller of EDGE are propagated.
3841 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3842 and types for parameters. */
3844 void
3845 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3846 ipa_auto_call_arg_values *avals,
3847 ipa_call_estimates *estimates)
3849 clause_t clause, nonspec_clause;
3851 evaluate_conditions_for_known_args (node, false, avals, &clause,
3852 &nonspec_clause);
3853 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
3854 ctx.estimate_size_and_time (estimates);
3857 /* Return stack frame offset where frame of NODE is supposed to start inside
3858 of the function it is inlined to.
3859 Return 0 for functions that are not inlined. */
3861 HOST_WIDE_INT
3862 ipa_get_stack_frame_offset (struct cgraph_node *node)
3864 HOST_WIDE_INT offset = 0;
3865 if (!node->inlined_to)
3866 return 0;
3867 node = node->callers->caller;
3868 while (true)
3870 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3871 if (!node->inlined_to)
3872 return offset;
3873 node = node->callers->caller;
3878 /* Update summary information of inline clones after inlining.
3879 Compute peak stack usage. */
3881 static void
3882 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3884 struct cgraph_edge *e;
3886 ipa_propagate_frequency (node);
3887 for (e = node->callees; e; e = e->next_callee)
3889 if (!e->inline_failed)
3890 inline_update_callee_summaries (e->callee, depth);
3891 else
3892 ipa_call_summaries->get (e)->loop_depth += depth;
3894 for (e = node->indirect_calls; e; e = e->next_callee)
3895 ipa_call_summaries->get (e)->loop_depth += depth;
3898 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3899 INLINED_EDGE has been inlined.
3901 When function A is inlined in B and A calls C with parameter that
3902 changes with probability PROB1 and C is known to be passthrough
3903 of argument if B that change with probability PROB2, the probability
3904 of change is now PROB1*PROB2. */
3906 static void
3907 remap_edge_params (struct cgraph_edge *inlined_edge,
3908 struct cgraph_edge *edge)
3910 if (ipa_node_params_sum)
3912 int i;
3913 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
3914 if (!args)
3915 return;
3916 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3917 class ipa_call_summary *inlined_es
3918 = ipa_call_summaries->get (inlined_edge);
3920 if (es->param.length () == 0)
3921 return;
3923 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3925 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
/* Only pass-through and ancestor jump functions relate EDGE's argument
   I to a formal parameter ID of the function that was inlined.  */
3926 if (jfunc->type == IPA_JF_PASS_THROUGH
3927 || jfunc->type == IPA_JF_ANCESTOR)
3929 int id = jfunc->type == IPA_JF_PASS_THROUGH
3930 ? ipa_get_jf_pass_through_formal_id (jfunc)
3931 : ipa_get_jf_ancestor_formal_id (jfunc);
3932 if (id < (int) inlined_es->param.length ())
3934 int prob1 = es->param[i].change_prob;
3935 int prob2 = inlined_es->param[id].change_prob;
3936 int prob = combine_probabilities (prob1, prob2);
/* Do not let the product of two nonzero probabilities round down to
   "never changes".  */
3938 if (prob1 && prob2 && !prob)
3939 prob = 1;
3941 es->param[i].change_prob = prob;
/* points_to_local_or_readonly_memory is propagated from the inlined
   edge's matching parameter.  */
3943 if (inlined_es
3944 ->param[id].points_to_local_or_readonly_memory)
3945 es->param[i].points_to_local_or_readonly_memory = true;
/* A constant argument may also be known to point to local/readonly
   memory directly.  */
3947 if (!es->param[i].points_to_local_or_readonly_memory
3948 && jfunc->type == IPA_JF_CONST
3949 && points_to_local_or_readonly_memory_p
3950 (ipa_get_jf_constant (jfunc)))
3951 es->param[i].points_to_local_or_readonly_memory = true;
3957 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3959 Remap predicates of callees of NODE. Rest of arguments match
3960 remap_predicate.
3962 Also update change probabilities. */
3964 static void
3965 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3966 struct cgraph_node *node,
3967 class ipa_fn_summary *info,
3968 class ipa_node_params *params_summary,
3969 class ipa_fn_summary *callee_info,
3970 const vec<int> &operand_map,
3971 const vec<HOST_WIDE_INT> &offset_map,
3972 clause_t possible_truths,
3973 predicate *toplev_predicate)
3975 struct cgraph_edge *e, *next;
/* NEXT is cached up front since edge_set_predicate may remove E from
   the list (presumably when the predicate degenerates to false --
   confirm in edge_set_predicate).  */
3976 for (e = node->callees; e; e = next)
3978 predicate p;
3979 next = e->next_callee;
3981 if (e->inline_failed)
3983 class ipa_call_summary *es = ipa_call_summaries->get (e);
3984 remap_edge_params (inlined_edge, e);
3986 if (es->predicate)
3988 p = es->predicate->remap_after_inlining
3989 (info, params_summary,
3990 callee_info, operand_map,
3991 offset_map, possible_truths,
3992 *toplev_predicate);
3993 edge_set_predicate (e, &p);
/* An edge without a predicate becomes guarded by the predicate of the
   inlined call itself.  */
3995 else
3996 edge_set_predicate (e, toplev_predicate);
/* Already inlined edges: recurse into the inline clone's body.  */
3998 else
3999 remap_edge_summaries (inlined_edge, e->callee, info,
4000 params_summary, callee_info,
4001 operand_map, offset_map, possible_truths,
4002 toplev_predicate);
4004 for (e = node->indirect_calls; e; e = next)
4006 class ipa_call_summary *es = ipa_call_summaries->get (e);
4007 predicate p;
4008 next = e->next_callee;
4010 remap_edge_params (inlined_edge, e);
4011 if (es->predicate)
4013 p = es->predicate->remap_after_inlining
4014 (info, params_summary,
4015 callee_info, operand_map, offset_map,
4016 possible_truths, *toplev_predicate);
4017 edge_set_predicate (e, &p);
4019 else
4020 edge_set_predicate (e, toplev_predicate);
4024 /* Run remap_after_inlining on each predicate in V. */
4026 static void
4027 remap_freqcounting_predicate (class ipa_fn_summary *info,
4028 class ipa_node_params *params_summary,
4029 class ipa_fn_summary *callee_info,
4030 vec<ipa_freqcounting_predicate, va_gc> *v,
4031 const vec<int> &operand_map,
4032 const vec<HOST_WIDE_INT> &offset_map,
4033 clause_t possible_truths,
4034 predicate *toplev_predicate)
4037 ipa_freqcounting_predicate *fcp;
4038 for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
4040 predicate p
4041 = fcp->predicate->remap_after_inlining (info, params_summary,
4042 callee_info, operand_map,
4043 offset_map, possible_truths,
4044 *toplev_predicate);
/* Strengthen the stored predicate with the remapped one, unless the
   remap degenerated to a constant true/false.  */
4045 if (p != false && p != true)
4046 *fcp->predicate &= p;
4050 /* We inlined EDGE. Update summary of the function we inlined into. */
4052 void
4053 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
4055 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
/* Everything is merged into the summary of the outermost function the
   callee now lives in (the root of the inline tree).  */
4056 struct cgraph_node *to = (edge->caller->inlined_to
4057 ? edge->caller->inlined_to : edge->caller);
4058 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
4059 clause_t clause = 0; /* not_inline is known to be false. */
4060 size_time_entry *e;
4061 auto_vec<int, 8> operand_map;
4062 auto_vec<HOST_WIDE_INT, 8> offset_map;
4063 int i;
4064 predicate toplev_predicate;
4065 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4066 ipa_node_params *params_summary = (ipa_node_params_sum
4067 ? ipa_node_params_sum->get (to) : NULL);
/* Everything coming from the callee is additionally guarded by the
   predicate of the now-inlined edge.  */
4069 if (es->predicate)
4070 toplev_predicate = *es->predicate;
4071 else
4072 toplev_predicate = true;
4074 info->fp_expressions |= callee_info->fp_expressions;
/* Evaluate the callee's conditions against the actual arguments of the
   edge so already-decided predicates fold while merging.  */
4076 if (callee_info->conds)
4078 ipa_auto_call_arg_values avals;
4079 evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
4081 if (ipa_node_params_sum && callee_info->conds)
4083 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4084 int count = args ? ipa_get_cs_argument_count (args) : 0;
4085 int i;
4087 if (count)
4089 operand_map.safe_grow_cleared (count, true);
4090 offset_map.safe_grow_cleared (count, true);
/* Build maps translating the callee's formal parameters to the
   caller's: OPERAND_MAP holds the caller parameter index (or -1 when
   there is none), OFFSET_MAP holds the ancestor offset (or -1 when the
   aggregate contents are not preserved).  */
4092 for (i = 0; i < count; i++)
4094 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4095 int map = -1;
4097 /* TODO: handle non-NOPs when merging. */
4098 if (jfunc->type == IPA_JF_PASS_THROUGH)
4100 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4101 map = ipa_get_jf_pass_through_formal_id (jfunc);
4102 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
4103 offset_map[i] = -1;
4105 else if (jfunc->type == IPA_JF_ANCESTOR)
4107 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
4108 if (offset >= 0 && offset < INT_MAX)
4110 map = ipa_get_jf_ancestor_formal_id (jfunc);
4111 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
4112 offset = -1;
4113 offset_map[i] = offset;
4116 operand_map[i] = map;
4117 gcc_assert (map < ipa_get_param_count (params_summary));
/* Propagate the callee's __builtin_constant_p parameter notes to the
   corresponding caller parameters.  */
4120 int ip;
4121 for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
4122 if (ip < count && operand_map[ip] >= 0)
4123 add_builtin_constant_p_parm (info, operand_map[ip]);
/* Merge the callee's size/time entries, with predicates remapped to the
   caller's parameters and time scaled by the edge's frequency.  */
4125 sreal freq = edge->sreal_frequency ();
4126 for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
4128 predicate p;
4129 p = e->exec_predicate.remap_after_inlining
4130 (info, params_summary,
4131 callee_info, operand_map,
4132 offset_map, clause,
4133 toplev_predicate);
4134 predicate nonconstp;
4135 nonconstp = e->nonconst_predicate.remap_after_inlining
4136 (info, params_summary,
4137 callee_info, operand_map,
4138 offset_map, clause,
4139 toplev_predicate);
/* Entries whose predicates are known false in this context are
   dropped entirely.  */
4140 if (p != false && nonconstp != false)
4142 sreal add_time = ((sreal)e->time * freq);
4143 int prob = e->nonconst_predicate.probability (callee_info->conds,
4144 clause, es->param);
4145 if (prob != REG_BR_PROB_BASE)
4146 add_time = add_time * prob / REG_BR_PROB_BASE;
4147 if (prob != REG_BR_PROB_BASE
4148 && dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
4151 (double) prob / REG_BR_PROB_BASE);
4153 info->account_size_time (e->size, add_time, p, nonconstp);
4156 remap_edge_summaries (edge, edge->callee, info, params_summary,
4157 callee_info, operand_map,
4158 offset_map, clause, &toplev_predicate);
4159 remap_freqcounting_predicate (info, params_summary, callee_info,
4160 info->loop_iterations, operand_map,
4161 offset_map, clause, &toplev_predicate);
4162 remap_freqcounting_predicate (info, params_summary, callee_info,
4163 info->loop_strides, operand_map,
4164 offset_map, clause, &toplev_predicate);
/* Peak stack usage is the caller's frame offset at the inlined call
   plus the callee's estimated frame size.  */
4166 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4167 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4169 if (info->estimated_stack_size < peak)
4170 info->estimated_stack_size = peak;
4172 inline_update_callee_summaries (edge->callee, es->loop_depth);
/* Keep the cached call size/time table consistent: unaccount the call
   that just disappeared and account the callee's outgoing calls.  */
4173 if (info->call_size_time_table.length ())
4175 int edge_size = 0;
4176 sreal edge_time = 0;
4178 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
4179 /* Unaccount size and time of the optimized out call. */
4180 info->account_size_time (-edge_size, -edge_time,
4181 es->predicate ? *es->predicate : true,
4182 es->predicate ? *es->predicate : true,
4183 true);
4184 /* Account new calls. */
4185 summarize_calls_size_and_time (edge->callee, info);
4188 /* Free summaries that are not maintained for inline clones/edges. */
4189 ipa_call_summaries->remove (edge);
4190 ipa_fn_summaries->remove (edge->callee);
4191 ipa_remove_from_growth_caches (edge);
4194 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4195 overall size and time. Recompute it.
4196 If RESET is true also recompute call_time_size_table. */
4198 void
4199 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4201 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4202 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4203 size_time_entry *e;
4204 int i;
/* Recompute the context-independent totals by summing all size/time
   entries.  */
4206 size_info->size = 0;
4207 info->time = 0;
4208 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4210 size_info->size += e->size;
4211 info->time += e->time;
/* Entry 0 accounts the unconditional portion of the body.  */
4213 info->min_size = info->size_time_table[0].size;
4214 if (reset)
4215 info->call_size_time_table.release ();
/* Account outgoing calls using a clause where every condition except
   the always-false one may hold.  */
4216 if (node->callees || node->indirect_calls)
4217 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4218 &info->time, NULL,
4219 ~(clause_t) (1 << predicate::false_condition),
4220 NULL);
/* Sizes are kept scaled internally; convert back to instruction
   units.  */
4221 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4222 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4226 /* This function performs intraprocedural analysis in NODE that is required to
4227 inline indirect calls. */
4229 static void
4230 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4232 ipa_analyze_node (node);
4233 if (dump_file && (dump_flags & TDF_DETAILS))
4235 ipa_print_node_params (dump_file, node);
4236 ipa_print_node_jump_functions (dump_file, node);
4241 /* Note function body size. */
4243 void
4244 inline_analyze_function (struct cgraph_node *node)
4246 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4248 if (dump_file)
4249 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
/* Parameter/jump-function analysis is done only for optimized
   functions; thunks are skipped.  */
4250 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4251 inline_indirect_intraprocedural_analysis (node);
4252 compute_fn_summary (node, false);
/* When not optimizing, mark every outgoing edge uninlinable.  */
4253 if (!optimize)
4255 struct cgraph_edge *e;
4256 for (e = node->callees; e; e = e->next_callee)
4257 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4258 for (e = node->indirect_calls; e; e = e->next_callee)
4259 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4262 pop_cfun ();
4266 /* Called when new function is inserted to callgraph late. */
4268 void
4269 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
/* Analyze the freshly inserted function from scratch; the preallocated
   summary argument is intentionally ignored.  */
4271 inline_analyze_function (node);
4274 /* Note function body size. */
4276 static void
4277 ipa_fn_summary_generate (void)
4279 struct cgraph_node *node;
/* Precompute versionability of every defined function that has a
   body.  */
4281 FOR_EACH_DEFINED_FUNCTION (node)
4282 if (DECL_STRUCT_FUNCTION (node->decl))
4283 node->versionable = tree_versionable_function_p (node->decl);
4285 ipa_fn_summary_alloc ();
/* From this point on, functions inserted into the callgraph get their
   summary computed via the insertion hook (ipa_fn_summary_t::insert).  */
4287 ipa_fn_summaries->enable_insertion_hook ();
4289 ipa_register_cgraph_hooks ();
/* Analyze every defined non-alias function that is optimized, or whose
   summary is needed for LTO/offload/WPA streaming.  */
4291 FOR_EACH_DEFINED_FUNCTION (node)
4292 if (!node->alias
4293 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4294 || opt_for_fn (node->decl, optimize)))
4295 inline_analyze_function (node);
4299 /* Read inline summary for edge E from input block IB.  When PREVAILS is
false, the stream is consumed but nothing is stored.  */
4301 static void
4302 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4303 bool prevails)
/* When the edge does not prevail at LTO link time, the stream still has
   to be consumed to stay in sync, but no summary is created.  */
4305 class ipa_call_summary *es = prevails
4306 ? ipa_call_summaries->get_create (e) : NULL;
4307 predicate p;
4308 int length, i;
4310 int size = streamer_read_uhwi (ib);
4311 int time = streamer_read_uhwi (ib);
4312 int depth = streamer_read_uhwi (ib);
4314 if (es)
4316 es->call_stmt_size = size;
4317 es->call_stmt_time = time;
4318 es->loop_depth = depth;
4321 bitpack_d bp = streamer_read_bitpack (ib);
4322 if (es)
4323 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4324 else
4325 bp_unpack_value (&bp, 1);
4327 p.stream_in (ib);
4328 if (es)
4329 edge_set_predicate (e, &p);
/* Per-parameter info follows, prefixed by its count.  It is only kept
   for edges whose callee may be called in this translation unit.  */
4330 length = streamer_read_uhwi (ib);
4331 if (length && es
4332 && (e->possibly_call_in_translation_unit_p ()
4333 /* Also stream in jump functions to builtins in hope that they
4334 will get fnspecs. */
4335 || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
4337 es->param.safe_grow_cleared (length, true);
4338 for (i = 0; i < length; i++)
4340 es->param[i].change_prob = streamer_read_uhwi (ib);
4341 es->param[i].points_to_local_or_readonly_memory
4342 = streamer_read_uhwi (ib);
/* Otherwise skip the per-parameter records to keep the stream in
   sync.  */
4345 else
4347 for (i = 0; i < length; i++)
4349 streamer_read_uhwi (ib);
4350 streamer_read_uhwi (ib);
4356 /* Stream in inline summaries from section DATA of length LEN belonging
   to FILE_DATA.  For non-prevailing nodes everything is still read to
   keep the stream in sync, but the results are thrown away.  Layout
   must mirror ipa_fn_summary_write.  */
4358 static void
4359 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4360 size_t len)
4362 const struct lto_function_header *header =
4363 (const struct lto_function_header *) data;
4364 const int cfg_offset = sizeof (struct lto_function_header);
4365 const int main_offset = cfg_offset + header->cfg_size;
4366 const int string_offset = main_offset + header->main_size;
4367 class data_in *data_in;
4368 unsigned int i, count2, j;
4369 unsigned int f_count;
4371 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4372 file_data->mode_table);
4374 data_in =
4375 lto_data_in_create (file_data, (const char *) data + string_offset,
4376 header->string_size, vNULL);
/* Number of function summaries in this section.  */
4377 f_count = streamer_read_uhwi (&ib);
4378 for (i = 0; i < f_count; i++)
4380 unsigned int index;
4381 struct cgraph_node *node;
4382 class ipa_fn_summary *info;
4383 class ipa_node_params *params_summary;
4384 class ipa_size_summary *size_info;
4385 lto_symtab_encoder_t encoder;
4386 struct bitpack_d bp;
4387 struct cgraph_edge *e;
4388 predicate p;
/* Look up the node; summaries are created only when it prevails.  */
4390 index = streamer_read_uhwi (&ib);
4391 encoder = file_data->symtab_node_encoder;
4392 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4393 index));
4394 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4395 params_summary = node->prevailing_p ()
4396 ? ipa_node_params_sum->get (node) : NULL;
4397 size_info = node->prevailing_p ()
4398 ? ipa_size_summaries->get_create (node) : NULL;
/* Scalar summary fields.  */
4400 int stack_size = streamer_read_uhwi (&ib);
4401 int size = streamer_read_uhwi (&ib);
4402 sreal time = sreal::stream_in (&ib);
4404 if (info)
4406 info->estimated_stack_size
4407 = size_info->estimated_self_stack_size = stack_size;
4408 size_info->size = size_info->self_size = size;
4409 info->time = time;
/* Bitpacked flags: inlinable and fp_expressions.  */
4412 bp = streamer_read_bitpack (&ib);
4413 if (info)
4415 info->inlinable = bp_unpack_value (&bp, 1);
4416 info->fp_expressions = bp_unpack_value (&bp, 1);
4418 else
4420 bp_unpack_value (&bp, 1);
4421 bp_unpack_value (&bp, 1);
/* Conditions used by the predicates of this summary.  */
4424 count2 = streamer_read_uhwi (&ib);
4425 gcc_assert (!info || !info->conds);
4426 if (info)
4427 vec_safe_reserve_exact (info->conds, count2);
4428 for (j = 0; j < count2; j++)
4430 struct condition c;
4431 unsigned int k, count3;
4432 c.operand_num = streamer_read_uhwi (&ib);
4433 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4434 c.type = stream_read_tree (&ib, data_in);
4435 c.val = stream_read_tree (&ib, data_in);
4436 bp = streamer_read_bitpack (&ib);
4437 c.agg_contents = bp_unpack_value (&bp, 1);
4438 c.by_ref = bp_unpack_value (&bp, 1);
4439 if (c.agg_contents)
4440 c.offset = streamer_read_uhwi (&ib);
/* Chain of operations applied to the operand before comparison.  */
4441 count3 = streamer_read_uhwi (&ib);
4442 c.param_ops = NULL;
4443 if (info)
4444 vec_safe_reserve_exact (c.param_ops, count3);
4445 if (params_summary)
4446 ipa_set_param_used_by_ipa_predicates
4447 (params_summary, c.operand_num, true);
4448 for (k = 0; k < count3; k++)
4450 struct expr_eval_op op;
4451 enum gimple_rhs_class rhs_class;
4452 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4453 op.type = stream_read_tree (&ib, data_in);
/* Operand count and encoding depend on the rhs class of the code.  */
4454 switch (rhs_class = get_gimple_rhs_class (op.code))
4456 case GIMPLE_UNARY_RHS:
4457 op.index = 0;
4458 op.val[0] = NULL_TREE;
4459 op.val[1] = NULL_TREE;
4460 break;
4462 case GIMPLE_BINARY_RHS:
4463 case GIMPLE_TERNARY_RHS:
4464 bp = streamer_read_bitpack (&ib);
4465 op.index = bp_unpack_value (&bp, 2);
4466 op.val[0] = stream_read_tree (&ib, data_in);
4467 if (rhs_class == GIMPLE_BINARY_RHS)
4468 op.val[1] = NULL_TREE;
4469 else
4470 op.val[1] = stream_read_tree (&ib, data_in);
4471 break;
4473 default:
/* Corrupted or mismatched stream; bail out hard.  */
4474 fatal_error (UNKNOWN_LOCATION,
4475 "invalid fnsummary in LTO stream");
4477 if (info)
4478 c.param_ops->quick_push (op);
4480 if (info)
4481 info->conds->quick_push (c);
/* Size/time table entries.  */
4483 count2 = streamer_read_uhwi (&ib);
4484 gcc_assert (!info || !info->size_time_table.length ());
4485 if (info && count2)
4486 info->size_time_table.reserve_exact (count2);
4487 for (j = 0; j < count2; j++)
4489 class size_time_entry e;
4491 e.size = streamer_read_uhwi (&ib);
4492 e.time = sreal::stream_in (&ib);
4493 e.exec_predicate.stream_in (&ib);
4494 e.nonconst_predicate.stream_in (&ib);
4496 if (info)
4497 info->size_time_table.quick_push (e);
/* Frequency-counting predicates for loop iteration hints.  */
4500 count2 = streamer_read_uhwi (&ib);
4501 for (j = 0; j < count2; j++)
4503 p.stream_in (&ib);
4504 sreal fcp_freq = sreal::stream_in (&ib);
4505 if (info)
4507 ipa_freqcounting_predicate fcp;
4508 fcp.predicate = NULL;
4509 set_hint_predicate (&fcp.predicate, p);
4510 fcp.freq = fcp_freq;
4511 vec_safe_push (info->loop_iterations, fcp);
/* Frequency-counting predicates for loop stride hints.  */
4514 count2 = streamer_read_uhwi (&ib);
4515 for (j = 0; j < count2; j++)
4517 p.stream_in (&ib);
4518 sreal fcp_freq = sreal::stream_in (&ib);
4519 if (info)
4521 ipa_freqcounting_predicate fcp;
4522 fcp.predicate = NULL;
4523 set_hint_predicate (&fcp.predicate, p);
4524 fcp.freq = fcp_freq;
4525 vec_safe_push (info->loop_strides, fcp);
/* Parameters tested by __builtin_constant_p.  */
4528 count2 = streamer_read_uhwi (&ib);
4529 if (info && count2)
4530 info->builtin_constant_p_parms.reserve_exact (count2);
4531 for (j = 0; j < count2; j++)
4533 int parm = streamer_read_uhwi (&ib);
4534 if (info)
4535 info->builtin_constant_p_parms.quick_push (parm);
/* Finally the summaries of all outgoing (direct and indirect) edges.  */
4537 for (e = node->callees; e; e = e->next_callee)
4538 read_ipa_call_summary (&ib, e, info != NULL);
4539 for (e = node->indirect_calls; e; e = e->next_callee)
4540 read_ipa_call_summary (&ib, e, info != NULL);
4543 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4544 len);
4545 lto_data_in_delete (data_in);
4549 /* Read inline summary. Jump functions are shared among ipa-cp
4550 and inliner, so when ipa-cp is active, we don't need to write them
4551 twice. */
4553 static void
4554 ipa_fn_summary_read (void)
4556 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4557 struct lto_file_decl_data *file_data;
4558 unsigned int j = 0;
4560 ipa_prop_read_jump_functions ();
4561 ipa_fn_summary_alloc ();
4563 while ((file_data = file_data_vec[j++]))
4565 size_t len;
4566 const char *data
4567 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4568 &len);
4569 if (data)
4570 inline_read_section (file_data, data, len);
4571 else
4572 /* Fatal error here. We do not want to support compiling ltrans units
4573 with different version of compiler or different flags than the WPA
4574 unit, so this should never happen. */
4575 fatal_error (input_location,
4576 "ipa inline summary is missing in input file");
4578 ipa_register_cgraph_hooks ();
4580 gcc_assert (ipa_fn_summaries);
4581 ipa_fn_summaries->enable_insertion_hook ();
4585 /* Write inline summary for edge E to OB. */
4587 static void
4588 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4590 class ipa_call_summary *es = ipa_call_summaries->get (e);
4591 int i;
4593 streamer_write_uhwi (ob, es->call_stmt_size);
4594 streamer_write_uhwi (ob, es->call_stmt_time);
4595 streamer_write_uhwi (ob, es->loop_depth);
4597 bitpack_d bp = bitpack_create (ob->main_stream);
4598 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4599 streamer_write_bitpack (&bp);
4601 if (es->predicate)
4602 es->predicate->stream_out (ob);
4603 else
4604 streamer_write_uhwi (ob, 0);
4605 streamer_write_uhwi (ob, es->param.length ());
4606 for (i = 0; i < (int) es->param.length (); i++)
4608 streamer_write_uhwi (ob, es->param[i].change_prob);
4609 streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory);
4614 /* Write inline summary for node in SET.
4615 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4616 active, we don't need to write them twice.
   The stream layout must be kept in sync with inline_read_section and
   read_ipa_call_summary.  */
4618 static void
4619 ipa_fn_summary_write (void)
4621 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4622 lto_symtab_encoder_iterator lsei;
4623 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4624 unsigned int count = 0;
/* First pass: count defined non-alias functions in this partition.  */
4626 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4627 lsei_next_function_in_partition (&lsei))
4629 cgraph_node *cnode = lsei_cgraph_node (lsei);
4630 if (cnode->definition && !cnode->alias)
4631 count++;
4633 streamer_write_uhwi (ob, count);
/* Second pass: stream out each function summary.  */
4635 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4636 lsei_next_function_in_partition (&lsei))
4638 cgraph_node *cnode = lsei_cgraph_node (lsei);
4639 if (cnode->definition && !cnode->alias)
4641 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4642 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4643 struct bitpack_d bp;
4644 struct cgraph_edge *edge;
4645 int i;
4646 size_time_entry *e;
4647 struct condition *c;
/* Node index followed by scalar summary fields.  */
4649 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4650 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4651 streamer_write_hwi (ob, size_info->self_size);
4652 info->time.stream_out (ob);
/* Bitpacked flags: inlinable and fp_expressions.  */
4653 bp = bitpack_create (ob->main_stream);
4654 bp_pack_value (&bp, info->inlinable, 1);
4655 bp_pack_value (&bp, info->fp_expressions, 1);
4656 streamer_write_bitpack (&bp);
/* Conditions referenced by the predicates below.  */
4657 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4658 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4660 int j;
4661 struct expr_eval_op *op;
4663 streamer_write_uhwi (ob, c->operand_num);
4664 streamer_write_uhwi (ob, c->code);
4665 stream_write_tree (ob, c->type, true);
4666 stream_write_tree (ob, c->val, true);
4667 bp = bitpack_create (ob->main_stream);
4668 bp_pack_value (&bp, c->agg_contents, 1);
4669 bp_pack_value (&bp, c->by_ref, 1);
4670 streamer_write_bitpack (&bp);
4671 if (c->agg_contents)
4672 streamer_write_uhwi (ob, c->offset);
/* Operations applied to the operand before comparison.  */
4673 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4674 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4676 streamer_write_uhwi (ob, op->code);
4677 stream_write_tree (ob, op->type, true);
4678 if (op->val[0])
4680 bp = bitpack_create (ob->main_stream);
4681 bp_pack_value (&bp, op->index, 2);
4682 streamer_write_bitpack (&bp);
4683 stream_write_tree (ob, op->val[0], true);
4684 if (op->val[1])
4685 stream_write_tree (ob, op->val[1], true);
/* Size/time table.  */
4689 streamer_write_uhwi (ob, info->size_time_table.length ());
4690 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4692 streamer_write_uhwi (ob, e->size);
4693 e->time.stream_out (ob);
4694 e->exec_predicate.stream_out (ob);
4695 e->nonconst_predicate.stream_out (ob);
/* Loop iteration and loop stride hint predicates.  */
4697 ipa_freqcounting_predicate *fcp;
4698 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4699 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4701 fcp->predicate->stream_out (ob);
4702 fcp->freq.stream_out (ob);
4704 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4705 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4707 fcp->predicate->stream_out (ob);
4708 fcp->freq.stream_out (ob);
/* Parameters tested by __builtin_constant_p.  */
4710 streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
4711 int ip;
4712 for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
4713 i++)
4714 streamer_write_uhwi (ob, ip);
/* Summaries of all outgoing (direct and indirect) edges.  */
4715 for (edge = cnode->callees; edge; edge = edge->next_callee)
4716 write_ipa_call_summary (ob, edge);
4717 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4718 write_ipa_call_summary (ob, edge);
4721 streamer_write_char_stream (ob->main_stream, 0);
4722 produce_asm (ob, NULL);
4723 destroy_output_block (ob);
4725 ipa_prop_write_jump_functions ();
4729 /* Release function summary. */
4731 void
4732 ipa_free_fn_summary (void)
4734 if (!ipa_call_summaries)
4735 return;
4736 ggc_delete (ipa_fn_summaries);
4737 ipa_fn_summaries = NULL;
4738 delete ipa_call_summaries;
4739 ipa_call_summaries = NULL;
4740 edge_predicate_pool.release ();
4741 /* During IPA this is one of largest datastructures to release. */
4742 if (flag_wpa)
4743 ggc_trim ();
4746 /* Release function summary. */
4748 void
4749 ipa_free_size_summary (void)
4751 if (!ipa_size_summaries)
4752 return;
4753 delete ipa_size_summaries;
4754 ipa_size_summaries = NULL;
4757 namespace {
4759 const pass_data pass_data_local_fn_summary =
4761 GIMPLE_PASS, /* type */
4762 "local-fnsummary", /* name */
4763 OPTGROUP_INLINE, /* optinfo_flags */
4764 TV_INLINE_PARAMETERS, /* tv_id */
4765 0, /* properties_required */
4766 0, /* properties_provided */
4767 0, /* properties_destroyed */
4768 0, /* todo_flags_start */
4769 0, /* todo_flags_finish */
4772 class pass_local_fn_summary : public gimple_opt_pass
4774 public:
4775 pass_local_fn_summary (gcc::context *ctxt)
4776 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4779 /* opt_pass methods: */
4780 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4781 virtual unsigned int execute (function *)
4783 return compute_fn_summary_for_current ();
4786 }; // class pass_local_fn_summary
4788 } // anon namespace
4790 gimple_opt_pass *
4791 make_pass_local_fn_summary (gcc::context *ctxt)
4793 return new pass_local_fn_summary (ctxt);
4797 /* Free inline summary. */
4799 namespace {
4801 const pass_data pass_data_ipa_free_fn_summary =
4803 SIMPLE_IPA_PASS, /* type */
4804 "free-fnsummary", /* name */
4805 OPTGROUP_NONE, /* optinfo_flags */
4806 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4807 0, /* properties_required */
4808 0, /* properties_provided */
4809 0, /* properties_destroyed */
4810 0, /* todo_flags_start */
4811 0, /* todo_flags_finish */
4814 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4816 public:
4817 pass_ipa_free_fn_summary (gcc::context *ctxt)
4818 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4819 small_p (false)
4822 /* opt_pass methods: */
4823 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4824 void set_pass_param (unsigned int n, bool param)
4826 gcc_assert (n == 0);
4827 small_p = param;
4829 virtual bool gate (function *) { return true; }
4830 virtual unsigned int execute (function *)
4832 ipa_free_fn_summary ();
4833 /* Free ipa-prop structures if they are no longer needed. */
4834 ipa_free_all_structures_after_iinln ();
4835 if (!flag_wpa)
4836 ipa_free_size_summary ();
4837 return 0;
4840 private:
4841 bool small_p;
4842 }; // class pass_ipa_free_fn_summary
4844 } // anon namespace
4846 simple_ipa_opt_pass *
4847 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4849 return new pass_ipa_free_fn_summary (ctxt);
4852 namespace {
4854 const pass_data pass_data_ipa_fn_summary =
4856 IPA_PASS, /* type */
4857 "fnsummary", /* name */
4858 OPTGROUP_INLINE, /* optinfo_flags */
4859 TV_IPA_FNSUMMARY, /* tv_id */
4860 0, /* properties_required */
4861 0, /* properties_provided */
4862 0, /* properties_destroyed */
4863 0, /* todo_flags_start */
4864 ( TODO_dump_symtab ), /* todo_flags_finish */
4867 class pass_ipa_fn_summary : public ipa_opt_pass_d
4869 public:
4870 pass_ipa_fn_summary (gcc::context *ctxt)
4871 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4872 ipa_fn_summary_generate, /* generate_summary */
4873 ipa_fn_summary_write, /* write_summary */
4874 ipa_fn_summary_read, /* read_summary */
4875 NULL, /* write_optimization_summary */
4876 NULL, /* read_optimization_summary */
4877 NULL, /* stmt_fixup */
4878 0, /* function_transform_todo_flags_start */
4879 NULL, /* function_transform */
4880 NULL) /* variable_transform */
4883 /* opt_pass methods: */
4884 virtual unsigned int execute (function *) { return 0; }
4886 }; // class pass_ipa_fn_summary
4888 } // anon namespace
4890 ipa_opt_pass_d *
4891 make_pass_ipa_fn_summary (gcc::context *ctxt)
4893 return new pass_ipa_fn_summary (ctxt);
4896 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4897 within the same process. For use by toplev::finalize. */
4899 void
4900 ipa_fnsummary_c_finalize (void)
4902 ipa_free_fn_summary ();