[RS6000] Power10 ICE running gcc.target/powerpc/ppc-ne0-1.c
[official-gcc.git] / gcc / ipa-fnsummary.c
blobf27c5f07fa7ed2b42c3819e4355be605e4308695
1 /* Function summary pass.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "gimple.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
63 #include "ssa.h"
64 #include "tree-streamer.h"
65 #include "cgraph.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86 #include "tree-into-ssa.h"
88 /* Summaries. */
89 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
90 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
91 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
93 /* Edge predicates goes here. */
94 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
97 /* Dump IPA hints. */
98 void
99 ipa_dump_hints (FILE *f, ipa_hints hints)
101 if (!hints)
102 return;
103 fprintf (f, "IPA hints:");
104 if (hints & INLINE_HINT_indirect_call)
106 hints &= ~INLINE_HINT_indirect_call;
107 fprintf (f, " indirect_call");
109 if (hints & INLINE_HINT_loop_iterations)
111 hints &= ~INLINE_HINT_loop_iterations;
112 fprintf (f, " loop_iterations");
114 if (hints & INLINE_HINT_loop_stride)
116 hints &= ~INLINE_HINT_loop_stride;
117 fprintf (f, " loop_stride");
119 if (hints & INLINE_HINT_same_scc)
121 hints &= ~INLINE_HINT_same_scc;
122 fprintf (f, " same_scc");
124 if (hints & INLINE_HINT_in_scc)
126 hints &= ~INLINE_HINT_in_scc;
127 fprintf (f, " in_scc");
129 if (hints & INLINE_HINT_cross_module)
131 hints &= ~INLINE_HINT_cross_module;
132 fprintf (f, " cross_module");
134 if (hints & INLINE_HINT_declared_inline)
136 hints &= ~INLINE_HINT_declared_inline;
137 fprintf (f, " declared_inline");
139 if (hints & INLINE_HINT_known_hot)
141 hints &= ~INLINE_HINT_known_hot;
142 fprintf (f, " known_hot");
144 if (hints & INLINE_HINT_builtin_constant_p)
146 hints &= ~INLINE_HINT_builtin_constant_p;
147 fprintf (f, " builtin_constant_p");
149 gcc_assert (!hints);
153 /* Record SIZE and TIME to SUMMARY.
154 The accounted code will be executed when EXEC_PRED is true.
155 When NONCONST_PRED is false the code will evaluate to constant and
156 will get optimized out in specialized clones of the function.
157 If CALL is true account to call_size_time_table rather than
158 size_time_table. */
160 void
161 ipa_fn_summary::account_size_time (int size, sreal time,
162 const predicate &exec_pred,
163 const predicate &nonconst_pred_in,
164 bool call)
166 size_time_entry *e;
167 bool found = false;
168 int i;
169 predicate nonconst_pred;
170 vec<size_time_entry, va_gc> *table = call
171 ? call_size_time_table : size_time_table;
173 if (exec_pred == false)
174 return;
176 nonconst_pred = nonconst_pred_in & exec_pred;
178 if (nonconst_pred == false)
179 return;
181 /* We need to create initial empty unconditional clause, but otherwise
182 we don't need to account empty times and sizes. */
183 if (!size && time == 0 && table)
184 return;
186 /* Only for calls we are unaccounting what we previously recorded. */
187 gcc_checking_assert (time >= 0 || call);
189 for (i = 0; vec_safe_iterate (table, i, &e); i++)
190 if (e->exec_predicate == exec_pred
191 && e->nonconst_predicate == nonconst_pred)
193 found = true;
194 break;
196 if (i == max_size_time_table_size)
198 i = 0;
199 found = true;
200 e = &(*table)[0];
201 if (dump_file && (dump_flags & TDF_DETAILS))
202 fprintf (dump_file,
203 "\t\tReached limit on number of entries, "
204 "ignoring the predicate.");
206 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
208 fprintf (dump_file,
209 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
210 ((double) size) / ipa_fn_summary::size_scale,
211 (time.to_double ()), found ? "" : "new ");
212 exec_pred.dump (dump_file, conds, 0);
213 if (exec_pred != nonconst_pred)
215 fprintf (dump_file, " nonconst:");
216 nonconst_pred.dump (dump_file, conds);
218 else
219 fprintf (dump_file, "\n");
221 if (!found)
223 class size_time_entry new_entry;
224 new_entry.size = size;
225 new_entry.time = time;
226 new_entry.exec_predicate = exec_pred;
227 new_entry.nonconst_predicate = nonconst_pred;
228 if (call)
229 vec_safe_push (call_size_time_table, new_entry);
230 else
231 vec_safe_push (size_time_table, new_entry);
233 else
235 e->size += size;
236 e->time += time;
237 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
238 /* Tolerate small roundoff issues. */
239 if (e->time < 0)
240 e->time = 0;
244 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
246 static struct cgraph_edge *
247 redirect_to_unreachable (struct cgraph_edge *e)
249 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
250 struct cgraph_node *target = cgraph_node::get_create
251 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
253 if (e->speculative)
254 e = cgraph_edge::resolve_speculation (e, target->decl);
255 else if (!e->callee)
256 e = cgraph_edge::make_direct (e, target);
257 else
258 e->redirect_callee (target);
259 class ipa_call_summary *es = ipa_call_summaries->get (e);
260 e->inline_failed = CIF_UNREACHABLE;
261 e->count = profile_count::zero ();
262 es->call_stmt_size = 0;
263 es->call_stmt_time = 0;
264 if (callee)
265 callee->remove_symbol_and_inline_clones ();
266 return e;
269 /* Set predicate for edge E. */
271 static void
272 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
274 /* If the edge is determined to be never executed, redirect it
275 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
276 be optimized out. */
277 if (predicate && *predicate == false
278 /* When handling speculative edges, we need to do the redirection
279 just once. Do it always on the direct edge, so we do not
280 attempt to resolve speculation while duplicating the edge. */
281 && (!e->speculative || e->callee))
282 e = redirect_to_unreachable (e);
284 class ipa_call_summary *es = ipa_call_summaries->get (e);
285 if (predicate && *predicate != true)
287 if (!es->predicate)
288 es->predicate = edge_predicate_pool.allocate ();
289 *es->predicate = *predicate;
291 else
293 if (es->predicate)
294 edge_predicate_pool.remove (es->predicate);
295 es->predicate = NULL;
299 /* Set predicate for hint *P. */
301 static void
302 set_hint_predicate (predicate **p, predicate new_predicate)
304 if (new_predicate == false || new_predicate == true)
306 if (*p)
307 edge_predicate_pool.remove (*p);
308 *p = NULL;
310 else
312 if (!*p)
313 *p = edge_predicate_pool.allocate ();
314 **p = new_predicate;
318 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
319 Otherwise add a new item to the vector with this predicate and frerq equal
320 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
321 in which case the function does nothing. */
323 static void
324 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
325 const predicate &new_predicate, sreal add_freq,
326 unsigned max_num_predicates)
328 if (new_predicate == false || new_predicate == true)
329 return;
330 ipa_freqcounting_predicate *f;
331 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
332 if (new_predicate == f->predicate)
334 f->freq += add_freq;
335 return;
337 if (vec_safe_length (*v) >= max_num_predicates)
338 /* Too many different predicates to account for. */
339 return;
341 ipa_freqcounting_predicate fcp;
342 fcp.predicate = NULL;
343 set_hint_predicate (&fcp.predicate, new_predicate);
344 fcp.freq = add_freq;
345 vec_safe_push (*v, fcp);
346 return;
349 /* Compute what conditions may or may not hold given information about
350 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
351 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
352 copy when called in a given context. It is a bitmask of conditions. Bit
353 0 means that condition is known to be false, while bit 1 means that condition
354 may or may not be true. These differs - for example NOT_INLINED condition
355 is always false in the second and also builtin_constant_p tests cannot use
356 the fact that parameter is indeed a constant.
358 When INLINE_P is true, assume that we are inlining. AVAL contains known
359 information about argument values. The function does not modify its content
360 and so AVALs could also be of type ipa_call_arg_values but so far all
361 callers work with the auto version and so we avoid the conversion for
362 convenience.
364 ERROR_MARK value of an argument means compile time invariant. */
366 static void
367 evaluate_conditions_for_known_args (struct cgraph_node *node,
368 bool inline_p,
369 ipa_auto_call_arg_values *avals,
370 clause_t *ret_clause,
371 clause_t *ret_nonspec_clause)
373 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
374 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
375 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
376 int i;
377 struct condition *c;
379 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
381 tree val = NULL;
382 tree res;
383 int j;
384 struct expr_eval_op *op;
386 /* We allow call stmt to have fewer arguments than the callee function
387 (especially for K&R style programs). So bound check here (we assume
388 m_known_aggs vector is either empty or has the same length as
389 m_known_vals). */
390 gcc_checking_assert (!avals->m_known_aggs.length ()
391 || !avals->m_known_vals.length ()
392 || (avals->m_known_vals.length ()
393 == avals->m_known_aggs.length ()));
395 if (c->agg_contents)
397 if (c->code == predicate::changed
398 && !c->by_ref
399 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
400 continue;
402 if (ipa_agg_value_set *agg = avals->safe_aggval_at (c->operand_num))
404 tree sval = avals->safe_sval_at (c->operand_num);
405 val = ipa_find_agg_cst_for_param (agg, sval, c->offset,
406 c->by_ref);
408 else
409 val = NULL_TREE;
411 else
413 val = avals->safe_sval_at (c->operand_num);
414 if (val && val == error_mark_node && c->code != predicate::changed)
415 val = NULL_TREE;
418 if (!val
419 && (c->code == predicate::changed
420 || c->code == predicate::is_not_constant))
422 clause |= 1 << (i + predicate::first_dynamic_condition);
423 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
424 continue;
426 if (c->code == predicate::changed)
428 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
429 continue;
432 if (c->code == predicate::is_not_constant)
434 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
435 continue;
438 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
440 if (c->type != TREE_TYPE (val))
441 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
442 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
444 if (!val)
445 break;
446 if (!op->val[0])
447 val = fold_unary (op->code, op->type, val);
448 else if (!op->val[1])
449 val = fold_binary (op->code, op->type,
450 op->index ? op->val[0] : val,
451 op->index ? val : op->val[0]);
452 else if (op->index == 0)
453 val = fold_ternary (op->code, op->type,
454 val, op->val[0], op->val[1]);
455 else if (op->index == 1)
456 val = fold_ternary (op->code, op->type,
457 op->val[0], val, op->val[1]);
458 else if (op->index == 2)
459 val = fold_ternary (op->code, op->type,
460 op->val[0], op->val[1], val);
461 else
462 val = NULL_TREE;
465 res = val
466 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
467 : NULL;
469 if (res && integer_zerop (res))
470 continue;
471 if (res && integer_onep (res))
473 clause |= 1 << (i + predicate::first_dynamic_condition);
474 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
475 continue;
478 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
479 && !c->agg_contents
480 && (!val || TREE_CODE (val) != INTEGER_CST))
482 value_range vr = avals->m_known_value_ranges[c->operand_num];
483 if (!vr.undefined_p ()
484 && !vr.varying_p ()
485 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
487 if (!useless_type_conversion_p (c->type, vr.type ()))
489 value_range res;
490 range_fold_unary_expr (&res, NOP_EXPR,
491 c->type, &vr, vr.type ());
492 vr = res;
494 tree type = c->type;
496 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
498 if (vr.varying_p () || vr.undefined_p ())
499 break;
501 value_range res;
502 if (!op->val[0])
503 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
504 else if (!op->val[1])
506 value_range op0 (op->val[0], op->val[0]);
507 range_fold_binary_expr (&res, op->code, op->type,
508 op->index ? &op0 : &vr,
509 op->index ? &vr : &op0);
511 else
512 gcc_unreachable ();
513 type = op->type;
514 vr = res;
516 if (!vr.varying_p () && !vr.undefined_p ())
518 value_range res;
519 value_range val_vr (c->val, c->val);
520 range_fold_binary_expr (&res, c->code, boolean_type_node,
521 &vr,
522 &val_vr);
523 if (res.zero_p ())
524 continue;
529 clause |= 1 << (i + predicate::first_dynamic_condition);
530 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
532 *ret_clause = clause;
533 if (ret_nonspec_clause)
534 *ret_nonspec_clause = nonspec_clause;
537 /* Return true if VRP will be exectued on the function.
538 We do not want to anticipate optimizations that will not happen.
540 FIXME: This can be confused with -fdisable and debug counters and thus
541 it should not be used for correctness (only to make heuristics work).
542 This means that inliner should do its own optimizations of expressions
543 that it predicts to be constant so wrong code can not be triggered by
544 builtin_constant_p. */
546 static bool
547 vrp_will_run_p (struct cgraph_node *node)
549 return (opt_for_fn (node->decl, optimize)
550 && !opt_for_fn (node->decl, optimize_debug)
551 && opt_for_fn (node->decl, flag_tree_vrp));
554 /* Similarly about FRE. */
556 static bool
557 fre_will_run_p (struct cgraph_node *node)
559 return (opt_for_fn (node->decl, optimize)
560 && !opt_for_fn (node->decl, optimize_debug)
561 && opt_for_fn (node->decl, flag_tree_fre));
564 /* Work out what conditions might be true at invocation of E.
565 Compute costs for inlined edge if INLINE_P is true.
567 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
568 (if non-NULL) conditions evaluated for nonspecialized clone called
569 in a given context.
571 Vectors in AVALS will be populated with useful known information about
572 argument values - information not known to have any uses will be omitted -
573 except for m_known_contexts which will only be calculated if
574 COMPUTE_CONTEXTS is true. */
576 void
577 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
578 clause_t *clause_ptr,
579 clause_t *nonspec_clause_ptr,
580 ipa_auto_call_arg_values *avals,
581 bool compute_contexts)
583 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
584 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
585 class ipa_edge_args *args;
587 if (clause_ptr)
588 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
590 if (ipa_node_params_sum
591 && !e->call_stmt_cannot_inline_p
592 && (info->conds || compute_contexts)
593 && (args = IPA_EDGE_REF (e)) != NULL)
595 struct cgraph_node *caller;
596 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
597 class ipa_call_summary *es = ipa_call_summaries->get (e);
598 int i, count = ipa_get_cs_argument_count (args);
600 if (count)
602 if (e->caller->inlined_to)
603 caller = e->caller->inlined_to;
604 else
605 caller = e->caller;
606 caller_parms_info = IPA_NODE_REF (caller);
607 callee_pi = IPA_NODE_REF (callee);
609 /* Watch for thunks. */
610 if (callee_pi)
611 /* Watch for variadic functions. */
612 count = MIN (count, ipa_get_param_count (callee_pi));
615 if (callee_pi)
616 for (i = 0; i < count; i++)
618 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
620 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
621 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
623 /* Determine if we know constant value of the parameter. */
624 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
625 ipa_get_type (callee_pi, i));
627 if (!cst && e->call_stmt
628 && i < (int)gimple_call_num_args (e->call_stmt))
630 cst = gimple_call_arg (e->call_stmt, i);
631 if (!is_gimple_min_invariant (cst))
632 cst = NULL;
634 if (cst)
636 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
637 if (!avals->m_known_vals.length ())
638 avals->m_known_vals.safe_grow_cleared (count, true);
639 avals->m_known_vals[i] = cst;
641 else if (inline_p && !es->param[i].change_prob)
643 if (!avals->m_known_vals.length ())
644 avals->m_known_vals.safe_grow_cleared (count, true);
645 avals->m_known_vals[i] = error_mark_node;
648 /* If we failed to get simple constant, try value range. */
649 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
650 && vrp_will_run_p (caller)
651 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
653 value_range vr
654 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
655 ipa_get_type (callee_pi,
656 i));
657 if (!vr.undefined_p () && !vr.varying_p ())
659 if (!avals->m_known_value_ranges.length ())
661 avals->m_known_value_ranges.safe_grow (count, true);
662 for (int i = 0; i < count; ++i)
663 new (&avals->m_known_value_ranges[i])
664 value_range ();
666 avals->m_known_value_ranges[i] = vr;
670 /* Determine known aggregate values. */
671 if (fre_will_run_p (caller))
673 ipa_agg_value_set agg
674 = ipa_agg_value_set_from_jfunc (caller_parms_info,
675 caller, &jf->agg);
676 if (agg.items.length ())
678 if (!avals->m_known_aggs.length ())
679 avals->m_known_aggs.safe_grow_cleared (count, true);
680 avals->m_known_aggs[i] = agg;
685 /* For calls used in polymorphic calls we further determine
686 polymorphic call context. */
687 if (compute_contexts
688 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
690 ipa_polymorphic_call_context
691 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
692 if (!ctx.useless_p ())
694 if (!avals->m_known_contexts.length ())
695 avals->m_known_contexts.safe_grow_cleared (count, true);
696 avals->m_known_contexts[i]
697 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
701 else
702 gcc_assert (!count || callee->thunk);
704 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
706 int i, count = (int)gimple_call_num_args (e->call_stmt);
708 for (i = 0; i < count; i++)
710 tree cst = gimple_call_arg (e->call_stmt, i);
711 if (!is_gimple_min_invariant (cst))
712 cst = NULL;
713 if (cst)
715 if (!avals->m_known_vals.length ())
716 avals->m_known_vals.safe_grow_cleared (count, true);
717 avals->m_known_vals[i] = cst;
722 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
723 nonspec_clause_ptr);
727 /* Allocate the function summary. */
729 static void
730 ipa_fn_summary_alloc (void)
732 gcc_checking_assert (!ipa_fn_summaries);
733 ipa_size_summaries = new ipa_size_summary_t (symtab);
734 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
735 ipa_call_summaries = new ipa_call_summary_t (symtab);
738 ipa_call_summary::~ipa_call_summary ()
740 if (predicate)
741 edge_predicate_pool.remove (predicate);
743 param.release ();
746 ipa_fn_summary::~ipa_fn_summary ()
748 unsigned len = vec_safe_length (loop_iterations);
749 for (unsigned i = 0; i < len; i++)
750 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
751 len = vec_safe_length (loop_strides);
752 for (unsigned i = 0; i < len; i++)
753 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
754 vec_free (conds);
755 vec_free (size_time_table);
756 vec_free (call_size_time_table);
757 vec_free (loop_iterations);
758 vec_free (loop_strides);
759 builtin_constant_p_parms.release ();
762 void
763 ipa_fn_summary_t::remove_callees (cgraph_node *node)
765 cgraph_edge *e;
766 for (e = node->callees; e; e = e->next_callee)
767 ipa_call_summaries->remove (e);
768 for (e = node->indirect_calls; e; e = e->next_callee)
769 ipa_call_summaries->remove (e);
772 /* Duplicate predicates in loop hint vector, allocating memory for them and
773 remove and deallocate any uninteresting (true or false) ones. Return the
774 result. */
776 static vec<ipa_freqcounting_predicate, va_gc> *
777 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
778 clause_t possible_truths)
780 if (vec_safe_length (v) == 0)
781 return NULL;
783 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
784 int len = res->length();
785 for (int i = len - 1; i >= 0; i--)
787 predicate new_predicate
788 = (*res)[i].predicate->remap_after_duplication (possible_truths);
789 /* We do not want to free previous predicate; it is used by node
790 origin. */
791 (*res)[i].predicate = NULL;
792 set_hint_predicate (&(*res)[i].predicate, new_predicate);
794 if (!(*res)[i].predicate)
795 res->unordered_remove (i);
798 return res;
802 /* Hook that is called by cgraph.c when a node is duplicated. */
803 void
804 ipa_fn_summary_t::duplicate (cgraph_node *src,
805 cgraph_node *dst,
806 ipa_fn_summary *,
807 ipa_fn_summary *info)
809 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
810 /* TODO: as an optimization, we may avoid copying conditions
811 that are known to be false or true. */
812 info->conds = vec_safe_copy (info->conds);
814 /* When there are any replacements in the function body, see if we can figure
815 out that something was optimized out. */
816 if (ipa_node_params_sum && dst->clone.tree_map)
818 vec<size_time_entry, va_gc> *entry = info->size_time_table;
819 /* Use SRC parm info since it may not be copied yet. */
820 class ipa_node_params *parms_info = IPA_NODE_REF (src);
821 ipa_auto_call_arg_values avals;
822 int count = ipa_get_param_count (parms_info);
823 int i, j;
824 clause_t possible_truths;
825 predicate true_pred = true;
826 size_time_entry *e;
827 int optimized_out_size = 0;
828 bool inlined_to_p = false;
829 struct cgraph_edge *edge, *next;
831 info->size_time_table = 0;
832 avals.m_known_vals.safe_grow_cleared (count, true);
833 for (i = 0; i < count; i++)
835 struct ipa_replace_map *r;
837 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
839 if (r->parm_num == i)
841 avals.m_known_vals[i] = r->new_tree;
842 break;
846 evaluate_conditions_for_known_args (dst, false,
847 &avals,
848 &possible_truths,
849 /* We are going to specialize,
850 so ignore nonspec truths. */
851 NULL);
853 info->account_size_time (0, 0, true_pred, true_pred);
855 /* Remap size_time vectors.
856 Simplify the predicate by pruning out alternatives that are known
857 to be false.
858 TODO: as on optimization, we can also eliminate conditions known
859 to be true. */
860 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
862 predicate new_exec_pred;
863 predicate new_nonconst_pred;
864 new_exec_pred = e->exec_predicate.remap_after_duplication
865 (possible_truths);
866 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
867 (possible_truths);
868 if (new_exec_pred == false || new_nonconst_pred == false)
869 optimized_out_size += e->size;
870 else
871 info->account_size_time (e->size, e->time, new_exec_pred,
872 new_nonconst_pred);
875 /* Remap edge predicates with the same simplification as above.
876 Also copy constantness arrays. */
877 for (edge = dst->callees; edge; edge = next)
879 predicate new_predicate;
880 class ipa_call_summary *es = ipa_call_summaries->get (edge);
881 next = edge->next_callee;
883 if (!edge->inline_failed)
884 inlined_to_p = true;
885 if (!es->predicate)
886 continue;
887 new_predicate = es->predicate->remap_after_duplication
888 (possible_truths);
889 if (new_predicate == false && *es->predicate != false)
890 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
891 edge_set_predicate (edge, &new_predicate);
894 /* Remap indirect edge predicates with the same simplification as above.
895 Also copy constantness arrays. */
896 for (edge = dst->indirect_calls; edge; edge = next)
898 predicate new_predicate;
899 class ipa_call_summary *es = ipa_call_summaries->get (edge);
900 next = edge->next_callee;
902 gcc_checking_assert (edge->inline_failed);
903 if (!es->predicate)
904 continue;
905 new_predicate = es->predicate->remap_after_duplication
906 (possible_truths);
907 if (new_predicate == false && *es->predicate != false)
908 optimized_out_size
909 += es->call_stmt_size * ipa_fn_summary::size_scale;
910 edge_set_predicate (edge, &new_predicate);
912 info->loop_iterations
913 = remap_freqcounting_preds_after_dup (info->loop_iterations,
914 possible_truths);
915 info->loop_strides
916 = remap_freqcounting_preds_after_dup (info->loop_strides,
917 possible_truths);
918 if (info->builtin_constant_p_parms.length())
920 vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
921 int ip;
922 info->builtin_constant_p_parms = vNULL;
923 for (i = 0; parms.iterate (i, &ip); i++)
924 if (!avals.m_known_vals[ip])
925 info->builtin_constant_p_parms.safe_push (ip);
928 /* If inliner or someone after inliner will ever start producing
929 non-trivial clones, we will get trouble with lack of information
930 about updating self sizes, because size vectors already contains
931 sizes of the callees. */
932 gcc_assert (!inlined_to_p || !optimized_out_size);
934 else
936 info->size_time_table = vec_safe_copy (info->size_time_table);
937 info->loop_iterations = vec_safe_copy (info->loop_iterations);
938 info->loop_strides = vec_safe_copy (info->loop_strides);
940 info->builtin_constant_p_parms
941 = info->builtin_constant_p_parms.copy ();
943 ipa_freqcounting_predicate *f;
944 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
946 predicate p = *f->predicate;
947 f->predicate = NULL;
948 set_hint_predicate (&f->predicate, p);
950 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
952 predicate p = *f->predicate;
953 f->predicate = NULL;
954 set_hint_predicate (&f->predicate, p);
957 if (!dst->inlined_to)
958 ipa_update_overall_fn_summary (dst);
962 /* Hook that is called by cgraph.c when a node is duplicated. */
964 void
965 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
966 struct cgraph_edge *dst,
967 class ipa_call_summary *srcinfo,
968 class ipa_call_summary *info)
970 new (info) ipa_call_summary (*srcinfo);
971 info->predicate = NULL;
972 edge_set_predicate (dst, srcinfo->predicate);
973 info->param = srcinfo->param.copy ();
974 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
976 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
977 - eni_size_weights.call_cost);
978 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
979 - eni_time_weights.call_cost);
983 /* Dump edge summaries associated to NODE and recursively to all clones.
984 Indent by INDENT. */
986 static void
987 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
988 class ipa_fn_summary *info)
990 struct cgraph_edge *edge;
991 for (edge = node->callees; edge; edge = edge->next_callee)
993 class ipa_call_summary *es = ipa_call_summaries->get (edge);
994 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
995 int i;
997 fprintf (f,
998 "%*s%s %s\n%*s freq:%4.2f",
999 indent, "", callee->dump_name (),
1000 !edge->inline_failed
1001 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1002 indent, "", edge->sreal_frequency ().to_double ());
1004 if (cross_module_call_p (edge))
1005 fprintf (f, " cross module");
1007 if (es)
1008 fprintf (f, " loop depth:%2i size:%2i time: %2i",
1009 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
1011 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
1012 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1013 if (s != NULL)
1014 fprintf (f, " callee size:%2i stack:%2i",
1015 (int) (ss->size / ipa_fn_summary::size_scale),
1016 (int) s->estimated_stack_size);
1018 if (es && es->predicate)
1020 fprintf (f, " predicate: ");
1021 es->predicate->dump (f, info->conds);
1023 else
1024 fprintf (f, "\n");
1025 if (es && es->param.exists ())
1026 for (i = 0; i < (int) es->param.length (); i++)
1028 int prob = es->param[i].change_prob;
1030 if (!prob)
1031 fprintf (f, "%*s op%i is compile time invariant\n",
1032 indent + 2, "", i);
1033 else if (prob != REG_BR_PROB_BASE)
1034 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1035 prob * 100.0 / REG_BR_PROB_BASE);
1036 if (es->param[i].points_to_local_or_readonly_memory)
1037 fprintf (f, "%*s op%i points to local or readonly memory\n",
1038 indent + 2, "", i);
1040 if (!edge->inline_failed)
1042 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1043 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1044 indent + 2, "",
1045 (int) ipa_get_stack_frame_offset (callee),
1046 (int) ss->estimated_self_stack_size);
1047 dump_ipa_call_summary (f, indent + 2, callee, info);
1050 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1052 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1053 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1054 " time: %2i",
1055 indent, "",
1056 es->loop_depth,
1057 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1058 es->call_stmt_time);
1059 if (es->predicate)
1061 fprintf (f, "predicate: ");
1062 es->predicate->dump (f, info->conds);
1064 else
1065 fprintf (f, "\n");
/* Dump ipa_fn_summary for NODE to file F.  Prints the function-wide flags,
   global/self size and time estimates, the size/time table with its
   execution and non-constant predicates, loop iteration/stride predicates,
   and finally the per-call-site summaries via dump_ipa_call_summary.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  if (s->builtin_constant_p_parms.length ())
	    {
	      /* Parameters whose constancy is tested via
		 __builtin_constant_p in the body.  */
	      fprintf (f, " builtin_constant_p_parms");
	      for (unsigned int i = 0;
		   i < s->builtin_constant_p_parms.length (); i++)
		fprintf (f, " %i", s->builtin_constant_p_parms[i]);
	    }
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", ss->self_size);
	  fprintf (f, " global size: %i\n", ss->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  /* Size/time table: each entry is guarded by predicates telling
	     when the code is executed and when its result is non-constant.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  ipa_freqcounting_predicate *fcp;
	  bool first_fcp = true;
	  /* Header text is only printed once, before the first entry.  */
	  for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop iterations:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for ", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop strides:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for :", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  fprintf (f, " calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
/* Convenience wrapper for use from the debugger: dump the IPA function
   summary of NODE to stderr.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
/* Dump IPA function summaries of all defined functions to file F.
   Inline clones are skipped; their data is folded into the node they
   were inlined to.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}
1171 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1172 boolean variable pointed to by DATA. */
1174 static bool
1175 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1176 void *data)
1178 bool *b = (bool *) data;
1179 *b = true;
1180 return true;
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  A parameter kept in memory is unmodified only
     if no aliasing vdef between its definition and STMT touches it.  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget + 1);
      if (walked < 0)
	{
	  /* Alias-walk budget exhausted; disable further walks for this
	     function body and give up on this operand.  */
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
1225 /* If OP refers to value of function parameter, return the corresponding
1226 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1227 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1228 stored to *SIZE_P in that case too. */
1230 static tree
1231 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1232 poly_int64 *size_p)
1234 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1235 if (res)
1236 return res;
1238 if (TREE_CODE (op) == SSA_NAME
1239 && !SSA_NAME_IS_DEFAULT_DEF (op)
1240 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1241 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1242 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1243 size_p);
1244 return NULL_TREE;
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      /* Direct (scalar) use of a parameter.  */
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      /* Follow the defining single-rhs assignment; if its source is not a
	 memory reference, recurse to handle further copies.  */
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference: see if it loads from an aggregate passed in
     a parameter (by value or by reference).  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements always disappear when the body is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	       struct a a ={1,2};
	       return a;
	     }

	     This translate into:

	     returnstruct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	     <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;
	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* A free store of a register or constant into a free destination
	     makes the whole statement a candidate for elimination.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
/* Analyze EXPR if it represents a series of simple operations performed on
   a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
   AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
   Type of the parameter or load from an aggregate via the parameter is
   stored in *TYPE_P.  Operations on the parameter are recorded to
   PARAM_OPS_P if it is not NULL.  */

static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  /* Bound on how many operations we are willing to record; keeps the
     resulting predicates cheap to evaluate.  */
  int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      /* Success case: EXPR bottomed out at a parameter (or a load from a
	 parameter-passed aggregate).  */
      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  /* Plain copy: just look through it.  */
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      /* Exactly one operand may be non-constant; that operand becomes the
	 new EXPR to continue decomposing.  */
      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      /* All operands constant: nothing left to trace back to a
		 parameter.  */
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      /* Operations are collected innermost-last, so insert at the front to
	 keep evaluation order.  */
      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1546 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1548 static void
1549 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1551 int ip;
1553 /* Avoid duplicates. */
1554 for (unsigned int i = 0;
1555 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1556 if (ip == parm)
1557 return;
1558 summary->builtin_constant_p_parms.safe_push (parm);
/* If BB ends by a conditional we can turn into predicates, attach
   corresponding predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  /* Only conditionals comparing against an interprocedural invariant can
     be turned into predicates.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  if (!aggpos.by_ref)
    add_builtin_constant_p_parm (summary, index);
  /* The false edge is the one taken when the argument is NOT a compile time
     constant; predicate it with is_not_constant.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
				   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  /* Merged case ranges, used to build the default-case predicate below.  */
  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = opt_for_fn (fbi->node->decl,
				param_ipa_max_switch_predicate_bounds);
  int bound_count = 0;
  wide_int vr_wmin, vr_wmax;
  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);

  /* Start all successor edges with a false predicate; case predicates are
     OR-ed in below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary,index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      /* Too many bounds: give the default edge the always-true predicate.  */
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

		[D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

	      D2 < x < D3 && x != S1 && ... && x != Sn  */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to edges of conditionals and switches.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  /* A block executes whenever any predecessor edge is taken, so the
	     block predicate is the OR over (predecessor AND edge) pairs.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;  /* Nothing to propagate to the exit block.  */
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Unary operations do not change the "constantness" of the operand.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    /* Non-constant exactly when the parameter changes.  */
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* Result is non-constant when either operand is; short-circuit once
	 a predicate becomes unconditionally true.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Seed the predicate: a load is non-constant when the loaded parameter
     (or aggregate slot) changes.  */
  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       predicate::changed, NULL_TREE);
	  else
	    /* Already accounted for by the load condition above.  */
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the predicate for the SSA name defined by this statement so
     later uses can look it up in NONCONSTANT_NAMES.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
/* Context passed to the record_modified callback of walk_aliased_vdefs.  */

struct record_modified_bb_info
{
  /* The tracked parameter value (for dumping).  */
  tree op;
  /* Set of basic block indices where the value may be modified.  */
  bitmap bb_set;
  /* The statement using the value; its own vdef is ignored.  */
  gimple *stmt;
};
2139 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2140 probability how often it changes between USE_BB.
2141 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2142 is in different loop nest, we can do better.
2143 This is all just estimate. In theory we look for minimal cut separating
2144 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2145 anyway. */
2147 static basic_block
2148 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2150 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2151 if (l && l->header->count < init_bb->count)
2152 return l->header;
2153 return init_bb;
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* The use statement's own vdef does not count as a modification.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers only mark end of lifetime, not real stores.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  /* Default defs are attributed to the entry block; other definitions are
     hoisted to the cheapest dominating block per get_minimal_bb.  */
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, " Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
		 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
		  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  /* Continue walking to collect all potentially modifying blocks.  */
  return false;
}
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do an conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      /* SSA case: compare the count of the defining block (or entry block
	 for default defs) with the count of the call's block.  */
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory case: walk the virtual def chain to find all blocks that may
	 store to the location and take the hottest one as representative.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* ctor_for_folding returning anything but error_mark_node means the
	 value is a foldable constant initializer, hence never changes.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, " Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  /* Either the alias-walk budget was exhausted or the value is set in
	     the very block of the call; assume it always changes.  */
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, " Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, " Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, " Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* A single predecessor means no join of control flow: the PHI argument
     is unconditionally selected, so the "unknown" predicate is false.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Verify all predecessors either are the conditional block itself or
     forwarder blocks fed by one common conditional block FIRST_BB.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  /* The diamond must be controlled by a GIMPLE_COND comparing against an
     interprocedural invariant; otherwise we cannot build a predicate.  */
  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  /* A predicate that is always true carries no information.  */
  if (*p == true)
    return false;
  else
    return true;
}
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
			  predicate *p,
			  vec<predicate> nonconstant_names)
{
  unsigned i;

  /* The PHI result is nonconstant whenever any non-invariant argument is,
     so OR together the predicates of all SSA-name arguments.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  /* Once the predicate degenerates to "always nonconstant" there is
	     nothing useful to record; leave NONCONSTANT_NAMES untouched.  */
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk back through conversions from the builtin's first argument
	     until we hit a comparison (match) or something else (give up).  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The pattern only folds away when the builtin result feeds a
	     single conditional jump.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too..  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The top-level call requires the block to end with a resume (RESX);
	 skip over it before scanning the rest.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Scan backwards: only debug stmts, clobbers and a leading label are
     allowed; anything else disqualifies the block.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2511 /* Return true if STMT compute a floating point expression that may be affected
2512 by -ffast-math and similar flags. */
2514 static bool
2515 fp_expression_p (gimple *stmt)
2517 ssa_op_iter i;
2518 tree op;
2520 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2521 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2522 return true;
2523 return false;
2526 /* Return true if T references memory location that is local
2527 for the function (that means, dead after return) or read-only. */
2529 bool
2530 refs_local_or_readonly_memory_p (tree t)
2532 /* Non-escaping memory is fine. */
2533 t = get_base_address (t);
2534 if ((TREE_CODE (t) == MEM_REF
2535 || TREE_CODE (t) == TARGET_MEM_REF))
2536 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2538 /* Automatic variables are fine. */
2539 if (DECL_P (t)
2540 && auto_var_in_fn_p (t, current_function_decl))
2541 return true;
2543 /* Read-only variables are fine. */
2544 if (DECL_P (t) && TREE_READONLY (t))
2545 return true;
2547 return false;
2550 /* Return true if T is a pointer pointing to memory location that is local
2551 for the function (that means, dead after return) or read-only. */
2553 bool
2554 points_to_local_or_readonly_memory_p (tree t)
2556 /* See if memory location is clearly invalid. */
2557 if (integer_zerop (t))
2558 return flag_delete_null_pointer_checks;
2559 if (TREE_CODE (t) == SSA_NAME)
2560 return !ptr_deref_may_alias_global_p (t);
2561 if (TREE_CODE (t) == ADDR_EXPR)
2562 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2563 return false;
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = opt_for_fn (node->decl, param_uninlined_function_insns);
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  /* Start from a clean summary; conds and size_time_table are rebuilt.  */
  memset(&fbi, 0, sizeof(fbi));
  vec_free (info->conds);
  info->conds = NULL;
  vec_free (info->size_time_table);
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length (), true);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->dump_name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list. */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  bb_predicate = predicate::not_inlined ();
  info->account_size_time (opt_for_fn (node->decl,
				       param_uninlined_function_insns)
			   * ipa_fn_summary::size_scale,
			   opt_for_fn (node->decl,
				       param_uninlined_function_time),
			   bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = bb->count.to_sreal_scale (entry_count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  /* One unknown-predicate computation covers all PHIs of the block;
	     it is derived from the controlling condition, not the PHI.  */
	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     __builtin_expect call. Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count, true);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		      es->param[i].points_to_local_or_readonly_memory
			= points_to_local_or_readonly_memory_p
			    (gimple_call_arg (stmt, i));
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      if (edge->speculative)
		{
		  cgraph_edge *indirect
			= edge->speculative_call_indirect_edge ();
		  ipa_call_summary *es2
			 = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);

		  /* Edge is the first direct call.
		     create and duplicate call summaries for multiple
		     speculative call targets.  */
		  for (cgraph_edge *direct
			 = edge->next_speculative_call_target ();
		       direct;
		       direct = direct->next_speculative_call_target ())
		    {
		      ipa_call_summary *es3
			= ipa_call_summaries->get_create (direct);
		      ipa_call_summaries->duplicate (edge, direct,
						     es, es3);
		    }
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      class predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, " fp_expression set\n");
		}
	    }

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }
	}
    }
  free (order);

  if (nonconstant_names.exists () && !early)
    {
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class loop *loop;
      unsigned max_loop_predicates = opt_for_fn (node->decl,
						 param_ipa_max_loop_predicates);

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      FOR_EACH_LOOP (loop, 0)
	{
	  predicate loop_iterations = true;
	  /* NOTE(review): header_freq appears unused here; phdr_freq below is
	     what is actually passed on — candidate for removal.  */
	  sreal header_freq;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  if (!loop->header->aux)
	    continue;

	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);

	  bb_predicate = *(predicate *) loop->header->aux;
	  auto_vec<edge> exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	    {
	      predicate will_be_nonconstant
		= will_be_nonconstant_expr_predicate (&fbi, info,
						      params_summary,
						      niter_desc.niter,
						      nonconstant_names);
	      if (will_be_nonconstant != true)
		will_be_nonconstant = bb_predicate & will_be_nonconstant;
	      if (will_be_nonconstant != true
		  && will_be_nonconstant != false)
		loop_iterations &= will_be_nonconstant;
	    }
	  add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
				      phdr_freq, max_loop_predicates);
	}

      /* To avoid quadratic behavior we analyze stride predicates only
         with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  predicate loop_stride = true;
	  basic_block *body = get_loop_body (loop);
	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      if (!body[i]->aux)
		continue;

	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  affine_iv iv;
		  /* Only induction variables with a non-invariant step are
		     interesting: a known step becomes an inline hint.  */
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  add_freqcounting_predicate (&s->loop_strides, loop_stride,
				      phdr_freq, max_loop_predicates);
	  free (body);
	}
      scev_finalize ();
    }
  /* Release the per-BB and per-edge predicates allocated from the pool.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk)
    {
      /* Thunks have no body to analyze; account a fixed cost instead.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* During IPA profile merging we may be called w/o virtual SSA form
	 built.  */
      update_ssa (TODO_update_ssa_only_virtuals);

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      /* E is non-NULL iff the loop broke on a disqualifying call.  */
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  /* In LTO mode we may have speculative edges set.  */
  gcc_assert (in_lto_p || size_info->size == size_info->self_size);
}
3143 /* Compute parameters of functions used by inliner using
3144 current_function_decl. */
3146 static unsigned int
3147 compute_fn_summary_for_current (void)
3149 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3150 return 0;
/* Estimate benefit devirtualizing indirect edge IE and return true if it can
   be devirtualized and inlined, provided m_known_vals, m_known_contexts and
   m_known_aggs in AVALS.  Return false straight away if AVALS is NULL.  */

static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      ipa_call_arg_values *avals)
{
  tree target;
  struct cgraph_node *callee;
  class ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  /* Without any known argument values or contexts there is nothing to
     devirtualize with.  */
  if (!avals
      || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  /* Speculative resolutions are already accounted elsewhere; only a sure
     target counts here.  */
  target = ipa_get_indirect_edge_target (ie, avals, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  if (isummary == NULL)
    return false;

  return isummary->inlinable;
}
/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
   handle edge E with probability PROB.  Set HINTS accordingly if edge may be
   devirtualized.  AVALS, if non-NULL, describes the context of the call site
   as far as values of parameters are concerened.  */

static inline void
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
			     sreal *time, ipa_call_arg_values *avals,
			     ipa_hints *hints)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int call_size = es->call_stmt_size;
  int call_time = es->call_stmt_time;
  int cur_size;

  /* Only indirect (callee-less) hot edges are devirtualization candidates;
     estimate_edge_devirt_benefit may shrink call_size/call_time.  */
  if (!e->callee && hints && e->maybe_hot_p ()
      && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
    *hints |= INLINE_HINT_indirect_call;
  cur_size = call_size * ipa_fn_summary::size_scale;
  *size += cur_size;
  if (min_size)
    *min_size += cur_size;
  if (time)
    *time += ((sreal)call_time) * e->sreal_frequency ();
}
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS describe the context of the call
   site.

   Helper for estimate_calls_size_and_time which does the same but
   (in most cases) faster.  */

static void
estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
				int *min_size, sreal *time,
				ipa_hints *hints,
				clause_t possible_truths,
				ipa_call_arg_values *avals)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	{
	  gcc_checking_assert (!ipa_call_summaries->get (e));
	  /* Already-inlined edge: recurse into the inlined body's calls.  */
	  estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
					  hints, possible_truths, avals);

	  continue;
	}
      class ipa_call_summary *es = ipa_call_summaries->get (e);

      /* Do not care about zero sized builtins.  */
      if (!es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	{
	  /* Predicates of calls shall not use NOT_CHANGED codes,
	     so we do not need to compute probabilities.  */
	  estimate_edge_size_and_time (e, size,
				       es->predicate ? NULL : min_size,
				       time, avals, hints);
	}
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, avals, hints);
    }
}
3278 /* Populate sum->call_size_time_table for edges from NODE. */
3280 static void
3281 summarize_calls_size_and_time (struct cgraph_node *node,
3282 ipa_fn_summary *sum)
3284 struct cgraph_edge *e;
3285 for (e = node->callees; e; e = e->next_callee)
3287 if (!e->inline_failed)
/* Inlined edges carry no call summaries; summarize the body of the
   inlined callee instead.  */
3289 gcc_checking_assert (!ipa_call_summaries->get (e));
3290 summarize_calls_size_and_time (e->callee, sum);
3291 continue;
3293 int size = 0;
3294 sreal time = 0;
3296 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
/* Guard the table entry by the edge predicate so the cost can later
   be evaluated in a specific context; no predicate means the call is
   unconditional.  */
3298 struct predicate pred = true;
3299 class ipa_call_summary *es = ipa_call_summaries->get (e);
3301 if (es->predicate)
3302 pred = *es->predicate;
3303 sum->account_size_time (size, time, pred, pred, true);
/* Indirect calls are handled the same way, just without recursion.  */
3305 for (e = node->indirect_calls; e; e = e->next_callee)
3307 int size = 0;
3308 sreal time = 0;
3310 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3311 struct predicate pred = true;
3312 class ipa_call_summary *es = ipa_call_summaries->get (e);
3314 if (es->predicate)
3315 pred = *es->predicate;
3316 sum->account_size_time (size, time, pred, pred, true);
3320 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3321 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3322 context of the call site. */
3324 static void
3325 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3326 int *min_size, sreal *time,
3327 ipa_hints *hints,
3328 clause_t possible_truths,
3329 ipa_call_arg_values *avals)
3331 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3332 bool use_table = true;
3334 gcc_assert (node->callees || node->indirect_calls);
3336 /* During early inlining we do not calculate info for very
3337 large functions and thus there is no need for producing
3338 summaries. */
3339 if (!ipa_node_params_sum)
3340 use_table = false;
3341 /* Do not calculate summaries for simple wrappers; it is waste
3342 of memory. */
3343 else if (node->callees && node->indirect_calls
3344 && node->callees->inline_failed && !node->callees->next_callee)
3345 use_table = false;
3346 /* If there is an indirect edge that may be optimized, we need
3347 to go the slow way. */
3348 else if (avals && hints
3349 && (avals->m_known_vals.length ()
3350 || avals->m_known_contexts.length ()
3351 || avals->m_known_aggs.length ()))
3353 class ipa_node_params *params_summary = IPA_NODE_REF (node);
3354 unsigned int nargs = params_summary
3355 ? ipa_get_param_count (params_summary) : 0;
/* The cached table is context-insensitive; if any parameter used by an
   indirect or polymorphic call has useful information in AVALS, the
   table could be imprecise, so fall back to walking the edges.  */
3357 for (unsigned int i = 0; i < nargs && use_table; i++)
3359 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3360 && (avals->safe_sval_at (i)
3361 || (avals->m_known_aggs.length () > i
3362 && avals->m_known_aggs[i].items.length ())))
3363 use_table = false;
3364 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3365 && (avals->m_known_contexts.length () > i
3366 && !avals->m_known_contexts[i].useless_p ()))
3367 use_table = false;
3371 /* Fast path is via the call size time table. */
3372 if (use_table)
3374 /* Build summary if it is absent. */
3375 if (!sum->call_size_time_table)
/* Entry 0 is reserved for the unconditional cost; it feeds MIN_SIZE
   below.  */
3377 predicate true_pred = true;
3378 sum->account_size_time (0, 0, true_pred, true_pred, true);
3379 summarize_calls_size_and_time (node, sum);
3382 int old_size = *size;
3383 sreal old_time = time ? *time : 0;
3385 if (min_size)
3386 *min_size += (*sum->call_size_time_table)[0].size;
3388 unsigned int i;
3389 size_time_entry *e;
3391 /* Walk the table and account sizes and times. */
3392 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
3393 i++)
3394 if (e->exec_predicate.evaluate (possible_truths))
3396 *size += e->size;
3397 if (time)
3398 *time += e->time;
/* With checking (or when dumping), redo the computation by the slow
   path and verify that both methods agree on size; time may differ
   slightly due to roundoff.  */
3401 /* Be careful and see if both methods agree. */
3402 if ((flag_checking || dump_file)
3403 /* Do not try to sanity check when we know we lost some
3404 precision. */
3405 && sum->call_size_time_table->length ()
3406 < ipa_fn_summary::max_size_time_table_size)
3408 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3409 possible_truths, avals);
3410 gcc_assert (*size == old_size);
3411 if (time && (*time - old_time > 1 || *time - old_time < -1)
3412 && dump_file)
3413 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3414 old_time.to_double (),
3415 time->to_double ());
3418 /* Slow path by walking all edges. */
3419 else
3420 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3421 possible_truths, avals);
3424 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3425 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3426 caller. */
3428 ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
3429 clause_t nonspec_possible_truths,
3430 vec<inline_param_summary>
3431 inline_param_summary,
3432 ipa_auto_call_arg_values *arg_values)
/* Only non-owning references are stored here; use
   ipa_cached_call_context::duplicate_from when the data must outlive
   the caller.  */
3433 : m_node (node), m_possible_truths (possible_truths),
3434 m_nonspec_possible_truths (nonspec_possible_truths),
3435 m_inline_param_summary (inline_param_summary),
3436 m_avals (arg_values)
3440 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3442 void
3443 ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
3445 m_node = ctx.m_node;
3446 m_possible_truths = ctx.m_possible_truths;
3447 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3448 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3449 unsigned int nargs = params_summary
3450 ? ipa_get_param_count (params_summary) : 0;
3452 m_inline_param_summary = vNULL;
3453 /* Copy the info only if there is at least one useful entry. */
3454 if (ctx.m_inline_param_summary.exists ())
3456 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3458 for (unsigned int i = 0; i < n; i++)
3459 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3460 && !ctx.m_inline_param_summary[i].useless_p ())
3462 m_inline_param_summary
3463 = ctx.m_inline_param_summary.copy ()
3464 break;
/* Likewise for known scalar values: only copy the vector if some entry
   can actually influence an indirect call.  */
3467 m_avals.m_known_vals = vNULL;
3468 if (ctx.m_avals.m_known_vals.exists ())
3470 unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);
3472 for (unsigned int i = 0; i < n; i++)
3473 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3474 && ctx.m_avals.m_known_vals[i])
3476 m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
3477 break;
/* Polymorphic call contexts are kept only when useful for some
   polymorphic call.  */
3481 m_avals.m_known_contexts = vNULL;
3482 if (ctx.m_avals.m_known_contexts.exists ())
3484 unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);
3486 for (unsigned int i = 0; i < n; i++)
3487 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3488 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3490 m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
3491 break;
/* Aggregate values need a deep copy (ipa_copy_agg_values).  */
3495 m_avals.m_known_aggs = vNULL;
3496 if (ctx.m_avals.m_known_aggs.exists ())
3498 unsigned int n = MIN (ctx.m_avals.m_known_aggs.length (), nargs);
3500 for (unsigned int i = 0; i < n; i++)
3501 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3502 && !ctx.m_avals.m_known_aggs[i].is_empty ())
3504 m_avals.m_known_aggs
3505 = ipa_copy_agg_values (ctx.m_avals.m_known_aggs);
3506 break;
/* Value ranges are never cached.  */
3510 m_avals.m_known_value_ranges = vNULL;
3513 /* Release memory used by known_vals/contexts/aggs vectors and
3514 inline_param_summary. */
3516 void
3517 ipa_cached_call_context::release ()
3519 /* See if context is initialized at first place. */
3520 if (!m_node)
3521 return;
/* Aggregates hold their own sub-vectors; release them deeply.  */
3522 ipa_release_agg_values (m_avals.m_known_aggs, true);
3523 m_avals.m_known_vals.release ();
3524 m_avals.m_known_contexts.release ();
3525 m_inline_param_summary.release ();
3528 /* Return true if CTX describes the same call context as THIS. */
3530 bool
3531 ipa_call_context::equal_to (const ipa_call_context &ctx)
3533 if (m_node != ctx.m_node
3534 || m_possible_truths != ctx.m_possible_truths
3535 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3536 return false;
3538 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3539 unsigned int nargs = params_summary
3540 ? ipa_get_param_count (params_summary) : 0;
/* For every vector below, only parameters that the summary actually
   uses are inspected, and an entry that is out of range or useless is
   treated as equivalent to a missing one.  */
3542 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3544 for (unsigned int i = 0; i < nargs; i++)
3546 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3547 continue;
3548 if (i >= m_inline_param_summary.length ()
3549 || m_inline_param_summary[i].useless_p ())
3551 if (i < ctx.m_inline_param_summary.length ()
3552 && !ctx.m_inline_param_summary[i].useless_p ())
3553 return false;
3554 continue;
3556 if (i >= ctx.m_inline_param_summary.length ()
3557 || ctx.m_inline_param_summary[i].useless_p ())
3559 if (i < m_inline_param_summary.length ()
3560 && !m_inline_param_summary[i].useless_p ())
3561 return false;
3562 continue;
3564 if (!m_inline_param_summary[i].equal_to
3565 (ctx.m_inline_param_summary[i]))
3566 return false;
/* Known scalar constants, compared only on parameters used by
   indirect calls.  */
3569 if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
3571 for (unsigned int i = 0; i < nargs; i++)
3573 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3574 continue;
3575 if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
3577 if (i < ctx.m_avals.m_known_vals.length ()
3578 && ctx.m_avals.m_known_vals[i])
3579 return false;
3580 continue;
3582 if (i >= ctx.m_avals.m_known_vals.length ()
3583 || !ctx.m_avals.m_known_vals[i])
3585 if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
3586 return false;
3587 continue;
3589 if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
3590 return false;
/* Polymorphic call contexts, compared on parameters used by
   polymorphic calls.  */
3593 if (m_avals.m_known_contexts.exists ()
3594 || ctx.m_avals.m_known_contexts.exists ())
3596 for (unsigned int i = 0; i < nargs; i++)
3598 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3599 continue;
3600 if (i >= m_avals.m_known_contexts.length ()
3601 || m_avals.m_known_contexts[i].useless_p ())
3603 if (i < ctx.m_avals.m_known_contexts.length ()
3604 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3605 return false;
3606 continue;
3608 if (i >= ctx.m_avals.m_known_contexts.length ()
3609 || ctx.m_avals.m_known_contexts[i].useless_p ())
3611 if (i < m_avals.m_known_contexts.length ()
3612 && !m_avals.m_known_contexts[i].useless_p ())
3613 return false;
3614 continue;
3616 if (!m_avals.m_known_contexts[i].equal_to
3617 (ctx.m_avals.m_known_contexts[i]))
3618 return false;
/* Aggregate values, again only on parameters used by indirect
   calls.  */
3621 if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
3623 for (unsigned int i = 0; i < nargs; i++)
3625 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3626 continue;
3627 if (i >= m_avals.m_known_aggs.length ()
3628 || m_avals.m_known_aggs[i].is_empty ())
3630 if (i < ctx.m_avals.m_known_aggs.length ()
3631 && !ctx.m_avals.m_known_aggs[i].is_empty ())
3632 return false;
3633 continue;
3635 if (i >= ctx.m_avals.m_known_aggs.length ()
3636 || ctx.m_avals.m_known_aggs[i].is_empty ())
3638 if (i < m_avals.m_known_aggs.length ()
3639 && !m_avals.m_known_aggs[i].is_empty ())
3640 return false;
3641 continue;
3643 if (!m_avals.m_known_aggs[i].equal_to (ctx.m_avals.m_known_aggs[i]))
3644 return false;
3647 return true;
3650 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3651 this context. Always compute size and min_size. Only compute time and
3652 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3653 is true. */
3655 void
3656 ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
3657 bool est_times, bool est_hints)
3659 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3660 size_time_entry *e;
3661 int size = 0;
3662 sreal time = 0;
3663 int min_size = 0;
3664 ipa_hints hints = 0;
3665 sreal loops_with_known_iterations = 0;
3666 sreal loops_with_known_strides = 0;
3667 int i;
3669 if (dump_file && (dump_flags & TDF_DETAILS))
3671 bool found = false;
3672 fprintf (dump_file, " Estimating body: %s\n"
3673 " Known to be false: ", m_node->dump_name ());
3675 for (i = predicate::not_inlined_condition;
3676 i < (predicate::first_dynamic_condition
3677 + (int) vec_safe_length (info->conds)); i++)
3678 if (!(m_possible_truths & (1 << i)))
3680 if (found)
3681 fprintf (dump_file, ", ");
3682 found = true;
3683 dump_condition (dump_file, info->conds, i);
/* First account all calls of the function in this context.  */
3687 if (m_node->callees || m_node->indirect_calls)
3688 estimate_calls_size_and_time (m_node, &size, &min_size,
3689 est_times ? &time : NULL,
3690 est_hints ? &hints : NULL, m_possible_truths,
3691 &m_avals);
3693 sreal nonspecialized_time = time;
/* Entry 0 of size_time_table is unconditional (asserted below) and thus
   gives the minimal size.  */
3695 min_size += (*info->size_time_table)[0].size;
3696 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3698 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3700 /* Because predicates are conservative, it can happen that nonconst is 1
3701 but exec is 0. */
3702 if (exec)
3704 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3706 gcc_checking_assert (e->time >= 0);
3707 gcc_checking_assert (time >= 0);
3709 /* We compute specialized size only because size of nonspecialized
3710 copy is context independent.
3712 The difference between nonspecialized execution and specialized is
3713 that nonspecialized is not going to have optimized out computations
3714 known to be constant in a specialized setting. */
3715 if (nonconst)
3716 size += e->size;
3717 if (!est_times)
3718 continue;
3719 nonspecialized_time += e->time;
3720 if (!nonconst)
/* Without inline parameter summaries the nonconst predicate is
   either fully true or fully false in this context.  */
3722 else if (!m_inline_param_summary.exists ())
3724 if (nonconst)
3725 time += e->time;
/* Otherwise scale the time by the probability that the nonconst
   predicate holds.  */
3727 else
3729 int prob = e->nonconst_predicate.probability
3730 (info->conds, m_possible_truths,
3731 m_inline_param_summary);
3732 gcc_checking_assert (prob >= 0);
3733 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3734 if (prob == REG_BR_PROB_BASE)
3735 time += e->time;
3736 else
3737 time += e->time * prob / REG_BR_PROB_BASE;
3739 gcc_checking_assert (time >= 0);
3742 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3743 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3744 gcc_checking_assert (min_size >= 0);
3745 gcc_checking_assert (size >= 0);
3746 gcc_checking_assert (time >= 0);
3747 /* nonspecialized_time should be always bigger than specialized time.
3748 Roundoff issues however may get into the way. */
3749 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3751 /* Roundoff issues may make specialized time bigger than nonspecialized
3752 time. We do not really want that to happen because some heuristics
3753 may get confused by seeing negative speedups. */
3754 if (time > nonspecialized_time)
3755 time = nonspecialized_time;
3757 if (est_hints)
3759 if (info->scc_no)
3760 hints |= INLINE_HINT_in_scc;
3761 if (DECL_DECLARED_INLINE_P (m_node->decl))
3762 hints |= INLINE_HINT_declared_inline;
3763 if (info->builtin_constant_p_parms.length ()
3764 && DECL_DECLARED_INLINE_P (m_node->decl))
3765 hints |= INLINE_HINT_builtin_constant_p;
/* A loop predicate that is known false in this context means the
   loop bound/stride became known; account its frequency.  */
3767 ipa_freqcounting_predicate *fcp;
3768 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
3769 if (!fcp->predicate->evaluate (m_possible_truths))
3771 hints |= INLINE_HINT_loop_iterations;
3772 loops_with_known_iterations += fcp->freq;
3774 estimates->loops_with_known_iterations = loops_with_known_iterations;
3776 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
3777 if (!fcp->predicate->evaluate (m_possible_truths))
3779 hints |= INLINE_HINT_loop_stride;
3780 loops_with_known_strides += fcp->freq;
3782 estimates->loops_with_known_strides = loops_with_known_strides;
/* Undo the internal scaling before publishing results.  */
3785 size = RDIV (size, ipa_fn_summary::size_scale);
3786 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3788 if (dump_file && (dump_flags & TDF_DETAILS))
3790 fprintf (dump_file, "\n size:%i", (int) size);
3791 if (est_times)
3792 fprintf (dump_file, " time:%f nonspec time:%f",
3793 time.to_double (), nonspecialized_time.to_double ());
3794 if (est_hints)
3795 fprintf (dump_file, " loops with known iterations:%f "
3796 "known strides:%f", loops_with_known_iterations.to_double (),
3797 loops_with_known_strides.to_double ());
3798 fprintf (dump_file, "\n");
3800 if (est_times)
3802 estimates->time = time;
3803 estimates->nonspecialized_time = nonspecialized_time;
3805 estimates->size = size;
3806 estimates->min_size = min_size;
3807 if (est_hints)
3808 estimates->hints = hints;
3809 return;
3813 /* Estimate size and time of NODE assuming that parameters described by
   AVALS (known constant values, polymorphic contexts and aggregates)
   are propagated into it, as done by IPA-CP when producing a clone.
   Store the results into ESTIMATES. */
3818 void
3819 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3820 ipa_auto_call_arg_values *avals,
3821 ipa_call_estimates *estimates)
3823 clause_t clause, nonspec_clause;
/* Turn the known argument values into clauses of satisfied conditions
   and evaluate the body in that context.  */
3825 evaluate_conditions_for_known_args (node, false, avals, &clause,
3826 &nonspec_clause);
3827 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
3828 ctx.estimate_size_and_time (estimates);
3831 /* Return stack frame offset where frame of NODE is supposed to start inside
3832 of the function it is inlined to.
3833 Return 0 for functions that are not inlined. */
3835 HOST_WIDE_INT
3836 ipa_get_stack_frame_offset (struct cgraph_node *node)
3838 HOST_WIDE_INT offset = 0;
3839 if (!node->inlined_to)
3840 return 0;
3841 node = node->callers->caller;
3842 while (true)
3844 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3845 if (!node->inlined_to)
3846 return offset;
3847 node = node->callers->caller;
3852 /* Update summary information of inline clones after inlining.
3853 Compute peak stack usage. */
3855 static void
3856 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3858 struct cgraph_edge *e;
3860 ipa_propagate_frequency (node);
3861 for (e = node->callees; e; e = e->next_callee)
3863 if (!e->inline_failed)
3864 inline_update_callee_summaries (e->callee, depth);
3865 else
3866 ipa_call_summaries->get (e)->loop_depth += depth;
3868 for (e = node->indirect_calls; e; e = e->next_callee)
3869 ipa_call_summaries->get (e)->loop_depth += depth;
3872 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3873 INLINED_EDGE has been inlined.
3875 When function A is inlined in B and A calls C with parameter that
3876 changes with probability PROB1 and C is known to be passthrough
3877 of argument if B that change with probability PROB2, the probability
3878 of change is now PROB1*PROB2. */
3880 static void
3881 remap_edge_params (struct cgraph_edge *inlined_edge,
3882 struct cgraph_edge *edge)
3884 if (ipa_node_params_sum)
3886 int i;
3887 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3888 if (!args)
3889 return;
3890 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3891 class ipa_call_summary *inlined_es
3892 = ipa_call_summaries->get (inlined_edge);
3894 if (es->param.length () == 0)
3895 return;
3897 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3899 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
/* Only pass-through and ancestor jump functions relate argument I of
   EDGE to a formal parameter ID of the function INLINED_EDGE called
   into.  */
3900 if (jfunc->type == IPA_JF_PASS_THROUGH
3901 || jfunc->type == IPA_JF_ANCESTOR)
3903 int id = jfunc->type == IPA_JF_PASS_THROUGH
3904 ? ipa_get_jf_pass_through_formal_id (jfunc)
3905 : ipa_get_jf_ancestor_formal_id (jfunc);
3906 if (id < (int) inlined_es->param.length ())
3908 int prob1 = es->param[i].change_prob;
3909 int prob2 = inlined_es->param[id].change_prob;
3910 int prob = combine_probabilities (prob1, prob2);
/* Avoid rounding a product of two nonzero probabilities down to
   zero.  */
3912 if (prob1 && prob2 && !prob)
3913 prob = 1;
3915 es->param[i].change_prob = prob;
3917 if (inlined_es
3918 ->param[id].points_to_local_or_readonly_memory)
3919 es->param[i].points_to_local_or_readonly_memory = true;
/* Constant jump functions may also be known to point to local or
   readonly memory.  */
3921 if (!es->param[i].points_to_local_or_readonly_memory
3922 && jfunc->type == IPA_JF_CONST
3923 && points_to_local_or_readonly_memory_p
3924 (ipa_get_jf_constant (jfunc)))
3925 es->param[i].points_to_local_or_readonly_memory = true;
3931 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3933 Remap predicates of callees of NODE. Rest of arguments match
3934 remap_predicate.
3936 Also update change probabilities. */
3938 static void
3939 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3940 struct cgraph_node *node,
3941 class ipa_fn_summary *info,
3942 class ipa_node_params *params_summary,
3943 class ipa_fn_summary *callee_info,
3944 vec<int> operand_map,
3945 vec<HOST_WIDE_INT> offset_map,
3946 clause_t possible_truths,
3947 predicate *toplev_predicate)
3949 struct cgraph_edge *e, *next;
/* NEXT is remembered up front because edge_set_predicate may remove
   the edge when the predicate becomes false.  */
3950 for (e = node->callees; e; e = next)
3952 predicate p;
3953 next = e->next_callee;
3955 if (e->inline_failed)
3957 class ipa_call_summary *es = ipa_call_summaries->get (e);
3958 remap_edge_params (inlined_edge, e);
3960 if (es->predicate)
3962 p = es->predicate->remap_after_inlining
3963 (info, params_summary,
3964 callee_info, operand_map,
3965 offset_map, possible_truths,
3966 *toplev_predicate);
3967 edge_set_predicate (e, &p);
/* An edge without its own predicate is guarded only by the predicate
   of the inlined call itself.  */
3969 else
3970 edge_set_predicate (e, toplev_predicate);
/* Already-inlined callees are processed recursively.  */
3972 else
3973 remap_edge_summaries (inlined_edge, e->callee, info,
3974 params_summary, callee_info,
3975 operand_map, offset_map, possible_truths,
3976 toplev_predicate);
3978 for (e = node->indirect_calls; e; e = next)
3980 class ipa_call_summary *es = ipa_call_summaries->get (e);
3981 predicate p;
3982 next = e->next_callee;
3984 remap_edge_params (inlined_edge, e);
3985 if (es->predicate)
3987 p = es->predicate->remap_after_inlining
3988 (info, params_summary,
3989 callee_info, operand_map, offset_map,
3990 possible_truths, *toplev_predicate);
3991 edge_set_predicate (e, &p);
3993 else
3994 edge_set_predicate (e, toplev_predicate);
3998 /* Run remap_after_inlining on each predicate in V. */
4000 static void
4001 remap_freqcounting_predicate (class ipa_fn_summary *info,
4002 class ipa_node_params *params_summary,
4003 class ipa_fn_summary *callee_info,
4004 vec<ipa_freqcounting_predicate, va_gc> *v,
4005 vec<int> operand_map,
4006 vec<HOST_WIDE_INT> offset_map,
4007 clause_t possible_truths,
4008 predicate *toplev_predicate)
4011 ipa_freqcounting_predicate *fcp;
4012 for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
4014 predicate p
4015 = fcp->predicate->remap_after_inlining (info, params_summary,
4016 callee_info, operand_map,
4017 offset_map, possible_truths,
4018 *toplev_predicate);
/* Only conjoin the remapped predicate when it is non-trivial; a plain
   true or false result is not folded in.  */
4019 if (p != false && p != true)
4020 *fcp->predicate &= p;
4024 /* We inlined EDGE. Update summary of the function we inlined into. */
4026 void
4027 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
4029 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
4030 struct cgraph_node *to = (edge->caller->inlined_to
4031 ? edge->caller->inlined_to : edge->caller);
4032 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
4033 clause_t clause = 0; /* not_inline is known to be false. */
4034 size_time_entry *e;
4035 auto_vec<int, 8> operand_map;
4036 auto_vec<HOST_WIDE_INT, 8> offset_map;
4037 int i;
4038 predicate toplev_predicate;
4039 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4040 class ipa_node_params *params_summary = (ipa_node_params_sum
4041 ? IPA_NODE_REF (to) : NULL);
/* Everything merged from the callee is guarded by the predicate of the
   inlined call itself.  */
4043 if (es->predicate)
4044 toplev_predicate = *es->predicate;
4045 else
4046 toplev_predicate = true;
4048 info->fp_expressions |= callee_info->fp_expressions;
4050 if (callee_info->conds)
4052 ipa_auto_call_arg_values avals;
4053 evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
/* Build OPERAND_MAP mapping callee formal parameters to the caller's,
   and OFFSET_MAP with aggregate offset adjustments, from the edge's
   jump functions.  */
4055 if (ipa_node_params_sum && callee_info->conds)
4057 class ipa_edge_args *args = IPA_EDGE_REF (edge);
4058 int count = args ? ipa_get_cs_argument_count (args) : 0;
4059 int i;
4061 if (count)
4063 operand_map.safe_grow_cleared (count, true);
4064 offset_map.safe_grow_cleared (count, true);
4066 for (i = 0; i < count; i++)
4068 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4069 int map = -1;
4071 /* TODO: handle non-NOPs when merging. */
4072 if (jfunc->type == IPA_JF_PASS_THROUGH)
4074 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4075 map = ipa_get_jf_pass_through_formal_id (jfunc);
4076 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
4077 offset_map[i] = -1;
4079 else if (jfunc->type == IPA_JF_ANCESTOR)
4081 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
4082 if (offset >= 0 && offset < INT_MAX)
4084 map = ipa_get_jf_ancestor_formal_id (jfunc);
4085 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
4086 offset = -1;
4087 offset_map[i] = offset;
4090 operand_map[i] = map;
4091 gcc_assert (map < ipa_get_param_count (params_summary));
/* Parameters that feed __builtin_constant_p in the callee do so in the
   caller as well once remapped.  */
4094 int ip;
4095 for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
4096 if (ip < count && operand_map[ip] >= 0)
4097 add_builtin_constant_p_parm (info, operand_map[ip]);
/* Merge the callee's size/time entries, remapping their predicates and
   scaling time by the edge frequency and the probability that the
   nonconst predicate holds.  */
4099 sreal freq = edge->sreal_frequency ();
4100 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
4102 predicate p;
4103 p = e->exec_predicate.remap_after_inlining
4104 (info, params_summary,
4105 callee_info, operand_map,
4106 offset_map, clause,
4107 toplev_predicate);
4108 predicate nonconstp;
4109 nonconstp = e->nonconst_predicate.remap_after_inlining
4110 (info, params_summary,
4111 callee_info, operand_map,
4112 offset_map, clause,
4113 toplev_predicate);
4114 if (p != false && nonconstp != false)
4116 sreal add_time = ((sreal)e->time * freq);
4117 int prob = e->nonconst_predicate.probability (callee_info->conds,
4118 clause, es->param);
4119 if (prob != REG_BR_PROB_BASE)
4120 add_time = add_time * prob / REG_BR_PROB_BASE;
4121 if (prob != REG_BR_PROB_BASE
4122 && dump_file && (dump_flags & TDF_DETAILS))
4124 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
4125 (double) prob / REG_BR_PROB_BASE);
4127 info->account_size_time (e->size, add_time, p, nonconstp);
/* Remap predicates guarding the callee's edges and loop properties into
   the caller's summary.  */
4130 remap_edge_summaries (edge, edge->callee, info, params_summary,
4131 callee_info, operand_map,
4132 offset_map, clause, &toplev_predicate);
4133 remap_freqcounting_predicate (info, params_summary, callee_info,
4134 info->loop_iterations, operand_map,
4135 offset_map, clause, &toplev_predicate);
4136 remap_freqcounting_predicate (info, params_summary, callee_info,
4137 info->loop_strides, operand_map,
4138 offset_map, clause, &toplev_predicate);
/* Update peak stack usage.  */
4140 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4141 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4143 if (info->estimated_stack_size < peak)
4144 info->estimated_stack_size = peak;
4146 inline_update_callee_summaries (edge->callee, es->loop_depth);
4147 if (info->call_size_time_table)
4149 int edge_size = 0;
4150 sreal edge_time = 0;
4152 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
4153 /* Unaccount size and time of the optimized out call. */
4154 info->account_size_time (-edge_size, -edge_time,
4155 es->predicate ? *es->predicate : true,
4156 es->predicate ? *es->predicate : true,
4157 true);
4158 /* Account new calls. */
4159 summarize_calls_size_and_time (edge->callee, info);
4162 /* Free summaries that are not maintained for inline clones/edges. */
4163 ipa_call_summaries->remove (edge);
4164 ipa_fn_summaries->remove (edge->callee);
4165 ipa_remove_from_growth_caches (edge);
4168 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4169 overall size and time. Recompute it.
4170 If RESET is true also recompute call_time_size_table. */
4172 void
4173 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4175 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4176 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4177 size_time_entry *e;
4178 int i;
/* Sum all entries of size_time_table unconditionally.  */
4180 size_info->size = 0;
4181 info->time = 0;
4182 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4184 size_info->size += e->size;
4185 info->time += e->time;
4187 info->min_size = (*info->size_time_table)[0].size;
4188 if (reset)
4189 vec_free (info->call_size_time_table);
/* The clause below keeps every condition possibly true except the
   dedicated false condition, i.e. the most conservative context.  */
4190 if (node->callees || node->indirect_calls)
4191 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4192 &info->time, NULL,
4193 ~(clause_t) (1 << predicate::false_condition),
4194 NULL);
4195 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4196 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4200 /* This function performs intraprocedural analysis in NODE that is required to
4201 inline indirect calls. */
4203 static void
4204 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4206 ipa_analyze_node (node);
4207 if (dump_file && (dump_flags & TDF_DETAILS))
4209 ipa_print_node_params (dump_file, node);
4210 ipa_print_node_jump_functions (dump_file, node);
4215 /* Note function body size. */
4217 void
4218 inline_analyze_function (struct cgraph_node *node)
/* The function must be analyzed with its own CFG/decl context active;
   restored by pop_cfun below.  */
4220 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4222 if (dump_file)
4223 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4224 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4225 inline_indirect_intraprocedural_analysis (node);
4226 compute_fn_summary (node, false);
/* Without optimization, mark all edges as not inlinable.  */
4227 if (!optimize)
4229 struct cgraph_edge *e;
4230 for (e = node->callees; e; e = e->next_callee)
4231 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4232 for (e = node->indirect_calls; e; e = e->next_callee)
4233 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4236 pop_cfun ();
4240 /* Called when new function is inserted to callgraph late. */
4242 void
4243 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
/* The preallocated summary argument is ignored; a fresh summary is
   computed by analyzing the function body.  */
4245 inline_analyze_function (node);
4248 /* Note function body size. */
4250 static void
4251 ipa_fn_summary_generate (void)
4253 struct cgraph_node *node;
/* Record versionability of all defined functions up front.  */
4255 FOR_EACH_DEFINED_FUNCTION (node)
4256 if (DECL_STRUCT_FUNCTION (node->decl))
4257 node->versionable = tree_versionable_function_p (node->decl);
4259 ipa_fn_summary_alloc ();
/* Functions inserted into the callgraph from now on are analyzed via
   ipa_fn_summary_t::insert.  */
4261 ipa_fn_summaries->enable_insertion_hook ();
4263 ipa_register_cgraph_hooks ();
4265 FOR_EACH_DEFINED_FUNCTION (node)
4266 if (!node->alias
4267 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4268 || opt_for_fn (node->decl, optimize)))
4269 inline_analyze_function (node);
4273 /* Read inline summary for edge E from IB.  If PREVAILS is false the
   summary is decoded and discarded (the stream still must be consumed
   in full). */
4275 static void
4276 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4277 bool prevails)
4279 class ipa_call_summary *es = prevails
4280 ? ipa_call_summaries->get_create (e) : NULL;
4281 predicate p;
4282 int length, i;
4284 int size = streamer_read_uhwi (ib);
4285 int time = streamer_read_uhwi (ib);
4286 int depth = streamer_read_uhwi (ib);
4288 if (es)
4290 es->call_stmt_size = size;
4291 es->call_stmt_time = time;
4292 es->loop_depth = depth;
4295 bitpack_d bp = streamer_read_bitpack (ib);
4296 if (es)
4297 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4298 else
4299 bp_unpack_value (&bp, 1);
4301 p.stream_in (ib);
4302 if (es)
4303 edge_set_predicate (e, &p);
4304 length = streamer_read_uhwi (ib);
4305 if (length && es && e->possibly_call_in_translation_unit_p ())
4307 es->param.safe_grow_cleared (length, true);
4308 for (i = 0; i < length; i++)
4310 es->param[i].change_prob = streamer_read_uhwi (ib);
4311 es->param[i].points_to_local_or_readonly_memory
4312 = streamer_read_uhwi (ib);
/* Even when the data is dropped, the stream position must advance past
   the per-parameter records.  */
4315 else
4317 for (i = 0; i < length; i++)
4319 streamer_read_uhwi (ib);
4320 streamer_read_uhwi (ib);
4326 /* Stream in inline summaries from the section. */
4328 static void
4329 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4330 size_t len)
4332 const struct lto_function_header *header =
4333 (const struct lto_function_header *) data;
4334 const int cfg_offset = sizeof (struct lto_function_header);
4335 const int main_offset = cfg_offset + header->cfg_size;
4336 const int string_offset = main_offset + header->main_size;
4337 class data_in *data_in;
4338 unsigned int i, count2, j;
4339 unsigned int f_count;
4341 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4342 file_data->mode_table);
4344 data_in =
4345 lto_data_in_create (file_data, (const char *) data + string_offset,
4346 header->string_size, vNULL);
4347 f_count = streamer_read_uhwi (&ib);
4348 for (i = 0; i < f_count; i++)
4350 unsigned int index;
4351 struct cgraph_node *node;
4352 class ipa_fn_summary *info;
4353 class ipa_node_params *params_summary;
4354 class ipa_size_summary *size_info;
4355 lto_symtab_encoder_t encoder;
4356 struct bitpack_d bp;
4357 struct cgraph_edge *e;
4358 predicate p;
4360 index = streamer_read_uhwi (&ib);
4361 encoder = file_data->symtab_node_encoder;
4362 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4363 index));
4364 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4365 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
4366 size_info = node->prevailing_p ()
4367 ? ipa_size_summaries->get_create (node) : NULL;
4369 int stack_size = streamer_read_uhwi (&ib);
4370 int size = streamer_read_uhwi (&ib);
4371 sreal time = sreal::stream_in (&ib);
4373 if (info)
4375 info->estimated_stack_size
4376 = size_info->estimated_self_stack_size = stack_size;
4377 size_info->size = size_info->self_size = size;
4378 info->time = time;
4381 bp = streamer_read_bitpack (&ib);
4382 if (info)
4384 info->inlinable = bp_unpack_value (&bp, 1);
4385 info->fp_expressions = bp_unpack_value (&bp, 1);
4387 else
4389 bp_unpack_value (&bp, 1);
4390 bp_unpack_value (&bp, 1);
4393 count2 = streamer_read_uhwi (&ib);
4394 gcc_assert (!info || !info->conds);
4395 if (info)
4396 vec_safe_reserve_exact (info->conds, count2);
4397 for (j = 0; j < count2; j++)
4399 struct condition c;
4400 unsigned int k, count3;
4401 c.operand_num = streamer_read_uhwi (&ib);
4402 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4403 c.type = stream_read_tree (&ib, data_in);
4404 c.val = stream_read_tree (&ib, data_in);
4405 bp = streamer_read_bitpack (&ib);
4406 c.agg_contents = bp_unpack_value (&bp, 1);
4407 c.by_ref = bp_unpack_value (&bp, 1);
4408 if (c.agg_contents)
4409 c.offset = streamer_read_uhwi (&ib);
4410 count3 = streamer_read_uhwi (&ib);
4411 c.param_ops = NULL;
4412 if (info)
4413 vec_safe_reserve_exact (c.param_ops, count3);
4414 if (params_summary)
4415 ipa_set_param_used_by_ipa_predicates
4416 (params_summary, c.operand_num, true);
4417 for (k = 0; k < count3; k++)
4419 struct expr_eval_op op;
4420 enum gimple_rhs_class rhs_class;
4421 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4422 op.type = stream_read_tree (&ib, data_in);
4423 switch (rhs_class = get_gimple_rhs_class (op.code))
4425 case GIMPLE_UNARY_RHS:
4426 op.index = 0;
4427 op.val[0] = NULL_TREE;
4428 op.val[1] = NULL_TREE;
4429 break;
4431 case GIMPLE_BINARY_RHS:
4432 case GIMPLE_TERNARY_RHS:
4433 bp = streamer_read_bitpack (&ib);
4434 op.index = bp_unpack_value (&bp, 2);
4435 op.val[0] = stream_read_tree (&ib, data_in);
4436 if (rhs_class == GIMPLE_BINARY_RHS)
4437 op.val[1] = NULL_TREE;
4438 else
4439 op.val[1] = stream_read_tree (&ib, data_in);
4440 break;
4442 default:
4443 fatal_error (UNKNOWN_LOCATION,
4444 "invalid fnsummary in LTO stream");
4446 if (info)
4447 c.param_ops->quick_push (op);
4449 if (info)
4450 info->conds->quick_push (c);
4452 count2 = streamer_read_uhwi (&ib);
4453 gcc_assert (!info || !info->size_time_table);
4454 if (info && count2)
4455 vec_safe_reserve_exact (info->size_time_table, count2);
4456 for (j = 0; j < count2; j++)
4458 class size_time_entry e;
4460 e.size = streamer_read_uhwi (&ib);
4461 e.time = sreal::stream_in (&ib);
4462 e.exec_predicate.stream_in (&ib);
4463 e.nonconst_predicate.stream_in (&ib);
4465 if (info)
4466 info->size_time_table->quick_push (e);
4469 count2 = streamer_read_uhwi (&ib);
4470 for (j = 0; j < count2; j++)
4472 p.stream_in (&ib);
4473 sreal fcp_freq = sreal::stream_in (&ib);
4474 if (info)
4476 ipa_freqcounting_predicate fcp;
4477 fcp.predicate = NULL;
4478 set_hint_predicate (&fcp.predicate, p);
4479 fcp.freq = fcp_freq;
4480 vec_safe_push (info->loop_iterations, fcp);
4483 count2 = streamer_read_uhwi (&ib);
4484 for (j = 0; j < count2; j++)
4486 p.stream_in (&ib);
4487 sreal fcp_freq = sreal::stream_in (&ib);
4488 if (info)
4490 ipa_freqcounting_predicate fcp;
4491 fcp.predicate = NULL;
4492 set_hint_predicate (&fcp.predicate, p);
4493 fcp.freq = fcp_freq;
4494 vec_safe_push (info->loop_strides, fcp);
4497 count2 = streamer_read_uhwi (&ib);
4498 if (info && count2)
4499 info->builtin_constant_p_parms.reserve_exact (count2);
4500 for (j = 0; j < count2; j++)
4502 int parm = streamer_read_uhwi (&ib);
4503 if (info)
4504 info->builtin_constant_p_parms.quick_push (parm);
4506 for (e = node->callees; e; e = e->next_callee)
4507 read_ipa_call_summary (&ib, e, info != NULL);
4508 for (e = node->indirect_calls; e; e = e->next_callee)
4509 read_ipa_call_summary (&ib, e, info != NULL);
4512 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4513 len);
4514 lto_data_in_delete (data_in);
4518 /* Read inline summary. Jump functions are shared among ipa-cp
4519 and inliner, so when ipa-cp is active, we don't need to write them
4520 twice. */
4522 static void
4523 ipa_fn_summary_read (void)
4525 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4526 struct lto_file_decl_data *file_data;
4527 unsigned int j = 0;
4529 ipa_fn_summary_alloc ();
4531 while ((file_data = file_data_vec[j++]))
4533 size_t len;
4534 const char *data
4535 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4536 &len);
4537 if (data)
4538 inline_read_section (file_data, data, len);
4539 else
4540 /* Fatal error here. We do not want to support compiling ltrans units
4541 with different version of compiler or different flags than the WPA
4542 unit, so this should never happen. */
4543 fatal_error (input_location,
4544 "ipa inline summary is missing in input file");
4546 ipa_register_cgraph_hooks ();
4547 if (!flag_ipa_cp)
4548 ipa_prop_read_jump_functions ();
4550 gcc_assert (ipa_fn_summaries);
4551 ipa_fn_summaries->enable_insertion_hook ();
4555 /* Write inline summary for edge E to OB. */
4557 static void
4558 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4560 class ipa_call_summary *es = ipa_call_summaries->get (e);
4561 int i;
4563 streamer_write_uhwi (ob, es->call_stmt_size);
4564 streamer_write_uhwi (ob, es->call_stmt_time);
4565 streamer_write_uhwi (ob, es->loop_depth);
4567 bitpack_d bp = bitpack_create (ob->main_stream);
4568 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4569 streamer_write_bitpack (&bp);
4571 if (es->predicate)
4572 es->predicate->stream_out (ob);
4573 else
4574 streamer_write_uhwi (ob, 0);
4575 streamer_write_uhwi (ob, es->param.length ());
4576 for (i = 0; i < (int) es->param.length (); i++)
4578 streamer_write_uhwi (ob, es->param[i].change_prob);
4579 streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory);
4584 /* Write inline summary for node in SET.
4585 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4586 active, we don't need to write them twice. */
4588 static void
4589 ipa_fn_summary_write (void)
4591 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4592 lto_symtab_encoder_iterator lsei;
4593 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4594 unsigned int count = 0;
4596 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4597 lsei_next_function_in_partition (&lsei))
4599 cgraph_node *cnode = lsei_cgraph_node (lsei);
4600 if (cnode->definition && !cnode->alias)
4601 count++;
4603 streamer_write_uhwi (ob, count);
4605 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4606 lsei_next_function_in_partition (&lsei))
4608 cgraph_node *cnode = lsei_cgraph_node (lsei);
4609 if (cnode->definition && !cnode->alias)
4611 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4612 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4613 struct bitpack_d bp;
4614 struct cgraph_edge *edge;
4615 int i;
4616 size_time_entry *e;
4617 struct condition *c;
4619 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4620 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4621 streamer_write_hwi (ob, size_info->self_size);
4622 info->time.stream_out (ob);
4623 bp = bitpack_create (ob->main_stream);
4624 bp_pack_value (&bp, info->inlinable, 1);
4625 bp_pack_value (&bp, false, 1);
4626 bp_pack_value (&bp, info->fp_expressions, 1);
4627 streamer_write_bitpack (&bp);
4628 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4629 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4631 int j;
4632 struct expr_eval_op *op;
4634 streamer_write_uhwi (ob, c->operand_num);
4635 streamer_write_uhwi (ob, c->code);
4636 stream_write_tree (ob, c->type, true);
4637 stream_write_tree (ob, c->val, true);
4638 bp = bitpack_create (ob->main_stream);
4639 bp_pack_value (&bp, c->agg_contents, 1);
4640 bp_pack_value (&bp, c->by_ref, 1);
4641 streamer_write_bitpack (&bp);
4642 if (c->agg_contents)
4643 streamer_write_uhwi (ob, c->offset);
4644 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4645 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4647 streamer_write_uhwi (ob, op->code);
4648 stream_write_tree (ob, op->type, true);
4649 if (op->val[0])
4651 bp = bitpack_create (ob->main_stream);
4652 bp_pack_value (&bp, op->index, 2);
4653 streamer_write_bitpack (&bp);
4654 stream_write_tree (ob, op->val[0], true);
4655 if (op->val[1])
4656 stream_write_tree (ob, op->val[1], true);
4660 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4661 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4663 streamer_write_uhwi (ob, e->size);
4664 e->time.stream_out (ob);
4665 e->exec_predicate.stream_out (ob);
4666 e->nonconst_predicate.stream_out (ob);
4668 ipa_freqcounting_predicate *fcp;
4669 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4670 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4672 fcp->predicate->stream_out (ob);
4673 fcp->freq.stream_out (ob);
4675 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4676 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4678 fcp->predicate->stream_out (ob);
4679 fcp->freq.stream_out (ob);
4681 streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
4682 int ip;
4683 for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
4684 i++)
4685 streamer_write_uhwi (ob, ip);
4686 for (edge = cnode->callees; edge; edge = edge->next_callee)
4687 write_ipa_call_summary (ob, edge);
4688 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4689 write_ipa_call_summary (ob, edge);
4692 streamer_write_char_stream (ob->main_stream, 0);
4693 produce_asm (ob, NULL);
4694 destroy_output_block (ob);
4696 if (!flag_ipa_cp)
4697 ipa_prop_write_jump_functions ();
4701 /* Release function summary. */
4703 void
4704 ipa_free_fn_summary (void)
4706 if (!ipa_call_summaries)
4707 return;
4708 ggc_delete (ipa_fn_summaries);
4709 ipa_fn_summaries = NULL;
4710 delete ipa_call_summaries;
4711 ipa_call_summaries = NULL;
4712 edge_predicate_pool.release ();
4713 /* During IPA this is one of largest datastructures to release. */
4714 if (flag_wpa)
4715 ggc_trim ();
4718 /* Release function summary. */
4720 void
4721 ipa_free_size_summary (void)
4723 if (!ipa_size_summaries)
4724 return;
4725 delete ipa_size_summaries;
4726 ipa_size_summaries = NULL;
4729 namespace {
4731 const pass_data pass_data_local_fn_summary =
4733 GIMPLE_PASS, /* type */
4734 "local-fnsummary", /* name */
4735 OPTGROUP_INLINE, /* optinfo_flags */
4736 TV_INLINE_PARAMETERS, /* tv_id */
4737 0, /* properties_required */
4738 0, /* properties_provided */
4739 0, /* properties_destroyed */
4740 0, /* todo_flags_start */
4741 0, /* todo_flags_finish */
4744 class pass_local_fn_summary : public gimple_opt_pass
4746 public:
4747 pass_local_fn_summary (gcc::context *ctxt)
4748 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4751 /* opt_pass methods: */
4752 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4753 virtual unsigned int execute (function *)
4755 return compute_fn_summary_for_current ();
4758 }; // class pass_local_fn_summary
4760 } // anon namespace
4762 gimple_opt_pass *
4763 make_pass_local_fn_summary (gcc::context *ctxt)
4765 return new pass_local_fn_summary (ctxt);
4769 /* Free inline summary. */
4771 namespace {
4773 const pass_data pass_data_ipa_free_fn_summary =
4775 SIMPLE_IPA_PASS, /* type */
4776 "free-fnsummary", /* name */
4777 OPTGROUP_NONE, /* optinfo_flags */
4778 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4779 0, /* properties_required */
4780 0, /* properties_provided */
4781 0, /* properties_destroyed */
4782 0, /* todo_flags_start */
4783 0, /* todo_flags_finish */
4786 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4788 public:
4789 pass_ipa_free_fn_summary (gcc::context *ctxt)
4790 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4791 small_p (false)
4794 /* opt_pass methods: */
4795 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4796 void set_pass_param (unsigned int n, bool param)
4798 gcc_assert (n == 0);
4799 small_p = param;
4801 virtual bool gate (function *) { return true; }
4802 virtual unsigned int execute (function *)
4804 ipa_free_fn_summary ();
4805 /* Free ipa-prop structures if they are no longer needed. */
4806 ipa_free_all_structures_after_iinln ();
4807 if (!flag_wpa)
4808 ipa_free_size_summary ();
4809 return 0;
4812 private:
4813 bool small_p;
4814 }; // class pass_ipa_free_fn_summary
4816 } // anon namespace
4818 simple_ipa_opt_pass *
4819 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4821 return new pass_ipa_free_fn_summary (ctxt);
4824 namespace {
4826 const pass_data pass_data_ipa_fn_summary =
4828 IPA_PASS, /* type */
4829 "fnsummary", /* name */
4830 OPTGROUP_INLINE, /* optinfo_flags */
4831 TV_IPA_FNSUMMARY, /* tv_id */
4832 0, /* properties_required */
4833 0, /* properties_provided */
4834 0, /* properties_destroyed */
4835 0, /* todo_flags_start */
4836 ( TODO_dump_symtab ), /* todo_flags_finish */
4839 class pass_ipa_fn_summary : public ipa_opt_pass_d
4841 public:
4842 pass_ipa_fn_summary (gcc::context *ctxt)
4843 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4844 ipa_fn_summary_generate, /* generate_summary */
4845 ipa_fn_summary_write, /* write_summary */
4846 ipa_fn_summary_read, /* read_summary */
4847 NULL, /* write_optimization_summary */
4848 NULL, /* read_optimization_summary */
4849 NULL, /* stmt_fixup */
4850 0, /* function_transform_todo_flags_start */
4851 NULL, /* function_transform */
4852 NULL) /* variable_transform */
4855 /* opt_pass methods: */
4856 virtual unsigned int execute (function *) { return 0; }
4858 }; // class pass_ipa_fn_summary
4860 } // anon namespace
4862 ipa_opt_pass_d *
4863 make_pass_ipa_fn_summary (gcc::context *ctxt)
4865 return new pass_ipa_fn_summary (ctxt);
4868 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4869 within the same process. For use by toplev::finalize. */
4871 void
4872 ipa_fnsummary_c_finalize (void)
4874 ipa_free_fn_summary ();