testsuite: simplify target requirements for various Power9 testcases.
[official-gcc.git] / gcc / ipa-fnsummary.c
blob9e3eda4d3cbe3d616ff7e6b707a437521f75f0a8
1 /* Function summary pass.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "gimple.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
63 #include "ssa.h"
64 #include "tree-streamer.h"
65 #include "cgraph.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86 #include "tree-into-ssa.h"
88 /* Summaries.  */
/* Per-function summary with size/time estimates; lives in GC memory
   (va_gc) since it is streamed with the callgraph.  */
89 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
/* Per-function size-only summary; heap allocated.  */
90 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
/* Per-call-edge summary; heap allocated.  */
91 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
93 /* Edge predicates goes here.  */
/* Allocation pool for the predicates attached to edges and hints; see
   edge_set_predicate and set_hint_predicate below.  */
94 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
97 /* Dump IPA hints. */
98 void
99 ipa_dump_hints (FILE *f, ipa_hints hints)
101 if (!hints)
102 return;
103 fprintf (f, "IPA hints:");
104 if (hints & INLINE_HINT_indirect_call)
106 hints &= ~INLINE_HINT_indirect_call;
107 fprintf (f, " indirect_call");
109 if (hints & INLINE_HINT_loop_iterations)
111 hints &= ~INLINE_HINT_loop_iterations;
112 fprintf (f, " loop_iterations");
114 if (hints & INLINE_HINT_loop_stride)
116 hints &= ~INLINE_HINT_loop_stride;
117 fprintf (f, " loop_stride");
119 if (hints & INLINE_HINT_same_scc)
121 hints &= ~INLINE_HINT_same_scc;
122 fprintf (f, " same_scc");
124 if (hints & INLINE_HINT_in_scc)
126 hints &= ~INLINE_HINT_in_scc;
127 fprintf (f, " in_scc");
129 if (hints & INLINE_HINT_cross_module)
131 hints &= ~INLINE_HINT_cross_module;
132 fprintf (f, " cross_module");
134 if (hints & INLINE_HINT_declared_inline)
136 hints &= ~INLINE_HINT_declared_inline;
137 fprintf (f, " declared_inline");
139 if (hints & INLINE_HINT_known_hot)
141 hints &= ~INLINE_HINT_known_hot;
142 fprintf (f, " known_hot");
144 gcc_assert (!hints);
148 /* Record SIZE and TIME to SUMMARY.
149 The accounted code will be executed when EXEC_PRED is true.
150 When NONCONST_PRED is false the code will evaluate to constant and
151 will get optimized out in specialized clones of the function.
152 If CALL is true account to call_size_time_table rather than
153 size_time_table. */
155 void
156 ipa_fn_summary::account_size_time (int size, sreal time,
157 const predicate &exec_pred,
158 const predicate &nonconst_pred_in,
159 bool call)
161 size_time_entry *e;
162 bool found = false;
163 int i;
164 predicate nonconst_pred;
165 vec<size_time_entry, va_gc> *table = call
166 ? call_size_time_table : size_time_table;
168 if (exec_pred == false)
169 return;
171 nonconst_pred = nonconst_pred_in & exec_pred;
173 if (nonconst_pred == false)
174 return;
176 /* We need to create initial empty unconditional clause, but otherwise
177 we don't need to account empty times and sizes. */
178 if (!size && time == 0 && table)
179 return;
181 /* Only for calls we are unaccounting what we previously recorded. */
182 gcc_checking_assert (time >= 0 || call);
184 for (i = 0; vec_safe_iterate (table, i, &e); i++)
185 if (e->exec_predicate == exec_pred
186 && e->nonconst_predicate == nonconst_pred)
188 found = true;
189 break;
191 if (i == max_size_time_table_size)
193 i = 0;
194 found = true;
195 e = &(*table)[0];
196 if (dump_file && (dump_flags & TDF_DETAILS))
197 fprintf (dump_file,
198 "\t\tReached limit on number of entries, "
199 "ignoring the predicate.");
201 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
203 fprintf (dump_file,
204 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
205 ((double) size) / ipa_fn_summary::size_scale,
206 (time.to_double ()), found ? "" : "new ");
207 exec_pred.dump (dump_file, conds, 0);
208 if (exec_pred != nonconst_pred)
210 fprintf (dump_file, " nonconst:");
211 nonconst_pred.dump (dump_file, conds);
213 else
214 fprintf (dump_file, "\n");
216 if (!found)
218 class size_time_entry new_entry;
219 new_entry.size = size;
220 new_entry.time = time;
221 new_entry.exec_predicate = exec_pred;
222 new_entry.nonconst_predicate = nonconst_pred;
223 if (call)
224 vec_safe_push (call_size_time_table, new_entry);
225 else
226 vec_safe_push (size_time_table, new_entry);
228 else
230 e->size += size;
231 e->time += time;
232 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
233 /* Tolerate small roundoff issues. */
234 if (e->time < 0)
235 e->time = 0;
239 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
241 static struct cgraph_edge *
242 redirect_to_unreachable (struct cgraph_edge *e)
244 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
245 struct cgraph_node *target = cgraph_node::get_create
246 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
248 if (e->speculative)
249 e = cgraph_edge::resolve_speculation (e, target->decl);
250 else if (!e->callee)
251 e = cgraph_edge::make_direct (e, target);
252 else
253 e->redirect_callee (target);
254 class ipa_call_summary *es = ipa_call_summaries->get (e);
255 e->inline_failed = CIF_UNREACHABLE;
256 e->count = profile_count::zero ();
257 es->call_stmt_size = 0;
258 es->call_stmt_time = 0;
259 if (callee)
260 callee->remove_symbol_and_inline_clones ();
261 return e;
264 /* Set predicate for edge E. */
266 static void
267 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
269 /* If the edge is determined to be never executed, redirect it
270 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
271 be optimized out. */
272 if (predicate && *predicate == false
273 /* When handling speculative edges, we need to do the redirection
274 just once. Do it always on the direct edge, so we do not
275 attempt to resolve speculation while duplicating the edge. */
276 && (!e->speculative || e->callee))
277 e = redirect_to_unreachable (e);
279 class ipa_call_summary *es = ipa_call_summaries->get (e);
280 if (predicate && *predicate != true)
282 if (!es->predicate)
283 es->predicate = edge_predicate_pool.allocate ();
284 *es->predicate = *predicate;
286 else
288 if (es->predicate)
289 edge_predicate_pool.remove (es->predicate);
290 es->predicate = NULL;
294 /* Set predicate for hint *P. */
296 static void
297 set_hint_predicate (predicate **p, predicate new_predicate)
299 if (new_predicate == false || new_predicate == true)
301 if (*p)
302 edge_predicate_pool.remove (*p);
303 *p = NULL;
305 else
307 if (!*p)
308 *p = edge_predicate_pool.allocate ();
309 **p = new_predicate;
313 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
314 Otherwise add a new item to the vector with this predicate and frerq equal
315 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
316 in which case the function does nothing. */
318 static void
319 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
320 const predicate &new_predicate, sreal add_freq,
321 unsigned max_num_predicates)
323 if (new_predicate == false || new_predicate == true)
324 return;
325 ipa_freqcounting_predicate *f;
326 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
327 if (new_predicate == f->predicate)
329 f->freq += add_freq;
330 return;
332 if (vec_safe_length (*v) >= max_num_predicates)
333 /* Too many different predicates to account for. */
334 return;
336 ipa_freqcounting_predicate fcp;
337 fcp.predicate = NULL;
338 set_hint_predicate (&fcp.predicate, new_predicate);
339 fcp.freq = add_freq;
340 vec_safe_push (*v, fcp);
341 return;
344 /* Compute what conditions may or may not hold given information about
345 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
346 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
347 copy when called in a given context. It is a bitmask of conditions. Bit
348 0 means that condition is known to be false, while bit 1 means that condition
349 may or may not be true. These differs - for example NOT_INLINED condition
350 is always false in the second and also builtin_constant_p tests cannot use
351 the fact that parameter is indeed a constant.
353 When INLINE_P is true, assume that we are inlining. AVAL contains known
354 information about argument values. The function does not modify its content
355 and so AVALs could also be of type ipa_call_arg_values but so far all
356 callers work with the auto version and so we avoid the conversion for
357 convenience.
359 ERROR_MARK value of an argument means compile time invariant. */
361 static void
362 evaluate_conditions_for_known_args (struct cgraph_node *node,
363 bool inline_p,
364 ipa_auto_call_arg_values *avals,
365 clause_t *ret_clause,
366 clause_t *ret_nonspec_clause)
368 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
369 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
370 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
371 int i;
372 struct condition *c;
/* Walk every condition of NODE's summary and decide whether it can be
   proven false from the known argument values/ranges in AVALS.  */
374 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
376 tree val = NULL;
377 tree res;
378 int j;
379 struct expr_eval_op *op;
381 /* We allow call stmt to have fewer arguments than the callee function
382 (especially for K&R style programs). So bound check here (we assume
383 m_known_aggs vector is either empty or has the same length as
384 m_known_vals). */
385 gcc_checking_assert (!avals->m_known_aggs.length ()
386 || !avals->m_known_vals.length ()
387 || (avals->m_known_vals.length ()
388 == avals->m_known_aggs.length ()));
390 if (c->agg_contents)
392 if (c->code == predicate::changed
393 && !c->by_ref
394 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
395 continue;
397 if (ipa_agg_value_set *agg = avals->safe_aggval_at (c->operand_num))
399 tree sval = avals->safe_sval_at (c->operand_num);
400 val = ipa_find_agg_cst_for_param (agg, sval, c->offset,
401 c->by_ref);
403 else
404 val = NULL_TREE;
406 else
408 val = avals->safe_sval_at (c->operand_num);
409 if (val && val == error_mark_node && c->code != predicate::changed)
410 val = NULL_TREE;
/* No usable value: the condition may or may not hold in both the
   specialized and non-specialized copy.  */
413 if (!val
414 && (c->code == predicate::changed
415 || c->code == predicate::is_not_constant))
417 clause |= 1 << (i + predicate::first_dynamic_condition);
418 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
419 continue;
421 if (c->code == predicate::changed)
423 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
424 continue;
427 if (c->code == predicate::is_not_constant)
429 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
430 continue;
/* Try constant folding: view-convert VAL to the condition's type,
   apply the chained expr_eval_ops, then fold against c->val.  */
433 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
435 if (c->type != TREE_TYPE (val))
436 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
437 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
439 if (!val)
440 break;
441 if (!op->val[0])
442 val = fold_unary (op->code, op->type, val);
443 else if (!op->val[1])
444 val = fold_binary (op->code, op->type,
445 op->index ? op->val[0] : val,
446 op->index ? val : op->val[0]);
447 else if (op->index == 0)
448 val = fold_ternary (op->code, op->type,
449 val, op->val[0], op->val[1]);
450 else if (op->index == 1)
451 val = fold_ternary (op->code, op->type,
452 op->val[0], val, op->val[1]);
453 else if (op->index == 2)
454 val = fold_ternary (op->code, op->type,
455 op->val[0], op->val[1], val);
456 else
457 val = NULL_TREE;
460 res = val
461 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
462 : NULL;
/* Condition folded to false: known not to hold, leave both bits clear.  */
464 if (res && integer_zerop (res))
465 continue;
466 if (res && integer_onep (res))
468 clause |= 1 << (i + predicate::first_dynamic_condition);
469 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
470 continue;
/* Constant folding failed; try to disprove the condition from the known
   value range of the scalar argument instead.  */
473 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
474 && !c->agg_contents
475 && (!val || TREE_CODE (val) != INTEGER_CST))
477 value_range vr = avals->m_known_value_ranges[c->operand_num];
478 if (!vr.undefined_p ()
479 && !vr.varying_p ()
480 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
482 if (!useless_type_conversion_p (c->type, vr.type ()))
484 value_range res;
485 range_fold_unary_expr (&res, NOP_EXPR,
486 c->type, &vr, vr.type ());
487 vr = res;
489 tree type = c->type;
491 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
493 if (vr.varying_p () || vr.undefined_p ())
494 break;
496 value_range res;
497 if (!op->val[0])
498 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
499 else if (!op->val[1])
501 value_range op0 (op->val[0], op->val[0]);
502 range_fold_binary_expr (&res, op->code, op->type,
503 op->index ? &op0 : &vr,
504 op->index ? &vr : &op0);
506 else
507 gcc_unreachable ();
508 type = op->type;
509 vr = res;
511 if (!vr.varying_p () && !vr.undefined_p ())
513 value_range res;
514 value_range val_vr (c->val, c->val);
515 range_fold_binary_expr (&res, c->code, boolean_type_node,
516 &vr,
517 &val_vr);
518 if (res.zero_p ())
519 continue;
/* Could not prove the condition false: record that it may hold.  */
524 clause |= 1 << (i + predicate::first_dynamic_condition);
525 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
527 *ret_clause = clause;
528 if (ret_nonspec_clause)
529 *ret_nonspec_clause = nonspec_clause;
532 /* Return true if VRP will be exectued on the function.
533 We do not want to anticipate optimizations that will not happen.
535 FIXME: This can be confused with -fdisable and debug counters and thus
536 it should not be used for correctness (only to make heuristics work).
537 This means that inliner should do its own optimizations of expressions
538 that it predicts to be constant so wrong code can not be triggered by
539 builtin_constant_p. */
541 static bool
542 vrp_will_run_p (struct cgraph_node *node)
544 return (opt_for_fn (node->decl, optimize)
545 && !opt_for_fn (node->decl, optimize_debug)
546 && opt_for_fn (node->decl, flag_tree_vrp));
549 /* Similarly about FRE. */
551 static bool
552 fre_will_run_p (struct cgraph_node *node)
554 return (opt_for_fn (node->decl, optimize)
555 && !opt_for_fn (node->decl, optimize_debug)
556 && opt_for_fn (node->decl, flag_tree_fre));
559 /* Work out what conditions might be true at invocation of E.
560 Compute costs for inlined edge if INLINE_P is true.
562 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
563 (if non-NULL) conditions evaluated for nonspecialized clone called
564 in a given context.
566 Vectors in AVALS will be populated with useful known information about
567 argument values - information not known to have any uses will be omitted -
568 except for m_known_contexts which will only be calculated if
569 COMPUTE_CONTEXTS is true. */
571 void
572 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
573 clause_t *clause_ptr,
574 clause_t *nonspec_clause_ptr,
575 ipa_auto_call_arg_values *avals,
576 bool compute_contexts)
578 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
579 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
580 class ipa_edge_args *args;
582 if (clause_ptr)
583 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
/* Preferred path: IPA jump functions are available for this edge, so
   derive constants, ranges, aggregate values and contexts from them.  */
585 if (ipa_node_params_sum
586 && !e->call_stmt_cannot_inline_p
587 && (info->conds || compute_contexts)
588 && (args = IPA_EDGE_REF (e)) != NULL)
590 struct cgraph_node *caller;
591 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
592 class ipa_call_summary *es = ipa_call_summaries->get (e);
593 int i, count = ipa_get_cs_argument_count (args);
595 if (count)
597 if (e->caller->inlined_to)
598 caller = e->caller->inlined_to;
599 else
600 caller = e->caller;
601 caller_parms_info = IPA_NODE_REF (caller);
602 callee_pi = IPA_NODE_REF (callee);
604 /* Watch for thunks. */
605 if (callee_pi)
606 /* Watch for variadic functions. */
607 count = MIN (count, ipa_get_param_count (callee_pi));
610 if (callee_pi)
611 for (i = 0; i < count; i++)
613 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
/* Only bother evaluating parameters the callee summary actually
   uses in predicates or indirect calls.  */
615 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
616 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
618 /* Determine if we know constant value of the parameter. */
619 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
620 ipa_get_type (callee_pi, i));
622 if (!cst && e->call_stmt
623 && i < (int)gimple_call_num_args (e->call_stmt))
625 cst = gimple_call_arg (e->call_stmt, i);
626 if (!is_gimple_min_invariant (cst))
627 cst = NULL;
629 if (cst)
631 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
632 if (!avals->m_known_vals.length ())
633 avals->m_known_vals.safe_grow_cleared (count, true);
634 avals->m_known_vals[i] = cst;
/* error_mark_node encodes "compile time invariant" (see the
   evaluate_conditions_for_known_args comment above).  */
636 else if (inline_p && !es->param[i].change_prob)
638 if (!avals->m_known_vals.length ())
639 avals->m_known_vals.safe_grow_cleared (count, true);
640 avals->m_known_vals[i] = error_mark_node;
643 /* If we failed to get simple constant, try value range. */
644 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
645 && vrp_will_run_p (caller)
646 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
648 value_range vr
649 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
650 ipa_get_type (callee_pi,
651 i));
652 if (!vr.undefined_p () && !vr.varying_p ())
654 if (!avals->m_known_value_ranges.length ())
656 avals->m_known_value_ranges.safe_grow (count, true);
657 for (int i = 0; i < count; ++i)
658 new (&avals->m_known_value_ranges[i])
659 value_range ();
661 avals->m_known_value_ranges[i] = vr;
665 /* Determine known aggregate values. */
666 if (fre_will_run_p (caller))
668 ipa_agg_value_set agg
669 = ipa_agg_value_set_from_jfunc (caller_parms_info,
670 caller, &jf->agg);
671 if (agg.items.length ())
673 if (!avals->m_known_aggs.length ())
674 avals->m_known_aggs.safe_grow_cleared (count, true);
675 avals->m_known_aggs[i] = agg;
680 /* For calls used in polymorphic calls we further determine
681 polymorphic call context. */
682 if (compute_contexts
683 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
685 ipa_polymorphic_call_context
686 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
687 if (!ctx.useless_p ())
689 if (!avals->m_known_contexts.length ())
690 avals->m_known_contexts.safe_grow_cleared (count, true);
691 avals->m_known_contexts[i]
692 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
696 else
697 gcc_assert (!count || callee->thunk.thunk_p);
/* Fallback: no IPA info; harvest invariant arguments straight from the
   call statement.  */
699 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
701 int i, count = (int)gimple_call_num_args (e->call_stmt);
703 for (i = 0; i < count; i++)
705 tree cst = gimple_call_arg (e->call_stmt, i);
706 if (!is_gimple_min_invariant (cst))
707 cst = NULL;
708 if (cst)
710 if (!avals->m_known_vals.length ())
711 avals->m_known_vals.safe_grow_cleared (count, true);
712 avals->m_known_vals[i] = cst;
717 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
718 nonspec_clause_ptr);
722 /* Allocate the function summary. */
724 static void
725 ipa_fn_summary_alloc (void)
727 gcc_checking_assert (!ipa_fn_summaries);
728 ipa_size_summaries = new ipa_size_summary_t (symtab);
729 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
730 ipa_call_summaries = new ipa_call_summary_t (symtab);
733 ipa_call_summary::~ipa_call_summary ()
735 if (predicate)
736 edge_predicate_pool.remove (predicate);
738 param.release ();
741 ipa_fn_summary::~ipa_fn_summary ()
743 unsigned len = vec_safe_length (loop_iterations);
744 for (unsigned i = 0; i < len; i++)
745 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
746 len = vec_safe_length (loop_strides);
747 for (unsigned i = 0; i < len; i++)
748 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
749 vec_free (conds);
750 vec_free (size_time_table);
751 vec_free (call_size_time_table);
752 vec_free (loop_iterations);
753 vec_free (loop_strides);
756 void
757 ipa_fn_summary_t::remove_callees (cgraph_node *node)
759 cgraph_edge *e;
760 for (e = node->callees; e; e = e->next_callee)
761 ipa_call_summaries->remove (e);
762 for (e = node->indirect_calls; e; e = e->next_callee)
763 ipa_call_summaries->remove (e);
766 /* Duplicate predicates in loop hint vector, allocating memory for them and
767 remove and deallocate any uninteresting (true or false) ones. Return the
768 result. */
770 static vec<ipa_freqcounting_predicate, va_gc> *
771 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
772 clause_t possible_truths)
774 if (vec_safe_length (v) == 0)
775 return NULL;
777 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
778 int len = res->length();
779 for (int i = len - 1; i >= 0; i--)
781 predicate new_predicate
782 = (*res)[i].predicate->remap_after_duplication (possible_truths);
783 /* We do not want to free previous predicate; it is used by node
784 origin. */
785 (*res)[i].predicate = NULL;
786 set_hint_predicate (&(*res)[i].predicate, new_predicate);
788 if (!(*res)[i].predicate)
789 res->unordered_remove (i);
792 return res;
796 /* Hook that is called by cgraph.c when a node is duplicated. */
797 void
798 ipa_fn_summary_t::duplicate (cgraph_node *src,
799 cgraph_node *dst,
800 ipa_fn_summary *,
801 ipa_fn_summary *info)
/* Start from a shallow copy of SRC's summary, then deep-copy or remap the
   parts that must not be shared.  */
803 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
804 /* TODO: as an optimization, we may avoid copying conditions
805 that are known to be false or true. */
806 info->conds = vec_safe_copy (info->conds);
808 /* When there are any replacements in the function body, see if we can figure
809 out that something was optimized out. */
810 if (ipa_node_params_sum && dst->clone.tree_map)
812 vec<size_time_entry, va_gc> *entry = info->size_time_table;
813 /* Use SRC parm info since it may not be copied yet. */
814 class ipa_node_params *parms_info = IPA_NODE_REF (src);
815 ipa_auto_call_arg_values avals;
816 int count = ipa_get_param_count (parms_info);
817 int i, j;
818 clause_t possible_truths;
819 predicate true_pred = true;
820 size_time_entry *e;
821 int optimized_out_size = 0;
822 bool inlined_to_p = false;
823 struct cgraph_edge *edge, *next;
825 info->size_time_table = 0;
/* Collect parameter replacements from the clone's tree_map as known
   argument values.  */
826 avals.m_known_vals.safe_grow_cleared (count, true);
827 for (i = 0; i < count; i++)
829 struct ipa_replace_map *r;
831 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
833 if (r->parm_num == i)
835 avals.m_known_vals[i] = r->new_tree;
836 break;
840 evaluate_conditions_for_known_args (dst, false,
841 &avals,
842 &possible_truths,
843 /* We are going to specialize,
844 so ignore nonspec truths. */
845 NULL);
847 info->account_size_time (0, 0, true_pred, true_pred);
849 /* Remap size_time vectors.
850 Simplify the predicate by pruning out alternatives that are known
851 to be false.
852 TODO: as on optimization, we can also eliminate conditions known
853 to be true. */
854 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
856 predicate new_exec_pred;
857 predicate new_nonconst_pred;
858 new_exec_pred = e->exec_predicate.remap_after_duplication
859 (possible_truths);
860 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
861 (possible_truths);
862 if (new_exec_pred == false || new_nonconst_pred == false)
863 optimized_out_size += e->size;
864 else
865 info->account_size_time (e->size, e->time, new_exec_pred,
866 new_nonconst_pred);
869 /* Remap edge predicates with the same simplification as above.
870 Also copy constantness arrays. */
871 for (edge = dst->callees; edge; edge = next)
873 predicate new_predicate;
874 class ipa_call_summary *es = ipa_call_summaries->get (edge);
875 next = edge->next_callee;
877 if (!edge->inline_failed)
878 inlined_to_p = true;
879 if (!es->predicate)
880 continue;
881 new_predicate = es->predicate->remap_after_duplication
882 (possible_truths);
883 if (new_predicate == false && *es->predicate != false)
884 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
885 edge_set_predicate (edge, &new_predicate);
888 /* Remap indirect edge predicates with the same simplification as above.
889 Also copy constantness arrays. */
890 for (edge = dst->indirect_calls; edge; edge = next)
892 predicate new_predicate;
893 class ipa_call_summary *es = ipa_call_summaries->get (edge);
894 next = edge->next_callee;
896 gcc_checking_assert (edge->inline_failed);
897 if (!es->predicate)
898 continue;
899 new_predicate = es->predicate->remap_after_duplication
900 (possible_truths);
901 if (new_predicate == false && *es->predicate != false)
902 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
903 edge_set_predicate (edge, &new_predicate);
905 info->loop_iterations
906 = remap_freqcounting_preds_after_dup (info->loop_iterations,
907 possible_truths);
908 info->loop_strides
909 = remap_freqcounting_preds_after_dup (info->loop_strides,
910 possible_truths);
912 /* If inliner or someone after inliner will ever start producing
913 non-trivial clones, we will get trouble with lack of information
914 about updating self sizes, because size vectors already contains
915 sizes of the callees. */
916 gcc_assert (!inlined_to_p || !optimized_out_size);
/* No replacements: plain deep copy of the tables and hint predicates.  */
918 else
920 info->size_time_table = vec_safe_copy (info->size_time_table);
921 info->loop_iterations = vec_safe_copy (info->loop_iterations);
922 info->loop_strides = vec_safe_copy (info->loop_strides);
924 ipa_freqcounting_predicate *f;
925 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
927 predicate p = *f->predicate;
928 f->predicate = NULL;
929 set_hint_predicate (&f->predicate, p);
931 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
933 predicate p = *f->predicate;
934 f->predicate = NULL;
935 set_hint_predicate (&f->predicate, p);
938 if (!dst->inlined_to)
939 ipa_update_overall_fn_summary (dst);
943 /* Hook that is called by cgraph.c when a node is duplicated. */
945 void
946 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
947 struct cgraph_edge *dst,
948 class ipa_call_summary *srcinfo,
949 class ipa_call_summary *info)
951 new (info) ipa_call_summary (*srcinfo);
952 info->predicate = NULL;
953 edge_set_predicate (dst, srcinfo->predicate);
954 info->param = srcinfo->param.copy ();
955 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
957 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
958 - eni_size_weights.call_cost);
959 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
960 - eni_time_weights.call_cost);
964 /* Dump edge summaries associated to NODE and recursively to all clones.
965 Indent by INDENT. */
967 static void
968 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
969 class ipa_fn_summary *info)
971 struct cgraph_edge *edge;
/* Direct call edges first; recurse into inlined callees.  */
972 for (edge = node->callees; edge; edge = edge->next_callee)
974 class ipa_call_summary *es = ipa_call_summaries->get (edge);
975 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
976 int i;
978 fprintf (f,
979 "%*s%s %s\n%*s freq:%4.2f",
980 indent, "", callee->dump_name (),
981 !edge->inline_failed
982 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
983 indent, "", edge->sreal_frequency ().to_double ());
985 if (cross_module_call_p (edge))
986 fprintf (f, " cross module");
988 if (es)
989 fprintf (f, " loop depth:%2i size:%2i time: %2i",
990 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
992 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
993 ipa_size_summary *ss = ipa_size_summaries->get (callee);
994 if (s != NULL)
995 fprintf (f, " callee size:%2i stack:%2i",
996 (int) (ss->size / ipa_fn_summary::size_scale),
997 (int) s->estimated_stack_size);
999 if (es && es->predicate)
1001 fprintf (f, " predicate: ");
1002 es->predicate->dump (f, info->conds);
1004 else
1005 fprintf (f, "\n");
/* Per-parameter info: change probability and readonly-memory flag.  */
1006 if (es && es->param.exists ())
1007 for (i = 0; i < (int) es->param.length (); i++)
1009 int prob = es->param[i].change_prob;
1011 if (!prob)
1012 fprintf (f, "%*s op%i is compile time invariant\n",
1013 indent + 2, "", i);
1014 else if (prob != REG_BR_PROB_BASE)
1015 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1016 prob * 100.0 / REG_BR_PROB_BASE);
1017 if (es->param[i].points_to_local_or_readonly_memory)
1018 fprintf (f, "%*s op%i points to local or readonly memory\n",
1019 indent + 2, "", i);
1021 if (!edge->inline_failed)
1023 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1024 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1025 indent + 2, "",
1026 (int) ipa_get_stack_frame_offset (callee),
1027 (int) ss->estimated_self_stack_size);
/* Inlined edge: recurse with deeper indentation.  */
1028 dump_ipa_call_summary (f, indent + 2, callee, info);
/* Then indirect call edges (no callee to recurse into).  */
1031 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1033 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1034 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1035 " time: %2i",
1036 indent, "",
1037 es->loop_depth,
1038 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1039 es->call_stmt_time);
1040 if (es->predicate)
1042 fprintf (f, "predicate: ");
1043 es->predicate->dump (f, info->conds);
1045 else
1046 fprintf (f, "\n");
/* Dump the IPA function summary of NODE to F: flags, global/self size and
   time, stack sizes, the size_time table with its predicates, loop hints,
   and (via dump_ipa_call_summary) all outgoing call edges.  */
1051 void
1052 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1054 if (node->definition)
1056 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1057 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1058 if (s != NULL)
1060 size_time_entry *e;
1061 int i;
1062 fprintf (f, "IPA function summary for %s", node->dump_name ());
1063 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1064 fprintf (f, " always_inline");
1065 if (s->inlinable)
1066 fprintf (f, " inlinable");
1067 if (s->fp_expressions)
1068 fprintf (f, " fp_expression");
1069 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1070 fprintf (f, " self size: %i\n", ss->self_size);
1071 fprintf (f, " global size: %i\n", ss->size);
1072 fprintf (f, " min size: %i\n", s->min_size);
1073 fprintf (f, " self stack: %i\n",
1074 (int) ss->estimated_self_stack_size);
1075 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1076 if (s->growth)
1077 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1078 if (s->scc_no)
1079 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
/* Each size_time entry, with its exec/nonconst predicates when
   non-trivial.  */
1080 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
1082 fprintf (f, " size:%f, time:%f",
1083 (double) e->size / ipa_fn_summary::size_scale,
1084 e->time.to_double ());
1085 if (e->exec_predicate != true)
1087 fprintf (f, ", executed if:");
1088 e->exec_predicate.dump (f, s->conds, 0);
1090 if (e->exec_predicate != e->nonconst_predicate)
1092 fprintf (f, ", nonconst if:");
1093 e->nonconst_predicate.dump (f, s->conds, 0);
1095 fprintf (f, "\n");
/* Loop-hint predicates with their accumulated frequencies.  */
1097 ipa_freqcounting_predicate *fcp;
1098 bool first_fcp = true;
1099 for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
1101 if (first_fcp)
1103 fprintf (f, " loop iterations:");
1104 first_fcp = false;
1106 fprintf (f, " %3.2f for ", fcp->freq.to_double ());
1107 fcp->predicate->dump (f, s->conds);
1109 first_fcp = true;
1110 for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
1112 if (first_fcp)
1114 fprintf (f, " loop strides:");
1115 first_fcp = false;
1117 fprintf (f, " %3.2f for :", fcp->freq.to_double ());
1118 fcp->predicate->dump (f, s->conds);
1120 fprintf (f, " calls:\n");
1121 dump_ipa_call_summary (f, 4, node, s);
1122 fprintf (f, "\n");
1124 else
1125 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
1129 DEBUG_FUNCTION void
1130 ipa_debug_fn_summary (struct cgraph_node *node)
1132 ipa_dump_fn_summary (stderr, node);
1135 void
1136 ipa_dump_fn_summaries (FILE *f)
1138 struct cgraph_node *node;
1140 FOR_EACH_DEFINED_FUNCTION (node)
1141 if (!node->inlined_to)
1142 ipa_dump_fn_summary (f, node);
1145 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1146 boolean variable pointed to by DATA. */
1148 static bool
1149 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1150 void *data)
1152 bool *b = (bool *) data;
1153 *b = true;
1154 return true;
1157 /* If OP refers to value of function parameter, return the corresponding
1158 parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
1159 PARM_DECL) will be stored to *SIZE_P in that case too.  */
1161 static tree
1162 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1163 poly_int64 *size_p)
1165 /* SSA_NAME referring to parm default def? */
1166 if (TREE_CODE (op) == SSA_NAME
1167 && SSA_NAME_IS_DEFAULT_DEF (op)
1168 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1170 if (size_p)
1171 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)))
1172 return SSA_NAME_VAR (op);
1174 /* Non-SSA parm reference? */
1175 if (TREE_CODE (op) == PARM_DECL)
1177 bool modified = false;
1179 ao_ref refd;
1180 ao_ref_init (&refd, op);
/* Walk the virtual definitions reaching STMT to see whether the
   parameter may have been written to; mark_modified sets MODIFIED if
   any aliasing store is visited.  */
1181 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1182 mark_modified, &modified, NULL, NULL,
1183 fbi->aa_walk_budget + 1);
1184 if (walked < 0)
/* Ran out of the alias-analysis walking budget; disable further
   walks for this function body and give up on this operand.  */
1186 fbi->aa_walk_budget = 0;
1187 return NULL_TREE;
1189 if (!modified)
1191 if (size_p)
1192 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1193 return op;
1196 return NULL_TREE;
1199 /* If OP refers to value of function parameter, return the corresponding
1200 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1201 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1202 stored to *SIZE_P in that case too. */
1204 static tree
1205 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1206 poly_int64 *size_p)
1208 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1209 if (res)
1210 return res;
1212 if (TREE_CODE (op) == SSA_NAME
1213 && !SSA_NAME_IS_DEFAULT_DEF (op)
1214 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1215 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1216 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1217 size_p);
1218 return NULL_TREE;
1221 /* If OP refers to a value of a function parameter or value loaded from an
1222 aggregate passed to a parameter (either by value or reference), return TRUE
1223 and store the number of the parameter to *INDEX_P, the access size into
1224 *SIZE_P, and information whether and how it has been loaded from an
1225 aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
1226 statement in which OP is used or loaded.  */
1228 static bool
1229 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1230 gimple *stmt, tree op, int *index_p,
1231 poly_int64 *size_p,
1232 struct agg_position_info *aggpos)
1234 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1236 gcc_checking_assert (aggpos);
1237 if (res)
/* Direct use of an unmodified parameter.  */
1239 *index_p = ipa_get_param_decl_index (fbi->info, res);
1240 if (*index_p < 0)
1241 return false;
1242 aggpos->agg_contents = false;
1243 aggpos->by_ref = false;
1244 return true;
1247 if (TREE_CODE (op) == SSA_NAME)
1249 if (SSA_NAME_IS_DEFAULT_DEF (op)
1250 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1251 return false;
/* Follow the single-rhs SSA definition; if its RHS is not a memory
   reference, recurse to analyze that RHS the same way.  */
1252 stmt = SSA_NAME_DEF_STMT (op);
1253 op = gimple_assign_rhs1 (stmt);
1254 if (!REFERENCE_CLASS_P (op))
1255 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1256 aggpos);
/* OP is a memory reference; check whether it is a load from an
   aggregate passed in a parameter.  */
1259 aggpos->agg_contents = true;
1260 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1261 stmt, op, index_p, &aggpos->offset,
1262 size_p, &aggpos->by_ref);
1265 /* See if statement might disappear after inlining.
1266 0 - means not eliminated
1267 1 - half of statements goes away
1268 2 - for sure it is eliminated.
1269 We are not terribly sophisticated, basically looking for simple abstraction
1270 penalty wrappers.  */
1272 static int
1273 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1275 enum gimple_code code = gimple_code (stmt);
1276 enum tree_code rhs_code;
/* Without optimization nothing gets removed by inlining.  */
1278 if (!optimize)
1279 return 0;
1281 switch (code)
1283 case GIMPLE_RETURN:
1284 return 2;
1285 case GIMPLE_ASSIGN:
1286 if (gimple_num_ops (stmt) != 2)
1287 return 0;
1289 rhs_code = gimple_assign_rhs_code (stmt);
1291 /* Casts of parameters, loads from parameters passed by reference
1292 and stores to return value or parameters are often free after
1293 inlining due to SRA and further combining.
1294 Assume that half of statements goes away.  */
1295 if (CONVERT_EXPR_CODE_P (rhs_code)
1296 || rhs_code == VIEW_CONVERT_EXPR
1297 || rhs_code == ADDR_EXPR
1298 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1300 tree rhs = gimple_assign_rhs1 (stmt);
1301 tree lhs = gimple_assign_lhs (stmt);
1302 tree inner_rhs = get_base_address (rhs);
1303 tree inner_lhs = get_base_address (lhs);
1304 bool rhs_free = false;
1305 bool lhs_free = false;
1307 if (!inner_rhs)
1308 inner_rhs = rhs;
1309 if (!inner_lhs)
1310 inner_lhs = lhs;
1312 /* Reads of parameter are expected to be free.  */
1313 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1314 rhs_free = true;
1315 /* Match expressions of form &this->field.  Those will most likely
1316 combine with something upstream after inlining.  */
1317 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1319 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1320 if (TREE_CODE (op) == PARM_DECL)
1321 rhs_free = true;
1322 else if (TREE_CODE (op) == MEM_REF
1323 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1324 NULL))
1325 rhs_free = true;
1328 /* When parameter is not SSA register because its address is taken
1329 and it is just copied into one, the statement will be completely
1330 free after inlining (we will copy propagate backward).  */
1331 if (rhs_free && is_gimple_reg (lhs))
1332 return 2;
1334 /* Reads of parameters passed by reference
1335 expected to be free (i.e. optimized out after inlining).  */
1336 if (TREE_CODE (inner_rhs) == MEM_REF
1337 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1338 rhs_free = true;
1340 /* Copying parameter passed by reference into gimple register is
1341 probably also going to copy propagate, but we can't be quite
1342 sure.  */
1343 if (rhs_free && is_gimple_reg (lhs))
1344 lhs_free = true;
1346 /* Writes to parameters, parameters passed by value and return value
1347 (either directly or passed via invisible reference) are free.
1349 TODO: We ought to handle testcase like
1350 struct a {int a,b;};
1351 struct a
1352 returnstruct (void)
1354 struct a a ={1,2};
1355 return a;
1358 This translate into:
1360 returnstruct ()
1362 int a$b;
1363 int a$a;
1364 struct a a;
1365 struct a D.2739;
1367 <bb 2>:
1368 D.2739.a = 1;
1369 D.2739.b = 2;
1370 return D.2739;
1373 For that we either need to copy ipa-split logic detecting writes
1374 to return value.  */
1375 if (TREE_CODE (inner_lhs) == PARM_DECL
1376 || TREE_CODE (inner_lhs) == RESULT_DECL
1377 || (TREE_CODE (inner_lhs) == MEM_REF
1378 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1379 NULL)
1380 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1381 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1382 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1383 (inner_lhs,
1384 0))) == RESULT_DECL))))
1385 lhs_free = true;
1386 if (lhs_free
1387 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1388 rhs_free = true;
/* Both sides of the copy are expected to vanish: assume half the
   statement remains.  */
1389 if (lhs_free && rhs_free)
1390 return 1;
1392 return 0;
1393 default:
1394 return 0;
1398 /* Analyze EXPR if it represents a series of simple operations performed on
1399 a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
1400 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1401 Type of the parameter or load from an aggregate via the parameter is
1402 stored in *TYPE_P.  Operations on the parameter are recorded to
1403 PARAM_OPS_P if it is not NULL.  */
1405 static bool
1406 decompose_param_expr (struct ipa_func_body_info *fbi,
1407 gimple *stmt, tree expr,
1408 int *index_p, tree *type_p,
1409 struct agg_position_info *aggpos,
1410 expr_eval_ops *param_ops_p = NULL)
1412 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1413 int op_count = 0;
1415 if (param_ops_p)
1416 *param_ops_p = NULL;
/* Walk backwards through the SSA definition chain of EXPR, recording at
   most OP_LIMIT simple operations, until a parameter (or an aggregate
   item passed in a parameter) is reached.  */
1418 while (true)
1420 expr_eval_op eval_op;
1421 unsigned rhs_count;
1422 unsigned cst_count = 0;
1424 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1425 aggpos))
1427 tree type = TREE_TYPE (expr);
1429 if (aggpos->agg_contents)
1431 /* Stop if containing bit-field.  */
1432 if (TREE_CODE (expr) == BIT_FIELD_REF
1433 || contains_bitfld_component_ref_p (expr))
1434 break;
1437 *type_p = type;
1438 return true;
1441 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1442 break;
1444 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1445 break;
1447 switch (gimple_assign_rhs_class (stmt))
1449 case GIMPLE_SINGLE_RHS:
1450 expr = gimple_assign_rhs1 (stmt);
1451 continue;
1453 case GIMPLE_UNARY_RHS:
1454 rhs_count = 1;
1455 break;
1457 case GIMPLE_BINARY_RHS:
1458 rhs_count = 2;
1459 break;
1461 case GIMPLE_TERNARY_RHS:
1462 rhs_count = 3;
1463 break;
1465 default:
1466 goto fail;
1469 /* Stop if expression is too complex.  */
1470 if (op_count++ == op_limit)
1471 break;
1473 if (param_ops_p)
1475 eval_op.code = gimple_assign_rhs_code (stmt);
1476 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1477 eval_op.val[0] = NULL_TREE;
1478 eval_op.val[1] = NULL_TREE;
/* At most one operand may be non-constant; remember its position and
   continue decomposing from it.  */
1481 expr = NULL_TREE;
1482 for (unsigned i = 0; i < rhs_count; i++)
1484 tree op = gimple_op (stmt, i + 1);
1486 gcc_assert (op && !TYPE_P (op));
1487 if (is_gimple_ip_invariant (op))
1489 if (++cst_count == rhs_count)
1490 goto fail;
1492 eval_op.val[cst_count - 1] = op;
1494 else if (!expr)
1496 /* Found a non-constant operand, and record its index in rhs
1497 operands.  */
1498 eval_op.index = i;
1499 expr = op;
1501 else
1503 /* Found more than one non-constant operands.  */
1504 goto fail;
1508 if (param_ops_p)
/* Prepend so that recorded operations end up in execution order.  */
1509 vec_safe_insert (*param_ops_p, 0, eval_op);
1512 /* Failed to decompose, free resource and return.  */
1513 fail:
1514 if (param_ops_p)
1515 vec_free (*param_ops_p);
1517 return false;
1520 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1521 predicates to the CFG edges.  */
1523 static void
1524 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1525 class ipa_fn_summary *summary,
1526 class ipa_node_params *params_summary,
1527 basic_block bb)
1529 gimple *last;
1530 tree op, op2;
1531 int index;
1532 struct agg_position_info aggpos;
1533 enum tree_code code, inverted_code;
1534 edge e;
1535 edge_iterator ei;
1536 gimple *set_stmt;
1537 tree param_type;
1538 expr_eval_ops param_ops;
/* Only conditionals comparing against an interprocedural invariant can
   be turned into predicates.  */
1540 last = last_stmt (bb);
1541 if (!last || gimple_code (last) != GIMPLE_COND)
1542 return;
1543 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1544 return;
1545 op = gimple_cond_lhs (last);
1547 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1548 &param_ops))
1550 code = gimple_cond_code (last);
1551 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1553 FOR_EACH_EDGE (e, ei, bb->succs)
1555 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1556 ? code : inverted_code);
1557 /* invert_tree_comparison will return ERROR_MARK on FP
1558 comparisons that are not EQ/NE instead of returning proper
1559 unordered one.  Be sure it is not confused with NON_CONSTANT.
1561 And if the edge's target is the final block of diamond CFG graph
1562 of this conditional statement, we do not need to compute
1563 predicate for the edge because the final block's predicate must
1564 be at least as that of the first block of the statement.  */
1565 if (this_code != ERROR_MARK
1566 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1568 predicate p
1569 = add_condition (summary, params_summary, index,
1570 param_type, &aggpos,
1571 this_code, gimple_cond_rhs (last), param_ops);
1572 e->aux = edge_predicate_pool.allocate ();
1573 *(predicate *) e->aux = p;
1576 vec_free (param_ops);
1579 if (TREE_CODE (op) != SSA_NAME)
1580 return;
1581 /* Special case
1582 if (builtin_constant_p (op))
1583 constant_code
1584 else
1585 nonconstant_code.
1586 Here we can predicate nonconstant_code.  We can't
1587 really handle constant_code since we have no predicate
1588 for this and also the constant code is not known to be
1589 optimized away when inliner doesn't see operand is constant.
1590 Other optimizers might think otherwise.  */
1591 if (gimple_cond_code (last) != NE_EXPR
1592 || !integer_zerop (gimple_cond_rhs (last)))
1593 return;
1594 set_stmt = SSA_NAME_DEF_STMT (op);
1595 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1596 || gimple_call_num_args (set_stmt) != 1)
1597 return;
1598 op2 = gimple_call_arg (set_stmt, 0);
1599 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1600 return;
/* The false edge is taken when the argument is not a compile-time
   constant, so attach an is_not_constant predicate to it.  */
1601 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1603 predicate p = add_condition (summary, params_summary, index,
1604 param_type, &aggpos,
1605 predicate::is_not_constant, NULL_TREE);
1606 e->aux = edge_predicate_pool.allocate ();
1607 *(predicate *) e->aux = p;
1612 /* If BB ends by a switch we can turn into predicates, attach corresponding
1613 predicates to the CFG edges.  */
1615 static void
1616 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1617 class ipa_fn_summary *summary,
1618 class ipa_node_params *params_summary,
1619 basic_block bb)
1621 gimple *lastg;
1622 tree op;
1623 int index;
1624 struct agg_position_info aggpos;
1625 edge e;
1626 edge_iterator ei;
1627 size_t n;
1628 size_t case_idx;
1629 tree param_type;
1630 expr_eval_ops param_ops;
1632 lastg = last_stmt (bb);
1633 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1634 return;
1635 gswitch *last = as_a <gswitch *> (lastg);
1636 op = gimple_switch_index (last);
1637 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1638 &param_ops))
1639 return;
1641 auto_vec<std::pair<tree, tree> > ranges;
1642 tree type = TREE_TYPE (op);
1643 int bound_limit = opt_for_fn (fbi->node->decl,
1644 param_ipa_max_switch_predicate_bounds);
1645 int bound_count = 0;
1646 wide_int vr_wmin, vr_wmax;
1647 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
/* Start with a false predicate on every outgoing edge; the case loop
   below ORs the matching conditions in.  */
1649 FOR_EACH_EDGE (e, ei, bb->succs)
1651 e->aux = edge_predicate_pool.allocate ();
1652 *(predicate *) e->aux = false;
1655 e = gimple_switch_edge (cfun, last, 0);
1656 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1657 default case if its target basic block is in convergence point of all
1658 switch cases, which can be determined by checking whether it
1659 post-dominates the switch statement.  */
1660 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1661 bound_count = INT_MAX;
1663 n = gimple_switch_num_labels (last);
1664 for (case_idx = 1; case_idx < n; ++case_idx)
1666 tree cl = gimple_switch_label (last, case_idx);
1667 tree min = CASE_LOW (cl);
1668 tree max = CASE_HIGH (cl);
1669 predicate p;
1671 e = gimple_switch_edge (cfun, last, case_idx);
1673 /* The case value might not have same type as switch expression,
1674 extend the value based on the expression type.  */
1675 if (TREE_TYPE (min) != type)
1676 min = wide_int_to_tree (type, wi::to_wide (min));
1678 if (!max)
1679 max = min;
1680 else if (TREE_TYPE (max) != type)
1681 max = wide_int_to_tree (type, wi::to_wide (max));
1683 /* The case's target basic block is in convergence point of all switch
1684 cases, its predicate should be at least as that of the switch
1685 statement.  */
1686 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1687 p = true;
1688 else if (min == max)
1689 p = add_condition (summary, params_summary, index, param_type,
1690 &aggpos, EQ_EXPR, min, param_ops)
1691 else
1693 predicate p1, p2;
1694 p1 = add_condition (summary, params_summary, index, param_type,
1695 &aggpos, GE_EXPR, min, param_ops);
1696 p2 = add_condition (summary, params_summary,index, param_type,
1697 &aggpos, LE_EXPR, max, param_ops);
1698 p = p1 & p2;
1700 *(class predicate *) e->aux
1701 = p.or_with (summary->conds, *(class predicate *) e->aux);
1703 /* If there are too many disjoint case ranges, predicate for default
1704 case might become too complicated.  So add a limit here.  */
1705 if (bound_count > bound_limit)
1706 continue;
1708 bool new_range = true;
1710 if (!ranges.is_empty ())
1712 wide_int curr_wmin = wi::to_wide (min);
1713 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1715 /* Merge case ranges if they are continuous.  */
1716 if (curr_wmin == last_wmax + 1)
1717 new_range = false;
1718 else if (vr_type == VR_ANTI_RANGE)
1720 /* If two disjoint case ranges can be connected by anti-range
1721 of switch index, combine them to one range.  */
1722 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1723 vr_type = VR_UNDEFINED;
1724 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1725 new_range = false;
1729 /* Create/extend a case range.  And we count endpoints of range set,
1730 this number nearly equals to number of conditions that we will create
1731 for predicate of default case.  */
1732 if (new_range)
1734 bound_count += (min == max) ? 1 : 2;
1735 ranges.safe_push (std::make_pair (min, max));
1737 else
1739 bound_count += (ranges.last ().first == ranges.last ().second);
1740 ranges.last ().second = max;
/* Now compute the predicate of the default edge (edge 0).  */
1744 e = gimple_switch_edge (cfun, last, 0);
1745 if (bound_count > bound_limit)
1747 *(class predicate *) e->aux = true;
1748 vec_free (param_ops);
1749 return;
1752 predicate p_seg = true;
1753 predicate p_all = false;
1755 if (vr_type != VR_RANGE)
1757 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1758 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1761 /* Construct predicate to represent default range set that is negation of
1762 all case ranges.  Case range is classified as containing single/non-single
1763 values.  Suppose a piece of case ranges in the following.
1765 [D1...D2] [S1] ... [Sn] [D3...D4]
1767 To represent default case's range sets between two non-single value
1768 case ranges (From D2 to D3), we construct predicate as:
1770 D2 < x < D3 && x != S1 && ... && x != Sn
1772 for (size_t i = 0; i < ranges.length (); i++)
1774 tree min = ranges[i].first;
1775 tree max = ranges[i].second;
1777 if (min == max)
1778 p_seg &= add_condition (summary, params_summary, index,
1779 param_type, &aggpos, NE_EXPR,
1780 min, param_ops);
1781 else
1783 /* Do not create sub-predicate for range that is beyond low bound
1784 of switch index.  */
1785 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1787 p_seg &= add_condition (summary, params_summary, index,
1788 param_type, &aggpos,
1789 LT_EXPR, min, param_ops);
1790 p_all = p_all.or_with (summary->conds, p_seg);
1793 /* Do not create sub-predicate for range that is beyond up bound
1794 of switch index.  */
1795 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1797 p_seg = false;
1798 break;
1801 p_seg = add_condition (summary, params_summary, index,
1802 param_type, &aggpos, GT_EXPR,
1803 max, param_ops);
1807 p_all = p_all.or_with (summary->conds, p_seg);
1808 *(class predicate *) e->aux
1809 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
1811 vec_free (param_ops);
1815 /* For each BB in NODE attach to its AUX pointer predicate under
1816 which it is executable.  */
1818 static void
1819 compute_bb_predicates (struct ipa_func_body_info *fbi,
1820 struct cgraph_node *node,
1821 class ipa_fn_summary *summary,
1822 class ipa_node_params *params_summary)
1824 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1825 bool done = false;
1826 basic_block bb;
/* First attach predicates to edges of conditionals and switches.  */
1828 FOR_EACH_BB_FN (bb, my_function)
1830 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1831 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1834 /* Entry block is always executable.  */
1835 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1836 = edge_predicate_pool.allocate ();
1837 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1839 /* A simple dataflow propagation of predicates forward in the CFG.
1840 TODO: work in reverse postorder.  */
1841 while (!done)
1843 done = true;
1844 FOR_EACH_BB_FN (bb, my_function)
/* BB is executable when any predecessor is, restricted by the
   predicate attached to the connecting edge.  */
1846 predicate p = false;
1847 edge e;
1848 edge_iterator ei;
1849 FOR_EACH_EDGE (e, ei, bb->preds)
1851 if (e->src->aux)
1853 predicate this_bb_predicate
1854 = *(predicate *) e->src->aux;
1855 if (e->aux)
1856 this_bb_predicate &= (*(class predicate *) e->aux);
1857 p = p.or_with (summary->conds, this_bb_predicate);
1858 if (p == true)
1859 break;
1862 if (p != false)
1864 basic_block pdom_bb;
1866 if (!bb->aux)
1868 done = false;
1869 bb->aux = edge_predicate_pool.allocate ();
1870 *((predicate *) bb->aux) = p;
1872 else if (p != *(predicate *) bb->aux)
1874 /* This OR operation is needed to ensure monotonous data flow
1875 in the case we hit the limit on number of clauses and the
1876 and/or operations above give approximate answers.  */
1877 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1878 if (p != *(predicate *) bb->aux)
1880 done = false;
1881 *((predicate *) bb->aux) = p;
1885 /* For switch/if statement, we can OR-combine predicates of all
1886 its cases/branches to get predicate for basic block in their
1887 convergence point, but sometimes this will generate very
1888 complicated predicate.  Actually, we can get simplified
1889 predicate in another way by using the fact that predicate
1890 for a basic block must also hold true for its post dominators.
1891 To be specific, basic block in convergence point of
1892 conditional statement should include predicate of the
1893 statement.  */
1894 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1895 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1897 else if (!pdom_bb->aux)
1899 done = false;
1900 pdom_bb->aux = edge_predicate_pool.allocate ();
1901 *((predicate *) pdom_bb->aux) = p;
1903 else if (p != *(predicate *) pdom_bb->aux)
1905 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1906 if (p != *(predicate *) pdom_bb->aux)
1908 done = false;
1909 *((predicate *) pdom_bb->aux) = p;
1918 /* Return predicate specifying when the STMT might have result that is not
1919 a compile time constant.  */
1921 static predicate
1922 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1923 class ipa_fn_summary *summary,
1924 class ipa_node_params *params_summary,
1925 tree expr,
1926 vec<predicate> nonconstant_names)
1928 tree parm;
1929 int index;
/* Strip unary operations; they do not affect whether the value is a
   compile time constant.  */
1931 while (UNARY_CLASS_P (expr))
1932 expr = TREE_OPERAND (expr, 0);
1934 parm = unmodified_parm (fbi, NULL, expr, NULL);
1935 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1936 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
1937 predicate::changed, NULL_TREE);
1938 if (is_gimple_min_invariant (expr))
1939 return false;
1940 if (TREE_CODE (expr) == SSA_NAME)
1941 return nonconstant_names[SSA_NAME_VERSION (expr)];
/* For compound expressions the result is nonconstant whenever any
   operand is; OR the operand predicates, short-circuiting once an
   always-nonconstant operand is found.  */
1942 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1944 predicate p1
1945 = will_be_nonconstant_expr_predicate (fbi, summary,
1946 params_summary,
1947 TREE_OPERAND (expr, 0),
1948 nonconstant_names);
1949 if (p1 == true)
1950 return p1;
1952 predicate p2
1953 = will_be_nonconstant_expr_predicate (fbi, summary,
1954 params_summary,
1955 TREE_OPERAND (expr, 1),
1956 nonconstant_names);
1957 return p1.or_with (summary->conds, p2);
1959 else if (TREE_CODE (expr) == COND_EXPR)
1961 predicate p1
1962 = will_be_nonconstant_expr_predicate (fbi, summary,
1963 params_summary,
1964 TREE_OPERAND (expr, 0),
1965 nonconstant_names);
1966 if (p1 == true)
1967 return p1;
1969 predicate p2
1970 = will_be_nonconstant_expr_predicate (fbi, summary,
1971 params_summary,
1972 TREE_OPERAND (expr, 1),
1973 nonconstant_names);
1974 if (p2 == true)
1975 return p2;
1976 p1 = p1.or_with (summary->conds, p2);
1977 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1978 params_summary,
1979 TREE_OPERAND (expr, 2),
1980 nonconstant_names);
1981 return p2.or_with (summary->conds, p1);
1983 else if (TREE_CODE (expr) == CALL_EXPR)
1984 return true;
1985 else
1987 debug_tree (expr);
1988 gcc_unreachable ();
1990 return false;
1994 /* Return predicate specifying when the STMT might have result that is not
1995 a compile time constant.  */
1997 static predicate
1998 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1999 class ipa_fn_summary *summary,
2000 class ipa_node_params *params_summary,
2001 gimple *stmt,
2002 vec<predicate> nonconstant_names)
2004 predicate p = true;
2005 ssa_op_iter iter;
2006 tree use;
2007 tree param_type = NULL_TREE;
2008 predicate op_non_const;
2009 bool is_load;
2010 int base_index;
2011 struct agg_position_info aggpos;
2013 /* What statements might be optimized away
2014 when their arguments are constant.  */
2015 if (gimple_code (stmt) != GIMPLE_ASSIGN
2016 && gimple_code (stmt) != GIMPLE_COND
2017 && gimple_code (stmt) != GIMPLE_SWITCH
2018 && (gimple_code (stmt) != GIMPLE_CALL
2019 || !(gimple_call_flags (stmt) & ECF_CONST)))
2020 return p;
2022 /* Stores will stay anyway.  */
2023 if (gimple_store_p (stmt))
2024 return p;
2026 is_load = gimple_assign_load_p (stmt);
2028 /* Loads can be optimized when the value is known.  */
2029 if (is_load)
2031 tree op = gimple_assign_rhs1 (stmt);
2032 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
2033 &aggpos))
2034 return p;
2036 else
2037 base_index = -1;
2039 /* See if we understand all operands before we start
2040 adding conditionals.  */
2041 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2043 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2044 /* For arguments we can build a condition.  */
2045 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2046 continue;
2047 if (TREE_CODE (use) != SSA_NAME)
2048 return p;
2049 /* If we know when operand is constant,
2050 we still can say something useful.  */
2051 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
2052 continue;
2053 return p;
/* Build the resulting predicate by ORing the conditions for all
   operands (and for the loaded aggregate item, if any).  */
2056 if (is_load)
2057 op_non_const =
2058 add_condition (summary, params_summary,
2059 base_index, param_type, &aggpos,
2060 predicate::changed, NULL_TREE);
2061 else
2062 op_non_const = false;
2063 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2065 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2066 int index;
2068 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2070 if (index != base_index)
2071 p = add_condition (summary, params_summary, index,
2072 TREE_TYPE (parm), NULL,
2073 predicate::changed, NULL_TREE);
2074 else
2075 continue;
2077 else
2078 p = nonconstant_names[SSA_NAME_VERSION (use)];
2079 op_non_const = p.or_with (summary->conds, op_non_const);
/* Remember the predicate for the LHS so later statements using it can
   look it up in NONCONSTANT_NAMES.  */
2081 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2082 && gimple_op (stmt, 0)
2083 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2084 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2085 = op_non_const;
2086 return op_non_const;
2089 struct record_modified_bb_info
/* The value being tracked; only used for dumping.  */
2091 tree op;
/* Indices of basic blocks where the value may be set.  */
2092 bitmap bb_set;
/* The statement that uses the value; definitions by this statement
   itself are ignored.  */
2093 gimple *stmt;
2096 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2097 probability how often it changes between USE_BB.
2098 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2099 is in different loop nest, we can do better.
2100 This is all just estimate. In theory we look for minimal cut separating
2101 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2102 anyway. */
2104 static basic_block
2105 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2107 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2108 if (l && l->header->count < init_bb->count)
2109 return l->header;
2110 return init_bb;
2113 /* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
2114 set except for info->stmt.  */
2116 static bool
2117 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2119 struct record_modified_bb_info *info =
2120 (struct record_modified_bb_info *) data;
/* The using statement itself does not count as a modification.  */
2121 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2122 return false;
/* Clobbers (end-of-lifetime markers) are not real stores.  */
2123 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2124 return false;
/* Default definitions are attributed to the entry block; others to a
   representative block chosen by get_minimal_bb.  */
2125 bitmap_set_bit (info->bb_set,
2126 SSA_NAME_IS_DEFAULT_DEF (vdef)
2127 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2128 : get_minimal_bb
2129 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2130 gimple_bb (info->stmt))->index);
2131 if (dump_file)
2133 fprintf (dump_file, " Param ");
2134 print_generic_expr (dump_file, info->op, TDF_SLIM);
2135 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2136 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2137 get_minimal_bb
2138 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2139 gimple_bb (info->stmt))->index);
2140 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
/* Returning false keeps the walk going so all modification points are
   collected.  */
2142 return false;
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  /* Look through WITH_SIZE_EXPR wrappers to the actual value.  */
  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do an conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      /* With no usable profile, assume the value always changes.  */
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      /* Default definitions are "initialized" at function entry; otherwise
	 use the cheapest block on the dominance path between the definition
	 and the call (see get_minimal_bb).  */
      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      /* Scale to REG_BR_PROB_BASE; never return 0, which is reserved for
	 compile time invariants.  */
      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* Memory operand: walk virtual definitions to find where the location
	 may be modified.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* A known constructor means the value is a compile time invariant.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, "     Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      /* record_modified collects, into info.bb_set, indices of blocks where
	 the location may be set; the walk is bounded by the AA budget.  */
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      /* Give up when the walk was truncated or the value may be set in the
	 very block of the call.  */
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, "     Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, "     Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, "     Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      /* As in the SSA_NAME case: frequency ratio scaled to REG_BR_PROB_BASE,
	 clamped away from the reserved value 0.  */
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* With a single predecessor there is no join of control flow; the PHI
     result is fully determined, so the "unknown" predicate is false.  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Check that every predecessor is either the condition block itself
     (half diamond) or a forwarder whose single predecessor is the common
     condition block (full diamond).  FIRST_BB ends up being that common
     condition block, or NULL if the shape does not match.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  /* A diamond arm: must be a block with exactly one predecessor,
	     and all arms must share that predecessor.  */
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  /* Direct edge from the condition block.  */
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  /* The controlling statement must be a conditional comparing against an
     interprocedural invariant.  */
  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  /* A predicate that is always true carries no information.  */
  if (*p == true)
    return false;
  else
    return true;
}
2314 /* Given a PHI statement in a function described by inline properties SUMMARY
2315 and *P being the predicate describing whether the selected PHI argument is
2316 known, store a predicate for the result of the PHI statement into
2317 NONCONSTANT_NAMES, if possible. */
2319 static void
2320 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2321 predicate *p,
2322 vec<predicate> nonconstant_names)
2324 unsigned i;
2326 for (i = 0; i < gimple_phi_num_args (phi); i++)
2328 tree arg = gimple_phi_arg (phi, i)->def;
2329 if (!is_gimple_min_invariant (arg))
2331 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2332 *p = p->or_with (summary->conds,
2333 nonconstant_names[SSA_NAME_VERSION (arg)]);
2334 if (*p == true)
2335 return;
2339 if (dump_file && (dump_flags & TDF_DETAILS))
2341 fprintf (dump_file, "\t\tphi predicate: ");
2342 p->dump (dump_file, summary->conds);
2344 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.

   Returns the GIMPLE_COND using the __builtin_expect result when the
   pattern is found in BB, otherwise NULL.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk backwards through conversions from the builtin's argument
	     until we reach its defining statement; MATCH is set when that
	     definition is a comparison.  */
	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		case LT_EXPR:
		case LE_EXPR:
		case GT_EXPR:
		case GE_EXPR:
		case EQ_EXPR:
		case NE_EXPR:
		  match = true;
		  done = true;
		  break;
		CASE_CONVERT:
		  /* Look through conversions (the t2 = (long int) t1 step).  */
		  break;
		default:
		  done = true;
		  break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  /* The pattern only helps when the result feeds exactly one
	     conditional jump.  */
	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return use_stmt;
	}
    }
  return NULL;
}
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too..  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The block must terminate with a RESX; skip it and inspect the
	 statements before it.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Everything else (walking backwards) must be a clobber, a debug
     statement, or the leading label.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2468 /* Return true if STMT compute a floating point expression that may be affected
2469 by -ffast-math and similar flags. */
2471 static bool
2472 fp_expression_p (gimple *stmt)
2474 ssa_op_iter i;
2475 tree op;
2477 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2478 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2479 return true;
2480 return false;
2483 /* Return true if T references memory location that is local
2484 for the function (that means, dead after return) or read-only. */
2486 bool
2487 refs_local_or_readonly_memory_p (tree t)
2489 /* Non-escaping memory is fine. */
2490 t = get_base_address (t);
2491 if ((TREE_CODE (t) == MEM_REF
2492 || TREE_CODE (t) == TARGET_MEM_REF))
2493 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2495 /* Automatic variables are fine. */
2496 if (DECL_P (t)
2497 && auto_var_in_fn_p (t, current_function_decl))
2498 return true;
2500 /* Read-only variables are fine. */
2501 if (DECL_P (t) && TREE_READONLY (t))
2502 return true;
2504 return false;
2507 /* Return true if T is a pointer pointing to memory location that is local
2508 for the function (that means, dead after return) or read-only. */
2510 bool
2511 points_to_local_or_readonly_memory_p (tree t)
2513 /* See if memory location is clearly invalid. */
2514 if (integer_zerop (t))
2515 return flag_delete_null_pointer_checks;
2516 if (TREE_CODE (t) == SSA_NAME)
2517 return !ptr_deref_may_alias_global_p (t);
2518 if (TREE_CODE (t) == ADDR_EXPR)
2519 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2520 return false;
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.

   Fills in the node's ipa_fn_summary (size/time table, conditions, loop
   predicates, fp_expressions flag) and the ipa_call_summary of every call
   edge within the body.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = opt_for_fn (node->decl, param_uninlined_function_insns);
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  /* Recompute the summary from scratch; drop stale data.  */
  vec_free (info->conds);
  info->conds = NULL;
  vec_free (info->size_time_table);
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length (), true);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->dump_name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list.  */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the prologue/epilogue overhead that is only paid when the
     function is not inlined.  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (opt_for_fn (node->decl,
				       param_uninlined_function_insns)
			   * ipa_fn_summary::size_scale,
			   opt_for_fn (node->decl,
				       param_uninlined_function_time),
			   bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      /* FREQ is the execution frequency of BB relative to function entry.  */
      freq = bb->count.to_sreal_scale (entry_count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute nonconstant predicates for PHI results; only meaningful
	 when the whole block forms a (half) diamond whose condition is
	 understood (phi_result_unknown_predicate).  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     __builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count, true);
		  /* For each argument record how likely it is to change
		     between invocations and whether it points to local or
		     read-only memory.  */
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		      es->param[i].points_to_local_or_readonly_memory
			= points_to_local_or_readonly_memory_p
			    (gimple_call_arg (stmt, i));
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      if (edge->speculative)
		{
		  cgraph_edge *indirect
		    = edge->speculative_call_indirect_edge ();
		  ipa_call_summary *es2
		    = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);

		  /* Edge is the first direct call.
		     create and duplicate call summaries for multiple
		     speculative call targets.  */
		  for (cgraph_edge *direct
			 = edge->next_speculative_call_target ();
		       direct;
		       direct = direct->next_speculative_call_target ())
		    {
		      ipa_call_summary *es3
			= ipa_call_summaries->get_create (direct);
		      ipa_call_summaries->duplicate (edge, direct,
						     es, es3);
		    }
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      /* PROB in <0,2> is twice the probability the statement is
		 removed when the call is inlined.  */
	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      class predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}
	    }

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }
	}
    }
  free (order);

  /* Compute predicates guarding loop iteration counts and strides; used
     by the inliner to produce loop related inline hints.  */
  if (nonconstant_names.exists () && !early)
    {
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class loop *loop;
      unsigned max_loop_predicates = opt_for_fn (node->decl,
						 param_ipa_max_loop_predicates);

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      FOR_EACH_LOOP (loop, 0)
	{
	  predicate loop_iterations = true;
	  sreal header_freq;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  /* Unreachable loop headers (no predicate) are skipped.  */
	  if (!loop->header->aux)
	    continue;

	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);

	  bb_predicate = *(predicate *) loop->header->aux;
	  auto_vec<edge> exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	    {
	      predicate will_be_nonconstant
		= will_be_nonconstant_expr_predicate (&fbi, info,
						      params_summary,
						      niter_desc.niter,
						      nonconstant_names);
	      if (will_be_nonconstant != true)
		will_be_nonconstant = bb_predicate & will_be_nonconstant;
	      if (will_be_nonconstant != true
		  && will_be_nonconstant != false)
		loop_iterations &= will_be_nonconstant;
	    }
	  add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
				      phdr_freq, max_loop_predicates);
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  predicate loop_stride = true;
	  basic_block *body = get_loop_body (loop);
	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      if (!body[i]->aux)
		continue;

	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only induction variables with non-invariant step are
		     interesting: their stride may become known after
		     specialization.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  add_freqcounting_predicate (&s->loop_strides, loop_stride,
				      phdr_freq, max_loop_predicates);
	  free (body);
	}
      scev_finalize ();
    }
  /* Release the BB and edge predicates stored in aux fields.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no analyzable body; give them a fixed call cost plus
	 an extra penalty when not inlined.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* During IPA profile merging we may be called w/o virtual SSA form
	 built.  */
      update_ssa (TODO_update_ssa_only_virtuals);

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      /* E is non-NULL iff the loop above found such a call.  */
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  /* In LTO mode we may have speculative edges set.  */
  gcc_assert (in_lto_p || size_info->size == size_info->self_size);
}
/* Compute parameters of functions used by inliner using
   current_function_decl.

   Pass execute hook; always returns 0 (no extra TODO flags for the pass
   manager).  */

static unsigned int
compute_fn_summary_for_current (void)
{
  compute_fn_summary (cgraph_node::get (current_function_decl), true);
  return 0;
}
/* Estimate benefit devirtualizing indirect edge IE and return true if it can
   be devirtualized and inlined, provided m_known_vals, m_known_contexts and
   m_known_aggs in AVALS.  Return false straight away if AVALS is NULL.

   On success *SIZE and *TIME are decreased by the difference between
   indirect and direct call cost.  */

static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      ipa_call_arg_values *avals)
{
  tree target;
  struct cgraph_node *callee;
  class ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  /* Without any known values or contexts there is nothing to
     devirtualize upon.  */
  if (!avals
      || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  target = ipa_get_indirect_edge_target (ie, avals, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  /* The target must resolve to an available definition for the inlining
     part of the benefit to materialize.  */
  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  if (isummary == NULL)
    return false;

  return isummary->inlinable;
}
3154 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3155 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3156 devirtualized. AVALS, if non-NULL, describes the context of the call site
3157 as far as values of parameters are concerened. */
3159 static inline void
3160 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3161 sreal *time, ipa_call_arg_values *avals,
3162 ipa_hints *hints)
3164 class ipa_call_summary *es = ipa_call_summaries->get (e);
3165 int call_size = es->call_stmt_size;
3166 int call_time = es->call_stmt_time;
3167 int cur_size;
3169 if (!e->callee && hints && e->maybe_hot_p ()
3170 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3171 *hints |= INLINE_HINT_indirect_call;
3172 cur_size = call_size * ipa_fn_summary::size_scale;
3173 *size += cur_size;
3174 if (min_size)
3175 *min_size += cur_size;
3176 if (time)
3177 *time += ((sreal)call_time) * e->sreal_frequency ();
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS describe the context of the call
   site.

   Helper for estimate_calls_size_and_time which does the same but
   (in most cases) faster.  */

static void
estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
				int *min_size, sreal *time,
				ipa_hints *hints,
				clause_t possible_truths,
				ipa_call_arg_values *avals)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      /* Edges already inlined have no summary; account the calls of the
	 inlined body recursively instead.  */
      if (!e->inline_failed)
	{
	  gcc_checking_assert (!ipa_call_summaries->get (e));
	  estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
					  hints, possible_truths, avals);

	  continue;
	}
      class ipa_call_summary *es = ipa_call_summaries->get (e);

      /* Do not care about zero sized builtins.  */
      if (!es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      /* Only account edges whose predicate holds in this context.  */
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	{
	  /* Predicates of calls shall not use NOT_CHANGED codes,
	     so we do not need to compute probabilities.  */
	  estimate_edge_size_and_time (e, size,
				       es->predicate ? NULL : min_size,
				       time, avals, hints);
	}
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, avals, hints);
    }
}
3235 /* Populate sum->call_size_time_table for edges from NODE. */
3237 static void
3238 summarize_calls_size_and_time (struct cgraph_node *node,
3239 ipa_fn_summary *sum)
3241 struct cgraph_edge *e;
3242 for (e = node->callees; e; e = e->next_callee)
3244 if (!e->inline_failed)
3246 gcc_checking_assert (!ipa_call_summaries->get (e));
3247 summarize_calls_size_and_time (e->callee, sum);
3248 continue;
3250 int size = 0;
3251 sreal time = 0;
3253 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3255 struct predicate pred = true;
3256 class ipa_call_summary *es = ipa_call_summaries->get (e);
3258 if (es->predicate)
3259 pred = *es->predicate;
3260 sum->account_size_time (size, time, pred, pred, true);
3262 for (e = node->indirect_calls; e; e = e->next_callee)
3264 int size = 0;
3265 sreal time = 0;
3267 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3268 struct predicate pred = true;
3269 class ipa_call_summary *es = ipa_call_summaries->get (e);
3271 if (es->predicate)
3272 pred = *es->predicate;
3273 sum->account_size_time (size, time, pred, pred, true);
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
   context of the call site.

   Two strategies are used: a fast path that sums the precomputed
   call_size_time_table, and a slow path (estimate_calls_size_and_time_1)
   that walks every edge.  With checking enabled both are run and compared.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      ipa_call_arg_values *avals)
{
  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
  bool use_table = true;

  gcc_assert (node->callees || node->indirect_calls);

  /* During early inlining we do not calculate info for very
     large functions and thus there is no need for producing
     summaries.  */
  if (!ipa_node_params_sum)
    use_table = false;
  /* Do not calculate summaries for simple wrappers; it is waste
     of memory.  */
  else if (node->callees && node->indirect_calls
	   && node->callees->inline_failed && !node->callees->next_callee)
    use_table = false;
  /* If there is an indirect edge that may be optimized, we need
     to go the slow way.  */
  else if (avals && hints
	   && (avals->m_known_vals.length ()
	       || avals->m_known_contexts.length ()
	       || avals->m_known_aggs.length ()))
    {
      class ipa_node_params *params_summary = IPA_NODE_REF (node);
      unsigned int nargs = params_summary
			   ? ipa_get_param_count (params_summary) : 0;

      /* The table is context-insensitive; any known argument value that
	 an indirect or polymorphic call depends on invalidates it.  */
      for (unsigned int i = 0; i < nargs && use_table; i++)
	{
	  if (ipa_is_param_used_by_indirect_call (params_summary, i)
	      && (avals->safe_sval_at (i)
		  || (avals->m_known_aggs.length () > i
		      && avals->m_known_aggs[i].items.length ())))
	    use_table = false;
	  else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
		   && (avals->m_known_contexts.length () > i
		       && !avals->m_known_contexts[i].useless_p ()))
	    use_table = false;
	}
    }

  /* Fast path is via the call size time table.  */
  if (use_table)
    {
      /* Build summary if it is absent.  */
      if (!sum->call_size_time_table)
	{
	  /* Entry 0 is the unconditional baseline; min_size reads it.  */
	  predicate true_pred = true;
	  sum->account_size_time (0, 0, true_pred, true_pred, true);
	  summarize_calls_size_and_time (node, sum);
	}

      int old_size = *size;
      sreal old_time = time ? *time : 0;

      if (min_size)
	*min_size += (*sum->call_size_time_table)[0].size;

      unsigned int i;
      size_time_entry *e;

      /* Walk the table and account sizes and times.  */
      for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
	   i++)
	if (e->exec_predicate.evaluate (possible_truths))
	  {
	    *size += e->size;
	    if (time)
	      *time += e->time;
	  }

      /* Be careful and see if both methods agree.  */
      if ((flag_checking || dump_file)
	  /* Do not try to sanity check when we know we lost some
	     precision.  */
	  && sum->call_size_time_table->length ()
	     < ipa_fn_summary::max_size_time_table_size)
	{
	  estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time,
					  NULL, possible_truths, avals);
	  /* Sizes must match exactly; times may drift by rounding, in
	     which case only a dump message is produced.  */
	  gcc_assert (*size == old_size);
	  if (time && (*time - old_time > 1 || *time - old_time < -1)
	      && dump_file)
	    fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
		     old_time.to_double (),
		     time->to_double ());
	}
    }
  /* Slow path by walking all edges.  */
  else
    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
				    possible_truths, avals);
}
3381 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3382 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3383 caller. */
3385 ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
3386 clause_t nonspec_possible_truths,
3387 vec<inline_param_summary>
3388 inline_param_summary,
3389 ipa_auto_call_arg_values *arg_values)
3390 : m_node (node), m_possible_truths (possible_truths),
3391 m_nonspec_possible_truths (nonspec_possible_truths),
3392 m_inline_param_summary (inline_param_summary),
3393 m_avals (arg_values)
3397 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3399 void
3400 ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
3402 m_node = ctx.m_node;
3403 m_possible_truths = ctx.m_possible_truths;
3404 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3405 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3406 unsigned int nargs = params_summary
3407 ? ipa_get_param_count (params_summary) : 0;
3409 m_inline_param_summary = vNULL;
3410 /* Copy the info only if there is at least one useful entry. */
3411 if (ctx.m_inline_param_summary.exists ())
3413 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3415 for (unsigned int i = 0; i < n; i++)
3416 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3417 && !ctx.m_inline_param_summary[i].useless_p ())
3419 m_inline_param_summary
3420 = ctx.m_inline_param_summary.copy ();
3421 break;
3424 m_avals.m_known_vals = vNULL;
3425 if (ctx.m_avals.m_known_vals.exists ())
3427 unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);
3429 for (unsigned int i = 0; i < n; i++)
3430 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3431 && ctx.m_avals.m_known_vals[i])
3433 m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
3434 break;
3438 m_avals.m_known_contexts = vNULL;
3439 if (ctx.m_avals.m_known_contexts.exists ())
3441 unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);
3443 for (unsigned int i = 0; i < n; i++)
3444 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3445 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3447 m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
3448 break;
3452 m_avals.m_known_aggs = vNULL;
3453 if (ctx.m_avals.m_known_aggs.exists ())
3455 unsigned int n = MIN (ctx.m_avals.m_known_aggs.length (), nargs);
3457 for (unsigned int i = 0; i < n; i++)
3458 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3459 && !ctx.m_avals.m_known_aggs[i].is_empty ())
3461 m_avals.m_known_aggs
3462 = ipa_copy_agg_values (ctx.m_avals.m_known_aggs);
3463 break;
3467 m_avals.m_known_value_ranges = vNULL;
3470 /* Release memory used by known_vals/contexts/aggs vectors. and
3471 inline_param_summary. */
3473 void
3474 ipa_cached_call_context::release ()
3476 /* See if context is initialized at first place. */
3477 if (!m_node)
3478 return;
3479 ipa_release_agg_values (m_avals.m_known_aggs, true);
3480 m_avals.m_known_vals.release ();
3481 m_avals.m_known_contexts.release ();
3482 m_inline_param_summary.release ();
/* Return true if CTX describes the same call context as THIS.

   Vectors are compared entry-wise, but only entries for parameters that
   the summaries actually use are significant; a missing entry and a
   "useless" entry are considered equal, and the test is applied
   symmetrically in both directions.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  /* Compare inline parameter summaries.  */
  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  /* Entries for parameters no predicate refers to do not matter.  */
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  /* Missing or useless on our side: the other side must also be
	     missing or useless.  */
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  /* Same check in the other direction.  */
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
		 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  /* Compare known constant argument values.  */
  if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
	    {
	      if (i < ctx.m_avals.m_known_vals.length ()
		  && ctx.m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_vals.length ()
	      || !ctx.m_avals.m_known_vals[i])
	    {
	      if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  /* Known values are trees compared by pointer identity here.  */
	  if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
	    return false;
	}
    }
  /* Compare known polymorphic call contexts.  */
  if (m_avals.m_known_contexts.exists ()
      || ctx.m_avals.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_contexts.length ()
	      || m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_avals.m_known_contexts.length ()
		  && !ctx.m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_contexts.length ()
	      || ctx.m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_avals.m_known_contexts.length ()
		  && !m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_contexts[i].equal_to
		 (ctx.m_avals.m_known_contexts[i]))
	    return false;
	}
    }
  /* Compare known aggregate contents.  */
  if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_aggs.length ()
	      || m_avals.m_known_aggs[i].is_empty ())
	    {
	      if (i < ctx.m_avals.m_known_aggs.length ()
		  && !ctx.m_avals.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_aggs.length ()
	      || ctx.m_avals.m_known_aggs[i].is_empty ())
	    {
	      if (i < m_avals.m_known_aggs.length ()
		  && !m_avals.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_aggs[i].equal_to (ctx.m_avals.m_known_aggs[i]))
	    return false;
	}
    }
  return true;
}
/* Fill in the selected fields in ESTIMATES with value estimated for call in
   this context.  Always compute size and min_size.  Only compute time and
   nonspecialized_time if EST_TIMES is true.  Only compute hints if EST_HINTS
   is true.  */

void
ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
					  bool est_times, bool est_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  sreal loops_with_known_iterations = 0;
  sreal loops_with_known_strides = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s\n"
	       "   Known to be false: ", m_node->dump_name ());

      /* Dump every condition whose bit is cleared in the clause.  */
      for (i = predicate::not_inlined_condition;
	   i < (predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* First account the calls; time/hints only when requested.  */
  if (m_node->callees || m_node->indirect_calls)
    estimate_calls_size_and_time (m_node, &size, &min_size,
				  est_times ? &time : NULL,
				  est_hints ? &hints : NULL, m_possible_truths,
				  &m_avals);

  sreal nonspecialized_time = time;

  /* Entry 0 of size_time_table is the unconditional part of the body.  */
  min_size += (*info->size_time_table)[0].size;
  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
	{
	  bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!est_times)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      if (nonconst)
		time += e->time;
	    }
	  else
	    {
	      /* With inline param summaries, scale time by the probability
		 that the entry's nonconst predicate actually holds.  */
	      int prob = e->nonconst_predicate.probability
			       (info->conds, m_possible_truths,
				m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      if (prob == REG_BR_PROB_BASE)
		time += e->time;
	      else
		time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
	}
    }
  gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
  gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (est_hints)
    {
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;

      /* A loop hint fires when the predicate guarding the unknown-bound
	 case is known false in this context.  */
      ipa_freqcounting_predicate *fcp;
      for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_iterations;
	    loops_with_known_iterations += fcp->freq;
	  }
      estimates->loops_with_known_iterations = loops_with_known_iterations;

      for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_stride;
	    loops_with_known_strides += fcp->freq;
	  }
      estimates->loops_with_known_strides = loops_with_known_strides;
    }

  /* Sizes are accumulated in size_scale units; scale them down.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n   size:%i", (int) size);
      if (est_times)
	fprintf (dump_file, " time:%f nonspec time:%f",
		 time.to_double (), nonspecialized_time.to_double ());
      if (est_hints)
	fprintf (dump_file, " loops with known iterations:%f "
		 "known strides:%f", loops_with_known_iterations.to_double (),
		 loops_with_known_strides.to_double ());
      fprintf (dump_file, "\n");
    }
  if (est_times)
    {
      estimates->time = time;
      estimates->nonspecialized_time = nonspecialized_time;
    }
  estimates->size = size;
  estimates->min_size = min_size;
  if (est_hints)
    estimates->hints = hints;
  return;
}
3767 /* Estimate size and time needed to execute callee of EDGE assuming that
3768 parameters known to be constant at caller of EDGE are propagated.
3769 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3770 and types for parameters. */
3772 void
3773 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3774 ipa_auto_call_arg_values *avals,
3775 ipa_call_estimates *estimates)
3777 clause_t clause, nonspec_clause;
3779 evaluate_conditions_for_known_args (node, false, avals, &clause,
3780 &nonspec_clause);
3781 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
3782 ctx.estimate_size_and_time (estimates);
3785 /* Return stack frame offset where frame of NODE is supposed to start inside
3786 of the function it is inlined to.
3787 Return 0 for functions that are not inlined. */
3789 HOST_WIDE_INT
3790 ipa_get_stack_frame_offset (struct cgraph_node *node)
3792 HOST_WIDE_INT offset = 0;
3793 if (!node->inlined_to)
3794 return 0;
3795 node = node->callers->caller;
3796 while (true)
3798 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3799 if (!node->inlined_to)
3800 return offset;
3801 node = node->callers->caller;
3806 /* Update summary information of inline clones after inlining.
3807 Compute peak stack usage. */
3809 static void
3810 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3812 struct cgraph_edge *e;
3814 ipa_propagate_frequency (node);
3815 for (e = node->callees; e; e = e->next_callee)
3817 if (!e->inline_failed)
3818 inline_update_callee_summaries (e->callee, depth);
3819 else
3820 ipa_call_summaries->get (e)->loop_depth += depth;
3822 for (e = node->indirect_calls; e; e = e->next_callee)
3823 ipa_call_summaries->get (e)->loop_depth += depth;
3826 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3827 INLINED_EDGE has been inlined.
3829 When function A is inlined in B and A calls C with parameter that
3830 changes with probability PROB1 and C is known to be passthrough
3831 of argument if B that change with probability PROB2, the probability
3832 of change is now PROB1*PROB2. */
3834 static void
3835 remap_edge_params (struct cgraph_edge *inlined_edge,
3836 struct cgraph_edge *edge)
3838 if (ipa_node_params_sum)
3840 int i;
3841 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3842 if (!args)
3843 return;
3844 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3845 class ipa_call_summary *inlined_es
3846 = ipa_call_summaries->get (inlined_edge);
3848 if (es->param.length () == 0)
3849 return;
3851 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3853 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3854 if (jfunc->type == IPA_JF_PASS_THROUGH
3855 || jfunc->type == IPA_JF_ANCESTOR)
3857 int id = jfunc->type == IPA_JF_PASS_THROUGH
3858 ? ipa_get_jf_pass_through_formal_id (jfunc)
3859 : ipa_get_jf_ancestor_formal_id (jfunc);
3860 if (id < (int) inlined_es->param.length ())
3862 int prob1 = es->param[i].change_prob;
3863 int prob2 = inlined_es->param[id].change_prob;
3864 int prob = combine_probabilities (prob1, prob2);
3866 if (prob1 && prob2 && !prob)
3867 prob = 1;
3869 es->param[i].change_prob = prob;
3871 if (inlined_es
3872 ->param[id].points_to_local_or_readonly_memory)
3873 es->param[i].points_to_local_or_readonly_memory = true;
3875 if (!es->param[i].points_to_local_or_readonly_memory
3876 && jfunc->type == IPA_JF_CONST
3877 && points_to_local_or_readonly_memory_p
3878 (ipa_get_jf_constant (jfunc)))
3879 es->param[i].points_to_local_or_readonly_memory = true;
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<HOST_WIDE_INT> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  /* NEXT is fetched before processing E because edge_set_predicate may
     remove E from the list (e.g. when the predicate becomes false).  */
  for (e = node->callees; e; e = next)
    {
      predicate p;
      next = e->next_callee;

      if (e->inline_failed)
	{
	  class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_params (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* Edges with no predicate inherit the predicate guarding the
	       inlined call itself.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* Already-inlined callee: recurse into its body.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_params (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
3952 /* Run remap_after_inlining on each predicate in V. */
3954 static void
3955 remap_freqcounting_predicate (class ipa_fn_summary *info,
3956 class ipa_node_params *params_summary,
3957 class ipa_fn_summary *callee_info,
3958 vec<ipa_freqcounting_predicate, va_gc> *v,
3959 vec<int> operand_map,
3960 vec<HOST_WIDE_INT> offset_map,
3961 clause_t possible_truths,
3962 predicate *toplev_predicate)
3965 ipa_freqcounting_predicate *fcp;
3966 for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
3968 predicate p
3969 = fcp->predicate->remap_after_inlining (info, params_summary,
3970 callee_info, operand_map,
3971 offset_map, possible_truths,
3972 *toplev_predicate);
3973 if (p != false && p != true)
3974 *fcp->predicate &= p;
/* We inlined EDGE.  Update summary of the function we inlined into.

   The callee's size_time_table entries are remapped into the caller's
   parameter space and accounted into the caller's summary; edge summaries
   and loop predicates are remapped; stack usage is merged; and finally the
   summaries of the now-inlined edge and callee are released.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<HOST_WIDE_INT, 8> offset_map;
  int i;
  predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  class ipa_node_params *params_summary = (ipa_node_params_sum
					   ? IPA_NODE_REF (to) : NULL);

  /* Everything inlined from the callee is guarded by the predicate of
     the call itself, when it has one.  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  /* Compute the clause of conditions known to hold at the call; the
     argument values themselves are not needed here.  */
  if (callee_info->conds)
    {
      ipa_auto_call_arg_values avals;
      evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
    }
  if (ipa_node_params_sum && callee_info->conds)
    {
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count, true);
	  offset_map.safe_grow_cleared (count, true);
	}
      /* Build maps from the callee's parameters to the caller's:
	 OPERAND_MAP[i] is the caller's formal (or -1), OFFSET_MAP[i] the
	 aggregate offset adjustment (or -1 when aggregates are clobbered).  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}
    }
  /* Account remapped size_time_table entries, scaling time by the edge
     frequency and by the probability of the nonconst predicate.  */
  sreal freq = edge->sreal_frequency ();
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
		     (info, params_summary,
		      callee_info, operand_map,
		      offset_map, clause,
		      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  if (prob != REG_BR_PROB_BASE)
	    add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  remap_edge_summaries (edge, edge->callee, info, params_summary,
			callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_iterations, operand_map,
				offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_strides, operand_map,
				offset_map, clause, &toplev_predicate);

  /* Peak stack usage is the callee's frame placed at its offset within
     the caller's combined frame.  */
  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);
  if (info->call_size_time_table)
    {
      int edge_size = 0;
      sreal edge_time = 0;

      estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
      /* Unaccount size and time of the optimized out call.  */
      info->account_size_time (-edge_size, -edge_time,
			       es->predicate ? *es->predicate : true,
			       es->predicate ? *es->predicate : true,
			       true);
      /* Account new calls.  */
      summarize_calls_size_and_time (edge->callee, info);
    }

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}
4117 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4118 overall size and time. Recompute it.
4119 If RESET is true also recompute call_time_size_table. */
4121 void
4122 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4124 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4125 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4126 size_time_entry *e;
4127 int i;
4129 size_info->size = 0;
4130 info->time = 0;
4131 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4133 size_info->size += e->size;
4134 info->time += e->time;
4136 info->min_size = (*info->size_time_table)[0].size;
4137 if (reset)
4138 vec_free (info->call_size_time_table);
4139 if (node->callees || node->indirect_calls)
4140 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4141 &info->time, NULL,
4142 ~(clause_t) (1 << predicate::false_condition),
4143 NULL);
4144 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4145 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4149 /* This function performs intraprocedural analysis in NODE that is required to
4150 inline indirect calls. */
4152 static void
4153 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4155 ipa_analyze_node (node);
4156 if (dump_file && (dump_flags & TDF_DETAILS))
4158 ipa_print_node_params (dump_file, node);
4159 ipa_print_node_jump_functions (dump_file, node);
4164 /* Note function body size. */
4166 void
4167 inline_analyze_function (struct cgraph_node *node)
4169 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4171 if (dump_file)
4172 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4173 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4174 inline_indirect_intraprocedural_analysis (node);
4175 compute_fn_summary (node, false);
4176 if (!optimize)
4178 struct cgraph_edge *e;
4179 for (e = node->callees; e; e = e->next_callee)
4180 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4181 for (e = node->indirect_calls; e; e = e->next_callee)
4182 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4185 pop_cfun ();
/* Called when new function is inserted to callgraph late.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  /* The preallocated summary argument is intentionally unused; the
     summary for NODE is recomputed from scratch.  */
  inline_analyze_function (node);
}
4197 /* Note function body size. */
4199 static void
4200 ipa_fn_summary_generate (void)
4202 struct cgraph_node *node;
4204 FOR_EACH_DEFINED_FUNCTION (node)
4205 if (DECL_STRUCT_FUNCTION (node->decl))
4206 node->versionable = tree_versionable_function_p (node->decl);
4208 ipa_fn_summary_alloc ();
4210 ipa_fn_summaries->enable_insertion_hook ();
4212 ipa_register_cgraph_hooks ();
4214 FOR_EACH_DEFINED_FUNCTION (node)
4215 if (!node->alias
4216 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4217 || opt_for_fn (node->decl, optimize)))
4218 inline_analyze_function (node);
/* Read call summary for edge E from IB.  When PREVAILS is false the data
   must still be consumed to keep the stream position in sync, but nothing
   is recorded.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  class ipa_call_summary *es = prevails
			       ? ipa_call_summaries->get_create (e) : NULL;
  predicate p;
  int length, i;

  /* Stream order must match write_ipa_call_summary exactly.  */
  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    /* Discard the bit for non-prevailing edges.  */
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  /* Per-parameter info is kept only when the call may resolve within
     this translation unit; otherwise the values are read and dropped.  */
  if (length && es && e->possibly_call_in_translation_unit_p ())
    {
      es->param.safe_grow_cleared (length, true);
      for (i = 0; i < length; i++)
	{
	  es->param[i].change_prob = streamer_read_uhwi (ib);
	  es->param[i].points_to_local_or_readonly_memory
	    = streamer_read_uhwi (ib);
	}
    }
  else
    {
      for (i = 0; i < length; i++)
	{
	  streamer_read_uhwi (ib);
	  streamer_read_uhwi (ib);
	}
    }
}
/* Stream in inline summaries from the section.  */

/* Parse one LTO_section_ipa_fn_summary section of FILE_DATA (raw bytes DATA,
   LEN bytes long) and populate ipa_fn_summaries / ipa_size_summaries for the
   prevailing nodes it describes.  The read order here must mirror the write
   order in ipa_fn_summary_write / write_ipa_call_summary exactly: every
   record is consumed even for non-prevailing nodes (info == NULL), in which
   case the values are read and discarded to keep the stream in sync.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  /* The section starts with a standard LTO function header followed by the
     main stream and then the string table.  */
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  /* Number of function records in this section.  */
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* Only the prevailing copy of a node gets summaries; for the others
	 we still must walk the record to advance the input block.  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      /* Two flag bits: inlinable and fp_expressions.  */
      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Conditions used by the predicates of this summary.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
	vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  /* Offset is streamed only for aggregate-content conditions.  */
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
		    (params_summary, c.operand_num, true);
	  /* Chain of operations applied to the parameter before the
	     comparison; the operand count depends on the RHS class of
	     the operation code.  */
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
		c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      /* size_time_table entries: size/time contributions guarded by
	 predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table);
      if (info && count2)
	vec_safe_reserve_exact (info->size_time_table, count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table->quick_push (e);
	}

      /* Frequency-counting predicates for loop iteration estimates.  */
      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_iterations, fcp);
	    }
	}
      /* Frequency-counting predicates for loop stride estimates.  */
      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_strides, fcp);
	    }
	}
      /* Finally the per-edge call summaries, direct calls first.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
4458 /* Read inline summary. Jump functions are shared among ipa-cp
4459 and inliner, so when ipa-cp is active, we don't need to write them
4460 twice. */
4462 static void
4463 ipa_fn_summary_read (void)
4465 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4466 struct lto_file_decl_data *file_data;
4467 unsigned int j = 0;
4469 ipa_fn_summary_alloc ();
4471 while ((file_data = file_data_vec[j++]))
4473 size_t len;
4474 const char *data
4475 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4476 &len);
4477 if (data)
4478 inline_read_section (file_data, data, len);
4479 else
4480 /* Fatal error here. We do not want to support compiling ltrans units
4481 with different version of compiler or different flags than the WPA
4482 unit, so this should never happen. */
4483 fatal_error (input_location,
4484 "ipa inline summary is missing in input file");
4486 ipa_register_cgraph_hooks ();
4487 if (!flag_ipa_cp)
4488 ipa_prop_read_jump_functions ();
4490 gcc_assert (ipa_fn_summaries);
4491 ipa_fn_summaries->enable_insertion_hook ();
4495 /* Write inline summary for edge E to OB. */
4497 static void
4498 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4500 class ipa_call_summary *es = ipa_call_summaries->get (e);
4501 int i;
4503 streamer_write_uhwi (ob, es->call_stmt_size);
4504 streamer_write_uhwi (ob, es->call_stmt_time);
4505 streamer_write_uhwi (ob, es->loop_depth);
4507 bitpack_d bp = bitpack_create (ob->main_stream);
4508 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4509 streamer_write_bitpack (&bp);
4511 if (es->predicate)
4512 es->predicate->stream_out (ob);
4513 else
4514 streamer_write_uhwi (ob, 0);
4515 streamer_write_uhwi (ob, es->param.length ());
4516 for (i = 0; i < (int) es->param.length (); i++)
4518 streamer_write_uhwi (ob, es->param[i].change_prob);
4519 streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory);
4524 /* Write inline summary for node in SET.
4525 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4526 active, we don't need to write them twice. */
4528 static void
4529 ipa_fn_summary_write (void)
4531 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4532 lto_symtab_encoder_iterator lsei;
4533 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4534 unsigned int count = 0;
4536 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4537 lsei_next_function_in_partition (&lsei))
4539 cgraph_node *cnode = lsei_cgraph_node (lsei);
4540 if (cnode->definition && !cnode->alias)
4541 count++;
4543 streamer_write_uhwi (ob, count);
4545 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4546 lsei_next_function_in_partition (&lsei))
4548 cgraph_node *cnode = lsei_cgraph_node (lsei);
4549 if (cnode->definition && !cnode->alias)
4551 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4552 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4553 struct bitpack_d bp;
4554 struct cgraph_edge *edge;
4555 int i;
4556 size_time_entry *e;
4557 struct condition *c;
4559 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4560 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4561 streamer_write_hwi (ob, size_info->self_size);
4562 info->time.stream_out (ob);
4563 bp = bitpack_create (ob->main_stream);
4564 bp_pack_value (&bp, info->inlinable, 1);
4565 bp_pack_value (&bp, false, 1);
4566 bp_pack_value (&bp, info->fp_expressions, 1);
4567 streamer_write_bitpack (&bp);
4568 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4569 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4571 int j;
4572 struct expr_eval_op *op;
4574 streamer_write_uhwi (ob, c->operand_num);
4575 streamer_write_uhwi (ob, c->code);
4576 stream_write_tree (ob, c->type, true);
4577 stream_write_tree (ob, c->val, true);
4578 bp = bitpack_create (ob->main_stream);
4579 bp_pack_value (&bp, c->agg_contents, 1);
4580 bp_pack_value (&bp, c->by_ref, 1);
4581 streamer_write_bitpack (&bp);
4582 if (c->agg_contents)
4583 streamer_write_uhwi (ob, c->offset);
4584 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4585 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4587 streamer_write_uhwi (ob, op->code);
4588 stream_write_tree (ob, op->type, true);
4589 if (op->val[0])
4591 bp = bitpack_create (ob->main_stream);
4592 bp_pack_value (&bp, op->index, 2);
4593 streamer_write_bitpack (&bp);
4594 stream_write_tree (ob, op->val[0], true);
4595 if (op->val[1])
4596 stream_write_tree (ob, op->val[1], true);
4600 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4601 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4603 streamer_write_uhwi (ob, e->size);
4604 e->time.stream_out (ob);
4605 e->exec_predicate.stream_out (ob);
4606 e->nonconst_predicate.stream_out (ob);
4608 ipa_freqcounting_predicate *fcp;
4609 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4610 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4612 fcp->predicate->stream_out (ob);
4613 fcp->freq.stream_out (ob);
4615 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4616 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4618 fcp->predicate->stream_out (ob);
4619 fcp->freq.stream_out (ob);
4621 for (edge = cnode->callees; edge; edge = edge->next_callee)
4622 write_ipa_call_summary (ob, edge);
4623 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4624 write_ipa_call_summary (ob, edge);
4627 streamer_write_char_stream (ob->main_stream, 0);
4628 produce_asm (ob, NULL);
4629 destroy_output_block (ob);
4631 if (!flag_ipa_cp)
4632 ipa_prop_write_jump_functions ();
4636 /* Release function summary. */
4638 void
4639 ipa_free_fn_summary (void)
4641 if (!ipa_call_summaries)
4642 return;
4643 ggc_delete (ipa_fn_summaries);
4644 ipa_fn_summaries = NULL;
4645 delete ipa_call_summaries;
4646 ipa_call_summaries = NULL;
4647 edge_predicate_pool.release ();
4648 /* During IPA this is one of largest datastructures to release. */
4649 if (flag_wpa)
4650 ggc_trim ();
4653 /* Release function summary. */
4655 void
4656 ipa_free_size_summary (void)
4658 if (!ipa_size_summaries)
4659 return;
4660 delete ipa_size_summaries;
4661 ipa_size_summaries = NULL;
namespace {

/* Pass metadata for the GIMPLE-level local function summary pass; it
   computes the per-function data, so it needs no TODO flags.  */
const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass that computes the fn summary for the current function by
   delegating to compute_fn_summary_for_current.  */
class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  /* Cloneable so the pass can appear multiple times in the pipeline.  */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
/* Factory used by the pass manager to instantiate the local fnsummary
   pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
/* Free inline summary.  */

namespace {

/* Pass metadata for the simple IPA pass that releases the function
   summaries once the inliner no longer needs them.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Simple IPA pass that frees fn summaries, the ipa-prop structures and
   (outside of WPA) the size summaries.  */
class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Pass parameter 0 selects a variant of the pass — presumably an
     early/"small" instantiation; the consumer of small_p is not visible
     here (see note below).  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return true; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      /* Free ipa-prop structures if they are no longer needed.  */
      ipa_free_all_structures_after_iinln ();
      /* Size summaries are kept during WPA; free them only otherwise.  */
      if (!flag_wpa)
	ipa_free_size_summary ();
      return 0;
    }

private:
  /* NOTE(review): small_p is set via set_pass_param but never read by
     gate/execute in this revision — confirm whether it is still needed.  */
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
/* Factory used by the pass manager to instantiate the free-fnsummary
   pass.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
namespace {

/* Pass metadata for the full IPA fnsummary pass; dumps the symbol table
   when it finishes.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* IPA pass whose whole job is done through the summary hooks
   (generate/write/read below); execute itself does nothing.  */
class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
/* Factory used by the pass manager to instantiate the IPA fnsummary
   pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  /* NOTE(review): only the fn/call summaries are released here;
     ipa_free_size_summary is not called — confirm size summaries are
     reset elsewhere on re-entry.  */
  ipa_free_fn_summary ();
}