Improve costs for DImode shifts of interger constants.
[official-gcc.git] / gcc / ipa-fnsummary.c
blob86d01addb4498d31dc217c971fe28f8e363f7805
1 /* Function summary pass.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "gimple.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
63 #include "ssa.h"
64 #include "tree-streamer.h"
65 #include "cgraph.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86 #include "tree-into-ssa.h"
88 /* Summaries. */
89 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
90 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
91 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
93 /* Edge predicates goes here. */
94 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
97 /* Dump IPA hints. */
98 void
99 ipa_dump_hints (FILE *f, ipa_hints hints)
101 if (!hints)
102 return;
103 fprintf (f, "IPA hints:");
104 if (hints & INLINE_HINT_indirect_call)
106 hints &= ~INLINE_HINT_indirect_call;
107 fprintf (f, " indirect_call");
109 if (hints & INLINE_HINT_loop_iterations)
111 hints &= ~INLINE_HINT_loop_iterations;
112 fprintf (f, " loop_iterations");
114 if (hints & INLINE_HINT_loop_stride)
116 hints &= ~INLINE_HINT_loop_stride;
117 fprintf (f, " loop_stride");
119 if (hints & INLINE_HINT_same_scc)
121 hints &= ~INLINE_HINT_same_scc;
122 fprintf (f, " same_scc");
124 if (hints & INLINE_HINT_in_scc)
126 hints &= ~INLINE_HINT_in_scc;
127 fprintf (f, " in_scc");
129 if (hints & INLINE_HINT_cross_module)
131 hints &= ~INLINE_HINT_cross_module;
132 fprintf (f, " cross_module");
134 if (hints & INLINE_HINT_declared_inline)
136 hints &= ~INLINE_HINT_declared_inline;
137 fprintf (f, " declared_inline");
139 if (hints & INLINE_HINT_known_hot)
141 hints &= ~INLINE_HINT_known_hot;
142 fprintf (f, " known_hot");
144 gcc_assert (!hints);
148 /* Record SIZE and TIME to SUMMARY.
149 The accounted code will be executed when EXEC_PRED is true.
150 When NONCONST_PRED is false the code will evaluate to constant and
151 will get optimized out in specialized clones of the function.
152 If CALL is true account to call_size_time_table rather than
153 size_time_table. */
155 void
156 ipa_fn_summary::account_size_time (int size, sreal time,
157 const predicate &exec_pred,
158 const predicate &nonconst_pred_in,
159 bool call)
161 size_time_entry *e;
162 bool found = false;
163 int i;
164 predicate nonconst_pred;
165 vec<size_time_entry, va_gc> *table = call
166 ? call_size_time_table : size_time_table;
168 if (exec_pred == false)
169 return;
171 nonconst_pred = nonconst_pred_in & exec_pred;
173 if (nonconst_pred == false)
174 return;
176 /* We need to create initial empty unconditional clause, but otherwise
177 we don't need to account empty times and sizes. */
178 if (!size && time == 0 && table)
179 return;
181 /* Only for calls we are unaccounting what we previously recorded. */
182 gcc_checking_assert (time >= 0 || call);
184 for (i = 0; vec_safe_iterate (table, i, &e); i++)
185 if (e->exec_predicate == exec_pred
186 && e->nonconst_predicate == nonconst_pred)
188 found = true;
189 break;
191 if (i == max_size_time_table_size)
193 i = 0;
194 found = true;
195 e = &(*table)[0];
196 if (dump_file && (dump_flags & TDF_DETAILS))
197 fprintf (dump_file,
198 "\t\tReached limit on number of entries, "
199 "ignoring the predicate.");
201 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
203 fprintf (dump_file,
204 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
205 ((double) size) / ipa_fn_summary::size_scale,
206 (time.to_double ()), found ? "" : "new ");
207 exec_pred.dump (dump_file, conds, 0);
208 if (exec_pred != nonconst_pred)
210 fprintf (dump_file, " nonconst:");
211 nonconst_pred.dump (dump_file, conds);
213 else
214 fprintf (dump_file, "\n");
216 if (!found)
218 class size_time_entry new_entry;
219 new_entry.size = size;
220 new_entry.time = time;
221 new_entry.exec_predicate = exec_pred;
222 new_entry.nonconst_predicate = nonconst_pred;
223 if (call)
224 vec_safe_push (call_size_time_table, new_entry);
225 else
226 vec_safe_push (size_time_table, new_entry);
228 else
230 e->size += size;
231 e->time += time;
232 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
233 /* Tolerate small roundoff issues. */
234 if (e->time < 0)
235 e->time = 0;
239 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
241 static struct cgraph_edge *
242 redirect_to_unreachable (struct cgraph_edge *e)
244 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
245 struct cgraph_node *target = cgraph_node::get_create
246 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
248 if (e->speculative)
249 e = cgraph_edge::resolve_speculation (e, target->decl);
250 else if (!e->callee)
251 e = cgraph_edge::make_direct (e, target);
252 else
253 e->redirect_callee (target);
254 class ipa_call_summary *es = ipa_call_summaries->get (e);
255 e->inline_failed = CIF_UNREACHABLE;
256 e->count = profile_count::zero ();
257 es->call_stmt_size = 0;
258 es->call_stmt_time = 0;
259 if (callee)
260 callee->remove_symbol_and_inline_clones ();
261 return e;
264 /* Set predicate for edge E. */
266 static void
267 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
269 /* If the edge is determined to be never executed, redirect it
270 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
271 be optimized out. */
272 if (predicate && *predicate == false
273 /* When handling speculative edges, we need to do the redirection
274 just once. Do it always on the direct edge, so we do not
275 attempt to resolve speculation while duplicating the edge. */
276 && (!e->speculative || e->callee))
277 e = redirect_to_unreachable (e);
279 class ipa_call_summary *es = ipa_call_summaries->get (e);
280 if (predicate && *predicate != true)
282 if (!es->predicate)
283 es->predicate = edge_predicate_pool.allocate ();
284 *es->predicate = *predicate;
286 else
288 if (es->predicate)
289 edge_predicate_pool.remove (es->predicate);
290 es->predicate = NULL;
294 /* Set predicate for hint *P. */
296 static void
297 set_hint_predicate (predicate **p, predicate new_predicate)
299 if (new_predicate == false || new_predicate == true)
301 if (*p)
302 edge_predicate_pool.remove (*p);
303 *p = NULL;
305 else
307 if (!*p)
308 *p = edge_predicate_pool.allocate ();
309 **p = new_predicate;
314 /* Compute what conditions may or may not hold given information about
315 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
316 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
317 copy when called in a given context. It is a bitmask of conditions. Bit
318 0 means that condition is known to be false, while bit 1 means that condition
319 may or may not be true. These differs - for example NOT_INLINED condition
320 is always false in the second and also builtin_constant_p tests cannot use
321 the fact that parameter is indeed a constant.
323 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
324 KNOWN_AGGS is a vector of aggregate known offset/value set for each
325 parameter. Return clause of possible truths. When INLINE_P is true, assume
326 that we are inlining.
328 ERROR_MARK means compile time invariant. */
330 static void
331 evaluate_conditions_for_known_args (struct cgraph_node *node,
332 bool inline_p,
333 vec<tree> known_vals,
334 vec<value_range> known_value_ranges,
335 vec<ipa_agg_value_set> known_aggs,
336 clause_t *ret_clause,
337 clause_t *ret_nonspec_clause)
339 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
340 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
341 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
342 int i;
343 struct condition *c;
345 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
347 tree val = NULL;
348 tree res;
349 int j;
350 struct expr_eval_op *op;
352 /* We allow call stmt to have fewer arguments than the callee function
353 (especially for K&R style programs). So bound check here (we assume
354 known_aggs vector, if non-NULL, has the same length as
355 known_vals). */
356 gcc_checking_assert (!known_aggs.length () || !known_vals.length ()
357 || (known_vals.length () == known_aggs.length ()));
359 if (c->agg_contents)
361 struct ipa_agg_value_set *agg;
363 if (c->code == predicate::changed
364 && !c->by_ref
365 && c->operand_num < (int)known_vals.length ()
366 && (known_vals[c->operand_num] == error_mark_node))
367 continue;
369 if (c->operand_num < (int)known_aggs.length ())
371 agg = &known_aggs[c->operand_num];
372 val = ipa_find_agg_cst_for_param (agg,
373 c->operand_num
374 < (int) known_vals.length ()
375 ? known_vals[c->operand_num]
376 : NULL,
377 c->offset, c->by_ref);
379 else
380 val = NULL_TREE;
382 else if (c->operand_num < (int) known_vals.length ())
384 val = known_vals[c->operand_num];
385 if (val == error_mark_node && c->code != predicate::changed)
386 val = NULL_TREE;
389 if (!val
390 && (c->code == predicate::changed
391 || c->code == predicate::is_not_constant))
393 clause |= 1 << (i + predicate::first_dynamic_condition);
394 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
395 continue;
397 if (c->code == predicate::changed)
399 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
400 continue;
403 if (c->code == predicate::is_not_constant)
405 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
406 continue;
409 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
411 if (c->type != TREE_TYPE (val))
412 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
413 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
415 if (!val)
416 break;
417 if (!op->val[0])
418 val = fold_unary (op->code, op->type, val);
419 else if (!op->val[1])
420 val = fold_binary (op->code, op->type,
421 op->index ? op->val[0] : val,
422 op->index ? val : op->val[0]);
423 else if (op->index == 0)
424 val = fold_ternary (op->code, op->type,
425 val, op->val[0], op->val[1]);
426 else if (op->index == 1)
427 val = fold_ternary (op->code, op->type,
428 op->val[0], val, op->val[1]);
429 else if (op->index == 2)
430 val = fold_ternary (op->code, op->type,
431 op->val[0], op->val[1], val);
432 else
433 val = NULL_TREE;
436 res = val
437 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
438 : NULL;
440 if (res && integer_zerop (res))
441 continue;
442 if (res && integer_onep (res))
444 clause |= 1 << (i + predicate::first_dynamic_condition);
445 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
446 continue;
449 if (c->operand_num < (int) known_value_ranges.length ()
450 && !c->agg_contents
451 && !known_value_ranges[c->operand_num].undefined_p ()
452 && !known_value_ranges[c->operand_num].varying_p ()
453 && TYPE_SIZE (c->type)
454 == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
455 && (!val || TREE_CODE (val) != INTEGER_CST))
457 value_range vr = known_value_ranges[c->operand_num];
458 if (!useless_type_conversion_p (c->type, vr.type ()))
460 value_range res;
461 range_fold_unary_expr (&res, NOP_EXPR,
462 c->type, &vr, vr.type ());
463 vr = res;
465 tree type = c->type;
467 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
469 if (vr.varying_p () || vr.undefined_p ())
470 break;
472 value_range res;
473 if (!op->val[0])
474 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
475 else if (!op->val[1])
477 value_range op0 (op->val[0], op->val[0]);
478 range_fold_binary_expr (&res, op->code, op->type,
479 op->index ? &op0 : &vr,
480 op->index ? &vr : &op0);
482 else
483 gcc_unreachable ();
484 type = op->type;
485 vr = res;
487 if (!vr.varying_p () && !vr.undefined_p ())
489 value_range res;
490 value_range val_vr (c->val, c->val);
491 range_fold_binary_expr (&res, c->code, boolean_type_node,
492 &vr,
493 &val_vr);
494 if (res.zero_p ())
495 continue;
499 clause |= 1 << (i + predicate::first_dynamic_condition);
500 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
502 *ret_clause = clause;
503 if (ret_nonspec_clause)
504 *ret_nonspec_clause = nonspec_clause;
507 /* Return true if VRP will be exectued on the function.
508 We do not want to anticipate optimizations that will not happen.
510 FIXME: This can be confused with -fdisable and debug counters and thus
511 it should not be used for correctness (only to make heuristics work).
512 This means that inliner should do its own optimizations of expressions
513 that it predicts to be constant so wrong code can not be triggered by
514 builtin_constant_p. */
516 static bool
517 vrp_will_run_p (struct cgraph_node *node)
519 return (opt_for_fn (node->decl, optimize)
520 && !opt_for_fn (node->decl, optimize_debug)
521 && opt_for_fn (node->decl, flag_tree_vrp));
524 /* Similarly about FRE. */
526 static bool
527 fre_will_run_p (struct cgraph_node *node)
529 return (opt_for_fn (node->decl, optimize)
530 && !opt_for_fn (node->decl, optimize_debug)
531 && opt_for_fn (node->decl, flag_tree_fre));
534 /* Work out what conditions might be true at invocation of E.
535 Compute costs for inlined edge if INLINE_P is true.
537 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
538 (if non-NULL) conditions evaluated for nonspecialized clone called
539 in a given context.
541 KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by
542 known constant and aggregate values of parameters.
544 KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts
545 of parameter used by a polymorphic call. */
547 void
548 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
549 clause_t *clause_ptr,
550 clause_t *nonspec_clause_ptr,
551 vec<tree> *known_vals_ptr,
552 vec<ipa_polymorphic_call_context>
553 *known_contexts_ptr,
554 vec<ipa_agg_value_set> *known_aggs_ptr)
556 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
557 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
558 auto_vec<value_range, 32> known_value_ranges;
559 class ipa_edge_args *args;
561 if (clause_ptr)
562 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
564 if (ipa_node_params_sum
565 && !e->call_stmt_cannot_inline_p
566 && (info->conds || known_contexts_ptr)
567 && (args = IPA_EDGE_REF (e)) != NULL)
569 struct cgraph_node *caller;
570 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
571 class ipa_call_summary *es = ipa_call_summaries->get (e);
572 int i, count = ipa_get_cs_argument_count (args);
574 if (count)
576 if (e->caller->inlined_to)
577 caller = e->caller->inlined_to;
578 else
579 caller = e->caller;
580 caller_parms_info = IPA_NODE_REF (caller);
581 callee_pi = IPA_NODE_REF (callee);
583 /* Watch for thunks. */
584 if (callee_pi)
585 /* Watch for variadic functions. */
586 count = MIN (count, ipa_get_param_count (callee_pi));
589 if (callee_pi)
590 for (i = 0; i < count; i++)
592 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
594 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
595 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
597 /* Determine if we know constant value of the parameter. */
598 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
599 ipa_get_type (callee_pi, i));
601 if (!cst && e->call_stmt
602 && i < (int)gimple_call_num_args (e->call_stmt))
604 cst = gimple_call_arg (e->call_stmt, i);
605 if (!is_gimple_min_invariant (cst))
606 cst = NULL;
608 if (cst)
610 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
611 if (!known_vals_ptr->length ())
612 vec_safe_grow_cleared (known_vals_ptr, count, true);
613 (*known_vals_ptr)[i] = cst;
615 else if (inline_p && !es->param[i].change_prob)
617 if (!known_vals_ptr->length ())
618 vec_safe_grow_cleared (known_vals_ptr, count, true);
619 (*known_vals_ptr)[i] = error_mark_node;
622 /* If we failed to get simple constant, try value range. */
623 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
624 && vrp_will_run_p (caller)
625 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
627 value_range vr
628 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
629 ipa_get_type (callee_pi,
630 i));
631 if (!vr.undefined_p () && !vr.varying_p ())
633 if (!known_value_ranges.length ())
635 known_value_ranges.safe_grow (count, true);
636 for (int i = 0; i < count; ++i)
637 new (&known_value_ranges[i]) value_range ();
639 known_value_ranges[i] = vr;
643 /* Determine known aggregate values. */
644 if (fre_will_run_p (caller))
646 ipa_agg_value_set agg
647 = ipa_agg_value_set_from_jfunc (caller_parms_info,
648 caller, &jf->agg);
649 if (agg.items.length ())
651 if (!known_aggs_ptr->length ())
652 vec_safe_grow_cleared (known_aggs_ptr, count, true);
653 (*known_aggs_ptr)[i] = agg;
658 /* For calls used in polymorphic calls we further determine
659 polymorphic call context. */
660 if (known_contexts_ptr
661 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
663 ipa_polymorphic_call_context
664 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
665 if (!ctx.useless_p ())
667 if (!known_contexts_ptr->length ())
668 known_contexts_ptr->safe_grow_cleared (count, true);
669 (*known_contexts_ptr)[i]
670 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
674 else
675 gcc_assert (!count || callee->thunk.thunk_p);
677 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
679 int i, count = (int)gimple_call_num_args (e->call_stmt);
681 for (i = 0; i < count; i++)
683 tree cst = gimple_call_arg (e->call_stmt, i);
684 if (!is_gimple_min_invariant (cst))
685 cst = NULL;
686 if (cst)
688 if (!known_vals_ptr->length ())
689 vec_safe_grow_cleared (known_vals_ptr, count, true);
690 (*known_vals_ptr)[i] = cst;
695 evaluate_conditions_for_known_args (callee, inline_p,
696 *known_vals_ptr,
697 known_value_ranges,
698 *known_aggs_ptr,
699 clause_ptr,
700 nonspec_clause_ptr);
704 /* Allocate the function summary. */
706 static void
707 ipa_fn_summary_alloc (void)
709 gcc_checking_assert (!ipa_fn_summaries);
710 ipa_size_summaries = new ipa_size_summary_t (symtab);
711 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
712 ipa_call_summaries = new ipa_call_summary_t (symtab);
715 ipa_call_summary::~ipa_call_summary ()
717 if (predicate)
718 edge_predicate_pool.remove (predicate);
720 param.release ();
723 ipa_fn_summary::~ipa_fn_summary ()
725 if (loop_iterations)
726 edge_predicate_pool.remove (loop_iterations);
727 if (loop_stride)
728 edge_predicate_pool.remove (loop_stride);
729 vec_free (conds);
730 vec_free (size_time_table);
731 vec_free (call_size_time_table);
734 void
735 ipa_fn_summary_t::remove_callees (cgraph_node *node)
737 cgraph_edge *e;
738 for (e = node->callees; e; e = e->next_callee)
739 ipa_call_summaries->remove (e);
740 for (e = node->indirect_calls; e; e = e->next_callee)
741 ipa_call_summaries->remove (e);
744 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
745 Additionally care about allocating new memory slot for updated predicate
746 and set it to NULL when it becomes true or false (and thus uninteresting).
749 static void
750 remap_hint_predicate_after_duplication (predicate **p,
751 clause_t possible_truths)
753 predicate new_predicate;
755 if (!*p)
756 return;
758 new_predicate = (*p)->remap_after_duplication (possible_truths);
759 /* We do not want to free previous predicate; it is used by node origin. */
760 *p = NULL;
761 set_hint_predicate (p, new_predicate);
765 /* Hook that is called by cgraph.c when a node is duplicated. */
766 void
767 ipa_fn_summary_t::duplicate (cgraph_node *src,
768 cgraph_node *dst,
769 ipa_fn_summary *,
770 ipa_fn_summary *info)
772 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
773 /* TODO: as an optimization, we may avoid copying conditions
774 that are known to be false or true. */
775 info->conds = vec_safe_copy (info->conds);
777 /* When there are any replacements in the function body, see if we can figure
778 out that something was optimized out. */
779 if (ipa_node_params_sum && dst->clone.tree_map)
781 vec<size_time_entry, va_gc> *entry = info->size_time_table;
782 /* Use SRC parm info since it may not be copied yet. */
783 class ipa_node_params *parms_info = IPA_NODE_REF (src);
784 vec<tree> known_vals = vNULL;
785 int count = ipa_get_param_count (parms_info);
786 int i, j;
787 clause_t possible_truths;
788 predicate true_pred = true;
789 size_time_entry *e;
790 int optimized_out_size = 0;
791 bool inlined_to_p = false;
792 struct cgraph_edge *edge, *next;
794 info->size_time_table = 0;
795 known_vals.safe_grow_cleared (count, true);
796 for (i = 0; i < count; i++)
798 struct ipa_replace_map *r;
800 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
802 if (r->parm_num == i)
804 known_vals[i] = r->new_tree;
805 break;
809 evaluate_conditions_for_known_args (dst, false,
810 known_vals,
811 vNULL,
812 vNULL,
813 &possible_truths,
814 /* We are going to specialize,
815 so ignore nonspec truths. */
816 NULL);
817 known_vals.release ();
819 info->account_size_time (0, 0, true_pred, true_pred);
821 /* Remap size_time vectors.
822 Simplify the predicate by pruning out alternatives that are known
823 to be false.
824 TODO: as on optimization, we can also eliminate conditions known
825 to be true. */
826 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
828 predicate new_exec_pred;
829 predicate new_nonconst_pred;
830 new_exec_pred = e->exec_predicate.remap_after_duplication
831 (possible_truths);
832 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
833 (possible_truths);
834 if (new_exec_pred == false || new_nonconst_pred == false)
835 optimized_out_size += e->size;
836 else
837 info->account_size_time (e->size, e->time, new_exec_pred,
838 new_nonconst_pred);
841 /* Remap edge predicates with the same simplification as above.
842 Also copy constantness arrays. */
843 for (edge = dst->callees; edge; edge = next)
845 predicate new_predicate;
846 class ipa_call_summary *es = ipa_call_summaries->get (edge);
847 next = edge->next_callee;
849 if (!edge->inline_failed)
850 inlined_to_p = true;
851 if (!es->predicate)
852 continue;
853 new_predicate = es->predicate->remap_after_duplication
854 (possible_truths);
855 if (new_predicate == false && *es->predicate != false)
856 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
857 edge_set_predicate (edge, &new_predicate);
860 /* Remap indirect edge predicates with the same simplification as above.
861 Also copy constantness arrays. */
862 for (edge = dst->indirect_calls; edge; edge = next)
864 predicate new_predicate;
865 class ipa_call_summary *es = ipa_call_summaries->get (edge);
866 next = edge->next_callee;
868 gcc_checking_assert (edge->inline_failed);
869 if (!es->predicate)
870 continue;
871 new_predicate = es->predicate->remap_after_duplication
872 (possible_truths);
873 if (new_predicate == false && *es->predicate != false)
874 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
875 edge_set_predicate (edge, &new_predicate);
877 remap_hint_predicate_after_duplication (&info->loop_iterations,
878 possible_truths);
879 remap_hint_predicate_after_duplication (&info->loop_stride,
880 possible_truths);
882 /* If inliner or someone after inliner will ever start producing
883 non-trivial clones, we will get trouble with lack of information
884 about updating self sizes, because size vectors already contains
885 sizes of the callees. */
886 gcc_assert (!inlined_to_p || !optimized_out_size);
888 else
890 info->size_time_table = vec_safe_copy (info->size_time_table);
891 if (info->loop_iterations)
893 predicate p = *info->loop_iterations;
894 info->loop_iterations = NULL;
895 set_hint_predicate (&info->loop_iterations, p);
897 if (info->loop_stride)
899 predicate p = *info->loop_stride;
900 info->loop_stride = NULL;
901 set_hint_predicate (&info->loop_stride, p);
904 if (!dst->inlined_to)
905 ipa_update_overall_fn_summary (dst);
909 /* Hook that is called by cgraph.c when a node is duplicated. */
911 void
912 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
913 struct cgraph_edge *dst,
914 class ipa_call_summary *srcinfo,
915 class ipa_call_summary *info)
917 new (info) ipa_call_summary (*srcinfo);
918 info->predicate = NULL;
919 edge_set_predicate (dst, srcinfo->predicate);
920 info->param = srcinfo->param.copy ();
921 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
923 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
924 - eni_size_weights.call_cost);
925 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
926 - eni_time_weights.call_cost);
930 /* Dump edge summaries associated to NODE and recursively to all clones.
931 Indent by INDENT. */
933 static void
934 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
935 class ipa_fn_summary *info)
937 struct cgraph_edge *edge;
938 for (edge = node->callees; edge; edge = edge->next_callee)
940 class ipa_call_summary *es = ipa_call_summaries->get (edge);
941 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
942 int i;
944 fprintf (f,
945 "%*s%s %s\n%*s freq:%4.2f",
946 indent, "", callee->dump_name (),
947 !edge->inline_failed
948 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
949 indent, "", edge->sreal_frequency ().to_double ());
951 if (cross_module_call_p (edge))
952 fprintf (f, " cross module");
954 if (es)
955 fprintf (f, " loop depth:%2i size:%2i time: %2i",
956 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
958 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
959 ipa_size_summary *ss = ipa_size_summaries->get (callee);
960 if (s != NULL)
961 fprintf (f, " callee size:%2i stack:%2i",
962 (int) (ss->size / ipa_fn_summary::size_scale),
963 (int) s->estimated_stack_size);
965 if (es && es->predicate)
967 fprintf (f, " predicate: ");
968 es->predicate->dump (f, info->conds);
970 else
971 fprintf (f, "\n");
972 if (es && es->param.exists ())
973 for (i = 0; i < (int) es->param.length (); i++)
975 int prob = es->param[i].change_prob;
977 if (!prob)
978 fprintf (f, "%*s op%i is compile time invariant\n",
979 indent + 2, "", i);
980 else if (prob != REG_BR_PROB_BASE)
981 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
982 prob * 100.0 / REG_BR_PROB_BASE);
984 if (!edge->inline_failed)
986 ipa_size_summary *ss = ipa_size_summaries->get (callee);
987 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
988 indent + 2, "",
989 (int) ipa_get_stack_frame_offset (callee),
990 (int) ss->estimated_self_stack_size);
991 dump_ipa_call_summary (f, indent + 2, callee, info);
994 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
996 class ipa_call_summary *es = ipa_call_summaries->get (edge);
997 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
998 " time: %2i",
999 indent, "",
1000 es->loop_depth,
1001 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1002 es->call_stmt_time);
1003 if (es->predicate)
1005 fprintf (f, "predicate: ");
1006 es->predicate->dump (f, info->conds);
1008 else
1009 fprintf (f, "\n");
1014 void
1015 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1017 if (node->definition)
1019 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1020 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1021 if (s != NULL)
1023 size_time_entry *e;
1024 int i;
1025 fprintf (f, "IPA function summary for %s", node->dump_name ());
1026 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1027 fprintf (f, " always_inline");
1028 if (s->inlinable)
1029 fprintf (f, " inlinable");
1030 if (s->fp_expressions)
1031 fprintf (f, " fp_expression");
1032 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1033 fprintf (f, " self size: %i\n", ss->self_size);
1034 fprintf (f, " global size: %i\n", ss->size);
1035 fprintf (f, " min size: %i\n", s->min_size);
1036 fprintf (f, " self stack: %i\n",
1037 (int) ss->estimated_self_stack_size);
1038 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1039 if (s->growth)
1040 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1041 if (s->scc_no)
1042 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
1043 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
1045 fprintf (f, " size:%f, time:%f",
1046 (double) e->size / ipa_fn_summary::size_scale,
1047 e->time.to_double ());
1048 if (e->exec_predicate != true)
1050 fprintf (f, ", executed if:");
1051 e->exec_predicate.dump (f, s->conds, 0);
1053 if (e->exec_predicate != e->nonconst_predicate)
1055 fprintf (f, ", nonconst if:");
1056 e->nonconst_predicate.dump (f, s->conds, 0);
1058 fprintf (f, "\n");
1060 if (s->loop_iterations)
1062 fprintf (f, " loop iterations:");
1063 s->loop_iterations->dump (f, s->conds);
1065 if (s->loop_stride)
1067 fprintf (f, " loop stride:");
1068 s->loop_stride->dump (f, s->conds);
1070 fprintf (f, " calls:\n");
1071 dump_ipa_call_summary (f, 4, node, s);
1072 fprintf (f, "\n");
1074 else
1075 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
1079 DEBUG_FUNCTION void
1080 ipa_debug_fn_summary (struct cgraph_node *node)
1082 ipa_dump_fn_summary (stderr, node);
1085 void
1086 ipa_dump_fn_summaries (FILE *f)
1088 struct cgraph_node *node;
1090 FOR_EACH_DEFINED_FUNCTION (node)
1091 if (!node->inlined_to)
1092 ipa_dump_fn_summary (f, node);
1095 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1096 boolean variable pointed to by DATA. */
1098 static bool
1099 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1100 void *data)
1102 bool *b = (bool *) data;
1103 *b = true;
1104 return true;
1107 /* If OP refers to value of function parameter, return the corresponding
1108 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1109 PARM_DECL) will be stored to *SIZE_P in that case too. */
1111 static tree
1112 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1113 poly_int64 *size_p)
1115 /* SSA_NAME referring to parm default def? */
1116 if (TREE_CODE (op) == SSA_NAME
1117 && SSA_NAME_IS_DEFAULT_DEF (op)
1118 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1120 if (size_p)
1121 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1122 return SSA_NAME_VAR (op);
1124 /* Non-SSA parm reference? */
1125 if (TREE_CODE (op) == PARM_DECL)
1127 bool modified = false;
1129 ao_ref refd;
1130 ao_ref_init (&refd, op);
1131 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1132 mark_modified, &modified, NULL, NULL,
1133 fbi->aa_walk_budget + 1);
1134 if (walked < 0)
1136 fbi->aa_walk_budget = 0;
1137 return NULL_TREE;
1139 if (!modified)
1141 if (size_p)
1142 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1143 return op;
1146 return NULL_TREE;
1149 /* If OP refers to value of function parameter, return the corresponding
1150 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1151 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1152 stored to *SIZE_P in that case too. */
1154 static tree
1155 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1156 poly_int64 *size_p)
1158 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1159 if (res)
1160 return res;
1162 if (TREE_CODE (op) == SSA_NAME
1163 && !SSA_NAME_IS_DEFAULT_DEF (op)
1164 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1165 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1166 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1167 size_p);
1168 return NULL_TREE;
1171 /* If OP refers to a value of a function parameter or value loaded from an
1172 aggregate passed to a parameter (either by value or reference), return TRUE
1173 and store the number of the parameter to *INDEX_P, the access size into
1174 *SIZE_P, and information whether and how it has been loaded from an
1175 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1176 statement in which OP is used or loaded. */
1178 static bool
1179 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1180 gimple *stmt, tree op, int *index_p,
1181 poly_int64 *size_p,
1182 struct agg_position_info *aggpos)
1184 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1186 gcc_checking_assert (aggpos);
1187 if (res)
1189 *index_p = ipa_get_param_decl_index (fbi->info, res);
1190 if (*index_p < 0)
1191 return false;
1192 aggpos->agg_contents = false;
1193 aggpos->by_ref = false;
1194 return true;
1197 if (TREE_CODE (op) == SSA_NAME)
1199 if (SSA_NAME_IS_DEFAULT_DEF (op)
1200 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1201 return false;
1202 stmt = SSA_NAME_DEF_STMT (op);
1203 op = gimple_assign_rhs1 (stmt);
1204 if (!REFERENCE_CLASS_P (op))
1205 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1206 aggpos);
1209 aggpos->agg_contents = true;
1210 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1211 stmt, op, index_p, &aggpos->offset,
1212 size_p, &aggpos->by_ref);
1215 /* See if statement might disappear after inlining.
1216 0 - means not eliminated
1217 1 - half of statements goes away
1218 2 - for sure it is eliminated.
1219 We are not terribly sophisticated, basically looking for simple abstraction
1220 penalty wrappers. */
1222 static int
1223 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1225 enum gimple_code code = gimple_code (stmt);
1226 enum tree_code rhs_code;
1228 if (!optimize)
1229 return 0;
1231 switch (code)
1233 case GIMPLE_RETURN:
1234 return 2;
1235 case GIMPLE_ASSIGN:
1236 if (gimple_num_ops (stmt) != 2)
1237 return 0;
1239 rhs_code = gimple_assign_rhs_code (stmt);
1241 /* Casts of parameters, loads from parameters passed by reference
1242 and stores to return value or parameters are often free after
1243 inlining due to SRA and further combining.
1244 Assume that half of statements goes away. */
1245 if (CONVERT_EXPR_CODE_P (rhs_code)
1246 || rhs_code == VIEW_CONVERT_EXPR
1247 || rhs_code == ADDR_EXPR
1248 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1250 tree rhs = gimple_assign_rhs1 (stmt);
1251 tree lhs = gimple_assign_lhs (stmt);
1252 tree inner_rhs = get_base_address (rhs);
1253 tree inner_lhs = get_base_address (lhs);
1254 bool rhs_free = false;
1255 bool lhs_free = false;
1257 if (!inner_rhs)
1258 inner_rhs = rhs;
1259 if (!inner_lhs)
1260 inner_lhs = lhs;
1262 /* Reads of parameter are expected to be free. */
1263 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1264 rhs_free = true;
1265 /* Match expressions of form &this->field. Those will most likely
1266 combine with something upstream after inlining. */
1267 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1269 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1270 if (TREE_CODE (op) == PARM_DECL)
1271 rhs_free = true;
1272 else if (TREE_CODE (op) == MEM_REF
1273 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1274 NULL))
1275 rhs_free = true;
1278 /* When parameter is not SSA register because its address is taken
1279 and it is just copied into one, the statement will be completely
1280 free after inlining (we will copy propagate backward). */
1281 if (rhs_free && is_gimple_reg (lhs))
1282 return 2;
1284 /* Reads of parameters passed by reference
1285 expected to be free (i.e. optimized out after inlining). */
1286 if (TREE_CODE (inner_rhs) == MEM_REF
1287 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1288 rhs_free = true;
1290 /* Copying parameter passed by reference into gimple register is
1291 probably also going to copy propagate, but we can't be quite
1292 sure. */
1293 if (rhs_free && is_gimple_reg (lhs))
1294 lhs_free = true;
1296 /* Writes to parameters, parameters passed by value and return value
1297 (either directly or passed via invisible reference) are free.
1299 TODO: We ought to handle testcase like
1300 struct a {int a,b;};
1301 struct a
1302 returnstruct (void)
1304 struct a a ={1,2};
1305 return a;
1308 This translate into:
1310 returnstruct ()
1312 int a$b;
1313 int a$a;
1314 struct a a;
1315 struct a D.2739;
1317 <bb 2>:
1318 D.2739.a = 1;
1319 D.2739.b = 2;
1320 return D.2739;
1323 For that we either need to copy ipa-split logic detecting writes
1324 to return value. */
1325 if (TREE_CODE (inner_lhs) == PARM_DECL
1326 || TREE_CODE (inner_lhs) == RESULT_DECL
1327 || (TREE_CODE (inner_lhs) == MEM_REF
1328 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1329 NULL)
1330 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1331 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1332 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1333 (inner_lhs,
1334 0))) == RESULT_DECL))))
1335 lhs_free = true;
1336 if (lhs_free
1337 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1338 rhs_free = true;
1339 if (lhs_free && rhs_free)
1340 return 1;
1342 return 0;
1343 default:
1344 return 0;
1348 /* Analyze EXPR if it represents a series of simple operations performed on
1349 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1350 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1351 Type of the parameter or load from an aggregate via the parameter is
1352 stored in *TYPE_P. Operations on the parameter are recorded to
1353 PARAM_OPS_P if it is not NULL. */
1355 static bool
1356 decompose_param_expr (struct ipa_func_body_info *fbi,
1357 gimple *stmt, tree expr,
1358 int *index_p, tree *type_p,
1359 struct agg_position_info *aggpos,
1360 expr_eval_ops *param_ops_p = NULL)
1362 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1363 int op_count = 0;
1365 if (param_ops_p)
1366 *param_ops_p = NULL;
1368 while (true)
1370 expr_eval_op eval_op;
1371 unsigned rhs_count;
1372 unsigned cst_count = 0;
1374 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1375 aggpos))
1377 tree type = TREE_TYPE (expr);
1379 if (aggpos->agg_contents)
1381 /* Stop if containing bit-field. */
1382 if (TREE_CODE (expr) == BIT_FIELD_REF
1383 || contains_bitfld_component_ref_p (expr))
1384 break;
1387 *type_p = type;
1388 return true;
1391 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1392 break;
1394 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1395 break;
1397 switch (gimple_assign_rhs_class (stmt))
1399 case GIMPLE_SINGLE_RHS:
1400 expr = gimple_assign_rhs1 (stmt);
1401 continue;
1403 case GIMPLE_UNARY_RHS:
1404 rhs_count = 1;
1405 break;
1407 case GIMPLE_BINARY_RHS:
1408 rhs_count = 2;
1409 break;
1411 case GIMPLE_TERNARY_RHS:
1412 rhs_count = 3;
1413 break;
1415 default:
1416 goto fail;
1419 /* Stop if expression is too complex. */
1420 if (op_count++ == op_limit)
1421 break;
1423 if (param_ops_p)
1425 eval_op.code = gimple_assign_rhs_code (stmt);
1426 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1427 eval_op.val[0] = NULL_TREE;
1428 eval_op.val[1] = NULL_TREE;
1431 expr = NULL_TREE;
1432 for (unsigned i = 0; i < rhs_count; i++)
1434 tree op = gimple_op (stmt, i + 1);
1436 gcc_assert (op && !TYPE_P (op));
1437 if (is_gimple_ip_invariant (op))
1439 if (++cst_count == rhs_count)
1440 goto fail;
1442 eval_op.val[cst_count - 1] = op;
1444 else if (!expr)
1446 /* Found a non-constant operand, and record its index in rhs
1447 operands. */
1448 eval_op.index = i;
1449 expr = op;
1451 else
1453 /* Found more than one non-constant operands. */
1454 goto fail;
1458 if (param_ops_p)
1459 vec_safe_insert (*param_ops_p, 0, eval_op);
1462 /* Failed to decompose, free resource and return. */
1463 fail:
1464 if (param_ops_p)
1465 vec_free (*param_ops_p);
1467 return false;
1470 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1471 predicates to the CFG edges. */
1473 static void
1474 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1475 class ipa_fn_summary *summary,
1476 class ipa_node_params *params_summary,
1477 basic_block bb)
1479 gimple *last;
1480 tree op, op2;
1481 int index;
1482 struct agg_position_info aggpos;
1483 enum tree_code code, inverted_code;
1484 edge e;
1485 edge_iterator ei;
1486 gimple *set_stmt;
1487 tree param_type;
1488 expr_eval_ops param_ops;
1490 last = last_stmt (bb);
1491 if (!last || gimple_code (last) != GIMPLE_COND)
1492 return;
1493 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1494 return;
1495 op = gimple_cond_lhs (last);
1497 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1498 &param_ops))
1500 code = gimple_cond_code (last);
1501 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1503 FOR_EACH_EDGE (e, ei, bb->succs)
1505 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1506 ? code : inverted_code);
1507 /* invert_tree_comparison will return ERROR_MARK on FP
1508 comparisons that are not EQ/NE instead of returning proper
1509 unordered one. Be sure it is not confused with NON_CONSTANT.
1511 And if the edge's target is the final block of diamond CFG graph
1512 of this conditional statement, we do not need to compute
1513 predicate for the edge because the final block's predicate must
1514 be at least as that of the first block of the statement. */
1515 if (this_code != ERROR_MARK
1516 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1518 predicate p
1519 = add_condition (summary, params_summary, index,
1520 param_type, &aggpos,
1521 this_code, gimple_cond_rhs (last), param_ops);
1522 e->aux = edge_predicate_pool.allocate ();
1523 *(predicate *) e->aux = p;
1526 vec_free (param_ops);
1529 if (TREE_CODE (op) != SSA_NAME)
1530 return;
1531 /* Special case
1532 if (builtin_constant_p (op))
1533 constant_code
1534 else
1535 nonconstant_code.
1536 Here we can predicate nonconstant_code. We can't
1537 really handle constant_code since we have no predicate
1538 for this and also the constant code is not known to be
1539 optimized away when inliner doesn't see operand is constant.
1540 Other optimizers might think otherwise. */
1541 if (gimple_cond_code (last) != NE_EXPR
1542 || !integer_zerop (gimple_cond_rhs (last)))
1543 return;
1544 set_stmt = SSA_NAME_DEF_STMT (op);
1545 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1546 || gimple_call_num_args (set_stmt) != 1)
1547 return;
1548 op2 = gimple_call_arg (set_stmt, 0);
1549 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1550 return;
1551 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1553 predicate p = add_condition (summary, params_summary, index,
1554 param_type, &aggpos,
1555 predicate::is_not_constant, NULL_TREE);
1556 e->aux = edge_predicate_pool.allocate ();
1557 *(predicate *) e->aux = p;
1562 /* If BB ends by a switch we can turn into predicates, attach corresponding
1563 predicates to the CFG edges. */
1565 static void
1566 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1567 class ipa_fn_summary *summary,
1568 class ipa_node_params *params_summary,
1569 basic_block bb)
1571 gimple *lastg;
1572 tree op;
1573 int index;
1574 struct agg_position_info aggpos;
1575 edge e;
1576 edge_iterator ei;
1577 size_t n;
1578 size_t case_idx;
1579 tree param_type;
1580 expr_eval_ops param_ops;
1582 lastg = last_stmt (bb);
1583 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1584 return;
1585 gswitch *last = as_a <gswitch *> (lastg);
1586 op = gimple_switch_index (last);
1587 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1588 &param_ops))
1589 return;
1591 auto_vec<std::pair<tree, tree> > ranges;
1592 tree type = TREE_TYPE (op);
1593 int bound_limit = opt_for_fn (fbi->node->decl,
1594 param_ipa_max_switch_predicate_bounds);
1595 int bound_count = 0;
1596 wide_int vr_wmin, vr_wmax;
1597 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
1599 FOR_EACH_EDGE (e, ei, bb->succs)
1601 e->aux = edge_predicate_pool.allocate ();
1602 *(predicate *) e->aux = false;
1605 e = gimple_switch_edge (cfun, last, 0);
1606 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1607 default case if its target basic block is in convergence point of all
1608 switch cases, which can be determined by checking whether it
1609 post-dominates the switch statement. */
1610 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1611 bound_count = INT_MAX;
1613 n = gimple_switch_num_labels (last);
1614 for (case_idx = 1; case_idx < n; ++case_idx)
1616 tree cl = gimple_switch_label (last, case_idx);
1617 tree min = CASE_LOW (cl);
1618 tree max = CASE_HIGH (cl);
1619 predicate p;
1621 e = gimple_switch_edge (cfun, last, case_idx);
1623 /* The case value might not have same type as switch expression,
1624 extend the value based on the expression type. */
1625 if (TREE_TYPE (min) != type)
1626 min = wide_int_to_tree (type, wi::to_wide (min));
1628 if (!max)
1629 max = min;
1630 else if (TREE_TYPE (max) != type)
1631 max = wide_int_to_tree (type, wi::to_wide (max));
1633 /* The case's target basic block is in convergence point of all switch
1634 cases, its predicate should be at least as that of the switch
1635 statement. */
1636 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1637 p = true;
1638 else if (min == max)
1639 p = add_condition (summary, params_summary, index, param_type,
1640 &aggpos, EQ_EXPR, min, param_ops);
1641 else
1643 predicate p1, p2;
1644 p1 = add_condition (summary, params_summary, index, param_type,
1645 &aggpos, GE_EXPR, min, param_ops);
1646 p2 = add_condition (summary, params_summary,index, param_type,
1647 &aggpos, LE_EXPR, max, param_ops);
1648 p = p1 & p2;
1650 *(class predicate *) e->aux
1651 = p.or_with (summary->conds, *(class predicate *) e->aux);
1653 /* If there are too many disjoint case ranges, predicate for default
1654 case might become too complicated. So add a limit here. */
1655 if (bound_count > bound_limit)
1656 continue;
1658 bool new_range = true;
1660 if (!ranges.is_empty ())
1662 wide_int curr_wmin = wi::to_wide (min);
1663 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1665 /* Merge case ranges if they are continuous. */
1666 if (curr_wmin == last_wmax + 1)
1667 new_range = false;
1668 else if (vr_type == VR_ANTI_RANGE)
1670 /* If two disjoint case ranges can be connected by anti-range
1671 of switch index, combine them to one range. */
1672 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1673 vr_type = VR_UNDEFINED;
1674 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1675 new_range = false;
1679 /* Create/extend a case range. And we count endpoints of range set,
1680 this number nearly equals to number of conditions that we will create
1681 for predicate of default case. */
1682 if (new_range)
1684 bound_count += (min == max) ? 1 : 2;
1685 ranges.safe_push (std::make_pair (min, max));
1687 else
1689 bound_count += (ranges.last ().first == ranges.last ().second);
1690 ranges.last ().second = max;
1694 e = gimple_switch_edge (cfun, last, 0);
1695 if (bound_count > bound_limit)
1697 *(class predicate *) e->aux = true;
1698 vec_free (param_ops);
1699 return;
1702 predicate p_seg = true;
1703 predicate p_all = false;
1705 if (vr_type != VR_RANGE)
1707 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1708 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1711 /* Construct predicate to represent default range set that is negation of
1712 all case ranges. Case range is classified as containing single/non-single
1713 values. Suppose a piece of case ranges in the following.
1715 [D1...D2] [S1] ... [Sn] [D3...D4]
1717 To represent default case's range sets between two non-single value
1718 case ranges (From D2 to D3), we construct predicate as:
1720 D2 < x < D3 && x != S1 && ... && x != Sn
1722 for (size_t i = 0; i < ranges.length (); i++)
1724 tree min = ranges[i].first;
1725 tree max = ranges[i].second;
1727 if (min == max)
1728 p_seg &= add_condition (summary, params_summary, index,
1729 param_type, &aggpos, NE_EXPR,
1730 min, param_ops);
1731 else
1733 /* Do not create sub-predicate for range that is beyond low bound
1734 of switch index. */
1735 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1737 p_seg &= add_condition (summary, params_summary, index,
1738 param_type, &aggpos,
1739 LT_EXPR, min, param_ops);
1740 p_all = p_all.or_with (summary->conds, p_seg);
1743 /* Do not create sub-predicate for range that is beyond up bound
1744 of switch index. */
1745 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1747 p_seg = false;
1748 break;
1751 p_seg = add_condition (summary, params_summary, index,
1752 param_type, &aggpos, GT_EXPR,
1753 max, param_ops);
1757 p_all = p_all.or_with (summary->conds, p_seg);
1758 *(class predicate *) e->aux
1759 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
1761 vec_free (param_ops);
1765 /* For each BB in NODE attach to its AUX pointer predicate under
1766 which it is executable. */
1768 static void
1769 compute_bb_predicates (struct ipa_func_body_info *fbi,
1770 struct cgraph_node *node,
1771 class ipa_fn_summary *summary,
1772 class ipa_node_params *params_summary)
1774 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1775 bool done = false;
1776 basic_block bb;
1778 FOR_EACH_BB_FN (bb, my_function)
1780 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1781 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1784 /* Entry block is always executable. */
1785 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1786 = edge_predicate_pool.allocate ();
1787 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1789 /* A simple dataflow propagation of predicates forward in the CFG.
1790 TODO: work in reverse postorder. */
1791 while (!done)
1793 done = true;
1794 FOR_EACH_BB_FN (bb, my_function)
1796 predicate p = false;
1797 edge e;
1798 edge_iterator ei;
1799 FOR_EACH_EDGE (e, ei, bb->preds)
1801 if (e->src->aux)
1803 predicate this_bb_predicate
1804 = *(predicate *) e->src->aux;
1805 if (e->aux)
1806 this_bb_predicate &= (*(class predicate *) e->aux);
1807 p = p.or_with (summary->conds, this_bb_predicate);
1808 if (p == true)
1809 break;
1812 if (p != false)
1814 basic_block pdom_bb;
1816 if (!bb->aux)
1818 done = false;
1819 bb->aux = edge_predicate_pool.allocate ();
1820 *((predicate *) bb->aux) = p;
1822 else if (p != *(predicate *) bb->aux)
1824 /* This OR operation is needed to ensure monotonous data flow
1825 in the case we hit the limit on number of clauses and the
1826 and/or operations above give approximate answers. */
1827 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1828 if (p != *(predicate *) bb->aux)
1830 done = false;
1831 *((predicate *) bb->aux) = p;
1835 /* For switch/if statement, we can OR-combine predicates of all
1836 its cases/branches to get predicate for basic block in their
1837 convergence point, but sometimes this will generate very
1838 complicated predicate. Actually, we can get simplified
1839 predicate in another way by using the fact that predicate
1840 for a basic block must also hold true for its post dominators.
1841 To be specific, basic block in convergence point of
1842 conditional statement should include predicate of the
1843 statement. */
1844 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1845 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1847 else if (!pdom_bb->aux)
1849 done = false;
1850 pdom_bb->aux = edge_predicate_pool.allocate ();
1851 *((predicate *) pdom_bb->aux) = p;
1853 else if (p != *(predicate *) pdom_bb->aux)
1855 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1856 if (p != *(predicate *) pdom_bb->aux)
1858 done = false;
1859 *((predicate *) pdom_bb->aux) = p;
1868 /* Return predicate specifying when the STMT might have result that is not
1869 a compile time constant. */
1871 static predicate
1872 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1873 class ipa_fn_summary *summary,
1874 class ipa_node_params *params_summary,
1875 tree expr,
1876 vec<predicate> nonconstant_names)
1878 tree parm;
1879 int index;
1881 while (UNARY_CLASS_P (expr))
1882 expr = TREE_OPERAND (expr, 0);
1884 parm = unmodified_parm (fbi, NULL, expr, NULL);
1885 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1886 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
1887 predicate::changed, NULL_TREE);
1888 if (is_gimple_min_invariant (expr))
1889 return false;
1890 if (TREE_CODE (expr) == SSA_NAME)
1891 return nonconstant_names[SSA_NAME_VERSION (expr)];
1892 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1894 predicate p1
1895 = will_be_nonconstant_expr_predicate (fbi, summary,
1896 params_summary,
1897 TREE_OPERAND (expr, 0),
1898 nonconstant_names);
1899 if (p1 == true)
1900 return p1;
1902 predicate p2
1903 = will_be_nonconstant_expr_predicate (fbi, summary,
1904 params_summary,
1905 TREE_OPERAND (expr, 1),
1906 nonconstant_names);
1907 return p1.or_with (summary->conds, p2);
1909 else if (TREE_CODE (expr) == COND_EXPR)
1911 predicate p1
1912 = will_be_nonconstant_expr_predicate (fbi, summary,
1913 params_summary,
1914 TREE_OPERAND (expr, 0),
1915 nonconstant_names);
1916 if (p1 == true)
1917 return p1;
1919 predicate p2
1920 = will_be_nonconstant_expr_predicate (fbi, summary,
1921 params_summary,
1922 TREE_OPERAND (expr, 1),
1923 nonconstant_names);
1924 if (p2 == true)
1925 return p2;
1926 p1 = p1.or_with (summary->conds, p2);
1927 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1928 params_summary,
1929 TREE_OPERAND (expr, 2),
1930 nonconstant_names);
1931 return p2.or_with (summary->conds, p1);
1933 else if (TREE_CODE (expr) == CALL_EXPR)
1934 return true;
1935 else
1937 debug_tree (expr);
1938 gcc_unreachable ();
1940 return false;
1944 /* Return predicate specifying when the STMT might have result that is not
1945 a compile time constant. */
1947 static predicate
1948 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1949 class ipa_fn_summary *summary,
1950 class ipa_node_params *params_summary,
1951 gimple *stmt,
1952 vec<predicate> nonconstant_names)
1954 predicate p = true;
1955 ssa_op_iter iter;
1956 tree use;
1957 tree param_type = NULL_TREE;
1958 predicate op_non_const;
1959 bool is_load;
1960 int base_index;
1961 struct agg_position_info aggpos;
1963 /* What statements might be optimized away
1964 when their arguments are constant. */
1965 if (gimple_code (stmt) != GIMPLE_ASSIGN
1966 && gimple_code (stmt) != GIMPLE_COND
1967 && gimple_code (stmt) != GIMPLE_SWITCH
1968 && (gimple_code (stmt) != GIMPLE_CALL
1969 || !(gimple_call_flags (stmt) & ECF_CONST)))
1970 return p;
1972 /* Stores will stay anyway. */
1973 if (gimple_store_p (stmt))
1974 return p;
1976 is_load = gimple_assign_load_p (stmt);
1978 /* Loads can be optimized when the value is known. */
1979 if (is_load)
1981 tree op = gimple_assign_rhs1 (stmt);
1982 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
1983 &aggpos))
1984 return p;
1986 else
1987 base_index = -1;
1989 /* See if we understand all operands before we start
1990 adding conditionals. */
1991 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1993 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1994 /* For arguments we can build a condition. */
1995 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1996 continue;
1997 if (TREE_CODE (use) != SSA_NAME)
1998 return p;
1999 /* If we know when operand is constant,
2000 we still can say something useful. */
2001 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
2002 continue;
2003 return p;
2006 if (is_load)
2007 op_non_const =
2008 add_condition (summary, params_summary,
2009 base_index, param_type, &aggpos,
2010 predicate::changed, NULL_TREE);
2011 else
2012 op_non_const = false;
2013 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2015 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2016 int index;
2018 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2020 if (index != base_index)
2021 p = add_condition (summary, params_summary, index,
2022 TREE_TYPE (parm), NULL,
2023 predicate::changed, NULL_TREE);
2024 else
2025 continue;
2027 else
2028 p = nonconstant_names[SSA_NAME_VERSION (use)];
2029 op_non_const = p.or_with (summary->conds, op_non_const);
2031 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2032 && gimple_op (stmt, 0)
2033 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2034 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2035 = op_non_const;
2036 return op_non_const;
2039 struct record_modified_bb_info
2041 tree op;
2042 bitmap bb_set;
2043 gimple *stmt;
2046 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2047 probability how often it changes between USE_BB.
2048 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2049 is in different loop nest, we can do better.
2050 This is all just estimate. In theory we look for minimal cut separating
2051 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2052 anyway. */
2054 static basic_block
2055 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2057 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2058 if (l && l->header->count < init_bb->count)
2059 return l->header;
2060 return init_bb;
2063 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2064 set except for info->stmt. */
2066 static bool
2067 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2069 struct record_modified_bb_info *info =
2070 (struct record_modified_bb_info *) data;
2071 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2072 return false;
2073 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2074 return false;
2075 bitmap_set_bit (info->bb_set,
2076 SSA_NAME_IS_DEFAULT_DEF (vdef)
2077 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2078 : get_minimal_bb
2079 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2080 gimple_bb (info->stmt))->index);
2081 if (dump_file)
2083 fprintf (dump_file, " Param ");
2084 print_generic_expr (dump_file, info->op, TDF_SLIM);
2085 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2086 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2087 get_minimal_bb
2088 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2089 gimple_bb (info->stmt))->index);
2090 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2092 return false;
2095 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2096 will change since last invocation of STMT.
2098 Value 0 is reserved for compile time invariants.
2099 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2100 ought to be REG_BR_PROB_BASE / estimated_iters. */
2102 static int
2103 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2105 tree op = gimple_call_arg (stmt, i);
2106 basic_block bb = gimple_bb (stmt);
2108 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2109 op = TREE_OPERAND (op, 0);
2111 tree base = get_base_address (op);
2113 /* Global invariants never change. */
2114 if (is_gimple_min_invariant (base))
2115 return 0;
2117 /* We would have to do non-trivial analysis to really work out what
2118 is the probability of value to change (i.e. when init statement
2119 is in a sibling loop of the call).
2121 We do an conservative estimate: when call is executed N times more often
2122 than the statement defining value, we take the frequency 1/N. */
2123 if (TREE_CODE (base) == SSA_NAME)
2125 profile_count init_count;
2127 if (!bb->count.nonzero_p ())
2128 return REG_BR_PROB_BASE;
2130 if (SSA_NAME_IS_DEFAULT_DEF (base))
2131 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2132 else
2133 init_count = get_minimal_bb
2134 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2135 gimple_bb (stmt))->count;
2137 if (init_count < bb->count)
2138 return MAX ((init_count.to_sreal_scale (bb->count)
2139 * REG_BR_PROB_BASE).to_int (), 1);
2140 return REG_BR_PROB_BASE;
2142 else
2144 ao_ref refd;
2145 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2146 struct record_modified_bb_info info;
2147 tree init = ctor_for_folding (base);
2149 if (init != error_mark_node)
2150 return 0;
2151 if (!bb->count.nonzero_p ())
2152 return REG_BR_PROB_BASE;
2153 if (dump_file)
2155 fprintf (dump_file, " Analyzing param change probability of ");
2156 print_generic_expr (dump_file, op, TDF_SLIM);
2157 fprintf (dump_file, "\n");
2159 ao_ref_init (&refd, op);
2160 info.op = op;
2161 info.stmt = stmt;
2162 info.bb_set = BITMAP_ALLOC (NULL);
2163 int walked
2164 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2165 NULL, NULL, fbi->aa_walk_budget);
2166 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2168 if (dump_file)
2170 if (walked < 0)
2171 fprintf (dump_file, " Ran out of AA walking budget.\n");
2172 else
2173 fprintf (dump_file, " Set in same BB as used.\n");
2175 BITMAP_FREE (info.bb_set);
2176 return REG_BR_PROB_BASE;
2179 bitmap_iterator bi;
2180 unsigned index;
2181 /* Lookup the most frequent update of the value and believe that
2182 it dominates all the other; precise analysis here is difficult. */
2183 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2184 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2185 if (dump_file)
2187 fprintf (dump_file, " Set with count ");
2188 max.dump (dump_file);
2189 fprintf (dump_file, " and used with count ");
2190 bb->count.dump (dump_file);
2191 fprintf (dump_file, " freq %f\n",
2192 max.to_sreal_scale (bb->count).to_double ());
2195 BITMAP_FREE (info.bb_set);
2196 if (max < bb->count)
2197 return MAX ((max.to_sreal_scale (bb->count)
2198 * REG_BR_PROB_BASE).to_int (), 1);
2199 return REG_BR_PROB_BASE;
2203 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2204 sub-graph and if the predicate the condition depends on is known. If so,
2205 return true and store the pointer the predicate in *P. */
2207 static bool
2208 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2209 ipa_fn_summary *summary,
2210 class ipa_node_params *params_summary,
2211 basic_block bb,
2212 predicate *p,
2213 vec<predicate> nonconstant_names)
2215 edge e;
2216 edge_iterator ei;
2217 basic_block first_bb = NULL;
2218 gimple *stmt;
2220 if (single_pred_p (bb))
2222 *p = false;
2223 return true;
2226 FOR_EACH_EDGE (e, ei, bb->preds)
2228 if (single_succ_p (e->src))
2230 if (!single_pred_p (e->src))
2231 return false;
2232 if (!first_bb)
2233 first_bb = single_pred (e->src);
2234 else if (single_pred (e->src) != first_bb)
2235 return false;
2237 else
2239 if (!first_bb)
2240 first_bb = e->src;
2241 else if (e->src != first_bb)
2242 return false;
2246 if (!first_bb)
2247 return false;
2249 stmt = last_stmt (first_bb);
2250 if (!stmt
2251 || gimple_code (stmt) != GIMPLE_COND
2252 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2253 return false;
2255 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2256 gimple_cond_lhs (stmt),
2257 nonconstant_names);
2258 if (*p == true)
2259 return false;
2260 else
2261 return true;
2264 /* Given a PHI statement in a function described by inline properties SUMMARY
2265 and *P being the predicate describing whether the selected PHI argument is
2266 known, store a predicate for the result of the PHI statement into
2267 NONCONSTANT_NAMES, if possible. */
2269 static void
2270 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2271 predicate *p,
2272 vec<predicate> nonconstant_names)
2274 unsigned i;
2276 for (i = 0; i < gimple_phi_num_args (phi); i++)
2278 tree arg = gimple_phi_arg (phi, i)->def;
2279 if (!is_gimple_min_invariant (arg))
2281 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2282 *p = p->or_with (summary->conds,
2283 nonconstant_names[SSA_NAME_VERSION (arg)]);
2284 if (*p == true)
2285 return;
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file, "\t\tphi predicate: ");
2292 p->dump (dump_file, summary->conds);
2294 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2297 /* For a typical usage of __builtin_expect (a<b, 1), we
2298 may introduce an extra relation stmt:
2299 With the builtin, we have
2300 t1 = a <= b;
2301 t2 = (long int) t1;
2302 t3 = __builtin_expect (t2, 1);
2303 if (t3 != 0)
2304 goto ...
2305 Without the builtin, we have
2306 if (a<=b)
2307 goto...
2308 This affects the size/time estimation and may have
2309 an impact on the earlier inlining.
2310 Here find this pattern and fix it up later. */
2312 static gimple *
2313 find_foldable_builtin_expect (basic_block bb)
2315 gimple_stmt_iterator bsi;
2317 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2319 gimple *stmt = gsi_stmt (bsi);
2320 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2321 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2322 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2324 tree var = gimple_call_lhs (stmt);
2325 tree arg = gimple_call_arg (stmt, 0);
2326 use_operand_p use_p;
2327 gimple *use_stmt;
2328 bool match = false;
2329 bool done = false;
2331 if (!var || !arg)
2332 continue;
2333 gcc_assert (TREE_CODE (var) == SSA_NAME);
2335 while (TREE_CODE (arg) == SSA_NAME)
2337 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2338 if (!is_gimple_assign (stmt_tmp))
2339 break;
2340 switch (gimple_assign_rhs_code (stmt_tmp))
2342 case LT_EXPR:
2343 case LE_EXPR:
2344 case GT_EXPR:
2345 case GE_EXPR:
2346 case EQ_EXPR:
2347 case NE_EXPR:
2348 match = true;
2349 done = true;
2350 break;
2351 CASE_CONVERT:
2352 break;
2353 default:
2354 done = true;
2355 break;
2357 if (done)
2358 break;
2359 arg = gimple_assign_rhs1 (stmt_tmp);
2362 if (match && single_imm_use (var, &use_p, &use_stmt)
2363 && gimple_code (use_stmt) == GIMPLE_COND)
2364 return use_stmt;
2367 return NULL;
2370 /* Return true when the basic blocks contains only clobbers followed by RESX.
2371 Such BBs are kept around to make removal of dead stores possible with
2372 presence of EH and will be optimized out by optimize_clobbers later in the
2373 game.
2375 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2376 that can be clobber only, too.. When it is false, the RESX is not necessary
2377 on the end of basic block. */
2379 static bool
2380 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2382 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2383 edge_iterator ei;
2384 edge e;
2386 if (need_eh)
2388 if (gsi_end_p (gsi))
2389 return false;
2390 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2391 return false;
2392 gsi_prev (&gsi);
2394 else if (!single_succ_p (bb))
2395 return false;
2397 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2399 gimple *stmt = gsi_stmt (gsi);
2400 if (is_gimple_debug (stmt))
2401 continue;
2402 if (gimple_clobber_p (stmt))
2403 continue;
2404 if (gimple_code (stmt) == GIMPLE_LABEL)
2405 break;
2406 return false;
2409 /* See if all predecessors are either throws or clobber only BBs. */
2410 FOR_EACH_EDGE (e, ei, bb->preds)
2411 if (!(e->flags & EDGE_EH)
2412 && !clobber_only_eh_bb_p (e->src, false))
2413 return false;
2415 return true;
2418 /* Return true if STMT compute a floating point expression that may be affected
2419 by -ffast-math and similar flags. */
2421 static bool
2422 fp_expression_p (gimple *stmt)
2424 ssa_op_iter i;
2425 tree op;
2427 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2428 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2429 return true;
2430 return false;
2433 /* Analyze function body for NODE.
2434 EARLY indicates run from early optimization pipeline. */
2436 static void
2437 analyze_function_body (struct cgraph_node *node, bool early)
2439 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2440 /* Estimate static overhead for function prologue/epilogue and alignment. */
2441 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2442 /* Benefits are scaled by probability of elimination that is in range
2443 <0,2>. */
2444 basic_block bb;
2445 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2446 sreal freq;
2447 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2448 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2449 predicate bb_predicate;
2450 struct ipa_func_body_info fbi;
2451 vec<predicate> nonconstant_names = vNULL;
2452 int nblocks, n;
2453 int *order;
2454 gimple *fix_builtin_expect_stmt;
2456 gcc_assert (my_function && my_function->cfg);
2457 gcc_assert (cfun == my_function);
2459 memset(&fbi, 0, sizeof(fbi));
2460 vec_free (info->conds);
2461 info->conds = NULL;
2462 vec_free (info->size_time_table);
2463 info->size_time_table = NULL;
2465 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2466 so we can produce proper inline hints.
2468 When optimizing and analyzing for early inliner, initialize node params
2469 so we can produce correct BB predicates. */
2471 if (opt_for_fn (node->decl, optimize))
2473 calculate_dominance_info (CDI_DOMINATORS);
2474 calculate_dominance_info (CDI_POST_DOMINATORS);
2475 if (!early)
2476 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2477 else
2479 ipa_check_create_node_params ();
2480 ipa_initialize_node_params (node);
2483 if (ipa_node_params_sum)
2485 fbi.node = node;
2486 fbi.info = IPA_NODE_REF (node);
2487 fbi.bb_infos = vNULL;
2488 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
2489 fbi.param_count = count_formal_params (node->decl);
2490 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2492 nonconstant_names.safe_grow_cleared
2493 (SSANAMES (my_function)->length (), true);
2497 if (dump_file)
2498 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2499 node->dump_name ());
2501 /* When we run into maximal number of entries, we assign everything to the
2502 constant truth case. Be sure to have it in list. */
2503 bb_predicate = true;
2504 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2506 bb_predicate = predicate::not_inlined ();
2507 info->account_size_time (opt_for_fn (node->decl,
2508 param_uninlined_function_insns)
2509 * ipa_fn_summary::size_scale,
2510 opt_for_fn (node->decl,
2511 param_uninlined_function_time),
2512 bb_predicate,
2513 bb_predicate);
2515 if (fbi.info)
2516 compute_bb_predicates (&fbi, node, info, params_summary);
2517 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2518 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2519 for (n = 0; n < nblocks; n++)
2521 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2522 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2523 if (clobber_only_eh_bb_p (bb))
2525 if (dump_file && (dump_flags & TDF_DETAILS))
2526 fprintf (dump_file, "\n Ignoring BB %i;"
2527 " it will be optimized away by cleanup_clobbers\n",
2528 bb->index);
2529 continue;
2532 /* TODO: Obviously predicates can be propagated down across CFG. */
2533 if (fbi.info)
2535 if (bb->aux)
2536 bb_predicate = *(predicate *) bb->aux;
2537 else
2538 bb_predicate = false;
2540 else
2541 bb_predicate = true;
2543 if (dump_file && (dump_flags & TDF_DETAILS))
2545 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2546 bb_predicate.dump (dump_file, info->conds);
2549 if (fbi.info && nonconstant_names.exists ())
2551 predicate phi_predicate;
2552 bool first_phi = true;
2554 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2555 gsi_next (&bsi))
2557 if (first_phi
2558 && !phi_result_unknown_predicate (&fbi, info,
2559 params_summary,
2561 &phi_predicate,
2562 nonconstant_names))
2563 break;
2564 first_phi = false;
2565 if (dump_file && (dump_flags & TDF_DETAILS))
2567 fprintf (dump_file, " ");
2568 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2570 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2571 nonconstant_names);
2575 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2577 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2578 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2580 gimple *stmt = gsi_stmt (bsi);
2581 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2582 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2583 int prob;
2584 predicate will_be_nonconstant;
2586 /* This relation stmt should be folded after we remove
2587 __builtin_expect call. Adjust the cost here. */
2588 if (stmt == fix_builtin_expect_stmt)
2590 this_size--;
2591 this_time--;
2594 if (dump_file && (dump_flags & TDF_DETAILS))
2596 fprintf (dump_file, " ");
2597 print_gimple_stmt (dump_file, stmt, 0);
2598 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2599 freq.to_double (), this_size,
2600 this_time);
2603 if (is_gimple_call (stmt)
2604 && !gimple_call_internal_p (stmt))
2606 struct cgraph_edge *edge = node->get_edge (stmt);
2607 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2609 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2610 resolved as constant. We however don't want to optimize
2611 out the cgraph edges. */
2612 if (nonconstant_names.exists ()
2613 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2614 && gimple_call_lhs (stmt)
2615 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2617 predicate false_p = false;
2618 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2619 = false_p;
2621 if (ipa_node_params_sum)
2623 int count = gimple_call_num_args (stmt);
2624 int i;
2626 if (count)
2627 es->param.safe_grow_cleared (count, true);
2628 for (i = 0; i < count; i++)
2630 int prob = param_change_prob (&fbi, stmt, i);
2631 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2632 es->param[i].change_prob = prob;
2636 es->call_stmt_size = this_size;
2637 es->call_stmt_time = this_time;
2638 es->loop_depth = bb_loop_depth (bb);
2639 edge_set_predicate (edge, &bb_predicate);
2640 if (edge->speculative)
2642 cgraph_edge *indirect
2643 = edge->speculative_call_indirect_edge ();
2644 ipa_call_summary *es2
2645 = ipa_call_summaries->get_create (indirect);
2646 ipa_call_summaries->duplicate (edge, indirect,
2647 es, es2);
2649 /* Edge is the first direct call.
2650 create and duplicate call summaries for multiple
2651 speculative call targets. */
2652 for (cgraph_edge *direct
2653 = edge->next_speculative_call_target ();
2654 direct;
2655 direct = direct->next_speculative_call_target ())
2657 ipa_call_summary *es3
2658 = ipa_call_summaries->get_create (direct);
2659 ipa_call_summaries->duplicate (edge, direct,
2660 es, es3);
2665 /* TODO: When conditional jump or switch is known to be constant, but
2666 we did not translate it into the predicates, we really can account
2667 just maximum of the possible paths. */
2668 if (fbi.info)
2669 will_be_nonconstant
2670 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2671 stmt, nonconstant_names);
2672 else
2673 will_be_nonconstant = true;
2674 if (this_time || this_size)
2676 sreal final_time = (sreal)this_time * freq;
2678 prob = eliminated_by_inlining_prob (&fbi, stmt);
2679 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2680 fprintf (dump_file,
2681 "\t\t50%% will be eliminated by inlining\n");
2682 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2683 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2685 class predicate p = bb_predicate & will_be_nonconstant;
2687 /* We can ignore statement when we proved it is never going
2688 to happen, but we cannot do that for call statements
2689 because edges are accounted specially. */
2691 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2693 time += final_time;
2694 size += this_size;
2697 /* We account everything but the calls. Calls have their own
2698 size/time info attached to cgraph edges. This is necessary
2699 in order to make the cost disappear after inlining. */
2700 if (!is_gimple_call (stmt))
2702 if (prob)
2704 predicate ip = bb_predicate & predicate::not_inlined ();
2705 info->account_size_time (this_size * prob,
2706 (final_time * prob) / 2, ip,
2709 if (prob != 2)
2710 info->account_size_time (this_size * (2 - prob),
2711 (final_time * (2 - prob) / 2),
2712 bb_predicate,
2716 if (!info->fp_expressions && fp_expression_p (stmt))
2718 info->fp_expressions = true;
2719 if (dump_file)
2720 fprintf (dump_file, " fp_expression set\n");
2724 /* Account cost of address calculations in the statements. */
2725 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2727 for (tree op = gimple_op (stmt, i);
2728 op && handled_component_p (op);
2729 op = TREE_OPERAND (op, 0))
2730 if ((TREE_CODE (op) == ARRAY_REF
2731 || TREE_CODE (op) == ARRAY_RANGE_REF)
2732 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2734 predicate p = bb_predicate;
2735 if (fbi.info)
2736 p = p & will_be_nonconstant_expr_predicate
2737 (&fbi, info, params_summary,
2738 TREE_OPERAND (op, 1),
2739 nonconstant_names);
2740 if (p != false)
2742 time += freq;
2743 size += 1;
2744 if (dump_file)
2745 fprintf (dump_file,
2746 "\t\tAccounting address calculation.\n");
2747 info->account_size_time (ipa_fn_summary::size_scale,
2748 freq,
2749 bb_predicate,
2757 free (order);
2759 if (nonconstant_names.exists () && !early)
2761 class loop *loop;
2762 predicate loop_iterations = true;
2763 predicate loop_stride = true;
2765 if (dump_file && (dump_flags & TDF_DETAILS))
2766 flow_loops_dump (dump_file, NULL, 0);
2767 scev_initialize ();
2768 FOR_EACH_LOOP (loop, 0)
2770 vec<edge> exits;
2771 edge ex;
2772 unsigned int j;
2773 class tree_niter_desc niter_desc;
2774 if (loop->header->aux)
2775 bb_predicate = *(predicate *) loop->header->aux;
2776 else
2777 bb_predicate = false;
2779 exits = get_loop_exit_edges (loop);
2780 FOR_EACH_VEC_ELT (exits, j, ex)
2781 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2782 && !is_gimple_min_invariant (niter_desc.niter))
2784 predicate will_be_nonconstant
2785 = will_be_nonconstant_expr_predicate (&fbi, info,
2786 params_summary,
2787 niter_desc.niter,
2788 nonconstant_names);
2789 if (will_be_nonconstant != true)
2790 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2791 if (will_be_nonconstant != true
2792 && will_be_nonconstant != false)
2793 /* This is slightly inprecise. We may want to represent each
2794 loop with independent predicate. */
2795 loop_iterations &= will_be_nonconstant;
2797 exits.release ();
2800 /* To avoid quadratic behavior we analyze stride predicates only
2801 with respect to the containing loop. Thus we simply iterate
2802 over all defs in the outermost loop body. */
2803 for (loop = loops_for_fn (cfun)->tree_root->inner;
2804 loop != NULL; loop = loop->next)
2806 basic_block *body = get_loop_body (loop);
2807 for (unsigned i = 0; i < loop->num_nodes; i++)
2809 gimple_stmt_iterator gsi;
2810 if (body[i]->aux)
2811 bb_predicate = *(predicate *) body[i]->aux;
2812 else
2813 bb_predicate = false;
2814 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2815 gsi_next (&gsi))
2817 gimple *stmt = gsi_stmt (gsi);
2819 if (!is_gimple_assign (stmt))
2820 continue;
2822 tree def = gimple_assign_lhs (stmt);
2823 if (TREE_CODE (def) != SSA_NAME)
2824 continue;
2826 affine_iv iv;
2827 if (!simple_iv (loop_containing_stmt (stmt),
2828 loop_containing_stmt (stmt),
2829 def, &iv, true)
2830 || is_gimple_min_invariant (iv.step))
2831 continue;
2833 predicate will_be_nonconstant
2834 = will_be_nonconstant_expr_predicate (&fbi, info,
2835 params_summary,
2836 iv.step,
2837 nonconstant_names);
2838 if (will_be_nonconstant != true)
2839 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2840 if (will_be_nonconstant != true
2841 && will_be_nonconstant != false)
2842 /* This is slightly inprecise. We may want to represent
2843 each loop with independent predicate. */
2844 loop_stride = loop_stride & will_be_nonconstant;
2847 free (body);
2849 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2850 set_hint_predicate (&s->loop_iterations, loop_iterations);
2851 set_hint_predicate (&s->loop_stride, loop_stride);
2852 scev_finalize ();
2854 FOR_ALL_BB_FN (bb, my_function)
2856 edge e;
2857 edge_iterator ei;
2859 if (bb->aux)
2860 edge_predicate_pool.remove ((predicate *)bb->aux);
2861 bb->aux = NULL;
2862 FOR_EACH_EDGE (e, ei, bb->succs)
2864 if (e->aux)
2865 edge_predicate_pool.remove ((predicate *) e->aux);
2866 e->aux = NULL;
2869 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2870 ipa_size_summary *ss = ipa_size_summaries->get (node);
2871 s->time = time;
2872 ss->self_size = size;
2873 nonconstant_names.release ();
2874 ipa_release_body_info (&fbi);
2875 if (opt_for_fn (node->decl, optimize))
2877 if (!early)
2878 loop_optimizer_finalize ();
2879 else if (!ipa_edge_args_sum)
2880 ipa_free_all_node_params ();
2881 free_dominance_info (CDI_DOMINATORS);
2882 free_dominance_info (CDI_POST_DOMINATORS);
2884 if (dump_file)
2886 fprintf (dump_file, "\n");
2887 ipa_dump_fn_summary (dump_file, node);
2892 /* Compute function summary.
2893 EARLY is true when we compute parameters during early opts. */
2895 void
2896 compute_fn_summary (struct cgraph_node *node, bool early)
2898 HOST_WIDE_INT self_stack_size;
2899 struct cgraph_edge *e;
2901 gcc_assert (!node->inlined_to);
2903 if (!ipa_fn_summaries)
2904 ipa_fn_summary_alloc ();
2906 /* Create a new ipa_fn_summary. */
2907 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2908 ipa_fn_summaries->remove (node);
2909 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2910 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
2912 /* Estimate the stack size for the function if we're optimizing. */
2913 self_stack_size = optimize && !node->thunk.thunk_p
2914 ? estimated_stack_frame_size (node) : 0;
2915 size_info->estimated_self_stack_size = self_stack_size;
2916 info->estimated_stack_size = self_stack_size;
2918 if (node->thunk.thunk_p)
2920 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2921 predicate t = true;
2923 node->can_change_signature = false;
2924 es->call_stmt_size = eni_size_weights.call_cost;
2925 es->call_stmt_time = eni_time_weights.call_cost;
2926 info->account_size_time (ipa_fn_summary::size_scale
2927 * opt_for_fn (node->decl,
2928 param_uninlined_function_thunk_insns),
2929 opt_for_fn (node->decl,
2930 param_uninlined_function_thunk_time), t, t);
2931 t = predicate::not_inlined ();
2932 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2933 ipa_update_overall_fn_summary (node);
2934 size_info->self_size = size_info->size;
2935 if (stdarg_p (TREE_TYPE (node->decl)))
2937 info->inlinable = false;
2938 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2940 else
2941 info->inlinable = true;
2943 else
2945 /* Even is_gimple_min_invariant rely on current_function_decl. */
2946 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2948 /* During IPA profile merging we may be called w/o virtual SSA form
2949 built. */
2950 update_ssa (TODO_update_ssa_only_virtuals);
2952 /* Can this function be inlined at all? */
2953 if (!opt_for_fn (node->decl, optimize)
2954 && !lookup_attribute ("always_inline",
2955 DECL_ATTRIBUTES (node->decl)))
2956 info->inlinable = false;
2957 else
2958 info->inlinable = tree_inlinable_function_p (node->decl);
2960 /* Type attributes can use parameter indices to describe them. */
2961 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2962 /* Likewise for #pragma omp declare simd functions or functions
2963 with simd attribute. */
2964 || lookup_attribute ("omp declare simd",
2965 DECL_ATTRIBUTES (node->decl)))
2966 node->can_change_signature = false;
2967 else
2969 /* Otherwise, inlinable functions always can change signature. */
2970 if (info->inlinable)
2971 node->can_change_signature = true;
2972 else
2974 /* Functions calling builtin_apply cannot change signature. */
2975 for (e = node->callees; e; e = e->next_callee)
2977 tree cdecl = e->callee->decl;
2978 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2979 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2980 break;
2982 node->can_change_signature = !e;
2985 analyze_function_body (node, early);
2986 pop_cfun ();
2989 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2990 size_info->size = size_info->self_size;
2991 info->estimated_stack_size = size_info->estimated_self_stack_size;
2993 /* Code above should compute exactly the same result as
2994 ipa_update_overall_fn_summary but because computation happens in
2995 different order the roundoff errors result in slight changes. */
2996 ipa_update_overall_fn_summary (node);
2997 /* In LTO mode we may have speculative edges set. */
2998 gcc_assert (in_lto_p || size_info->size == size_info->self_size);
3002 /* Compute parameters of functions used by inliner using
3003 current_function_decl. */
3005 static unsigned int
3006 compute_fn_summary_for_current (void)
3008 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3009 return 0;
3012 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
3013 KNOWN_CONTEXTS and KNOWN_AGGS. */
3015 static bool
3016 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3017 int *size, int *time,
3018 vec<tree> known_vals,
3019 vec<ipa_polymorphic_call_context> known_contexts,
3020 vec<ipa_agg_value_set> known_aggs)
3022 tree target;
3023 struct cgraph_node *callee;
3024 class ipa_fn_summary *isummary;
3025 enum availability avail;
3026 bool speculative;
3028 if (!known_vals.length () && !known_contexts.length ())
3029 return false;
3030 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3031 return false;
3033 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
3034 known_aggs, &speculative);
3035 if (!target || speculative)
3036 return false;
3038 /* Account for difference in cost between indirect and direct calls. */
3039 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3040 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3041 gcc_checking_assert (*time >= 0);
3042 gcc_checking_assert (*size >= 0);
3044 callee = cgraph_node::get (target);
3045 if (!callee || !callee->definition)
3046 return false;
3047 callee = callee->function_symbol (&avail);
3048 if (avail < AVAIL_AVAILABLE)
3049 return false;
3050 isummary = ipa_fn_summaries->get (callee);
3051 if (isummary == NULL)
3052 return false;
3054 return isummary->inlinable;
3057 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3058 handle edge E with probability PROB.
3059 Set HINTS if edge may be devirtualized.
3060 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
3061 site. */
3063 static inline void
3064 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3065 sreal *time,
3066 vec<tree> known_vals,
3067 vec<ipa_polymorphic_call_context> known_contexts,
3068 vec<ipa_agg_value_set> known_aggs,
3069 ipa_hints *hints)
3071 class ipa_call_summary *es = ipa_call_summaries->get (e);
3072 int call_size = es->call_stmt_size;
3073 int call_time = es->call_stmt_time;
3074 int cur_size;
3076 if (!e->callee && hints && e->maybe_hot_p ()
3077 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
3078 known_vals, known_contexts, known_aggs))
3079 *hints |= INLINE_HINT_indirect_call;
3080 cur_size = call_size * ipa_fn_summary::size_scale;
3081 *size += cur_size;
3082 if (min_size)
3083 *min_size += cur_size;
3084 if (time)
3085 *time += ((sreal)call_time) * e->sreal_frequency ();
3089 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3090 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3091 describe context of the call site.
3093 Helper for estimate_calls_size_and_time which does the same but
3094 (in most cases) faster. */
3096 static void
3097 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3098 int *min_size, sreal *time,
3099 ipa_hints *hints,
3100 clause_t possible_truths,
3101 vec<tree> known_vals,
3102 vec<ipa_polymorphic_call_context> known_contexts,
3103 vec<ipa_agg_value_set> known_aggs)
3105 struct cgraph_edge *e;
3106 for (e = node->callees; e; e = e->next_callee)
3108 if (!e->inline_failed)
3110 gcc_checking_assert (!ipa_call_summaries->get (e));
3111 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3112 hints,
3113 possible_truths,
3114 known_vals, known_contexts,
3115 known_aggs);
3116 continue;
3118 class ipa_call_summary *es = ipa_call_summaries->get (e);
3120 /* Do not care about zero sized builtins. */
3121 if (!es->call_stmt_size)
3123 gcc_checking_assert (!es->call_stmt_time);
3124 continue;
3126 if (!es->predicate
3127 || es->predicate->evaluate (possible_truths))
3129 /* Predicates of calls shall not use NOT_CHANGED codes,
3130 so we do not need to compute probabilities. */
3131 estimate_edge_size_and_time (e, size,
3132 es->predicate ? NULL : min_size,
3133 time,
3134 known_vals, known_contexts,
3135 known_aggs, hints);
3138 for (e = node->indirect_calls; e; e = e->next_callee)
3140 class ipa_call_summary *es = ipa_call_summaries->get (e);
3141 if (!es->predicate
3142 || es->predicate->evaluate (possible_truths))
3143 estimate_edge_size_and_time (e, size,
3144 es->predicate ? NULL : min_size,
3145 time,
3146 known_vals, known_contexts, known_aggs,
3147 hints);
3151 /* Populate sum->call_size_time_table for edges from NODE. */
3153 static void
3154 summarize_calls_size_and_time (struct cgraph_node *node,
3155 ipa_fn_summary *sum)
3157 struct cgraph_edge *e;
3158 for (e = node->callees; e; e = e->next_callee)
3160 if (!e->inline_failed)
3162 gcc_checking_assert (!ipa_call_summaries->get (e));
3163 summarize_calls_size_and_time (e->callee, sum);
3164 continue;
3166 int size = 0;
3167 sreal time = 0;
3169 estimate_edge_size_and_time (e, &size, NULL, &time,
3170 vNULL, vNULL, vNULL, NULL);
3172 struct predicate pred = true;
3173 class ipa_call_summary *es = ipa_call_summaries->get (e);
3175 if (es->predicate)
3176 pred = *es->predicate;
3177 sum->account_size_time (size, time, pred, pred, true);
3179 for (e = node->indirect_calls; e; e = e->next_callee)
3181 int size = 0;
3182 sreal time = 0;
3184 estimate_edge_size_and_time (e, &size, NULL, &time,
3185 vNULL, vNULL, vNULL, NULL);
3186 struct predicate pred = true;
3187 class ipa_call_summary *es = ipa_call_summaries->get (e);
3189 if (es->predicate)
3190 pred = *es->predicate;
3191 sum->account_size_time (size, time, pred, pred, true);
3195 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3196 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3197 describe context of the call site. */
3199 static void
3200 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3201 int *min_size, sreal *time,
3202 ipa_hints *hints,
3203 clause_t possible_truths,
3204 vec<tree> known_vals,
3205 vec<ipa_polymorphic_call_context> known_contexts,
3206 vec<ipa_agg_value_set> known_aggs)
3208 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3209 bool use_table = true;
3211 gcc_assert (node->callees || node->indirect_calls);
3213 /* During early inlining we do not calculate info for very
3214 large functions and thus there is no need for producing
3215 summaries. */
3216 if (!ipa_node_params_sum)
3217 use_table = false;
3218 /* Do not calculate summaries for simple wrappers; it is waste
3219 of memory. */
3220 else if (node->callees && node->indirect_calls
3221 && node->callees->inline_failed && !node->callees->next_callee)
3222 use_table = false;
3223 /* If there is an indirect edge that may be optimized, we need
3224 to go the slow way. */
3225 else if ((known_vals.length ()
3226 || known_contexts.length ()
3227 || known_aggs.length ()) && hints)
3229 class ipa_node_params *params_summary = IPA_NODE_REF (node);
3230 unsigned int nargs = params_summary
3231 ? ipa_get_param_count (params_summary) : 0;
3233 for (unsigned int i = 0; i < nargs && use_table; i++)
3235 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3236 && ((known_vals.length () > i && known_vals[i])
3237 || (known_aggs.length () > i
3238 && known_aggs[i].items.length ())))
3239 use_table = false;
3240 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3241 && (known_contexts.length () > i
3242 && !known_contexts[i].useless_p ()))
3243 use_table = false;
3247 /* Fast path is via the call size time table. */
3248 if (use_table)
3250 /* Build summary if it is absent. */
3251 if (!sum->call_size_time_table)
3253 predicate true_pred = true;
3254 sum->account_size_time (0, 0, true_pred, true_pred, true);
3255 summarize_calls_size_and_time (node, sum);
3258 int old_size = *size;
3259 sreal old_time = time ? *time : 0;
3261 if (min_size)
3262 *min_size += (*sum->call_size_time_table)[0].size;
3264 unsigned int i;
3265 size_time_entry *e;
3267 /* Walk the table and account sizes and times. */
3268 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
3269 i++)
3270 if (e->exec_predicate.evaluate (possible_truths))
3272 *size += e->size;
3273 if (time)
3274 *time += e->time;
3277 /* Be careful and see if both methods agree. */
3278 if ((flag_checking || dump_file)
3279 /* Do not try to sanity check when we know we lost some
3280 precision. */
3281 && sum->call_size_time_table->length ()
3282 < ipa_fn_summary::max_size_time_table_size)
3284 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3285 possible_truths, known_vals,
3286 known_contexts, known_aggs);
3287 gcc_assert (*size == old_size);
3288 if (time && (*time - old_time > 1 || *time - old_time < -1)
3289 && dump_file)
3290 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3291 old_time.to_double (),
3292 time->to_double ());
3295 /* Slow path by walking all edges. */
3296 else
3297 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3298 possible_truths, known_vals, known_contexts,
3299 known_aggs);
3302 /* Default constructor for ipa call context.
3303 Memory allocation of known_vals, known_contexts
3304 and known_aggs vectors is owned by the caller, but can
3305 be release by ipa_call_context::release.
3307 inline_param_summary is owned by the caller. */
3308 ipa_call_context::ipa_call_context (cgraph_node *node,
3309 clause_t possible_truths,
3310 clause_t nonspec_possible_truths,
3311 vec<tree> known_vals,
3312 vec<ipa_polymorphic_call_context>
3313 known_contexts,
3314 vec<ipa_agg_value_set> known_aggs,
3315 vec<inline_param_summary>
3316 inline_param_summary)
3317 : m_node (node), m_possible_truths (possible_truths),
3318 m_nonspec_possible_truths (nonspec_possible_truths),
3319 m_inline_param_summary (inline_param_summary),
3320 m_known_vals (known_vals),
3321 m_known_contexts (known_contexts),
3322 m_known_aggs (known_aggs)
3326 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3328 void
3329 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3331 m_node = ctx.m_node;
3332 m_possible_truths = ctx.m_possible_truths;
3333 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3334 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3335 unsigned int nargs = params_summary
3336 ? ipa_get_param_count (params_summary) : 0;
3338 m_inline_param_summary = vNULL;
3339 /* Copy the info only if there is at least one useful entry. */
3340 if (ctx.m_inline_param_summary.exists ())
3342 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3344 for (unsigned int i = 0; i < n; i++)
3345 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3346 && !ctx.m_inline_param_summary[i].useless_p ())
3348 m_inline_param_summary
3349 = ctx.m_inline_param_summary.copy ();
3350 break;
3353 m_known_vals = vNULL;
3354 if (ctx.m_known_vals.exists ())
3356 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3358 for (unsigned int i = 0; i < n; i++)
3359 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3360 && ctx.m_known_vals[i])
3362 m_known_vals = ctx.m_known_vals.copy ();
3363 break;
3367 m_known_contexts = vNULL;
3368 if (ctx.m_known_contexts.exists ())
3370 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3372 for (unsigned int i = 0; i < n; i++)
3373 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3374 && !ctx.m_known_contexts[i].useless_p ())
3376 m_known_contexts = ctx.m_known_contexts.copy ();
3377 break;
3381 m_known_aggs = vNULL;
3382 if (ctx.m_known_aggs.exists ())
3384 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3386 for (unsigned int i = 0; i < n; i++)
3387 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3388 && !ctx.m_known_aggs[i].is_empty ())
3390 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3391 break;
3396 /* Release memory used by known_vals/contexts/aggs vectors.
3397 If ALL is true release also inline_param_summary.
3398 This happens when context was previously duplicated to be stored
3399 into cache. */
3401 void
3402 ipa_call_context::release (bool all)
3404 /* See if context is initialized at first place. */
3405 if (!m_node)
3406 return;
3407 ipa_release_agg_values (m_known_aggs, all);
3408 if (all)
3410 m_known_vals.release ();
3411 m_known_contexts.release ();
3412 m_inline_param_summary.release ();
3416 /* Return true if CTX describes the same call context as THIS. */
3418 bool
3419 ipa_call_context::equal_to (const ipa_call_context &ctx)
3421 if (m_node != ctx.m_node
3422 || m_possible_truths != ctx.m_possible_truths
3423 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3424 return false;
3426 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3427 unsigned int nargs = params_summary
3428 ? ipa_get_param_count (params_summary) : 0;
3430 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3432 for (unsigned int i = 0; i < nargs; i++)
3434 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3435 continue;
3436 if (i >= m_inline_param_summary.length ()
3437 || m_inline_param_summary[i].useless_p ())
3439 if (i < ctx.m_inline_param_summary.length ()
3440 && !ctx.m_inline_param_summary[i].useless_p ())
3441 return false;
3442 continue;
3444 if (i >= ctx.m_inline_param_summary.length ()
3445 || ctx.m_inline_param_summary[i].useless_p ())
3447 if (i < m_inline_param_summary.length ()
3448 && !m_inline_param_summary[i].useless_p ())
3449 return false;
3450 continue;
3452 if (!m_inline_param_summary[i].equal_to
3453 (ctx.m_inline_param_summary[i]))
3454 return false;
3457 if (m_known_vals.exists () || ctx.m_known_vals.exists ())
3459 for (unsigned int i = 0; i < nargs; i++)
3461 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3462 continue;
3463 if (i >= m_known_vals.length () || !m_known_vals[i])
3465 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
3466 return false;
3467 continue;
3469 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
3471 if (i < m_known_vals.length () && m_known_vals[i])
3472 return false;
3473 continue;
3475 if (m_known_vals[i] != ctx.m_known_vals[i])
3476 return false;
3479 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
3481 for (unsigned int i = 0; i < nargs; i++)
3483 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3484 continue;
3485 if (i >= m_known_contexts.length ()
3486 || m_known_contexts[i].useless_p ())
3488 if (i < ctx.m_known_contexts.length ()
3489 && !ctx.m_known_contexts[i].useless_p ())
3490 return false;
3491 continue;
3493 if (i >= ctx.m_known_contexts.length ()
3494 || ctx.m_known_contexts[i].useless_p ())
3496 if (i < m_known_contexts.length ()
3497 && !m_known_contexts[i].useless_p ())
3498 return false;
3499 continue;
3501 if (!m_known_contexts[i].equal_to
3502 (ctx.m_known_contexts[i]))
3503 return false;
3506 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
3508 for (unsigned int i = 0; i < nargs; i++)
3510 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3511 continue;
3512 if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
3514 if (i < ctx.m_known_aggs.length ()
3515 && !ctx.m_known_aggs[i].is_empty ())
3516 return false;
3517 continue;
3519 if (i >= ctx.m_known_aggs.length ()
3520 || ctx.m_known_aggs[i].is_empty ())
3522 if (i < m_known_aggs.length ()
3523 && !m_known_aggs[i].is_empty ())
3524 return false;
3525 continue;
3527 if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
3528 return false;
3531 return true;
3534 /* Estimate size and time needed to execute call in the given context.
3535 Additionally determine hints determined by the context. Finally compute
3536 minimal size needed for the call that is independent on the call context and
3537 can be used for fast estimates. Return the values in RET_SIZE,
3538 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3540 void
3541 ipa_call_context::estimate_size_and_time (int *ret_size,
3542 int *ret_min_size,
3543 sreal *ret_time,
3544 sreal *ret_nonspecialized_time,
3545 ipa_hints *ret_hints)
3547 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3548 size_time_entry *e;
3549 int size = 0;
3550 sreal time = 0;
3551 int min_size = 0;
3552 ipa_hints hints = 0;
3553 int i;
3555 if (dump_file && (dump_flags & TDF_DETAILS))
3557 bool found = false;
3558 fprintf (dump_file, " Estimating body: %s\n"
3559 " Known to be false: ", m_node->dump_name ());
3561 for (i = predicate::not_inlined_condition;
3562 i < (predicate::first_dynamic_condition
3563 + (int) vec_safe_length (info->conds)); i++)
3564 if (!(m_possible_truths & (1 << i)))
3566 if (found)
3567 fprintf (dump_file, ", ");
3568 found = true;
3569 dump_condition (dump_file, info->conds, i);
3573 if (m_node->callees || m_node->indirect_calls)
3574 estimate_calls_size_and_time (m_node, &size, &min_size,
3575 ret_time ? &time : NULL,
3576 ret_hints ? &hints : NULL, m_possible_truths,
3577 m_known_vals, m_known_contexts, m_known_aggs);
3579 sreal nonspecialized_time = time;
3581 min_size += (*info->size_time_table)[0].size;
3582 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3584 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3586 /* Because predicates are conservative, it can happen that nonconst is 1
3587 but exec is 0. */
3588 if (exec)
3590 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3592 gcc_checking_assert (e->time >= 0);
3593 gcc_checking_assert (time >= 0);
3595 /* We compute specialized size only because size of nonspecialized
3596 copy is context independent.
3598 The difference between nonspecialized execution and specialized is
3599 that nonspecialized is not going to have optimized out computations
3600 known to be constant in a specialized setting. */
3601 if (nonconst)
3602 size += e->size;
3603 if (!ret_time)
3604 continue;
3605 nonspecialized_time += e->time;
3606 if (!nonconst)
3608 else if (!m_inline_param_summary.exists ())
3610 if (nonconst)
3611 time += e->time;
3613 else
3615 int prob = e->nonconst_predicate.probability
3616 (info->conds, m_possible_truths,
3617 m_inline_param_summary);
3618 gcc_checking_assert (prob >= 0);
3619 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3620 if (prob == REG_BR_PROB_BASE)
3621 time += e->time;
3622 else
3623 time += e->time * prob / REG_BR_PROB_BASE;
3625 gcc_checking_assert (time >= 0);
3628 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3629 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3630 gcc_checking_assert (min_size >= 0);
3631 gcc_checking_assert (size >= 0);
3632 gcc_checking_assert (time >= 0);
3633 /* nonspecialized_time should be always bigger than specialized time.
3634 Roundoff issues however may get into the way. */
3635 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3637 /* Roundoff issues may make specialized time bigger than nonspecialized
3638 time. We do not really want that to happen because some heuristics
3639 may get confused by seeing negative speedups. */
3640 if (time > nonspecialized_time)
3641 time = nonspecialized_time;
3643 if (ret_hints)
3645 if (info->loop_iterations
3646 && !info->loop_iterations->evaluate (m_possible_truths))
3647 hints |= INLINE_HINT_loop_iterations;
3648 if (info->loop_stride
3649 && !info->loop_stride->evaluate (m_possible_truths))
3650 hints |= INLINE_HINT_loop_stride;
3651 if (info->scc_no)
3652 hints |= INLINE_HINT_in_scc;
3653 if (DECL_DECLARED_INLINE_P (m_node->decl))
3654 hints |= INLINE_HINT_declared_inline;
3657 size = RDIV (size, ipa_fn_summary::size_scale);
3658 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3660 if (dump_file && (dump_flags & TDF_DETAILS))
3661 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3662 time.to_double (), nonspecialized_time.to_double ());
3663 if (ret_time)
3664 *ret_time = time;
3665 if (ret_nonspecialized_time)
3666 *ret_nonspecialized_time = nonspecialized_time;
3667 if (ret_size)
3668 *ret_size = size;
3669 if (ret_min_size)
3670 *ret_min_size = min_size;
3671 if (ret_hints)
3672 *ret_hints = hints;
3673 return;
3677 /* Estimate size and time needed to execute callee of EDGE assuming that
3678 parameters known to be constant at caller of EDGE are propagated.
3679 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3680 and types for parameters. */
3682 void
3683 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3684 vec<tree> known_vals,
3685 vec<ipa_polymorphic_call_context>
3686 known_contexts,
3687 vec<ipa_agg_value_set> known_aggs,
3688 int *ret_size, sreal *ret_time,
3689 sreal *ret_nonspec_time,
3690 ipa_hints *hints)
3692 clause_t clause, nonspec_clause;
3694 /* TODO: Also pass known value ranges. */
3695 evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3696 known_aggs, &clause, &nonspec_clause);
3697 ipa_call_context ctx (node, clause, nonspec_clause,
3698 known_vals, known_contexts,
3699 known_aggs, vNULL);
3700 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3701 ret_nonspec_time, hints);
3704 /* Return stack frame offset where frame of NODE is supposed to start inside
3705 of the function it is inlined to.
3706 Return 0 for functions that are not inlined. */
3708 HOST_WIDE_INT
3709 ipa_get_stack_frame_offset (struct cgraph_node *node)
3711 HOST_WIDE_INT offset = 0;
3712 if (!node->inlined_to)
3713 return 0;
3714 node = node->callers->caller;
3715 while (true)
3717 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3718 if (!node->inlined_to)
3719 return offset;
3720 node = node->callers->caller;
3725 /* Update summary information of inline clones after inlining.
3726 Compute peak stack usage. */
3728 static void
3729 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3731 struct cgraph_edge *e;
3733 ipa_propagate_frequency (node);
3734 for (e = node->callees; e; e = e->next_callee)
3736 if (!e->inline_failed)
3737 inline_update_callee_summaries (e->callee, depth);
3738 else
3739 ipa_call_summaries->get (e)->loop_depth += depth;
3741 for (e = node->indirect_calls; e; e = e->next_callee)
3742 ipa_call_summaries->get (e)->loop_depth += depth;
3745 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3746 When function A is inlined in B and A calls C with parameter that
3747 changes with probability PROB1 and C is known to be passthrough
3748 of argument if B that change with probability PROB2, the probability
3749 of change is now PROB1*PROB2. */
3751 static void
3752 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3753 struct cgraph_edge *edge)
3755 if (ipa_node_params_sum)
3757 int i;
3758 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3759 if (!args)
3760 return;
3761 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3762 class ipa_call_summary *inlined_es
3763 = ipa_call_summaries->get (inlined_edge);
3765 if (es->param.length () == 0)
3766 return;
3768 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3770 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3771 if (jfunc->type == IPA_JF_PASS_THROUGH
3772 || jfunc->type == IPA_JF_ANCESTOR)
3774 int id = jfunc->type == IPA_JF_PASS_THROUGH
3775 ? ipa_get_jf_pass_through_formal_id (jfunc)
3776 : ipa_get_jf_ancestor_formal_id (jfunc);
3777 if (id < (int) inlined_es->param.length ())
3779 int prob1 = es->param[i].change_prob;
3780 int prob2 = inlined_es->param[id].change_prob;
3781 int prob = combine_probabilities (prob1, prob2);
3783 if (prob1 && prob2 && !prob)
3784 prob = 1;
3786 es->param[i].change_prob = prob;
3793 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3795 Remap predicates of callees of NODE. Rest of arguments match
3796 remap_predicate.
3798 Also update change probabilities. */
3800 static void
3801 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3802 struct cgraph_node *node,
3803 class ipa_fn_summary *info,
3804 class ipa_node_params *params_summary,
3805 class ipa_fn_summary *callee_info,
3806 vec<int> operand_map,
3807 vec<int> offset_map,
3808 clause_t possible_truths,
3809 predicate *toplev_predicate)
3811 struct cgraph_edge *e, *next;
3812 for (e = node->callees; e; e = next)
3814 predicate p;
3815 next = e->next_callee;
3817 if (e->inline_failed)
3819 class ipa_call_summary *es = ipa_call_summaries->get (e);
3820 remap_edge_change_prob (inlined_edge, e);
3822 if (es->predicate)
3824 p = es->predicate->remap_after_inlining
3825 (info, params_summary,
3826 callee_info, operand_map,
3827 offset_map, possible_truths,
3828 *toplev_predicate);
3829 edge_set_predicate (e, &p);
3831 else
3832 edge_set_predicate (e, toplev_predicate);
3834 else
3835 remap_edge_summaries (inlined_edge, e->callee, info,
3836 params_summary, callee_info,
3837 operand_map, offset_map, possible_truths,
3838 toplev_predicate);
3840 for (e = node->indirect_calls; e; e = next)
3842 class ipa_call_summary *es = ipa_call_summaries->get (e);
3843 predicate p;
3844 next = e->next_callee;
3846 remap_edge_change_prob (inlined_edge, e);
3847 if (es->predicate)
3849 p = es->predicate->remap_after_inlining
3850 (info, params_summary,
3851 callee_info, operand_map, offset_map,
3852 possible_truths, *toplev_predicate);
3853 edge_set_predicate (e, &p);
3855 else
3856 edge_set_predicate (e, toplev_predicate);
3860 /* Same as remap_predicate, but set result into hint *HINT. */
3862 static void
3863 remap_hint_predicate (class ipa_fn_summary *info,
3864 class ipa_node_params *params_summary,
3865 class ipa_fn_summary *callee_info,
3866 predicate **hint,
3867 vec<int> operand_map,
3868 vec<int> offset_map,
3869 clause_t possible_truths,
3870 predicate *toplev_predicate)
3872 predicate p;
3874 if (!*hint)
3875 return;
3876 p = (*hint)->remap_after_inlining
3877 (info, params_summary, callee_info,
3878 operand_map, offset_map,
3879 possible_truths, *toplev_predicate);
3880 if (p != false && p != true)
3882 if (!*hint)
3883 set_hint_predicate (hint, p);
3884 else
3885 **hint &= p;
3889 /* We inlined EDGE. Update summary of the function we inlined into. */
3891 void
3892 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3894 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3895 struct cgraph_node *to = (edge->caller->inlined_to
3896 ? edge->caller->inlined_to : edge->caller);
3897 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
3898 clause_t clause = 0; /* not_inline is known to be false. */
3899 size_time_entry *e;
3900 auto_vec<int, 8> operand_map;
3901 auto_vec<int, 8> offset_map;
3902 int i;
3903 predicate toplev_predicate;
3904 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3905 class ipa_node_params *params_summary = (ipa_node_params_sum
3906 ? IPA_NODE_REF (to) : NULL);
3908 if (es->predicate)
3909 toplev_predicate = *es->predicate;
3910 else
3911 toplev_predicate = true;
3913 info->fp_expressions |= callee_info->fp_expressions;
3915 if (callee_info->conds)
3917 auto_vec<tree, 32> known_vals;
3918 auto_vec<ipa_agg_value_set, 32> known_aggs;
3919 evaluate_properties_for_edge (edge, true, &clause, NULL,
3920 &known_vals, NULL, &known_aggs);
3922 if (ipa_node_params_sum && callee_info->conds)
3924 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3925 int count = args ? ipa_get_cs_argument_count (args) : 0;
3926 int i;
3928 if (count)
3930 operand_map.safe_grow_cleared (count, true);
3931 offset_map.safe_grow_cleared (count, true);
3933 for (i = 0; i < count; i++)
3935 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3936 int map = -1;
3938 /* TODO: handle non-NOPs when merging. */
3939 if (jfunc->type == IPA_JF_PASS_THROUGH)
3941 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3942 map = ipa_get_jf_pass_through_formal_id (jfunc);
3943 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3944 offset_map[i] = -1;
3946 else if (jfunc->type == IPA_JF_ANCESTOR)
3948 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3949 if (offset >= 0 && offset < INT_MAX)
3951 map = ipa_get_jf_ancestor_formal_id (jfunc);
3952 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3953 offset = -1;
3954 offset_map[i] = offset;
3957 operand_map[i] = map;
3958 gcc_assert (map < ipa_get_param_count (params_summary));
3961 sreal freq = edge->sreal_frequency ();
3962 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3964 predicate p;
3965 p = e->exec_predicate.remap_after_inlining
3966 (info, params_summary,
3967 callee_info, operand_map,
3968 offset_map, clause,
3969 toplev_predicate);
3970 predicate nonconstp;
3971 nonconstp = e->nonconst_predicate.remap_after_inlining
3972 (info, params_summary,
3973 callee_info, operand_map,
3974 offset_map, clause,
3975 toplev_predicate);
3976 if (p != false && nonconstp != false)
3978 sreal add_time = ((sreal)e->time * freq);
3979 int prob = e->nonconst_predicate.probability (callee_info->conds,
3980 clause, es->param);
3981 if (prob != REG_BR_PROB_BASE)
3982 add_time = add_time * prob / REG_BR_PROB_BASE;
3983 if (prob != REG_BR_PROB_BASE
3984 && dump_file && (dump_flags & TDF_DETAILS))
3986 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3987 (double) prob / REG_BR_PROB_BASE);
3989 info->account_size_time (e->size, add_time, p, nonconstp);
3992 remap_edge_summaries (edge, edge->callee, info, params_summary,
3993 callee_info, operand_map,
3994 offset_map, clause, &toplev_predicate);
3995 remap_hint_predicate (info, params_summary, callee_info,
3996 &callee_info->loop_iterations,
3997 operand_map, offset_map, clause, &toplev_predicate);
3998 remap_hint_predicate (info, params_summary, callee_info,
3999 &callee_info->loop_stride,
4000 operand_map, offset_map, clause, &toplev_predicate);
4002 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4003 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4005 if (info->estimated_stack_size < peak)
4006 info->estimated_stack_size = peak;
4008 inline_update_callee_summaries (edge->callee, es->loop_depth);
4009 if (info->call_size_time_table)
4011 int edge_size = 0;
4012 sreal edge_time = 0;
4014 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL,
4015 vNULL, vNULL, 0);
4016 /* Unaccount size and time of the optimized out call. */
4017 info->account_size_time (-edge_size, -edge_time,
4018 es->predicate ? *es->predicate : true,
4019 es->predicate ? *es->predicate : true,
4020 true);
4021 /* Account new calls. */
4022 summarize_calls_size_and_time (edge->callee, info);
4025 /* Free summaries that are not maintained for inline clones/edges. */
4026 ipa_call_summaries->remove (edge);
4027 ipa_fn_summaries->remove (edge->callee);
4028 ipa_remove_from_growth_caches (edge);
4031 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4032 overall size and time. Recompute it.
4033 If RESET is true also recompute call_time_size_table. */
4035 void
4036 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4038 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4039 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4040 size_time_entry *e;
4041 int i;
4043 size_info->size = 0;
4044 info->time = 0;
4045 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4047 size_info->size += e->size;
4048 info->time += e->time;
4050 info->min_size = (*info->size_time_table)[0].size;
4051 if (reset)
4052 vec_free (info->call_size_time_table);
4053 if (node->callees || node->indirect_calls)
4054 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4055 &info->time, NULL,
4056 ~(clause_t) (1 << predicate::false_condition),
4057 vNULL, vNULL, vNULL);
4058 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4059 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4063 /* This function performs intraprocedural analysis in NODE that is required to
4064 inline indirect calls. */
4066 static void
4067 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4069 ipa_analyze_node (node);
4070 if (dump_file && (dump_flags & TDF_DETAILS))
4072 ipa_print_node_params (dump_file, node);
4073 ipa_print_node_jump_functions (dump_file, node);
4078 /* Note function body size. */
4080 void
4081 inline_analyze_function (struct cgraph_node *node)
4083 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4085 if (dump_file)
4086 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4087 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4088 inline_indirect_intraprocedural_analysis (node);
4089 compute_fn_summary (node, false);
4090 if (!optimize)
4092 struct cgraph_edge *e;
4093 for (e = node->callees; e; e = e->next_callee)
4094 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4095 for (e = node->indirect_calls; e; e = e->next_callee)
4096 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4099 pop_cfun ();
4103 /* Called when new function is inserted to callgraph late. */
4105 void
4106 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4108 inline_analyze_function (node);
4111 /* Note function body size. */
4113 static void
4114 ipa_fn_summary_generate (void)
4116 struct cgraph_node *node;
4118 FOR_EACH_DEFINED_FUNCTION (node)
4119 if (DECL_STRUCT_FUNCTION (node->decl))
4120 node->versionable = tree_versionable_function_p (node->decl);
4122 ipa_fn_summary_alloc ();
4124 ipa_fn_summaries->enable_insertion_hook ();
4126 ipa_register_cgraph_hooks ();
4128 FOR_EACH_DEFINED_FUNCTION (node)
4129 if (!node->alias
4130 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4131 || opt_for_fn (node->decl, optimize)))
4132 inline_analyze_function (node);
4136 /* Write inline summary for edge E to OB. */
4138 static void
4139 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4140 bool prevails)
4142 class ipa_call_summary *es = prevails
4143 ? ipa_call_summaries->get_create (e) : NULL;
4144 predicate p;
4145 int length, i;
4147 int size = streamer_read_uhwi (ib);
4148 int time = streamer_read_uhwi (ib);
4149 int depth = streamer_read_uhwi (ib);
4151 if (es)
4153 es->call_stmt_size = size;
4154 es->call_stmt_time = time;
4155 es->loop_depth = depth;
4158 bitpack_d bp = streamer_read_bitpack (ib);
4159 if (es)
4160 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4161 else
4162 bp_unpack_value (&bp, 1);
4164 p.stream_in (ib);
4165 if (es)
4166 edge_set_predicate (e, &p);
4167 length = streamer_read_uhwi (ib);
4168 if (length && es && e->possibly_call_in_translation_unit_p ())
4170 es->param.safe_grow_cleared (length, true);
4171 for (i = 0; i < length; i++)
4172 es->param[i].change_prob = streamer_read_uhwi (ib);
4174 else
4176 for (i = 0; i < length; i++)
4177 streamer_read_uhwi (ib);
4182 /* Stream in inline summaries from the section. */
4184 static void
4185 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4186 size_t len)
4188 const struct lto_function_header *header =
4189 (const struct lto_function_header *) data;
4190 const int cfg_offset = sizeof (struct lto_function_header);
4191 const int main_offset = cfg_offset + header->cfg_size;
4192 const int string_offset = main_offset + header->main_size;
4193 class data_in *data_in;
4194 unsigned int i, count2, j;
4195 unsigned int f_count;
4197 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4198 file_data->mode_table);
4200 data_in =
4201 lto_data_in_create (file_data, (const char *) data + string_offset,
4202 header->string_size, vNULL);
4203 f_count = streamer_read_uhwi (&ib);
4204 for (i = 0; i < f_count; i++)
4206 unsigned int index;
4207 struct cgraph_node *node;
4208 class ipa_fn_summary *info;
4209 class ipa_node_params *params_summary;
4210 class ipa_size_summary *size_info;
4211 lto_symtab_encoder_t encoder;
4212 struct bitpack_d bp;
4213 struct cgraph_edge *e;
4214 predicate p;
4216 index = streamer_read_uhwi (&ib);
4217 encoder = file_data->symtab_node_encoder;
4218 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4219 index));
4220 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4221 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
4222 size_info = node->prevailing_p ()
4223 ? ipa_size_summaries->get_create (node) : NULL;
4225 int stack_size = streamer_read_uhwi (&ib);
4226 int size = streamer_read_uhwi (&ib);
4227 sreal time = sreal::stream_in (&ib);
4229 if (info)
4231 info->estimated_stack_size
4232 = size_info->estimated_self_stack_size = stack_size;
4233 size_info->size = size_info->self_size = size;
4234 info->time = time;
4237 bp = streamer_read_bitpack (&ib);
4238 if (info)
4240 info->inlinable = bp_unpack_value (&bp, 1);
4241 info->fp_expressions = bp_unpack_value (&bp, 1);
4243 else
4245 bp_unpack_value (&bp, 1);
4246 bp_unpack_value (&bp, 1);
4249 count2 = streamer_read_uhwi (&ib);
4250 gcc_assert (!info || !info->conds);
4251 if (info)
4252 vec_safe_reserve_exact (info->conds, count2);
4253 for (j = 0; j < count2; j++)
4255 struct condition c;
4256 unsigned int k, count3;
4257 c.operand_num = streamer_read_uhwi (&ib);
4258 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4259 c.type = stream_read_tree (&ib, data_in);
4260 c.val = stream_read_tree (&ib, data_in);
4261 bp = streamer_read_bitpack (&ib);
4262 c.agg_contents = bp_unpack_value (&bp, 1);
4263 c.by_ref = bp_unpack_value (&bp, 1);
4264 if (c.agg_contents)
4265 c.offset = streamer_read_uhwi (&ib);
4266 count3 = streamer_read_uhwi (&ib);
4267 c.param_ops = NULL;
4268 if (info)
4269 vec_safe_reserve_exact (c.param_ops, count3);
4270 if (params_summary)
4271 ipa_set_param_used_by_ipa_predicates
4272 (params_summary, c.operand_num, true);
4273 for (k = 0; k < count3; k++)
4275 struct expr_eval_op op;
4276 enum gimple_rhs_class rhs_class;
4277 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4278 op.type = stream_read_tree (&ib, data_in);
4279 switch (rhs_class = get_gimple_rhs_class (op.code))
4281 case GIMPLE_UNARY_RHS:
4282 op.index = 0;
4283 op.val[0] = NULL_TREE;
4284 op.val[1] = NULL_TREE;
4285 break;
4287 case GIMPLE_BINARY_RHS:
4288 case GIMPLE_TERNARY_RHS:
4289 bp = streamer_read_bitpack (&ib);
4290 op.index = bp_unpack_value (&bp, 2);
4291 op.val[0] = stream_read_tree (&ib, data_in);
4292 if (rhs_class == GIMPLE_BINARY_RHS)
4293 op.val[1] = NULL_TREE;
4294 else
4295 op.val[1] = stream_read_tree (&ib, data_in);
4296 break;
4298 default:
4299 fatal_error (UNKNOWN_LOCATION,
4300 "invalid fnsummary in LTO stream");
4302 if (info)
4303 c.param_ops->quick_push (op);
4305 if (info)
4306 info->conds->quick_push (c);
4308 count2 = streamer_read_uhwi (&ib);
4309 gcc_assert (!info || !info->size_time_table);
4310 if (info && count2)
4311 vec_safe_reserve_exact (info->size_time_table, count2);
4312 for (j = 0; j < count2; j++)
4314 class size_time_entry e;
4316 e.size = streamer_read_uhwi (&ib);
4317 e.time = sreal::stream_in (&ib);
4318 e.exec_predicate.stream_in (&ib);
4319 e.nonconst_predicate.stream_in (&ib);
4321 if (info)
4322 info->size_time_table->quick_push (e);
4325 p.stream_in (&ib);
4326 if (info)
4327 set_hint_predicate (&info->loop_iterations, p);
4328 p.stream_in (&ib);
4329 if (info)
4330 set_hint_predicate (&info->loop_stride, p);
4331 for (e = node->callees; e; e = e->next_callee)
4332 read_ipa_call_summary (&ib, e, info != NULL);
4333 for (e = node->indirect_calls; e; e = e->next_callee)
4334 read_ipa_call_summary (&ib, e, info != NULL);
4337 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4338 len);
4339 lto_data_in_delete (data_in);
4343 /* Read inline summary. Jump functions are shared among ipa-cp
4344 and inliner, so when ipa-cp is active, we don't need to write them
4345 twice. */
4347 static void
4348 ipa_fn_summary_read (void)
4350 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4351 struct lto_file_decl_data *file_data;
4352 unsigned int j = 0;
4354 ipa_fn_summary_alloc ();
4356 while ((file_data = file_data_vec[j++]))
4358 size_t len;
4359 const char *data
4360 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4361 &len);
4362 if (data)
4363 inline_read_section (file_data, data, len);
4364 else
4365 /* Fatal error here. We do not want to support compiling ltrans units
4366 with different version of compiler or different flags than the WPA
4367 unit, so this should never happen. */
4368 fatal_error (input_location,
4369 "ipa inline summary is missing in input file");
4371 ipa_register_cgraph_hooks ();
4372 if (!flag_ipa_cp)
4373 ipa_prop_read_jump_functions ();
4375 gcc_assert (ipa_fn_summaries);
4376 ipa_fn_summaries->enable_insertion_hook ();
4380 /* Write inline summary for edge E to OB. */
4382 static void
4383 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4385 class ipa_call_summary *es = ipa_call_summaries->get (e);
4386 int i;
4388 streamer_write_uhwi (ob, es->call_stmt_size);
4389 streamer_write_uhwi (ob, es->call_stmt_time);
4390 streamer_write_uhwi (ob, es->loop_depth);
4392 bitpack_d bp = bitpack_create (ob->main_stream);
4393 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4394 streamer_write_bitpack (&bp);
4396 if (es->predicate)
4397 es->predicate->stream_out (ob);
4398 else
4399 streamer_write_uhwi (ob, 0);
4400 streamer_write_uhwi (ob, es->param.length ());
4401 for (i = 0; i < (int) es->param.length (); i++)
4402 streamer_write_uhwi (ob, es->param[i].change_prob);
4406 /* Write inline summary for node in SET.
4407 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4408 active, we don't need to write them twice. */
4410 static void
4411 ipa_fn_summary_write (void)
4413 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4414 lto_symtab_encoder_iterator lsei;
4415 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4416 unsigned int count = 0;
4418 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4419 lsei_next_function_in_partition (&lsei))
4421 cgraph_node *cnode = lsei_cgraph_node (lsei);
4422 if (cnode->definition && !cnode->alias)
4423 count++;
4425 streamer_write_uhwi (ob, count);
4427 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4428 lsei_next_function_in_partition (&lsei))
4430 cgraph_node *cnode = lsei_cgraph_node (lsei);
4431 if (cnode->definition && !cnode->alias)
4433 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4434 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4435 struct bitpack_d bp;
4436 struct cgraph_edge *edge;
4437 int i;
4438 size_time_entry *e;
4439 struct condition *c;
4441 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4442 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4443 streamer_write_hwi (ob, size_info->self_size);
4444 info->time.stream_out (ob);
4445 bp = bitpack_create (ob->main_stream);
4446 bp_pack_value (&bp, info->inlinable, 1);
4447 bp_pack_value (&bp, false, 1);
4448 bp_pack_value (&bp, info->fp_expressions, 1);
4449 streamer_write_bitpack (&bp);
4450 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4451 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4453 int j;
4454 struct expr_eval_op *op;
4456 streamer_write_uhwi (ob, c->operand_num);
4457 streamer_write_uhwi (ob, c->code);
4458 stream_write_tree (ob, c->type, true);
4459 stream_write_tree (ob, c->val, true);
4460 bp = bitpack_create (ob->main_stream);
4461 bp_pack_value (&bp, c->agg_contents, 1);
4462 bp_pack_value (&bp, c->by_ref, 1);
4463 streamer_write_bitpack (&bp);
4464 if (c->agg_contents)
4465 streamer_write_uhwi (ob, c->offset);
4466 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4467 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4469 streamer_write_uhwi (ob, op->code);
4470 stream_write_tree (ob, op->type, true);
4471 if (op->val[0])
4473 bp = bitpack_create (ob->main_stream);
4474 bp_pack_value (&bp, op->index, 2);
4475 streamer_write_bitpack (&bp);
4476 stream_write_tree (ob, op->val[0], true);
4477 if (op->val[1])
4478 stream_write_tree (ob, op->val[1], true);
4482 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4483 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4485 streamer_write_uhwi (ob, e->size);
4486 e->time.stream_out (ob);
4487 e->exec_predicate.stream_out (ob);
4488 e->nonconst_predicate.stream_out (ob);
4490 if (info->loop_iterations)
4491 info->loop_iterations->stream_out (ob);
4492 else
4493 streamer_write_uhwi (ob, 0);
4494 if (info->loop_stride)
4495 info->loop_stride->stream_out (ob);
4496 else
4497 streamer_write_uhwi (ob, 0);
4498 for (edge = cnode->callees; edge; edge = edge->next_callee)
4499 write_ipa_call_summary (ob, edge);
4500 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4501 write_ipa_call_summary (ob, edge);
4504 streamer_write_char_stream (ob->main_stream, 0);
4505 produce_asm (ob, NULL);
4506 destroy_output_block (ob);
4508 if (!flag_ipa_cp)
4509 ipa_prop_write_jump_functions ();
4513 /* Release function summary. */
4515 void
4516 ipa_free_fn_summary (void)
4518 if (!ipa_call_summaries)
4519 return;
4520 ggc_delete (ipa_fn_summaries);
4521 ipa_fn_summaries = NULL;
4522 delete ipa_call_summaries;
4523 ipa_call_summaries = NULL;
4524 edge_predicate_pool.release ();
4525 /* During IPA this is one of largest datastructures to release. */
4526 if (flag_wpa)
4527 ggc_trim ();
4530 /* Release function summary. */
4532 void
4533 ipa_free_size_summary (void)
4535 if (!ipa_size_summaries)
4536 return;
4537 delete ipa_size_summaries;
4538 ipa_size_summaries = NULL;
4541 namespace {
4543 const pass_data pass_data_local_fn_summary =
4545 GIMPLE_PASS, /* type */
4546 "local-fnsummary", /* name */
4547 OPTGROUP_INLINE, /* optinfo_flags */
4548 TV_INLINE_PARAMETERS, /* tv_id */
4549 0, /* properties_required */
4550 0, /* properties_provided */
4551 0, /* properties_destroyed */
4552 0, /* todo_flags_start */
4553 0, /* todo_flags_finish */
4556 class pass_local_fn_summary : public gimple_opt_pass
4558 public:
4559 pass_local_fn_summary (gcc::context *ctxt)
4560 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4563 /* opt_pass methods: */
4564 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4565 virtual unsigned int execute (function *)
4567 return compute_fn_summary_for_current ();
4570 }; // class pass_local_fn_summary
4572 } // anon namespace
4574 gimple_opt_pass *
4575 make_pass_local_fn_summary (gcc::context *ctxt)
4577 return new pass_local_fn_summary (ctxt);
4581 /* Free inline summary. */
4583 namespace {
4585 const pass_data pass_data_ipa_free_fn_summary =
4587 SIMPLE_IPA_PASS, /* type */
4588 "free-fnsummary", /* name */
4589 OPTGROUP_NONE, /* optinfo_flags */
4590 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4591 0, /* properties_required */
4592 0, /* properties_provided */
4593 0, /* properties_destroyed */
4594 0, /* todo_flags_start */
4595 0, /* todo_flags_finish */
4598 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4600 public:
4601 pass_ipa_free_fn_summary (gcc::context *ctxt)
4602 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4603 small_p (false)
4606 /* opt_pass methods: */
4607 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4608 void set_pass_param (unsigned int n, bool param)
4610 gcc_assert (n == 0);
4611 small_p = param;
4613 virtual bool gate (function *) { return true; }
4614 virtual unsigned int execute (function *)
4616 ipa_free_fn_summary ();
4617 if (!flag_wpa)
4618 ipa_free_size_summary ();
4619 return 0;
4622 private:
4623 bool small_p;
4624 }; // class pass_ipa_free_fn_summary
4626 } // anon namespace
4628 simple_ipa_opt_pass *
4629 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4631 return new pass_ipa_free_fn_summary (ctxt);
4634 namespace {
4636 const pass_data pass_data_ipa_fn_summary =
4638 IPA_PASS, /* type */
4639 "fnsummary", /* name */
4640 OPTGROUP_INLINE, /* optinfo_flags */
4641 TV_IPA_FNSUMMARY, /* tv_id */
4642 0, /* properties_required */
4643 0, /* properties_provided */
4644 0, /* properties_destroyed */
4645 0, /* todo_flags_start */
4646 ( TODO_dump_symtab ), /* todo_flags_finish */
4649 class pass_ipa_fn_summary : public ipa_opt_pass_d
4651 public:
4652 pass_ipa_fn_summary (gcc::context *ctxt)
4653 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4654 ipa_fn_summary_generate, /* generate_summary */
4655 ipa_fn_summary_write, /* write_summary */
4656 ipa_fn_summary_read, /* read_summary */
4657 NULL, /* write_optimization_summary */
4658 NULL, /* read_optimization_summary */
4659 NULL, /* stmt_fixup */
4660 0, /* function_transform_todo_flags_start */
4661 NULL, /* function_transform */
4662 NULL) /* variable_transform */
4665 /* opt_pass methods: */
4666 virtual unsigned int execute (function *) { return 0; }
4668 }; // class pass_ipa_fn_summary
4670 } // anon namespace
4672 ipa_opt_pass_d *
4673 make_pass_ipa_fn_summary (gcc::context *ctxt)
4675 return new pass_ipa_fn_summary (ctxt);
4678 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4679 within the same process. For use by toplev::finalize. */
4681 void
4682 ipa_fnsummary_c_finalize (void)
4684 ipa_free_fn_summary ();